{ "best_metric": 1.0812546, "best_model_checkpoint": "/mnt/bn/haiyang-dataset-lq/medical/outputfi/qwen2-vl-2b-instruct/v11-20241103-174447/checkpoint-20000", "epoch": 4.0, "eval_steps": 10000, "global_step": 147732, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.45148322, "epoch": 2.7076056643110497e-05, "grad_norm": 346.446533203125, "learning_rate": 0.0, "loss": 3.9020741, "memory(GiB)": 8.79, "step": 1, "train_speed(iter/s)": 0.028447 }, { "acc": 0.42280599, "epoch": 0.00013538028321555249, "grad_norm": 280.17205810546875, "learning_rate": 1.8068392615488047e-06, "loss": 4.03564072, "memory(GiB)": 28.45, "step": 5, "train_speed(iter/s)": 0.11262 }, { "acc": 0.46291366, "epoch": 0.00027076056643110497, "grad_norm": 213.1456756591797, "learning_rate": 2.5850025757045154e-06, "loss": 3.68787003, "memory(GiB)": 28.45, "step": 10, "train_speed(iter/s)": 0.178576 }, { "acc": 0.59757223, "epoch": 0.0004061408496466575, "grad_norm": 90.66999053955078, "learning_rate": 3.0401989339225023e-06, "loss": 2.66619225, "memory(GiB)": 28.46, "step": 15, "train_speed(iter/s)": 0.222098 }, { "acc": 0.64061618, "epoch": 0.0005415211328622099, "grad_norm": 179.97215270996094, "learning_rate": 3.3631658898602256e-06, "loss": 2.48459263, "memory(GiB)": 28.46, "step": 20, "train_speed(iter/s)": 0.253032 }, { "acc": 0.66885128, "epoch": 0.0006769014160777625, "grad_norm": 39.00424575805664, "learning_rate": 3.6136785230976095e-06, "loss": 2.23407917, "memory(GiB)": 28.46, "step": 25, "train_speed(iter/s)": 0.27586 }, { "acc": 0.64537458, "epoch": 0.000812281699293315, "grad_norm": 144.207275390625, "learning_rate": 3.8183622480782125e-06, "loss": 2.30637627, "memory(GiB)": 28.46, "step": 30, "train_speed(iter/s)": 0.293757 }, { "acc": 0.69129181, "epoch": 0.0009476619825088674, "grad_norm": 41.6117057800293, "learning_rate": 3.991419871708496e-06, "loss": 2.07367706, "memory(GiB)": 28.46, "step": 35, "train_speed(iter/s)": 0.308089 }, { "acc": 0.67421389, "epoch": 0.0010830422657244199, "grad_norm": 65.31231689453125, "learning_rate": 4.141329204015936e-06, "loss": 2.08000641, "memory(GiB)": 28.46, "step": 40, "train_speed(iter/s)": 0.319637 }, { "acc": 0.67726688, "epoch": 0.0012184225489399723, "grad_norm": 65.36363220214844, "learning_rate": 4.2735586062962e-06, "loss": 2.06048431, "memory(GiB)": 28.47, "step": 45, "train_speed(iter/s)": 0.329279 }, { "acc": 0.68841658, "epoch": 0.001353802832155525, "grad_norm": 56.52592849731445, "learning_rate": 4.39184183725332e-06, "loss": 2.09315453, "memory(GiB)": 28.47, "step": 50, "train_speed(iter/s)": 0.337374 }, { "acc": 0.70032711, "epoch": 0.0014891831153710773, "grad_norm": 52.509063720703125, "learning_rate": 4.498842035002658e-06, "loss": 2.09425812, "memory(GiB)": 28.47, "step": 55, "train_speed(iter/s)": 0.344477 }, { "acc": 0.68044944, "epoch": 0.00162456339858663, "grad_norm": 48.859554290771484, "learning_rate": 4.5965255622339235e-06, "loss": 2.01463165, "memory(GiB)": 28.47, "step": 60, "train_speed(iter/s)": 0.350344 }, { "acc": 0.71200347, "epoch": 0.0017599436818021823, "grad_norm": 47.272159576416016, "learning_rate": 4.6863856964509e-06, "loss": 1.84007874, "memory(GiB)": 28.47, "step": 65, "train_speed(iter/s)": 0.355424 }, { "acc": 0.72653852, "epoch": 0.0018953239650177347, "grad_norm": 38.672950744628906, "learning_rate": 4.769583185864207e-06, "loss": 1.75063477, "memory(GiB)": 28.47, "step": 70, "train_speed(iter/s)": 0.360054 }, { "acc": 0.70482211, "epoch": 0.002030704248233287, "grad_norm": 24.17927360534668, "learning_rate": 4.847038195471307e-06, "loss": 1.81687145, "memory(GiB)": 28.47, "step": 75, "train_speed(iter/s)": 0.364087 }, { "acc": 0.65640478, "epoch": 0.0021660845314488398, "grad_norm": 37.3122444152832, "learning_rate": 4.919492518171646e-06, "loss": 2.09632397, "memory(GiB)": 28.47, "step": 80, "train_speed(iter/s)": 0.367781 }, { "acc": 0.72201424, "epoch": 0.0023014648146643924, "grad_norm": 37.58719253540039, "learning_rate": 4.987552892584486e-06, "loss": 1.68951111, "memory(GiB)": 28.47, "step": 85, "train_speed(iter/s)": 0.371179 }, { "acc": 0.7330864, "epoch": 0.0024368450978799446, "grad_norm": 33.3140869140625, "learning_rate": 5.051721920451911e-06, "loss": 1.53672142, "memory(GiB)": 28.47, "step": 90, "train_speed(iter/s)": 0.37422 }, { "acc": 0.71755047, "epoch": 0.002572225381095497, "grad_norm": 46.44352340698242, "learning_rate": 5.112420613703292e-06, "loss": 1.79071217, "memory(GiB)": 28.47, "step": 95, "train_speed(iter/s)": 0.376883 }, { "acc": 0.70878782, "epoch": 0.00270760566431105, "grad_norm": 46.86865234375, "learning_rate": 5.170005151409031e-06, "loss": 1.76887646, "memory(GiB)": 28.47, "step": 100, "train_speed(iter/s)": 0.379369 }, { "acc": 0.71399679, "epoch": 0.002842985947526602, "grad_norm": 29.735746383666992, "learning_rate": 5.224779544082193e-06, "loss": 1.69283524, "memory(GiB)": 28.47, "step": 105, "train_speed(iter/s)": 0.381665 }, { "acc": 0.72997365, "epoch": 0.0029783662307421546, "grad_norm": 62.102996826171875, "learning_rate": 5.277005349158368e-06, "loss": 1.58044806, "memory(GiB)": 28.47, "step": 110, "train_speed(iter/s)": 0.383701 }, { "acc": 0.74426751, "epoch": 0.0031137465139577073, "grad_norm": 28.140213012695312, "learning_rate": 5.326909225062818e-06, "loss": 1.60063095, "memory(GiB)": 28.47, "step": 115, "train_speed(iter/s)": 0.385616 }, { "acc": 0.72010956, "epoch": 0.00324912679717326, "grad_norm": 20.522037506103516, "learning_rate": 5.374688876389633e-06, "loss": 1.62195015, "memory(GiB)": 28.47, "step": 120, "train_speed(iter/s)": 0.387477 }, { "acc": 0.70541563, "epoch": 0.003384507080388812, "grad_norm": 32.7753791809082, "learning_rate": 5.420517784646415e-06, "loss": 1.71133995, "memory(GiB)": 28.47, "step": 125, "train_speed(iter/s)": 0.389197 }, { "acc": 0.72932739, "epoch": 0.0035198873636043647, "grad_norm": 40.58510208129883, "learning_rate": 5.46454901060661e-06, "loss": 1.44248943, "memory(GiB)": 28.47, "step": 130, "train_speed(iter/s)": 0.390711 }, { "acc": 0.71621628, "epoch": 0.0036552676468199173, "grad_norm": 24.753826141357422, "learning_rate": 5.506918278669898e-06, "loss": 1.60792427, "memory(GiB)": 28.47, "step": 135, "train_speed(iter/s)": 0.392046 }, { "acc": 0.68684249, "epoch": 0.0037906479300354695, "grad_norm": 72.22347259521484, "learning_rate": 5.547746500019917e-06, "loss": 1.67813339, "memory(GiB)": 28.47, "step": 140, "train_speed(iter/s)": 0.393398 }, { "acc": 0.74723558, "epoch": 0.0039260282132510226, "grad_norm": 47.14006423950195, "learning_rate": 5.5871418528227325e-06, "loss": 1.40591602, "memory(GiB)": 28.47, "step": 145, "train_speed(iter/s)": 0.394636 }, { "acc": 0.75660648, "epoch": 0.004061408496466574, "grad_norm": 25.869583129882812, "learning_rate": 5.625201509627017e-06, "loss": 1.49471169, "memory(GiB)": 28.47, "step": 150, "train_speed(iter/s)": 0.395795 }, { "acc": 0.73472042, "epoch": 0.004196788779682127, "grad_norm": 43.633270263671875, "learning_rate": 5.662013081417449e-06, "loss": 1.55185108, "memory(GiB)": 28.47, "step": 155, "train_speed(iter/s)": 0.396881 }, { "acc": 0.7396801, "epoch": 0.0043321690628976795, "grad_norm": 34.58249282836914, "learning_rate": 5.697655832327357e-06, "loss": 1.52121353, "memory(GiB)": 28.47, "step": 160, "train_speed(iter/s)": 0.397815 }, { "acc": 0.7250124, "epoch": 0.004467549346113232, "grad_norm": 29.288068771362305, "learning_rate": 5.732201707376355e-06, "loss": 1.54146595, "memory(GiB)": 28.47, "step": 165, "train_speed(iter/s)": 0.398821 }, { "acc": 0.7303894, "epoch": 0.004602929629328785, "grad_norm": 31.657745361328125, "learning_rate": 5.765716206740196e-06, "loss": 1.5895195, "memory(GiB)": 28.47, "step": 170, "train_speed(iter/s)": 0.399761 }, { "acc": 0.74661703, "epoch": 0.004738309912544337, "grad_norm": 31.884483337402344, "learning_rate": 5.798259133257301e-06, "loss": 1.3361124, "memory(GiB)": 28.47, "step": 175, "train_speed(iter/s)": 0.400705 }, { "acc": 0.69479675, "epoch": 0.004873690195759889, "grad_norm": 76.82917785644531, "learning_rate": 5.829885234607621e-06, "loss": 1.76387405, "memory(GiB)": 28.47, "step": 180, "train_speed(iter/s)": 0.401449 }, { "acc": 0.75952878, "epoch": 0.005009070478975442, "grad_norm": 14.232649803161621, "learning_rate": 5.860644757486248e-06, "loss": 1.44493847, "memory(GiB)": 28.47, "step": 185, "train_speed(iter/s)": 0.402245 }, { "acc": 0.75111537, "epoch": 0.005144450762190994, "grad_norm": 207.5111083984375, "learning_rate": 5.890583927859003e-06, "loss": 1.42119026, "memory(GiB)": 28.47, "step": 190, "train_speed(iter/s)": 0.403048 }, { "acc": 0.76825466, "epoch": 0.005279831045406547, "grad_norm": 46.204200744628906, "learning_rate": 5.919745368824597e-06, "loss": 1.37647629, "memory(GiB)": 28.47, "step": 195, "train_speed(iter/s)": 0.403804 }, { "acc": 0.74965258, "epoch": 0.0054152113286221, "grad_norm": 39.78371047973633, "learning_rate": 5.948168465564741e-06, "loss": 1.39622726, "memory(GiB)": 28.47, "step": 200, "train_speed(iter/s)": 0.404497 }, { "acc": 0.74271493, "epoch": 0.005550591611837652, "grad_norm": 39.89901351928711, "learning_rate": 5.975889685223983e-06, "loss": 1.46901474, "memory(GiB)": 28.47, "step": 205, "train_speed(iter/s)": 0.405146 }, { "acc": 0.75756416, "epoch": 0.005685971895053204, "grad_norm": 43.084041595458984, "learning_rate": 6.0029428582379044e-06, "loss": 1.36771832, "memory(GiB)": 28.47, "step": 210, "train_speed(iter/s)": 0.405784 }, { "acc": 0.7482583, "epoch": 0.005821352178268757, "grad_norm": 17.165063858032227, "learning_rate": 6.029359426554112e-06, "loss": 1.48745337, "memory(GiB)": 28.47, "step": 215, "train_speed(iter/s)": 0.406394 }, { "acc": 0.78638349, "epoch": 0.005956732461484309, "grad_norm": 122.8006362915039, "learning_rate": 6.055168663314079e-06, "loss": 1.20034819, "memory(GiB)": 28.47, "step": 220, "train_speed(iter/s)": 0.406957 }, { "acc": 0.74411774, "epoch": 0.006092112744699862, "grad_norm": 30.372440338134766, "learning_rate": 6.0803978678450046e-06, "loss": 1.41628666, "memory(GiB)": 28.47, "step": 225, "train_speed(iter/s)": 0.407573 }, { "acc": 0.7675015, "epoch": 0.0062274930279154145, "grad_norm": 28.823251724243164, "learning_rate": 6.105072539218529e-06, "loss": 1.20924282, "memory(GiB)": 28.47, "step": 230, "train_speed(iter/s)": 0.40813 }, { "acc": 0.74364758, "epoch": 0.006362873311130967, "grad_norm": 41.487979888916016, "learning_rate": 6.1292165311426374e-06, "loss": 1.49394474, "memory(GiB)": 28.47, "step": 235, "train_speed(iter/s)": 0.40863 }, { "acc": 0.76842074, "epoch": 0.00649825359434652, "grad_norm": 62.35819625854492, "learning_rate": 6.152852190545344e-06, "loss": 1.32590446, "memory(GiB)": 28.47, "step": 240, "train_speed(iter/s)": 0.409142 }, { "acc": 0.76771197, "epoch": 0.0066336338775620715, "grad_norm": 36.83146286010742, "learning_rate": 6.176000481868189e-06, "loss": 1.2897233, "memory(GiB)": 28.47, "step": 245, "train_speed(iter/s)": 0.40966 }, { "acc": 0.75754929, "epoch": 0.006769014160777624, "grad_norm": 22.896739959716797, "learning_rate": 6.198681098802125e-06, "loss": 1.25817184, "memory(GiB)": 28.47, "step": 250, "train_speed(iter/s)": 0.410106 }, { "acc": 0.75422621, "epoch": 0.006904394443993177, "grad_norm": 21.237205505371094, "learning_rate": 6.220912564958183e-06, "loss": 1.24382277, "memory(GiB)": 28.47, "step": 255, "train_speed(iter/s)": 0.410542 }, { "acc": 0.74912558, "epoch": 0.007039774727208729, "grad_norm": 21.332916259765625, "learning_rate": 6.242712324762321e-06, "loss": 1.36964283, "memory(GiB)": 28.47, "step": 260, "train_speed(iter/s)": 0.410995 }, { "acc": 0.76978259, "epoch": 0.007175155010424282, "grad_norm": 58.77531814575195, "learning_rate": 6.2640968256919875e-06, "loss": 1.22230873, "memory(GiB)": 28.47, "step": 265, "train_speed(iter/s)": 0.411348 }, { "acc": 0.74620876, "epoch": 0.007310535293639835, "grad_norm": 52.5131721496582, "learning_rate": 6.285081592825608e-06, "loss": 1.37934361, "memory(GiB)": 28.47, "step": 270, "train_speed(iter/s)": 0.411789 }, { "acc": 0.73467755, "epoch": 0.007445915576855386, "grad_norm": 26.394594192504883, "learning_rate": 6.305681296551464e-06, "loss": 1.38361931, "memory(GiB)": 28.47, "step": 275, "train_speed(iter/s)": 0.412138 }, { "acc": 0.75384245, "epoch": 0.007581295860070939, "grad_norm": 30.06842041015625, "learning_rate": 6.325909814175627e-06, "loss": 1.30312872, "memory(GiB)": 28.47, "step": 280, "train_speed(iter/s)": 0.412484 }, { "acc": 0.76500826, "epoch": 0.007716676143286492, "grad_norm": 63.801753997802734, "learning_rate": 6.345780286076989e-06, "loss": 1.3312067, "memory(GiB)": 28.47, "step": 285, "train_speed(iter/s)": 0.412839 }, { "acc": 0.77337275, "epoch": 0.007852056426502045, "grad_norm": 19.838594436645508, "learning_rate": 6.365305166978443e-06, "loss": 1.26238384, "memory(GiB)": 28.47, "step": 290, "train_speed(iter/s)": 0.413169 }, { "acc": 0.75004659, "epoch": 0.007987436709717596, "grad_norm": 24.770008087158203, "learning_rate": 6.38449627283527e-06, "loss": 1.33455553, "memory(GiB)": 28.47, "step": 295, "train_speed(iter/s)": 0.413476 }, { "acc": 0.75936427, "epoch": 0.008122816992933149, "grad_norm": 21.581491470336914, "learning_rate": 6.403364823782728e-06, "loss": 1.35005274, "memory(GiB)": 28.47, "step": 300, "train_speed(iter/s)": 0.413723 }, { "acc": 0.75189805, "epoch": 0.008258197276148701, "grad_norm": 19.883638381958008, "learning_rate": 6.421921483533755e-06, "loss": 1.31090031, "memory(GiB)": 28.47, "step": 305, "train_speed(iter/s)": 0.413999 }, { "acc": 0.76362214, "epoch": 0.008393577559364254, "grad_norm": 54.277462005615234, "learning_rate": 6.440176395573159e-06, "loss": 1.18956289, "memory(GiB)": 28.47, "step": 310, "train_speed(iter/s)": 0.414291 }, { "acc": 0.74582882, "epoch": 0.008528957842579806, "grad_norm": 85.86351013183594, "learning_rate": 6.458139216455891e-06, "loss": 1.34747524, "memory(GiB)": 28.47, "step": 315, "train_speed(iter/s)": 0.414604 }, { "acc": 0.7674891, "epoch": 0.008664338125795359, "grad_norm": 21.533084869384766, "learning_rate": 6.475819146483068e-06, "loss": 1.18806791, "memory(GiB)": 28.47, "step": 320, "train_speed(iter/s)": 0.414903 }, { "acc": 0.75977321, "epoch": 0.008799718409010912, "grad_norm": 107.82225036621094, "learning_rate": 6.493224957999705e-06, "loss": 1.24604883, "memory(GiB)": 28.47, "step": 325, "train_speed(iter/s)": 0.415204 }, { "acc": 0.74834051, "epoch": 0.008935098692226464, "grad_norm": 16.738231658935547, "learning_rate": 6.510365021532066e-06, "loss": 1.43375626, "memory(GiB)": 28.47, "step": 330, "train_speed(iter/s)": 0.415483 }, { "acc": 0.76733131, "epoch": 0.009070478975442017, "grad_norm": 41.9313850402832, "learning_rate": 6.527247329959555e-06, "loss": 1.24144087, "memory(GiB)": 28.47, "step": 335, "train_speed(iter/s)": 0.415781 }, { "acc": 0.76663637, "epoch": 0.00920585925865757, "grad_norm": 41.70188522338867, "learning_rate": 6.543879520895907e-06, "loss": 1.22832489, "memory(GiB)": 28.47, "step": 340, "train_speed(iter/s)": 0.416051 }, { "acc": 0.75811729, "epoch": 0.009341239541873122, "grad_norm": 69.4166488647461, "learning_rate": 6.560268897436516e-06, "loss": 1.29724388, "memory(GiB)": 28.47, "step": 345, "train_speed(iter/s)": 0.416288 }, { "acc": 0.75754604, "epoch": 0.009476619825088675, "grad_norm": 38.718135833740234, "learning_rate": 6.576422447413011e-06, "loss": 1.26762066, "memory(GiB)": 28.47, "step": 350, "train_speed(iter/s)": 0.416549 }, { "acc": 0.75170364, "epoch": 0.009612000108304226, "grad_norm": 26.602710723876953, "learning_rate": 6.592346861282103e-06, "loss": 1.27370529, "memory(GiB)": 28.47, "step": 355, "train_speed(iter/s)": 0.416808 }, { "acc": 0.76861525, "epoch": 0.009747380391519778, "grad_norm": 39.67243194580078, "learning_rate": 6.608048548763331e-06, "loss": 1.08505106, "memory(GiB)": 28.47, "step": 360, "train_speed(iter/s)": 0.417053 }, { "acc": 0.76969757, "epoch": 0.009882760674735331, "grad_norm": 83.77722930908203, "learning_rate": 6.623533654329287e-06, "loss": 1.22564735, "memory(GiB)": 28.47, "step": 365, "train_speed(iter/s)": 0.417293 }, { "acc": 0.76398106, "epoch": 0.010018140957950884, "grad_norm": 13.518319129943848, "learning_rate": 6.638808071641959e-06, "loss": 1.27384644, "memory(GiB)": 28.47, "step": 370, "train_speed(iter/s)": 0.417499 }, { "acc": 0.76734691, "epoch": 0.010153521241166436, "grad_norm": 15.39355182647705, "learning_rate": 6.653877457020112e-06, "loss": 1.26965418, "memory(GiB)": 28.47, "step": 375, "train_speed(iter/s)": 0.41768 }, { "acc": 0.75656462, "epoch": 0.010288901524381989, "grad_norm": 15.901528358459473, "learning_rate": 6.668747242014712e-06, "loss": 1.22897301, "memory(GiB)": 28.47, "step": 380, "train_speed(iter/s)": 0.417895 }, { "acc": 0.7697772, "epoch": 0.010424281807597541, "grad_norm": 37.60743713378906, "learning_rate": 6.683422645162349e-06, "loss": 1.18862276, "memory(GiB)": 28.47, "step": 385, "train_speed(iter/s)": 0.418113 }, { "acc": 0.7697865, "epoch": 0.010559662090813094, "grad_norm": 22.571388244628906, "learning_rate": 6.697908682980308e-06, "loss": 1.13993988, "memory(GiB)": 28.47, "step": 390, "train_speed(iter/s)": 0.418307 }, { "acc": 0.73849959, "epoch": 0.010695042374028647, "grad_norm": 16.837387084960938, "learning_rate": 6.712210180261264e-06, "loss": 1.33469086, "memory(GiB)": 28.47, "step": 395, "train_speed(iter/s)": 0.418462 }, { "acc": 0.75963402, "epoch": 0.0108304226572442, "grad_norm": 14.966313362121582, "learning_rate": 6.726331779720451e-06, "loss": 1.13547478, "memory(GiB)": 28.47, "step": 400, "train_speed(iter/s)": 0.418657 }, { "acc": 0.77997618, "epoch": 0.010965802940459752, "grad_norm": 58.32382583618164, "learning_rate": 6.740277951043595e-06, "loss": 1.0593791, "memory(GiB)": 28.47, "step": 405, "train_speed(iter/s)": 0.418813 }, { "acc": 0.75953102, "epoch": 0.011101183223675305, "grad_norm": 31.342849731445312, "learning_rate": 6.754052999379694e-06, "loss": 1.26877251, "memory(GiB)": 28.47, "step": 410, "train_speed(iter/s)": 0.418974 }, { "acc": 0.77483325, "epoch": 0.011236563506890857, "grad_norm": 17.4677677154541, "learning_rate": 6.767661073319063e-06, "loss": 1.09361496, "memory(GiB)": 28.47, "step": 415, "train_speed(iter/s)": 0.41912 }, { "acc": 0.7491847, "epoch": 0.011371943790106408, "grad_norm": 25.412336349487305, "learning_rate": 6.781106172393615e-06, "loss": 1.3030529, "memory(GiB)": 28.47, "step": 420, "train_speed(iter/s)": 0.419274 }, { "acc": 0.77331271, "epoch": 0.01150732407332196, "grad_norm": 24.650249481201172, "learning_rate": 6.7943921541332915e-06, "loss": 1.1505022, "memory(GiB)": 28.47, "step": 425, "train_speed(iter/s)": 0.41943 }, { "acc": 0.7754056, "epoch": 0.011642704356537513, "grad_norm": 27.79362678527832, "learning_rate": 6.807522740709823e-06, "loss": 1.139711, "memory(GiB)": 28.47, "step": 430, "train_speed(iter/s)": 0.419607 }, { "acc": 0.76574249, "epoch": 0.011778084639753066, "grad_norm": 39.20711898803711, "learning_rate": 6.82050152519643e-06, "loss": 1.20613575, "memory(GiB)": 28.47, "step": 435, "train_speed(iter/s)": 0.419754 }, { "acc": 0.7656105, "epoch": 0.011913464922968619, "grad_norm": 20.9473819732666, "learning_rate": 6.83333197746979e-06, "loss": 1.1315258, "memory(GiB)": 28.47, "step": 440, "train_speed(iter/s)": 0.419917 }, { "acc": 0.78529477, "epoch": 0.012048845206184171, "grad_norm": 49.961727142333984, "learning_rate": 6.846017449778547e-06, "loss": 1.13602877, "memory(GiB)": 28.47, "step": 445, "train_speed(iter/s)": 0.420088 }, { "acc": 0.7537159, "epoch": 0.012184225489399724, "grad_norm": 59.563514709472656, "learning_rate": 6.858561182000716e-06, "loss": 1.28552742, "memory(GiB)": 28.47, "step": 450, "train_speed(iter/s)": 0.420191 }, { "acc": 0.77353191, "epoch": 0.012319605772615276, "grad_norm": 25.0849552154541, "learning_rate": 6.870966306610592e-06, "loss": 1.16485262, "memory(GiB)": 28.47, "step": 455, "train_speed(iter/s)": 0.42032 }, { "acc": 0.76384287, "epoch": 0.012454986055830829, "grad_norm": 17.37344741821289, "learning_rate": 6.8832358533742394e-06, "loss": 1.21540966, "memory(GiB)": 28.47, "step": 460, "train_speed(iter/s)": 0.420469 }, { "acc": 0.77975698, "epoch": 0.012590366339046382, "grad_norm": 17.187837600708008, "learning_rate": 6.8953727537911455e-06, "loss": 1.13869686, "memory(GiB)": 28.47, "step": 465, "train_speed(iter/s)": 0.420566 }, { "acc": 0.7616106, "epoch": 0.012725746622261934, "grad_norm": 102.61223602294922, "learning_rate": 6.907379845298348e-06, "loss": 1.34731731, "memory(GiB)": 28.47, "step": 470, "train_speed(iter/s)": 0.420705 }, { "acc": 0.79113207, "epoch": 0.012861126905477487, "grad_norm": 54.45975112915039, "learning_rate": 6.919259875252097e-06, "loss": 1.1477973, "memory(GiB)": 28.47, "step": 475, "train_speed(iter/s)": 0.420853 }, { "acc": 0.77367859, "epoch": 0.01299650718869304, "grad_norm": 38.33256530761719, "learning_rate": 6.931015504701055e-06, "loss": 1.18279581, "memory(GiB)": 28.47, "step": 480, "train_speed(iter/s)": 0.421008 }, { "acc": 0.75678458, "epoch": 0.01313188747190859, "grad_norm": 32.562686920166016, "learning_rate": 6.942649311963975e-06, "loss": 1.14016075, "memory(GiB)": 28.47, "step": 485, "train_speed(iter/s)": 0.421153 }, { "acc": 0.79390364, "epoch": 0.013267267755124143, "grad_norm": 45.491355895996094, "learning_rate": 6.954163796023898e-06, "loss": 1.04894772, "memory(GiB)": 28.47, "step": 490, "train_speed(iter/s)": 0.42128 }, { "acc": 0.7674758, "epoch": 0.013402648038339696, "grad_norm": 16.359416961669922, "learning_rate": 6.965561379750053e-06, "loss": 1.26071377, "memory(GiB)": 28.47, "step": 495, "train_speed(iter/s)": 0.421389 }, { "acc": 0.77343121, "epoch": 0.013538028321555248, "grad_norm": 53.38888168334961, "learning_rate": 6.976844412957835e-06, "loss": 1.13966236, "memory(GiB)": 28.47, "step": 500, "train_speed(iter/s)": 0.421514 }, { "acc": 0.76468163, "epoch": 0.013673408604770801, "grad_norm": 37.058990478515625, "learning_rate": 6.988015175316549e-06, "loss": 1.24177761, "memory(GiB)": 28.47, "step": 505, "train_speed(iter/s)": 0.421641 }, { "acc": 0.77173214, "epoch": 0.013808788887986354, "grad_norm": 76.59449768066406, "learning_rate": 6.999075879113894e-06, "loss": 1.17179003, "memory(GiB)": 28.47, "step": 510, "train_speed(iter/s)": 0.42177 }, { "acc": 0.79152455, "epoch": 0.013944169171201906, "grad_norm": 39.969993591308594, "learning_rate": 7.010028671885604e-06, "loss": 1.03893204, "memory(GiB)": 28.47, "step": 515, "train_speed(iter/s)": 0.42189 }, { "acc": 0.79214258, "epoch": 0.014079549454417459, "grad_norm": 38.0523796081543, "learning_rate": 7.020875638918032e-06, "loss": 1.07199326, "memory(GiB)": 28.47, "step": 520, "train_speed(iter/s)": 0.422019 }, { "acc": 0.78158331, "epoch": 0.014214929737633011, "grad_norm": 22.40958595275879, "learning_rate": 7.031618805630999e-06, "loss": 1.20070496, "memory(GiB)": 28.47, "step": 525, "train_speed(iter/s)": 0.422151 }, { "acc": 0.7680625, "epoch": 0.014350310020848564, "grad_norm": 17.328720092773438, "learning_rate": 7.042260139847698e-06, "loss": 1.23607855, "memory(GiB)": 28.47, "step": 530, "train_speed(iter/s)": 0.422233 }, { "acc": 0.76291323, "epoch": 0.014485690304064117, "grad_norm": 36.72405242919922, "learning_rate": 7.052801553958031e-06, "loss": 1.28600845, "memory(GiB)": 28.47, "step": 535, "train_speed(iter/s)": 0.422302 }, { "acc": 0.77962103, "epoch": 0.01462107058727967, "grad_norm": 18.083690643310547, "learning_rate": 7.0632449069813195e-06, "loss": 1.11225424, "memory(GiB)": 28.47, "step": 540, "train_speed(iter/s)": 0.422432 }, { "acc": 0.76176562, "epoch": 0.014756450870495222, "grad_norm": 27.582637786865234, "learning_rate": 7.073592006533947e-06, "loss": 1.29641323, "memory(GiB)": 28.47, "step": 545, "train_speed(iter/s)": 0.422538 }, { "acc": 0.77830443, "epoch": 0.014891831153710773, "grad_norm": 24.782081604003906, "learning_rate": 7.083844610707172e-06, "loss": 1.16086922, "memory(GiB)": 28.47, "step": 550, "train_speed(iter/s)": 0.422648 }, { "acc": 0.78323164, "epoch": 0.015027211436926325, "grad_norm": 41.240413665771484, "learning_rate": 7.094004429859946e-06, "loss": 1.18874416, "memory(GiB)": 28.47, "step": 555, "train_speed(iter/s)": 0.422747 }, { "acc": 0.76567259, "epoch": 0.015162591720141878, "grad_norm": 17.56224822998047, "learning_rate": 7.104073128331338e-06, "loss": 1.28132782, "memory(GiB)": 28.47, "step": 560, "train_speed(iter/s)": 0.422816 }, { "acc": 0.74983554, "epoch": 0.01529797200335743, "grad_norm": 24.24003028869629, "learning_rate": 7.114052326076863e-06, "loss": 1.32913857, "memory(GiB)": 28.47, "step": 565, "train_speed(iter/s)": 0.422887 }, { "acc": 0.80167103, "epoch": 0.015433352286572983, "grad_norm": 32.42514419555664, "learning_rate": 7.1239436002327e-06, "loss": 1.00016956, "memory(GiB)": 28.47, "step": 570, "train_speed(iter/s)": 0.422966 }, { "acc": 0.80483799, "epoch": 0.015568732569788536, "grad_norm": 25.822864532470703, "learning_rate": 7.133748486611624e-06, "loss": 1.07820063, "memory(GiB)": 28.47, "step": 575, "train_speed(iter/s)": 0.423042 }, { "acc": 0.76155329, "epoch": 0.01570411285300409, "grad_norm": 34.02480697631836, "learning_rate": 7.143468481134154e-06, "loss": 1.17952385, "memory(GiB)": 28.47, "step": 580, "train_speed(iter/s)": 0.423144 }, { "acc": 0.79444656, "epoch": 0.01583949313621964, "grad_norm": 13.716586112976074, "learning_rate": 7.1531050411982965e-06, "loss": 1.08870993, "memory(GiB)": 28.47, "step": 585, "train_speed(iter/s)": 0.423236 }, { "acc": 0.80175896, "epoch": 0.015974873419435192, "grad_norm": 15.257658958435059, "learning_rate": 7.16265958699098e-06, "loss": 1.0459384, "memory(GiB)": 28.47, "step": 590, "train_speed(iter/s)": 0.423334 }, { "acc": 0.78752136, "epoch": 0.016110253702650745, "grad_norm": 19.341657638549805, "learning_rate": 7.172133502744178e-06, "loss": 1.14620323, "memory(GiB)": 28.47, "step": 595, "train_speed(iter/s)": 0.423405 }, { "acc": 0.76781006, "epoch": 0.016245633985866297, "grad_norm": 29.85979652404785, "learning_rate": 7.1815281379384385e-06, "loss": 1.19547615, "memory(GiB)": 28.47, "step": 600, "train_speed(iter/s)": 0.423486 }, { "acc": 0.78458171, "epoch": 0.01638101426908185, "grad_norm": 16.517009735107422, "learning_rate": 7.1908448084565115e-06, "loss": 1.15113077, "memory(GiB)": 28.47, "step": 605, "train_speed(iter/s)": 0.423572 }, { "acc": 0.80624981, "epoch": 0.016516394552297402, "grad_norm": 19.436080932617188, "learning_rate": 7.200084797689466e-06, "loss": 1.01262512, "memory(GiB)": 28.47, "step": 610, "train_speed(iter/s)": 0.423675 }, { "acc": 0.8099762, "epoch": 0.016651774835512955, "grad_norm": 39.589202880859375, "learning_rate": 7.209249357597681e-06, "loss": 0.99961205, "memory(GiB)": 28.47, "step": 615, "train_speed(iter/s)": 0.423744 }, { "acc": 0.79506359, "epoch": 0.016787155118728508, "grad_norm": 22.99797821044922, "learning_rate": 7.21833970972887e-06, "loss": 0.9299036, "memory(GiB)": 28.47, "step": 620, "train_speed(iter/s)": 0.423809 }, { "acc": 0.79485745, "epoch": 0.01692253540194406, "grad_norm": 29.70042610168457, "learning_rate": 7.227357046195219e-06, "loss": 0.99421177, "memory(GiB)": 28.47, "step": 625, "train_speed(iter/s)": 0.423891 }, { "acc": 0.76939774, "epoch": 0.017057915685159613, "grad_norm": 21.1697998046875, "learning_rate": 7.236302530611602e-06, "loss": 1.20224934, "memory(GiB)": 28.47, "step": 630, "train_speed(iter/s)": 0.423953 }, { "acc": 0.78637896, "epoch": 0.017193295968375166, "grad_norm": 17.24368667602539, "learning_rate": 7.245177298996697e-06, "loss": 1.12994871, "memory(GiB)": 28.47, "step": 635, "train_speed(iter/s)": 0.424024 }, { "acc": 0.80436211, "epoch": 0.017328676251590718, "grad_norm": 28.434371948242188, "learning_rate": 7.253982460638778e-06, "loss": 0.9308053, "memory(GiB)": 28.47, "step": 640, "train_speed(iter/s)": 0.424104 }, { "acc": 0.77594805, "epoch": 0.01746405653480627, "grad_norm": 42.99869918823242, "learning_rate": 7.262719098927811e-06, "loss": 1.14034386, "memory(GiB)": 28.47, "step": 645, "train_speed(iter/s)": 0.424182 }, { "acc": 0.81325369, "epoch": 0.017599436818021823, "grad_norm": 14.943174362182617, "learning_rate": 7.2713882721554155e-06, "loss": 0.95113239, "memory(GiB)": 28.47, "step": 650, "train_speed(iter/s)": 0.424269 }, { "acc": 0.78444023, "epoch": 0.017734817101237376, "grad_norm": 14.768656730651855, "learning_rate": 7.279991014284191e-06, "loss": 1.03135843, "memory(GiB)": 28.47, "step": 655, "train_speed(iter/s)": 0.424317 }, { "acc": 0.79550686, "epoch": 0.01787019738445293, "grad_norm": 20.68779182434082, "learning_rate": 7.288528335687777e-06, "loss": 0.98707142, "memory(GiB)": 28.47, "step": 660, "train_speed(iter/s)": 0.424372 }, { "acc": 0.77247601, "epoch": 0.01800557766766848, "grad_norm": 24.57453155517578, "learning_rate": 7.297001223862983e-06, "loss": 1.20001631, "memory(GiB)": 28.47, "step": 665, "train_speed(iter/s)": 0.424438 }, { "acc": 0.80776329, "epoch": 0.018140957950884034, "grad_norm": 19.007492065429688, "learning_rate": 7.305410644115266e-06, "loss": 0.90276117, "memory(GiB)": 28.47, "step": 670, "train_speed(iter/s)": 0.424519 }, { "acc": 0.80622234, "epoch": 0.018276338234099587, "grad_norm": 23.66937828063965, "learning_rate": 7.313757540218702e-06, "loss": 0.87083912, "memory(GiB)": 28.47, "step": 675, "train_speed(iter/s)": 0.424558 }, { "acc": 0.79168367, "epoch": 0.01841171851731514, "grad_norm": 15.131926536560059, "learning_rate": 7.322042835051617e-06, "loss": 1.059653, "memory(GiB)": 28.47, "step": 680, "train_speed(iter/s)": 0.424634 }, { "acc": 0.79028511, "epoch": 0.018547098800530692, "grad_norm": 25.595077514648438, "learning_rate": 7.330267431208929e-06, "loss": 1.07788153, "memory(GiB)": 28.47, "step": 685, "train_speed(iter/s)": 0.424691 }, { "acc": 0.75685644, "epoch": 0.018682479083746244, "grad_norm": 33.01803970336914, "learning_rate": 7.338432211592226e-06, "loss": 1.15561705, "memory(GiB)": 28.47, "step": 690, "train_speed(iter/s)": 0.424767 }, { "acc": 0.79497108, "epoch": 0.018817859366961797, "grad_norm": 22.528541564941406, "learning_rate": 7.346538039978538e-06, "loss": 1.03493834, "memory(GiB)": 28.47, "step": 695, "train_speed(iter/s)": 0.424822 }, { "acc": 0.80516987, "epoch": 0.01895323965017735, "grad_norm": 20.92061996459961, "learning_rate": 7.354585761568722e-06, "loss": 1.04469986, "memory(GiB)": 28.47, "step": 700, "train_speed(iter/s)": 0.42489 }, { "acc": 0.81441479, "epoch": 0.019088619933392902, "grad_norm": 21.621543884277344, "learning_rate": 7.362576203516335e-06, "loss": 0.98144779, "memory(GiB)": 28.47, "step": 705, "train_speed(iter/s)": 0.424945 }, { "acc": 0.78062983, "epoch": 0.01922400021660845, "grad_norm": 15.866767883300781, "learning_rate": 7.370510175437813e-06, "loss": 1.11809158, "memory(GiB)": 28.47, "step": 710, "train_speed(iter/s)": 0.425018 }, { "acc": 0.79208069, "epoch": 0.019359380499824004, "grad_norm": 38.72313690185547, "learning_rate": 7.378388469904754e-06, "loss": 1.10249424, "memory(GiB)": 28.47, "step": 715, "train_speed(iter/s)": 0.425079 }, { "acc": 0.79539213, "epoch": 0.019494760783039557, "grad_norm": 25.780410766601562, "learning_rate": 7.3862118629190415e-06, "loss": 0.9623313, "memory(GiB)": 28.47, "step": 720, "train_speed(iter/s)": 0.425149 }, { "acc": 0.77808414, "epoch": 0.01963014106625511, "grad_norm": 22.95226287841797, "learning_rate": 7.393981114371538e-06, "loss": 1.13494244, "memory(GiB)": 28.47, "step": 725, "train_speed(iter/s)": 0.42519 }, { "acc": 0.77497969, "epoch": 0.019765521349470662, "grad_norm": 42.45054626464844, "learning_rate": 7.401696968484998e-06, "loss": 1.1507, "memory(GiB)": 28.47, "step": 730, "train_speed(iter/s)": 0.425222 }, { "acc": 0.79057803, "epoch": 0.019900901632686215, "grad_norm": 41.002784729003906, "learning_rate": 7.4093601542418845e-06, "loss": 1.10156364, "memory(GiB)": 28.47, "step": 735, "train_speed(iter/s)": 0.4253 }, { "acc": 0.80164042, "epoch": 0.020036281915901767, "grad_norm": 21.678186416625977, "learning_rate": 7.416971385797669e-06, "loss": 1.01343136, "memory(GiB)": 28.47, "step": 740, "train_speed(iter/s)": 0.425354 }, { "acc": 0.78197284, "epoch": 0.02017166219911732, "grad_norm": 244.8945770263672, "learning_rate": 7.424531362880217e-06, "loss": 1.06342964, "memory(GiB)": 28.47, "step": 745, "train_speed(iter/s)": 0.425414 }, { "acc": 0.77977467, "epoch": 0.020307042482332872, "grad_norm": 45.05595779418945, "learning_rate": 7.432040771175823e-06, "loss": 1.06586714, "memory(GiB)": 28.47, "step": 750, "train_speed(iter/s)": 0.425472 }, { "acc": 0.81025829, "epoch": 0.020442422765548425, "grad_norm": 18.224040985107422, "learning_rate": 7.43950028270241e-06, "loss": 0.98478146, "memory(GiB)": 28.47, "step": 755, "train_speed(iter/s)": 0.425541 }, { "acc": 0.80513821, "epoch": 0.020577803048763978, "grad_norm": 39.37622833251953, "learning_rate": 7.4469105561704234e-06, "loss": 0.98765774, "memory(GiB)": 28.47, "step": 760, "train_speed(iter/s)": 0.42559 }, { "acc": 0.79451046, "epoch": 0.02071318333197953, "grad_norm": 10.971508979797363, "learning_rate": 7.454272237331881e-06, "loss": 1.03286457, "memory(GiB)": 28.47, "step": 765, "train_speed(iter/s)": 0.425647 }, { "acc": 0.81049929, "epoch": 0.020848563615195083, "grad_norm": 27.96160888671875, "learning_rate": 7.4615859593180594e-06, "loss": 0.90298634, "memory(GiB)": 28.47, "step": 770, "train_speed(iter/s)": 0.425713 }, { "acc": 0.80541582, "epoch": 0.020983943898410636, "grad_norm": 32.70130157470703, "learning_rate": 7.468852342966254e-06, "loss": 1.02989578, "memory(GiB)": 28.47, "step": 775, "train_speed(iter/s)": 0.425779 }, { "acc": 0.78514581, "epoch": 0.021119324181626188, "grad_norm": 74.41635131835938, "learning_rate": 7.4760719971360185e-06, "loss": 1.1624321, "memory(GiB)": 28.47, "step": 780, "train_speed(iter/s)": 0.425835 }, { "acc": 0.8126421, "epoch": 0.02125470446484174, "grad_norm": 17.713659286499023, "learning_rate": 7.483245519015324e-06, "loss": 0.89516535, "memory(GiB)": 28.47, "step": 785, "train_speed(iter/s)": 0.425894 }, { "acc": 0.79903927, "epoch": 0.021390084748057293, "grad_norm": 25.11139678955078, "learning_rate": 7.490373494416974e-06, "loss": 0.96257515, "memory(GiB)": 28.47, "step": 790, "train_speed(iter/s)": 0.425943 }, { "acc": 0.79704456, "epoch": 0.021525465031272846, "grad_norm": 27.742008209228516, "learning_rate": 7.497456498065685e-06, "loss": 1.06225166, "memory(GiB)": 28.47, "step": 795, "train_speed(iter/s)": 0.426 }, { "acc": 0.76908655, "epoch": 0.0216608453144884, "grad_norm": 81.78112030029297, "learning_rate": 7.504495093876162e-06, "loss": 1.18189707, "memory(GiB)": 28.47, "step": 800, "train_speed(iter/s)": 0.426052 }, { "acc": 0.78753891, "epoch": 0.02179622559770395, "grad_norm": 19.599119186401367, "learning_rate": 7.511489835222509e-06, "loss": 1.1021574, "memory(GiB)": 28.47, "step": 805, "train_speed(iter/s)": 0.426095 }, { "acc": 0.78447847, "epoch": 0.021931605880919504, "grad_norm": 56.795166015625, "learning_rate": 7.518441265199305e-06, "loss": 1.06969652, "memory(GiB)": 28.47, "step": 810, "train_speed(iter/s)": 0.426151 }, { "acc": 0.78868442, "epoch": 0.022066986164135056, "grad_norm": 76.65985870361328, "learning_rate": 7.525349916874637e-06, "loss": 1.1628191, "memory(GiB)": 28.47, "step": 815, "train_speed(iter/s)": 0.426201 }, { "acc": 0.82130451, "epoch": 0.02220236644735061, "grad_norm": 15.295308113098145, "learning_rate": 7.5322163135354034e-06, "loss": 0.801052, "memory(GiB)": 28.47, "step": 820, "train_speed(iter/s)": 0.42626 }, { "acc": 0.77923732, "epoch": 0.02233774673056616, "grad_norm": 10.123788833618164, "learning_rate": 7.5390409689251596e-06, "loss": 1.14641132, "memory(GiB)": 28.47, "step": 825, "train_speed(iter/s)": 0.426303 }, { "acc": 0.80167389, "epoch": 0.022473127013781714, "grad_norm": 18.57280158996582, "learning_rate": 7.545824387474774e-06, "loss": 0.97405624, "memory(GiB)": 28.47, "step": 830, "train_speed(iter/s)": 0.426354 }, { "acc": 0.79655046, "epoch": 0.022608507296997267, "grad_norm": 40.92021179199219, "learning_rate": 7.5525670645261585e-06, "loss": 0.95968266, "memory(GiB)": 28.47, "step": 835, "train_speed(iter/s)": 0.426408 }, { "acc": 0.78492632, "epoch": 0.022743887580212816, "grad_norm": 19.118680953979492, "learning_rate": 7.559269486549326e-06, "loss": 1.18246441, "memory(GiB)": 28.47, "step": 840, "train_speed(iter/s)": 0.426442 }, { "acc": 0.79238987, "epoch": 0.02287926786342837, "grad_norm": 17.6538028717041, "learning_rate": 7.5659321313529955e-06, "loss": 1.00086498, "memory(GiB)": 28.47, "step": 845, "train_speed(iter/s)": 0.42649 }, { "acc": 0.77588592, "epoch": 0.02301464814664392, "grad_norm": 22.939674377441406, "learning_rate": 7.572555468289001e-06, "loss": 1.16232967, "memory(GiB)": 28.47, "step": 850, "train_speed(iter/s)": 0.426516 }, { "acc": 0.78449984, "epoch": 0.023150028429859474, "grad_norm": 46.1162223815918, "learning_rate": 7.579139958450686e-06, "loss": 1.0483532, "memory(GiB)": 28.47, "step": 855, "train_speed(iter/s)": 0.426552 }, { "acc": 0.77803893, "epoch": 0.023285408713075027, "grad_norm": 16.84689712524414, "learning_rate": 7.5856860548655335e-06, "loss": 1.09508858, "memory(GiB)": 28.47, "step": 860, "train_speed(iter/s)": 0.426591 }, { "acc": 0.76909938, "epoch": 0.02342078899629058, "grad_norm": 51.22965621948242, "learning_rate": 7.592194202682193e-06, "loss": 1.21036787, "memory(GiB)": 28.47, "step": 865, "train_speed(iter/s)": 0.426614 }, { "acc": 0.79761615, "epoch": 0.023556169279506132, "grad_norm": 36.69175720214844, "learning_rate": 7.598664839352141e-06, "loss": 1.13658905, "memory(GiB)": 28.47, "step": 870, "train_speed(iter/s)": 0.42665 }, { "acc": 0.81051617, "epoch": 0.023691549562721684, "grad_norm": 18.133420944213867, "learning_rate": 7.605098394806105e-06, "loss": 0.95561237, "memory(GiB)": 28.47, "step": 875, "train_speed(iter/s)": 0.426697 }, { "acc": 0.81950779, "epoch": 0.023826929845937237, "grad_norm": 14.756786346435547, "learning_rate": 7.611495291625499e-06, "loss": 0.91510944, "memory(GiB)": 28.47, "step": 880, "train_speed(iter/s)": 0.426749 }, { "acc": 0.80308943, "epoch": 0.02396231012915279, "grad_norm": 14.389519691467285, "learning_rate": 7.617855945208967e-06, "loss": 0.98055344, "memory(GiB)": 28.47, "step": 885, "train_speed(iter/s)": 0.426794 }, { "acc": 0.83372078, "epoch": 0.024097690412368342, "grad_norm": 24.47084617614746, "learning_rate": 7.6241807639342566e-06, "loss": 0.88399591, "memory(GiB)": 28.47, "step": 890, "train_speed(iter/s)": 0.426844 }, { "acc": 0.81275864, "epoch": 0.024233070695583895, "grad_norm": 17.488962173461914, "learning_rate": 7.630470149315553e-06, "loss": 0.91319618, "memory(GiB)": 28.47, "step": 895, "train_speed(iter/s)": 0.426888 }, { "acc": 0.78720875, "epoch": 0.024368450978799448, "grad_norm": 12.831904411315918, "learning_rate": 7.636724496156425e-06, "loss": 1.11109324, "memory(GiB)": 28.47, "step": 900, "train_speed(iter/s)": 0.426927 }, { "acc": 0.8114646, "epoch": 0.024503831262015, "grad_norm": 20.11437225341797, "learning_rate": 7.642944192698547e-06, "loss": 1.02055931, "memory(GiB)": 28.47, "step": 905, "train_speed(iter/s)": 0.426961 }, { "acc": 0.76755886, "epoch": 0.024639211545230553, "grad_norm": 28.004690170288086, "learning_rate": 7.649129620766302e-06, "loss": 1.15707369, "memory(GiB)": 28.47, "step": 910, "train_speed(iter/s)": 0.427017 }, { "acc": 0.76810212, "epoch": 0.024774591828446105, "grad_norm": 20.91655921936035, "learning_rate": 7.655281155907453e-06, "loss": 1.13500509, "memory(GiB)": 28.47, "step": 915, "train_speed(iter/s)": 0.427057 }, { "acc": 0.79321957, "epoch": 0.024909972111661658, "grad_norm": 11.828251838684082, "learning_rate": 7.66139916752995e-06, "loss": 1.02939148, "memory(GiB)": 28.47, "step": 920, "train_speed(iter/s)": 0.427092 }, { "acc": 0.81636257, "epoch": 0.02504535239487721, "grad_norm": 23.867473602294922, "learning_rate": 7.667484019035053e-06, "loss": 0.94572353, "memory(GiB)": 28.47, "step": 925, "train_speed(iter/s)": 0.427124 }, { "acc": 0.78336954, "epoch": 0.025180732678092763, "grad_norm": 16.65552520751953, "learning_rate": 7.673536067946856e-06, "loss": 1.27387218, "memory(GiB)": 28.47, "step": 930, "train_speed(iter/s)": 0.427157 }, { "acc": 0.80431423, "epoch": 0.025316112961308316, "grad_norm": 16.972578048706055, "learning_rate": 7.67955566603834e-06, "loss": 1.03075695, "memory(GiB)": 28.47, "step": 935, "train_speed(iter/s)": 0.42718 }, { "acc": 0.79382749, "epoch": 0.02545149324452387, "grad_norm": 30.189964294433594, "learning_rate": 7.685543159454058e-06, "loss": 1.13227673, "memory(GiB)": 28.47, "step": 940, "train_speed(iter/s)": 0.427227 }, { "acc": 0.81378155, "epoch": 0.02558687352773942, "grad_norm": 23.365163803100586, "learning_rate": 7.69149888882959e-06, "loss": 0.96492167, "memory(GiB)": 28.47, "step": 945, "train_speed(iter/s)": 0.427264 }, { "acc": 0.78788953, "epoch": 0.025722253810954974, "grad_norm": 18.80040740966797, "learning_rate": 7.697423189407807e-06, "loss": 1.06606579, "memory(GiB)": 28.47, "step": 950, "train_speed(iter/s)": 0.427289 }, { "acc": 0.79977112, "epoch": 0.025857634094170526, "grad_norm": 25.770723342895508, "learning_rate": 7.703316391152124e-06, "loss": 0.99106493, "memory(GiB)": 28.47, "step": 955, "train_speed(iter/s)": 0.427325 }, { "acc": 0.80761814, "epoch": 0.02599301437738608, "grad_norm": 8.442838668823242, "learning_rate": 7.709178818856764e-06, "loss": 0.9422925, "memory(GiB)": 28.47, "step": 960, "train_speed(iter/s)": 0.427347 }, { "acc": 0.77915807, "epoch": 0.02612839466060163, "grad_norm": 11.236177444458008, "learning_rate": 7.71501079225418e-06, "loss": 1.16840611, "memory(GiB)": 28.47, "step": 965, "train_speed(iter/s)": 0.427388 }, { "acc": 0.81112919, "epoch": 0.02626377494381718, "grad_norm": 26.89558219909668, "learning_rate": 7.720812626119685e-06, "loss": 0.90880318, "memory(GiB)": 28.47, "step": 970, "train_speed(iter/s)": 0.427431 }, { "acc": 0.80742359, "epoch": 0.026399155227032733, "grad_norm": 19.842374801635742, "learning_rate": 7.726584630373403e-06, "loss": 0.97042389, "memory(GiB)": 28.47, "step": 975, "train_speed(iter/s)": 0.427468 }, { "acc": 0.81765594, "epoch": 0.026534535510248286, "grad_norm": 7.770326137542725, "learning_rate": 7.732327110179609e-06, "loss": 0.90652046, "memory(GiB)": 28.47, "step": 980, "train_speed(iter/s)": 0.427504 }, { "acc": 0.79544406, "epoch": 0.02666991579346384, "grad_norm": 15.299177169799805, "learning_rate": 7.738040366043542e-06, "loss": 1.0150156, "memory(GiB)": 28.47, "step": 985, "train_speed(iter/s)": 0.427528 }, { "acc": 0.80652199, "epoch": 0.02680529607667939, "grad_norm": 11.649066925048828, "learning_rate": 7.743724693905764e-06, "loss": 1.0278224, "memory(GiB)": 28.47, "step": 990, "train_speed(iter/s)": 0.427567 }, { "acc": 0.81058636, "epoch": 0.026940676359894944, "grad_norm": 60.56632614135742, "learning_rate": 7.749380385234145e-06, "loss": 0.96785221, "memory(GiB)": 28.47, "step": 995, "train_speed(iter/s)": 0.427599 }, { "acc": 0.7982049, "epoch": 0.027076056643110497, "grad_norm": 15.370145797729492, "learning_rate": 7.755007727113547e-06, "loss": 0.91359243, "memory(GiB)": 28.47, "step": 1000, "train_speed(iter/s)": 0.42763 }, { "acc": 0.76521292, "epoch": 0.02721143692632605, "grad_norm": 17.734455108642578, "learning_rate": 7.760607002333253e-06, "loss": 1.27961559, "memory(GiB)": 28.47, "step": 1005, "train_speed(iter/s)": 0.427662 }, { "acc": 0.79076052, "epoch": 0.027346817209541602, "grad_norm": 20.778533935546875, "learning_rate": 7.766178489472258e-06, "loss": 1.02582102, "memory(GiB)": 28.47, "step": 1010, "train_speed(iter/s)": 0.427701 }, { "acc": 0.81170082, "epoch": 0.027482197492757154, "grad_norm": 30.491487503051758, "learning_rate": 7.771722462982424e-06, "loss": 0.87872925, "memory(GiB)": 28.47, "step": 1015, "train_speed(iter/s)": 0.427743 }, { "acc": 0.80240698, "epoch": 0.027617577775972707, "grad_norm": 38.56878662109375, "learning_rate": 7.777239193269604e-06, "loss": 0.89741421, "memory(GiB)": 28.47, "step": 1020, "train_speed(iter/s)": 0.427773 }, { "acc": 0.79378266, "epoch": 0.02775295805918826, "grad_norm": 18.590185165405273, "learning_rate": 7.782728946772789e-06, "loss": 1.06796255, "memory(GiB)": 28.47, "step": 1025, "train_speed(iter/s)": 0.427801 }, { "acc": 0.78933964, "epoch": 0.027888338342403812, "grad_norm": 18.02821922302246, "learning_rate": 7.788191986041315e-06, "loss": 1.15914679, "memory(GiB)": 28.47, "step": 1030, "train_speed(iter/s)": 0.427831 }, { "acc": 0.80880089, "epoch": 0.028023718625619365, "grad_norm": 15.773004531860352, "learning_rate": 7.793628569810213e-06, "loss": 1.13652916, "memory(GiB)": 28.47, "step": 1035, "train_speed(iter/s)": 0.427851 }, { "acc": 0.80159311, "epoch": 0.028159098908834918, "grad_norm": 21.16741943359375, "learning_rate": 7.799038953073742e-06, "loss": 1.02817707, "memory(GiB)": 28.47, "step": 1040, "train_speed(iter/s)": 0.427892 }, { "acc": 0.81581783, "epoch": 0.02829447919205047, "grad_norm": 10.029458999633789, "learning_rate": 7.804423387157145e-06, "loss": 0.89275341, "memory(GiB)": 28.47, "step": 1045, "train_speed(iter/s)": 0.427926 }, { "acc": 0.765977, "epoch": 0.028429859475266023, "grad_norm": 15.955931663513184, "learning_rate": 7.80978211978671e-06, "loss": 1.15783424, "memory(GiB)": 28.47, "step": 1050, "train_speed(iter/s)": 0.427958 }, { "acc": 0.76727657, "epoch": 0.028565239758481575, "grad_norm": 14.81109619140625, "learning_rate": 7.815115395158156e-06, "loss": 1.23452311, "memory(GiB)": 28.47, "step": 1055, "train_speed(iter/s)": 0.427979 }, { "acc": 0.79773278, "epoch": 0.028700620041697128, "grad_norm": 17.577922821044922, "learning_rate": 7.820423454003409e-06, "loss": 0.91418371, "memory(GiB)": 28.47, "step": 1060, "train_speed(iter/s)": 0.428002 }, { "acc": 0.82426958, "epoch": 0.02883600032491268, "grad_norm": 14.451324462890625, "learning_rate": 7.825706533655799e-06, "loss": 0.85408554, "memory(GiB)": 28.47, "step": 1065, "train_speed(iter/s)": 0.428035 }, { "acc": 0.82300844, "epoch": 0.028971380608128233, "grad_norm": 21.015043258666992, "learning_rate": 7.830964868113742e-06, "loss": 0.90582275, "memory(GiB)": 28.47, "step": 1070, "train_speed(iter/s)": 0.428073 }, { "acc": 0.81144609, "epoch": 0.029106760891343786, "grad_norm": 27.500064849853516, "learning_rate": 7.836198688102918e-06, "loss": 0.89745388, "memory(GiB)": 28.47, "step": 1075, "train_speed(iter/s)": 0.428086 }, { "acc": 0.77503033, "epoch": 0.02924214117455934, "grad_norm": 196.80166625976562, "learning_rate": 7.841408221137029e-06, "loss": 1.16802731, "memory(GiB)": 28.47, "step": 1080, "train_speed(iter/s)": 0.428116 }, { "acc": 0.8030715, "epoch": 0.02937752145777489, "grad_norm": 25.680017471313477, "learning_rate": 7.84659369157714e-06, "loss": 1.05915747, "memory(GiB)": 28.47, "step": 1085, "train_speed(iter/s)": 0.428142 }, { "acc": 0.81266222, "epoch": 0.029512901740990444, "grad_norm": 14.353148460388184, "learning_rate": 7.851755320689657e-06, "loss": 0.94887695, "memory(GiB)": 28.47, "step": 1090, "train_speed(iter/s)": 0.428172 }, { "acc": 0.83175678, "epoch": 0.029648282024205996, "grad_norm": 13.620509147644043, "learning_rate": 7.856893326702986e-06, "loss": 0.8778285, "memory(GiB)": 28.47, "step": 1095, "train_speed(iter/s)": 0.428201 }, { "acc": 0.79896793, "epoch": 0.029783662307421545, "grad_norm": 58.333255767822266, "learning_rate": 7.862007924862884e-06, "loss": 0.99709187, "memory(GiB)": 28.47, "step": 1100, "train_speed(iter/s)": 0.42823 }, { "acc": 0.77567477, "epoch": 0.029919042590637098, "grad_norm": 34.77915573120117, "learning_rate": 7.86709932748658e-06, "loss": 1.16404696, "memory(GiB)": 28.47, "step": 1105, "train_speed(iter/s)": 0.428252 }, { "acc": 0.80017042, "epoch": 0.03005442287385265, "grad_norm": 20.71426010131836, "learning_rate": 7.872167744015657e-06, "loss": 0.99005585, "memory(GiB)": 28.47, "step": 1110, "train_speed(iter/s)": 0.428261 }, { "acc": 0.8150053, "epoch": 0.030189803157068203, "grad_norm": 17.506261825561523, "learning_rate": 7.87721338106774e-06, "loss": 0.83633957, "memory(GiB)": 28.47, "step": 1115, "train_speed(iter/s)": 0.428286 }, { "acc": 0.81409016, "epoch": 0.030325183440283756, "grad_norm": 47.62744903564453, "learning_rate": 7.88223644248705e-06, "loss": 0.9821537, "memory(GiB)": 28.47, "step": 1120, "train_speed(iter/s)": 0.42832 }, { "acc": 0.79577084, "epoch": 0.03046056372349931, "grad_norm": 18.59375, "learning_rate": 7.88723712939381e-06, "loss": 1.00849266, "memory(GiB)": 28.47, "step": 1125, "train_speed(iter/s)": 0.428324 }, { "acc": 0.80470657, "epoch": 0.03059594400671486, "grad_norm": 25.397201538085938, "learning_rate": 7.892215640232572e-06, "loss": 0.95714321, "memory(GiB)": 28.47, "step": 1130, "train_speed(iter/s)": 0.42834 }, { "acc": 0.76224098, "epoch": 0.030731324289930414, "grad_norm": 23.7777099609375, "learning_rate": 7.897172170819465e-06, "loss": 1.28909235, "memory(GiB)": 28.47, "step": 1135, "train_speed(iter/s)": 0.42836 }, { "acc": 0.81418676, "epoch": 0.030866704573145966, "grad_norm": 17.227619171142578, "learning_rate": 7.90210691438841e-06, "loss": 0.90516233, "memory(GiB)": 28.47, "step": 1140, "train_speed(iter/s)": 0.42839 }, { "acc": 0.79314041, "epoch": 0.03100208485636152, "grad_norm": 33.41503143310547, "learning_rate": 7.907020061636323e-06, "loss": 1.08236446, "memory(GiB)": 28.47, "step": 1145, "train_speed(iter/s)": 0.428402 }, { "acc": 0.77911415, "epoch": 0.03113746513957707, "grad_norm": 19.4280948638916, "learning_rate": 7.911911800767335e-06, "loss": 0.98193913, "memory(GiB)": 28.47, "step": 1150, "train_speed(iter/s)": 0.428406 }, { "acc": 0.80941963, "epoch": 0.03127284542279263, "grad_norm": 10.419604301452637, "learning_rate": 7.916782317536048e-06, "loss": 0.80654125, "memory(GiB)": 28.47, "step": 1155, "train_speed(iter/s)": 0.428428 }, { "acc": 0.8070117, "epoch": 0.03140822570600818, "grad_norm": 22.30207633972168, "learning_rate": 7.921631795289865e-06, "loss": 0.96590405, "memory(GiB)": 28.47, "step": 1160, "train_speed(iter/s)": 0.42846 }, { "acc": 0.78384142, "epoch": 0.031543605989223726, "grad_norm": 23.53416633605957, "learning_rate": 7.9264604150104e-06, "loss": 1.06990728, "memory(GiB)": 28.47, "step": 1165, "train_speed(iter/s)": 0.428478 }, { "acc": 0.79350471, "epoch": 0.03167898627243928, "grad_norm": 26.649995803833008, "learning_rate": 7.931268355354007e-06, "loss": 1.08035812, "memory(GiB)": 28.47, "step": 1170, "train_speed(iter/s)": 0.428487 }, { "acc": 0.78000464, "epoch": 0.03181436655565483, "grad_norm": 47.624656677246094, "learning_rate": 7.936055792691442e-06, "loss": 1.09066868, "memory(GiB)": 28.47, "step": 1175, "train_speed(iter/s)": 0.428507 }, { "acc": 0.78488574, "epoch": 0.031949746838870384, "grad_norm": 22.957691192626953, "learning_rate": 7.940822901146691e-06, "loss": 1.11128263, "memory(GiB)": 28.47, "step": 1180, "train_speed(iter/s)": 0.428507 }, { "acc": 0.7880343, "epoch": 0.03208512712208594, "grad_norm": 19.31131935119629, "learning_rate": 7.94556985263496e-06, "loss": 1.05656176, "memory(GiB)": 28.47, "step": 1185, "train_speed(iter/s)": 0.428527 }, { "acc": 0.824508, "epoch": 0.03222050740530149, "grad_norm": 15.177520751953125, "learning_rate": 7.950296816899887e-06, "loss": 0.84573536, "memory(GiB)": 28.47, "step": 1190, "train_speed(iter/s)": 0.428559 }, { "acc": 0.77469354, "epoch": 0.03235588768851704, "grad_norm": 28.127439498901367, "learning_rate": 7.955003961549953e-06, "loss": 1.19047146, "memory(GiB)": 28.47, "step": 1195, "train_speed(iter/s)": 0.42858 }, { "acc": 0.76289053, "epoch": 0.032491267971732594, "grad_norm": 22.157907485961914, "learning_rate": 7.959691452094149e-06, "loss": 1.15515451, "memory(GiB)": 28.47, "step": 1200, "train_speed(iter/s)": 0.428596 }, { "acc": 0.84148254, "epoch": 0.03262664825494815, "grad_norm": 8.989446640014648, "learning_rate": 7.964359451976891e-06, "loss": 0.77334843, "memory(GiB)": 28.47, "step": 1205, "train_speed(iter/s)": 0.428625 }, { "acc": 0.78631172, "epoch": 0.0327620285381637, "grad_norm": 9.359373092651367, "learning_rate": 7.96900812261222e-06, "loss": 0.99193811, "memory(GiB)": 28.47, "step": 1210, "train_speed(iter/s)": 0.428646 }, { "acc": 0.76522861, "epoch": 0.03289740882137925, "grad_norm": 24.524791717529297, "learning_rate": 7.973637623417293e-06, "loss": 1.27245922, "memory(GiB)": 28.47, "step": 1215, "train_speed(iter/s)": 0.428662 }, { "acc": 0.81138935, "epoch": 0.033032789104594805, "grad_norm": 15.878875732421875, "learning_rate": 7.978248111845175e-06, "loss": 0.89538383, "memory(GiB)": 28.47, "step": 1220, "train_speed(iter/s)": 0.428696 }, { "acc": 0.84068727, "epoch": 0.03316816938781036, "grad_norm": 8.733650207519531, "learning_rate": 7.982839743416992e-06, "loss": 0.7325098, "memory(GiB)": 28.47, "step": 1225, "train_speed(iter/s)": 0.428715 }, { "acc": 0.80703964, "epoch": 0.03330354967102591, "grad_norm": 17.737913131713867, "learning_rate": 7.98741267175339e-06, "loss": 0.96481514, "memory(GiB)": 28.47, "step": 1230, "train_speed(iter/s)": 0.428745 }, { "acc": 0.82979946, "epoch": 0.03343892995424146, "grad_norm": 26.5035457611084, "learning_rate": 7.991967048605389e-06, "loss": 0.93583546, "memory(GiB)": 28.47, "step": 1235, "train_speed(iter/s)": 0.42877 }, { "acc": 0.82145958, "epoch": 0.033574310237457015, "grad_norm": 13.144651412963867, "learning_rate": 7.996503023884581e-06, "loss": 0.95623636, "memory(GiB)": 28.47, "step": 1240, "train_speed(iter/s)": 0.428786 }, { "acc": 0.81811295, "epoch": 0.03370969052067257, "grad_norm": 14.165471076965332, "learning_rate": 8.001020745692761e-06, "loss": 0.99587288, "memory(GiB)": 28.47, "step": 1245, "train_speed(iter/s)": 0.428796 }, { "acc": 0.79170403, "epoch": 0.03384507080388812, "grad_norm": 15.703094482421875, "learning_rate": 8.00552036035093e-06, "loss": 0.96375456, "memory(GiB)": 28.47, "step": 1250, "train_speed(iter/s)": 0.428819 }, { "acc": 0.80574026, "epoch": 0.03398045108710367, "grad_norm": 38.350669860839844, "learning_rate": 8.010002012427728e-06, "loss": 0.97864733, "memory(GiB)": 28.47, "step": 1255, "train_speed(iter/s)": 0.428847 }, { "acc": 0.80559855, "epoch": 0.034115831370319226, "grad_norm": 20.937788009643555, "learning_rate": 8.014465844767312e-06, "loss": 1.02784586, "memory(GiB)": 28.47, "step": 1260, "train_speed(iter/s)": 0.428868 }, { "acc": 0.80854082, "epoch": 0.03425121165353478, "grad_norm": 19.786819458007812, "learning_rate": 8.018911998516672e-06, "loss": 1.03593845, "memory(GiB)": 28.47, "step": 1265, "train_speed(iter/s)": 0.428894 }, { "acc": 0.82206488, "epoch": 0.03438659193675033, "grad_norm": 13.024375915527344, "learning_rate": 8.023340613152406e-06, "loss": 0.85683708, "memory(GiB)": 28.47, "step": 1270, "train_speed(iter/s)": 0.428915 }, { "acc": 0.85694752, "epoch": 0.034521972219965884, "grad_norm": 8.681492805480957, "learning_rate": 8.027751826506988e-06, "loss": 0.75118346, "memory(GiB)": 28.47, "step": 1275, "train_speed(iter/s)": 0.428934 }, { "acc": 0.79295363, "epoch": 0.034657352503181436, "grad_norm": 21.432884216308594, "learning_rate": 8.032145774794488e-06, "loss": 0.94407072, "memory(GiB)": 28.47, "step": 1280, "train_speed(iter/s)": 0.428946 }, { "acc": 0.80997887, "epoch": 0.03479273278639699, "grad_norm": 13.450640678405762, "learning_rate": 8.03652259263583e-06, "loss": 1.02219658, "memory(GiB)": 28.47, "step": 1285, "train_speed(iter/s)": 0.428951 }, { "acc": 0.78629594, "epoch": 0.03492811306961254, "grad_norm": 16.12348747253418, "learning_rate": 8.04088241308352e-06, "loss": 1.15774002, "memory(GiB)": 28.47, "step": 1290, "train_speed(iter/s)": 0.428965 }, { "acc": 0.78902559, "epoch": 0.035063493352828094, "grad_norm": 14.731587409973145, "learning_rate": 8.04522536764594e-06, "loss": 1.13096695, "memory(GiB)": 28.47, "step": 1295, "train_speed(iter/s)": 0.428992 }, { "acc": 0.79952688, "epoch": 0.03519887363604365, "grad_norm": 18.58437156677246, "learning_rate": 8.049551586311127e-06, "loss": 1.04576149, "memory(GiB)": 28.47, "step": 1300, "train_speed(iter/s)": 0.429012 }, { "acc": 0.82263699, "epoch": 0.0353342539192592, "grad_norm": 20.26158332824707, "learning_rate": 8.05386119757013e-06, "loss": 0.85684357, "memory(GiB)": 28.47, "step": 1305, "train_speed(iter/s)": 0.429038 }, { "acc": 0.79823866, "epoch": 0.03546963420247475, "grad_norm": 70.29048156738281, "learning_rate": 8.058154328439903e-06, "loss": 1.11916046, "memory(GiB)": 28.47, "step": 1310, "train_speed(iter/s)": 0.429061 }, { "acc": 0.77140064, "epoch": 0.035605014485690305, "grad_norm": 23.830148696899414, "learning_rate": 8.062431104485777e-06, "loss": 1.18717384, "memory(GiB)": 28.47, "step": 1315, "train_speed(iter/s)": 0.429085 }, { "acc": 0.80029812, "epoch": 0.03574039476890586, "grad_norm": 16.143104553222656, "learning_rate": 8.066691649843486e-06, "loss": 0.95655327, "memory(GiB)": 28.47, "step": 1320, "train_speed(iter/s)": 0.429105 }, { "acc": 0.80862713, "epoch": 0.03587577505212141, "grad_norm": 17.220535278320312, "learning_rate": 8.070936087240793e-06, "loss": 0.99182291, "memory(GiB)": 28.47, "step": 1325, "train_speed(iter/s)": 0.429113 }, { "acc": 0.817383, "epoch": 0.03601115533533696, "grad_norm": 15.755491256713867, "learning_rate": 8.075164538018694e-06, "loss": 0.97863464, "memory(GiB)": 28.47, "step": 1330, "train_speed(iter/s)": 0.429128 }, { "acc": 0.77824941, "epoch": 0.036146535618552515, "grad_norm": 18.30636215209961, "learning_rate": 8.079377122152244e-06, "loss": 1.16044683, "memory(GiB)": 28.47, "step": 1335, "train_speed(iter/s)": 0.429143 }, { "acc": 0.8116868, "epoch": 0.03628191590176807, "grad_norm": 10.516523361206055, "learning_rate": 8.083573958270977e-06, "loss": 1.01278496, "memory(GiB)": 28.47, "step": 1340, "train_speed(iter/s)": 0.429161 }, { "acc": 0.83053226, "epoch": 0.03641729618498362, "grad_norm": 14.256603240966797, "learning_rate": 8.087755163678949e-06, "loss": 0.87052746, "memory(GiB)": 28.47, "step": 1345, "train_speed(iter/s)": 0.429181 }, { "acc": 0.82578411, "epoch": 0.03655267646819917, "grad_norm": 32.00737380981445, "learning_rate": 8.091920854374413e-06, "loss": 0.80699329, "memory(GiB)": 28.47, "step": 1350, "train_speed(iter/s)": 0.4292 }, { "acc": 0.77650619, "epoch": 0.036688056751414726, "grad_norm": 21.321521759033203, "learning_rate": 8.096071145069132e-06, "loss": 1.07392826, "memory(GiB)": 28.47, "step": 1355, "train_speed(iter/s)": 0.429211 }, { "acc": 0.80263786, "epoch": 0.03682343703463028, "grad_norm": 29.603561401367188, "learning_rate": 8.100206149207328e-06, "loss": 1.13002253, "memory(GiB)": 28.47, "step": 1360, "train_speed(iter/s)": 0.429235 }, { "acc": 0.78825045, "epoch": 0.03695881731784583, "grad_norm": 13.836586952209473, "learning_rate": 8.10432597898429e-06, "loss": 1.08866978, "memory(GiB)": 28.47, "step": 1365, "train_speed(iter/s)": 0.429255 }, { "acc": 0.78820958, "epoch": 0.037094197601061384, "grad_norm": 10.461233139038086, "learning_rate": 8.10843074536464e-06, "loss": 1.09218102, "memory(GiB)": 28.47, "step": 1370, "train_speed(iter/s)": 0.429274 }, { "acc": 0.81705542, "epoch": 0.037229577884276936, "grad_norm": 16.655202865600586, "learning_rate": 8.112520558100267e-06, "loss": 0.84492321, "memory(GiB)": 28.47, "step": 1375, "train_speed(iter/s)": 0.429301 }, { "acc": 0.79932384, "epoch": 0.03736495816749249, "grad_norm": 29.173095703125, "learning_rate": 8.116595525747937e-06, "loss": 0.97712498, "memory(GiB)": 28.47, "step": 1380, "train_speed(iter/s)": 0.429317 }, { "acc": 0.80392389, "epoch": 0.03750033845070804, "grad_norm": 31.921125411987305, "learning_rate": 8.120655755686573e-06, "loss": 1.06724815, "memory(GiB)": 28.47, "step": 1385, "train_speed(iter/s)": 0.42933 }, { "acc": 0.79399495, "epoch": 0.037635718733923594, "grad_norm": 18.353878021240234, "learning_rate": 8.12470135413425e-06, "loss": 1.02800503, "memory(GiB)": 28.47, "step": 1390, "train_speed(iter/s)": 0.429353 }, { "acc": 0.79442787, "epoch": 0.03777109901713915, "grad_norm": 22.89917755126953, "learning_rate": 8.128732426164844e-06, "loss": 1.05573921, "memory(GiB)": 28.47, "step": 1395, "train_speed(iter/s)": 0.429372 }, { "acc": 0.78466244, "epoch": 0.0379064793003547, "grad_norm": 12.995190620422363, "learning_rate": 8.132749075724432e-06, "loss": 1.14084148, "memory(GiB)": 28.47, "step": 1400, "train_speed(iter/s)": 0.429398 }, { "acc": 0.8080287, "epoch": 0.03804185958357025, "grad_norm": 20.12947654724121, "learning_rate": 8.13675140564736e-06, "loss": 0.89438782, "memory(GiB)": 28.47, "step": 1405, "train_speed(iter/s)": 0.42941 }, { "acc": 0.79307003, "epoch": 0.038177239866785805, "grad_norm": 9.540760040283203, "learning_rate": 8.140739517672046e-06, "loss": 1.05404587, "memory(GiB)": 28.47, "step": 1410, "train_speed(iter/s)": 0.42943 }, { "acc": 0.82797585, "epoch": 0.03831262015000136, "grad_norm": 9.439714431762695, "learning_rate": 8.144713512456487e-06, "loss": 0.9030221, "memory(GiB)": 28.47, "step": 1415, "train_speed(iter/s)": 0.429441 }, { "acc": 0.79201922, "epoch": 0.0384480004332169, "grad_norm": 19.82669448852539, "learning_rate": 8.148673489593524e-06, "loss": 1.09041834, "memory(GiB)": 28.47, "step": 1420, "train_speed(iter/s)": 0.429454 }, { "acc": 0.83234568, "epoch": 0.038583380716432455, "grad_norm": 20.163415908813477, "learning_rate": 8.152619547625793e-06, "loss": 0.82368536, "memory(GiB)": 28.47, "step": 1425, "train_speed(iter/s)": 0.429473 }, { "acc": 0.80830889, "epoch": 0.03871876099964801, "grad_norm": 47.6097412109375, "learning_rate": 8.156551784060464e-06, "loss": 0.92644863, "memory(GiB)": 28.47, "step": 1430, "train_speed(iter/s)": 0.429492 }, { "acc": 0.80771408, "epoch": 0.03885414128286356, "grad_norm": 14.675963401794434, "learning_rate": 8.160470295383676e-06, "loss": 0.9079957, "memory(GiB)": 28.47, "step": 1435, "train_speed(iter/s)": 0.429516 }, { "acc": 0.81786661, "epoch": 0.03898952156607911, "grad_norm": 16.55533218383789, "learning_rate": 8.164375177074753e-06, "loss": 0.94515228, "memory(GiB)": 28.47, "step": 1440, "train_speed(iter/s)": 0.429531 }, { "acc": 0.79959183, "epoch": 0.039124901849294666, "grad_norm": 6.906937122344971, "learning_rate": 8.168266523620167e-06, "loss": 1.01061573, "memory(GiB)": 28.47, "step": 1445, "train_speed(iter/s)": 0.429537 }, { "acc": 0.79212656, "epoch": 0.03926028213251022, "grad_norm": 13.91312026977539, "learning_rate": 8.172144428527247e-06, "loss": 1.16351957, "memory(GiB)": 28.47, "step": 1450, "train_speed(iter/s)": 0.429553 }, { "acc": 0.81204357, "epoch": 0.03939566241572577, "grad_norm": 13.884037971496582, "learning_rate": 8.176008984337672e-06, "loss": 0.97241268, "memory(GiB)": 28.47, "step": 1455, "train_speed(iter/s)": 0.429562 }, { "acc": 0.80899944, "epoch": 0.039531042698941324, "grad_norm": 33.065345764160156, "learning_rate": 8.179860282640708e-06, "loss": 1.01415815, "memory(GiB)": 28.47, "step": 1460, "train_speed(iter/s)": 0.429567 }, { "acc": 0.80858183, "epoch": 0.039666422982156876, "grad_norm": 26.672874450683594, "learning_rate": 8.183698414086246e-06, "loss": 1.00769262, "memory(GiB)": 28.47, "step": 1465, "train_speed(iter/s)": 0.429582 }, { "acc": 0.82455311, "epoch": 0.03980180326537243, "grad_norm": 26.75243377685547, "learning_rate": 8.187523468397596e-06, "loss": 0.92528114, "memory(GiB)": 28.47, "step": 1470, "train_speed(iter/s)": 0.42959 }, { "acc": 0.82874184, "epoch": 0.03993718354858798, "grad_norm": 14.982037544250488, "learning_rate": 8.191335534384074e-06, "loss": 0.83563499, "memory(GiB)": 28.47, "step": 1475, "train_speed(iter/s)": 0.429609 }, { "acc": 0.79865823, "epoch": 0.040072563831803534, "grad_norm": 25.75694465637207, "learning_rate": 8.195134699953379e-06, "loss": 1.06370039, "memory(GiB)": 28.47, "step": 1480, "train_speed(iter/s)": 0.429621 }, { "acc": 0.8205472, "epoch": 0.04020794411501909, "grad_norm": 15.959389686584473, "learning_rate": 8.19892105212375e-06, "loss": 0.86293154, "memory(GiB)": 28.47, "step": 1485, "train_speed(iter/s)": 0.429634 }, { "acc": 0.84625196, "epoch": 0.04034332439823464, "grad_norm": 14.088672637939453, "learning_rate": 8.202694677035929e-06, "loss": 0.85967417, "memory(GiB)": 28.47, "step": 1490, "train_speed(iter/s)": 0.429653 }, { "acc": 0.80498686, "epoch": 0.04047870468145019, "grad_norm": 35.051387786865234, "learning_rate": 8.206455659964913e-06, "loss": 1.08885841, "memory(GiB)": 28.47, "step": 1495, "train_speed(iter/s)": 0.429665 }, { "acc": 0.80083609, "epoch": 0.040614084964665745, "grad_norm": 21.75932502746582, "learning_rate": 8.210204085331533e-06, "loss": 1.09179659, "memory(GiB)": 28.47, "step": 1500, "train_speed(iter/s)": 0.429661 }, { "acc": 0.78144102, "epoch": 0.0407494652478813, "grad_norm": 23.00457191467285, "learning_rate": 8.213940036713805e-06, "loss": 1.12786884, "memory(GiB)": 28.47, "step": 1505, "train_speed(iter/s)": 0.429671 }, { "acc": 0.81179485, "epoch": 0.04088484553109685, "grad_norm": 34.13738250732422, "learning_rate": 8.21766359685812e-06, "loss": 0.94262896, "memory(GiB)": 28.47, "step": 1510, "train_speed(iter/s)": 0.429688 }, { "acc": 0.80284367, "epoch": 0.0410202258143124, "grad_norm": 15.898991584777832, "learning_rate": 8.221374847690246e-06, "loss": 0.97411423, "memory(GiB)": 28.47, "step": 1515, "train_speed(iter/s)": 0.429703 }, { "acc": 0.83207951, "epoch": 0.041155606097527955, "grad_norm": 26.31964874267578, "learning_rate": 8.225073870326133e-06, "loss": 0.8489048, "memory(GiB)": 28.47, "step": 1520, "train_speed(iter/s)": 0.429715 }, { "acc": 0.81173954, "epoch": 0.04129098638074351, "grad_norm": 13.389384269714355, "learning_rate": 8.22876074508256e-06, "loss": 0.86969233, "memory(GiB)": 28.47, "step": 1525, "train_speed(iter/s)": 0.429725 }, { "acc": 0.78332872, "epoch": 0.04142636666395906, "grad_norm": 13.847004890441895, "learning_rate": 8.23243555148759e-06, "loss": 1.17159719, "memory(GiB)": 28.47, "step": 1530, "train_speed(iter/s)": 0.429736 }, { "acc": 0.83712044, "epoch": 0.04156174694717461, "grad_norm": 6.98686408996582, "learning_rate": 8.236098368290876e-06, "loss": 0.81874075, "memory(GiB)": 28.47, "step": 1535, "train_speed(iter/s)": 0.429756 }, { "acc": 0.76555133, "epoch": 0.041697127230390166, "grad_norm": 15.577502250671387, "learning_rate": 8.239749273473771e-06, "loss": 1.15936909, "memory(GiB)": 28.47, "step": 1540, "train_speed(iter/s)": 0.429766 }, { "acc": 0.78093548, "epoch": 0.04183250751360572, "grad_norm": 18.68055534362793, "learning_rate": 8.243388344259301e-06, "loss": 1.14529133, "memory(GiB)": 28.47, "step": 1545, "train_speed(iter/s)": 0.429781 }, { "acc": 0.82694387, "epoch": 0.04196788779682127, "grad_norm": 13.334043502807617, "learning_rate": 8.247015657121964e-06, "loss": 0.86077061, "memory(GiB)": 28.47, "step": 1550, "train_speed(iter/s)": 0.429798 }, { "acc": 0.81914015, "epoch": 0.042103268080036824, "grad_norm": 13.942566871643066, "learning_rate": 8.250631287797369e-06, "loss": 0.83206787, "memory(GiB)": 28.47, "step": 1555, "train_speed(iter/s)": 0.429807 }, { "acc": 0.80140581, "epoch": 0.042238648363252376, "grad_norm": 58.5575065612793, "learning_rate": 8.254235311291729e-06, "loss": 0.92562199, "memory(GiB)": 28.47, "step": 1560, "train_speed(iter/s)": 0.429819 }, { "acc": 0.80188551, "epoch": 0.04237402864646793, "grad_norm": 17.64594841003418, "learning_rate": 8.257827801891193e-06, "loss": 1.05209007, "memory(GiB)": 28.47, "step": 1565, "train_speed(iter/s)": 0.429827 }, { "acc": 0.80739479, "epoch": 0.04250940892968348, "grad_norm": 15.837706565856934, "learning_rate": 8.261408833171033e-06, "loss": 0.9514226, "memory(GiB)": 28.47, "step": 1570, "train_speed(iter/s)": 0.429839 }, { "acc": 0.79810214, "epoch": 0.042644789212899034, "grad_norm": 26.159839630126953, "learning_rate": 8.264978478004697e-06, "loss": 0.98888922, "memory(GiB)": 28.47, "step": 1575, "train_speed(iter/s)": 0.429847 }, { "acc": 0.81238651, "epoch": 0.04278016949611459, "grad_norm": 15.193293571472168, "learning_rate": 8.268536808572685e-06, "loss": 0.90974665, "memory(GiB)": 28.47, "step": 1580, "train_speed(iter/s)": 0.429861 }, { "acc": 0.79206786, "epoch": 0.04291554977933014, "grad_norm": 11.796355247497559, "learning_rate": 8.272083896371327e-06, "loss": 0.94037971, "memory(GiB)": 28.47, "step": 1585, "train_speed(iter/s)": 0.42987 }, { "acc": 0.80519276, "epoch": 0.04305093006254569, "grad_norm": 11.764022827148438, "learning_rate": 8.275619812221395e-06, "loss": 0.93168964, "memory(GiB)": 28.47, "step": 1590, "train_speed(iter/s)": 0.429885 }, { "acc": 0.81623917, "epoch": 0.043186310345761245, "grad_norm": 19.646881103515625, "learning_rate": 8.279144626276587e-06, "loss": 0.89723387, "memory(GiB)": 28.47, "step": 1595, "train_speed(iter/s)": 0.429906 }, { "acc": 0.80344706, "epoch": 0.0433216906289768, "grad_norm": 16.15004539489746, "learning_rate": 8.282658408031872e-06, "loss": 0.90144081, "memory(GiB)": 28.47, "step": 1600, "train_speed(iter/s)": 0.429922 }, { "acc": 0.79119406, "epoch": 0.04345707091219235, "grad_norm": 15.128207206726074, "learning_rate": 8.286161226331729e-06, "loss": 1.12540178, "memory(GiB)": 28.47, "step": 1605, "train_speed(iter/s)": 0.429927 }, { "acc": 0.78032236, "epoch": 0.0435924511954079, "grad_norm": 12.970561027526855, "learning_rate": 8.28965314937822e-06, "loss": 1.12208767, "memory(GiB)": 28.47, "step": 1610, "train_speed(iter/s)": 0.429932 }, { "acc": 0.81906033, "epoch": 0.043727831478623455, "grad_norm": 8.651330947875977, "learning_rate": 8.293134244738972e-06, "loss": 0.88671827, "memory(GiB)": 28.47, "step": 1615, "train_speed(iter/s)": 0.429944 }, { "acc": 0.79608989, "epoch": 0.04386321176183901, "grad_norm": 23.648208618164062, "learning_rate": 8.296604579355015e-06, "loss": 1.07738848, "memory(GiB)": 28.47, "step": 1620, "train_speed(iter/s)": 0.429956 }, { "acc": 0.83068647, "epoch": 0.04399859204505456, "grad_norm": 12.679540634155273, "learning_rate": 8.300064219548511e-06, "loss": 0.80053329, "memory(GiB)": 28.47, "step": 1625, "train_speed(iter/s)": 0.429973 }, { "acc": 0.79042206, "epoch": 0.04413397232827011, "grad_norm": 11.687248229980469, "learning_rate": 8.303513231030347e-06, "loss": 1.05329103, "memory(GiB)": 28.47, "step": 1630, "train_speed(iter/s)": 0.429984 }, { "acc": 0.79690337, "epoch": 0.044269352611485666, "grad_norm": 47.34459686279297, "learning_rate": 8.306951678907645e-06, "loss": 1.0416111, "memory(GiB)": 28.47, "step": 1635, "train_speed(iter/s)": 0.429998 }, { "acc": 0.81840801, "epoch": 0.04440473289470122, "grad_norm": 11.456661224365234, "learning_rate": 8.310379627691114e-06, "loss": 0.98311462, "memory(GiB)": 28.47, "step": 1640, "train_speed(iter/s)": 0.430012 }, { "acc": 0.7989378, "epoch": 0.04454011317791677, "grad_norm": 20.614091873168945, "learning_rate": 8.31379714130233e-06, "loss": 1.03786221, "memory(GiB)": 28.47, "step": 1645, "train_speed(iter/s)": 0.43003 }, { "acc": 0.84355259, "epoch": 0.04467549346113232, "grad_norm": 11.716605186462402, "learning_rate": 8.31720428308087e-06, "loss": 0.83601818, "memory(GiB)": 28.47, "step": 1650, "train_speed(iter/s)": 0.43005 }, { "acc": 0.81066999, "epoch": 0.044810873744347876, "grad_norm": 36.69941329956055, "learning_rate": 8.32060111579138e-06, "loss": 0.92672815, "memory(GiB)": 28.47, "step": 1655, "train_speed(iter/s)": 0.430063 }, { "acc": 0.79105654, "epoch": 0.04494625402756343, "grad_norm": 45.953224182128906, "learning_rate": 8.323987701630485e-06, "loss": 1.04789963, "memory(GiB)": 28.47, "step": 1660, "train_speed(iter/s)": 0.430074 }, { "acc": 0.79962015, "epoch": 0.04508163431077898, "grad_norm": 14.310107231140137, "learning_rate": 8.327364102233644e-06, "loss": 1.02013063, "memory(GiB)": 28.47, "step": 1665, "train_speed(iter/s)": 0.430092 }, { "acc": 0.82655792, "epoch": 0.045217014593994534, "grad_norm": 21.098087310791016, "learning_rate": 8.330730378681868e-06, "loss": 0.86007204, "memory(GiB)": 28.47, "step": 1670, "train_speed(iter/s)": 0.430088 }, { "acc": 0.82613697, "epoch": 0.04535239487721009, "grad_norm": 24.080280303955078, "learning_rate": 8.334086591508361e-06, "loss": 0.82330227, "memory(GiB)": 28.47, "step": 1675, "train_speed(iter/s)": 0.430105 }, { "acc": 0.83253689, "epoch": 0.04548777516042563, "grad_norm": 24.96285629272461, "learning_rate": 8.337432800705036e-06, "loss": 0.81998978, "memory(GiB)": 28.47, "step": 1680, "train_speed(iter/s)": 0.430121 }, { "acc": 0.83343058, "epoch": 0.045623155443641185, "grad_norm": 9.882243156433105, "learning_rate": 8.340769065728964e-06, "loss": 0.82488613, "memory(GiB)": 28.47, "step": 1685, "train_speed(iter/s)": 0.430133 }, { "acc": 0.81112232, "epoch": 0.04575853572685674, "grad_norm": 11.12065601348877, "learning_rate": 8.344095445508707e-06, "loss": 0.94339447, "memory(GiB)": 28.47, "step": 1690, "train_speed(iter/s)": 0.430146 }, { "acc": 0.80355301, "epoch": 0.04589391601007229, "grad_norm": 67.69092559814453, "learning_rate": 8.34741199845056e-06, "loss": 0.99898815, "memory(GiB)": 28.47, "step": 1695, "train_speed(iter/s)": 0.430161 }, { "acc": 0.82638454, "epoch": 0.04602929629328784, "grad_norm": 13.979840278625488, "learning_rate": 8.350718782444712e-06, "loss": 0.88887844, "memory(GiB)": 28.47, "step": 1700, "train_speed(iter/s)": 0.430173 }, { "acc": 0.78760328, "epoch": 0.046164676576503395, "grad_norm": 29.06406593322754, "learning_rate": 8.354015854871303e-06, "loss": 1.07869387, "memory(GiB)": 28.47, "step": 1705, "train_speed(iter/s)": 0.430179 }, { "acc": 0.81741953, "epoch": 0.04630005685971895, "grad_norm": 22.211626052856445, "learning_rate": 8.357303272606397e-06, "loss": 0.96238594, "memory(GiB)": 28.47, "step": 1710, "train_speed(iter/s)": 0.43019 }, { "acc": 0.83751068, "epoch": 0.0464354371429345, "grad_norm": 9.949236869812012, "learning_rate": 8.360581092027879e-06, "loss": 0.82353048, "memory(GiB)": 28.47, "step": 1715, "train_speed(iter/s)": 0.430167 }, { "acc": 0.82225847, "epoch": 0.04657081742615005, "grad_norm": 8.795845031738281, "learning_rate": 8.363849369021244e-06, "loss": 0.8577034, "memory(GiB)": 28.47, "step": 1720, "train_speed(iter/s)": 0.430175 }, { "acc": 0.81682186, "epoch": 0.046706197709365606, "grad_norm": 16.09285545349121, "learning_rate": 8.367108158985321e-06, "loss": 0.85553493, "memory(GiB)": 28.47, "step": 1725, "train_speed(iter/s)": 0.430192 }, { "acc": 0.8340086, "epoch": 0.04684157799258116, "grad_norm": 12.058584213256836, "learning_rate": 8.370357516837904e-06, "loss": 0.78055177, "memory(GiB)": 28.47, "step": 1730, "train_speed(iter/s)": 0.430207 }, { "acc": 0.7960248, "epoch": 0.04697695827579671, "grad_norm": 21.424341201782227, "learning_rate": 8.373597497021313e-06, "loss": 1.1466815, "memory(GiB)": 28.47, "step": 1735, "train_speed(iter/s)": 0.430219 }, { "acc": 0.81667175, "epoch": 0.047112338559012264, "grad_norm": 17.907764434814453, "learning_rate": 8.376828153507851e-06, "loss": 0.88982277, "memory(GiB)": 28.47, "step": 1740, "train_speed(iter/s)": 0.430232 }, { "acc": 0.81109333, "epoch": 0.047247718842227816, "grad_norm": 9.526403427124023, "learning_rate": 8.380049539805219e-06, "loss": 0.81221294, "memory(GiB)": 28.47, "step": 1745, "train_speed(iter/s)": 0.430235 }, { "acc": 0.81606579, "epoch": 0.04738309912544337, "grad_norm": 8.769844055175781, "learning_rate": 8.383261708961816e-06, "loss": 0.96454563, "memory(GiB)": 28.47, "step": 1750, "train_speed(iter/s)": 0.430237 }, { "acc": 0.79451151, "epoch": 0.04751847940865892, "grad_norm": 10.373156547546387, "learning_rate": 8.386464713571991e-06, "loss": 1.09882784, "memory(GiB)": 28.47, "step": 1755, "train_speed(iter/s)": 0.43024 }, { "acc": 0.81655092, "epoch": 0.047653859691874474, "grad_norm": 64.81940460205078, "learning_rate": 8.38965860578121e-06, "loss": 0.90206909, "memory(GiB)": 28.47, "step": 1760, "train_speed(iter/s)": 0.430248 }, { "acc": 0.81218815, "epoch": 0.04778923997509003, "grad_norm": 13.243330955505371, "learning_rate": 8.39284343729114e-06, "loss": 0.87392426, "memory(GiB)": 28.47, "step": 1765, "train_speed(iter/s)": 0.430256 }, { "acc": 0.80507784, "epoch": 0.04792462025830558, "grad_norm": 12.365399360656738, "learning_rate": 8.396019259364678e-06, "loss": 0.87899132, "memory(GiB)": 28.47, "step": 1770, "train_speed(iter/s)": 0.43026 }, { "acc": 0.80011187, "epoch": 0.04806000054152113, "grad_norm": 23.248760223388672, "learning_rate": 8.399186122830908e-06, "loss": 0.95820408, "memory(GiB)": 28.47, "step": 1775, "train_speed(iter/s)": 0.43027 }, { "acc": 0.79986219, "epoch": 0.048195380824736685, "grad_norm": 18.66202735900879, "learning_rate": 8.402344078089968e-06, "loss": 1.06688147, "memory(GiB)": 28.47, "step": 1780, "train_speed(iter/s)": 0.430279 }, { "acc": 0.75734816, "epoch": 0.04833076110795224, "grad_norm": 13.454381942749023, "learning_rate": 8.405493175117875e-06, "loss": 1.37080936, "memory(GiB)": 28.47, "step": 1785, "train_speed(iter/s)": 0.430284 }, { "acc": 0.79077234, "epoch": 0.04846614139116779, "grad_norm": 10.622823715209961, "learning_rate": 8.408633463471265e-06, "loss": 1.14421597, "memory(GiB)": 28.47, "step": 1790, "train_speed(iter/s)": 0.43029 }, { "acc": 0.82287064, "epoch": 0.04860152167438334, "grad_norm": 14.647329330444336, "learning_rate": 8.411764992292069e-06, "loss": 0.97594662, "memory(GiB)": 28.47, "step": 1795, "train_speed(iter/s)": 0.430299 }, { "acc": 0.79652524, "epoch": 0.048736901957598895, "grad_norm": 21.096193313598633, "learning_rate": 8.414887810312137e-06, "loss": 1.09851856, "memory(GiB)": 28.47, "step": 1800, "train_speed(iter/s)": 0.430311 }, { "acc": 0.79829788, "epoch": 0.04887228224081445, "grad_norm": 9.347407341003418, "learning_rate": 8.418001965857778e-06, "loss": 1.04412212, "memory(GiB)": 28.47, "step": 1805, "train_speed(iter/s)": 0.430312 }, { "acc": 0.82426109, "epoch": 0.04900766252403, "grad_norm": 16.78676414489746, "learning_rate": 8.421107506854257e-06, "loss": 0.91654367, "memory(GiB)": 28.47, "step": 1810, "train_speed(iter/s)": 0.430309 }, { "acc": 0.79658298, "epoch": 0.04914304280724555, "grad_norm": 34.73291778564453, "learning_rate": 8.424204480830208e-06, "loss": 1.09704466, "memory(GiB)": 28.47, "step": 1815, "train_speed(iter/s)": 0.43031 }, { "acc": 0.84737368, "epoch": 0.049278423090461106, "grad_norm": 22.743379592895508, "learning_rate": 8.427292934922012e-06, "loss": 0.80570087, "memory(GiB)": 28.47, "step": 1820, "train_speed(iter/s)": 0.430316 }, { "acc": 0.8325676, "epoch": 0.04941380337367666, "grad_norm": 6.8426103591918945, "learning_rate": 8.430372915878093e-06, "loss": 0.86237888, "memory(GiB)": 28.47, "step": 1825, "train_speed(iter/s)": 0.430335 }, { "acc": 0.81066036, "epoch": 0.04954918365689221, "grad_norm": 33.95155334472656, "learning_rate": 8.433444470063162e-06, "loss": 0.90407906, "memory(GiB)": 28.47, "step": 1830, "train_speed(iter/s)": 0.430348 }, { "acc": 0.82139416, "epoch": 0.049684563940107764, "grad_norm": 13.360420227050781, "learning_rate": 8.436507643462418e-06, "loss": 0.90700665, "memory(GiB)": 28.47, "step": 1835, "train_speed(iter/s)": 0.430355 }, { "acc": 0.80872879, "epoch": 0.049819944223323316, "grad_norm": 9.922309875488281, "learning_rate": 8.43956248168566e-06, "loss": 0.97628603, "memory(GiB)": 28.47, "step": 1840, "train_speed(iter/s)": 0.430369 }, { "acc": 0.81342335, "epoch": 0.04995532450653887, "grad_norm": 19.79385757446289, "learning_rate": 8.442609029971377e-06, "loss": 1.04767141, "memory(GiB)": 28.47, "step": 1845, "train_speed(iter/s)": 0.430382 }, { "acc": 0.8287199, "epoch": 0.05009070478975442, "grad_norm": 21.83308982849121, "learning_rate": 8.445647333190763e-06, "loss": 0.92009554, "memory(GiB)": 28.47, "step": 1850, "train_speed(iter/s)": 0.430387 }, { "acc": 0.83770142, "epoch": 0.050226085072969974, "grad_norm": 9.060059547424316, "learning_rate": 8.448677435851679e-06, "loss": 0.84593716, "memory(GiB)": 28.47, "step": 1855, "train_speed(iter/s)": 0.430392 }, { "acc": 0.82709513, "epoch": 0.05036146535618553, "grad_norm": 18.638683319091797, "learning_rate": 8.451699382102568e-06, "loss": 0.75429273, "memory(GiB)": 28.47, "step": 1860, "train_speed(iter/s)": 0.4304 }, { "acc": 0.80761852, "epoch": 0.05049684563940108, "grad_norm": 33.84961700439453, "learning_rate": 8.454713215736313e-06, "loss": 1.00246201, "memory(GiB)": 28.47, "step": 1865, "train_speed(iter/s)": 0.430409 }, { "acc": 0.83570442, "epoch": 0.05063222592261663, "grad_norm": 42.8490104675293, "learning_rate": 8.45771898019405e-06, "loss": 0.70945849, "memory(GiB)": 28.47, "step": 1870, "train_speed(iter/s)": 0.430416 }, { "acc": 0.79552593, "epoch": 0.050767606205832184, "grad_norm": 18.70354461669922, "learning_rate": 8.460716718568917e-06, "loss": 1.01240711, "memory(GiB)": 28.47, "step": 1875, "train_speed(iter/s)": 0.430426 }, { "acc": 0.78810277, "epoch": 0.05090298648904774, "grad_norm": 15.742875099182129, "learning_rate": 8.463706473609768e-06, "loss": 1.04452591, "memory(GiB)": 28.47, "step": 1880, "train_speed(iter/s)": 0.430429 }, { "acc": 0.80500793, "epoch": 0.05103836677226329, "grad_norm": 21.890296936035156, "learning_rate": 8.466688287724827e-06, "loss": 1.02816725, "memory(GiB)": 28.47, "step": 1885, "train_speed(iter/s)": 0.430434 }, { "acc": 0.80634537, "epoch": 0.05117374705547884, "grad_norm": 26.321826934814453, "learning_rate": 8.4696622029853e-06, "loss": 0.93286448, "memory(GiB)": 28.47, "step": 1890, "train_speed(iter/s)": 0.430446 }, { "acc": 0.84548664, "epoch": 0.051309127338694395, "grad_norm": 13.00786304473877, "learning_rate": 8.472628261128928e-06, "loss": 0.88786488, "memory(GiB)": 28.47, "step": 1895, "train_speed(iter/s)": 0.430458 }, { "acc": 0.82346506, "epoch": 0.05144450762190995, "grad_norm": 15.6830472946167, "learning_rate": 8.475586503563517e-06, "loss": 0.92617111, "memory(GiB)": 28.47, "step": 1900, "train_speed(iter/s)": 0.43047 }, { "acc": 0.80913296, "epoch": 0.0515798879051255, "grad_norm": 11.548112869262695, "learning_rate": 8.478536971370395e-06, "loss": 0.90684099, "memory(GiB)": 28.47, "step": 1905, "train_speed(iter/s)": 0.430484 }, { "acc": 0.82615671, "epoch": 0.05171526818834105, "grad_norm": 15.019444465637207, "learning_rate": 8.481479705307834e-06, "loss": 0.82915401, "memory(GiB)": 28.47, "step": 1910, "train_speed(iter/s)": 0.430493 }, { "acc": 0.81649456, "epoch": 0.051850648471556605, "grad_norm": 16.71370506286621, "learning_rate": 8.484414745814441e-06, "loss": 0.95572968, "memory(GiB)": 28.47, "step": 1915, "train_speed(iter/s)": 0.430494 }, { "acc": 0.80541258, "epoch": 0.05198602875477216, "grad_norm": 21.199514389038086, "learning_rate": 8.487342133012476e-06, "loss": 1.12215748, "memory(GiB)": 28.47, "step": 1920, "train_speed(iter/s)": 0.430498 }, { "acc": 0.8396452, "epoch": 0.05212140903798771, "grad_norm": 37.02876663208008, "learning_rate": 8.490261906711154e-06, "loss": 0.78052392, "memory(GiB)": 28.47, "step": 1925, "train_speed(iter/s)": 0.430511 }, { "acc": 0.83404255, "epoch": 0.05225678932120326, "grad_norm": 15.001119613647461, "learning_rate": 8.493174106409892e-06, "loss": 0.815308, "memory(GiB)": 28.47, "step": 1930, "train_speed(iter/s)": 0.430519 }, { "acc": 0.79618168, "epoch": 0.05239216960441881, "grad_norm": 10.127327919006348, "learning_rate": 8.496078771301508e-06, "loss": 0.89829369, "memory(GiB)": 28.47, "step": 1935, "train_speed(iter/s)": 0.43052 }, { "acc": 0.8277565, "epoch": 0.05252754988763436, "grad_norm": 16.079843521118164, "learning_rate": 8.498975940275396e-06, "loss": 0.82163267, "memory(GiB)": 28.47, "step": 1940, "train_speed(iter/s)": 0.430532 }, { "acc": 0.80981636, "epoch": 0.052662930170849914, "grad_norm": 18.516098022460938, "learning_rate": 8.501865651920643e-06, "loss": 0.94495411, "memory(GiB)": 28.47, "step": 1945, "train_speed(iter/s)": 0.430542 }, { "acc": 0.8087225, "epoch": 0.05279831045406547, "grad_norm": 15.915547370910645, "learning_rate": 8.504747944529113e-06, "loss": 1.00773659, "memory(GiB)": 28.47, "step": 1950, "train_speed(iter/s)": 0.430541 }, { "acc": 0.82414532, "epoch": 0.05293369073728102, "grad_norm": 32.29111099243164, "learning_rate": 8.5076228560985e-06, "loss": 0.91811752, "memory(GiB)": 28.47, "step": 1955, "train_speed(iter/s)": 0.430552 }, { "acc": 0.80884838, "epoch": 0.05306907102049657, "grad_norm": 63.64313507080078, "learning_rate": 8.51049042433532e-06, "loss": 1.00522852, "memory(GiB)": 28.47, "step": 1960, "train_speed(iter/s)": 0.430562 }, { "acc": 0.79557009, "epoch": 0.053204451303712125, "grad_norm": 38.81918716430664, "learning_rate": 8.51335068665789e-06, "loss": 1.04478817, "memory(GiB)": 28.47, "step": 1965, "train_speed(iter/s)": 0.430568 }, { "acc": 0.83172016, "epoch": 0.05333983158692768, "grad_norm": 12.204544067382812, "learning_rate": 8.516203680199252e-06, "loss": 0.81131859, "memory(GiB)": 28.47, "step": 1970, "train_speed(iter/s)": 0.430579 }, { "acc": 0.7880167, "epoch": 0.05347521187014323, "grad_norm": 16.62394142150879, "learning_rate": 8.51904944181007e-06, "loss": 1.11231813, "memory(GiB)": 28.47, "step": 1975, "train_speed(iter/s)": 0.430587 }, { "acc": 0.80714512, "epoch": 0.05361059215335878, "grad_norm": 11.640135765075684, "learning_rate": 8.521888008061474e-06, "loss": 0.93992119, "memory(GiB)": 28.47, "step": 1980, "train_speed(iter/s)": 0.430593 }, { "acc": 0.8354085, "epoch": 0.053745972436574335, "grad_norm": 11.665823936462402, "learning_rate": 8.524719415247899e-06, "loss": 0.84198179, "memory(GiB)": 28.47, "step": 1985, "train_speed(iter/s)": 0.430602 }, { "acc": 0.82760162, "epoch": 0.05388135271978989, "grad_norm": 13.131875038146973, "learning_rate": 8.527543699389857e-06, "loss": 0.82605371, "memory(GiB)": 28.47, "step": 1990, "train_speed(iter/s)": 0.430609 }, { "acc": 0.82583504, "epoch": 0.05401673300300544, "grad_norm": 11.156871795654297, "learning_rate": 8.53036089623668e-06, "loss": 0.81666832, "memory(GiB)": 28.47, "step": 1995, "train_speed(iter/s)": 0.430616 }, { "acc": 0.82763577, "epoch": 0.05415211328622099, "grad_norm": 10.383646965026855, "learning_rate": 8.533171041269257e-06, "loss": 0.79334345, "memory(GiB)": 28.47, "step": 2000, "train_speed(iter/s)": 0.430626 }, { "acc": 0.80466309, "epoch": 0.054287493569436546, "grad_norm": 12.390043258666992, "learning_rate": 8.535974169702692e-06, "loss": 0.95046177, "memory(GiB)": 28.47, "step": 2005, "train_speed(iter/s)": 0.430636 }, { "acc": 0.82324715, "epoch": 0.0544228738526521, "grad_norm": 30.72674560546875, "learning_rate": 8.538770316488965e-06, "loss": 0.9335535, "memory(GiB)": 28.47, "step": 2010, "train_speed(iter/s)": 0.430645 }, { "acc": 0.81123028, "epoch": 0.05455825413586765, "grad_norm": 13.077561378479004, "learning_rate": 8.541559516319544e-06, "loss": 0.86651077, "memory(GiB)": 28.47, "step": 2015, "train_speed(iter/s)": 0.43065 }, { "acc": 0.82011642, "epoch": 0.054693634419083204, "grad_norm": 33.736297607421875, "learning_rate": 8.54434180362797e-06, "loss": 0.99292746, "memory(GiB)": 28.47, "step": 2020, "train_speed(iter/s)": 0.430657 }, { "acc": 0.79747019, "epoch": 0.054829014702298756, "grad_norm": 15.799782752990723, "learning_rate": 8.5471172125924e-06, "loss": 1.07511311, "memory(GiB)": 28.47, "step": 2025, "train_speed(iter/s)": 0.430667 }, { "acc": 0.80466356, "epoch": 0.05496439498551431, "grad_norm": 43.63134002685547, "learning_rate": 8.549885777138135e-06, "loss": 1.01137428, "memory(GiB)": 28.47, "step": 2030, "train_speed(iter/s)": 0.430673 }, { "acc": 0.76617661, "epoch": 0.05509977526872986, "grad_norm": 21.355819702148438, "learning_rate": 8.552647530940101e-06, "loss": 1.13536892, "memory(GiB)": 28.47, "step": 2035, "train_speed(iter/s)": 0.430683 }, { "acc": 0.82064705, "epoch": 0.055235155551945414, "grad_norm": 21.89875602722168, "learning_rate": 8.555402507425314e-06, "loss": 0.96101599, "memory(GiB)": 28.47, "step": 2040, "train_speed(iter/s)": 0.43068 }, { "acc": 0.83359184, "epoch": 0.05537053583516097, "grad_norm": 18.282840728759766, "learning_rate": 8.5581507397753e-06, "loss": 0.8572607, "memory(GiB)": 28.47, "step": 2045, "train_speed(iter/s)": 0.430694 }, { "acc": 0.8450098, "epoch": 0.05550591611837652, "grad_norm": 11.773467063903809, "learning_rate": 8.560892260928499e-06, "loss": 0.69071984, "memory(GiB)": 28.47, "step": 2050, "train_speed(iter/s)": 0.430688 }, { "acc": 0.81305742, "epoch": 0.05564129640159207, "grad_norm": 11.16331672668457, "learning_rate": 8.563627103582627e-06, "loss": 0.79246373, "memory(GiB)": 28.47, "step": 2055, "train_speed(iter/s)": 0.430694 }, { "acc": 0.8178134, "epoch": 0.055776676684807625, "grad_norm": 17.471969604492188, "learning_rate": 8.566355300197025e-06, "loss": 0.80696526, "memory(GiB)": 28.47, "step": 2060, "train_speed(iter/s)": 0.430704 }, { "acc": 0.827034, "epoch": 0.05591205696802318, "grad_norm": 7.071767330169678, "learning_rate": 8.569076882994961e-06, "loss": 0.88442039, "memory(GiB)": 28.47, "step": 2065, "train_speed(iter/s)": 0.430709 }, { "acc": 0.82126141, "epoch": 0.05604743725123873, "grad_norm": 37.0366096496582, "learning_rate": 8.571791883965923e-06, "loss": 0.86003685, "memory(GiB)": 28.47, "step": 2070, "train_speed(iter/s)": 0.430721 }, { "acc": 0.81009293, "epoch": 0.05618281753445428, "grad_norm": 79.82390594482422, "learning_rate": 8.574500334867868e-06, "loss": 0.94442148, "memory(GiB)": 28.47, "step": 2075, "train_speed(iter/s)": 0.430729 }, { "acc": 0.78085556, "epoch": 0.056318197817669835, "grad_norm": 35.48921585083008, "learning_rate": 8.577202267229452e-06, "loss": 1.20911922, "memory(GiB)": 28.47, "step": 2080, "train_speed(iter/s)": 0.430733 }, { "acc": 0.82969828, "epoch": 0.05645357810088539, "grad_norm": 27.522071838378906, "learning_rate": 8.579897712352236e-06, "loss": 0.89209833, "memory(GiB)": 28.47, "step": 2085, "train_speed(iter/s)": 0.430741 }, { "acc": 0.82517042, "epoch": 0.05658895838410094, "grad_norm": 16.51707649230957, "learning_rate": 8.582586701312855e-06, "loss": 0.89985237, "memory(GiB)": 28.47, "step": 2090, "train_speed(iter/s)": 0.430751 }, { "acc": 0.80007534, "epoch": 0.05672433866731649, "grad_norm": 16.381790161132812, "learning_rate": 8.585269264965176e-06, "loss": 1.03312416, "memory(GiB)": 28.47, "step": 2095, "train_speed(iter/s)": 0.430761 }, { "acc": 0.81700935, "epoch": 0.056859718950532046, "grad_norm": 25.523221969604492, "learning_rate": 8.58794543394242e-06, "loss": 1.02733173, "memory(GiB)": 28.47, "step": 2100, "train_speed(iter/s)": 0.430763 }, { "acc": 0.81464291, "epoch": 0.0569950992337476, "grad_norm": 11.854068756103516, "learning_rate": 8.59061523865925e-06, "loss": 0.91803303, "memory(GiB)": 28.47, "step": 2105, "train_speed(iter/s)": 0.430769 }, { "acc": 0.81056461, "epoch": 0.05713047951696315, "grad_norm": 13.206374168395996, "learning_rate": 8.593278709313866e-06, "loss": 1.08391666, "memory(GiB)": 28.47, "step": 2110, "train_speed(iter/s)": 0.43078 }, { "acc": 0.82843513, "epoch": 0.0572658598001787, "grad_norm": 15.421470642089844, "learning_rate": 8.595935875890033e-06, "loss": 0.81903858, "memory(GiB)": 28.47, "step": 2115, "train_speed(iter/s)": 0.430792 }, { "acc": 0.82746067, "epoch": 0.057401240083394256, "grad_norm": 24.065778732299805, "learning_rate": 8.598586768159119e-06, "loss": 0.90251808, "memory(GiB)": 28.47, "step": 2120, "train_speed(iter/s)": 0.430798 }, { "acc": 0.82652864, "epoch": 0.05753662036660981, "grad_norm": 10.475021362304688, "learning_rate": 8.601231415682095e-06, "loss": 0.86156483, "memory(GiB)": 28.47, "step": 2125, "train_speed(iter/s)": 0.430791 }, { "acc": 0.80869102, "epoch": 0.05767200064982536, "grad_norm": 216.68521118164062, "learning_rate": 8.60386984781151e-06, "loss": 0.99887867, "memory(GiB)": 28.47, "step": 2130, "train_speed(iter/s)": 0.430791 }, { "acc": 0.8006649, "epoch": 0.057807380933040914, "grad_norm": 21.38585662841797, "learning_rate": 8.606502093693447e-06, "loss": 0.97113495, "memory(GiB)": 28.47, "step": 2135, "train_speed(iter/s)": 0.430795 }, { "acc": 0.82419119, "epoch": 0.057942761216256466, "grad_norm": 12.870153427124023, "learning_rate": 8.609128182269452e-06, "loss": 0.84856663, "memory(GiB)": 28.47, "step": 2140, "train_speed(iter/s)": 0.430799 }, { "acc": 0.81116781, "epoch": 0.05807814149947202, "grad_norm": 14.531530380249023, "learning_rate": 8.61174814227845e-06, "loss": 0.95172997, "memory(GiB)": 28.47, "step": 2145, "train_speed(iter/s)": 0.430805 }, { "acc": 0.79561853, "epoch": 0.05821352178268757, "grad_norm": 23.641437530517578, "learning_rate": 8.614362002258628e-06, "loss": 1.09547443, "memory(GiB)": 28.47, "step": 2150, "train_speed(iter/s)": 0.430815 }, { "acc": 0.82036591, "epoch": 0.058348902065903124, "grad_norm": 21.676748275756836, "learning_rate": 8.616969790549296e-06, "loss": 0.93143873, "memory(GiB)": 28.47, "step": 2155, "train_speed(iter/s)": 0.430822 }, { "acc": 0.77206964, "epoch": 0.05848428234911868, "grad_norm": 15.37320327758789, "learning_rate": 8.619571535292739e-06, "loss": 1.0987524, "memory(GiB)": 28.47, "step": 2160, "train_speed(iter/s)": 0.430819 }, { "acc": 0.820998, "epoch": 0.05861966263233423, "grad_norm": 23.62978172302246, "learning_rate": 8.622167264436034e-06, "loss": 0.97184925, "memory(GiB)": 28.47, "step": 2165, "train_speed(iter/s)": 0.430826 }, { "acc": 0.77607832, "epoch": 0.05875504291554978, "grad_norm": 21.672693252563477, "learning_rate": 8.624757005732851e-06, "loss": 1.11990261, "memory(GiB)": 28.47, "step": 2170, "train_speed(iter/s)": 0.430835 }, { "acc": 0.8441143, "epoch": 0.058890423198765335, "grad_norm": 9.384203910827637, "learning_rate": 8.627340786745236e-06, "loss": 0.69501152, "memory(GiB)": 28.47, "step": 2175, "train_speed(iter/s)": 0.430833 }, { "acc": 0.82888546, "epoch": 0.05902580348198089, "grad_norm": 7.562715530395508, "learning_rate": 8.62991863484537e-06, "loss": 0.88853798, "memory(GiB)": 28.47, "step": 2180, "train_speed(iter/s)": 0.430837 }, { "acc": 0.77359724, "epoch": 0.05916118376519644, "grad_norm": 13.896949768066406, "learning_rate": 8.632490577217305e-06, "loss": 1.11355858, "memory(GiB)": 28.47, "step": 2185, "train_speed(iter/s)": 0.43084 }, { "acc": 0.79599676, "epoch": 0.05929656404841199, "grad_norm": 25.635616302490234, "learning_rate": 8.635056640858697e-06, "loss": 1.07362785, "memory(GiB)": 28.47, "step": 2190, "train_speed(iter/s)": 0.430848 }, { "acc": 0.8180172, "epoch": 0.05943194433162754, "grad_norm": 35.42445755004883, "learning_rate": 8.637616852582486e-06, "loss": 0.93209066, "memory(GiB)": 28.47, "step": 2195, "train_speed(iter/s)": 0.430858 }, { "acc": 0.79487987, "epoch": 0.05956732461484309, "grad_norm": 42.49654006958008, "learning_rate": 8.640171239018594e-06, "loss": 1.05314331, "memory(GiB)": 28.47, "step": 2200, "train_speed(iter/s)": 0.430866 }, { "acc": 0.81799812, "epoch": 0.059702704898058644, "grad_norm": 13.791367530822754, "learning_rate": 8.642719826615584e-06, "loss": 0.93887949, "memory(GiB)": 28.47, "step": 2205, "train_speed(iter/s)": 0.43088 }, { "acc": 0.80695267, "epoch": 0.059838085181274196, "grad_norm": 14.717842102050781, "learning_rate": 8.645262641642292e-06, "loss": 1.00685644, "memory(GiB)": 28.47, "step": 2210, "train_speed(iter/s)": 0.430881 }, { "acc": 0.80857658, "epoch": 0.05997346546448975, "grad_norm": 13.368457794189453, "learning_rate": 8.647799710189466e-06, "loss": 0.91473808, "memory(GiB)": 28.47, "step": 2215, "train_speed(iter/s)": 0.430892 }, { "acc": 0.82205143, "epoch": 0.0601088457477053, "grad_norm": 15.635245323181152, "learning_rate": 8.650331058171367e-06, "loss": 0.82301617, "memory(GiB)": 28.47, "step": 2220, "train_speed(iter/s)": 0.430891 }, { "acc": 0.84512405, "epoch": 0.060244226030920854, "grad_norm": 13.821908950805664, "learning_rate": 8.652856711327352e-06, "loss": 0.81919947, "memory(GiB)": 28.47, "step": 2225, "train_speed(iter/s)": 0.430898 }, { "acc": 0.82641773, "epoch": 0.06037960631413641, "grad_norm": 14.55715560913086, "learning_rate": 8.65537669522345e-06, "loss": 0.9246439, "memory(GiB)": 28.47, "step": 2230, "train_speed(iter/s)": 0.430903 }, { "acc": 0.84831839, "epoch": 0.06051498659735196, "grad_norm": 42.20588302612305, "learning_rate": 8.657891035253915e-06, "loss": 0.84107342, "memory(GiB)": 28.47, "step": 2235, "train_speed(iter/s)": 0.430852 }, { "acc": 0.82622881, "epoch": 0.06065036688056751, "grad_norm": 11.863590240478516, "learning_rate": 8.66039975664276e-06, "loss": 0.85235806, "memory(GiB)": 28.47, "step": 2240, "train_speed(iter/s)": 0.430782 }, { "acc": 0.80347366, "epoch": 0.060785747163783065, "grad_norm": 13.944347381591797, "learning_rate": 8.662902884445273e-06, "loss": 0.9785677, "memory(GiB)": 28.47, "step": 2245, "train_speed(iter/s)": 0.430704 }, { "acc": 0.82511034, "epoch": 0.06092112744699862, "grad_norm": 18.690059661865234, "learning_rate": 8.665400443549521e-06, "loss": 0.87603703, "memory(GiB)": 28.47, "step": 2250, "train_speed(iter/s)": 0.430695 }, { "acc": 0.82892237, "epoch": 0.06105650773021417, "grad_norm": 29.819406509399414, "learning_rate": 8.667892458677836e-06, "loss": 0.93661032, "memory(GiB)": 28.47, "step": 2255, "train_speed(iter/s)": 0.430682 }, { "acc": 0.80811729, "epoch": 0.06119188801342972, "grad_norm": 21.885969161987305, "learning_rate": 8.670378954388282e-06, "loss": 0.90410509, "memory(GiB)": 28.47, "step": 2260, "train_speed(iter/s)": 0.430654 }, { "acc": 0.82608719, "epoch": 0.061327268296645275, "grad_norm": 14.220187187194824, "learning_rate": 8.672859955076109e-06, "loss": 0.77459469, "memory(GiB)": 28.47, "step": 2265, "train_speed(iter/s)": 0.430661 }, { "acc": 0.80483341, "epoch": 0.06146264857986083, "grad_norm": 21.660497665405273, "learning_rate": 8.675335484975176e-06, "loss": 0.98184977, "memory(GiB)": 28.47, "step": 2270, "train_speed(iter/s)": 0.430668 }, { "acc": 0.83304939, "epoch": 0.06159802886307638, "grad_norm": 6.044909477233887, "learning_rate": 8.677805568159396e-06, "loss": 0.85065384, "memory(GiB)": 28.47, "step": 2275, "train_speed(iter/s)": 0.430677 }, { "acc": 0.83785267, "epoch": 0.06173340914629193, "grad_norm": 25.03074836730957, "learning_rate": 8.68027022854412e-06, "loss": 0.8134964, "memory(GiB)": 28.47, "step": 2280, "train_speed(iter/s)": 0.43069 }, { "acc": 0.81212692, "epoch": 0.061868789429507486, "grad_norm": 14.452347755432129, "learning_rate": 8.682729489887534e-06, "loss": 0.95179148, "memory(GiB)": 28.47, "step": 2285, "train_speed(iter/s)": 0.430696 }, { "acc": 0.84071827, "epoch": 0.06200416971272304, "grad_norm": 12.662845611572266, "learning_rate": 8.685183375792032e-06, "loss": 0.6894105, "memory(GiB)": 28.47, "step": 2290, "train_speed(iter/s)": 0.430707 }, { "acc": 0.83248777, "epoch": 0.06213954999593859, "grad_norm": 14.315495491027832, "learning_rate": 8.687631909705579e-06, "loss": 0.86465302, "memory(GiB)": 28.47, "step": 2295, "train_speed(iter/s)": 0.430714 }, { "acc": 0.78785181, "epoch": 0.06227493027915414, "grad_norm": 17.779705047607422, "learning_rate": 8.690075114923045e-06, "loss": 1.13722658, "memory(GiB)": 28.47, "step": 2300, "train_speed(iter/s)": 0.430708 }, { "acc": 0.81408548, "epoch": 0.062410310562369696, "grad_norm": 17.94352149963379, "learning_rate": 8.692513014587546e-06, "loss": 1.0097538, "memory(GiB)": 28.47, "step": 2305, "train_speed(iter/s)": 0.430715 }, { "acc": 0.83217068, "epoch": 0.06254569084558526, "grad_norm": 15.940545082092285, "learning_rate": 8.694945631691756e-06, "loss": 0.9164978, "memory(GiB)": 28.47, "step": 2310, "train_speed(iter/s)": 0.430723 }, { "acc": 0.79670916, "epoch": 0.06268107112880081, "grad_norm": 13.643343925476074, "learning_rate": 8.697372989079208e-06, "loss": 1.07077274, "memory(GiB)": 28.47, "step": 2315, "train_speed(iter/s)": 0.430727 }, { "acc": 0.8183075, "epoch": 0.06281645141201636, "grad_norm": 12.420082092285156, "learning_rate": 8.699795109445575e-06, "loss": 0.99579105, "memory(GiB)": 28.47, "step": 2320, "train_speed(iter/s)": 0.43073 }, { "acc": 0.81113415, "epoch": 0.0629518316952319, "grad_norm": 11.312845230102539, "learning_rate": 8.70221201533995e-06, "loss": 0.94889593, "memory(GiB)": 28.47, "step": 2325, "train_speed(iter/s)": 0.430734 }, { "acc": 0.78903689, "epoch": 0.06308721197844745, "grad_norm": 11.402338027954102, "learning_rate": 8.70462372916611e-06, "loss": 1.07221804, "memory(GiB)": 28.47, "step": 2330, "train_speed(iter/s)": 0.430733 }, { "acc": 0.82345247, "epoch": 0.063222592261663, "grad_norm": 21.802141189575195, "learning_rate": 8.707030273183747e-06, "loss": 0.90828171, "memory(GiB)": 28.47, "step": 2335, "train_speed(iter/s)": 0.430717 }, { "acc": 0.81681614, "epoch": 0.06335797254487856, "grad_norm": 12.37070083618164, "learning_rate": 8.709431669509715e-06, "loss": 0.95505505, "memory(GiB)": 28.47, "step": 2340, "train_speed(iter/s)": 0.430727 }, { "acc": 0.81897516, "epoch": 0.06349335282809411, "grad_norm": 9.569697380065918, "learning_rate": 8.711827940119248e-06, "loss": 0.97296362, "memory(GiB)": 28.47, "step": 2345, "train_speed(iter/s)": 0.430728 }, { "acc": 0.81356735, "epoch": 0.06362873311130966, "grad_norm": 51.96983337402344, "learning_rate": 8.714219106847152e-06, "loss": 1.00356817, "memory(GiB)": 28.47, "step": 2350, "train_speed(iter/s)": 0.430738 }, { "acc": 0.80475273, "epoch": 0.06376411339452522, "grad_norm": 64.9387435913086, "learning_rate": 8.716605191389021e-06, "loss": 0.94019213, "memory(GiB)": 28.47, "step": 2355, "train_speed(iter/s)": 0.430742 }, { "acc": 0.82473946, "epoch": 0.06389949367774077, "grad_norm": 11.662091255187988, "learning_rate": 8.718986215302401e-06, "loss": 0.88222637, "memory(GiB)": 28.47, "step": 2360, "train_speed(iter/s)": 0.430751 }, { "acc": 0.80650272, "epoch": 0.06403487396095632, "grad_norm": 12.81386947631836, "learning_rate": 8.721362200007966e-06, "loss": 0.95778255, "memory(GiB)": 28.47, "step": 2365, "train_speed(iter/s)": 0.430753 }, { "acc": 0.82947292, "epoch": 0.06417025424417187, "grad_norm": 9.827524185180664, "learning_rate": 8.723733166790672e-06, "loss": 0.8080636, "memory(GiB)": 28.47, "step": 2370, "train_speed(iter/s)": 0.430763 }, { "acc": 0.81419067, "epoch": 0.06430563452738743, "grad_norm": 12.786499977111816, "learning_rate": 8.726099136800901e-06, "loss": 0.95266285, "memory(GiB)": 28.47, "step": 2375, "train_speed(iter/s)": 0.430773 }, { "acc": 0.77552581, "epoch": 0.06444101481060298, "grad_norm": 52.19234848022461, "learning_rate": 8.728460131055597e-06, "loss": 1.17278652, "memory(GiB)": 28.47, "step": 2380, "train_speed(iter/s)": 0.430781 }, { "acc": 0.82774544, "epoch": 0.06457639509381853, "grad_norm": 11.207027435302734, "learning_rate": 8.730816170439383e-06, "loss": 0.82475204, "memory(GiB)": 28.47, "step": 2385, "train_speed(iter/s)": 0.430784 }, { "acc": 0.80898371, "epoch": 0.06471177537703408, "grad_norm": 58.99937438964844, "learning_rate": 8.733167275705664e-06, "loss": 1.0544529, "memory(GiB)": 28.47, "step": 2390, "train_speed(iter/s)": 0.430794 }, { "acc": 0.79117699, "epoch": 0.06484715566024964, "grad_norm": 28.80071449279785, "learning_rate": 8.735513467477735e-06, "loss": 1.03167439, "memory(GiB)": 28.47, "step": 2395, "train_speed(iter/s)": 0.430802 }, { "acc": 0.81750164, "epoch": 0.06498253594346519, "grad_norm": 25.100852966308594, "learning_rate": 8.73785476624986e-06, "loss": 1.00408602, "memory(GiB)": 28.47, "step": 2400, "train_speed(iter/s)": 0.430808 }, { "acc": 0.81145306, "epoch": 0.06511791622668074, "grad_norm": 15.52379322052002, "learning_rate": 8.740191192388343e-06, "loss": 0.83363285, "memory(GiB)": 28.47, "step": 2405, "train_speed(iter/s)": 0.430809 }, { "acc": 0.82232771, "epoch": 0.0652532965098963, "grad_norm": 11.377240180969238, "learning_rate": 8.742522766132602e-06, "loss": 0.92128906, "memory(GiB)": 28.47, "step": 2410, "train_speed(iter/s)": 0.43082 }, { "acc": 0.79930844, "epoch": 0.06538867679311185, "grad_norm": 8.583967208862305, "learning_rate": 8.744849507596207e-06, "loss": 1.09771643, "memory(GiB)": 28.47, "step": 2415, "train_speed(iter/s)": 0.430831 }, { "acc": 0.81352882, "epoch": 0.0655240570763274, "grad_norm": 9.24681568145752, "learning_rate": 8.747171436767932e-06, "loss": 1.02291756, "memory(GiB)": 28.47, "step": 2420, "train_speed(iter/s)": 0.430834 }, { "acc": 0.82863121, "epoch": 0.06565943735954295, "grad_norm": 16.444820404052734, "learning_rate": 8.74948857351278e-06, "loss": 0.87399549, "memory(GiB)": 28.47, "step": 2425, "train_speed(iter/s)": 0.43084 }, { "acc": 0.81898003, "epoch": 0.0657948176427585, "grad_norm": 9.166695594787598, "learning_rate": 8.751800937573004e-06, "loss": 0.90156431, "memory(GiB)": 28.47, "step": 2430, "train_speed(iter/s)": 0.430845 }, { "acc": 0.8468359, "epoch": 0.06593019792597406, "grad_norm": 23.507123947143555, "learning_rate": 8.754108548569115e-06, "loss": 0.78106337, "memory(GiB)": 28.47, "step": 2435, "train_speed(iter/s)": 0.430851 }, { "acc": 0.81573372, "epoch": 0.06606557820918961, "grad_norm": 71.56193542480469, "learning_rate": 8.756411426000885e-06, "loss": 0.86914024, "memory(GiB)": 28.47, "step": 2440, "train_speed(iter/s)": 0.43086 }, { "acc": 0.82908602, "epoch": 0.06620095849240516, "grad_norm": 28.5731143951416, "learning_rate": 8.758709589248335e-06, "loss": 0.87502155, "memory(GiB)": 28.47, "step": 2445, "train_speed(iter/s)": 0.430868 }, { "acc": 0.80564842, "epoch": 0.06633633877562072, "grad_norm": 58.42051315307617, "learning_rate": 8.761003057572704e-06, "loss": 1.04647102, "memory(GiB)": 28.47, "step": 2450, "train_speed(iter/s)": 0.430871 }, { "acc": 0.81036587, "epoch": 0.06647171905883627, "grad_norm": 19.23745346069336, "learning_rate": 8.76329185011743e-06, "loss": 0.92515011, "memory(GiB)": 28.47, "step": 2455, "train_speed(iter/s)": 0.430878 }, { "acc": 0.82340755, "epoch": 0.06660709934205182, "grad_norm": 18.469810485839844, "learning_rate": 8.765575985909101e-06, "loss": 0.85429897, "memory(GiB)": 28.47, "step": 2460, "train_speed(iter/s)": 0.430882 }, { "acc": 0.79915142, "epoch": 0.06674247962526737, "grad_norm": 23.04323387145996, "learning_rate": 8.767855483858413e-06, "loss": 1.03534031, "memory(GiB)": 28.47, "step": 2465, "train_speed(iter/s)": 0.43088 }, { "acc": 0.81532803, "epoch": 0.06687785990848293, "grad_norm": 9.754612922668457, "learning_rate": 8.770130362761097e-06, "loss": 0.8531086, "memory(GiB)": 28.47, "step": 2470, "train_speed(iter/s)": 0.430886 }, { "acc": 0.82279892, "epoch": 0.06701324019169848, "grad_norm": 13.675073623657227, "learning_rate": 8.772400641298859e-06, "loss": 0.84186001, "memory(GiB)": 28.47, "step": 2475, "train_speed(iter/s)": 0.430897 }, { "acc": 0.82319031, "epoch": 0.06714862047491403, "grad_norm": 4.690008163452148, "learning_rate": 8.77466633804029e-06, "loss": 0.83222771, "memory(GiB)": 28.47, "step": 2480, "train_speed(iter/s)": 0.4309 }, { "acc": 0.80978069, "epoch": 0.06728400075812958, "grad_norm": 13.560160636901855, "learning_rate": 8.776927471441794e-06, "loss": 0.97652836, "memory(GiB)": 28.47, "step": 2485, "train_speed(iter/s)": 0.430909 }, { "acc": 0.78753929, "epoch": 0.06741938104134514, "grad_norm": 20.363492965698242, "learning_rate": 8.779184059848472e-06, "loss": 1.09302645, "memory(GiB)": 28.47, "step": 2490, "train_speed(iter/s)": 0.430913 }, { "acc": 0.80535097, "epoch": 0.06755476132456069, "grad_norm": 11.882552146911621, "learning_rate": 8.781436121495027e-06, "loss": 0.98990479, "memory(GiB)": 28.47, "step": 2495, "train_speed(iter/s)": 0.430921 }, { "acc": 0.80961342, "epoch": 0.06769014160777624, "grad_norm": 11.176031112670898, "learning_rate": 8.78368367450664e-06, "loss": 1.02416496, "memory(GiB)": 28.47, "step": 2500, "train_speed(iter/s)": 0.430934 }, { "acc": 0.77348609, "epoch": 0.0678255218909918, "grad_norm": 63.708961486816406, "learning_rate": 8.785926736899856e-06, "loss": 1.19730349, "memory(GiB)": 28.47, "step": 2505, "train_speed(iter/s)": 0.430942 }, { "acc": 0.82296305, "epoch": 0.06796090217420735, "grad_norm": 8.045540809631348, "learning_rate": 8.788165326583439e-06, "loss": 0.85804005, "memory(GiB)": 28.47, "step": 2510, "train_speed(iter/s)": 0.430948 }, { "acc": 0.82538958, "epoch": 0.0680962824574229, "grad_norm": 12.456151008605957, "learning_rate": 8.790399461359235e-06, "loss": 0.89565048, "memory(GiB)": 28.47, "step": 2515, "train_speed(iter/s)": 0.430953 }, { "acc": 0.86627235, "epoch": 0.06823166274063845, "grad_norm": 9.319043159484863, "learning_rate": 8.792629158923022e-06, "loss": 0.64874859, "memory(GiB)": 28.47, "step": 2520, "train_speed(iter/s)": 0.430962 }, { "acc": 0.79198341, "epoch": 0.068367043023854, "grad_norm": 12.860502243041992, "learning_rate": 8.794854436865354e-06, "loss": 1.06540642, "memory(GiB)": 28.47, "step": 2525, "train_speed(iter/s)": 0.430964 }, { "acc": 0.7948658, "epoch": 0.06850242330706956, "grad_norm": 11.005609512329102, "learning_rate": 8.797075312672383e-06, "loss": 1.06534481, "memory(GiB)": 28.47, "step": 2530, "train_speed(iter/s)": 0.430968 }, { "acc": 0.81369648, "epoch": 0.06863780359028511, "grad_norm": 11.259763717651367, "learning_rate": 8.799291803726693e-06, "loss": 0.92218456, "memory(GiB)": 28.47, "step": 2535, "train_speed(iter/s)": 0.430976 }, { "acc": 0.82430553, "epoch": 0.06877318387350066, "grad_norm": 33.8117561340332, "learning_rate": 8.801503927308118e-06, "loss": 0.91729565, "memory(GiB)": 28.47, "step": 2540, "train_speed(iter/s)": 0.430981 }, { "acc": 0.82203236, "epoch": 0.06890856415671621, "grad_norm": 45.171268463134766, "learning_rate": 8.803711700594539e-06, "loss": 0.95732946, "memory(GiB)": 28.47, "step": 2545, "train_speed(iter/s)": 0.430983 }, { "acc": 0.80519638, "epoch": 0.06904394443993177, "grad_norm": 9.604567527770996, "learning_rate": 8.805915140662698e-06, "loss": 0.97321434, "memory(GiB)": 28.47, "step": 2550, "train_speed(iter/s)": 0.430985 }, { "acc": 0.82087345, "epoch": 0.06917932472314732, "grad_norm": 15.21679401397705, "learning_rate": 8.80811426448898e-06, "loss": 0.80896654, "memory(GiB)": 28.47, "step": 2555, "train_speed(iter/s)": 0.430995 }, { "acc": 0.82162485, "epoch": 0.06931470500636287, "grad_norm": 10.009476661682129, "learning_rate": 8.8103090889502e-06, "loss": 0.84714699, "memory(GiB)": 28.47, "step": 2560, "train_speed(iter/s)": 0.431001 }, { "acc": 0.82261295, "epoch": 0.06945008528957843, "grad_norm": 15.694221496582031, "learning_rate": 8.812499630824385e-06, "loss": 0.88283482, "memory(GiB)": 28.47, "step": 2565, "train_speed(iter/s)": 0.431008 }, { "acc": 0.81911278, "epoch": 0.06958546557279398, "grad_norm": 25.705440521240234, "learning_rate": 8.81468590679154e-06, "loss": 1.00264406, "memory(GiB)": 28.47, "step": 2570, "train_speed(iter/s)": 0.431014 }, { "acc": 0.81512165, "epoch": 0.06972084585600953, "grad_norm": 27.352252960205078, "learning_rate": 8.81686793343441e-06, "loss": 0.89466572, "memory(GiB)": 28.47, "step": 2575, "train_speed(iter/s)": 0.431023 }, { "acc": 0.81838598, "epoch": 0.06985622613922508, "grad_norm": 7.153420448303223, "learning_rate": 8.819045727239232e-06, "loss": 0.88405457, "memory(GiB)": 28.47, "step": 2580, "train_speed(iter/s)": 0.431025 }, { "acc": 0.83181839, "epoch": 0.06999160642244064, "grad_norm": 14.299272537231445, "learning_rate": 8.82121930459649e-06, "loss": 0.83911915, "memory(GiB)": 28.47, "step": 2585, "train_speed(iter/s)": 0.431024 }, { "acc": 0.81805658, "epoch": 0.07012698670565619, "grad_norm": 9.955344200134277, "learning_rate": 8.82338868180165e-06, "loss": 0.79815598, "memory(GiB)": 28.47, "step": 2590, "train_speed(iter/s)": 0.431033 }, { "acc": 0.81721516, "epoch": 0.07026236698887174, "grad_norm": 12.539963722229004, "learning_rate": 8.825553875055893e-06, "loss": 0.82713299, "memory(GiB)": 28.47, "step": 2595, "train_speed(iter/s)": 0.431037 }, { "acc": 0.81512823, "epoch": 0.0703977472720873, "grad_norm": 18.96776580810547, "learning_rate": 8.827714900466837e-06, "loss": 0.91754303, "memory(GiB)": 28.47, "step": 2600, "train_speed(iter/s)": 0.431044 }, { "acc": 0.79963369, "epoch": 0.07053312755530285, "grad_norm": 9.58221435546875, "learning_rate": 8.829871774049267e-06, "loss": 1.02038555, "memory(GiB)": 28.47, "step": 2605, "train_speed(iter/s)": 0.431047 }, { "acc": 0.80921507, "epoch": 0.0706685078385184, "grad_norm": 23.338886260986328, "learning_rate": 8.832024511725838e-06, "loss": 0.90368929, "memory(GiB)": 28.47, "step": 2610, "train_speed(iter/s)": 0.431045 }, { "acc": 0.83586006, "epoch": 0.07080388812173395, "grad_norm": 32.23484420776367, "learning_rate": 8.834173129327783e-06, "loss": 0.8181942, "memory(GiB)": 28.47, "step": 2615, "train_speed(iter/s)": 0.43105 }, { "acc": 0.83139782, "epoch": 0.0709392684049495, "grad_norm": 10.279273986816406, "learning_rate": 8.836317642595613e-06, "loss": 0.85570927, "memory(GiB)": 28.47, "step": 2620, "train_speed(iter/s)": 0.431057 }, { "acc": 0.80773754, "epoch": 0.07107464868816506, "grad_norm": 22.503501892089844, "learning_rate": 8.838458067179805e-06, "loss": 1.05634642, "memory(GiB)": 28.47, "step": 2625, "train_speed(iter/s)": 0.431063 }, { "acc": 0.80063505, "epoch": 0.07121002897138061, "grad_norm": 24.759843826293945, "learning_rate": 8.840594418641489e-06, "loss": 0.99728403, "memory(GiB)": 28.47, "step": 2630, "train_speed(iter/s)": 0.431072 }, { "acc": 0.81800919, "epoch": 0.07134540925459616, "grad_norm": 11.000011444091797, "learning_rate": 8.84272671245313e-06, "loss": 0.91886806, "memory(GiB)": 28.47, "step": 2635, "train_speed(iter/s)": 0.431076 }, { "acc": 0.84627218, "epoch": 0.07148078953781171, "grad_norm": 28.413497924804688, "learning_rate": 8.844854963999198e-06, "loss": 0.73226357, "memory(GiB)": 28.47, "step": 2640, "train_speed(iter/s)": 0.431084 }, { "acc": 0.8303834, "epoch": 0.07161616982102727, "grad_norm": 30.853559494018555, "learning_rate": 8.846979188576832e-06, "loss": 0.8196105, "memory(GiB)": 28.47, "step": 2645, "train_speed(iter/s)": 0.431094 }, { "acc": 0.86130905, "epoch": 0.07175155010424282, "grad_norm": 12.217704772949219, "learning_rate": 8.849099401396503e-06, "loss": 0.80446758, "memory(GiB)": 28.47, "step": 2650, "train_speed(iter/s)": 0.431101 }, { "acc": 0.79909315, "epoch": 0.07188693038745837, "grad_norm": 64.60633850097656, "learning_rate": 8.851215617582664e-06, "loss": 1.08217783, "memory(GiB)": 28.47, "step": 2655, "train_speed(iter/s)": 0.431108 }, { "acc": 0.8154644, "epoch": 0.07202231067067393, "grad_norm": 34.744441986083984, "learning_rate": 8.853327852174404e-06, "loss": 0.94103928, "memory(GiB)": 28.47, "step": 2660, "train_speed(iter/s)": 0.431118 }, { "acc": 0.82862425, "epoch": 0.07215769095388948, "grad_norm": 7.920958518981934, "learning_rate": 8.855436120126079e-06, "loss": 0.81420259, "memory(GiB)": 28.47, "step": 2665, "train_speed(iter/s)": 0.431127 }, { "acc": 0.81959753, "epoch": 0.07229307123710503, "grad_norm": 18.235050201416016, "learning_rate": 8.857540436307956e-06, "loss": 0.87505131, "memory(GiB)": 28.47, "step": 2670, "train_speed(iter/s)": 0.431132 }, { "acc": 0.79372702, "epoch": 0.07242845152032058, "grad_norm": 7.3839616775512695, "learning_rate": 8.859640815506835e-06, "loss": 1.07072182, "memory(GiB)": 28.47, "step": 2675, "train_speed(iter/s)": 0.431134 }, { "acc": 0.79401197, "epoch": 0.07256383180353614, "grad_norm": 11.782637596130371, "learning_rate": 8.861737272426687e-06, "loss": 1.12223368, "memory(GiB)": 28.47, "step": 2680, "train_speed(iter/s)": 0.431135 }, { "acc": 0.80870552, "epoch": 0.07269921208675169, "grad_norm": 9.994579315185547, "learning_rate": 8.863829821689251e-06, "loss": 0.92690687, "memory(GiB)": 28.47, "step": 2685, "train_speed(iter/s)": 0.431138 }, { "acc": 0.82145233, "epoch": 0.07283459236996724, "grad_norm": 8.26672649383545, "learning_rate": 8.865918477834659e-06, "loss": 1.00572557, "memory(GiB)": 28.47, "step": 2690, "train_speed(iter/s)": 0.431144 }, { "acc": 0.81252232, "epoch": 0.0729699726531828, "grad_norm": 13.667534828186035, "learning_rate": 8.868003255322041e-06, "loss": 0.93132381, "memory(GiB)": 28.47, "step": 2695, "train_speed(iter/s)": 0.43115 }, { "acc": 0.78855281, "epoch": 0.07310535293639835, "grad_norm": 24.754606246948242, "learning_rate": 8.870084168530123e-06, "loss": 1.11757202, "memory(GiB)": 28.47, "step": 2700, "train_speed(iter/s)": 0.431155 }, { "acc": 0.82505322, "epoch": 0.0732407332196139, "grad_norm": 20.905656814575195, "learning_rate": 8.872161231757823e-06, "loss": 0.95419378, "memory(GiB)": 28.47, "step": 2705, "train_speed(iter/s)": 0.431163 }, { "acc": 0.81339674, "epoch": 0.07337611350282945, "grad_norm": 7.000082492828369, "learning_rate": 8.874234459224843e-06, "loss": 0.91614666, "memory(GiB)": 28.47, "step": 2710, "train_speed(iter/s)": 0.431169 }, { "acc": 0.78504419, "epoch": 0.073511493786045, "grad_norm": 22.092424392700195, "learning_rate": 8.876303865072243e-06, "loss": 1.13096476, "memory(GiB)": 28.47, "step": 2715, "train_speed(iter/s)": 0.431173 }, { "acc": 0.79804139, "epoch": 0.07364687406926056, "grad_norm": 19.701475143432617, "learning_rate": 8.878369463363038e-06, "loss": 0.97763443, "memory(GiB)": 28.47, "step": 2720, "train_speed(iter/s)": 0.431175 }, { "acc": 0.84062366, "epoch": 0.07378225435247611, "grad_norm": 7.86229944229126, "learning_rate": 8.880431268082752e-06, "loss": 0.79188814, "memory(GiB)": 28.47, "step": 2725, "train_speed(iter/s)": 0.431181 }, { "acc": 0.79181461, "epoch": 0.07391763463569166, "grad_norm": 19.20139503479004, "learning_rate": 8.88248929314e-06, "loss": 1.19352322, "memory(GiB)": 28.47, "step": 2730, "train_speed(iter/s)": 0.431186 }, { "acc": 0.80901299, "epoch": 0.07405301491890721, "grad_norm": 11.973570823669434, "learning_rate": 8.884543552367043e-06, "loss": 1.00767593, "memory(GiB)": 28.47, "step": 2735, "train_speed(iter/s)": 0.431195 }, { "acc": 0.823629, "epoch": 0.07418839520212277, "grad_norm": 5.5021443367004395, "learning_rate": 8.886594059520349e-06, "loss": 0.85729179, "memory(GiB)": 28.47, "step": 2740, "train_speed(iter/s)": 0.431197 }, { "acc": 0.83470535, "epoch": 0.07432377548533832, "grad_norm": 12.400824546813965, "learning_rate": 8.88864082828115e-06, "loss": 0.9249527, "memory(GiB)": 28.47, "step": 2745, "train_speed(iter/s)": 0.431206 }, { "acc": 0.8189044, "epoch": 0.07445915576855387, "grad_norm": 10.564435958862305, "learning_rate": 8.890683872255977e-06, "loss": 0.94778852, "memory(GiB)": 28.47, "step": 2750, "train_speed(iter/s)": 0.43121 }, { "acc": 0.81451054, "epoch": 0.07459453605176943, "grad_norm": 26.537595748901367, "learning_rate": 8.89272320497722e-06, "loss": 0.94341908, "memory(GiB)": 28.47, "step": 2755, "train_speed(iter/s)": 0.431218 }, { "acc": 0.83115788, "epoch": 0.07472991633498498, "grad_norm": 16.91650390625, "learning_rate": 8.894758839903647e-06, "loss": 0.93387518, "memory(GiB)": 28.47, "step": 2760, "train_speed(iter/s)": 0.431221 }, { "acc": 0.83130302, "epoch": 0.07486529661820053, "grad_norm": 11.333497047424316, "learning_rate": 8.896790790420955e-06, "loss": 0.84187784, "memory(GiB)": 28.47, "step": 2765, "train_speed(iter/s)": 0.431229 }, { "acc": 0.83073416, "epoch": 0.07500067690141608, "grad_norm": 11.145787239074707, "learning_rate": 8.898819069842285e-06, "loss": 0.88293228, "memory(GiB)": 28.47, "step": 2770, "train_speed(iter/s)": 0.431237 }, { "acc": 0.79217439, "epoch": 0.07513605718463164, "grad_norm": 22.60525131225586, "learning_rate": 8.900843691408752e-06, "loss": 1.11787491, "memory(GiB)": 28.47, "step": 2775, "train_speed(iter/s)": 0.431242 }, { "acc": 0.82214737, "epoch": 0.07527143746784719, "grad_norm": 7.475882530212402, "learning_rate": 8.902864668289958e-06, "loss": 0.85444756, "memory(GiB)": 28.47, "step": 2780, "train_speed(iter/s)": 0.431249 }, { "acc": 0.83979626, "epoch": 0.07540681775106274, "grad_norm": 11.898143768310547, "learning_rate": 8.904882013584519e-06, "loss": 0.95335388, "memory(GiB)": 28.47, "step": 2785, "train_speed(iter/s)": 0.431258 }, { "acc": 0.81884594, "epoch": 0.0755421980342783, "grad_norm": 15.565675735473633, "learning_rate": 8.906895740320554e-06, "loss": 0.86152983, "memory(GiB)": 28.47, "step": 2790, "train_speed(iter/s)": 0.431262 }, { "acc": 0.81293221, "epoch": 0.07567757831749385, "grad_norm": 16.394723892211914, "learning_rate": 8.908905861456208e-06, "loss": 0.95874987, "memory(GiB)": 28.47, "step": 2795, "train_speed(iter/s)": 0.431262 }, { "acc": 0.81054783, "epoch": 0.0758129586007094, "grad_norm": 15.273553848266602, "learning_rate": 8.910912389880142e-06, "loss": 0.96100864, "memory(GiB)": 28.47, "step": 2800, "train_speed(iter/s)": 0.431265 }, { "acc": 0.78782644, "epoch": 0.07594833888392495, "grad_norm": 32.06568908691406, "learning_rate": 8.912915338412036e-06, "loss": 1.12596531, "memory(GiB)": 28.47, "step": 2805, "train_speed(iter/s)": 0.431267 }, { "acc": 0.83803406, "epoch": 0.0760837191671405, "grad_norm": 18.084890365600586, "learning_rate": 8.914914719803072e-06, "loss": 0.74794979, "memory(GiB)": 28.47, "step": 2810, "train_speed(iter/s)": 0.431273 }, { "acc": 0.82365322, "epoch": 0.07621909945035606, "grad_norm": 5.17512321472168, "learning_rate": 8.916910546736427e-06, "loss": 0.86733732, "memory(GiB)": 28.47, "step": 2815, "train_speed(iter/s)": 0.431277 }, { "acc": 0.82236528, "epoch": 0.07635447973357161, "grad_norm": 9.392663955688477, "learning_rate": 8.918902831827755e-06, "loss": 0.91554871, "memory(GiB)": 28.47, "step": 2820, "train_speed(iter/s)": 0.431276 }, { "acc": 0.82289457, "epoch": 0.07648986001678716, "grad_norm": 13.453300476074219, "learning_rate": 8.920891587625666e-06, "loss": 0.90888786, "memory(GiB)": 28.47, "step": 2825, "train_speed(iter/s)": 0.43128 }, { "acc": 0.81461821, "epoch": 0.07662524030000271, "grad_norm": 12.27838134765625, "learning_rate": 8.922876826612199e-06, "loss": 0.91008329, "memory(GiB)": 28.47, "step": 2830, "train_speed(iter/s)": 0.431283 }, { "acc": 0.79450951, "epoch": 0.07676062058321827, "grad_norm": 18.266305923461914, "learning_rate": 8.924858561203287e-06, "loss": 1.11482563, "memory(GiB)": 28.47, "step": 2835, "train_speed(iter/s)": 0.431288 }, { "acc": 0.81499338, "epoch": 0.0768960008664338, "grad_norm": 45.245513916015625, "learning_rate": 8.926836803749234e-06, "loss": 1.01818438, "memory(GiB)": 28.47, "step": 2840, "train_speed(iter/s)": 0.431285 }, { "acc": 0.82137375, "epoch": 0.07703138114964936, "grad_norm": 6.982784271240234, "learning_rate": 8.928811566535168e-06, "loss": 0.8506424, "memory(GiB)": 28.47, "step": 2845, "train_speed(iter/s)": 0.431294 }, { "acc": 0.80876923, "epoch": 0.07716676143286491, "grad_norm": 16.396705627441406, "learning_rate": 8.930782861781504e-06, "loss": 0.94714193, "memory(GiB)": 28.47, "step": 2850, "train_speed(iter/s)": 0.431298 }, { "acc": 0.82868872, "epoch": 0.07730214171608046, "grad_norm": 21.551790237426758, "learning_rate": 8.932750701644394e-06, "loss": 0.87177734, "memory(GiB)": 28.47, "step": 2855, "train_speed(iter/s)": 0.431298 }, { "acc": 0.82258568, "epoch": 0.07743752199929602, "grad_norm": 10.918010711669922, "learning_rate": 8.934715098216174e-06, "loss": 0.84734039, "memory(GiB)": 28.47, "step": 2860, "train_speed(iter/s)": 0.4313 }, { "acc": 0.80825768, "epoch": 0.07757290228251157, "grad_norm": 9.169697761535645, "learning_rate": 8.93667606352582e-06, "loss": 1.02863998, "memory(GiB)": 28.47, "step": 2865, "train_speed(iter/s)": 0.431306 }, { "acc": 0.82812452, "epoch": 0.07770828256572712, "grad_norm": 17.518320083618164, "learning_rate": 8.938633609539384e-06, "loss": 0.89213123, "memory(GiB)": 28.47, "step": 2870, "train_speed(iter/s)": 0.431313 }, { "acc": 0.82832298, "epoch": 0.07784366284894267, "grad_norm": 44.05882263183594, "learning_rate": 8.94058774816043e-06, "loss": 1.00385265, "memory(GiB)": 28.47, "step": 2875, "train_speed(iter/s)": 0.43131 }, { "acc": 0.80864449, "epoch": 0.07797904313215823, "grad_norm": 28.270843505859375, "learning_rate": 8.942538491230463e-06, "loss": 1.05024376, "memory(GiB)": 28.47, "step": 2880, "train_speed(iter/s)": 0.43131 }, { "acc": 0.82998543, "epoch": 0.07811442341537378, "grad_norm": 14.588701248168945, "learning_rate": 8.94448585052938e-06, "loss": 0.91885967, "memory(GiB)": 28.47, "step": 2885, "train_speed(iter/s)": 0.431314 }, { "acc": 0.82218838, "epoch": 0.07824980369858933, "grad_norm": 27.965560913085938, "learning_rate": 8.946429837775877e-06, "loss": 0.85964069, "memory(GiB)": 28.47, "step": 2890, "train_speed(iter/s)": 0.431321 }, { "acc": 0.80451584, "epoch": 0.07838518398180488, "grad_norm": 19.95863151550293, "learning_rate": 8.948370464627879e-06, "loss": 0.9506361, "memory(GiB)": 28.47, "step": 2895, "train_speed(iter/s)": 0.431326 }, { "acc": 0.80571346, "epoch": 0.07852056426502044, "grad_norm": 33.836463928222656, "learning_rate": 8.95030774268296e-06, "loss": 1.03719769, "memory(GiB)": 28.47, "step": 2900, "train_speed(iter/s)": 0.431335 }, { "acc": 0.83114223, "epoch": 0.07865594454823599, "grad_norm": 61.441917419433594, "learning_rate": 8.952241683478755e-06, "loss": 0.84346352, "memory(GiB)": 28.47, "step": 2905, "train_speed(iter/s)": 0.431338 }, { "acc": 0.83482723, "epoch": 0.07879132483145154, "grad_norm": 38.36819076538086, "learning_rate": 8.954172298493382e-06, "loss": 0.77468362, "memory(GiB)": 28.47, "step": 2910, "train_speed(iter/s)": 0.431345 }, { "acc": 0.81372738, "epoch": 0.0789267051146671, "grad_norm": 12.429166793823242, "learning_rate": 8.956099599145841e-06, "loss": 0.9987915, "memory(GiB)": 28.47, "step": 2915, "train_speed(iter/s)": 0.431346 }, { "acc": 0.83520107, "epoch": 0.07906208539788265, "grad_norm": 15.819997787475586, "learning_rate": 8.958023596796419e-06, "loss": 0.84467554, "memory(GiB)": 28.47, "step": 2920, "train_speed(iter/s)": 0.431346 }, { "acc": 0.78032069, "epoch": 0.0791974656810982, "grad_norm": 11.17618179321289, "learning_rate": 8.959944302747101e-06, "loss": 1.21726675, "memory(GiB)": 28.47, "step": 2925, "train_speed(iter/s)": 0.431348 }, { "acc": 0.81083698, "epoch": 0.07933284596431375, "grad_norm": 15.237430572509766, "learning_rate": 8.961861728241956e-06, "loss": 0.89560003, "memory(GiB)": 28.47, "step": 2930, "train_speed(iter/s)": 0.431352 }, { "acc": 0.83204346, "epoch": 0.0794682262475293, "grad_norm": 12.549101829528809, "learning_rate": 8.963775884467547e-06, "loss": 0.82426815, "memory(GiB)": 28.47, "step": 2935, "train_speed(iter/s)": 0.431358 }, { "acc": 0.84580803, "epoch": 0.07960360653074486, "grad_norm": 9.356401443481445, "learning_rate": 8.965686782553307e-06, "loss": 0.73988972, "memory(GiB)": 28.47, "step": 2940, "train_speed(iter/s)": 0.431362 }, { "acc": 0.82389078, "epoch": 0.07973898681396041, "grad_norm": 28.02204704284668, "learning_rate": 8.967594433571936e-06, "loss": 0.92465916, "memory(GiB)": 28.47, "step": 2945, "train_speed(iter/s)": 0.431369 }, { "acc": 0.8141449, "epoch": 0.07987436709717596, "grad_norm": 12.954010009765625, "learning_rate": 8.969498848539786e-06, "loss": 0.94875698, "memory(GiB)": 28.47, "step": 2950, "train_speed(iter/s)": 0.431372 }, { "acc": 0.80879879, "epoch": 0.08000974738039152, "grad_norm": 29.716812133789062, "learning_rate": 8.971400038417239e-06, "loss": 0.97067986, "memory(GiB)": 28.47, "step": 2955, "train_speed(iter/s)": 0.431376 }, { "acc": 0.82064819, "epoch": 0.08014512766360707, "grad_norm": 28.912813186645508, "learning_rate": 8.973298014109091e-06, "loss": 0.9645525, "memory(GiB)": 28.47, "step": 2960, "train_speed(iter/s)": 0.431386 }, { "acc": 0.79236517, "epoch": 0.08028050794682262, "grad_norm": 9.155458450317383, "learning_rate": 8.975192786464918e-06, "loss": 1.0466176, "memory(GiB)": 28.47, "step": 2965, "train_speed(iter/s)": 0.431392 }, { "acc": 0.78360643, "epoch": 0.08041588823003817, "grad_norm": 30.95037078857422, "learning_rate": 8.977084366279461e-06, "loss": 1.17900791, "memory(GiB)": 28.47, "step": 2970, "train_speed(iter/s)": 0.431396 }, { "acc": 0.80006695, "epoch": 0.08055126851325373, "grad_norm": 9.580492973327637, "learning_rate": 8.978972764292982e-06, "loss": 1.0403986, "memory(GiB)": 28.47, "step": 2975, "train_speed(iter/s)": 0.431394 }, { "acc": 0.82250271, "epoch": 0.08068664879646928, "grad_norm": 22.953153610229492, "learning_rate": 8.980857991191639e-06, "loss": 0.89744301, "memory(GiB)": 28.47, "step": 2980, "train_speed(iter/s)": 0.431396 }, { "acc": 0.80724087, "epoch": 0.08082202907968483, "grad_norm": 16.709693908691406, "learning_rate": 8.982740057607843e-06, "loss": 0.87957268, "memory(GiB)": 28.47, "step": 2985, "train_speed(iter/s)": 0.431398 }, { "acc": 0.8156683, "epoch": 0.08095740936290038, "grad_norm": 11.775604248046875, "learning_rate": 8.984618974120625e-06, "loss": 0.89344883, "memory(GiB)": 28.47, "step": 2990, "train_speed(iter/s)": 0.431404 }, { "acc": 0.81834755, "epoch": 0.08109278964611594, "grad_norm": 15.291888236999512, "learning_rate": 8.986494751255982e-06, "loss": 0.88699875, "memory(GiB)": 28.47, "step": 2995, "train_speed(iter/s)": 0.431408 }, { "acc": 0.84791107, "epoch": 0.08122816992933149, "grad_norm": 11.42262077331543, "learning_rate": 8.988367399487242e-06, "loss": 0.67445111, "memory(GiB)": 28.47, "step": 3000, "train_speed(iter/s)": 0.431414 }, { "acc": 0.8289505, "epoch": 0.08136355021254704, "grad_norm": 7.734365463256836, "learning_rate": 8.99023692923541e-06, "loss": 0.81006718, "memory(GiB)": 28.47, "step": 3005, "train_speed(iter/s)": 0.431421 }, { "acc": 0.79492245, "epoch": 0.0814989304957626, "grad_norm": 37.19353485107422, "learning_rate": 8.992103350869515e-06, "loss": 1.01479397, "memory(GiB)": 28.47, "step": 3010, "train_speed(iter/s)": 0.431429 }, { "acc": 0.82623425, "epoch": 0.08163431077897815, "grad_norm": 10.71951961517334, "learning_rate": 8.993966674706951e-06, "loss": 0.74351072, "memory(GiB)": 28.47, "step": 3015, "train_speed(iter/s)": 0.431428 }, { "acc": 0.86120539, "epoch": 0.0817696910621937, "grad_norm": 6.974207401275635, "learning_rate": 8.99582691101383e-06, "loss": 0.65452089, "memory(GiB)": 28.47, "step": 3020, "train_speed(iter/s)": 0.431433 }, { "acc": 0.83270016, "epoch": 0.08190507134540925, "grad_norm": 11.531371116638184, "learning_rate": 8.997684070005316e-06, "loss": 0.87507076, "memory(GiB)": 28.47, "step": 3025, "train_speed(iter/s)": 0.431437 }, { "acc": 0.83184414, "epoch": 0.0820404516286248, "grad_norm": 9.42324447631836, "learning_rate": 8.999538161845956e-06, "loss": 0.87158613, "memory(GiB)": 28.47, "step": 3030, "train_speed(iter/s)": 0.431438 }, { "acc": 0.81966286, "epoch": 0.08217583191184036, "grad_norm": 13.800521850585938, "learning_rate": 9.001389196650025e-06, "loss": 0.93307896, "memory(GiB)": 28.47, "step": 3035, "train_speed(iter/s)": 0.431444 }, { "acc": 0.82377872, "epoch": 0.08231121219505591, "grad_norm": 8.93074893951416, "learning_rate": 9.003237184481843e-06, "loss": 0.88135481, "memory(GiB)": 28.47, "step": 3040, "train_speed(iter/s)": 0.43145 }, { "acc": 0.83126459, "epoch": 0.08244659247827146, "grad_norm": 39.19841384887695, "learning_rate": 9.005082135356121e-06, "loss": 0.8382163, "memory(GiB)": 28.47, "step": 3045, "train_speed(iter/s)": 0.431452 }, { "acc": 0.8315093, "epoch": 0.08258197276148702, "grad_norm": 31.328760147094727, "learning_rate": 9.00692405923827e-06, "loss": 0.85633116, "memory(GiB)": 28.47, "step": 3050, "train_speed(iter/s)": 0.431457 }, { "acc": 0.83014584, "epoch": 0.08271735304470257, "grad_norm": 11.068338394165039, "learning_rate": 9.008762966044732e-06, "loss": 0.91452169, "memory(GiB)": 28.47, "step": 3055, "train_speed(iter/s)": 0.43146 }, { "acc": 0.81256208, "epoch": 0.08285273332791812, "grad_norm": 17.299701690673828, "learning_rate": 9.0105988656433e-06, "loss": 0.9861145, "memory(GiB)": 28.47, "step": 3060, "train_speed(iter/s)": 0.431464 }, { "acc": 0.81672211, "epoch": 0.08298811361113367, "grad_norm": 11.301126480102539, "learning_rate": 9.012431767853439e-06, "loss": 0.92810936, "memory(GiB)": 28.47, "step": 3065, "train_speed(iter/s)": 0.431469 }, { "acc": 0.82986889, "epoch": 0.08312349389434923, "grad_norm": 18.470251083374023, "learning_rate": 9.014261682446588e-06, "loss": 0.85862141, "memory(GiB)": 28.47, "step": 3070, "train_speed(iter/s)": 0.431473 }, { "acc": 0.81631641, "epoch": 0.08325887417756478, "grad_norm": 21.398794174194336, "learning_rate": 9.016088619146487e-06, "loss": 0.90867224, "memory(GiB)": 28.47, "step": 3075, "train_speed(iter/s)": 0.431475 }, { "acc": 0.82550306, "epoch": 0.08339425446078033, "grad_norm": 5.936622142791748, "learning_rate": 9.01791258762948e-06, "loss": 0.89195518, "memory(GiB)": 28.47, "step": 3080, "train_speed(iter/s)": 0.431483 }, { "acc": 0.80674753, "epoch": 0.08352963474399588, "grad_norm": 19.803560256958008, "learning_rate": 9.019733597524832e-06, "loss": 0.89707355, "memory(GiB)": 28.47, "step": 3085, "train_speed(iter/s)": 0.431485 }, { "acc": 0.82085304, "epoch": 0.08366501502721144, "grad_norm": 18.766925811767578, "learning_rate": 9.021551658415011e-06, "loss": 1.01489935, "memory(GiB)": 28.47, "step": 3090, "train_speed(iter/s)": 0.431485 }, { "acc": 0.81138744, "epoch": 0.08380039531042699, "grad_norm": 45.61198043823242, "learning_rate": 9.02336677983602e-06, "loss": 0.97963371, "memory(GiB)": 28.47, "step": 3095, "train_speed(iter/s)": 0.431484 }, { "acc": 0.85864372, "epoch": 0.08393577559364254, "grad_norm": 6.416590690612793, "learning_rate": 9.025178971277674e-06, "loss": 0.69125834, "memory(GiB)": 28.47, "step": 3100, "train_speed(iter/s)": 0.431488 }, { "acc": 0.84414091, "epoch": 0.0840711558768581, "grad_norm": 6.638707160949707, "learning_rate": 9.02698824218391e-06, "loss": 0.72995052, "memory(GiB)": 28.47, "step": 3105, "train_speed(iter/s)": 0.431496 }, { "acc": 0.82286959, "epoch": 0.08420653616007365, "grad_norm": 15.538666725158691, "learning_rate": 9.02879460195308e-06, "loss": 0.85337925, "memory(GiB)": 28.47, "step": 3110, "train_speed(iter/s)": 0.4315 }, { "acc": 0.82719374, "epoch": 0.0843419164432892, "grad_norm": 20.71246910095215, "learning_rate": 9.030598059938237e-06, "loss": 0.97087698, "memory(GiB)": 28.47, "step": 3115, "train_speed(iter/s)": 0.431504 }, { "acc": 0.79011269, "epoch": 0.08447729672650475, "grad_norm": 9.661642074584961, "learning_rate": 9.03239862544744e-06, "loss": 1.01400003, "memory(GiB)": 28.47, "step": 3120, "train_speed(iter/s)": 0.431506 }, { "acc": 0.80195236, "epoch": 0.0846126770097203, "grad_norm": 16.73030662536621, "learning_rate": 9.034196307744024e-06, "loss": 1.00635281, "memory(GiB)": 28.47, "step": 3125, "train_speed(iter/s)": 0.431508 }, { "acc": 0.82509594, "epoch": 0.08474805729293586, "grad_norm": 10.775908470153809, "learning_rate": 9.035991116046904e-06, "loss": 0.87817974, "memory(GiB)": 28.47, "step": 3130, "train_speed(iter/s)": 0.431506 }, { "acc": 0.83254585, "epoch": 0.08488343757615141, "grad_norm": 12.696739196777344, "learning_rate": 9.037783059530843e-06, "loss": 0.84259005, "memory(GiB)": 28.47, "step": 3135, "train_speed(iter/s)": 0.431514 }, { "acc": 0.80366154, "epoch": 0.08501881785936696, "grad_norm": 8.651960372924805, "learning_rate": 9.039572147326747e-06, "loss": 0.9980154, "memory(GiB)": 28.47, "step": 3140, "train_speed(iter/s)": 0.431518 }, { "acc": 0.81169758, "epoch": 0.08515419814258252, "grad_norm": 19.782649993896484, "learning_rate": 9.041358388521929e-06, "loss": 1.01406717, "memory(GiB)": 28.47, "step": 3145, "train_speed(iter/s)": 0.431523 }, { "acc": 0.78806891, "epoch": 0.08528957842579807, "grad_norm": 33.25545120239258, "learning_rate": 9.043141792160407e-06, "loss": 1.17294712, "memory(GiB)": 28.47, "step": 3150, "train_speed(iter/s)": 0.431527 }, { "acc": 0.80820456, "epoch": 0.08542495870901362, "grad_norm": 10.538063049316406, "learning_rate": 9.044922367243156e-06, "loss": 1.01403942, "memory(GiB)": 28.47, "step": 3155, "train_speed(iter/s)": 0.431531 }, { "acc": 0.82419109, "epoch": 0.08556033899222917, "grad_norm": 12.51890754699707, "learning_rate": 9.046700122728395e-06, "loss": 0.87169867, "memory(GiB)": 28.47, "step": 3160, "train_speed(iter/s)": 0.431534 }, { "acc": 0.80725937, "epoch": 0.08569571927544473, "grad_norm": 19.58367156982422, "learning_rate": 9.048475067531853e-06, "loss": 0.91900654, "memory(GiB)": 28.47, "step": 3165, "train_speed(iter/s)": 0.431537 }, { "acc": 0.82341805, "epoch": 0.08583109955866028, "grad_norm": 14.400102615356445, "learning_rate": 9.05024721052704e-06, "loss": 0.93691473, "memory(GiB)": 28.47, "step": 3170, "train_speed(iter/s)": 0.431536 }, { "acc": 0.82499533, "epoch": 0.08596647984187583, "grad_norm": 17.017065048217773, "learning_rate": 9.052016560545503e-06, "loss": 0.84892979, "memory(GiB)": 28.47, "step": 3175, "train_speed(iter/s)": 0.431536 }, { "acc": 0.80796804, "epoch": 0.08610186012509138, "grad_norm": 48.15336990356445, "learning_rate": 9.053783126377105e-06, "loss": 0.96746912, "memory(GiB)": 28.47, "step": 3180, "train_speed(iter/s)": 0.431539 }, { "acc": 0.82819328, "epoch": 0.08623724040830694, "grad_norm": 9.356096267700195, "learning_rate": 9.055546916770285e-06, "loss": 0.81308784, "memory(GiB)": 28.47, "step": 3185, "train_speed(iter/s)": 0.431542 }, { "acc": 0.83487911, "epoch": 0.08637262069152249, "grad_norm": 6.716773986816406, "learning_rate": 9.057307940432297e-06, "loss": 0.75717573, "memory(GiB)": 28.47, "step": 3190, "train_speed(iter/s)": 0.431548 }, { "acc": 0.81624393, "epoch": 0.08650800097473804, "grad_norm": 9.451478958129883, "learning_rate": 9.059066206029499e-06, "loss": 0.89361267, "memory(GiB)": 28.47, "step": 3195, "train_speed(iter/s)": 0.43155 }, { "acc": 0.8298975, "epoch": 0.0866433812579536, "grad_norm": 8.692720413208008, "learning_rate": 9.060821722187584e-06, "loss": 0.89284134, "memory(GiB)": 28.47, "step": 3200, "train_speed(iter/s)": 0.431551 }, { "acc": 0.80829744, "epoch": 0.08677876154116915, "grad_norm": 45.83414840698242, "learning_rate": 9.062574497491848e-06, "loss": 0.94415874, "memory(GiB)": 28.47, "step": 3205, "train_speed(iter/s)": 0.431552 }, { "acc": 0.82528715, "epoch": 0.0869141418243847, "grad_norm": 12.147252082824707, "learning_rate": 9.064324540487437e-06, "loss": 0.88629694, "memory(GiB)": 28.47, "step": 3210, "train_speed(iter/s)": 0.431557 }, { "acc": 0.81701775, "epoch": 0.08704952210760025, "grad_norm": 10.814788818359375, "learning_rate": 9.066071859679603e-06, "loss": 0.84520512, "memory(GiB)": 28.47, "step": 3215, "train_speed(iter/s)": 0.431562 }, { "acc": 0.79052019, "epoch": 0.0871849023908158, "grad_norm": 35.72739028930664, "learning_rate": 9.06781646353393e-06, "loss": 1.1185667, "memory(GiB)": 28.47, "step": 3220, "train_speed(iter/s)": 0.431568 }, { "acc": 0.82128735, "epoch": 0.08732028267403136, "grad_norm": 8.872923851013184, "learning_rate": 9.069558360476615e-06, "loss": 0.87412395, "memory(GiB)": 28.47, "step": 3225, "train_speed(iter/s)": 0.431566 }, { "acc": 0.81161156, "epoch": 0.08745566295724691, "grad_norm": 10.970698356628418, "learning_rate": 9.071297558894682e-06, "loss": 0.93665714, "memory(GiB)": 28.47, "step": 3230, "train_speed(iter/s)": 0.43157 }, { "acc": 0.82866879, "epoch": 0.08759104324046246, "grad_norm": 16.96310043334961, "learning_rate": 9.073034067136244e-06, "loss": 0.80322857, "memory(GiB)": 28.47, "step": 3235, "train_speed(iter/s)": 0.431573 }, { "acc": 0.83238392, "epoch": 0.08772642352367802, "grad_norm": 26.879589080810547, "learning_rate": 9.074767893510727e-06, "loss": 0.85641689, "memory(GiB)": 28.47, "step": 3240, "train_speed(iter/s)": 0.431578 }, { "acc": 0.82184849, "epoch": 0.08786180380689357, "grad_norm": 24.353321075439453, "learning_rate": 9.076499046289123e-06, "loss": 0.85195198, "memory(GiB)": 28.47, "step": 3245, "train_speed(iter/s)": 0.43158 }, { "acc": 0.81434574, "epoch": 0.08799718409010912, "grad_norm": 15.307036399841309, "learning_rate": 9.07822753370422e-06, "loss": 0.90711393, "memory(GiB)": 28.47, "step": 3250, "train_speed(iter/s)": 0.431581 }, { "acc": 0.77674427, "epoch": 0.08813256437332467, "grad_norm": 19.62494659423828, "learning_rate": 9.079953363950837e-06, "loss": 1.1252182, "memory(GiB)": 28.47, "step": 3255, "train_speed(iter/s)": 0.431569 }, { "acc": 0.84642038, "epoch": 0.08826794465654023, "grad_norm": 12.757472038269043, "learning_rate": 9.081676545186059e-06, "loss": 0.79834976, "memory(GiB)": 28.47, "step": 3260, "train_speed(iter/s)": 0.431573 }, { "acc": 0.79841461, "epoch": 0.08840332493975578, "grad_norm": 9.521628379821777, "learning_rate": 9.083397085529462e-06, "loss": 1.01657124, "memory(GiB)": 28.47, "step": 3265, "train_speed(iter/s)": 0.43158 }, { "acc": 0.79885087, "epoch": 0.08853870522297133, "grad_norm": 27.194194793701172, "learning_rate": 9.085114993063356e-06, "loss": 1.05547132, "memory(GiB)": 28.47, "step": 3270, "train_speed(iter/s)": 0.431584 }, { "acc": 0.84199963, "epoch": 0.08867408550618688, "grad_norm": 20.292724609375, "learning_rate": 9.086830275832996e-06, "loss": 0.82376642, "memory(GiB)": 28.47, "step": 3275, "train_speed(iter/s)": 0.431587 }, { "acc": 0.81402588, "epoch": 0.08880946578940244, "grad_norm": 6.22076416015625, "learning_rate": 9.088542941846826e-06, "loss": 0.9679266, "memory(GiB)": 28.47, "step": 3280, "train_speed(iter/s)": 0.431583 }, { "acc": 0.82987576, "epoch": 0.08894484607261799, "grad_norm": 12.65290641784668, "learning_rate": 9.090252999076683e-06, "loss": 0.83650227, "memory(GiB)": 28.47, "step": 3285, "train_speed(iter/s)": 0.431587 }, { "acc": 0.81228657, "epoch": 0.08908022635583354, "grad_norm": 15.792572021484375, "learning_rate": 9.09196045545804e-06, "loss": 0.96165028, "memory(GiB)": 28.47, "step": 3290, "train_speed(iter/s)": 0.431592 }, { "acc": 0.81661072, "epoch": 0.0892156066390491, "grad_norm": 7.06252384185791, "learning_rate": 9.09366531889021e-06, "loss": 0.98581257, "memory(GiB)": 28.47, "step": 3295, "train_speed(iter/s)": 0.431596 }, { "acc": 0.82484426, "epoch": 0.08935098692226465, "grad_norm": 7.9904351234436035, "learning_rate": 9.095367597236583e-06, "loss": 0.93125124, "memory(GiB)": 28.47, "step": 3300, "train_speed(iter/s)": 0.431592 }, { "acc": 0.81871157, "epoch": 0.0894863672054802, "grad_norm": 11.014958381652832, "learning_rate": 9.097067298324817e-06, "loss": 0.92838936, "memory(GiB)": 28.47, "step": 3305, "train_speed(iter/s)": 0.431595 }, { "acc": 0.83837147, "epoch": 0.08962174748869575, "grad_norm": 23.706382751464844, "learning_rate": 9.098764429947088e-06, "loss": 0.82102041, "memory(GiB)": 28.47, "step": 3310, "train_speed(iter/s)": 0.431601 }, { "acc": 0.81348619, "epoch": 0.0897571277719113, "grad_norm": 28.516008377075195, "learning_rate": 9.100458999860278e-06, "loss": 0.93661518, "memory(GiB)": 28.47, "step": 3315, "train_speed(iter/s)": 0.431604 }, { "acc": 0.80912943, "epoch": 0.08989250805512686, "grad_norm": 15.154059410095215, "learning_rate": 9.102151015786195e-06, "loss": 1.00530195, "memory(GiB)": 28.47, "step": 3320, "train_speed(iter/s)": 0.431605 }, { "acc": 0.80639715, "epoch": 0.09002788833834241, "grad_norm": 14.807502746582031, "learning_rate": 9.103840485411789e-06, "loss": 0.93808165, "memory(GiB)": 28.47, "step": 3325, "train_speed(iter/s)": 0.431605 }, { "acc": 0.81663694, "epoch": 0.09016326862155796, "grad_norm": 14.538613319396973, "learning_rate": 9.105527416389354e-06, "loss": 0.93946724, "memory(GiB)": 28.47, "step": 3330, "train_speed(iter/s)": 0.431607 }, { "acc": 0.81031742, "epoch": 0.09029864890477352, "grad_norm": 16.279258728027344, "learning_rate": 9.107211816336746e-06, "loss": 0.99396591, "memory(GiB)": 28.47, "step": 3335, "train_speed(iter/s)": 0.431615 }, { "acc": 0.83331642, "epoch": 0.09043402918798907, "grad_norm": 7.02159309387207, "learning_rate": 9.10889369283758e-06, "loss": 0.78708162, "memory(GiB)": 28.47, "step": 3340, "train_speed(iter/s)": 0.431618 }, { "acc": 0.81469288, "epoch": 0.09056940947120462, "grad_norm": 16.141708374023438, "learning_rate": 9.110573053441436e-06, "loss": 0.98655586, "memory(GiB)": 28.47, "step": 3345, "train_speed(iter/s)": 0.431624 }, { "acc": 0.80464449, "epoch": 0.09070478975442017, "grad_norm": 56.359771728515625, "learning_rate": 9.11224990566407e-06, "loss": 0.92397671, "memory(GiB)": 28.47, "step": 3350, "train_speed(iter/s)": 0.431623 }, { "acc": 0.83918018, "epoch": 0.09084017003763571, "grad_norm": 11.204062461853027, "learning_rate": 9.113924256987609e-06, "loss": 0.79078054, "memory(GiB)": 28.47, "step": 3355, "train_speed(iter/s)": 0.43163 }, { "acc": 0.8161293, "epoch": 0.09097555032085126, "grad_norm": 9.31067180633545, "learning_rate": 9.115596114860744e-06, "loss": 0.87964001, "memory(GiB)": 28.47, "step": 3360, "train_speed(iter/s)": 0.431635 }, { "acc": 0.80610151, "epoch": 0.09111093060406682, "grad_norm": 7.933526515960693, "learning_rate": 9.117265486698956e-06, "loss": 0.99837952, "memory(GiB)": 28.47, "step": 3365, "train_speed(iter/s)": 0.431638 }, { "acc": 0.8486928, "epoch": 0.09124631088728237, "grad_norm": 21.243227005004883, "learning_rate": 9.118932379884674e-06, "loss": 0.72663817, "memory(GiB)": 28.47, "step": 3370, "train_speed(iter/s)": 0.431644 }, { "acc": 0.82085686, "epoch": 0.09138169117049792, "grad_norm": 49.3696174621582, "learning_rate": 9.120596801767508e-06, "loss": 0.91568155, "memory(GiB)": 28.47, "step": 3375, "train_speed(iter/s)": 0.431647 }, { "acc": 0.80837154, "epoch": 0.09151707145371347, "grad_norm": 21.098657608032227, "learning_rate": 9.122258759664417e-06, "loss": 1.021556, "memory(GiB)": 28.47, "step": 3380, "train_speed(iter/s)": 0.431649 }, { "acc": 0.84846973, "epoch": 0.09165245173692903, "grad_norm": 8.473694801330566, "learning_rate": 9.123918260859919e-06, "loss": 0.73436375, "memory(GiB)": 28.47, "step": 3385, "train_speed(iter/s)": 0.431649 }, { "acc": 0.81481676, "epoch": 0.09178783202014458, "grad_norm": 8.486807823181152, "learning_rate": 9.12557531260627e-06, "loss": 0.96235046, "memory(GiB)": 28.47, "step": 3390, "train_speed(iter/s)": 0.431654 }, { "acc": 0.83131161, "epoch": 0.09192321230336013, "grad_norm": 22.999073028564453, "learning_rate": 9.127229922123665e-06, "loss": 0.8963953, "memory(GiB)": 28.47, "step": 3395, "train_speed(iter/s)": 0.431658 }, { "acc": 0.79909286, "epoch": 0.09205859258657569, "grad_norm": 25.230846405029297, "learning_rate": 9.128882096600422e-06, "loss": 1.01107292, "memory(GiB)": 28.47, "step": 3400, "train_speed(iter/s)": 0.431663 }, { "acc": 0.8447298, "epoch": 0.09219397286979124, "grad_norm": 6.786551475524902, "learning_rate": 9.130531843193164e-06, "loss": 0.78078423, "memory(GiB)": 28.47, "step": 3405, "train_speed(iter/s)": 0.431665 }, { "acc": 0.83201752, "epoch": 0.09232935315300679, "grad_norm": 14.738727569580078, "learning_rate": 9.132179169027012e-06, "loss": 0.80591679, "memory(GiB)": 28.47, "step": 3410, "train_speed(iter/s)": 0.431667 }, { "acc": 0.82311821, "epoch": 0.09246473343622234, "grad_norm": 57.21674728393555, "learning_rate": 9.133824081195771e-06, "loss": 0.79725733, "memory(GiB)": 28.47, "step": 3415, "train_speed(iter/s)": 0.431671 }, { "acc": 0.83156681, "epoch": 0.0926001137194379, "grad_norm": 10.519028663635254, "learning_rate": 9.135466586762107e-06, "loss": 0.7586494, "memory(GiB)": 28.47, "step": 3420, "train_speed(iter/s)": 0.431677 }, { "acc": 0.81074791, "epoch": 0.09273549400265345, "grad_norm": 8.507997512817383, "learning_rate": 9.137106692757733e-06, "loss": 0.94015503, "memory(GiB)": 28.47, "step": 3425, "train_speed(iter/s)": 0.431683 }, { "acc": 0.82251091, "epoch": 0.092870874285869, "grad_norm": 6.07591438293457, "learning_rate": 9.138744406183589e-06, "loss": 0.93427544, "memory(GiB)": 28.47, "step": 3430, "train_speed(iter/s)": 0.431687 }, { "acc": 0.8172945, "epoch": 0.09300625456908455, "grad_norm": 6.450868129730225, "learning_rate": 9.14037973401002e-06, "loss": 0.97545643, "memory(GiB)": 28.47, "step": 3435, "train_speed(iter/s)": 0.43169 }, { "acc": 0.83739309, "epoch": 0.0931416348523001, "grad_norm": 13.044450759887695, "learning_rate": 9.142012683176956e-06, "loss": 0.79540777, "memory(GiB)": 28.47, "step": 3440, "train_speed(iter/s)": 0.431695 }, { "acc": 0.82154465, "epoch": 0.09327701513551566, "grad_norm": 12.832244873046875, "learning_rate": 9.143643260594083e-06, "loss": 1.00897694, "memory(GiB)": 28.47, "step": 3445, "train_speed(iter/s)": 0.431699 }, { "acc": 0.83250446, "epoch": 0.09341239541873121, "grad_norm": 10.514697074890137, "learning_rate": 9.145271473141032e-06, "loss": 0.92016478, "memory(GiB)": 28.47, "step": 3450, "train_speed(iter/s)": 0.431703 }, { "acc": 0.83688145, "epoch": 0.09354777570194676, "grad_norm": 20.970664978027344, "learning_rate": 9.146897327667535e-06, "loss": 0.79772568, "memory(GiB)": 28.47, "step": 3455, "train_speed(iter/s)": 0.431709 }, { "acc": 0.81179008, "epoch": 0.09368315598516232, "grad_norm": 9.70344066619873, "learning_rate": 9.148520830993616e-06, "loss": 0.89658184, "memory(GiB)": 28.47, "step": 3460, "train_speed(iter/s)": 0.431709 }, { "acc": 0.82300053, "epoch": 0.09381853626837787, "grad_norm": 10.58942699432373, "learning_rate": 9.150141989909744e-06, "loss": 0.97637424, "memory(GiB)": 28.47, "step": 3465, "train_speed(iter/s)": 0.431714 }, { "acc": 0.82440271, "epoch": 0.09395391655159342, "grad_norm": 17.961793899536133, "learning_rate": 9.151760811177023e-06, "loss": 0.97433319, "memory(GiB)": 28.47, "step": 3470, "train_speed(iter/s)": 0.431717 }, { "acc": 0.78025651, "epoch": 0.09408929683480897, "grad_norm": 37.18598175048828, "learning_rate": 9.153377301527342e-06, "loss": 1.15954323, "memory(GiB)": 28.47, "step": 3475, "train_speed(iter/s)": 0.431715 }, { "acc": 0.81668921, "epoch": 0.09422467711802453, "grad_norm": 7.768034934997559, "learning_rate": 9.154991467663561e-06, "loss": 0.90072412, "memory(GiB)": 28.47, "step": 3480, "train_speed(iter/s)": 0.43172 }, { "acc": 0.7956193, "epoch": 0.09436005740124008, "grad_norm": 10.997771263122559, "learning_rate": 9.156603316259664e-06, "loss": 1.1210515, "memory(GiB)": 28.47, "step": 3485, "train_speed(iter/s)": 0.431725 }, { "acc": 0.82086668, "epoch": 0.09449543768445563, "grad_norm": 15.510093688964844, "learning_rate": 9.158212853960929e-06, "loss": 0.87969151, "memory(GiB)": 28.47, "step": 3490, "train_speed(iter/s)": 0.431727 }, { "acc": 0.84761181, "epoch": 0.09463081796767119, "grad_norm": 9.236452102661133, "learning_rate": 9.159820087384096e-06, "loss": 0.76606627, "memory(GiB)": 28.47, "step": 3495, "train_speed(iter/s)": 0.431727 }, { "acc": 0.83866444, "epoch": 0.09476619825088674, "grad_norm": 11.853589057922363, "learning_rate": 9.161425023117527e-06, "loss": 0.76884313, "memory(GiB)": 28.47, "step": 3500, "train_speed(iter/s)": 0.43173 }, { "acc": 0.84863071, "epoch": 0.09490157853410229, "grad_norm": 10.93504810333252, "learning_rate": 9.163027667721365e-06, "loss": 0.71753044, "memory(GiB)": 28.47, "step": 3505, "train_speed(iter/s)": 0.431736 }, { "acc": 0.8263092, "epoch": 0.09503695881731784, "grad_norm": 34.05278015136719, "learning_rate": 9.164628027727705e-06, "loss": 0.85083542, "memory(GiB)": 28.47, "step": 3510, "train_speed(iter/s)": 0.431739 }, { "acc": 0.82694111, "epoch": 0.0951723391005334, "grad_norm": 8.834251403808594, "learning_rate": 9.166226109640736e-06, "loss": 0.92905197, "memory(GiB)": 28.47, "step": 3515, "train_speed(iter/s)": 0.431743 }, { "acc": 0.84292221, "epoch": 0.09530771938374895, "grad_norm": 9.707822799682617, "learning_rate": 9.167821919936922e-06, "loss": 0.78060403, "memory(GiB)": 28.47, "step": 3520, "train_speed(iter/s)": 0.431745 }, { "acc": 0.8066803, "epoch": 0.0954430996669645, "grad_norm": 16.339893341064453, "learning_rate": 9.16941546506514e-06, "loss": 1.11173038, "memory(GiB)": 28.47, "step": 3525, "train_speed(iter/s)": 0.431747 }, { "acc": 0.82345619, "epoch": 0.09557847995018005, "grad_norm": 14.78331184387207, "learning_rate": 9.17100675144685e-06, "loss": 0.89993238, "memory(GiB)": 28.47, "step": 3530, "train_speed(iter/s)": 0.431748 }, { "acc": 0.8341836, "epoch": 0.0957138602333956, "grad_norm": 11.406514167785645, "learning_rate": 9.172595785476241e-06, "loss": 0.82404766, "memory(GiB)": 28.47, "step": 3535, "train_speed(iter/s)": 0.431753 }, { "acc": 0.829776, "epoch": 0.09584924051661116, "grad_norm": 23.61412811279297, "learning_rate": 9.174182573520388e-06, "loss": 0.81156063, "memory(GiB)": 28.47, "step": 3540, "train_speed(iter/s)": 0.431756 }, { "acc": 0.82614536, "epoch": 0.09598462079982671, "grad_norm": 7.104161262512207, "learning_rate": 9.175767121919411e-06, "loss": 0.93745165, "memory(GiB)": 28.47, "step": 3545, "train_speed(iter/s)": 0.43176 }, { "acc": 0.82922735, "epoch": 0.09612000108304226, "grad_norm": 12.069925308227539, "learning_rate": 9.177349436986617e-06, "loss": 0.88893337, "memory(GiB)": 28.47, "step": 3550, "train_speed(iter/s)": 0.431763 }, { "acc": 0.83857174, "epoch": 0.09625538136625782, "grad_norm": 7.867824077606201, "learning_rate": 9.17892952500866e-06, "loss": 0.70965152, "memory(GiB)": 28.47, "step": 3555, "train_speed(iter/s)": 0.431768 }, { "acc": 0.85139217, "epoch": 0.09639076164947337, "grad_norm": 15.265196800231934, "learning_rate": 9.180507392245678e-06, "loss": 0.62580214, "memory(GiB)": 28.47, "step": 3560, "train_speed(iter/s)": 0.431769 }, { "acc": 0.80788727, "epoch": 0.09652614193268892, "grad_norm": 31.522354125976562, "learning_rate": 9.182083044931462e-06, "loss": 0.98420334, "memory(GiB)": 28.47, "step": 3565, "train_speed(iter/s)": 0.431768 }, { "acc": 0.79543514, "epoch": 0.09666152221590447, "grad_norm": 56.9399528503418, "learning_rate": 9.183656489273584e-06, "loss": 1.08257713, "memory(GiB)": 28.47, "step": 3570, "train_speed(iter/s)": 0.431774 }, { "acc": 0.819347, "epoch": 0.09679690249912003, "grad_norm": 16.165382385253906, "learning_rate": 9.185227731453559e-06, "loss": 0.81211853, "memory(GiB)": 28.47, "step": 3575, "train_speed(iter/s)": 0.431774 }, { "acc": 0.84505615, "epoch": 0.09693228278233558, "grad_norm": 14.383048057556152, "learning_rate": 9.186796777626973e-06, "loss": 0.73317747, "memory(GiB)": 28.47, "step": 3580, "train_speed(iter/s)": 0.431776 }, { "acc": 0.81463375, "epoch": 0.09706766306555113, "grad_norm": 33.950103759765625, "learning_rate": 9.188363633923651e-06, "loss": 0.93639698, "memory(GiB)": 28.47, "step": 3585, "train_speed(iter/s)": 0.431775 }, { "acc": 0.82437286, "epoch": 0.09720304334876669, "grad_norm": 17.117568969726562, "learning_rate": 9.189928306447779e-06, "loss": 0.97402201, "memory(GiB)": 28.47, "step": 3590, "train_speed(iter/s)": 0.431777 }, { "acc": 0.82772179, "epoch": 0.09733842363198224, "grad_norm": 14.056438446044922, "learning_rate": 9.19149080127806e-06, "loss": 0.82787809, "memory(GiB)": 28.47, "step": 3595, "train_speed(iter/s)": 0.431783 }, { "acc": 0.81867828, "epoch": 0.09747380391519779, "grad_norm": 12.88160514831543, "learning_rate": 9.193051124467847e-06, "loss": 0.98754587, "memory(GiB)": 28.47, "step": 3600, "train_speed(iter/s)": 0.431787 }, { "acc": 0.82748985, "epoch": 0.09760918419841334, "grad_norm": 22.673389434814453, "learning_rate": 9.194609282045295e-06, "loss": 0.95791702, "memory(GiB)": 28.47, "step": 3605, "train_speed(iter/s)": 0.431791 }, { "acc": 0.79285908, "epoch": 0.0977445644816289, "grad_norm": 13.510163307189941, "learning_rate": 9.19616528001349e-06, "loss": 1.03357449, "memory(GiB)": 28.47, "step": 3610, "train_speed(iter/s)": 0.431796 }, { "acc": 0.84590673, "epoch": 0.09787994476484445, "grad_norm": 25.537540435791016, "learning_rate": 9.19771912435059e-06, "loss": 0.7557004, "memory(GiB)": 28.47, "step": 3615, "train_speed(iter/s)": 0.4318 }, { "acc": 0.82417364, "epoch": 0.09801532504806, "grad_norm": 12.181904792785645, "learning_rate": 9.199270821009967e-06, "loss": 0.94175377, "memory(GiB)": 28.47, "step": 3620, "train_speed(iter/s)": 0.431804 }, { "acc": 0.82687979, "epoch": 0.09815070533127555, "grad_norm": 7.113609313964844, "learning_rate": 9.200820375920344e-06, "loss": 0.85541592, "memory(GiB)": 28.47, "step": 3625, "train_speed(iter/s)": 0.431807 }, { "acc": 0.83909168, "epoch": 0.0982860856144911, "grad_norm": 7.790431499481201, "learning_rate": 9.20236779498592e-06, "loss": 0.68993134, "memory(GiB)": 28.47, "step": 3630, "train_speed(iter/s)": 0.431807 }, { "acc": 0.83238583, "epoch": 0.09842146589770666, "grad_norm": 33.71459197998047, "learning_rate": 9.203913084086522e-06, "loss": 0.84667692, "memory(GiB)": 28.47, "step": 3635, "train_speed(iter/s)": 0.43181 }, { "acc": 0.81879845, "epoch": 0.09855684618092221, "grad_norm": 15.566740036010742, "learning_rate": 9.205456249077722e-06, "loss": 0.89536419, "memory(GiB)": 28.47, "step": 3640, "train_speed(iter/s)": 0.431812 }, { "acc": 0.81661282, "epoch": 0.09869222646413776, "grad_norm": 21.615325927734375, "learning_rate": 9.20699729579099e-06, "loss": 0.99592628, "memory(GiB)": 28.47, "step": 3645, "train_speed(iter/s)": 0.431817 }, { "acc": 0.81719952, "epoch": 0.09882760674735332, "grad_norm": 10.02479362487793, "learning_rate": 9.208536230033803e-06, "loss": 0.92857723, "memory(GiB)": 28.47, "step": 3650, "train_speed(iter/s)": 0.431821 }, { "acc": 0.85686722, "epoch": 0.09896298703056887, "grad_norm": 74.9830322265625, "learning_rate": 9.210073057589793e-06, "loss": 0.6742672, "memory(GiB)": 28.47, "step": 3655, "train_speed(iter/s)": 0.431828 }, { "acc": 0.82590103, "epoch": 0.09909836731378442, "grad_norm": 29.95187759399414, "learning_rate": 9.211607784218874e-06, "loss": 0.87426367, "memory(GiB)": 28.47, "step": 3660, "train_speed(iter/s)": 0.43183 }, { "acc": 0.82933865, "epoch": 0.09923374759699997, "grad_norm": 41.19841003417969, "learning_rate": 9.213140415657366e-06, "loss": 0.90536842, "memory(GiB)": 28.47, "step": 3665, "train_speed(iter/s)": 0.431832 }, { "acc": 0.8181118, "epoch": 0.09936912788021553, "grad_norm": 16.094823837280273, "learning_rate": 9.21467095761813e-06, "loss": 0.92251215, "memory(GiB)": 28.47, "step": 3670, "train_speed(iter/s)": 0.431836 }, { "acc": 0.82055464, "epoch": 0.09950450816343108, "grad_norm": 17.061010360717773, "learning_rate": 9.21619941579069e-06, "loss": 0.98444538, "memory(GiB)": 28.47, "step": 3675, "train_speed(iter/s)": 0.431827 }, { "acc": 0.78319554, "epoch": 0.09963988844664663, "grad_norm": 13.772963523864746, "learning_rate": 9.217725795841371e-06, "loss": 1.09853344, "memory(GiB)": 28.47, "step": 3680, "train_speed(iter/s)": 0.431826 }, { "acc": 0.8201683, "epoch": 0.09977526872986218, "grad_norm": 64.49432373046875, "learning_rate": 9.219250103413409e-06, "loss": 0.89006271, "memory(GiB)": 28.47, "step": 3685, "train_speed(iter/s)": 0.431831 }, { "acc": 0.83509102, "epoch": 0.09991064901307774, "grad_norm": 15.74881649017334, "learning_rate": 9.220772344127087e-06, "loss": 0.88341875, "memory(GiB)": 28.47, "step": 3690, "train_speed(iter/s)": 0.431834 }, { "acc": 0.80899229, "epoch": 0.10004602929629329, "grad_norm": 7.327145576477051, "learning_rate": 9.222292523579863e-06, "loss": 0.97211037, "memory(GiB)": 28.47, "step": 3695, "train_speed(iter/s)": 0.431838 }, { "acc": 0.81580486, "epoch": 0.10018140957950884, "grad_norm": 15.660167694091797, "learning_rate": 9.223810647346474e-06, "loss": 0.95220852, "memory(GiB)": 28.47, "step": 3700, "train_speed(iter/s)": 0.431842 }, { "acc": 0.81635303, "epoch": 0.1003167898627244, "grad_norm": 16.11284637451172, "learning_rate": 9.225326720979084e-06, "loss": 0.90543127, "memory(GiB)": 28.47, "step": 3705, "train_speed(iter/s)": 0.431845 }, { "acc": 0.84661379, "epoch": 0.10045217014593995, "grad_norm": 17.873146057128906, "learning_rate": 9.22684075000739e-06, "loss": 0.77237277, "memory(GiB)": 28.47, "step": 3710, "train_speed(iter/s)": 0.431849 }, { "acc": 0.79919124, "epoch": 0.1005875504291555, "grad_norm": 17.481746673583984, "learning_rate": 9.228352739938741e-06, "loss": 1.03653851, "memory(GiB)": 28.47, "step": 3715, "train_speed(iter/s)": 0.43185 }, { "acc": 0.82219353, "epoch": 0.10072293071237105, "grad_norm": 22.248380661010742, "learning_rate": 9.229862696258278e-06, "loss": 0.93783951, "memory(GiB)": 28.47, "step": 3720, "train_speed(iter/s)": 0.431852 }, { "acc": 0.85645885, "epoch": 0.1008583109955866, "grad_norm": 55.89471435546875, "learning_rate": 9.231370624429022e-06, "loss": 0.72773108, "memory(GiB)": 28.47, "step": 3725, "train_speed(iter/s)": 0.431856 }, { "acc": 0.82652035, "epoch": 0.10099369127880216, "grad_norm": 34.82757568359375, "learning_rate": 9.232876529892025e-06, "loss": 0.83985348, "memory(GiB)": 28.47, "step": 3730, "train_speed(iter/s)": 0.431859 }, { "acc": 0.81634064, "epoch": 0.10112907156201771, "grad_norm": 11.664608001708984, "learning_rate": 9.234380418066458e-06, "loss": 0.91999493, "memory(GiB)": 28.47, "step": 3735, "train_speed(iter/s)": 0.431863 }, { "acc": 0.77961168, "epoch": 0.10126445184523326, "grad_norm": 10.168814659118652, "learning_rate": 9.235882294349761e-06, "loss": 1.15019608, "memory(GiB)": 28.47, "step": 3740, "train_speed(iter/s)": 0.431863 }, { "acc": 0.8141593, "epoch": 0.10139983212844882, "grad_norm": 13.922353744506836, "learning_rate": 9.237382164117722e-06, "loss": 0.87160645, "memory(GiB)": 28.47, "step": 3745, "train_speed(iter/s)": 0.431866 }, { "acc": 0.82724361, "epoch": 0.10153521241166437, "grad_norm": 14.45111083984375, "learning_rate": 9.238880032724628e-06, "loss": 0.85579271, "memory(GiB)": 28.47, "step": 3750, "train_speed(iter/s)": 0.431863 }, { "acc": 0.83616085, "epoch": 0.10167059269487992, "grad_norm": 12.307790756225586, "learning_rate": 9.24037590550335e-06, "loss": 0.82261295, "memory(GiB)": 28.47, "step": 3755, "train_speed(iter/s)": 0.431866 }, { "acc": 0.80427723, "epoch": 0.10180597297809547, "grad_norm": 11.033730506896973, "learning_rate": 9.24186978776548e-06, "loss": 1.02116852, "memory(GiB)": 28.47, "step": 3760, "train_speed(iter/s)": 0.431863 }, { "acc": 0.82785397, "epoch": 0.10194135326131103, "grad_norm": 9.957761764526367, "learning_rate": 9.243361684801427e-06, "loss": 0.87561922, "memory(GiB)": 28.47, "step": 3765, "train_speed(iter/s)": 0.431866 }, { "acc": 0.80226488, "epoch": 0.10207673354452658, "grad_norm": 11.645337104797363, "learning_rate": 9.244851601880538e-06, "loss": 0.9437994, "memory(GiB)": 28.47, "step": 3770, "train_speed(iter/s)": 0.431869 }, { "acc": 0.79634609, "epoch": 0.10221211382774213, "grad_norm": 10.31026840209961, "learning_rate": 9.246339544251213e-06, "loss": 1.14443741, "memory(GiB)": 28.47, "step": 3775, "train_speed(iter/s)": 0.43187 }, { "acc": 0.8392807, "epoch": 0.10234749411095768, "grad_norm": 9.347688674926758, "learning_rate": 9.247825517141009e-06, "loss": 0.74898062, "memory(GiB)": 28.47, "step": 3780, "train_speed(iter/s)": 0.431872 }, { "acc": 0.83559446, "epoch": 0.10248287439417324, "grad_norm": 7.842874526977539, "learning_rate": 9.249309525756751e-06, "loss": 0.79041648, "memory(GiB)": 28.47, "step": 3785, "train_speed(iter/s)": 0.431875 }, { "acc": 0.82163038, "epoch": 0.10261825467738879, "grad_norm": 16.176984786987305, "learning_rate": 9.250791575284639e-06, "loss": 0.93359299, "memory(GiB)": 28.47, "step": 3790, "train_speed(iter/s)": 0.43188 }, { "acc": 0.84030657, "epoch": 0.10275363496060434, "grad_norm": 13.293293952941895, "learning_rate": 9.252271670890369e-06, "loss": 0.8028307, "memory(GiB)": 28.47, "step": 3795, "train_speed(iter/s)": 0.431884 }, { "acc": 0.80095053, "epoch": 0.1028890152438199, "grad_norm": 11.875335693359375, "learning_rate": 9.253749817719229e-06, "loss": 0.97225742, "memory(GiB)": 28.47, "step": 3800, "train_speed(iter/s)": 0.431888 }, { "acc": 0.82903538, "epoch": 0.10302439552703545, "grad_norm": 15.021397590637207, "learning_rate": 9.255226020896206e-06, "loss": 0.87964163, "memory(GiB)": 28.47, "step": 3805, "train_speed(iter/s)": 0.431889 }, { "acc": 0.82172661, "epoch": 0.103159775810251, "grad_norm": 12.566983222961426, "learning_rate": 9.256700285526106e-06, "loss": 0.86552267, "memory(GiB)": 28.47, "step": 3810, "train_speed(iter/s)": 0.431895 }, { "acc": 0.84244175, "epoch": 0.10329515609346655, "grad_norm": 12.557291030883789, "learning_rate": 9.258172616693637e-06, "loss": 0.79559355, "memory(GiB)": 28.47, "step": 3815, "train_speed(iter/s)": 0.431896 }, { "acc": 0.81894264, "epoch": 0.1034305363766821, "grad_norm": 13.217413902282715, "learning_rate": 9.259643019463544e-06, "loss": 0.91411781, "memory(GiB)": 28.47, "step": 3820, "train_speed(iter/s)": 0.431899 }, { "acc": 0.7887579, "epoch": 0.10356591665989766, "grad_norm": 25.361948013305664, "learning_rate": 9.261111498880687e-06, "loss": 1.05176973, "memory(GiB)": 28.47, "step": 3825, "train_speed(iter/s)": 0.4319 }, { "acc": 0.81135616, "epoch": 0.10370129694311321, "grad_norm": 37.06028747558594, "learning_rate": 9.262578059970151e-06, "loss": 0.9641223, "memory(GiB)": 28.47, "step": 3830, "train_speed(iter/s)": 0.431906 }, { "acc": 0.83967228, "epoch": 0.10383667722632876, "grad_norm": 8.475799560546875, "learning_rate": 9.264042707737366e-06, "loss": 0.8869009, "memory(GiB)": 28.47, "step": 3835, "train_speed(iter/s)": 0.431908 }, { "acc": 0.81416216, "epoch": 0.10397205750954432, "grad_norm": 7.238214015960693, "learning_rate": 9.265505447168186e-06, "loss": 0.91634636, "memory(GiB)": 28.47, "step": 3840, "train_speed(iter/s)": 0.431908 }, { "acc": 0.82202911, "epoch": 0.10410743779275987, "grad_norm": 38.04701232910156, "learning_rate": 9.266966283229008e-06, "loss": 0.88972788, "memory(GiB)": 28.47, "step": 3845, "train_speed(iter/s)": 0.431911 }, { "acc": 0.85627966, "epoch": 0.10424281807597542, "grad_norm": 17.72210121154785, "learning_rate": 9.268425220866866e-06, "loss": 0.67116971, "memory(GiB)": 28.47, "step": 3850, "train_speed(iter/s)": 0.431913 }, { "acc": 0.81771774, "epoch": 0.10437819835919097, "grad_norm": 11.531146049499512, "learning_rate": 9.269882265009526e-06, "loss": 0.9825779, "memory(GiB)": 28.47, "step": 3855, "train_speed(iter/s)": 0.431917 }, { "acc": 0.84160366, "epoch": 0.10451357864240653, "grad_norm": 16.75503921508789, "learning_rate": 9.271337420565602e-06, "loss": 0.81888809, "memory(GiB)": 28.47, "step": 3860, "train_speed(iter/s)": 0.431916 }, { "acc": 0.82176609, "epoch": 0.10464895892562208, "grad_norm": 11.903031349182129, "learning_rate": 9.27279069242464e-06, "loss": 0.87067175, "memory(GiB)": 28.47, "step": 3865, "train_speed(iter/s)": 0.431918 }, { "acc": 0.80600624, "epoch": 0.10478433920883762, "grad_norm": 14.014933586120605, "learning_rate": 9.274242085457218e-06, "loss": 0.95743561, "memory(GiB)": 28.47, "step": 3870, "train_speed(iter/s)": 0.431912 }, { "acc": 0.83147669, "epoch": 0.10491971949205317, "grad_norm": 7.11749792098999, "learning_rate": 9.275691604515058e-06, "loss": 0.78486443, "memory(GiB)": 28.47, "step": 3875, "train_speed(iter/s)": 0.431914 }, { "acc": 0.83279285, "epoch": 0.10505509977526872, "grad_norm": 28.359725952148438, "learning_rate": 9.277139254431106e-06, "loss": 0.91970673, "memory(GiB)": 28.47, "step": 3880, "train_speed(iter/s)": 0.431919 }, { "acc": 0.80525799, "epoch": 0.10519048005848428, "grad_norm": 14.558599472045898, "learning_rate": 9.278585040019637e-06, "loss": 1.08377018, "memory(GiB)": 28.47, "step": 3885, "train_speed(iter/s)": 0.431922 }, { "acc": 0.82766285, "epoch": 0.10532586034169983, "grad_norm": 11.91128921508789, "learning_rate": 9.280028966076353e-06, "loss": 0.80331173, "memory(GiB)": 28.47, "step": 3890, "train_speed(iter/s)": 0.431926 }, { "acc": 0.83318672, "epoch": 0.10546124062491538, "grad_norm": 6.842861652374268, "learning_rate": 9.28147103737847e-06, "loss": 0.81821947, "memory(GiB)": 28.47, "step": 3895, "train_speed(iter/s)": 0.431926 }, { "acc": 0.82747583, "epoch": 0.10559662090813093, "grad_norm": 14.798812866210938, "learning_rate": 9.282911258684823e-06, "loss": 0.96114988, "memory(GiB)": 28.47, "step": 3900, "train_speed(iter/s)": 0.431928 }, { "acc": 0.8220933, "epoch": 0.10573200119134649, "grad_norm": 15.379965782165527, "learning_rate": 9.284349634735956e-06, "loss": 0.88899984, "memory(GiB)": 28.47, "step": 3905, "train_speed(iter/s)": 0.431929 }, { "acc": 0.81231585, "epoch": 0.10586738147456204, "grad_norm": 16.068395614624023, "learning_rate": 9.28578617025421e-06, "loss": 0.86402798, "memory(GiB)": 28.47, "step": 3910, "train_speed(iter/s)": 0.431934 }, { "acc": 0.83062897, "epoch": 0.10600276175777759, "grad_norm": 9.347081184387207, "learning_rate": 9.287220869943824e-06, "loss": 0.83446865, "memory(GiB)": 28.47, "step": 3915, "train_speed(iter/s)": 0.431935 }, { "acc": 0.8062623, "epoch": 0.10613814204099314, "grad_norm": 26.984163284301758, "learning_rate": 9.28865373849103e-06, "loss": 0.96166096, "memory(GiB)": 28.47, "step": 3920, "train_speed(iter/s)": 0.431939 }, { "acc": 0.83640347, "epoch": 0.1062735223242087, "grad_norm": 8.18165397644043, "learning_rate": 9.29008478056413e-06, "loss": 0.80948486, "memory(GiB)": 28.47, "step": 3925, "train_speed(iter/s)": 0.43194 }, { "acc": 0.81258326, "epoch": 0.10640890260742425, "grad_norm": 15.816357612609863, "learning_rate": 9.291514000813601e-06, "loss": 0.87767696, "memory(GiB)": 28.47, "step": 3930, "train_speed(iter/s)": 0.431937 }, { "acc": 0.83083305, "epoch": 0.1065442828906398, "grad_norm": 9.5112943649292, "learning_rate": 9.29294140387218e-06, "loss": 0.7687541, "memory(GiB)": 28.47, "step": 3935, "train_speed(iter/s)": 0.431939 }, { "acc": 0.82818394, "epoch": 0.10667966317385535, "grad_norm": 8.052830696105957, "learning_rate": 9.294366994354963e-06, "loss": 0.91103249, "memory(GiB)": 28.47, "step": 3940, "train_speed(iter/s)": 0.431941 }, { "acc": 0.8319293, "epoch": 0.10681504345707091, "grad_norm": 74.92879486083984, "learning_rate": 9.295790776859475e-06, "loss": 0.78175192, "memory(GiB)": 28.47, "step": 3945, "train_speed(iter/s)": 0.431944 }, { "acc": 0.84220772, "epoch": 0.10695042374028646, "grad_norm": 12.624823570251465, "learning_rate": 9.297212755965778e-06, "loss": 0.74174771, "memory(GiB)": 28.47, "step": 3950, "train_speed(iter/s)": 0.431945 }, { "acc": 0.79453444, "epoch": 0.10708580402350201, "grad_norm": 21.58405303955078, "learning_rate": 9.298632936236554e-06, "loss": 1.07157345, "memory(GiB)": 28.47, "step": 3955, "train_speed(iter/s)": 0.431948 }, { "acc": 0.79525914, "epoch": 0.10722118430671757, "grad_norm": 45.50607681274414, "learning_rate": 9.300051322217185e-06, "loss": 1.02812042, "memory(GiB)": 28.47, "step": 3960, "train_speed(iter/s)": 0.431951 }, { "acc": 0.80234108, "epoch": 0.10735656458993312, "grad_norm": 8.807971000671387, "learning_rate": 9.30146791843585e-06, "loss": 1.0250946, "memory(GiB)": 28.47, "step": 3965, "train_speed(iter/s)": 0.431954 }, { "acc": 0.80043249, "epoch": 0.10749194487314867, "grad_norm": 15.087498664855957, "learning_rate": 9.302882729403611e-06, "loss": 1.04847031, "memory(GiB)": 28.47, "step": 3970, "train_speed(iter/s)": 0.431953 }, { "acc": 0.81868362, "epoch": 0.10762732515636422, "grad_norm": 33.08646774291992, "learning_rate": 9.30429575961449e-06, "loss": 0.87643127, "memory(GiB)": 28.47, "step": 3975, "train_speed(iter/s)": 0.431951 }, { "acc": 0.85740662, "epoch": 0.10776270543957978, "grad_norm": 9.02985668182373, "learning_rate": 9.305707013545567e-06, "loss": 0.70138688, "memory(GiB)": 28.47, "step": 3980, "train_speed(iter/s)": 0.431954 }, { "acc": 0.81452026, "epoch": 0.10789808572279533, "grad_norm": 31.0690860748291, "learning_rate": 9.307116495657053e-06, "loss": 0.89700737, "memory(GiB)": 28.47, "step": 3985, "train_speed(iter/s)": 0.431956 }, { "acc": 0.80792007, "epoch": 0.10803346600601088, "grad_norm": 11.182830810546875, "learning_rate": 9.308524210392393e-06, "loss": 0.98397713, "memory(GiB)": 28.47, "step": 3990, "train_speed(iter/s)": 0.431961 }, { "acc": 0.83131466, "epoch": 0.10816884628922643, "grad_norm": 13.851734161376953, "learning_rate": 9.309930162178316e-06, "loss": 0.90205641, "memory(GiB)": 28.47, "step": 3995, "train_speed(iter/s)": 0.431958 }, { "acc": 0.85852823, "epoch": 0.10830422657244199, "grad_norm": 31.125036239624023, "learning_rate": 9.311334355424967e-06, "loss": 0.74736128, "memory(GiB)": 28.47, "step": 4000, "train_speed(iter/s)": 0.431961 }, { "acc": 0.81926937, "epoch": 0.10843960685565754, "grad_norm": 13.607980728149414, "learning_rate": 9.312736794525942e-06, "loss": 0.80966702, "memory(GiB)": 28.47, "step": 4005, "train_speed(iter/s)": 0.431962 }, { "acc": 0.8059269, "epoch": 0.10857498713887309, "grad_norm": 26.652563095092773, "learning_rate": 9.3141374838584e-06, "loss": 0.96160688, "memory(GiB)": 28.47, "step": 4010, "train_speed(iter/s)": 0.431965 }, { "acc": 0.81316471, "epoch": 0.10871036742208864, "grad_norm": 5.584409713745117, "learning_rate": 9.315536427783139e-06, "loss": 1.00667639, "memory(GiB)": 28.47, "step": 4015, "train_speed(iter/s)": 0.431967 }, { "acc": 0.79485159, "epoch": 0.1088457477053042, "grad_norm": 23.307832717895508, "learning_rate": 9.316933630644673e-06, "loss": 1.11096401, "memory(GiB)": 28.47, "step": 4020, "train_speed(iter/s)": 0.431968 }, { "acc": 0.81427956, "epoch": 0.10898112798851975, "grad_norm": 11.721306800842285, "learning_rate": 9.318329096771315e-06, "loss": 0.91247587, "memory(GiB)": 28.47, "step": 4025, "train_speed(iter/s)": 0.431973 }, { "acc": 0.81383572, "epoch": 0.1091165082717353, "grad_norm": 10.255794525146484, "learning_rate": 9.319722830475254e-06, "loss": 0.91607161, "memory(GiB)": 28.47, "step": 4030, "train_speed(iter/s)": 0.431974 }, { "acc": 0.82593889, "epoch": 0.10925188855495085, "grad_norm": 14.372516632080078, "learning_rate": 9.321114836052645e-06, "loss": 0.92868242, "memory(GiB)": 28.47, "step": 4035, "train_speed(iter/s)": 0.431964 }, { "acc": 0.80351334, "epoch": 0.10938726883816641, "grad_norm": 8.814722061157227, "learning_rate": 9.322505117783682e-06, "loss": 1.02025051, "memory(GiB)": 28.47, "step": 4040, "train_speed(iter/s)": 0.431967 }, { "acc": 0.81004572, "epoch": 0.10952264912138196, "grad_norm": 13.710161209106445, "learning_rate": 9.323893679932667e-06, "loss": 0.93223524, "memory(GiB)": 28.47, "step": 4045, "train_speed(iter/s)": 0.431966 }, { "acc": 0.81198483, "epoch": 0.10965802940459751, "grad_norm": 32.57359313964844, "learning_rate": 9.32528052674811e-06, "loss": 0.98027611, "memory(GiB)": 28.47, "step": 4050, "train_speed(iter/s)": 0.431967 }, { "acc": 0.82135286, "epoch": 0.10979340968781306, "grad_norm": 15.932833671569824, "learning_rate": 9.326665662462792e-06, "loss": 0.87867441, "memory(GiB)": 28.47, "step": 4055, "train_speed(iter/s)": 0.431971 }, { "acc": 0.81321068, "epoch": 0.10992878997102862, "grad_norm": 12.963146209716797, "learning_rate": 9.328049091293845e-06, "loss": 0.95366602, "memory(GiB)": 28.47, "step": 4060, "train_speed(iter/s)": 0.431975 }, { "acc": 0.78788576, "epoch": 0.11006417025424417, "grad_norm": 13.318949699401855, "learning_rate": 9.32943081744283e-06, "loss": 1.08272009, "memory(GiB)": 28.47, "step": 4065, "train_speed(iter/s)": 0.431979 }, { "acc": 0.81786728, "epoch": 0.11019955053745972, "grad_norm": 12.57652759552002, "learning_rate": 9.330810845095813e-06, "loss": 0.86620827, "memory(GiB)": 28.47, "step": 4070, "train_speed(iter/s)": 0.431981 }, { "acc": 0.82555161, "epoch": 0.11033493082067528, "grad_norm": 14.482728004455566, "learning_rate": 9.332189178423442e-06, "loss": 0.89265041, "memory(GiB)": 28.47, "step": 4075, "train_speed(iter/s)": 0.431986 }, { "acc": 0.82071295, "epoch": 0.11047031110389083, "grad_norm": 18.07454490661621, "learning_rate": 9.333565821581026e-06, "loss": 0.97026958, "memory(GiB)": 28.47, "step": 4080, "train_speed(iter/s)": 0.431988 }, { "acc": 0.84155626, "epoch": 0.11060569138710638, "grad_norm": 6.005283832550049, "learning_rate": 9.3349407787086e-06, "loss": 0.80958929, "memory(GiB)": 28.47, "step": 4085, "train_speed(iter/s)": 0.431992 }, { "acc": 0.83332863, "epoch": 0.11074107167032193, "grad_norm": 11.757827758789062, "learning_rate": 9.336314053931012e-06, "loss": 0.834762, "memory(GiB)": 28.47, "step": 4090, "train_speed(iter/s)": 0.431998 }, { "acc": 0.85263529, "epoch": 0.11087645195353749, "grad_norm": 16.639678955078125, "learning_rate": 9.337685651357987e-06, "loss": 0.73762279, "memory(GiB)": 28.47, "step": 4095, "train_speed(iter/s)": 0.432002 }, { "acc": 0.80647392, "epoch": 0.11101183223675304, "grad_norm": 17.667503356933594, "learning_rate": 9.339055575084209e-06, "loss": 1.00812063, "memory(GiB)": 28.47, "step": 4100, "train_speed(iter/s)": 0.432003 }, { "acc": 0.85091133, "epoch": 0.11114721251996859, "grad_norm": 11.841998100280762, "learning_rate": 9.34042382918939e-06, "loss": 0.76933136, "memory(GiB)": 28.47, "step": 4105, "train_speed(iter/s)": 0.432008 }, { "acc": 0.80073242, "epoch": 0.11128259280318414, "grad_norm": 10.689797401428223, "learning_rate": 9.341790417738337e-06, "loss": 1.03042374, "memory(GiB)": 28.47, "step": 4110, "train_speed(iter/s)": 0.432007 }, { "acc": 0.83082333, "epoch": 0.1114179730863997, "grad_norm": 10.05215835571289, "learning_rate": 9.34315534478104e-06, "loss": 0.82314701, "memory(GiB)": 28.47, "step": 4115, "train_speed(iter/s)": 0.432007 }, { "acc": 0.86056633, "epoch": 0.11155335336961525, "grad_norm": 22.814882278442383, "learning_rate": 9.344518614352735e-06, "loss": 0.71064377, "memory(GiB)": 28.47, "step": 4120, "train_speed(iter/s)": 0.432012 }, { "acc": 0.81827469, "epoch": 0.1116887336528308, "grad_norm": 20.047481536865234, "learning_rate": 9.345880230473965e-06, "loss": 0.87666492, "memory(GiB)": 28.47, "step": 4125, "train_speed(iter/s)": 0.432016 }, { "acc": 0.81682291, "epoch": 0.11182411393604635, "grad_norm": 9.084550857543945, "learning_rate": 9.347240197150673e-06, "loss": 0.89635115, "memory(GiB)": 28.47, "step": 4130, "train_speed(iter/s)": 0.432016 }, { "acc": 0.83266735, "epoch": 0.1119594942192619, "grad_norm": 12.269067764282227, "learning_rate": 9.348598518374254e-06, "loss": 0.79550238, "memory(GiB)": 28.47, "step": 4135, "train_speed(iter/s)": 0.432021 }, { "acc": 0.79285517, "epoch": 0.11209487450247746, "grad_norm": 16.316131591796875, "learning_rate": 9.349955198121634e-06, "loss": 1.11633654, "memory(GiB)": 28.47, "step": 4140, "train_speed(iter/s)": 0.432019 }, { "acc": 0.79230881, "epoch": 0.11223025478569301, "grad_norm": 16.456911087036133, "learning_rate": 9.351310240355345e-06, "loss": 1.03270321, "memory(GiB)": 28.47, "step": 4145, "train_speed(iter/s)": 0.432015 }, { "acc": 0.83962345, "epoch": 0.11236563506890856, "grad_norm": 29.25869369506836, "learning_rate": 9.35266364902358e-06, "loss": 0.85661545, "memory(GiB)": 28.47, "step": 4150, "train_speed(iter/s)": 0.432016 }, { "acc": 0.81972561, "epoch": 0.11250101535212412, "grad_norm": 14.39642333984375, "learning_rate": 9.354015428060272e-06, "loss": 0.89377775, "memory(GiB)": 28.47, "step": 4155, "train_speed(iter/s)": 0.432019 }, { "acc": 0.83687458, "epoch": 0.11263639563533967, "grad_norm": 12.158677101135254, "learning_rate": 9.355365581385163e-06, "loss": 0.82630806, "memory(GiB)": 28.47, "step": 4160, "train_speed(iter/s)": 0.432023 }, { "acc": 0.81289558, "epoch": 0.11277177591855522, "grad_norm": 10.150981903076172, "learning_rate": 9.356714112903869e-06, "loss": 0.99659271, "memory(GiB)": 28.47, "step": 4165, "train_speed(iter/s)": 0.432024 }, { "acc": 0.82743931, "epoch": 0.11290715620177078, "grad_norm": 35.54581832885742, "learning_rate": 9.358061026507946e-06, "loss": 0.81296301, "memory(GiB)": 28.47, "step": 4170, "train_speed(iter/s)": 0.432029 }, { "acc": 0.81950178, "epoch": 0.11304253648498633, "grad_norm": 6.058299541473389, "learning_rate": 9.359406326074964e-06, "loss": 0.87089348, "memory(GiB)": 28.47, "step": 4175, "train_speed(iter/s)": 0.432031 }, { "acc": 0.81867256, "epoch": 0.11317791676820188, "grad_norm": 14.017197608947754, "learning_rate": 9.360750015468565e-06, "loss": 0.93512659, "memory(GiB)": 28.47, "step": 4180, "train_speed(iter/s)": 0.432035 }, { "acc": 0.83821487, "epoch": 0.11331329705141743, "grad_norm": 6.823525905609131, "learning_rate": 9.36209209853854e-06, "loss": 0.81062069, "memory(GiB)": 28.47, "step": 4185, "train_speed(iter/s)": 0.432036 }, { "acc": 0.8337389, "epoch": 0.11344867733463299, "grad_norm": 8.488354682922363, "learning_rate": 9.363432579120888e-06, "loss": 0.8060461, "memory(GiB)": 28.47, "step": 4190, "train_speed(iter/s)": 0.432037 }, { "acc": 0.83764992, "epoch": 0.11358405761784854, "grad_norm": 7.6431121826171875, "learning_rate": 9.36477146103788e-06, "loss": 0.77630501, "memory(GiB)": 28.47, "step": 4195, "train_speed(iter/s)": 0.432038 }, { "acc": 0.83015881, "epoch": 0.11371943790106409, "grad_norm": 11.100565910339355, "learning_rate": 9.36610874809813e-06, "loss": 0.90867662, "memory(GiB)": 28.47, "step": 4200, "train_speed(iter/s)": 0.432042 }, { "acc": 0.83783436, "epoch": 0.11385481818427964, "grad_norm": 11.181665420532227, "learning_rate": 9.36744444409666e-06, "loss": 0.77996206, "memory(GiB)": 28.47, "step": 4205, "train_speed(iter/s)": 0.432042 }, { "acc": 0.84244871, "epoch": 0.1139901984674952, "grad_norm": 8.993098258972168, "learning_rate": 9.368778552814963e-06, "loss": 0.77873421, "memory(GiB)": 28.47, "step": 4210, "train_speed(iter/s)": 0.432045 }, { "acc": 0.8192028, "epoch": 0.11412557875071075, "grad_norm": 11.523269653320312, "learning_rate": 9.370111078021059e-06, "loss": 0.87573566, "memory(GiB)": 28.47, "step": 4215, "train_speed(iter/s)": 0.432042 }, { "acc": 0.82920666, "epoch": 0.1142609590339263, "grad_norm": 13.534872055053711, "learning_rate": 9.371442023469577e-06, "loss": 0.90266066, "memory(GiB)": 28.47, "step": 4220, "train_speed(iter/s)": 0.432046 }, { "acc": 0.81535101, "epoch": 0.11439633931714185, "grad_norm": 41.33736801147461, "learning_rate": 9.3727713929018e-06, "loss": 0.93947477, "memory(GiB)": 28.47, "step": 4225, "train_speed(iter/s)": 0.432049 }, { "acc": 0.83405762, "epoch": 0.1145317196003574, "grad_norm": 23.172914505004883, "learning_rate": 9.374099190045743e-06, "loss": 0.80662956, "memory(GiB)": 28.47, "step": 4230, "train_speed(iter/s)": 0.432044 }, { "acc": 0.82553768, "epoch": 0.11466709988357296, "grad_norm": 14.304803848266602, "learning_rate": 9.375425418616202e-06, "loss": 0.93385801, "memory(GiB)": 28.47, "step": 4235, "train_speed(iter/s)": 0.432046 }, { "acc": 0.82272911, "epoch": 0.11480248016678851, "grad_norm": 17.036075592041016, "learning_rate": 9.37675008231483e-06, "loss": 0.90053978, "memory(GiB)": 28.47, "step": 4240, "train_speed(iter/s)": 0.432048 }, { "acc": 0.83246613, "epoch": 0.11493786045000406, "grad_norm": 17.35870933532715, "learning_rate": 9.378073184830186e-06, "loss": 0.83473206, "memory(GiB)": 28.47, "step": 4245, "train_speed(iter/s)": 0.432049 }, { "acc": 0.83537483, "epoch": 0.11507324073321962, "grad_norm": 7.60365104675293, "learning_rate": 9.379394729837806e-06, "loss": 0.74776402, "memory(GiB)": 28.47, "step": 4250, "train_speed(iter/s)": 0.43205 }, { "acc": 0.83597469, "epoch": 0.11520862101643517, "grad_norm": 10.905571937561035, "learning_rate": 9.380714721000262e-06, "loss": 0.78832521, "memory(GiB)": 28.47, "step": 4255, "train_speed(iter/s)": 0.432052 }, { "acc": 0.83385296, "epoch": 0.11534400129965072, "grad_norm": 7.032608509063721, "learning_rate": 9.38203316196722e-06, "loss": 0.77768068, "memory(GiB)": 28.47, "step": 4260, "train_speed(iter/s)": 0.432055 }, { "acc": 0.83007507, "epoch": 0.11547938158286628, "grad_norm": 19.195537567138672, "learning_rate": 9.383350056375507e-06, "loss": 0.87883453, "memory(GiB)": 28.47, "step": 4265, "train_speed(iter/s)": 0.432055 }, { "acc": 0.84052439, "epoch": 0.11561476186608183, "grad_norm": 22.564992904663086, "learning_rate": 9.384665407849157e-06, "loss": 0.86717157, "memory(GiB)": 28.47, "step": 4270, "train_speed(iter/s)": 0.432059 }, { "acc": 0.82568798, "epoch": 0.11575014214929738, "grad_norm": 8.792993545532227, "learning_rate": 9.385979219999492e-06, "loss": 0.79330463, "memory(GiB)": 28.47, "step": 4275, "train_speed(iter/s)": 0.432062 }, { "acc": 0.79396691, "epoch": 0.11588552243251293, "grad_norm": 9.355545043945312, "learning_rate": 9.387291496425163e-06, "loss": 1.06032829, "memory(GiB)": 28.47, "step": 4280, "train_speed(iter/s)": 0.432065 }, { "acc": 0.83411274, "epoch": 0.11602090271572849, "grad_norm": 14.607172012329102, "learning_rate": 9.388602240712219e-06, "loss": 0.8851429, "memory(GiB)": 28.47, "step": 4285, "train_speed(iter/s)": 0.432066 }, { "acc": 0.84421616, "epoch": 0.11615628299894404, "grad_norm": 6.263132572174072, "learning_rate": 9.389911456434163e-06, "loss": 0.80950871, "memory(GiB)": 28.47, "step": 4290, "train_speed(iter/s)": 0.432069 }, { "acc": 0.82478666, "epoch": 0.11629166328215959, "grad_norm": 8.249150276184082, "learning_rate": 9.391219147152006e-06, "loss": 0.8666399, "memory(GiB)": 28.47, "step": 4295, "train_speed(iter/s)": 0.432074 }, { "acc": 0.81230326, "epoch": 0.11642704356537514, "grad_norm": 9.448840141296387, "learning_rate": 9.392525316414338e-06, "loss": 0.96225319, "memory(GiB)": 28.47, "step": 4300, "train_speed(iter/s)": 0.432077 }, { "acc": 0.81306725, "epoch": 0.1165624238485907, "grad_norm": 11.33607292175293, "learning_rate": 9.393829967757373e-06, "loss": 1.00603104, "memory(GiB)": 28.47, "step": 4305, "train_speed(iter/s)": 0.432081 }, { "acc": 0.8351161, "epoch": 0.11669780413180625, "grad_norm": 7.483735084533691, "learning_rate": 9.395133104705007e-06, "loss": 0.81222239, "memory(GiB)": 28.47, "step": 4310, "train_speed(iter/s)": 0.43208 }, { "acc": 0.83938093, "epoch": 0.1168331844150218, "grad_norm": 12.470635414123535, "learning_rate": 9.396434730768886e-06, "loss": 0.78426495, "memory(GiB)": 28.47, "step": 4315, "train_speed(iter/s)": 0.43208 }, { "acc": 0.82847271, "epoch": 0.11696856469823735, "grad_norm": 12.806897163391113, "learning_rate": 9.39773484944845e-06, "loss": 0.96942329, "memory(GiB)": 28.47, "step": 4320, "train_speed(iter/s)": 0.432083 }, { "acc": 0.80142136, "epoch": 0.1171039449814529, "grad_norm": 12.168987274169922, "learning_rate": 9.399033464230999e-06, "loss": 1.0214941, "memory(GiB)": 28.47, "step": 4325, "train_speed(iter/s)": 0.432081 }, { "acc": 0.8265686, "epoch": 0.11723932526466846, "grad_norm": 9.049471855163574, "learning_rate": 9.400330578591742e-06, "loss": 0.92336216, "memory(GiB)": 28.47, "step": 4330, "train_speed(iter/s)": 0.432084 }, { "acc": 0.84549675, "epoch": 0.11737470554788401, "grad_norm": 18.912689208984375, "learning_rate": 9.401626195993865e-06, "loss": 0.78750935, "memory(GiB)": 28.47, "step": 4335, "train_speed(iter/s)": 0.432086 }, { "acc": 0.81554508, "epoch": 0.11751008583109956, "grad_norm": 14.3551664352417, "learning_rate": 9.402920319888561e-06, "loss": 0.96835365, "memory(GiB)": 28.47, "step": 4340, "train_speed(iter/s)": 0.432085 }, { "acc": 0.83069725, "epoch": 0.11764546611431512, "grad_norm": 12.244587898254395, "learning_rate": 9.404212953715117e-06, "loss": 0.78783503, "memory(GiB)": 28.47, "step": 4345, "train_speed(iter/s)": 0.432088 }, { "acc": 0.85056324, "epoch": 0.11778084639753067, "grad_norm": 18.782878875732422, "learning_rate": 9.405504100900946e-06, "loss": 0.70443192, "memory(GiB)": 28.47, "step": 4350, "train_speed(iter/s)": 0.432092 }, { "acc": 0.82460995, "epoch": 0.11791622668074622, "grad_norm": 16.54639434814453, "learning_rate": 9.40679376486165e-06, "loss": 0.87797251, "memory(GiB)": 28.47, "step": 4355, "train_speed(iter/s)": 0.432093 }, { "acc": 0.80124617, "epoch": 0.11805160696396177, "grad_norm": 11.047134399414062, "learning_rate": 9.408081949001078e-06, "loss": 0.99929466, "memory(GiB)": 28.47, "step": 4360, "train_speed(iter/s)": 0.432095 }, { "acc": 0.84007492, "epoch": 0.11818698724717733, "grad_norm": 14.122875213623047, "learning_rate": 9.40936865671137e-06, "loss": 0.8456378, "memory(GiB)": 28.47, "step": 4365, "train_speed(iter/s)": 0.432094 }, { "acc": 0.83282824, "epoch": 0.11832236753039288, "grad_norm": 17.821897506713867, "learning_rate": 9.410653891373016e-06, "loss": 0.86018906, "memory(GiB)": 28.47, "step": 4370, "train_speed(iter/s)": 0.432099 }, { "acc": 0.828475, "epoch": 0.11845774781360843, "grad_norm": 10.746869087219238, "learning_rate": 9.411937656354911e-06, "loss": 0.80690041, "memory(GiB)": 28.47, "step": 4375, "train_speed(iter/s)": 0.432103 }, { "acc": 0.81655312, "epoch": 0.11859312809682399, "grad_norm": 9.953995704650879, "learning_rate": 9.413219955014407e-06, "loss": 0.85647392, "memory(GiB)": 28.47, "step": 4380, "train_speed(iter/s)": 0.432103 }, { "acc": 0.82782927, "epoch": 0.11872850838003952, "grad_norm": 9.59829330444336, "learning_rate": 9.414500790697361e-06, "loss": 0.86840458, "memory(GiB)": 28.47, "step": 4385, "train_speed(iter/s)": 0.432104 }, { "acc": 0.86501446, "epoch": 0.11886388866325508, "grad_norm": 9.05327320098877, "learning_rate": 9.415780166738196e-06, "loss": 0.70745907, "memory(GiB)": 28.47, "step": 4390, "train_speed(iter/s)": 0.432107 }, { "acc": 0.79134927, "epoch": 0.11899926894647063, "grad_norm": 10.641505241394043, "learning_rate": 9.417058086459943e-06, "loss": 1.02670126, "memory(GiB)": 28.47, "step": 4395, "train_speed(iter/s)": 0.432111 }, { "acc": 0.83009624, "epoch": 0.11913464922968618, "grad_norm": 10.517546653747559, "learning_rate": 9.418334553174305e-06, "loss": 0.80738125, "memory(GiB)": 28.47, "step": 4400, "train_speed(iter/s)": 0.432115 }, { "acc": 0.8095746, "epoch": 0.11927002951290173, "grad_norm": 6.586027145385742, "learning_rate": 9.419609570181694e-06, "loss": 0.95326319, "memory(GiB)": 28.47, "step": 4405, "train_speed(iter/s)": 0.432116 }, { "acc": 0.82935581, "epoch": 0.11940540979611729, "grad_norm": 10.15049934387207, "learning_rate": 9.420883140771294e-06, "loss": 0.76080465, "memory(GiB)": 28.47, "step": 4410, "train_speed(iter/s)": 0.432116 }, { "acc": 0.85144081, "epoch": 0.11954079007933284, "grad_norm": 12.691434860229492, "learning_rate": 9.422155268221107e-06, "loss": 0.7519886, "memory(GiB)": 28.47, "step": 4415, "train_speed(iter/s)": 0.432117 }, { "acc": 0.8152277, "epoch": 0.11967617036254839, "grad_norm": 8.187901496887207, "learning_rate": 9.423425955798002e-06, "loss": 0.86851921, "memory(GiB)": 28.47, "step": 4420, "train_speed(iter/s)": 0.432119 }, { "acc": 0.83388138, "epoch": 0.11981155064576395, "grad_norm": 4.809279918670654, "learning_rate": 9.424695206757774e-06, "loss": 0.75915723, "memory(GiB)": 28.47, "step": 4425, "train_speed(iter/s)": 0.43212 }, { "acc": 0.82977009, "epoch": 0.1199469309289795, "grad_norm": 9.753023147583008, "learning_rate": 9.425963024345177e-06, "loss": 0.8228466, "memory(GiB)": 28.47, "step": 4430, "train_speed(iter/s)": 0.432122 }, { "acc": 0.80973406, "epoch": 0.12008231121219505, "grad_norm": 65.83289337158203, "learning_rate": 9.427229411793996e-06, "loss": 0.98314638, "memory(GiB)": 28.47, "step": 4435, "train_speed(iter/s)": 0.432125 }, { "acc": 0.79600716, "epoch": 0.1202176914954106, "grad_norm": 10.506497383117676, "learning_rate": 9.428494372327077e-06, "loss": 0.97112913, "memory(GiB)": 28.47, "step": 4440, "train_speed(iter/s)": 0.432127 }, { "acc": 0.81878452, "epoch": 0.12035307177862616, "grad_norm": 16.012916564941406, "learning_rate": 9.429757909156388e-06, "loss": 0.91431885, "memory(GiB)": 28.47, "step": 4445, "train_speed(iter/s)": 0.432126 }, { "acc": 0.81883993, "epoch": 0.12048845206184171, "grad_norm": 7.369384288787842, "learning_rate": 9.431020025483062e-06, "loss": 1.01212263, "memory(GiB)": 28.47, "step": 4450, "train_speed(iter/s)": 0.432128 }, { "acc": 0.81785326, "epoch": 0.12062383234505726, "grad_norm": 14.755085945129395, "learning_rate": 9.43228072449745e-06, "loss": 0.89213781, "memory(GiB)": 28.47, "step": 4455, "train_speed(iter/s)": 0.432131 }, { "acc": 0.83744488, "epoch": 0.12075921262827281, "grad_norm": 13.054859161376953, "learning_rate": 9.43354000937916e-06, "loss": 0.86194315, "memory(GiB)": 28.47, "step": 4460, "train_speed(iter/s)": 0.432134 }, { "acc": 0.80882692, "epoch": 0.12089459291148837, "grad_norm": 19.728792190551758, "learning_rate": 9.434797883297125e-06, "loss": 1.06193247, "memory(GiB)": 28.47, "step": 4465, "train_speed(iter/s)": 0.432138 }, { "acc": 0.82180805, "epoch": 0.12102997319470392, "grad_norm": 14.540101051330566, "learning_rate": 9.436054349409626e-06, "loss": 0.81870613, "memory(GiB)": 28.47, "step": 4470, "train_speed(iter/s)": 0.43214 }, { "acc": 0.82286701, "epoch": 0.12116535347791947, "grad_norm": 13.164909362792969, "learning_rate": 9.437309410864358e-06, "loss": 0.91949673, "memory(GiB)": 28.47, "step": 4475, "train_speed(iter/s)": 0.432143 }, { "acc": 0.84883099, "epoch": 0.12130073376113502, "grad_norm": 9.563490867614746, "learning_rate": 9.43856307079847e-06, "loss": 0.70888181, "memory(GiB)": 28.47, "step": 4480, "train_speed(iter/s)": 0.432147 }, { "acc": 0.79738159, "epoch": 0.12143611404435058, "grad_norm": 11.25001049041748, "learning_rate": 9.439815332338612e-06, "loss": 1.08016701, "memory(GiB)": 28.47, "step": 4485, "train_speed(iter/s)": 0.432145 }, { "acc": 0.8399229, "epoch": 0.12157149432756613, "grad_norm": 8.541853904724121, "learning_rate": 9.441066198600981e-06, "loss": 0.79709406, "memory(GiB)": 28.47, "step": 4490, "train_speed(iter/s)": 0.432147 }, { "acc": 0.8300684, "epoch": 0.12170687461078168, "grad_norm": 13.511530876159668, "learning_rate": 9.442315672691377e-06, "loss": 0.859196, "memory(GiB)": 28.47, "step": 4495, "train_speed(iter/s)": 0.432152 }, { "acc": 0.82289448, "epoch": 0.12184225489399723, "grad_norm": 9.407613754272461, "learning_rate": 9.443563757705231e-06, "loss": 0.95166616, "memory(GiB)": 28.47, "step": 4500, "train_speed(iter/s)": 0.432151 }, { "acc": 0.83677568, "epoch": 0.12197763517721279, "grad_norm": 21.81905174255371, "learning_rate": 9.444810456727669e-06, "loss": 0.83872871, "memory(GiB)": 28.47, "step": 4505, "train_speed(iter/s)": 0.432153 }, { "acc": 0.8056181, "epoch": 0.12211301546042834, "grad_norm": 16.6709041595459, "learning_rate": 9.446055772833548e-06, "loss": 1.03903084, "memory(GiB)": 28.47, "step": 4510, "train_speed(iter/s)": 0.432152 }, { "acc": 0.82665386, "epoch": 0.12224839574364389, "grad_norm": 6.6656494140625, "learning_rate": 9.447299709087502e-06, "loss": 0.81490364, "memory(GiB)": 28.47, "step": 4515, "train_speed(iter/s)": 0.432156 }, { "acc": 0.82427473, "epoch": 0.12238377602685944, "grad_norm": 33.48405075073242, "learning_rate": 9.448542268543992e-06, "loss": 0.8667449, "memory(GiB)": 28.47, "step": 4520, "train_speed(iter/s)": 0.432159 }, { "acc": 0.8517664, "epoch": 0.122519156310075, "grad_norm": 16.013277053833008, "learning_rate": 9.449783454247351e-06, "loss": 0.80256615, "memory(GiB)": 28.47, "step": 4525, "train_speed(iter/s)": 0.432161 }, { "acc": 0.82510777, "epoch": 0.12265453659329055, "grad_norm": 9.171815872192383, "learning_rate": 9.451023269231817e-06, "loss": 0.87627096, "memory(GiB)": 28.47, "step": 4530, "train_speed(iter/s)": 0.432163 }, { "acc": 0.83411732, "epoch": 0.1227899168765061, "grad_norm": 12.65903091430664, "learning_rate": 9.452261716521595e-06, "loss": 0.78620434, "memory(GiB)": 28.47, "step": 4535, "train_speed(iter/s)": 0.432167 }, { "acc": 0.7927031, "epoch": 0.12292529715972166, "grad_norm": 13.605646133422852, "learning_rate": 9.453498799130888e-06, "loss": 0.97673664, "memory(GiB)": 28.47, "step": 4540, "train_speed(iter/s)": 0.432168 }, { "acc": 0.83352823, "epoch": 0.12306067744293721, "grad_norm": 9.261334419250488, "learning_rate": 9.454734520063943e-06, "loss": 0.7909893, "memory(GiB)": 28.47, "step": 4545, "train_speed(iter/s)": 0.432171 }, { "acc": 0.85465269, "epoch": 0.12319605772615276, "grad_norm": 10.341453552246094, "learning_rate": 9.455968882315108e-06, "loss": 0.68065405, "memory(GiB)": 28.47, "step": 4550, "train_speed(iter/s)": 0.432175 }, { "acc": 0.81710663, "epoch": 0.12333143800936831, "grad_norm": 7.5996246337890625, "learning_rate": 9.45720188886885e-06, "loss": 0.95196095, "memory(GiB)": 28.47, "step": 4555, "train_speed(iter/s)": 0.432171 }, { "acc": 0.81892815, "epoch": 0.12346681829258387, "grad_norm": 14.058156967163086, "learning_rate": 9.458433542699831e-06, "loss": 0.85652056, "memory(GiB)": 28.47, "step": 4560, "train_speed(iter/s)": 0.432173 }, { "acc": 0.82841301, "epoch": 0.12360219857579942, "grad_norm": 7.014794826507568, "learning_rate": 9.459663846772916e-06, "loss": 0.83984003, "memory(GiB)": 28.47, "step": 4565, "train_speed(iter/s)": 0.432175 }, { "acc": 0.85178242, "epoch": 0.12373757885901497, "grad_norm": 11.537638664245605, "learning_rate": 9.460892804043244e-06, "loss": 0.7329206, "memory(GiB)": 28.47, "step": 4570, "train_speed(iter/s)": 0.432177 }, { "acc": 0.83261461, "epoch": 0.12387295914223052, "grad_norm": 5.9012346267700195, "learning_rate": 9.462120417456258e-06, "loss": 0.86886368, "memory(GiB)": 28.47, "step": 4575, "train_speed(iter/s)": 0.432182 }, { "acc": 0.84121056, "epoch": 0.12400833942544608, "grad_norm": 8.509859085083008, "learning_rate": 9.463346689947745e-06, "loss": 0.77697363, "memory(GiB)": 28.47, "step": 4580, "train_speed(iter/s)": 0.432185 }, { "acc": 0.81702003, "epoch": 0.12414371970866163, "grad_norm": 8.58179759979248, "learning_rate": 9.464571624443885e-06, "loss": 0.84574947, "memory(GiB)": 28.47, "step": 4585, "train_speed(iter/s)": 0.432188 }, { "acc": 0.83223534, "epoch": 0.12427909999187718, "grad_norm": 10.922098159790039, "learning_rate": 9.465795223861289e-06, "loss": 0.99110498, "memory(GiB)": 28.47, "step": 4590, "train_speed(iter/s)": 0.432192 }, { "acc": 0.82398891, "epoch": 0.12441448027509273, "grad_norm": 26.3032169342041, "learning_rate": 9.467017491107046e-06, "loss": 0.88263655, "memory(GiB)": 28.47, "step": 4595, "train_speed(iter/s)": 0.432193 }, { "acc": 0.83432064, "epoch": 0.12454986055830829, "grad_norm": 13.311638832092285, "learning_rate": 9.468238429078755e-06, "loss": 0.75104041, "memory(GiB)": 28.47, "step": 4600, "train_speed(iter/s)": 0.432197 }, { "acc": 0.81606464, "epoch": 0.12468524084152384, "grad_norm": 18.23239517211914, "learning_rate": 9.469458040664573e-06, "loss": 1.05101862, "memory(GiB)": 28.47, "step": 4605, "train_speed(iter/s)": 0.4322 }, { "acc": 0.84113073, "epoch": 0.12482062112473939, "grad_norm": 12.305004119873047, "learning_rate": 9.470676328743256e-06, "loss": 0.82718163, "memory(GiB)": 28.47, "step": 4610, "train_speed(iter/s)": 0.432189 }, { "acc": 0.80103626, "epoch": 0.12495600140795494, "grad_norm": 30.200679779052734, "learning_rate": 9.471893296184197e-06, "loss": 1.0207592, "memory(GiB)": 28.47, "step": 4615, "train_speed(iter/s)": 0.432182 }, { "acc": 0.80717678, "epoch": 0.1250913816911705, "grad_norm": 12.938453674316406, "learning_rate": 9.473108945847466e-06, "loss": 1.04991255, "memory(GiB)": 28.47, "step": 4620, "train_speed(iter/s)": 0.432165 }, { "acc": 0.81317997, "epoch": 0.12522676197438606, "grad_norm": 6.279201984405518, "learning_rate": 9.47432328058386e-06, "loss": 1.02841349, "memory(GiB)": 28.47, "step": 4625, "train_speed(iter/s)": 0.43216 }, { "acc": 0.82599964, "epoch": 0.12536214225760162, "grad_norm": 16.83620262145996, "learning_rate": 9.475536303234918e-06, "loss": 0.87781239, "memory(GiB)": 28.47, "step": 4630, "train_speed(iter/s)": 0.432154 }, { "acc": 0.83439846, "epoch": 0.12549752254081717, "grad_norm": 11.911431312561035, "learning_rate": 9.476748016632998e-06, "loss": 0.85592957, "memory(GiB)": 28.47, "step": 4635, "train_speed(iter/s)": 0.432147 }, { "acc": 0.81237364, "epoch": 0.12563290282403272, "grad_norm": 12.292994499206543, "learning_rate": 9.477958423601285e-06, "loss": 1.0242528, "memory(GiB)": 28.47, "step": 4640, "train_speed(iter/s)": 0.432141 }, { "acc": 0.79662561, "epoch": 0.12576828310724825, "grad_norm": 15.097963333129883, "learning_rate": 9.479167526953845e-06, "loss": 1.00132408, "memory(GiB)": 28.47, "step": 4645, "train_speed(iter/s)": 0.432129 }, { "acc": 0.82181625, "epoch": 0.1259036633904638, "grad_norm": 19.485048294067383, "learning_rate": 9.480375329495662e-06, "loss": 0.89302454, "memory(GiB)": 28.47, "step": 4650, "train_speed(iter/s)": 0.43212 }, { "acc": 0.84168749, "epoch": 0.12603904367367935, "grad_norm": 7.348368167877197, "learning_rate": 9.481581834022674e-06, "loss": 0.82666492, "memory(GiB)": 28.47, "step": 4655, "train_speed(iter/s)": 0.432121 }, { "acc": 0.81563025, "epoch": 0.1261744239568949, "grad_norm": 10.675302505493164, "learning_rate": 9.48278704332182e-06, "loss": 0.94395008, "memory(GiB)": 28.47, "step": 4660, "train_speed(iter/s)": 0.432119 }, { "acc": 0.82775583, "epoch": 0.12630980424011046, "grad_norm": 6.954318046569824, "learning_rate": 9.483990960171067e-06, "loss": 0.8684947, "memory(GiB)": 28.47, "step": 4665, "train_speed(iter/s)": 0.432119 }, { "acc": 0.82350168, "epoch": 0.126445184523326, "grad_norm": 17.23990249633789, "learning_rate": 9.485193587339457e-06, "loss": 0.87341852, "memory(GiB)": 28.47, "step": 4670, "train_speed(iter/s)": 0.432122 }, { "acc": 0.84950666, "epoch": 0.12658056480654156, "grad_norm": 10.383831024169922, "learning_rate": 9.486394927587144e-06, "loss": 0.69800229, "memory(GiB)": 28.47, "step": 4675, "train_speed(iter/s)": 0.432122 }, { "acc": 0.83521557, "epoch": 0.12671594508975711, "grad_norm": 22.313093185424805, "learning_rate": 9.487594983665427e-06, "loss": 0.84725161, "memory(GiB)": 28.47, "step": 4680, "train_speed(iter/s)": 0.432124 }, { "acc": 0.81774998, "epoch": 0.12685132537297267, "grad_norm": 22.210830688476562, "learning_rate": 9.488793758316796e-06, "loss": 0.99962015, "memory(GiB)": 28.47, "step": 4685, "train_speed(iter/s)": 0.432127 }, { "acc": 0.809618, "epoch": 0.12698670565618822, "grad_norm": 12.362476348876953, "learning_rate": 9.489991254274958e-06, "loss": 0.94001446, "memory(GiB)": 28.47, "step": 4690, "train_speed(iter/s)": 0.432128 }, { "acc": 0.80642776, "epoch": 0.12712208593940377, "grad_norm": 113.215087890625, "learning_rate": 9.491187474264888e-06, "loss": 1.02594662, "memory(GiB)": 28.47, "step": 4695, "train_speed(iter/s)": 0.432127 }, { "acc": 0.81536522, "epoch": 0.12725746622261933, "grad_norm": 22.877790451049805, "learning_rate": 9.492382421002863e-06, "loss": 0.85364742, "memory(GiB)": 28.47, "step": 4700, "train_speed(iter/s)": 0.43213 }, { "acc": 0.82311869, "epoch": 0.12739284650583488, "grad_norm": 52.90999984741211, "learning_rate": 9.493576097196485e-06, "loss": 0.93144417, "memory(GiB)": 28.47, "step": 4705, "train_speed(iter/s)": 0.432132 }, { "acc": 0.83731356, "epoch": 0.12752822678905043, "grad_norm": 10.24858570098877, "learning_rate": 9.494768505544732e-06, "loss": 0.85886908, "memory(GiB)": 28.47, "step": 4710, "train_speed(iter/s)": 0.432136 }, { "acc": 0.81598434, "epoch": 0.12766360707226598, "grad_norm": 39.9036865234375, "learning_rate": 9.495959648737997e-06, "loss": 1.03388424, "memory(GiB)": 28.47, "step": 4715, "train_speed(iter/s)": 0.432135 }, { "acc": 0.83243809, "epoch": 0.12779898735548154, "grad_norm": 17.445270538330078, "learning_rate": 9.497149529458113e-06, "loss": 0.8457572, "memory(GiB)": 28.47, "step": 4720, "train_speed(iter/s)": 0.432137 }, { "acc": 0.83524914, "epoch": 0.1279343676386971, "grad_norm": 7.375385761260986, "learning_rate": 9.498338150378395e-06, "loss": 0.86710548, "memory(GiB)": 28.47, "step": 4725, "train_speed(iter/s)": 0.432135 }, { "acc": 0.82847271, "epoch": 0.12806974792191264, "grad_norm": 9.923633575439453, "learning_rate": 9.499525514163678e-06, "loss": 0.82240915, "memory(GiB)": 28.47, "step": 4730, "train_speed(iter/s)": 0.432139 }, { "acc": 0.83782606, "epoch": 0.1282051282051282, "grad_norm": 8.974177360534668, "learning_rate": 9.500711623470346e-06, "loss": 0.80037422, "memory(GiB)": 28.47, "step": 4735, "train_speed(iter/s)": 0.432143 }, { "acc": 0.8083746, "epoch": 0.12834050848834375, "grad_norm": 13.608155250549316, "learning_rate": 9.501896480946382e-06, "loss": 1.1322073, "memory(GiB)": 28.47, "step": 4740, "train_speed(iter/s)": 0.432144 }, { "acc": 0.81942244, "epoch": 0.1284758887715593, "grad_norm": 14.14465618133545, "learning_rate": 9.503080089231383e-06, "loss": 0.93329458, "memory(GiB)": 28.47, "step": 4745, "train_speed(iter/s)": 0.432145 }, { "acc": 0.82900496, "epoch": 0.12861126905477485, "grad_norm": 46.41033935546875, "learning_rate": 9.504262450956612e-06, "loss": 0.85254402, "memory(GiB)": 28.47, "step": 4750, "train_speed(iter/s)": 0.432148 }, { "acc": 0.83321323, "epoch": 0.1287466493379904, "grad_norm": 17.097305297851562, "learning_rate": 9.505443568745026e-06, "loss": 0.85931015, "memory(GiB)": 28.47, "step": 4755, "train_speed(iter/s)": 0.432151 }, { "acc": 0.79658689, "epoch": 0.12888202962120596, "grad_norm": 11.149961471557617, "learning_rate": 9.50662344521131e-06, "loss": 0.99232693, "memory(GiB)": 28.47, "step": 4760, "train_speed(iter/s)": 0.432153 }, { "acc": 0.83016911, "epoch": 0.1290174099044215, "grad_norm": 12.963226318359375, "learning_rate": 9.507802082961915e-06, "loss": 0.82718277, "memory(GiB)": 28.47, "step": 4765, "train_speed(iter/s)": 0.43215 }, { "acc": 0.83171053, "epoch": 0.12915279018763706, "grad_norm": 12.021730422973633, "learning_rate": 9.508979484595094e-06, "loss": 0.76499543, "memory(GiB)": 28.47, "step": 4770, "train_speed(iter/s)": 0.43215 }, { "acc": 0.84633389, "epoch": 0.12928817047085261, "grad_norm": 10.20706558227539, "learning_rate": 9.51015565270093e-06, "loss": 0.84378548, "memory(GiB)": 28.47, "step": 4775, "train_speed(iter/s)": 0.432152 }, { "acc": 0.84083796, "epoch": 0.12942355075406817, "grad_norm": 7.642237186431885, "learning_rate": 9.511330589861375e-06, "loss": 0.75622568, "memory(GiB)": 28.47, "step": 4780, "train_speed(iter/s)": 0.432154 }, { "acc": 0.84439564, "epoch": 0.12955893103728372, "grad_norm": 9.573097229003906, "learning_rate": 9.512504298650282e-06, "loss": 0.73783731, "memory(GiB)": 28.47, "step": 4785, "train_speed(iter/s)": 0.432158 }, { "acc": 0.84307613, "epoch": 0.12969431132049927, "grad_norm": 5.094137191772461, "learning_rate": 9.513676781633446e-06, "loss": 0.73622637, "memory(GiB)": 28.47, "step": 4790, "train_speed(iter/s)": 0.432162 }, { "acc": 0.79719238, "epoch": 0.12982969160371483, "grad_norm": 9.357901573181152, "learning_rate": 9.514848041368622e-06, "loss": 1.05265541, "memory(GiB)": 28.47, "step": 4795, "train_speed(iter/s)": 0.432163 }, { "acc": 0.83675966, "epoch": 0.12996507188693038, "grad_norm": 11.669062614440918, "learning_rate": 9.51601808040557e-06, "loss": 0.8179822, "memory(GiB)": 28.47, "step": 4800, "train_speed(iter/s)": 0.432164 }, { "acc": 0.84643822, "epoch": 0.13010045217014593, "grad_norm": 12.667279243469238, "learning_rate": 9.517186901286091e-06, "loss": 0.70205421, "memory(GiB)": 28.47, "step": 4805, "train_speed(iter/s)": 0.432166 }, { "acc": 0.80667181, "epoch": 0.13023583245336148, "grad_norm": 65.91060638427734, "learning_rate": 9.518354506544054e-06, "loss": 0.98220425, "memory(GiB)": 28.47, "step": 4810, "train_speed(iter/s)": 0.432168 }, { "acc": 0.81950912, "epoch": 0.13037121273657704, "grad_norm": 28.88762092590332, "learning_rate": 9.519520898705427e-06, "loss": 0.8043644, "memory(GiB)": 28.47, "step": 4815, "train_speed(iter/s)": 0.432171 }, { "acc": 0.85827007, "epoch": 0.1305065930197926, "grad_norm": 9.292129516601562, "learning_rate": 9.520686080288312e-06, "loss": 0.65541682, "memory(GiB)": 28.47, "step": 4820, "train_speed(iter/s)": 0.432173 }, { "acc": 0.82667074, "epoch": 0.13064197330300814, "grad_norm": 14.074847221374512, "learning_rate": 9.521850053802985e-06, "loss": 0.86554604, "memory(GiB)": 28.47, "step": 4825, "train_speed(iter/s)": 0.432176 }, { "acc": 0.83417282, "epoch": 0.1307773535862237, "grad_norm": 11.756551742553711, "learning_rate": 9.523012821751919e-06, "loss": 0.83949537, "memory(GiB)": 28.47, "step": 4830, "train_speed(iter/s)": 0.432178 }, { "acc": 0.83372068, "epoch": 0.13091273386943925, "grad_norm": 9.682574272155762, "learning_rate": 9.524174386629813e-06, "loss": 0.81716022, "memory(GiB)": 28.47, "step": 4835, "train_speed(iter/s)": 0.43218 }, { "acc": 0.82226744, "epoch": 0.1310481141526548, "grad_norm": 16.37875747680664, "learning_rate": 9.525334750923642e-06, "loss": 0.77573199, "memory(GiB)": 28.47, "step": 4840, "train_speed(iter/s)": 0.432183 }, { "acc": 0.83662653, "epoch": 0.13118349443587035, "grad_norm": 21.584932327270508, "learning_rate": 9.526493917112669e-06, "loss": 0.88088226, "memory(GiB)": 28.47, "step": 4845, "train_speed(iter/s)": 0.43218 }, { "acc": 0.81016788, "epoch": 0.1313188747190859, "grad_norm": 19.782447814941406, "learning_rate": 9.52765188766849e-06, "loss": 0.9459034, "memory(GiB)": 28.47, "step": 4850, "train_speed(iter/s)": 0.432182 }, { "acc": 0.8380084, "epoch": 0.13145425500230146, "grad_norm": 5.658334732055664, "learning_rate": 9.528808665055055e-06, "loss": 0.73561196, "memory(GiB)": 28.47, "step": 4855, "train_speed(iter/s)": 0.432185 }, { "acc": 0.83457861, "epoch": 0.131589635285517, "grad_norm": 8.713306427001953, "learning_rate": 9.529964251728714e-06, "loss": 0.81167469, "memory(GiB)": 28.47, "step": 4860, "train_speed(iter/s)": 0.432185 }, { "acc": 0.81862898, "epoch": 0.13172501556873256, "grad_norm": 28.663480758666992, "learning_rate": 9.53111865013823e-06, "loss": 0.91208105, "memory(GiB)": 28.47, "step": 4865, "train_speed(iter/s)": 0.432186 }, { "acc": 0.84489288, "epoch": 0.13186039585194811, "grad_norm": 13.244647979736328, "learning_rate": 9.532271862724825e-06, "loss": 0.68162489, "memory(GiB)": 28.47, "step": 4870, "train_speed(iter/s)": 0.432186 }, { "acc": 0.82121162, "epoch": 0.13199577613516367, "grad_norm": 28.46011734008789, "learning_rate": 9.533423891922208e-06, "loss": 0.8770134, "memory(GiB)": 28.47, "step": 4875, "train_speed(iter/s)": 0.43219 }, { "acc": 0.81519403, "epoch": 0.13213115641837922, "grad_norm": 19.542278289794922, "learning_rate": 9.534574740156597e-06, "loss": 1.04570923, "memory(GiB)": 28.47, "step": 4880, "train_speed(iter/s)": 0.432191 }, { "acc": 0.8122117, "epoch": 0.13226653670159477, "grad_norm": 21.58730125427246, "learning_rate": 9.535724409846761e-06, "loss": 0.94616718, "memory(GiB)": 28.47, "step": 4885, "train_speed(iter/s)": 0.432192 }, { "acc": 0.84498911, "epoch": 0.13240191698481033, "grad_norm": 37.55464553833008, "learning_rate": 9.536872903404045e-06, "loss": 0.95273762, "memory(GiB)": 28.47, "step": 4890, "train_speed(iter/s)": 0.432196 }, { "acc": 0.8136797, "epoch": 0.13253729726802588, "grad_norm": 16.469751358032227, "learning_rate": 9.5380202232324e-06, "loss": 0.98822613, "memory(GiB)": 28.47, "step": 4895, "train_speed(iter/s)": 0.432198 }, { "acc": 0.8439373, "epoch": 0.13267267755124143, "grad_norm": 9.627345085144043, "learning_rate": 9.539166371728414e-06, "loss": 0.75908003, "memory(GiB)": 28.47, "step": 4900, "train_speed(iter/s)": 0.432201 }, { "acc": 0.83911905, "epoch": 0.13280805783445698, "grad_norm": 9.668413162231445, "learning_rate": 9.540311351281342e-06, "loss": 0.77140646, "memory(GiB)": 28.47, "step": 4905, "train_speed(iter/s)": 0.432201 }, { "acc": 0.81552992, "epoch": 0.13294343811767254, "grad_norm": 12.674148559570312, "learning_rate": 9.54145516427314e-06, "loss": 0.8948864, "memory(GiB)": 28.47, "step": 4910, "train_speed(iter/s)": 0.432201 }, { "acc": 0.84504251, "epoch": 0.1330788184008881, "grad_norm": 19.745941162109375, "learning_rate": 9.542597813078484e-06, "loss": 0.82617426, "memory(GiB)": 28.47, "step": 4915, "train_speed(iter/s)": 0.432202 }, { "acc": 0.83785572, "epoch": 0.13321419868410364, "grad_norm": 8.911446571350098, "learning_rate": 9.543739300064811e-06, "loss": 0.78693013, "memory(GiB)": 28.47, "step": 4920, "train_speed(iter/s)": 0.432201 }, { "acc": 0.82514782, "epoch": 0.1333495789673192, "grad_norm": 14.900259971618652, "learning_rate": 9.544879627592347e-06, "loss": 0.82500753, "memory(GiB)": 28.47, "step": 4925, "train_speed(iter/s)": 0.432205 }, { "acc": 0.82844954, "epoch": 0.13348495925053475, "grad_norm": 17.217098236083984, "learning_rate": 9.546018798014125e-06, "loss": 0.87309361, "memory(GiB)": 28.47, "step": 4930, "train_speed(iter/s)": 0.432205 }, { "acc": 0.80369844, "epoch": 0.1336203395337503, "grad_norm": 9.435262680053711, "learning_rate": 9.547156813676026e-06, "loss": 0.93563604, "memory(GiB)": 28.47, "step": 4935, "train_speed(iter/s)": 0.432205 }, { "acc": 0.81899729, "epoch": 0.13375571981696585, "grad_norm": 14.114782333374023, "learning_rate": 9.548293676916809e-06, "loss": 0.97037868, "memory(GiB)": 28.47, "step": 4940, "train_speed(iter/s)": 0.432205 }, { "acc": 0.84751177, "epoch": 0.1338911001001814, "grad_norm": 9.873625755310059, "learning_rate": 9.549429390068127e-06, "loss": 0.70957375, "memory(GiB)": 28.47, "step": 4945, "train_speed(iter/s)": 0.432205 }, { "acc": 0.80169621, "epoch": 0.13402648038339696, "grad_norm": 18.412092208862305, "learning_rate": 9.550563955454567e-06, "loss": 1.05866022, "memory(GiB)": 28.47, "step": 4950, "train_speed(iter/s)": 0.432205 }, { "acc": 0.85984087, "epoch": 0.1341618606666125, "grad_norm": 13.578300476074219, "learning_rate": 9.551697375393681e-06, "loss": 0.63452039, "memory(GiB)": 28.47, "step": 4955, "train_speed(iter/s)": 0.432207 }, { "acc": 0.81988735, "epoch": 0.13429724094982806, "grad_norm": 13.875617027282715, "learning_rate": 9.552829652196002e-06, "loss": 0.9379797, "memory(GiB)": 28.47, "step": 4960, "train_speed(iter/s)": 0.432209 }, { "acc": 0.85436974, "epoch": 0.13443262123304361, "grad_norm": 28.18172264099121, "learning_rate": 9.553960788165078e-06, "loss": 0.70972281, "memory(GiB)": 28.47, "step": 4965, "train_speed(iter/s)": 0.43221 }, { "acc": 0.82592201, "epoch": 0.13456800151625917, "grad_norm": 12.7731351852417, "learning_rate": 9.555090785597506e-06, "loss": 0.89304466, "memory(GiB)": 28.47, "step": 4970, "train_speed(iter/s)": 0.432211 }, { "acc": 0.84286003, "epoch": 0.13470338179947472, "grad_norm": 8.689510345458984, "learning_rate": 9.556219646782951e-06, "loss": 0.84906788, "memory(GiB)": 28.47, "step": 4975, "train_speed(iter/s)": 0.432213 }, { "acc": 0.8341589, "epoch": 0.13483876208269027, "grad_norm": 13.869176864624023, "learning_rate": 9.557347374004182e-06, "loss": 0.85818872, "memory(GiB)": 28.47, "step": 4980, "train_speed(iter/s)": 0.432215 }, { "acc": 0.82236881, "epoch": 0.13497414236590582, "grad_norm": 9.369086265563965, "learning_rate": 9.558473969537093e-06, "loss": 0.85883694, "memory(GiB)": 28.47, "step": 4985, "train_speed(iter/s)": 0.432214 }, { "acc": 0.81135731, "epoch": 0.13510952264912138, "grad_norm": 10.808182716369629, "learning_rate": 9.559599435650737e-06, "loss": 0.9912961, "memory(GiB)": 28.47, "step": 4990, "train_speed(iter/s)": 0.432215 }, { "acc": 0.83909683, "epoch": 0.13524490293233693, "grad_norm": 34.11470031738281, "learning_rate": 9.560723774607342e-06, "loss": 0.84187164, "memory(GiB)": 28.47, "step": 4995, "train_speed(iter/s)": 0.432218 }, { "acc": 0.83295317, "epoch": 0.13538028321555248, "grad_norm": 10.028899192810059, "learning_rate": 9.561846988662353e-06, "loss": 0.82563276, "memory(GiB)": 28.47, "step": 5000, "train_speed(iter/s)": 0.432217 }, { "acc": 0.84220791, "epoch": 0.13551566349876804, "grad_norm": 15.763869285583496, "learning_rate": 9.562969080064446e-06, "loss": 0.8424345, "memory(GiB)": 28.47, "step": 5005, "train_speed(iter/s)": 0.432215 }, { "acc": 0.8314312, "epoch": 0.1356510437819836, "grad_norm": 11.57483959197998, "learning_rate": 9.564090051055566e-06, "loss": 0.89285622, "memory(GiB)": 28.47, "step": 5010, "train_speed(iter/s)": 0.432218 }, { "acc": 0.83984623, "epoch": 0.13578642406519914, "grad_norm": 13.67554759979248, "learning_rate": 9.56520990387095e-06, "loss": 0.85278835, "memory(GiB)": 28.47, "step": 5015, "train_speed(iter/s)": 0.432218 }, { "acc": 0.82461891, "epoch": 0.1359218043484147, "grad_norm": 10.746847152709961, "learning_rate": 9.56632864073915e-06, "loss": 0.87867203, "memory(GiB)": 28.47, "step": 5020, "train_speed(iter/s)": 0.432222 }, { "acc": 0.86384277, "epoch": 0.13605718463163025, "grad_norm": 6.341182231903076, "learning_rate": 9.56744626388206e-06, "loss": 0.61187668, "memory(GiB)": 28.47, "step": 5025, "train_speed(iter/s)": 0.432226 }, { "acc": 0.84659719, "epoch": 0.1361925649148458, "grad_norm": 9.894391059875488, "learning_rate": 9.568562775514946e-06, "loss": 0.79475832, "memory(GiB)": 28.47, "step": 5030, "train_speed(iter/s)": 0.432229 }, { "acc": 0.84221411, "epoch": 0.13632794519806135, "grad_norm": 8.429847717285156, "learning_rate": 9.569678177846474e-06, "loss": 0.75839548, "memory(GiB)": 28.47, "step": 5035, "train_speed(iter/s)": 0.432233 }, { "acc": 0.81192837, "epoch": 0.1364633254812769, "grad_norm": 33.20293426513672, "learning_rate": 9.570792473078734e-06, "loss": 0.95959921, "memory(GiB)": 28.47, "step": 5040, "train_speed(iter/s)": 0.432232 }, { "acc": 0.84058294, "epoch": 0.13659870576449246, "grad_norm": 6.60755729675293, "learning_rate": 9.571905663407259e-06, "loss": 0.84702549, "memory(GiB)": 28.47, "step": 5045, "train_speed(iter/s)": 0.432232 }, { "acc": 0.84764309, "epoch": 0.136734086047708, "grad_norm": 9.140764236450195, "learning_rate": 9.573017751021063e-06, "loss": 0.74231243, "memory(GiB)": 28.47, "step": 5050, "train_speed(iter/s)": 0.432235 }, { "acc": 0.79576044, "epoch": 0.13686946633092356, "grad_norm": 25.743810653686523, "learning_rate": 9.574128738102662e-06, "loss": 1.09953423, "memory(GiB)": 28.47, "step": 5055, "train_speed(iter/s)": 0.432238 }, { "acc": 0.84818411, "epoch": 0.13700484661413911, "grad_norm": 11.733108520507812, "learning_rate": 9.575238626828091e-06, "loss": 0.74727583, "memory(GiB)": 28.47, "step": 5060, "train_speed(iter/s)": 0.432241 }, { "acc": 0.7956027, "epoch": 0.13714022689735467, "grad_norm": 17.280698776245117, "learning_rate": 9.57634741936695e-06, "loss": 1.09396553, "memory(GiB)": 28.47, "step": 5065, "train_speed(iter/s)": 0.432243 }, { "acc": 0.8260025, "epoch": 0.13727560718057022, "grad_norm": 10.42100715637207, "learning_rate": 9.577455117882403e-06, "loss": 0.86230316, "memory(GiB)": 28.47, "step": 5070, "train_speed(iter/s)": 0.432248 }, { "acc": 0.82797441, "epoch": 0.13741098746378577, "grad_norm": 8.022921562194824, "learning_rate": 9.578561724531227e-06, "loss": 0.90486288, "memory(GiB)": 28.47, "step": 5075, "train_speed(iter/s)": 0.432245 }, { "acc": 0.85848751, "epoch": 0.13754636774700132, "grad_norm": 8.886683464050293, "learning_rate": 9.579667241463828e-06, "loss": 0.68183699, "memory(GiB)": 28.47, "step": 5080, "train_speed(iter/s)": 0.432249 }, { "acc": 0.83335304, "epoch": 0.13768174803021688, "grad_norm": 19.71379852294922, "learning_rate": 9.580771670824257e-06, "loss": 0.86571712, "memory(GiB)": 28.47, "step": 5085, "train_speed(iter/s)": 0.43225 }, { "acc": 0.84922733, "epoch": 0.13781712831343243, "grad_norm": 10.019243240356445, "learning_rate": 9.581875014750251e-06, "loss": 0.79063206, "memory(GiB)": 28.47, "step": 5090, "train_speed(iter/s)": 0.432253 }, { "acc": 0.82054262, "epoch": 0.13795250859664798, "grad_norm": 10.227775573730469, "learning_rate": 9.582977275373247e-06, "loss": 0.84318953, "memory(GiB)": 28.47, "step": 5095, "train_speed(iter/s)": 0.432254 }, { "acc": 0.77908335, "epoch": 0.13808788887986354, "grad_norm": 9.69990348815918, "learning_rate": 9.58407845481841e-06, "loss": 1.13202744, "memory(GiB)": 28.47, "step": 5100, "train_speed(iter/s)": 0.432254 }, { "acc": 0.83321953, "epoch": 0.1382232691630791, "grad_norm": 7.82913875579834, "learning_rate": 9.58517855520466e-06, "loss": 0.81536007, "memory(GiB)": 28.47, "step": 5105, "train_speed(iter/s)": 0.432257 }, { "acc": 0.83521996, "epoch": 0.13835864944629464, "grad_norm": 27.33734703063965, "learning_rate": 9.58627757864469e-06, "loss": 0.83623924, "memory(GiB)": 28.47, "step": 5110, "train_speed(iter/s)": 0.432257 }, { "acc": 0.84080582, "epoch": 0.1384940297295102, "grad_norm": 13.773984909057617, "learning_rate": 9.587375527244998e-06, "loss": 0.83221092, "memory(GiB)": 28.47, "step": 5115, "train_speed(iter/s)": 0.432259 }, { "acc": 0.81653843, "epoch": 0.13862941001272575, "grad_norm": 6.440382957458496, "learning_rate": 9.58847240310591e-06, "loss": 0.93997183, "memory(GiB)": 28.47, "step": 5120, "train_speed(iter/s)": 0.432256 }, { "acc": 0.80502129, "epoch": 0.1387647902959413, "grad_norm": 10.704886436462402, "learning_rate": 9.589568208321593e-06, "loss": 0.97807684, "memory(GiB)": 28.47, "step": 5125, "train_speed(iter/s)": 0.432259 }, { "acc": 0.85730934, "epoch": 0.13890017057915685, "grad_norm": 8.0760498046875, "learning_rate": 9.590662944980094e-06, "loss": 0.64773288, "memory(GiB)": 28.47, "step": 5130, "train_speed(iter/s)": 0.432259 }, { "acc": 0.8175992, "epoch": 0.1390355508623724, "grad_norm": 7.016950607299805, "learning_rate": 9.59175661516336e-06, "loss": 0.83915701, "memory(GiB)": 28.47, "step": 5135, "train_speed(iter/s)": 0.432252 }, { "acc": 0.84012928, "epoch": 0.13917093114558796, "grad_norm": 6.715457916259766, "learning_rate": 9.59284922094725e-06, "loss": 0.79532051, "memory(GiB)": 28.47, "step": 5140, "train_speed(iter/s)": 0.432254 }, { "acc": 0.8481595, "epoch": 0.1393063114288035, "grad_norm": 7.629361629486084, "learning_rate": 9.593940764401576e-06, "loss": 0.74462638, "memory(GiB)": 28.47, "step": 5145, "train_speed(iter/s)": 0.432251 }, { "acc": 0.83908625, "epoch": 0.13944169171201906, "grad_norm": 10.753778457641602, "learning_rate": 9.59503124759012e-06, "loss": 0.79790106, "memory(GiB)": 28.47, "step": 5150, "train_speed(iter/s)": 0.432252 }, { "acc": 0.81759729, "epoch": 0.13957707199523461, "grad_norm": 14.202627182006836, "learning_rate": 9.596120672570646e-06, "loss": 0.88818531, "memory(GiB)": 28.47, "step": 5155, "train_speed(iter/s)": 0.432254 }, { "acc": 0.82130508, "epoch": 0.13971245227845017, "grad_norm": 7.180920124053955, "learning_rate": 9.597209041394942e-06, "loss": 0.89143257, "memory(GiB)": 28.47, "step": 5160, "train_speed(iter/s)": 0.432256 }, { "acc": 0.81640205, "epoch": 0.13984783256166572, "grad_norm": 10.228363037109375, "learning_rate": 9.598296356108831e-06, "loss": 0.98109674, "memory(GiB)": 28.47, "step": 5165, "train_speed(iter/s)": 0.432261 }, { "acc": 0.82711506, "epoch": 0.13998321284488127, "grad_norm": 8.313232421875, "learning_rate": 9.5993826187522e-06, "loss": 0.83414335, "memory(GiB)": 28.47, "step": 5170, "train_speed(iter/s)": 0.43226 }, { "acc": 0.84173126, "epoch": 0.14011859312809682, "grad_norm": 9.392180442810059, "learning_rate": 9.60046783135902e-06, "loss": 0.88139324, "memory(GiB)": 28.47, "step": 5175, "train_speed(iter/s)": 0.432261 }, { "acc": 0.83978481, "epoch": 0.14025397341131238, "grad_norm": 13.873648643493652, "learning_rate": 9.601551995957363e-06, "loss": 0.72562232, "memory(GiB)": 28.47, "step": 5180, "train_speed(iter/s)": 0.432263 }, { "acc": 0.84180717, "epoch": 0.14038935369452793, "grad_norm": 13.274706840515137, "learning_rate": 9.602635114569435e-06, "loss": 0.69498711, "memory(GiB)": 28.47, "step": 5185, "train_speed(iter/s)": 0.432266 }, { "acc": 0.82568588, "epoch": 0.14052473397774348, "grad_norm": 12.14195728302002, "learning_rate": 9.603717189211603e-06, "loss": 0.86471977, "memory(GiB)": 28.47, "step": 5190, "train_speed(iter/s)": 0.432267 }, { "acc": 0.81709518, "epoch": 0.14066011426095903, "grad_norm": 12.754096031188965, "learning_rate": 9.604798221894397e-06, "loss": 0.9768383, "memory(GiB)": 28.47, "step": 5195, "train_speed(iter/s)": 0.432271 }, { "acc": 0.82716236, "epoch": 0.1407954945441746, "grad_norm": 8.899168968200684, "learning_rate": 9.605878214622547e-06, "loss": 0.91076221, "memory(GiB)": 28.47, "step": 5200, "train_speed(iter/s)": 0.432269 }, { "acc": 0.85581512, "epoch": 0.14093087482739014, "grad_norm": 15.335173606872559, "learning_rate": 9.60695716939501e-06, "loss": 0.67169876, "memory(GiB)": 28.47, "step": 5205, "train_speed(iter/s)": 0.432272 }, { "acc": 0.84099674, "epoch": 0.1410662551106057, "grad_norm": 16.932506561279297, "learning_rate": 9.608035088204977e-06, "loss": 0.85682716, "memory(GiB)": 28.47, "step": 5210, "train_speed(iter/s)": 0.432272 }, { "acc": 0.84181747, "epoch": 0.14120163539382125, "grad_norm": 9.991447448730469, "learning_rate": 9.609111973039909e-06, "loss": 0.74729419, "memory(GiB)": 28.47, "step": 5215, "train_speed(iter/s)": 0.43227 }, { "acc": 0.80894766, "epoch": 0.1413370156770368, "grad_norm": 22.42329978942871, "learning_rate": 9.61018782588155e-06, "loss": 1.08597698, "memory(GiB)": 28.47, "step": 5220, "train_speed(iter/s)": 0.432272 }, { "acc": 0.82567263, "epoch": 0.14147239596025235, "grad_norm": 15.434639930725098, "learning_rate": 9.611262648705951e-06, "loss": 0.81722994, "memory(GiB)": 28.47, "step": 5225, "train_speed(iter/s)": 0.432274 }, { "acc": 0.81411915, "epoch": 0.1416077762434679, "grad_norm": 12.911584854125977, "learning_rate": 9.612336443483496e-06, "loss": 1.00213575, "memory(GiB)": 28.47, "step": 5230, "train_speed(iter/s)": 0.432275 }, { "acc": 0.83527298, "epoch": 0.14174315652668346, "grad_norm": 12.987811088562012, "learning_rate": 9.613409212178917e-06, "loss": 0.91145678, "memory(GiB)": 28.47, "step": 5235, "train_speed(iter/s)": 0.432277 }, { "acc": 0.83338947, "epoch": 0.141878536809899, "grad_norm": 18.974761962890625, "learning_rate": 9.614480956751322e-06, "loss": 0.8173912, "memory(GiB)": 28.47, "step": 5240, "train_speed(iter/s)": 0.43228 }, { "acc": 0.84607048, "epoch": 0.14201391709311456, "grad_norm": 21.932165145874023, "learning_rate": 9.615551679154216e-06, "loss": 0.81403561, "memory(GiB)": 28.47, "step": 5245, "train_speed(iter/s)": 0.432283 }, { "acc": 0.85096655, "epoch": 0.1421492973763301, "grad_norm": 19.38091278076172, "learning_rate": 9.616621381335513e-06, "loss": 0.69347315, "memory(GiB)": 28.47, "step": 5250, "train_speed(iter/s)": 0.432286 }, { "acc": 0.82787189, "epoch": 0.14228467765954567, "grad_norm": 11.31412410736084, "learning_rate": 9.61769006523757e-06, "loss": 0.88165016, "memory(GiB)": 28.47, "step": 5255, "train_speed(iter/s)": 0.432289 }, { "acc": 0.83097687, "epoch": 0.14242005794276122, "grad_norm": 12.65803337097168, "learning_rate": 9.618757732797199e-06, "loss": 0.88838482, "memory(GiB)": 28.47, "step": 5260, "train_speed(iter/s)": 0.432294 }, { "acc": 0.8248209, "epoch": 0.14255543822597677, "grad_norm": 20.52335548400879, "learning_rate": 9.619824385945692e-06, "loss": 0.91049805, "memory(GiB)": 28.47, "step": 5265, "train_speed(iter/s)": 0.432294 }, { "acc": 0.82482061, "epoch": 0.14269081850919232, "grad_norm": 83.90352630615234, "learning_rate": 9.62089002660884e-06, "loss": 0.88556967, "memory(GiB)": 28.47, "step": 5270, "train_speed(iter/s)": 0.432294 }, { "acc": 0.83463879, "epoch": 0.14282619879240788, "grad_norm": 20.052644729614258, "learning_rate": 9.62195465670696e-06, "loss": 0.85209036, "memory(GiB)": 28.47, "step": 5275, "train_speed(iter/s)": 0.432295 }, { "acc": 0.82738628, "epoch": 0.14296157907562343, "grad_norm": 15.500801086425781, "learning_rate": 9.623018278154907e-06, "loss": 0.8788559, "memory(GiB)": 28.47, "step": 5280, "train_speed(iter/s)": 0.432298 }, { "acc": 0.80962524, "epoch": 0.14309695935883898, "grad_norm": 20.758277893066406, "learning_rate": 9.624080892862102e-06, "loss": 1.06100159, "memory(GiB)": 28.47, "step": 5285, "train_speed(iter/s)": 0.4323 }, { "acc": 0.83671532, "epoch": 0.14323233964205453, "grad_norm": 9.405817985534668, "learning_rate": 9.625142502732542e-06, "loss": 0.8652442, "memory(GiB)": 28.47, "step": 5290, "train_speed(iter/s)": 0.432301 }, { "acc": 0.8266798, "epoch": 0.1433677199252701, "grad_norm": 7.229486465454102, "learning_rate": 9.626203109664837e-06, "loss": 0.82859287, "memory(GiB)": 28.47, "step": 5295, "train_speed(iter/s)": 0.432302 }, { "acc": 0.81234245, "epoch": 0.14350310020848564, "grad_norm": 16.532367706298828, "learning_rate": 9.627262715552212e-06, "loss": 0.99644184, "memory(GiB)": 28.47, "step": 5300, "train_speed(iter/s)": 0.432304 }, { "acc": 0.83896561, "epoch": 0.1436384804917012, "grad_norm": 36.38948059082031, "learning_rate": 9.628321322282548e-06, "loss": 0.83090839, "memory(GiB)": 28.47, "step": 5305, "train_speed(iter/s)": 0.432308 }, { "acc": 0.83354292, "epoch": 0.14377386077491675, "grad_norm": 40.4394416809082, "learning_rate": 9.629378931738374e-06, "loss": 0.77727213, "memory(GiB)": 28.47, "step": 5310, "train_speed(iter/s)": 0.432311 }, { "acc": 0.85237598, "epoch": 0.1439092410581323, "grad_norm": 7.750356197357178, "learning_rate": 9.630435545796924e-06, "loss": 0.64973898, "memory(GiB)": 28.47, "step": 5315, "train_speed(iter/s)": 0.432311 }, { "acc": 0.8544508, "epoch": 0.14404462134134785, "grad_norm": 14.167588233947754, "learning_rate": 9.631491166330113e-06, "loss": 0.74425306, "memory(GiB)": 28.47, "step": 5320, "train_speed(iter/s)": 0.432312 }, { "acc": 0.85559044, "epoch": 0.1441800016245634, "grad_norm": 6.895402908325195, "learning_rate": 9.632545795204605e-06, "loss": 0.64082174, "memory(GiB)": 28.47, "step": 5325, "train_speed(iter/s)": 0.432314 }, { "acc": 0.82731285, "epoch": 0.14431538190777896, "grad_norm": 10.883588790893555, "learning_rate": 9.633599434281789e-06, "loss": 0.83045664, "memory(GiB)": 28.47, "step": 5330, "train_speed(iter/s)": 0.432316 }, { "acc": 0.81719961, "epoch": 0.1444507621909945, "grad_norm": 16.48829460144043, "learning_rate": 9.634652085417828e-06, "loss": 0.83899803, "memory(GiB)": 28.47, "step": 5335, "train_speed(iter/s)": 0.432318 }, { "acc": 0.81534653, "epoch": 0.14458614247421006, "grad_norm": 11.482892036437988, "learning_rate": 9.635703750463666e-06, "loss": 0.96745567, "memory(GiB)": 28.47, "step": 5340, "train_speed(iter/s)": 0.432321 }, { "acc": 0.83790026, "epoch": 0.1447215227574256, "grad_norm": 13.324213981628418, "learning_rate": 9.636754431265048e-06, "loss": 0.72911191, "memory(GiB)": 28.47, "step": 5345, "train_speed(iter/s)": 0.432323 }, { "acc": 0.84689674, "epoch": 0.14485690304064117, "grad_norm": 23.446331024169922, "learning_rate": 9.637804129662549e-06, "loss": 0.86540203, "memory(GiB)": 28.47, "step": 5350, "train_speed(iter/s)": 0.432325 }, { "acc": 0.84612942, "epoch": 0.14499228332385672, "grad_norm": 15.963907241821289, "learning_rate": 9.63885284749157e-06, "loss": 0.77280722, "memory(GiB)": 28.47, "step": 5355, "train_speed(iter/s)": 0.432328 }, { "acc": 0.82942352, "epoch": 0.14512766360707227, "grad_norm": 9.641435623168945, "learning_rate": 9.639900586582399e-06, "loss": 0.82809219, "memory(GiB)": 28.47, "step": 5360, "train_speed(iter/s)": 0.432329 }, { "acc": 0.81675014, "epoch": 0.14526304389028782, "grad_norm": 15.11886978149414, "learning_rate": 9.640947348760176e-06, "loss": 0.85899849, "memory(GiB)": 28.47, "step": 5365, "train_speed(iter/s)": 0.432331 }, { "acc": 0.82051659, "epoch": 0.14539842417350338, "grad_norm": 16.226383209228516, "learning_rate": 9.641993135844961e-06, "loss": 0.86390381, "memory(GiB)": 28.47, "step": 5370, "train_speed(iter/s)": 0.432332 }, { "acc": 0.84967794, "epoch": 0.14553380445671893, "grad_norm": 10.286771774291992, "learning_rate": 9.643037949651724e-06, "loss": 0.69687486, "memory(GiB)": 28.47, "step": 5375, "train_speed(iter/s)": 0.432334 }, { "acc": 0.79948359, "epoch": 0.14566918473993448, "grad_norm": 18.37030029296875, "learning_rate": 9.644081791990369e-06, "loss": 1.04309664, "memory(GiB)": 28.47, "step": 5380, "train_speed(iter/s)": 0.432328 }, { "acc": 0.84229584, "epoch": 0.14580456502315003, "grad_norm": 16.37691307067871, "learning_rate": 9.645124664665765e-06, "loss": 0.80172977, "memory(GiB)": 28.47, "step": 5385, "train_speed(iter/s)": 0.43233 }, { "acc": 0.83246136, "epoch": 0.1459399453063656, "grad_norm": 8.1416015625, "learning_rate": 9.646166569477751e-06, "loss": 0.86926899, "memory(GiB)": 28.47, "step": 5390, "train_speed(iter/s)": 0.43233 }, { "acc": 0.84252253, "epoch": 0.14607532558958114, "grad_norm": 12.310517311096191, "learning_rate": 9.647207508221158e-06, "loss": 0.82982826, "memory(GiB)": 28.47, "step": 5395, "train_speed(iter/s)": 0.432333 }, { "acc": 0.82375183, "epoch": 0.1462107058727967, "grad_norm": 11.56061840057373, "learning_rate": 9.648247482685834e-06, "loss": 0.92524662, "memory(GiB)": 28.47, "step": 5400, "train_speed(iter/s)": 0.432332 }, { "acc": 0.85939732, "epoch": 0.14634608615601225, "grad_norm": 13.215866088867188, "learning_rate": 9.649286494656651e-06, "loss": 0.71412573, "memory(GiB)": 28.47, "step": 5405, "train_speed(iter/s)": 0.432334 }, { "acc": 0.82381229, "epoch": 0.1464814664392278, "grad_norm": 28.4361629486084, "learning_rate": 9.650324545913535e-06, "loss": 0.9488162, "memory(GiB)": 28.47, "step": 5410, "train_speed(iter/s)": 0.432333 }, { "acc": 0.83917809, "epoch": 0.14661684672244335, "grad_norm": 7.87384033203125, "learning_rate": 9.651361638231476e-06, "loss": 0.78975449, "memory(GiB)": 28.47, "step": 5415, "train_speed(iter/s)": 0.432335 }, { "acc": 0.81756763, "epoch": 0.1467522270056589, "grad_norm": 9.135193824768066, "learning_rate": 9.652397773380552e-06, "loss": 0.96983242, "memory(GiB)": 28.47, "step": 5420, "train_speed(iter/s)": 0.432339 }, { "acc": 0.82971363, "epoch": 0.14688760728887446, "grad_norm": 11.901311874389648, "learning_rate": 9.653432953125944e-06, "loss": 0.81507835, "memory(GiB)": 28.47, "step": 5425, "train_speed(iter/s)": 0.432342 }, { "acc": 0.81534157, "epoch": 0.14702298757209, "grad_norm": 18.999847412109375, "learning_rate": 9.654467179227954e-06, "loss": 0.95827112, "memory(GiB)": 28.47, "step": 5430, "train_speed(iter/s)": 0.432345 }, { "acc": 0.83805056, "epoch": 0.14715836785530556, "grad_norm": 7.7446489334106445, "learning_rate": 9.655500453442022e-06, "loss": 0.79929156, "memory(GiB)": 28.47, "step": 5435, "train_speed(iter/s)": 0.432348 }, { "acc": 0.82331238, "epoch": 0.1472937481385211, "grad_norm": 9.094372749328613, "learning_rate": 9.65653277751875e-06, "loss": 0.95137253, "memory(GiB)": 28.47, "step": 5440, "train_speed(iter/s)": 0.43235 }, { "acc": 0.83558521, "epoch": 0.14742912842173667, "grad_norm": 10.956398010253906, "learning_rate": 9.657564153203907e-06, "loss": 0.88279257, "memory(GiB)": 28.47, "step": 5445, "train_speed(iter/s)": 0.43235 }, { "acc": 0.83494263, "epoch": 0.14756450870495222, "grad_norm": 7.681601524353027, "learning_rate": 9.658594582238464e-06, "loss": 0.84282265, "memory(GiB)": 28.47, "step": 5450, "train_speed(iter/s)": 0.432349 }, { "acc": 0.82761221, "epoch": 0.14769988898816777, "grad_norm": 7.019815921783447, "learning_rate": 9.659624066358596e-06, "loss": 0.84275713, "memory(GiB)": 28.47, "step": 5455, "train_speed(iter/s)": 0.432349 }, { "acc": 0.82558899, "epoch": 0.14783526927138332, "grad_norm": 49.37578201293945, "learning_rate": 9.66065260729571e-06, "loss": 0.90417938, "memory(GiB)": 28.47, "step": 5460, "train_speed(iter/s)": 0.432354 }, { "acc": 0.83898506, "epoch": 0.14797064955459888, "grad_norm": 10.634709358215332, "learning_rate": 9.661680206776458e-06, "loss": 0.80843782, "memory(GiB)": 28.47, "step": 5465, "train_speed(iter/s)": 0.432355 }, { "acc": 0.86757565, "epoch": 0.14810602983781443, "grad_norm": 10.54111099243164, "learning_rate": 9.662706866522753e-06, "loss": 0.64789267, "memory(GiB)": 28.47, "step": 5470, "train_speed(iter/s)": 0.432355 }, { "acc": 0.83437328, "epoch": 0.14824141012102998, "grad_norm": 7.98175048828125, "learning_rate": 9.66373258825179e-06, "loss": 0.86351681, "memory(GiB)": 28.47, "step": 5475, "train_speed(iter/s)": 0.432359 }, { "acc": 0.83612633, "epoch": 0.14837679040424553, "grad_norm": 15.511543273925781, "learning_rate": 9.66475737367606e-06, "loss": 0.90585175, "memory(GiB)": 28.47, "step": 5480, "train_speed(iter/s)": 0.432359 }, { "acc": 0.81650333, "epoch": 0.1485121706874611, "grad_norm": 9.087318420410156, "learning_rate": 9.665781224503372e-06, "loss": 0.96832962, "memory(GiB)": 28.47, "step": 5485, "train_speed(iter/s)": 0.43236 }, { "acc": 0.81464128, "epoch": 0.14864755097067664, "grad_norm": 17.2063045501709, "learning_rate": 9.66680414243686e-06, "loss": 0.97611589, "memory(GiB)": 28.47, "step": 5490, "train_speed(iter/s)": 0.432363 }, { "acc": 0.81886292, "epoch": 0.1487829312538922, "grad_norm": 15.400359153747559, "learning_rate": 9.667826129175017e-06, "loss": 0.82971191, "memory(GiB)": 28.47, "step": 5495, "train_speed(iter/s)": 0.432362 }, { "acc": 0.83755064, "epoch": 0.14891831153710774, "grad_norm": 11.09558391571045, "learning_rate": 9.668847186411689e-06, "loss": 0.8488266, "memory(GiB)": 28.47, "step": 5500, "train_speed(iter/s)": 0.432363 }, { "acc": 0.8205245, "epoch": 0.1490536918203233, "grad_norm": 18.693744659423828, "learning_rate": 9.669867315836116e-06, "loss": 0.85432167, "memory(GiB)": 28.47, "step": 5505, "train_speed(iter/s)": 0.432365 }, { "acc": 0.82968559, "epoch": 0.14918907210353885, "grad_norm": 7.360330104827881, "learning_rate": 9.67088651913293e-06, "loss": 0.87222767, "memory(GiB)": 28.47, "step": 5510, "train_speed(iter/s)": 0.432365 }, { "acc": 0.82620525, "epoch": 0.1493244523867544, "grad_norm": 19.784957885742188, "learning_rate": 9.671904797982183e-06, "loss": 0.97838669, "memory(GiB)": 28.47, "step": 5515, "train_speed(iter/s)": 0.432369 }, { "acc": 0.81942806, "epoch": 0.14945983266996996, "grad_norm": 10.05479621887207, "learning_rate": 9.672922154059359e-06, "loss": 0.93844471, "memory(GiB)": 28.47, "step": 5520, "train_speed(iter/s)": 0.43237 }, { "acc": 0.79803305, "epoch": 0.1495952129531855, "grad_norm": 29.574495315551758, "learning_rate": 9.673938589035386e-06, "loss": 1.01026421, "memory(GiB)": 28.47, "step": 5525, "train_speed(iter/s)": 0.432373 }, { "acc": 0.83688488, "epoch": 0.14973059323640106, "grad_norm": 15.610112190246582, "learning_rate": 9.674954104576665e-06, "loss": 0.83828373, "memory(GiB)": 28.47, "step": 5530, "train_speed(iter/s)": 0.432375 }, { "acc": 0.85363836, "epoch": 0.1498659735196166, "grad_norm": 18.064016342163086, "learning_rate": 9.675968702345077e-06, "loss": 0.69900503, "memory(GiB)": 28.47, "step": 5535, "train_speed(iter/s)": 0.432378 }, { "acc": 0.81072435, "epoch": 0.15000135380283217, "grad_norm": 18.77393913269043, "learning_rate": 9.676982383997995e-06, "loss": 0.95295048, "memory(GiB)": 28.47, "step": 5540, "train_speed(iter/s)": 0.432377 }, { "acc": 0.7885005, "epoch": 0.15013673408604772, "grad_norm": 18.633724212646484, "learning_rate": 9.677995151188316e-06, "loss": 1.16107168, "memory(GiB)": 28.47, "step": 5545, "train_speed(iter/s)": 0.432375 }, { "acc": 0.83347149, "epoch": 0.15027211436926327, "grad_norm": 21.303512573242188, "learning_rate": 9.67900700556446e-06, "loss": 0.78707294, "memory(GiB)": 28.47, "step": 5550, "train_speed(iter/s)": 0.432377 }, { "acc": 0.82318344, "epoch": 0.15040749465247882, "grad_norm": 10.279500007629395, "learning_rate": 9.680017948770404e-06, "loss": 0.89250431, "memory(GiB)": 28.47, "step": 5555, "train_speed(iter/s)": 0.432379 }, { "acc": 0.81365566, "epoch": 0.15054287493569438, "grad_norm": 9.773945808410645, "learning_rate": 9.681027982445668e-06, "loss": 0.84940186, "memory(GiB)": 28.47, "step": 5560, "train_speed(iter/s)": 0.432377 }, { "acc": 0.8033968, "epoch": 0.15067825521890993, "grad_norm": 8.991392135620117, "learning_rate": 9.682037108225375e-06, "loss": 1.00553675, "memory(GiB)": 28.47, "step": 5565, "train_speed(iter/s)": 0.432378 }, { "acc": 0.8362196, "epoch": 0.15081363550212548, "grad_norm": 13.031886100769043, "learning_rate": 9.683045327740229e-06, "loss": 0.89032869, "memory(GiB)": 28.47, "step": 5570, "train_speed(iter/s)": 0.432381 }, { "acc": 0.87949152, "epoch": 0.15094901578534103, "grad_norm": 7.884453773498535, "learning_rate": 9.684052642616544e-06, "loss": 0.59702544, "memory(GiB)": 28.47, "step": 5575, "train_speed(iter/s)": 0.432383 }, { "acc": 0.81436872, "epoch": 0.1510843960685566, "grad_norm": 13.518305778503418, "learning_rate": 9.685059054476264e-06, "loss": 1.05655422, "memory(GiB)": 28.47, "step": 5580, "train_speed(iter/s)": 0.432384 }, { "acc": 0.82322693, "epoch": 0.15121977635177214, "grad_norm": 17.314891815185547, "learning_rate": 9.686064564936974e-06, "loss": 0.85456829, "memory(GiB)": 28.47, "step": 5585, "train_speed(iter/s)": 0.432387 }, { "acc": 0.85973129, "epoch": 0.1513551566349877, "grad_norm": 10.630074501037598, "learning_rate": 9.687069175611918e-06, "loss": 0.65040569, "memory(GiB)": 28.47, "step": 5590, "train_speed(iter/s)": 0.432387 }, { "acc": 0.8199625, "epoch": 0.15149053691820324, "grad_norm": 17.856016159057617, "learning_rate": 9.688072888110011e-06, "loss": 0.90072021, "memory(GiB)": 28.47, "step": 5595, "train_speed(iter/s)": 0.43239 }, { "acc": 0.82919636, "epoch": 0.1516259172014188, "grad_norm": 5.621787071228027, "learning_rate": 9.689075704035854e-06, "loss": 0.82361813, "memory(GiB)": 28.47, "step": 5600, "train_speed(iter/s)": 0.432389 }, { "acc": 0.81920872, "epoch": 0.15176129748463435, "grad_norm": 17.749126434326172, "learning_rate": 9.690077624989756e-06, "loss": 0.91422739, "memory(GiB)": 28.47, "step": 5605, "train_speed(iter/s)": 0.432392 }, { "acc": 0.82462215, "epoch": 0.1518966777678499, "grad_norm": 8.329930305480957, "learning_rate": 9.691078652567746e-06, "loss": 0.84526682, "memory(GiB)": 28.47, "step": 5610, "train_speed(iter/s)": 0.432392 }, { "acc": 0.85345478, "epoch": 0.15203205805106546, "grad_norm": 15.919513702392578, "learning_rate": 9.692078788361585e-06, "loss": 0.76877441, "memory(GiB)": 28.47, "step": 5615, "train_speed(iter/s)": 0.432395 }, { "acc": 0.83114939, "epoch": 0.152167438334281, "grad_norm": 13.364951133728027, "learning_rate": 9.693078033958783e-06, "loss": 0.91099129, "memory(GiB)": 28.47, "step": 5620, "train_speed(iter/s)": 0.432397 }, { "acc": 0.83101377, "epoch": 0.15230281861749656, "grad_norm": 14.377482414245605, "learning_rate": 9.694076390942614e-06, "loss": 0.79359846, "memory(GiB)": 28.47, "step": 5625, "train_speed(iter/s)": 0.432399 }, { "acc": 0.83201122, "epoch": 0.1524381989007121, "grad_norm": 21.436498641967773, "learning_rate": 9.695073860892137e-06, "loss": 0.85325508, "memory(GiB)": 28.47, "step": 5630, "train_speed(iter/s)": 0.4324 }, { "acc": 0.8478508, "epoch": 0.15257357918392767, "grad_norm": 10.176763534545898, "learning_rate": 9.6960704453822e-06, "loss": 0.79239097, "memory(GiB)": 28.47, "step": 5635, "train_speed(iter/s)": 0.432403 }, { "acc": 0.82247782, "epoch": 0.15270895946714322, "grad_norm": 15.553000450134277, "learning_rate": 9.697066145983466e-06, "loss": 0.88144264, "memory(GiB)": 28.47, "step": 5640, "train_speed(iter/s)": 0.432405 }, { "acc": 0.84538078, "epoch": 0.15284433975035877, "grad_norm": 20.766542434692383, "learning_rate": 9.698060964262417e-06, "loss": 0.81419811, "memory(GiB)": 28.47, "step": 5645, "train_speed(iter/s)": 0.432405 }, { "acc": 0.83478346, "epoch": 0.15297972003357432, "grad_norm": 18.524181365966797, "learning_rate": 9.699054901781378e-06, "loss": 0.85063448, "memory(GiB)": 28.47, "step": 5650, "train_speed(iter/s)": 0.432407 }, { "acc": 0.8167388, "epoch": 0.15311510031678988, "grad_norm": 13.075820922851562, "learning_rate": 9.700047960098526e-06, "loss": 0.90058155, "memory(GiB)": 28.47, "step": 5655, "train_speed(iter/s)": 0.432407 }, { "acc": 0.84991693, "epoch": 0.15325048060000543, "grad_norm": 14.013840675354004, "learning_rate": 9.701040140767908e-06, "loss": 0.6959692, "memory(GiB)": 28.47, "step": 5660, "train_speed(iter/s)": 0.432408 }, { "acc": 0.85147858, "epoch": 0.15338586088322098, "grad_norm": 9.2694091796875, "learning_rate": 9.702031445339455e-06, "loss": 0.66256933, "memory(GiB)": 28.47, "step": 5665, "train_speed(iter/s)": 0.43241 }, { "acc": 0.81016636, "epoch": 0.15352124116643653, "grad_norm": 11.55788803100586, "learning_rate": 9.703021875358997e-06, "loss": 0.97964458, "memory(GiB)": 28.47, "step": 5670, "train_speed(iter/s)": 0.432412 }, { "acc": 0.82416821, "epoch": 0.15365662144965206, "grad_norm": 9.699296951293945, "learning_rate": 9.70401143236827e-06, "loss": 0.90313911, "memory(GiB)": 28.47, "step": 5675, "train_speed(iter/s)": 0.432413 }, { "acc": 0.83289299, "epoch": 0.1537920017328676, "grad_norm": 13.711488723754883, "learning_rate": 9.705000117904944e-06, "loss": 0.88907633, "memory(GiB)": 28.47, "step": 5680, "train_speed(iter/s)": 0.432412 }, { "acc": 0.82711058, "epoch": 0.15392738201608316, "grad_norm": 12.292017936706543, "learning_rate": 9.705987933502625e-06, "loss": 0.87983971, "memory(GiB)": 28.47, "step": 5685, "train_speed(iter/s)": 0.432415 }, { "acc": 0.82056475, "epoch": 0.15406276229929872, "grad_norm": 8.012575149536133, "learning_rate": 9.70697488069088e-06, "loss": 0.97424259, "memory(GiB)": 28.47, "step": 5690, "train_speed(iter/s)": 0.432416 }, { "acc": 0.83574991, "epoch": 0.15419814258251427, "grad_norm": 5.789156913757324, "learning_rate": 9.707960960995238e-06, "loss": 0.9084981, "memory(GiB)": 28.47, "step": 5695, "train_speed(iter/s)": 0.432418 }, { "acc": 0.83089485, "epoch": 0.15433352286572982, "grad_norm": 16.058759689331055, "learning_rate": 9.708946175937216e-06, "loss": 0.88937702, "memory(GiB)": 28.47, "step": 5700, "train_speed(iter/s)": 0.432419 }, { "acc": 0.84167519, "epoch": 0.15446890314894537, "grad_norm": 16.154659271240234, "learning_rate": 9.709930527034329e-06, "loss": 0.72698231, "memory(GiB)": 28.47, "step": 5705, "train_speed(iter/s)": 0.43242 }, { "acc": 0.79368286, "epoch": 0.15460428343216093, "grad_norm": 26.549943923950195, "learning_rate": 9.710914015800104e-06, "loss": 1.03363724, "memory(GiB)": 28.47, "step": 5710, "train_speed(iter/s)": 0.432421 }, { "acc": 0.81889381, "epoch": 0.15473966371537648, "grad_norm": 6.575316429138184, "learning_rate": 9.711896643744091e-06, "loss": 0.90070553, "memory(GiB)": 28.47, "step": 5715, "train_speed(iter/s)": 0.432423 }, { "acc": 0.85486097, "epoch": 0.15487504399859203, "grad_norm": 8.572178840637207, "learning_rate": 9.712878412371885e-06, "loss": 0.68058062, "memory(GiB)": 28.47, "step": 5720, "train_speed(iter/s)": 0.432425 }, { "acc": 0.84668407, "epoch": 0.15501042428180759, "grad_norm": 9.065070152282715, "learning_rate": 9.713859323185128e-06, "loss": 0.78781433, "memory(GiB)": 28.47, "step": 5725, "train_speed(iter/s)": 0.432425 }, { "acc": 0.84191275, "epoch": 0.15514580456502314, "grad_norm": 6.342543125152588, "learning_rate": 9.714839377681533e-06, "loss": 0.81785221, "memory(GiB)": 28.47, "step": 5730, "train_speed(iter/s)": 0.432426 }, { "acc": 0.83251095, "epoch": 0.1552811848482387, "grad_norm": 7.353481292724609, "learning_rate": 9.715818577354893e-06, "loss": 0.83747816, "memory(GiB)": 28.47, "step": 5735, "train_speed(iter/s)": 0.432426 }, { "acc": 0.84455843, "epoch": 0.15541656513145424, "grad_norm": 22.628971099853516, "learning_rate": 9.716796923695096e-06, "loss": 0.78708954, "memory(GiB)": 28.47, "step": 5740, "train_speed(iter/s)": 0.432422 }, { "acc": 0.84600744, "epoch": 0.1555519454146698, "grad_norm": 6.522933483123779, "learning_rate": 9.717774418188138e-06, "loss": 0.76179695, "memory(GiB)": 28.47, "step": 5745, "train_speed(iter/s)": 0.432425 }, { "acc": 0.84856186, "epoch": 0.15568732569788535, "grad_norm": 11.729684829711914, "learning_rate": 9.718751062316138e-06, "loss": 0.71427121, "memory(GiB)": 28.47, "step": 5750, "train_speed(iter/s)": 0.432428 }, { "acc": 0.80702295, "epoch": 0.1558227059811009, "grad_norm": 7.640467166900635, "learning_rate": 9.719726857557351e-06, "loss": 1.11276321, "memory(GiB)": 28.47, "step": 5755, "train_speed(iter/s)": 0.432429 }, { "acc": 0.81859665, "epoch": 0.15595808626431645, "grad_norm": 13.955910682678223, "learning_rate": 9.720701805386173e-06, "loss": 1.0491497, "memory(GiB)": 28.47, "step": 5760, "train_speed(iter/s)": 0.432431 }, { "acc": 0.81115074, "epoch": 0.156093466547532, "grad_norm": 11.199629783630371, "learning_rate": 9.721675907273174e-06, "loss": 0.93134785, "memory(GiB)": 28.47, "step": 5765, "train_speed(iter/s)": 0.432432 }, { "acc": 0.80137129, "epoch": 0.15622884683074756, "grad_norm": 16.146604537963867, "learning_rate": 9.72264916468509e-06, "loss": 0.99985609, "memory(GiB)": 28.47, "step": 5770, "train_speed(iter/s)": 0.432433 }, { "acc": 0.84490833, "epoch": 0.1563642271139631, "grad_norm": 3.596870183944702, "learning_rate": 9.723621579084852e-06, "loss": 0.72933073, "memory(GiB)": 28.47, "step": 5775, "train_speed(iter/s)": 0.432433 }, { "acc": 0.79881229, "epoch": 0.15649960739717866, "grad_norm": 17.042604446411133, "learning_rate": 9.724593151931587e-06, "loss": 0.95862875, "memory(GiB)": 28.47, "step": 5780, "train_speed(iter/s)": 0.432436 }, { "acc": 0.81852951, "epoch": 0.15663498768039422, "grad_norm": 10.941211700439453, "learning_rate": 9.725563884680642e-06, "loss": 0.98105822, "memory(GiB)": 28.47, "step": 5785, "train_speed(iter/s)": 0.432438 }, { "acc": 0.81398249, "epoch": 0.15677036796360977, "grad_norm": 13.357721328735352, "learning_rate": 9.72653377878359e-06, "loss": 0.8954134, "memory(GiB)": 28.47, "step": 5790, "train_speed(iter/s)": 0.432438 }, { "acc": 0.80868053, "epoch": 0.15690574824682532, "grad_norm": 10.156329154968262, "learning_rate": 9.727502835688242e-06, "loss": 1.04516563, "memory(GiB)": 28.47, "step": 5795, "train_speed(iter/s)": 0.43244 }, { "acc": 0.80897388, "epoch": 0.15704112853004087, "grad_norm": 35.485618591308594, "learning_rate": 9.72847105683867e-06, "loss": 1.06331654, "memory(GiB)": 28.47, "step": 5800, "train_speed(iter/s)": 0.432442 }, { "acc": 0.85189104, "epoch": 0.15717650881325643, "grad_norm": 5.903875350952148, "learning_rate": 9.729438443675207e-06, "loss": 0.70712962, "memory(GiB)": 28.47, "step": 5805, "train_speed(iter/s)": 0.432443 }, { "acc": 0.81736965, "epoch": 0.15731188909647198, "grad_norm": 19.58486557006836, "learning_rate": 9.730404997634465e-06, "loss": 0.9584527, "memory(GiB)": 28.47, "step": 5810, "train_speed(iter/s)": 0.432445 }, { "acc": 0.85935507, "epoch": 0.15744726937968753, "grad_norm": 17.442781448364258, "learning_rate": 9.731370720149358e-06, "loss": 0.68147254, "memory(GiB)": 28.47, "step": 5815, "train_speed(iter/s)": 0.432447 }, { "acc": 0.8426424, "epoch": 0.15758264966290308, "grad_norm": 8.242560386657715, "learning_rate": 9.732335612649094e-06, "loss": 0.80453463, "memory(GiB)": 28.47, "step": 5820, "train_speed(iter/s)": 0.432449 }, { "acc": 0.81258831, "epoch": 0.15771802994611864, "grad_norm": 17.218198776245117, "learning_rate": 9.733299676559204e-06, "loss": 0.99045029, "memory(GiB)": 28.47, "step": 5825, "train_speed(iter/s)": 0.43245 }, { "acc": 0.829599, "epoch": 0.1578534102293342, "grad_norm": 22.31541633605957, "learning_rate": 9.734262913301551e-06, "loss": 0.89463263, "memory(GiB)": 28.47, "step": 5830, "train_speed(iter/s)": 0.432452 }, { "acc": 0.78251972, "epoch": 0.15798879051254974, "grad_norm": 27.115324020385742, "learning_rate": 9.73522532429434e-06, "loss": 1.05479851, "memory(GiB)": 28.47, "step": 5835, "train_speed(iter/s)": 0.432455 }, { "acc": 0.8292285, "epoch": 0.1581241707957653, "grad_norm": 29.871496200561523, "learning_rate": 9.736186910952129e-06, "loss": 0.97952185, "memory(GiB)": 28.47, "step": 5840, "train_speed(iter/s)": 0.432457 }, { "acc": 0.8161602, "epoch": 0.15825955107898085, "grad_norm": 9.10466480255127, "learning_rate": 9.737147674685851e-06, "loss": 0.88040867, "memory(GiB)": 28.47, "step": 5845, "train_speed(iter/s)": 0.432458 }, { "acc": 0.81014395, "epoch": 0.1583949313621964, "grad_norm": 15.14355754852295, "learning_rate": 9.738107616902811e-06, "loss": 1.00902939, "memory(GiB)": 28.47, "step": 5850, "train_speed(iter/s)": 0.432461 }, { "acc": 0.83142519, "epoch": 0.15853031164541195, "grad_norm": 7.363438129425049, "learning_rate": 9.739066739006714e-06, "loss": 0.91079617, "memory(GiB)": 28.47, "step": 5855, "train_speed(iter/s)": 0.43246 }, { "acc": 0.85523157, "epoch": 0.1586656919286275, "grad_norm": 5.6166863441467285, "learning_rate": 9.740025042397668e-06, "loss": 0.6090559, "memory(GiB)": 28.47, "step": 5860, "train_speed(iter/s)": 0.432463 }, { "acc": 0.82240915, "epoch": 0.15880107221184306, "grad_norm": 8.725358963012695, "learning_rate": 9.740982528472195e-06, "loss": 0.9037323, "memory(GiB)": 28.47, "step": 5865, "train_speed(iter/s)": 0.432464 }, { "acc": 0.84140205, "epoch": 0.1589364524950586, "grad_norm": 9.72524356842041, "learning_rate": 9.741939198623257e-06, "loss": 0.8609499, "memory(GiB)": 28.47, "step": 5870, "train_speed(iter/s)": 0.432467 }, { "acc": 0.85001926, "epoch": 0.15907183277827416, "grad_norm": 8.727360725402832, "learning_rate": 9.742895054240249e-06, "loss": 0.78661823, "memory(GiB)": 28.47, "step": 5875, "train_speed(iter/s)": 0.432467 }, { "acc": 0.81767597, "epoch": 0.15920721306148972, "grad_norm": 13.882883071899414, "learning_rate": 9.743850096709016e-06, "loss": 1.00372572, "memory(GiB)": 28.47, "step": 5880, "train_speed(iter/s)": 0.432466 }, { "acc": 0.83471537, "epoch": 0.15934259334470527, "grad_norm": 8.619938850402832, "learning_rate": 9.744804327411885e-06, "loss": 0.74019976, "memory(GiB)": 28.47, "step": 5885, "train_speed(iter/s)": 0.432466 }, { "acc": 0.8322444, "epoch": 0.15947797362792082, "grad_norm": 20.96869468688965, "learning_rate": 9.745757747727646e-06, "loss": 0.93937817, "memory(GiB)": 28.47, "step": 5890, "train_speed(iter/s)": 0.43247 }, { "acc": 0.84079895, "epoch": 0.15961335391113637, "grad_norm": 14.244214057922363, "learning_rate": 9.746710359031586e-06, "loss": 0.783887, "memory(GiB)": 28.47, "step": 5895, "train_speed(iter/s)": 0.432472 }, { "acc": 0.819279, "epoch": 0.15974873419435193, "grad_norm": 8.336273193359375, "learning_rate": 9.747662162695496e-06, "loss": 0.9124341, "memory(GiB)": 28.47, "step": 5900, "train_speed(iter/s)": 0.432474 }, { "acc": 0.82877769, "epoch": 0.15988411447756748, "grad_norm": 4.881915092468262, "learning_rate": 9.748613160087673e-06, "loss": 0.84378204, "memory(GiB)": 28.47, "step": 5905, "train_speed(iter/s)": 0.432474 }, { "acc": 0.82522154, "epoch": 0.16001949476078303, "grad_norm": 15.576807975769043, "learning_rate": 9.749563352572949e-06, "loss": 0.8524601, "memory(GiB)": 28.47, "step": 5910, "train_speed(iter/s)": 0.432475 }, { "acc": 0.81056194, "epoch": 0.16015487504399858, "grad_norm": 34.33243942260742, "learning_rate": 9.750512741512688e-06, "loss": 0.88342838, "memory(GiB)": 28.47, "step": 5915, "train_speed(iter/s)": 0.432476 }, { "acc": 0.81237154, "epoch": 0.16029025532721414, "grad_norm": 12.522683143615723, "learning_rate": 9.7514613282648e-06, "loss": 0.84204159, "memory(GiB)": 28.47, "step": 5920, "train_speed(iter/s)": 0.432478 }, { "acc": 0.84104481, "epoch": 0.1604256356104297, "grad_norm": 21.845094680786133, "learning_rate": 9.752409114183766e-06, "loss": 0.82329321, "memory(GiB)": 28.47, "step": 5925, "train_speed(iter/s)": 0.432479 }, { "acc": 0.80764027, "epoch": 0.16056101589364524, "grad_norm": 9.476088523864746, "learning_rate": 9.75335610062063e-06, "loss": 0.97884884, "memory(GiB)": 28.47, "step": 5930, "train_speed(iter/s)": 0.432481 }, { "acc": 0.80142889, "epoch": 0.1606963961768608, "grad_norm": 10.704888343811035, "learning_rate": 9.754302288923023e-06, "loss": 1.02238808, "memory(GiB)": 28.47, "step": 5935, "train_speed(iter/s)": 0.432484 }, { "acc": 0.83427563, "epoch": 0.16083177646007635, "grad_norm": 12.910918235778809, "learning_rate": 9.755247680435171e-06, "loss": 0.82667694, "memory(GiB)": 28.47, "step": 5940, "train_speed(iter/s)": 0.432485 }, { "acc": 0.83950214, "epoch": 0.1609671567432919, "grad_norm": 9.053915023803711, "learning_rate": 9.756192276497912e-06, "loss": 0.89665718, "memory(GiB)": 28.47, "step": 5945, "train_speed(iter/s)": 0.432488 }, { "acc": 0.81771898, "epoch": 0.16110253702650745, "grad_norm": 11.806354522705078, "learning_rate": 9.757136078448692e-06, "loss": 1.03034506, "memory(GiB)": 28.47, "step": 5950, "train_speed(iter/s)": 0.432489 }, { "acc": 0.81550903, "epoch": 0.161237917309723, "grad_norm": 12.761961936950684, "learning_rate": 9.758079087621598e-06, "loss": 0.91045876, "memory(GiB)": 28.47, "step": 5955, "train_speed(iter/s)": 0.432492 }, { "acc": 0.81463423, "epoch": 0.16137329759293856, "grad_norm": 14.847119331359863, "learning_rate": 9.75902130534735e-06, "loss": 0.88415003, "memory(GiB)": 28.47, "step": 5960, "train_speed(iter/s)": 0.432492 }, { "acc": 0.80791817, "epoch": 0.1615086778761541, "grad_norm": 10.894841194152832, "learning_rate": 9.75996273295332e-06, "loss": 0.90256414, "memory(GiB)": 28.47, "step": 5965, "train_speed(iter/s)": 0.432494 }, { "acc": 0.82451935, "epoch": 0.16164405815936966, "grad_norm": 12.096435546875, "learning_rate": 9.760903371763553e-06, "loss": 0.87908249, "memory(GiB)": 28.47, "step": 5970, "train_speed(iter/s)": 0.432496 }, { "acc": 0.85433083, "epoch": 0.16177943844258522, "grad_norm": 7.956698417663574, "learning_rate": 9.761843223098759e-06, "loss": 0.72310829, "memory(GiB)": 28.47, "step": 5975, "train_speed(iter/s)": 0.432498 }, { "acc": 0.83564453, "epoch": 0.16191481872580077, "grad_norm": 55.060157775878906, "learning_rate": 9.762782288276335e-06, "loss": 0.82277889, "memory(GiB)": 28.47, "step": 5980, "train_speed(iter/s)": 0.432498 }, { "acc": 0.80599594, "epoch": 0.16205019900901632, "grad_norm": 31.000085830688477, "learning_rate": 9.763720568610379e-06, "loss": 1.06638346, "memory(GiB)": 28.47, "step": 5985, "train_speed(iter/s)": 0.432499 }, { "acc": 0.82502613, "epoch": 0.16218557929223187, "grad_norm": 32.81681823730469, "learning_rate": 9.764658065411692e-06, "loss": 0.84509478, "memory(GiB)": 28.47, "step": 5990, "train_speed(iter/s)": 0.4325 }, { "acc": 0.82235832, "epoch": 0.16232095957544743, "grad_norm": 11.623953819274902, "learning_rate": 9.7655947799878e-06, "loss": 0.87260742, "memory(GiB)": 28.47, "step": 5995, "train_speed(iter/s)": 0.432501 }, { "acc": 0.83290348, "epoch": 0.16245633985866298, "grad_norm": 10.541695594787598, "learning_rate": 9.766530713642953e-06, "loss": 0.77563057, "memory(GiB)": 28.47, "step": 6000, "train_speed(iter/s)": 0.432504 }, { "acc": 0.83281517, "epoch": 0.16259172014187853, "grad_norm": 20.573654174804688, "learning_rate": 9.767465867678146e-06, "loss": 0.846492, "memory(GiB)": 28.47, "step": 6005, "train_speed(iter/s)": 0.432506 }, { "acc": 0.81868992, "epoch": 0.16272710042509408, "grad_norm": 10.28570556640625, "learning_rate": 9.768400243391122e-06, "loss": 1.04668503, "memory(GiB)": 28.47, "step": 6010, "train_speed(iter/s)": 0.432509 }, { "acc": 0.80712347, "epoch": 0.16286248070830964, "grad_norm": 6.331949710845947, "learning_rate": 9.76933384207639e-06, "loss": 1.01661777, "memory(GiB)": 28.47, "step": 6015, "train_speed(iter/s)": 0.432511 }, { "acc": 0.84790249, "epoch": 0.1629978609915252, "grad_norm": 31.5520076751709, "learning_rate": 9.770266665025227e-06, "loss": 0.68894153, "memory(GiB)": 28.47, "step": 6020, "train_speed(iter/s)": 0.432508 }, { "acc": 0.81584339, "epoch": 0.16313324127474074, "grad_norm": 19.300769805908203, "learning_rate": 9.771198713525696e-06, "loss": 0.94620552, "memory(GiB)": 28.47, "step": 6025, "train_speed(iter/s)": 0.43251 }, { "acc": 0.82001705, "epoch": 0.1632686215579563, "grad_norm": 13.98460578918457, "learning_rate": 9.772129988862661e-06, "loss": 0.87224627, "memory(GiB)": 28.47, "step": 6030, "train_speed(iter/s)": 0.43251 }, { "acc": 0.85102119, "epoch": 0.16340400184117185, "grad_norm": 4.808847904205322, "learning_rate": 9.773060492317784e-06, "loss": 0.65794115, "memory(GiB)": 28.47, "step": 6035, "train_speed(iter/s)": 0.432512 }, { "acc": 0.83078899, "epoch": 0.1635393821243874, "grad_norm": 9.785215377807617, "learning_rate": 9.773990225169542e-06, "loss": 0.85131197, "memory(GiB)": 28.47, "step": 6040, "train_speed(iter/s)": 0.432513 }, { "acc": 0.84942503, "epoch": 0.16367476240760295, "grad_norm": 5.330050468444824, "learning_rate": 9.77491918869324e-06, "loss": 0.81193762, "memory(GiB)": 28.47, "step": 6045, "train_speed(iter/s)": 0.432516 }, { "acc": 0.81492167, "epoch": 0.1638101426908185, "grad_norm": 17.775636672973633, "learning_rate": 9.775847384161026e-06, "loss": 0.99695902, "memory(GiB)": 28.47, "step": 6050, "train_speed(iter/s)": 0.432517 }, { "acc": 0.82114267, "epoch": 0.16394552297403406, "grad_norm": 14.274642944335938, "learning_rate": 9.776774812841888e-06, "loss": 0.98013134, "memory(GiB)": 28.47, "step": 6055, "train_speed(iter/s)": 0.432515 }, { "acc": 0.85379457, "epoch": 0.1640809032572496, "grad_norm": 7.559213161468506, "learning_rate": 9.777701476001668e-06, "loss": 0.72856846, "memory(GiB)": 28.47, "step": 6060, "train_speed(iter/s)": 0.432516 }, { "acc": 0.84083595, "epoch": 0.16421628354046516, "grad_norm": 8.281952857971191, "learning_rate": 9.778627374903086e-06, "loss": 0.81888266, "memory(GiB)": 28.47, "step": 6065, "train_speed(iter/s)": 0.432516 }, { "acc": 0.84647026, "epoch": 0.16435166382368072, "grad_norm": 14.674857139587402, "learning_rate": 9.779552510805735e-06, "loss": 0.70026345, "memory(GiB)": 28.47, "step": 6070, "train_speed(iter/s)": 0.432518 }, { "acc": 0.81776581, "epoch": 0.16448704410689627, "grad_norm": 7.264768123626709, "learning_rate": 9.780476884966096e-06, "loss": 0.91640511, "memory(GiB)": 28.47, "step": 6075, "train_speed(iter/s)": 0.432517 }, { "acc": 0.80553474, "epoch": 0.16462242439011182, "grad_norm": 6.725428104400635, "learning_rate": 9.781400498637555e-06, "loss": 0.99041595, "memory(GiB)": 28.47, "step": 6080, "train_speed(iter/s)": 0.432516 }, { "acc": 0.81450386, "epoch": 0.16475780467332737, "grad_norm": 17.71790885925293, "learning_rate": 9.782323353070398e-06, "loss": 1.03029461, "memory(GiB)": 28.47, "step": 6085, "train_speed(iter/s)": 0.432517 }, { "acc": 0.81254692, "epoch": 0.16489318495654293, "grad_norm": 17.52740478515625, "learning_rate": 9.783245449511831e-06, "loss": 0.94582424, "memory(GiB)": 28.47, "step": 6090, "train_speed(iter/s)": 0.432514 }, { "acc": 0.83347101, "epoch": 0.16502856523975848, "grad_norm": 8.704626083374023, "learning_rate": 9.784166789206002e-06, "loss": 0.83281116, "memory(GiB)": 28.47, "step": 6095, "train_speed(iter/s)": 0.432516 }, { "acc": 0.81777649, "epoch": 0.16516394552297403, "grad_norm": 6.968641757965088, "learning_rate": 9.78508737339398e-06, "loss": 0.929702, "memory(GiB)": 28.47, "step": 6100, "train_speed(iter/s)": 0.432513 }, { "acc": 0.84074497, "epoch": 0.16529932580618958, "grad_norm": 12.456703186035156, "learning_rate": 9.7860072033138e-06, "loss": 0.73026581, "memory(GiB)": 28.47, "step": 6105, "train_speed(iter/s)": 0.432515 }, { "acc": 0.82767048, "epoch": 0.16543470608940514, "grad_norm": 11.481708526611328, "learning_rate": 9.786926280200443e-06, "loss": 0.90436993, "memory(GiB)": 28.47, "step": 6110, "train_speed(iter/s)": 0.432514 }, { "acc": 0.82197475, "epoch": 0.1655700863726207, "grad_norm": 14.648462295532227, "learning_rate": 9.787844605285868e-06, "loss": 0.94784431, "memory(GiB)": 28.47, "step": 6115, "train_speed(iter/s)": 0.432515 }, { "acc": 0.80973806, "epoch": 0.16570546665583624, "grad_norm": 13.165321350097656, "learning_rate": 9.788762179799012e-06, "loss": 0.9778162, "memory(GiB)": 28.47, "step": 6120, "train_speed(iter/s)": 0.432518 }, { "acc": 0.82663288, "epoch": 0.1658408469390518, "grad_norm": 9.274374961853027, "learning_rate": 9.789679004965798e-06, "loss": 0.82334595, "memory(GiB)": 28.47, "step": 6125, "train_speed(iter/s)": 0.43252 }, { "acc": 0.82384682, "epoch": 0.16597622722226735, "grad_norm": 17.5833740234375, "learning_rate": 9.79059508200915e-06, "loss": 0.99456835, "memory(GiB)": 28.47, "step": 6130, "train_speed(iter/s)": 0.432521 }, { "acc": 0.82768288, "epoch": 0.1661116075054829, "grad_norm": 24.44462776184082, "learning_rate": 9.791510412148998e-06, "loss": 0.87736664, "memory(GiB)": 28.47, "step": 6135, "train_speed(iter/s)": 0.432522 }, { "acc": 0.84272766, "epoch": 0.16624698778869845, "grad_norm": 5.821450233459473, "learning_rate": 9.792424996602298e-06, "loss": 0.77040129, "memory(GiB)": 28.47, "step": 6140, "train_speed(iter/s)": 0.432523 }, { "acc": 0.81237335, "epoch": 0.166382368071914, "grad_norm": 9.218583106994629, "learning_rate": 9.793338836583024e-06, "loss": 0.89443302, "memory(GiB)": 28.47, "step": 6145, "train_speed(iter/s)": 0.432523 }, { "acc": 0.83414364, "epoch": 0.16651774835512956, "grad_norm": 24.98359489440918, "learning_rate": 9.794251933302197e-06, "loss": 0.80618267, "memory(GiB)": 28.47, "step": 6150, "train_speed(iter/s)": 0.432522 }, { "acc": 0.808636, "epoch": 0.1666531286383451, "grad_norm": 20.284902572631836, "learning_rate": 9.795164287967879e-06, "loss": 0.95910711, "memory(GiB)": 28.47, "step": 6155, "train_speed(iter/s)": 0.432524 }, { "acc": 0.80975313, "epoch": 0.16678850892156066, "grad_norm": 8.32419490814209, "learning_rate": 9.796075901785192e-06, "loss": 1.01124821, "memory(GiB)": 28.47, "step": 6160, "train_speed(iter/s)": 0.432525 }, { "acc": 0.81784725, "epoch": 0.16692388920477622, "grad_norm": 12.92884349822998, "learning_rate": 9.796986775956323e-06, "loss": 0.87468452, "memory(GiB)": 28.47, "step": 6165, "train_speed(iter/s)": 0.432524 }, { "acc": 0.82679634, "epoch": 0.16705926948799177, "grad_norm": 9.86050796508789, "learning_rate": 9.797896911680542e-06, "loss": 0.88290424, "memory(GiB)": 28.47, "step": 6170, "train_speed(iter/s)": 0.432526 }, { "acc": 0.83778076, "epoch": 0.16719464977120732, "grad_norm": 12.646681785583496, "learning_rate": 9.798806310154192e-06, "loss": 0.80400152, "memory(GiB)": 28.47, "step": 6175, "train_speed(iter/s)": 0.432523 }, { "acc": 0.85795269, "epoch": 0.16733003005442287, "grad_norm": 9.337841033935547, "learning_rate": 9.799714972570723e-06, "loss": 0.73105593, "memory(GiB)": 28.47, "step": 6180, "train_speed(iter/s)": 0.432526 }, { "acc": 0.80179796, "epoch": 0.16746541033763843, "grad_norm": 16.462587356567383, "learning_rate": 9.80062290012068e-06, "loss": 1.10487957, "memory(GiB)": 28.47, "step": 6185, "train_speed(iter/s)": 0.432527 }, { "acc": 0.83163395, "epoch": 0.16760079062085398, "grad_norm": 9.908432006835938, "learning_rate": 9.80153009399173e-06, "loss": 0.78650217, "memory(GiB)": 28.47, "step": 6190, "train_speed(iter/s)": 0.432528 }, { "acc": 0.87063446, "epoch": 0.16773617090406953, "grad_norm": 7.9431843757629395, "learning_rate": 9.802436555368658e-06, "loss": 0.63261538, "memory(GiB)": 28.47, "step": 6195, "train_speed(iter/s)": 0.432531 }, { "acc": 0.85290966, "epoch": 0.16787155118728508, "grad_norm": 7.23268461227417, "learning_rate": 9.803342285433384e-06, "loss": 0.67388487, "memory(GiB)": 28.47, "step": 6200, "train_speed(iter/s)": 0.432533 }, { "acc": 0.85481663, "epoch": 0.16800693147050064, "grad_norm": 4.376656532287598, "learning_rate": 9.804247285364968e-06, "loss": 0.76941943, "memory(GiB)": 28.47, "step": 6205, "train_speed(iter/s)": 0.432533 }, { "acc": 0.81751881, "epoch": 0.1681423117537162, "grad_norm": 12.482694625854492, "learning_rate": 9.80515155633962e-06, "loss": 0.92513561, "memory(GiB)": 28.47, "step": 6210, "train_speed(iter/s)": 0.432534 }, { "acc": 0.83913126, "epoch": 0.16827769203693174, "grad_norm": 20.25996208190918, "learning_rate": 9.806055099530714e-06, "loss": 0.80701761, "memory(GiB)": 28.47, "step": 6215, "train_speed(iter/s)": 0.432537 }, { "acc": 0.82594204, "epoch": 0.1684130723201473, "grad_norm": 11.094504356384277, "learning_rate": 9.806957916108791e-06, "loss": 0.85175076, "memory(GiB)": 28.47, "step": 6220, "train_speed(iter/s)": 0.432539 }, { "acc": 0.83579941, "epoch": 0.16854845260336285, "grad_norm": 23.9199275970459, "learning_rate": 9.807860007241566e-06, "loss": 0.8648633, "memory(GiB)": 28.47, "step": 6225, "train_speed(iter/s)": 0.43254 }, { "acc": 0.84241028, "epoch": 0.1686838328865784, "grad_norm": 6.513299465179443, "learning_rate": 9.808761374093948e-06, "loss": 0.76789498, "memory(GiB)": 28.47, "step": 6230, "train_speed(iter/s)": 0.432538 }, { "acc": 0.83237867, "epoch": 0.16881921316979395, "grad_norm": 13.338624000549316, "learning_rate": 9.809662017828039e-06, "loss": 1.00955906, "memory(GiB)": 28.47, "step": 6235, "train_speed(iter/s)": 0.432542 }, { "acc": 0.86969442, "epoch": 0.1689545934530095, "grad_norm": 8.717559814453125, "learning_rate": 9.81056193960315e-06, "loss": 0.62216139, "memory(GiB)": 28.47, "step": 6240, "train_speed(iter/s)": 0.432542 }, { "acc": 0.83902712, "epoch": 0.16908997373622506, "grad_norm": 31.905715942382812, "learning_rate": 9.8114611405758e-06, "loss": 0.89717045, "memory(GiB)": 28.47, "step": 6245, "train_speed(iter/s)": 0.432544 }, { "acc": 0.83865175, "epoch": 0.1692253540194406, "grad_norm": 5.577101707458496, "learning_rate": 9.812359621899734e-06, "loss": 0.79009972, "memory(GiB)": 28.47, "step": 6250, "train_speed(iter/s)": 0.432546 }, { "acc": 0.83509407, "epoch": 0.16936073430265616, "grad_norm": 10.778547286987305, "learning_rate": 9.813257384725933e-06, "loss": 0.83704929, "memory(GiB)": 28.47, "step": 6255, "train_speed(iter/s)": 0.432545 }, { "acc": 0.81849594, "epoch": 0.16949611458587172, "grad_norm": 10.834933280944824, "learning_rate": 9.814154430202614e-06, "loss": 0.84074783, "memory(GiB)": 28.47, "step": 6260, "train_speed(iter/s)": 0.432547 }, { "acc": 0.84984951, "epoch": 0.16963149486908727, "grad_norm": 9.770054817199707, "learning_rate": 9.815050759475245e-06, "loss": 0.76034422, "memory(GiB)": 28.47, "step": 6265, "train_speed(iter/s)": 0.432545 }, { "acc": 0.83228188, "epoch": 0.16976687515230282, "grad_norm": 8.071646690368652, "learning_rate": 9.815946373686553e-06, "loss": 0.76477127, "memory(GiB)": 28.47, "step": 6270, "train_speed(iter/s)": 0.432543 }, { "acc": 0.8245676, "epoch": 0.16990225543551837, "grad_norm": 7.093127727508545, "learning_rate": 9.816841273976533e-06, "loss": 0.91329784, "memory(GiB)": 28.47, "step": 6275, "train_speed(iter/s)": 0.432546 }, { "acc": 0.83855515, "epoch": 0.17003763571873393, "grad_norm": 9.554523468017578, "learning_rate": 9.817735461482455e-06, "loss": 0.77043428, "memory(GiB)": 28.47, "step": 6280, "train_speed(iter/s)": 0.432548 }, { "acc": 0.80626545, "epoch": 0.17017301600194948, "grad_norm": 10.729103088378906, "learning_rate": 9.818628937338875e-06, "loss": 0.94581623, "memory(GiB)": 28.47, "step": 6285, "train_speed(iter/s)": 0.432548 }, { "acc": 0.82754421, "epoch": 0.17030839628516503, "grad_norm": 7.9934515953063965, "learning_rate": 9.819521702677639e-06, "loss": 0.95130634, "memory(GiB)": 28.47, "step": 6290, "train_speed(iter/s)": 0.432547 }, { "acc": 0.81619663, "epoch": 0.17044377656838058, "grad_norm": 18.043338775634766, "learning_rate": 9.8204137586279e-06, "loss": 0.99028244, "memory(GiB)": 28.47, "step": 6295, "train_speed(iter/s)": 0.43255 }, { "acc": 0.83073435, "epoch": 0.17057915685159614, "grad_norm": 10.325737953186035, "learning_rate": 9.821305106316117e-06, "loss": 0.81711168, "memory(GiB)": 28.47, "step": 6300, "train_speed(iter/s)": 0.432552 }, { "acc": 0.85036039, "epoch": 0.1707145371348117, "grad_norm": 26.143075942993164, "learning_rate": 9.822195746866072e-06, "loss": 0.7213172, "memory(GiB)": 28.47, "step": 6305, "train_speed(iter/s)": 0.432552 }, { "acc": 0.82832165, "epoch": 0.17084991741802724, "grad_norm": 8.743664741516113, "learning_rate": 9.823085681398866e-06, "loss": 0.73182802, "memory(GiB)": 28.47, "step": 6310, "train_speed(iter/s)": 0.432551 }, { "acc": 0.82579327, "epoch": 0.1709852977012428, "grad_norm": 12.27797794342041, "learning_rate": 9.82397491103295e-06, "loss": 0.88908615, "memory(GiB)": 28.47, "step": 6315, "train_speed(iter/s)": 0.432551 }, { "acc": 0.83894062, "epoch": 0.17112067798445835, "grad_norm": 7.1580047607421875, "learning_rate": 9.824863436884106e-06, "loss": 0.79955359, "memory(GiB)": 28.47, "step": 6320, "train_speed(iter/s)": 0.432552 }, { "acc": 0.82118034, "epoch": 0.1712560582676739, "grad_norm": 14.201200485229492, "learning_rate": 9.825751260065477e-06, "loss": 0.91164446, "memory(GiB)": 28.47, "step": 6325, "train_speed(iter/s)": 0.432553 }, { "acc": 0.81470308, "epoch": 0.17139143855088945, "grad_norm": 13.981484413146973, "learning_rate": 9.826638381687563e-06, "loss": 0.89288406, "memory(GiB)": 28.47, "step": 6330, "train_speed(iter/s)": 0.432552 }, { "acc": 0.82536449, "epoch": 0.171526818834105, "grad_norm": 5.130577564239502, "learning_rate": 9.827524802858239e-06, "loss": 0.88974676, "memory(GiB)": 28.47, "step": 6335, "train_speed(iter/s)": 0.432554 }, { "acc": 0.85821371, "epoch": 0.17166219911732056, "grad_norm": 10.794734001159668, "learning_rate": 9.82841052468275e-06, "loss": 0.70676727, "memory(GiB)": 28.47, "step": 6340, "train_speed(iter/s)": 0.432555 }, { "acc": 0.84582939, "epoch": 0.1717975794005361, "grad_norm": 6.755431652069092, "learning_rate": 9.82929554826373e-06, "loss": 0.75722446, "memory(GiB)": 28.47, "step": 6345, "train_speed(iter/s)": 0.432556 }, { "acc": 0.82411423, "epoch": 0.17193295968375166, "grad_norm": 9.221948623657227, "learning_rate": 9.830179874701213e-06, "loss": 0.94726725, "memory(GiB)": 28.47, "step": 6350, "train_speed(iter/s)": 0.432555 }, { "acc": 0.84247246, "epoch": 0.17206833996696722, "grad_norm": 11.774725914001465, "learning_rate": 9.831063505092626e-06, "loss": 0.76159525, "memory(GiB)": 28.47, "step": 6355, "train_speed(iter/s)": 0.432556 }, { "acc": 0.80745745, "epoch": 0.17220372025018277, "grad_norm": 8.931744575500488, "learning_rate": 9.831946440532816e-06, "loss": 0.94660835, "memory(GiB)": 28.47, "step": 6360, "train_speed(iter/s)": 0.432558 }, { "acc": 0.83403358, "epoch": 0.17233910053339832, "grad_norm": 7.645717144012451, "learning_rate": 9.832828682114042e-06, "loss": 0.8333231, "memory(GiB)": 28.47, "step": 6365, "train_speed(iter/s)": 0.432559 }, { "acc": 0.81482334, "epoch": 0.17247448081661387, "grad_norm": 18.804208755493164, "learning_rate": 9.833710230925992e-06, "loss": 0.94412041, "memory(GiB)": 28.47, "step": 6370, "train_speed(iter/s)": 0.432557 }, { "acc": 0.82752361, "epoch": 0.17260986109982943, "grad_norm": 16.443208694458008, "learning_rate": 9.834591088055793e-06, "loss": 0.82537861, "memory(GiB)": 28.47, "step": 6375, "train_speed(iter/s)": 0.432558 }, { "acc": 0.81098022, "epoch": 0.17274524138304498, "grad_norm": 25.13401985168457, "learning_rate": 9.835471254588007e-06, "loss": 0.99850149, "memory(GiB)": 28.47, "step": 6380, "train_speed(iter/s)": 0.432559 }, { "acc": 0.82307949, "epoch": 0.17288062166626053, "grad_norm": 12.00407886505127, "learning_rate": 9.836350731604653e-06, "loss": 0.96427078, "memory(GiB)": 28.47, "step": 6385, "train_speed(iter/s)": 0.43256 }, { "acc": 0.797896, "epoch": 0.17301600194947608, "grad_norm": 11.395648956298828, "learning_rate": 9.837229520185209e-06, "loss": 1.10142326, "memory(GiB)": 28.47, "step": 6390, "train_speed(iter/s)": 0.432561 }, { "acc": 0.78361015, "epoch": 0.17315138223269164, "grad_norm": 15.074742317199707, "learning_rate": 9.838107621406615e-06, "loss": 1.03454762, "memory(GiB)": 28.47, "step": 6395, "train_speed(iter/s)": 0.432559 }, { "acc": 0.84599695, "epoch": 0.1732867625159072, "grad_norm": 7.662530899047852, "learning_rate": 9.838985036343293e-06, "loss": 0.80239487, "memory(GiB)": 28.47, "step": 6400, "train_speed(iter/s)": 0.432562 }, { "acc": 0.855723, "epoch": 0.17342214279912274, "grad_norm": 11.022924423217773, "learning_rate": 9.839861766067143e-06, "loss": 0.74239907, "memory(GiB)": 28.47, "step": 6405, "train_speed(iter/s)": 0.432562 }, { "acc": 0.85992403, "epoch": 0.1735575230823383, "grad_norm": 7.0898756980896, "learning_rate": 9.84073781164756e-06, "loss": 0.7067028, "memory(GiB)": 28.47, "step": 6410, "train_speed(iter/s)": 0.43256 }, { "acc": 0.85250664, "epoch": 0.17369290336555385, "grad_norm": 10.440248489379883, "learning_rate": 9.841613174151427e-06, "loss": 0.72996898, "memory(GiB)": 28.47, "step": 6415, "train_speed(iter/s)": 0.432562 }, { "acc": 0.84271259, "epoch": 0.1738282836487694, "grad_norm": 4.996527194976807, "learning_rate": 9.842487854643149e-06, "loss": 0.88839474, "memory(GiB)": 28.47, "step": 6420, "train_speed(iter/s)": 0.432561 }, { "acc": 0.84106989, "epoch": 0.17396366393198495, "grad_norm": 11.824193000793457, "learning_rate": 9.843361854184632e-06, "loss": 0.74934945, "memory(GiB)": 28.47, "step": 6425, "train_speed(iter/s)": 0.432561 }, { "acc": 0.81386375, "epoch": 0.1740990442152005, "grad_norm": 11.404891967773438, "learning_rate": 9.844235173835314e-06, "loss": 0.94089241, "memory(GiB)": 28.47, "step": 6430, "train_speed(iter/s)": 0.432564 }, { "acc": 0.81631947, "epoch": 0.17423442449841606, "grad_norm": 6.254262924194336, "learning_rate": 9.845107814652149e-06, "loss": 0.93256683, "memory(GiB)": 28.47, "step": 6435, "train_speed(iter/s)": 0.432564 }, { "acc": 0.8553381, "epoch": 0.1743698047816316, "grad_norm": 6.434142589569092, "learning_rate": 9.84597977768964e-06, "loss": 0.61210699, "memory(GiB)": 28.47, "step": 6440, "train_speed(iter/s)": 0.432565 }, { "acc": 0.82491217, "epoch": 0.17450518506484716, "grad_norm": 15.800392150878906, "learning_rate": 9.846851063999836e-06, "loss": 0.97884827, "memory(GiB)": 28.47, "step": 6445, "train_speed(iter/s)": 0.432566 }, { "acc": 0.81834841, "epoch": 0.17464056534806272, "grad_norm": 16.10360336303711, "learning_rate": 9.847721674632325e-06, "loss": 0.94075756, "memory(GiB)": 28.47, "step": 6450, "train_speed(iter/s)": 0.432566 }, { "acc": 0.81113987, "epoch": 0.17477594563127827, "grad_norm": 7.825332164764404, "learning_rate": 9.848591610634271e-06, "loss": 0.98304682, "memory(GiB)": 28.47, "step": 6455, "train_speed(iter/s)": 0.432566 }, { "acc": 0.82239017, "epoch": 0.17491132591449382, "grad_norm": 17.7966251373291, "learning_rate": 9.849460873050394e-06, "loss": 0.92091122, "memory(GiB)": 28.47, "step": 6460, "train_speed(iter/s)": 0.432563 }, { "acc": 0.84713516, "epoch": 0.17504670619770937, "grad_norm": 11.442717552185059, "learning_rate": 9.850329462922993e-06, "loss": 0.75840797, "memory(GiB)": 28.47, "step": 6465, "train_speed(iter/s)": 0.432562 }, { "acc": 0.83521385, "epoch": 0.17518208648092493, "grad_norm": 14.232423782348633, "learning_rate": 9.851197381291954e-06, "loss": 0.8700881, "memory(GiB)": 28.47, "step": 6470, "train_speed(iter/s)": 0.432564 }, { "acc": 0.82742653, "epoch": 0.17531746676414048, "grad_norm": 16.271289825439453, "learning_rate": 9.852064629194745e-06, "loss": 0.9002718, "memory(GiB)": 28.47, "step": 6475, "train_speed(iter/s)": 0.432563 }, { "acc": 0.81780109, "epoch": 0.17545284704735603, "grad_norm": 17.21255874633789, "learning_rate": 9.852931207666437e-06, "loss": 0.95508575, "memory(GiB)": 28.47, "step": 6480, "train_speed(iter/s)": 0.432563 }, { "acc": 0.8090148, "epoch": 0.17558822733057158, "grad_norm": 20.06003761291504, "learning_rate": 9.853797117739705e-06, "loss": 0.93194275, "memory(GiB)": 28.47, "step": 6485, "train_speed(iter/s)": 0.432564 }, { "acc": 0.81187382, "epoch": 0.17572360761378714, "grad_norm": 9.837409019470215, "learning_rate": 9.854662360444835e-06, "loss": 0.94286747, "memory(GiB)": 28.47, "step": 6490, "train_speed(iter/s)": 0.432565 }, { "acc": 0.81369724, "epoch": 0.1758589878970027, "grad_norm": 11.336877822875977, "learning_rate": 9.855526936809732e-06, "loss": 1.08530407, "memory(GiB)": 28.47, "step": 6495, "train_speed(iter/s)": 0.432565 }, { "acc": 0.81975231, "epoch": 0.17599436818021824, "grad_norm": 38.36488342285156, "learning_rate": 9.85639084785993e-06, "loss": 1.01141033, "memory(GiB)": 28.47, "step": 6500, "train_speed(iter/s)": 0.432563 }, { "acc": 0.81681023, "epoch": 0.1761297484634338, "grad_norm": 7.616665363311768, "learning_rate": 9.8572540946186e-06, "loss": 0.90636635, "memory(GiB)": 28.47, "step": 6505, "train_speed(iter/s)": 0.432566 }, { "acc": 0.85166607, "epoch": 0.17626512874664935, "grad_norm": 26.82760238647461, "learning_rate": 9.858116678106548e-06, "loss": 0.70881181, "memory(GiB)": 28.47, "step": 6510, "train_speed(iter/s)": 0.432566 }, { "acc": 0.84688454, "epoch": 0.1764005090298649, "grad_norm": 6.996549129486084, "learning_rate": 9.858978599342231e-06, "loss": 0.80947199, "memory(GiB)": 28.47, "step": 6515, "train_speed(iter/s)": 0.432568 }, { "acc": 0.82442169, "epoch": 0.17653588931308045, "grad_norm": 10.31274127960205, "learning_rate": 9.85983985934177e-06, "loss": 0.85198851, "memory(GiB)": 28.47, "step": 6520, "train_speed(iter/s)": 0.432568 }, { "acc": 0.81289272, "epoch": 0.176671269596296, "grad_norm": 14.9295015335083, "learning_rate": 9.860700459118932e-06, "loss": 0.9944746, "memory(GiB)": 28.47, "step": 6525, "train_speed(iter/s)": 0.432567 }, { "acc": 0.82708549, "epoch": 0.17680664987951156, "grad_norm": 6.6590118408203125, "learning_rate": 9.861560399685172e-06, "loss": 0.81425629, "memory(GiB)": 28.47, "step": 6530, "train_speed(iter/s)": 0.432568 }, { "acc": 0.82978363, "epoch": 0.1769420301627271, "grad_norm": 16.90454864501953, "learning_rate": 9.862419682049614e-06, "loss": 0.9434041, "memory(GiB)": 28.47, "step": 6535, "train_speed(iter/s)": 0.432569 }, { "acc": 0.84182053, "epoch": 0.17707741044594266, "grad_norm": 16.152814865112305, "learning_rate": 9.863278307219066e-06, "loss": 0.74585109, "memory(GiB)": 28.47, "step": 6540, "train_speed(iter/s)": 0.432566 }, { "acc": 0.80893192, "epoch": 0.17721279072915821, "grad_norm": 9.678153038024902, "learning_rate": 9.864136276198031e-06, "loss": 1.05899649, "memory(GiB)": 28.47, "step": 6545, "train_speed(iter/s)": 0.432563 }, { "acc": 0.85063915, "epoch": 0.17734817101237377, "grad_norm": 31.149503707885742, "learning_rate": 9.864993589988708e-06, "loss": 0.77252541, "memory(GiB)": 28.47, "step": 6550, "train_speed(iter/s)": 0.432564 }, { "acc": 0.81146545, "epoch": 0.17748355129558932, "grad_norm": 15.152413368225098, "learning_rate": 9.865850249591004e-06, "loss": 0.94273338, "memory(GiB)": 28.47, "step": 6555, "train_speed(iter/s)": 0.432565 }, { "acc": 0.83674021, "epoch": 0.17761893157880487, "grad_norm": 13.10489559173584, "learning_rate": 9.866706256002537e-06, "loss": 0.82740574, "memory(GiB)": 28.47, "step": 6560, "train_speed(iter/s)": 0.432566 }, { "acc": 0.80732241, "epoch": 0.17775431186202043, "grad_norm": 8.504382133483887, "learning_rate": 9.867561610218644e-06, "loss": 0.93603249, "memory(GiB)": 28.47, "step": 6565, "train_speed(iter/s)": 0.432568 }, { "acc": 0.80754509, "epoch": 0.17788969214523598, "grad_norm": 12.978816032409668, "learning_rate": 9.868416313232393e-06, "loss": 0.99896612, "memory(GiB)": 28.47, "step": 6570, "train_speed(iter/s)": 0.432569 }, { "acc": 0.8362464, "epoch": 0.17802507242845153, "grad_norm": 13.309609413146973, "learning_rate": 9.869270366034582e-06, "loss": 0.84539289, "memory(GiB)": 28.47, "step": 6575, "train_speed(iter/s)": 0.432569 }, { "acc": 0.82520428, "epoch": 0.17816045271166708, "grad_norm": 12.626435279846191, "learning_rate": 9.87012376961375e-06, "loss": 0.84606667, "memory(GiB)": 28.47, "step": 6580, "train_speed(iter/s)": 0.43257 }, { "acc": 0.84200783, "epoch": 0.17829583299488264, "grad_norm": 11.439812660217285, "learning_rate": 9.870976524956182e-06, "loss": 0.74235668, "memory(GiB)": 28.47, "step": 6585, "train_speed(iter/s)": 0.432572 }, { "acc": 0.85135422, "epoch": 0.1784312132780982, "grad_norm": 7.498187065124512, "learning_rate": 9.87182863304592e-06, "loss": 0.68323421, "memory(GiB)": 28.47, "step": 6590, "train_speed(iter/s)": 0.432572 }, { "acc": 0.84605179, "epoch": 0.17856659356131374, "grad_norm": 6.339626312255859, "learning_rate": 9.872680094864768e-06, "loss": 0.72266245, "memory(GiB)": 28.47, "step": 6595, "train_speed(iter/s)": 0.432574 }, { "acc": 0.8432375, "epoch": 0.1787019738445293, "grad_norm": 11.834110260009766, "learning_rate": 9.873530911392291e-06, "loss": 0.815207, "memory(GiB)": 28.47, "step": 6600, "train_speed(iter/s)": 0.432577 }, { "acc": 0.84380054, "epoch": 0.17883735412774485, "grad_norm": 16.302553176879883, "learning_rate": 9.874381083605838e-06, "loss": 0.86859894, "memory(GiB)": 28.47, "step": 6605, "train_speed(iter/s)": 0.432578 }, { "acc": 0.83163195, "epoch": 0.1789727344109604, "grad_norm": 11.563742637634277, "learning_rate": 9.875230612480529e-06, "loss": 0.87630777, "memory(GiB)": 28.47, "step": 6610, "train_speed(iter/s)": 0.432579 }, { "acc": 0.83801098, "epoch": 0.17910811469417595, "grad_norm": 22.411911010742188, "learning_rate": 9.87607949898928e-06, "loss": 0.77857809, "memory(GiB)": 28.47, "step": 6615, "train_speed(iter/s)": 0.432576 }, { "acc": 0.84572039, "epoch": 0.1792434949773915, "grad_norm": 22.968095779418945, "learning_rate": 9.8769277441028e-06, "loss": 0.84598322, "memory(GiB)": 28.47, "step": 6620, "train_speed(iter/s)": 0.432579 }, { "acc": 0.81912861, "epoch": 0.17937887526060706, "grad_norm": 16.845767974853516, "learning_rate": 9.877775348789598e-06, "loss": 0.93891888, "memory(GiB)": 28.47, "step": 6625, "train_speed(iter/s)": 0.432579 }, { "acc": 0.81871014, "epoch": 0.1795142555438226, "grad_norm": 10.718066215515137, "learning_rate": 9.878622314015989e-06, "loss": 0.8486578, "memory(GiB)": 28.47, "step": 6630, "train_speed(iter/s)": 0.432579 }, { "acc": 0.83810043, "epoch": 0.17964963582703816, "grad_norm": 16.71600914001465, "learning_rate": 9.879468640746108e-06, "loss": 0.75191665, "memory(GiB)": 28.47, "step": 6635, "train_speed(iter/s)": 0.43258 }, { "acc": 0.8241127, "epoch": 0.17978501611025371, "grad_norm": 15.165549278259277, "learning_rate": 9.880314329941906e-06, "loss": 0.88194685, "memory(GiB)": 28.47, "step": 6640, "train_speed(iter/s)": 0.432582 }, { "acc": 0.84326458, "epoch": 0.17992039639346927, "grad_norm": 10.161111831665039, "learning_rate": 9.881159382563165e-06, "loss": 0.79243908, "memory(GiB)": 28.47, "step": 6645, "train_speed(iter/s)": 0.432582 }, { "acc": 0.83440342, "epoch": 0.18005577667668482, "grad_norm": 12.2638521194458, "learning_rate": 9.882003799567499e-06, "loss": 0.81978989, "memory(GiB)": 28.47, "step": 6650, "train_speed(iter/s)": 0.432584 }, { "acc": 0.83741646, "epoch": 0.18019115695990037, "grad_norm": 16.58986473083496, "learning_rate": 9.882847581910364e-06, "loss": 0.86798096, "memory(GiB)": 28.47, "step": 6655, "train_speed(iter/s)": 0.432585 }, { "acc": 0.82831402, "epoch": 0.18032653724311593, "grad_norm": 10.866086959838867, "learning_rate": 9.883690730545064e-06, "loss": 0.88297825, "memory(GiB)": 28.47, "step": 6660, "train_speed(iter/s)": 0.432585 }, { "acc": 0.84900017, "epoch": 0.18046191752633148, "grad_norm": 27.678829193115234, "learning_rate": 9.884533246422755e-06, "loss": 0.73008313, "memory(GiB)": 28.47, "step": 6665, "train_speed(iter/s)": 0.432586 }, { "acc": 0.82793694, "epoch": 0.18059729780954703, "grad_norm": 6.831502437591553, "learning_rate": 9.885375130492456e-06, "loss": 0.90115108, "memory(GiB)": 28.47, "step": 6670, "train_speed(iter/s)": 0.432585 }, { "acc": 0.85199976, "epoch": 0.18073267809276258, "grad_norm": 15.847341537475586, "learning_rate": 9.886216383701049e-06, "loss": 0.79341574, "memory(GiB)": 28.47, "step": 6675, "train_speed(iter/s)": 0.432585 }, { "acc": 0.81728363, "epoch": 0.18086805837597814, "grad_norm": 12.237916946411133, "learning_rate": 9.887057006993292e-06, "loss": 0.85193996, "memory(GiB)": 28.47, "step": 6680, "train_speed(iter/s)": 0.432586 }, { "acc": 0.8333478, "epoch": 0.1810034386591937, "grad_norm": 8.327934265136719, "learning_rate": 9.887897001311814e-06, "loss": 0.89499426, "memory(GiB)": 28.47, "step": 6685, "train_speed(iter/s)": 0.432588 }, { "acc": 0.8476965, "epoch": 0.18113881894240924, "grad_norm": 20.10259246826172, "learning_rate": 9.888736367597148e-06, "loss": 0.74414363, "memory(GiB)": 28.47, "step": 6690, "train_speed(iter/s)": 0.432589 }, { "acc": 0.86844311, "epoch": 0.1812741992256248, "grad_norm": 9.156512260437012, "learning_rate": 9.8895751067877e-06, "loss": 0.70883589, "memory(GiB)": 28.47, "step": 6695, "train_speed(iter/s)": 0.432591 }, { "acc": 0.83410435, "epoch": 0.18140957950884035, "grad_norm": 9.401200294494629, "learning_rate": 9.890413219819781e-06, "loss": 0.72224445, "memory(GiB)": 28.47, "step": 6700, "train_speed(iter/s)": 0.432593 }, { "acc": 0.82511272, "epoch": 0.1815449597920559, "grad_norm": 12.063451766967773, "learning_rate": 9.891250707627614e-06, "loss": 0.88383837, "memory(GiB)": 28.47, "step": 6705, "train_speed(iter/s)": 0.432592 }, { "acc": 0.85937614, "epoch": 0.18168034007527142, "grad_norm": 9.67360782623291, "learning_rate": 9.89208757114332e-06, "loss": 0.69832926, "memory(GiB)": 28.47, "step": 6710, "train_speed(iter/s)": 0.432593 }, { "acc": 0.84218922, "epoch": 0.18181572035848698, "grad_norm": 9.23974895477295, "learning_rate": 9.892923811296943e-06, "loss": 0.74631977, "memory(GiB)": 28.47, "step": 6715, "train_speed(iter/s)": 0.432596 }, { "acc": 0.84319887, "epoch": 0.18195110064170253, "grad_norm": 9.882576942443848, "learning_rate": 9.893759429016456e-06, "loss": 0.91711226, "memory(GiB)": 28.47, "step": 6720, "train_speed(iter/s)": 0.432597 }, { "acc": 0.82513361, "epoch": 0.18208648092491808, "grad_norm": 9.180719375610352, "learning_rate": 9.894594425227753e-06, "loss": 0.8973032, "memory(GiB)": 28.47, "step": 6725, "train_speed(iter/s)": 0.432595 }, { "acc": 0.82536449, "epoch": 0.18222186120813363, "grad_norm": 9.815922737121582, "learning_rate": 9.895428800854666e-06, "loss": 0.80820913, "memory(GiB)": 28.47, "step": 6730, "train_speed(iter/s)": 0.432594 }, { "acc": 0.84787092, "epoch": 0.1823572414913492, "grad_norm": 10.39901351928711, "learning_rate": 9.896262556818968e-06, "loss": 0.67609615, "memory(GiB)": 28.47, "step": 6735, "train_speed(iter/s)": 0.432595 }, { "acc": 0.84703054, "epoch": 0.18249262177456474, "grad_norm": 20.848777770996094, "learning_rate": 9.897095694040384e-06, "loss": 0.7407793, "memory(GiB)": 28.47, "step": 6740, "train_speed(iter/s)": 0.432597 }, { "acc": 0.84931297, "epoch": 0.1826280020577803, "grad_norm": 11.626336097717285, "learning_rate": 9.897928213436589e-06, "loss": 0.79119129, "memory(GiB)": 28.47, "step": 6745, "train_speed(iter/s)": 0.432599 }, { "acc": 0.86847229, "epoch": 0.18276338234099584, "grad_norm": 6.027079105377197, "learning_rate": 9.898760115923218e-06, "loss": 0.69183912, "memory(GiB)": 28.47, "step": 6750, "train_speed(iter/s)": 0.4326 }, { "acc": 0.82153244, "epoch": 0.1828987626242114, "grad_norm": 7.335066795349121, "learning_rate": 9.899591402413872e-06, "loss": 0.95751705, "memory(GiB)": 28.47, "step": 6755, "train_speed(iter/s)": 0.432599 }, { "acc": 0.83523827, "epoch": 0.18303414290742695, "grad_norm": 9.016904830932617, "learning_rate": 9.900422073820127e-06, "loss": 0.77294526, "memory(GiB)": 28.47, "step": 6760, "train_speed(iter/s)": 0.432598 }, { "acc": 0.80820274, "epoch": 0.1831695231906425, "grad_norm": 11.632920265197754, "learning_rate": 9.901252131051535e-06, "loss": 0.96756229, "memory(GiB)": 28.47, "step": 6765, "train_speed(iter/s)": 0.4326 }, { "acc": 0.85111389, "epoch": 0.18330490347385806, "grad_norm": 7.285400867462158, "learning_rate": 9.902081575015629e-06, "loss": 0.68620744, "memory(GiB)": 28.47, "step": 6770, "train_speed(iter/s)": 0.432602 }, { "acc": 0.83839817, "epoch": 0.1834402837570736, "grad_norm": 29.122350692749023, "learning_rate": 9.902910406617936e-06, "loss": 0.81202536, "memory(GiB)": 28.47, "step": 6775, "train_speed(iter/s)": 0.432604 }, { "acc": 0.85555153, "epoch": 0.18357566404028916, "grad_norm": 7.4435954093933105, "learning_rate": 9.903738626761982e-06, "loss": 0.63270559, "memory(GiB)": 28.47, "step": 6780, "train_speed(iter/s)": 0.432604 }, { "acc": 0.83411331, "epoch": 0.1837110443235047, "grad_norm": 24.962617874145508, "learning_rate": 9.904566236349284e-06, "loss": 0.74256063, "memory(GiB)": 28.47, "step": 6785, "train_speed(iter/s)": 0.432602 }, { "acc": 0.82293091, "epoch": 0.18384642460672027, "grad_norm": 14.452526092529297, "learning_rate": 9.905393236279376e-06, "loss": 0.92316694, "memory(GiB)": 28.47, "step": 6790, "train_speed(iter/s)": 0.432604 }, { "acc": 0.84594975, "epoch": 0.18398180488993582, "grad_norm": 19.98086166381836, "learning_rate": 9.906219627449805e-06, "loss": 0.72424097, "memory(GiB)": 28.47, "step": 6795, "train_speed(iter/s)": 0.432605 }, { "acc": 0.8095171, "epoch": 0.18411718517315137, "grad_norm": 11.864405632019043, "learning_rate": 9.907045410756132e-06, "loss": 0.84846249, "memory(GiB)": 28.47, "step": 6800, "train_speed(iter/s)": 0.432607 }, { "acc": 0.81941252, "epoch": 0.18425256545636692, "grad_norm": 16.403669357299805, "learning_rate": 9.90787058709195e-06, "loss": 0.93439465, "memory(GiB)": 28.47, "step": 6805, "train_speed(iter/s)": 0.432606 }, { "acc": 0.83713379, "epoch": 0.18438794573958248, "grad_norm": 5.282078266143799, "learning_rate": 9.908695157348874e-06, "loss": 0.75857692, "memory(GiB)": 28.47, "step": 6810, "train_speed(iter/s)": 0.432607 }, { "acc": 0.83169918, "epoch": 0.18452332602279803, "grad_norm": 11.963404655456543, "learning_rate": 9.909519122416565e-06, "loss": 0.98902617, "memory(GiB)": 28.47, "step": 6815, "train_speed(iter/s)": 0.432607 }, { "acc": 0.81028633, "epoch": 0.18465870630601358, "grad_norm": 28.15664291381836, "learning_rate": 9.910342483182723e-06, "loss": 1.03791361, "memory(GiB)": 28.47, "step": 6820, "train_speed(iter/s)": 0.432609 }, { "acc": 0.82632475, "epoch": 0.18479408658922913, "grad_norm": 14.315866470336914, "learning_rate": 9.911165240533093e-06, "loss": 0.82625923, "memory(GiB)": 28.47, "step": 6825, "train_speed(iter/s)": 0.43261 }, { "acc": 0.81732216, "epoch": 0.1849294668724447, "grad_norm": 14.226173400878906, "learning_rate": 9.911987395351482e-06, "loss": 0.89931898, "memory(GiB)": 28.47, "step": 6830, "train_speed(iter/s)": 0.432611 }, { "acc": 0.85242443, "epoch": 0.18506484715566024, "grad_norm": 18.775522232055664, "learning_rate": 9.912808948519749e-06, "loss": 0.71571636, "memory(GiB)": 28.47, "step": 6835, "train_speed(iter/s)": 0.432615 }, { "acc": 0.81425209, "epoch": 0.1852002274388758, "grad_norm": 13.106413841247559, "learning_rate": 9.913629900917818e-06, "loss": 0.88545542, "memory(GiB)": 28.47, "step": 6840, "train_speed(iter/s)": 0.432617 }, { "acc": 0.8047245, "epoch": 0.18533560772209134, "grad_norm": 8.55036735534668, "learning_rate": 9.914450253423692e-06, "loss": 0.95285883, "memory(GiB)": 28.47, "step": 6845, "train_speed(iter/s)": 0.432618 }, { "acc": 0.83229885, "epoch": 0.1854709880053069, "grad_norm": 5.164578914642334, "learning_rate": 9.915270006913445e-06, "loss": 0.84428539, "memory(GiB)": 28.47, "step": 6850, "train_speed(iter/s)": 0.43262 }, { "acc": 0.82451153, "epoch": 0.18560636828852245, "grad_norm": 8.577277183532715, "learning_rate": 9.916089162261233e-06, "loss": 0.86086397, "memory(GiB)": 28.47, "step": 6855, "train_speed(iter/s)": 0.432621 }, { "acc": 0.80717564, "epoch": 0.185741748571738, "grad_norm": 6.944357395172119, "learning_rate": 9.916907720339301e-06, "loss": 0.95596657, "memory(GiB)": 28.47, "step": 6860, "train_speed(iter/s)": 0.432618 }, { "acc": 0.85164795, "epoch": 0.18587712885495355, "grad_norm": 10.772153854370117, "learning_rate": 9.917725682017986e-06, "loss": 0.74131293, "memory(GiB)": 28.47, "step": 6865, "train_speed(iter/s)": 0.432618 }, { "acc": 0.83220921, "epoch": 0.1860125091381691, "grad_norm": 3.6503233909606934, "learning_rate": 9.918543048165732e-06, "loss": 0.92816849, "memory(GiB)": 28.47, "step": 6870, "train_speed(iter/s)": 0.432617 }, { "acc": 0.84343958, "epoch": 0.18614788942138466, "grad_norm": 8.039510726928711, "learning_rate": 9.919359819649073e-06, "loss": 0.76395512, "memory(GiB)": 28.47, "step": 6875, "train_speed(iter/s)": 0.432619 }, { "acc": 0.81962481, "epoch": 0.1862832697046002, "grad_norm": 8.766990661621094, "learning_rate": 9.920175997332664e-06, "loss": 0.89965172, "memory(GiB)": 28.47, "step": 6880, "train_speed(iter/s)": 0.432621 }, { "acc": 0.82143412, "epoch": 0.18641864998781577, "grad_norm": 9.129606246948242, "learning_rate": 9.920991582079275e-06, "loss": 0.97232857, "memory(GiB)": 28.47, "step": 6885, "train_speed(iter/s)": 0.432623 }, { "acc": 0.8426589, "epoch": 0.18655403027103132, "grad_norm": 7.202159404754639, "learning_rate": 9.921806574749794e-06, "loss": 0.70513525, "memory(GiB)": 28.47, "step": 6890, "train_speed(iter/s)": 0.432624 }, { "acc": 0.82303743, "epoch": 0.18668941055424687, "grad_norm": 9.800485610961914, "learning_rate": 9.922620976203234e-06, "loss": 0.97600269, "memory(GiB)": 28.47, "step": 6895, "train_speed(iter/s)": 0.432626 }, { "acc": 0.82529316, "epoch": 0.18682479083746242, "grad_norm": 6.6016693115234375, "learning_rate": 9.923434787296742e-06, "loss": 0.96003723, "memory(GiB)": 28.47, "step": 6900, "train_speed(iter/s)": 0.432626 }, { "acc": 0.82921658, "epoch": 0.18696017112067798, "grad_norm": 9.153133392333984, "learning_rate": 9.924248008885603e-06, "loss": 0.75823011, "memory(GiB)": 28.47, "step": 6905, "train_speed(iter/s)": 0.432629 }, { "acc": 0.82653971, "epoch": 0.18709555140389353, "grad_norm": 80.48800659179688, "learning_rate": 9.925060641823246e-06, "loss": 0.8088419, "memory(GiB)": 28.47, "step": 6910, "train_speed(iter/s)": 0.43263 }, { "acc": 0.83438797, "epoch": 0.18723093168710908, "grad_norm": 14.1777925491333, "learning_rate": 9.925872686961243e-06, "loss": 0.77363644, "memory(GiB)": 28.47, "step": 6915, "train_speed(iter/s)": 0.432631 }, { "acc": 0.83829584, "epoch": 0.18736631197032463, "grad_norm": 10.290044784545898, "learning_rate": 9.926684145149327e-06, "loss": 0.82186451, "memory(GiB)": 28.47, "step": 6920, "train_speed(iter/s)": 0.432633 }, { "acc": 0.83381672, "epoch": 0.1875016922535402, "grad_norm": 20.46592140197754, "learning_rate": 9.927495017235378e-06, "loss": 0.85536079, "memory(GiB)": 28.47, "step": 6925, "train_speed(iter/s)": 0.432634 }, { "acc": 0.85798168, "epoch": 0.18763707253675574, "grad_norm": 22.760221481323242, "learning_rate": 9.928305304065455e-06, "loss": 0.68725624, "memory(GiB)": 28.47, "step": 6930, "train_speed(iter/s)": 0.432633 }, { "acc": 0.84487677, "epoch": 0.1877724528199713, "grad_norm": 7.4574294090271, "learning_rate": 9.929115006483775e-06, "loss": 0.80345306, "memory(GiB)": 28.47, "step": 6935, "train_speed(iter/s)": 0.432634 }, { "acc": 0.84358082, "epoch": 0.18790783310318684, "grad_norm": 45.78912353515625, "learning_rate": 9.929924125332735e-06, "loss": 0.80083923, "memory(GiB)": 28.47, "step": 6940, "train_speed(iter/s)": 0.432635 }, { "acc": 0.83563461, "epoch": 0.1880432133864024, "grad_norm": 4.918540000915527, "learning_rate": 9.930732661452906e-06, "loss": 0.72274046, "memory(GiB)": 28.47, "step": 6945, "train_speed(iter/s)": 0.432638 }, { "acc": 0.83584967, "epoch": 0.18817859366961795, "grad_norm": 20.337669372558594, "learning_rate": 9.931540615683054e-06, "loss": 0.88301563, "memory(GiB)": 28.47, "step": 6950, "train_speed(iter/s)": 0.432639 }, { "acc": 0.8272687, "epoch": 0.1883139739528335, "grad_norm": 9.622770309448242, "learning_rate": 9.932347988860125e-06, "loss": 0.90175409, "memory(GiB)": 28.47, "step": 6955, "train_speed(iter/s)": 0.432641 }, { "acc": 0.85664778, "epoch": 0.18844935423604905, "grad_norm": 7.8653364181518555, "learning_rate": 9.933154781819272e-06, "loss": 0.72147379, "memory(GiB)": 28.47, "step": 6960, "train_speed(iter/s)": 0.432642 }, { "acc": 0.81233368, "epoch": 0.1885847345192646, "grad_norm": 20.524263381958008, "learning_rate": 9.933960995393837e-06, "loss": 0.96493702, "memory(GiB)": 28.47, "step": 6965, "train_speed(iter/s)": 0.432641 }, { "acc": 0.86788673, "epoch": 0.18872011480248016, "grad_norm": 8.233152389526367, "learning_rate": 9.934766630415374e-06, "loss": 0.7038085, "memory(GiB)": 28.47, "step": 6970, "train_speed(iter/s)": 0.432641 }, { "acc": 0.82916775, "epoch": 0.1888554950856957, "grad_norm": 61.772647857666016, "learning_rate": 9.935571687713649e-06, "loss": 0.88604851, "memory(GiB)": 28.47, "step": 6975, "train_speed(iter/s)": 0.43264 }, { "acc": 0.83836098, "epoch": 0.18899087536891127, "grad_norm": 37.20457077026367, "learning_rate": 9.93637616811664e-06, "loss": 0.84288836, "memory(GiB)": 28.47, "step": 6980, "train_speed(iter/s)": 0.432639 }, { "acc": 0.83026981, "epoch": 0.18912625565212682, "grad_norm": 8.948814392089844, "learning_rate": 9.93718007245055e-06, "loss": 0.84388332, "memory(GiB)": 28.47, "step": 6985, "train_speed(iter/s)": 0.432642 }, { "acc": 0.852349, "epoch": 0.18926163593534237, "grad_norm": 20.049163818359375, "learning_rate": 9.937983401539806e-06, "loss": 0.68289604, "memory(GiB)": 28.47, "step": 6990, "train_speed(iter/s)": 0.432644 }, { "acc": 0.83389387, "epoch": 0.18939701621855792, "grad_norm": 30.657766342163086, "learning_rate": 9.93878615620707e-06, "loss": 0.89692488, "memory(GiB)": 28.47, "step": 6995, "train_speed(iter/s)": 0.432646 }, { "acc": 0.84168921, "epoch": 0.18953239650177348, "grad_norm": 6.606830596923828, "learning_rate": 9.939588337273237e-06, "loss": 0.74869461, "memory(GiB)": 28.47, "step": 7000, "train_speed(iter/s)": 0.432643 }, { "acc": 0.83325272, "epoch": 0.18966777678498903, "grad_norm": 11.859999656677246, "learning_rate": 9.940389945557445e-06, "loss": 0.782827, "memory(GiB)": 28.47, "step": 7005, "train_speed(iter/s)": 0.432639 }, { "acc": 0.82945843, "epoch": 0.18980315706820458, "grad_norm": 24.715898513793945, "learning_rate": 9.941190981877076e-06, "loss": 0.83628292, "memory(GiB)": 28.47, "step": 7010, "train_speed(iter/s)": 0.432637 }, { "acc": 0.81585426, "epoch": 0.18993853735142013, "grad_norm": 14.73061466217041, "learning_rate": 9.941991447047768e-06, "loss": 0.89473238, "memory(GiB)": 28.47, "step": 7015, "train_speed(iter/s)": 0.432631 }, { "acc": 0.81560268, "epoch": 0.1900739176346357, "grad_norm": 10.87050724029541, "learning_rate": 9.942791341883414e-06, "loss": 0.91863661, "memory(GiB)": 28.47, "step": 7020, "train_speed(iter/s)": 0.432628 }, { "acc": 0.83452721, "epoch": 0.19020929791785124, "grad_norm": 12.141420364379883, "learning_rate": 9.943590667196165e-06, "loss": 0.95469761, "memory(GiB)": 28.47, "step": 7025, "train_speed(iter/s)": 0.432623 }, { "acc": 0.8350872, "epoch": 0.1903446782010668, "grad_norm": 18.159603118896484, "learning_rate": 9.944389423796446e-06, "loss": 0.75422449, "memory(GiB)": 28.47, "step": 7030, "train_speed(iter/s)": 0.432619 }, { "acc": 0.82009583, "epoch": 0.19048005848428234, "grad_norm": 12.969120979309082, "learning_rate": 9.945187612492945e-06, "loss": 0.91790447, "memory(GiB)": 28.47, "step": 7035, "train_speed(iter/s)": 0.432604 }, { "acc": 0.79906683, "epoch": 0.1906154387674979, "grad_norm": 12.786957740783691, "learning_rate": 9.94598523409263e-06, "loss": 0.95957279, "memory(GiB)": 28.47, "step": 7040, "train_speed(iter/s)": 0.432604 }, { "acc": 0.82397671, "epoch": 0.19075081905071345, "grad_norm": 9.480546951293945, "learning_rate": 9.946782289400755e-06, "loss": 0.83065815, "memory(GiB)": 28.47, "step": 7045, "train_speed(iter/s)": 0.432603 }, { "acc": 0.83112516, "epoch": 0.190886199333929, "grad_norm": 21.848220825195312, "learning_rate": 9.94757877922085e-06, "loss": 0.81589689, "memory(GiB)": 28.47, "step": 7050, "train_speed(iter/s)": 0.432604 }, { "acc": 0.82141762, "epoch": 0.19102157961714455, "grad_norm": 18.978513717651367, "learning_rate": 9.948374704354745e-06, "loss": 0.91209669, "memory(GiB)": 28.47, "step": 7055, "train_speed(iter/s)": 0.432606 }, { "acc": 0.84317112, "epoch": 0.1911569599003601, "grad_norm": 12.93516731262207, "learning_rate": 9.949170065602559e-06, "loss": 0.79215651, "memory(GiB)": 28.47, "step": 7060, "train_speed(iter/s)": 0.432607 }, { "acc": 0.85396519, "epoch": 0.19129234018357566, "grad_norm": 6.469468116760254, "learning_rate": 9.949964863762718e-06, "loss": 0.70321612, "memory(GiB)": 28.47, "step": 7065, "train_speed(iter/s)": 0.432608 }, { "acc": 0.8399642, "epoch": 0.1914277204667912, "grad_norm": 7.223949432373047, "learning_rate": 9.950759099631951e-06, "loss": 0.83378544, "memory(GiB)": 28.47, "step": 7070, "train_speed(iter/s)": 0.432607 }, { "acc": 0.84208508, "epoch": 0.19156310075000677, "grad_norm": 6.939309597015381, "learning_rate": 9.951552774005294e-06, "loss": 0.70127764, "memory(GiB)": 28.47, "step": 7075, "train_speed(iter/s)": 0.432609 }, { "acc": 0.84744177, "epoch": 0.19169848103322232, "grad_norm": 10.152613639831543, "learning_rate": 9.9523458876761e-06, "loss": 0.81414347, "memory(GiB)": 28.47, "step": 7080, "train_speed(iter/s)": 0.432611 }, { "acc": 0.82102232, "epoch": 0.19183386131643787, "grad_norm": 11.3368558883667, "learning_rate": 9.953138441436044e-06, "loss": 0.88130121, "memory(GiB)": 28.47, "step": 7085, "train_speed(iter/s)": 0.432612 }, { "acc": 0.81933107, "epoch": 0.19196924159965342, "grad_norm": 18.3112735748291, "learning_rate": 9.953930436075123e-06, "loss": 0.79238806, "memory(GiB)": 28.47, "step": 7090, "train_speed(iter/s)": 0.432613 }, { "acc": 0.83668823, "epoch": 0.19210462188286898, "grad_norm": 8.534195899963379, "learning_rate": 9.954721872381662e-06, "loss": 0.83093815, "memory(GiB)": 28.47, "step": 7095, "train_speed(iter/s)": 0.432614 }, { "acc": 0.84156151, "epoch": 0.19224000216608453, "grad_norm": 26.528608322143555, "learning_rate": 9.955512751142327e-06, "loss": 0.73588214, "memory(GiB)": 28.47, "step": 7100, "train_speed(iter/s)": 0.432616 }, { "acc": 0.82603703, "epoch": 0.19237538244930008, "grad_norm": 10.660347938537598, "learning_rate": 9.956303073142116e-06, "loss": 0.86869135, "memory(GiB)": 28.47, "step": 7105, "train_speed(iter/s)": 0.432618 }, { "acc": 0.82688522, "epoch": 0.19251076273251563, "grad_norm": 13.044265747070312, "learning_rate": 9.95709283916437e-06, "loss": 1.02640705, "memory(GiB)": 28.47, "step": 7110, "train_speed(iter/s)": 0.432618 }, { "acc": 0.82382107, "epoch": 0.19264614301573119, "grad_norm": 9.637751579284668, "learning_rate": 9.957882049990779e-06, "loss": 0.88143702, "memory(GiB)": 28.47, "step": 7115, "train_speed(iter/s)": 0.43262 }, { "acc": 0.86047201, "epoch": 0.19278152329894674, "grad_norm": 9.81454849243164, "learning_rate": 9.958670706401388e-06, "loss": 0.63652096, "memory(GiB)": 28.47, "step": 7120, "train_speed(iter/s)": 0.432615 }, { "acc": 0.83545361, "epoch": 0.1929169035821623, "grad_norm": 8.532998085021973, "learning_rate": 9.9594588091746e-06, "loss": 0.83364954, "memory(GiB)": 28.47, "step": 7125, "train_speed(iter/s)": 0.432616 }, { "acc": 0.82785902, "epoch": 0.19305228386537784, "grad_norm": 42.5789680480957, "learning_rate": 9.960246359087172e-06, "loss": 0.94364147, "memory(GiB)": 28.47, "step": 7130, "train_speed(iter/s)": 0.432616 }, { "acc": 0.80790548, "epoch": 0.1931876641485934, "grad_norm": 13.203764915466309, "learning_rate": 9.961033356914238e-06, "loss": 0.97558403, "memory(GiB)": 28.47, "step": 7135, "train_speed(iter/s)": 0.432617 }, { "acc": 0.84251223, "epoch": 0.19332304443180895, "grad_norm": 8.352291107177734, "learning_rate": 9.961819803429296e-06, "loss": 0.80677471, "memory(GiB)": 28.47, "step": 7140, "train_speed(iter/s)": 0.432618 }, { "acc": 0.81956234, "epoch": 0.1934584247150245, "grad_norm": 12.278369903564453, "learning_rate": 9.96260569940422e-06, "loss": 0.89610844, "memory(GiB)": 28.47, "step": 7145, "train_speed(iter/s)": 0.432618 }, { "acc": 0.80247936, "epoch": 0.19359380499824005, "grad_norm": 12.86889362335205, "learning_rate": 9.963391045609269e-06, "loss": 0.95652781, "memory(GiB)": 28.47, "step": 7150, "train_speed(iter/s)": 0.432618 }, { "acc": 0.83013926, "epoch": 0.1937291852814556, "grad_norm": 16.1982364654541, "learning_rate": 9.96417584281308e-06, "loss": 0.84494791, "memory(GiB)": 28.47, "step": 7155, "train_speed(iter/s)": 0.43262 }, { "acc": 0.83379498, "epoch": 0.19386456556467116, "grad_norm": 28.6771240234375, "learning_rate": 9.964960091782683e-06, "loss": 0.85252771, "memory(GiB)": 28.47, "step": 7160, "train_speed(iter/s)": 0.432621 }, { "acc": 0.81756754, "epoch": 0.1939999458478867, "grad_norm": 13.500059127807617, "learning_rate": 9.965743793283504e-06, "loss": 1.01967945, "memory(GiB)": 28.47, "step": 7165, "train_speed(iter/s)": 0.432623 }, { "acc": 0.83775349, "epoch": 0.19413532613110226, "grad_norm": 17.51290512084961, "learning_rate": 9.966526948079363e-06, "loss": 0.78719034, "memory(GiB)": 28.47, "step": 7170, "train_speed(iter/s)": 0.432623 }, { "acc": 0.80706635, "epoch": 0.19427070641431782, "grad_norm": 11.310343742370605, "learning_rate": 9.967309556932479e-06, "loss": 0.96776104, "memory(GiB)": 28.47, "step": 7175, "train_speed(iter/s)": 0.432625 }, { "acc": 0.84346876, "epoch": 0.19440608669753337, "grad_norm": 13.298775672912598, "learning_rate": 9.968091620603489e-06, "loss": 0.70874219, "memory(GiB)": 28.47, "step": 7180, "train_speed(iter/s)": 0.432627 }, { "acc": 0.8527482, "epoch": 0.19454146698074892, "grad_norm": 7.9031548500061035, "learning_rate": 9.968873139851433e-06, "loss": 0.70482912, "memory(GiB)": 28.47, "step": 7185, "train_speed(iter/s)": 0.43263 }, { "acc": 0.85057268, "epoch": 0.19467684726396448, "grad_norm": 15.166648864746094, "learning_rate": 9.96965411543377e-06, "loss": 0.75419698, "memory(GiB)": 28.47, "step": 7190, "train_speed(iter/s)": 0.432629 }, { "acc": 0.83343525, "epoch": 0.19481222754718003, "grad_norm": 11.920531272888184, "learning_rate": 9.970434548106375e-06, "loss": 0.85270824, "memory(GiB)": 28.47, "step": 7195, "train_speed(iter/s)": 0.432632 }, { "acc": 0.82544327, "epoch": 0.19494760783039558, "grad_norm": 18.52738380432129, "learning_rate": 9.971214438623557e-06, "loss": 0.88264351, "memory(GiB)": 28.47, "step": 7200, "train_speed(iter/s)": 0.432632 }, { "acc": 0.84044676, "epoch": 0.19508298811361113, "grad_norm": 8.006918907165527, "learning_rate": 9.971993787738045e-06, "loss": 0.81542606, "memory(GiB)": 28.47, "step": 7205, "train_speed(iter/s)": 0.432633 }, { "acc": 0.82940922, "epoch": 0.19521836839682669, "grad_norm": 15.532279968261719, "learning_rate": 9.972772596201005e-06, "loss": 0.8853981, "memory(GiB)": 28.47, "step": 7210, "train_speed(iter/s)": 0.432635 }, { "acc": 0.82667427, "epoch": 0.19535374868004224, "grad_norm": 13.088403701782227, "learning_rate": 9.97355086476204e-06, "loss": 0.98060818, "memory(GiB)": 28.47, "step": 7215, "train_speed(iter/s)": 0.432634 }, { "acc": 0.83762264, "epoch": 0.1954891289632578, "grad_norm": 7.622903347015381, "learning_rate": 9.9743285941692e-06, "loss": 0.86376266, "memory(GiB)": 28.47, "step": 7220, "train_speed(iter/s)": 0.432636 }, { "acc": 0.82326965, "epoch": 0.19562450924647334, "grad_norm": 8.751079559326172, "learning_rate": 9.975105785168972e-06, "loss": 0.93104296, "memory(GiB)": 28.47, "step": 7225, "train_speed(iter/s)": 0.432634 }, { "acc": 0.85571842, "epoch": 0.1957598895296889, "grad_norm": 9.608154296875, "learning_rate": 9.9758824385063e-06, "loss": 0.71184425, "memory(GiB)": 28.47, "step": 7230, "train_speed(iter/s)": 0.432635 }, { "acc": 0.82370033, "epoch": 0.19589526981290445, "grad_norm": 12.262713432312012, "learning_rate": 9.976658554924583e-06, "loss": 0.88046236, "memory(GiB)": 28.47, "step": 7235, "train_speed(iter/s)": 0.432635 }, { "acc": 0.81665783, "epoch": 0.19603065009612, "grad_norm": 15.592447280883789, "learning_rate": 9.977434135165679e-06, "loss": 0.99404631, "memory(GiB)": 28.47, "step": 7240, "train_speed(iter/s)": 0.432637 }, { "acc": 0.84179239, "epoch": 0.19616603037933555, "grad_norm": 9.839834213256836, "learning_rate": 9.978209179969905e-06, "loss": 0.85128288, "memory(GiB)": 28.47, "step": 7245, "train_speed(iter/s)": 0.432637 }, { "acc": 0.86236067, "epoch": 0.1963014106625511, "grad_norm": 11.874628067016602, "learning_rate": 9.978983690076052e-06, "loss": 0.70503864, "memory(GiB)": 28.47, "step": 7250, "train_speed(iter/s)": 0.432639 }, { "acc": 0.85951862, "epoch": 0.19643679094576666, "grad_norm": 9.177205085754395, "learning_rate": 9.979757666221382e-06, "loss": 0.79373841, "memory(GiB)": 28.47, "step": 7255, "train_speed(iter/s)": 0.432642 }, { "acc": 0.83142052, "epoch": 0.1965721712289822, "grad_norm": 8.691389083862305, "learning_rate": 9.98053110914163e-06, "loss": 0.87203102, "memory(GiB)": 28.47, "step": 7260, "train_speed(iter/s)": 0.432643 }, { "acc": 0.82504292, "epoch": 0.19670755151219776, "grad_norm": 10.81818962097168, "learning_rate": 9.981304019571013e-06, "loss": 0.96319122, "memory(GiB)": 28.47, "step": 7265, "train_speed(iter/s)": 0.432643 }, { "acc": 0.84885941, "epoch": 0.19684293179541332, "grad_norm": 6.139666557312012, "learning_rate": 9.98207639824223e-06, "loss": 0.71815624, "memory(GiB)": 28.47, "step": 7270, "train_speed(iter/s)": 0.432644 }, { "acc": 0.81440926, "epoch": 0.19697831207862887, "grad_norm": 13.129656791687012, "learning_rate": 9.982848245886477e-06, "loss": 0.99294643, "memory(GiB)": 28.47, "step": 7275, "train_speed(iter/s)": 0.432644 }, { "acc": 0.86531143, "epoch": 0.19711369236184442, "grad_norm": 20.271236419677734, "learning_rate": 9.983619563233432e-06, "loss": 0.66180573, "memory(GiB)": 28.47, "step": 7280, "train_speed(iter/s)": 0.432646 }, { "acc": 0.84135418, "epoch": 0.19724907264505998, "grad_norm": 8.257466316223145, "learning_rate": 9.984390351011282e-06, "loss": 0.83527946, "memory(GiB)": 28.47, "step": 7285, "train_speed(iter/s)": 0.432647 }, { "acc": 0.84020939, "epoch": 0.19738445292827553, "grad_norm": 8.935830116271973, "learning_rate": 9.9851606099467e-06, "loss": 0.74276857, "memory(GiB)": 28.47, "step": 7290, "train_speed(iter/s)": 0.432648 }, { "acc": 0.83259678, "epoch": 0.19751983321149108, "grad_norm": 10.810580253601074, "learning_rate": 9.985930340764879e-06, "loss": 0.81126537, "memory(GiB)": 28.47, "step": 7295, "train_speed(iter/s)": 0.432651 }, { "acc": 0.8438488, "epoch": 0.19765521349470663, "grad_norm": 6.379395484924316, "learning_rate": 9.986699544189515e-06, "loss": 0.79978304, "memory(GiB)": 28.47, "step": 7300, "train_speed(iter/s)": 0.432652 }, { "acc": 0.81459389, "epoch": 0.19779059377792219, "grad_norm": 45.16960144042969, "learning_rate": 9.987468220942813e-06, "loss": 0.95724516, "memory(GiB)": 28.47, "step": 7305, "train_speed(iter/s)": 0.432653 }, { "acc": 0.82892284, "epoch": 0.19792597406113774, "grad_norm": 27.219051361083984, "learning_rate": 9.988236371745504e-06, "loss": 0.95036716, "memory(GiB)": 28.47, "step": 7310, "train_speed(iter/s)": 0.432655 }, { "acc": 0.83794432, "epoch": 0.1980613543443533, "grad_norm": 26.33360481262207, "learning_rate": 9.989003997316835e-06, "loss": 0.83748341, "memory(GiB)": 28.47, "step": 7315, "train_speed(iter/s)": 0.432656 }, { "acc": 0.84315834, "epoch": 0.19819673462756884, "grad_norm": 9.785826683044434, "learning_rate": 9.989771098374584e-06, "loss": 0.8484045, "memory(GiB)": 28.47, "step": 7320, "train_speed(iter/s)": 0.432655 }, { "acc": 0.84941216, "epoch": 0.1983321149107844, "grad_norm": 16.91504669189453, "learning_rate": 9.990537675635051e-06, "loss": 0.8630312, "memory(GiB)": 28.47, "step": 7325, "train_speed(iter/s)": 0.432657 }, { "acc": 0.85916805, "epoch": 0.19846749519399995, "grad_norm": 11.134766578674316, "learning_rate": 9.991303729813074e-06, "loss": 0.74734755, "memory(GiB)": 28.47, "step": 7330, "train_speed(iter/s)": 0.432658 }, { "acc": 0.84334822, "epoch": 0.1986028754772155, "grad_norm": 9.12582015991211, "learning_rate": 9.992069261622032e-06, "loss": 0.7962142, "memory(GiB)": 28.47, "step": 7335, "train_speed(iter/s)": 0.432659 }, { "acc": 0.83015232, "epoch": 0.19873825576043105, "grad_norm": 12.779552459716797, "learning_rate": 9.99283427177384e-06, "loss": 0.794489, "memory(GiB)": 28.47, "step": 7340, "train_speed(iter/s)": 0.432658 }, { "acc": 0.84394398, "epoch": 0.1988736360436466, "grad_norm": 6.72313117980957, "learning_rate": 9.993598760978958e-06, "loss": 0.73037071, "memory(GiB)": 28.47, "step": 7345, "train_speed(iter/s)": 0.43266 }, { "acc": 0.84309931, "epoch": 0.19900901632686216, "grad_norm": 9.795076370239258, "learning_rate": 9.9943627299464e-06, "loss": 0.73229628, "memory(GiB)": 28.47, "step": 7350, "train_speed(iter/s)": 0.432657 }, { "acc": 0.84233093, "epoch": 0.1991443966100777, "grad_norm": 11.772644996643066, "learning_rate": 9.995126179383734e-06, "loss": 0.7468308, "memory(GiB)": 28.47, "step": 7355, "train_speed(iter/s)": 0.43266 }, { "acc": 0.84410763, "epoch": 0.19927977689329326, "grad_norm": 10.878520011901855, "learning_rate": 9.995889109997081e-06, "loss": 0.77764301, "memory(GiB)": 28.47, "step": 7360, "train_speed(iter/s)": 0.43266 }, { "acc": 0.82241116, "epoch": 0.19941515717650882, "grad_norm": 9.930004119873047, "learning_rate": 9.996651522491125e-06, "loss": 0.92154503, "memory(GiB)": 28.47, "step": 7365, "train_speed(iter/s)": 0.432662 }, { "acc": 0.81747932, "epoch": 0.19955053745972437, "grad_norm": 10.98591423034668, "learning_rate": 9.997413417569119e-06, "loss": 1.03522453, "memory(GiB)": 28.47, "step": 7370, "train_speed(iter/s)": 0.432662 }, { "acc": 0.84110594, "epoch": 0.19968591774293992, "grad_norm": 9.363046646118164, "learning_rate": 9.99817479593288e-06, "loss": 0.80894051, "memory(GiB)": 28.47, "step": 7375, "train_speed(iter/s)": 0.432661 }, { "acc": 0.83686094, "epoch": 0.19982129802615547, "grad_norm": 12.807454109191895, "learning_rate": 9.998935658282798e-06, "loss": 0.73086867, "memory(GiB)": 28.47, "step": 7380, "train_speed(iter/s)": 0.432663 }, { "acc": 0.82151489, "epoch": 0.19995667830937103, "grad_norm": 9.430710792541504, "learning_rate": 9.999696005317847e-06, "loss": 0.92043953, "memory(GiB)": 28.47, "step": 7385, "train_speed(iter/s)": 0.432664 }, { "acc": 0.82008772, "epoch": 0.20009205859258658, "grad_norm": 9.424635887145996, "learning_rate": 9.99999998872686e-06, "loss": 0.98719692, "memory(GiB)": 28.47, "step": 7390, "train_speed(iter/s)": 0.432665 }, { "acc": 0.84171181, "epoch": 0.20022743887580213, "grad_norm": 5.904183387756348, "learning_rate": 9.999999919835442e-06, "loss": 0.77517958, "memory(GiB)": 28.47, "step": 7395, "train_speed(iter/s)": 0.432667 }, { "acc": 0.80900421, "epoch": 0.20036281915901769, "grad_norm": 18.009727478027344, "learning_rate": 9.999999788315464e-06, "loss": 1.04228621, "memory(GiB)": 28.47, "step": 7400, "train_speed(iter/s)": 0.432668 }, { "acc": 0.81805096, "epoch": 0.20049819944223324, "grad_norm": 17.65770149230957, "learning_rate": 9.999999594166927e-06, "loss": 0.92476921, "memory(GiB)": 28.47, "step": 7405, "train_speed(iter/s)": 0.432669 }, { "acc": 0.82924995, "epoch": 0.2006335797254488, "grad_norm": 12.506562232971191, "learning_rate": 9.999999337389835e-06, "loss": 0.84854717, "memory(GiB)": 28.47, "step": 7410, "train_speed(iter/s)": 0.43267 }, { "acc": 0.83217859, "epoch": 0.20076896000866434, "grad_norm": 9.269903182983398, "learning_rate": 9.999999017984188e-06, "loss": 0.84246807, "memory(GiB)": 28.47, "step": 7415, "train_speed(iter/s)": 0.432668 }, { "acc": 0.83343763, "epoch": 0.2009043402918799, "grad_norm": 8.7667236328125, "learning_rate": 9.999998635949991e-06, "loss": 0.80941448, "memory(GiB)": 28.47, "step": 7420, "train_speed(iter/s)": 0.43267 }, { "acc": 0.83708706, "epoch": 0.20103972057509545, "grad_norm": 6.181110858917236, "learning_rate": 9.999998191287252e-06, "loss": 0.79905691, "memory(GiB)": 28.47, "step": 7425, "train_speed(iter/s)": 0.432671 }, { "acc": 0.84578476, "epoch": 0.201175100858311, "grad_norm": 9.684298515319824, "learning_rate": 9.999997683995972e-06, "loss": 0.75885396, "memory(GiB)": 28.47, "step": 7430, "train_speed(iter/s)": 0.432672 }, { "acc": 0.82345486, "epoch": 0.20131048114152655, "grad_norm": 5.6334028244018555, "learning_rate": 9.999997114076162e-06, "loss": 0.87851372, "memory(GiB)": 28.47, "step": 7435, "train_speed(iter/s)": 0.432669 }, { "acc": 0.83400059, "epoch": 0.2014458614247421, "grad_norm": 14.351609230041504, "learning_rate": 9.999996481527824e-06, "loss": 0.84656515, "memory(GiB)": 28.47, "step": 7440, "train_speed(iter/s)": 0.43267 }, { "acc": 0.82362556, "epoch": 0.20158124170795766, "grad_norm": 8.049490928649902, "learning_rate": 9.99999578635097e-06, "loss": 0.89129658, "memory(GiB)": 28.47, "step": 7445, "train_speed(iter/s)": 0.432671 }, { "acc": 0.84859381, "epoch": 0.2017166219911732, "grad_norm": 9.506021499633789, "learning_rate": 9.999995028545609e-06, "loss": 0.79891653, "memory(GiB)": 28.47, "step": 7450, "train_speed(iter/s)": 0.432671 }, { "acc": 0.84172611, "epoch": 0.20185200227438876, "grad_norm": 11.215977668762207, "learning_rate": 9.999994208111746e-06, "loss": 0.77100682, "memory(GiB)": 28.47, "step": 7455, "train_speed(iter/s)": 0.432671 }, { "acc": 0.83897743, "epoch": 0.20198738255760432, "grad_norm": 18.448057174682617, "learning_rate": 9.999993325049395e-06, "loss": 0.83662968, "memory(GiB)": 28.47, "step": 7460, "train_speed(iter/s)": 0.432674 }, { "acc": 0.82872581, "epoch": 0.20212276284081987, "grad_norm": 6.080060958862305, "learning_rate": 9.999992379358566e-06, "loss": 0.8186655, "memory(GiB)": 28.47, "step": 7465, "train_speed(iter/s)": 0.432675 }, { "acc": 0.84024315, "epoch": 0.20225814312403542, "grad_norm": 8.49847412109375, "learning_rate": 9.99999137103927e-06, "loss": 0.79903669, "memory(GiB)": 28.47, "step": 7470, "train_speed(iter/s)": 0.432676 }, { "acc": 0.84249115, "epoch": 0.20239352340725097, "grad_norm": 10.878981590270996, "learning_rate": 9.999990300091522e-06, "loss": 0.76291509, "memory(GiB)": 28.47, "step": 7475, "train_speed(iter/s)": 0.432677 }, { "acc": 0.81978798, "epoch": 0.20252890369046653, "grad_norm": 10.505189895629883, "learning_rate": 9.999989166515333e-06, "loss": 0.88702259, "memory(GiB)": 28.47, "step": 7480, "train_speed(iter/s)": 0.432678 }, { "acc": 0.84857731, "epoch": 0.20266428397368208, "grad_norm": 9.30642032623291, "learning_rate": 9.999987970310719e-06, "loss": 0.76770139, "memory(GiB)": 28.47, "step": 7485, "train_speed(iter/s)": 0.432679 }, { "acc": 0.84632702, "epoch": 0.20279966425689763, "grad_norm": 4.96101713180542, "learning_rate": 9.999986711477695e-06, "loss": 0.68244104, "memory(GiB)": 28.47, "step": 7490, "train_speed(iter/s)": 0.432682 }, { "acc": 0.83489151, "epoch": 0.20293504454011319, "grad_norm": 5.701102256774902, "learning_rate": 9.999985390016275e-06, "loss": 0.86140137, "memory(GiB)": 28.47, "step": 7495, "train_speed(iter/s)": 0.432684 }, { "acc": 0.85824432, "epoch": 0.20307042482332874, "grad_norm": 5.584298133850098, "learning_rate": 9.999984005926476e-06, "loss": 0.65252562, "memory(GiB)": 28.47, "step": 7500, "train_speed(iter/s)": 0.432687 }, { "acc": 0.84043865, "epoch": 0.2032058051065443, "grad_norm": 6.8924241065979, "learning_rate": 9.999982559208316e-06, "loss": 0.74450502, "memory(GiB)": 28.47, "step": 7505, "train_speed(iter/s)": 0.432686 }, { "acc": 0.84251175, "epoch": 0.20334118538975984, "grad_norm": 7.911787509918213, "learning_rate": 9.999981049861815e-06, "loss": 0.82454357, "memory(GiB)": 28.47, "step": 7510, "train_speed(iter/s)": 0.432687 }, { "acc": 0.84832058, "epoch": 0.2034765656729754, "grad_norm": 11.765864372253418, "learning_rate": 9.999979477886987e-06, "loss": 0.74909205, "memory(GiB)": 28.47, "step": 7515, "train_speed(iter/s)": 0.432689 }, { "acc": 0.80815048, "epoch": 0.20361194595619095, "grad_norm": 6.02691125869751, "learning_rate": 9.999977843283855e-06, "loss": 0.98534107, "memory(GiB)": 28.47, "step": 7520, "train_speed(iter/s)": 0.432689 }, { "acc": 0.85415487, "epoch": 0.2037473262394065, "grad_norm": 17.16164207458496, "learning_rate": 9.99997614605244e-06, "loss": 0.72049427, "memory(GiB)": 28.47, "step": 7525, "train_speed(iter/s)": 0.432691 }, { "acc": 0.82124882, "epoch": 0.20388270652262205, "grad_norm": 13.619634628295898, "learning_rate": 9.999974386192763e-06, "loss": 0.85651064, "memory(GiB)": 28.47, "step": 7530, "train_speed(iter/s)": 0.432691 }, { "acc": 0.86722765, "epoch": 0.2040180868058376, "grad_norm": 9.235222816467285, "learning_rate": 9.999972563704845e-06, "loss": 0.6087369, "memory(GiB)": 28.47, "step": 7535, "train_speed(iter/s)": 0.432691 }, { "acc": 0.81463022, "epoch": 0.20415346708905316, "grad_norm": 12.928462982177734, "learning_rate": 9.999970678588708e-06, "loss": 1.04860497, "memory(GiB)": 28.47, "step": 7540, "train_speed(iter/s)": 0.432693 }, { "acc": 0.81532135, "epoch": 0.2042888473722687, "grad_norm": 10.786906242370605, "learning_rate": 9.999968730844379e-06, "loss": 0.94469891, "memory(GiB)": 28.47, "step": 7545, "train_speed(iter/s)": 0.432693 }, { "acc": 0.83992577, "epoch": 0.20442422765548426, "grad_norm": 6.2585248947143555, "learning_rate": 9.999966720471877e-06, "loss": 0.73458142, "memory(GiB)": 28.47, "step": 7550, "train_speed(iter/s)": 0.432694 }, { "acc": 0.81058788, "epoch": 0.20455960793869982, "grad_norm": 7.473820209503174, "learning_rate": 9.999964647471233e-06, "loss": 1.02007122, "memory(GiB)": 28.47, "step": 7555, "train_speed(iter/s)": 0.432694 }, { "acc": 0.79927845, "epoch": 0.20469498822191537, "grad_norm": 9.319891929626465, "learning_rate": 9.999962511842471e-06, "loss": 1.04622421, "memory(GiB)": 28.47, "step": 7560, "train_speed(iter/s)": 0.432692 }, { "acc": 0.81188927, "epoch": 0.20483036850513092, "grad_norm": 14.332473754882812, "learning_rate": 9.999960313585615e-06, "loss": 0.98078957, "memory(GiB)": 28.47, "step": 7565, "train_speed(iter/s)": 0.432692 }, { "acc": 0.8384964, "epoch": 0.20496574878834647, "grad_norm": 12.718974113464355, "learning_rate": 9.999958052700695e-06, "loss": 0.87250862, "memory(GiB)": 28.47, "step": 7570, "train_speed(iter/s)": 0.432695 }, { "acc": 0.85527639, "epoch": 0.20510112907156203, "grad_norm": 9.50553035736084, "learning_rate": 9.99995572918774e-06, "loss": 0.67762489, "memory(GiB)": 28.47, "step": 7575, "train_speed(iter/s)": 0.432693 }, { "acc": 0.83020573, "epoch": 0.20523650935477758, "grad_norm": 11.351916313171387, "learning_rate": 9.999953343046777e-06, "loss": 0.90505161, "memory(GiB)": 28.47, "step": 7580, "train_speed(iter/s)": 0.432695 }, { "acc": 0.86108208, "epoch": 0.20537188963799313, "grad_norm": 6.588757038116455, "learning_rate": 9.999950894277837e-06, "loss": 0.66238079, "memory(GiB)": 28.47, "step": 7585, "train_speed(iter/s)": 0.432696 }, { "acc": 0.83732538, "epoch": 0.20550726992120869, "grad_norm": 8.941829681396484, "learning_rate": 9.99994838288095e-06, "loss": 0.88513994, "memory(GiB)": 28.47, "step": 7590, "train_speed(iter/s)": 0.432697 }, { "acc": 0.82772961, "epoch": 0.20564265020442424, "grad_norm": 16.367647171020508, "learning_rate": 9.999945808856148e-06, "loss": 0.79291706, "memory(GiB)": 28.47, "step": 7595, "train_speed(iter/s)": 0.432698 }, { "acc": 0.82445364, "epoch": 0.2057780304876398, "grad_norm": 7.478921413421631, "learning_rate": 9.999943172203467e-06, "loss": 0.85733509, "memory(GiB)": 28.47, "step": 7600, "train_speed(iter/s)": 0.432698 }, { "acc": 0.81854115, "epoch": 0.20591341077085534, "grad_norm": 24.317668914794922, "learning_rate": 9.999940472922933e-06, "loss": 0.84215336, "memory(GiB)": 28.47, "step": 7605, "train_speed(iter/s)": 0.4327 }, { "acc": 0.8293293, "epoch": 0.2060487910540709, "grad_norm": 21.193822860717773, "learning_rate": 9.999937711014584e-06, "loss": 0.86268511, "memory(GiB)": 28.47, "step": 7610, "train_speed(iter/s)": 0.432701 }, { "acc": 0.81598282, "epoch": 0.20618417133728645, "grad_norm": 9.17041015625, "learning_rate": 9.999934886478454e-06, "loss": 0.97115927, "memory(GiB)": 28.47, "step": 7615, "train_speed(iter/s)": 0.432701 }, { "acc": 0.84237328, "epoch": 0.206319551620502, "grad_norm": 6.8164496421813965, "learning_rate": 9.999931999314579e-06, "loss": 0.76973085, "memory(GiB)": 28.47, "step": 7620, "train_speed(iter/s)": 0.432702 }, { "acc": 0.84448147, "epoch": 0.20645493190371755, "grad_norm": 7.4754133224487305, "learning_rate": 9.999929049522992e-06, "loss": 0.7916676, "memory(GiB)": 28.47, "step": 7625, "train_speed(iter/s)": 0.432703 }, { "acc": 0.80738726, "epoch": 0.2065903121869331, "grad_norm": 9.806488990783691, "learning_rate": 9.999926037103733e-06, "loss": 1.0397665, "memory(GiB)": 28.47, "step": 7630, "train_speed(iter/s)": 0.432702 }, { "acc": 0.8404253, "epoch": 0.20672569247014866, "grad_norm": 7.305280685424805, "learning_rate": 9.999922962056839e-06, "loss": 0.86044579, "memory(GiB)": 28.47, "step": 7635, "train_speed(iter/s)": 0.432705 }, { "acc": 0.83979559, "epoch": 0.2068610727533642, "grad_norm": 12.296158790588379, "learning_rate": 9.99991982438235e-06, "loss": 0.80487528, "memory(GiB)": 28.47, "step": 7640, "train_speed(iter/s)": 0.432704 }, { "acc": 0.8534441, "epoch": 0.20699645303657976, "grad_norm": 6.477973461151123, "learning_rate": 9.999916624080304e-06, "loss": 0.76393318, "memory(GiB)": 28.47, "step": 7645, "train_speed(iter/s)": 0.432707 }, { "acc": 0.82618408, "epoch": 0.20713183331979532, "grad_norm": 12.114766120910645, "learning_rate": 9.99991336115074e-06, "loss": 0.95553646, "memory(GiB)": 28.47, "step": 7650, "train_speed(iter/s)": 0.432708 }, { "acc": 0.8149353, "epoch": 0.20726721360301087, "grad_norm": 14.359258651733398, "learning_rate": 9.9999100355937e-06, "loss": 0.9748064, "memory(GiB)": 28.47, "step": 7655, "train_speed(iter/s)": 0.43271 }, { "acc": 0.84856586, "epoch": 0.20740259388622642, "grad_norm": 9.553521156311035, "learning_rate": 9.999906647409225e-06, "loss": 0.68768945, "memory(GiB)": 28.47, "step": 7660, "train_speed(iter/s)": 0.432712 }, { "acc": 0.83239231, "epoch": 0.20753797416944197, "grad_norm": 24.983139038085938, "learning_rate": 9.999903196597357e-06, "loss": 0.78270502, "memory(GiB)": 28.47, "step": 7665, "train_speed(iter/s)": 0.43271 }, { "acc": 0.79516544, "epoch": 0.20767335445265753, "grad_norm": 11.007709503173828, "learning_rate": 9.999899683158141e-06, "loss": 1.0440979, "memory(GiB)": 28.47, "step": 7670, "train_speed(iter/s)": 0.432712 }, { "acc": 0.84770718, "epoch": 0.20780873473587308, "grad_norm": 5.728073596954346, "learning_rate": 9.999896107091622e-06, "loss": 0.78940268, "memory(GiB)": 28.47, "step": 7675, "train_speed(iter/s)": 0.432713 }, { "acc": 0.83931561, "epoch": 0.20794411501908863, "grad_norm": 7.0061140060424805, "learning_rate": 9.999892468397841e-06, "loss": 0.83836479, "memory(GiB)": 28.47, "step": 7680, "train_speed(iter/s)": 0.432714 }, { "acc": 0.84748726, "epoch": 0.20807949530230418, "grad_norm": 12.305624961853027, "learning_rate": 9.999888767076845e-06, "loss": 0.72000232, "memory(GiB)": 28.47, "step": 7685, "train_speed(iter/s)": 0.432715 }, { "acc": 0.83293447, "epoch": 0.20821487558551974, "grad_norm": 20.971250534057617, "learning_rate": 9.999885003128683e-06, "loss": 0.90559664, "memory(GiB)": 28.47, "step": 7690, "train_speed(iter/s)": 0.432715 }, { "acc": 0.83397799, "epoch": 0.2083502558687353, "grad_norm": 7.824898719787598, "learning_rate": 9.999881176553398e-06, "loss": 0.74380651, "memory(GiB)": 28.47, "step": 7695, "train_speed(iter/s)": 0.432715 }, { "acc": 0.84033527, "epoch": 0.20848563615195084, "grad_norm": 8.222275733947754, "learning_rate": 9.999877287351043e-06, "loss": 0.72734375, "memory(GiB)": 28.47, "step": 7700, "train_speed(iter/s)": 0.432717 }, { "acc": 0.85308466, "epoch": 0.2086210164351664, "grad_norm": 11.821892738342285, "learning_rate": 9.99987333552166e-06, "loss": 0.76394334, "memory(GiB)": 28.47, "step": 7705, "train_speed(iter/s)": 0.432719 }, { "acc": 0.842733, "epoch": 0.20875639671838195, "grad_norm": 12.88120174407959, "learning_rate": 9.999869321065303e-06, "loss": 0.796527, "memory(GiB)": 28.47, "step": 7710, "train_speed(iter/s)": 0.432718 }, { "acc": 0.82206898, "epoch": 0.2088917770015975, "grad_norm": 13.630555152893066, "learning_rate": 9.999865243982025e-06, "loss": 0.90857372, "memory(GiB)": 28.47, "step": 7715, "train_speed(iter/s)": 0.432719 }, { "acc": 0.81984062, "epoch": 0.20902715728481305, "grad_norm": 11.366606712341309, "learning_rate": 9.99986110427187e-06, "loss": 0.85887451, "memory(GiB)": 28.47, "step": 7720, "train_speed(iter/s)": 0.432716 }, { "acc": 0.84681015, "epoch": 0.2091625375680286, "grad_norm": 7.746798515319824, "learning_rate": 9.999856901934896e-06, "loss": 0.75642176, "memory(GiB)": 28.47, "step": 7725, "train_speed(iter/s)": 0.432714 }, { "acc": 0.82845945, "epoch": 0.20929791785124416, "grad_norm": 15.556086540222168, "learning_rate": 9.99985263697115e-06, "loss": 0.86295748, "memory(GiB)": 28.47, "step": 7730, "train_speed(iter/s)": 0.432716 }, { "acc": 0.83695488, "epoch": 0.2094332981344597, "grad_norm": 17.802492141723633, "learning_rate": 9.999848309380692e-06, "loss": 0.83389101, "memory(GiB)": 28.47, "step": 7735, "train_speed(iter/s)": 0.432718 }, { "acc": 0.82258787, "epoch": 0.20956867841767524, "grad_norm": 10.425895690917969, "learning_rate": 9.99984391916357e-06, "loss": 0.93775053, "memory(GiB)": 28.47, "step": 7740, "train_speed(iter/s)": 0.432716 }, { "acc": 0.8137639, "epoch": 0.2097040587008908, "grad_norm": 13.52621841430664, "learning_rate": 9.999839466319842e-06, "loss": 0.90918865, "memory(GiB)": 28.47, "step": 7745, "train_speed(iter/s)": 0.432716 }, { "acc": 0.82545681, "epoch": 0.20983943898410634, "grad_norm": 8.1875638961792, "learning_rate": 9.999834950849564e-06, "loss": 0.92010326, "memory(GiB)": 28.47, "step": 7750, "train_speed(iter/s)": 0.432716 }, { "acc": 0.83142376, "epoch": 0.2099748192673219, "grad_norm": 19.527067184448242, "learning_rate": 9.999830372752792e-06, "loss": 0.89834776, "memory(GiB)": 28.47, "step": 7755, "train_speed(iter/s)": 0.432717 }, { "acc": 0.82956905, "epoch": 0.21011019955053745, "grad_norm": 18.21101951599121, "learning_rate": 9.999825732029584e-06, "loss": 0.81799383, "memory(GiB)": 28.47, "step": 7760, "train_speed(iter/s)": 0.432718 }, { "acc": 0.78976769, "epoch": 0.210245579833753, "grad_norm": 8.764511108398438, "learning_rate": 9.999821028679996e-06, "loss": 1.0112669, "memory(GiB)": 28.47, "step": 7765, "train_speed(iter/s)": 0.432719 }, { "acc": 0.80650902, "epoch": 0.21038096011696855, "grad_norm": 5.159273624420166, "learning_rate": 9.99981626270409e-06, "loss": 1.05314903, "memory(GiB)": 28.47, "step": 7770, "train_speed(iter/s)": 0.43272 }, { "acc": 0.87524872, "epoch": 0.2105163404001841, "grad_norm": 7.1520256996154785, "learning_rate": 9.999811434101924e-06, "loss": 0.59171238, "memory(GiB)": 28.47, "step": 7775, "train_speed(iter/s)": 0.432722 }, { "acc": 0.81002436, "epoch": 0.21065172068339966, "grad_norm": 25.93706703186035, "learning_rate": 9.999806542873558e-06, "loss": 0.97407532, "memory(GiB)": 28.47, "step": 7780, "train_speed(iter/s)": 0.432722 }, { "acc": 0.85825768, "epoch": 0.2107871009666152, "grad_norm": 10.998146057128906, "learning_rate": 9.999801589019054e-06, "loss": 0.70416384, "memory(GiB)": 28.47, "step": 7785, "train_speed(iter/s)": 0.432723 }, { "acc": 0.84196682, "epoch": 0.21092248124983076, "grad_norm": 9.012848854064941, "learning_rate": 9.999796572538474e-06, "loss": 0.83263016, "memory(GiB)": 28.47, "step": 7790, "train_speed(iter/s)": 0.432723 }, { "acc": 0.83639393, "epoch": 0.21105786153304631, "grad_norm": 13.097915649414062, "learning_rate": 9.99979149343188e-06, "loss": 0.82554607, "memory(GiB)": 28.47, "step": 7795, "train_speed(iter/s)": 0.432724 }, { "acc": 0.84796, "epoch": 0.21119324181626187, "grad_norm": 21.22269630432129, "learning_rate": 9.99978635169934e-06, "loss": 0.78270035, "memory(GiB)": 28.47, "step": 7800, "train_speed(iter/s)": 0.432725 }, { "acc": 0.83645592, "epoch": 0.21132862209947742, "grad_norm": 7.846476078033447, "learning_rate": 9.999781147340912e-06, "loss": 0.74969521, "memory(GiB)": 28.47, "step": 7805, "train_speed(iter/s)": 0.432725 }, { "acc": 0.8019783, "epoch": 0.21146400238269297, "grad_norm": 23.496673583984375, "learning_rate": 9.999775880356665e-06, "loss": 1.1076704, "memory(GiB)": 28.47, "step": 7810, "train_speed(iter/s)": 0.432725 }, { "acc": 0.83561792, "epoch": 0.21159938266590853, "grad_norm": 10.021134376525879, "learning_rate": 9.999770550746662e-06, "loss": 0.87573185, "memory(GiB)": 28.47, "step": 7815, "train_speed(iter/s)": 0.432726 }, { "acc": 0.82095776, "epoch": 0.21173476294912408, "grad_norm": 11.376835823059082, "learning_rate": 9.999765158510974e-06, "loss": 0.92294292, "memory(GiB)": 28.47, "step": 7820, "train_speed(iter/s)": 0.432727 }, { "acc": 0.84071712, "epoch": 0.21187014323233963, "grad_norm": 11.774027824401855, "learning_rate": 9.999759703649666e-06, "loss": 0.83803501, "memory(GiB)": 28.47, "step": 7825, "train_speed(iter/s)": 0.432729 }, { "acc": 0.82763367, "epoch": 0.21200552351555518, "grad_norm": 10.12926959991455, "learning_rate": 9.999754186162805e-06, "loss": 0.89542046, "memory(GiB)": 28.47, "step": 7830, "train_speed(iter/s)": 0.43273 }, { "acc": 0.8239254, "epoch": 0.21214090379877074, "grad_norm": 8.166844367980957, "learning_rate": 9.999748606050464e-06, "loss": 0.9135231, "memory(GiB)": 28.47, "step": 7835, "train_speed(iter/s)": 0.432732 }, { "acc": 0.86894884, "epoch": 0.2122762840819863, "grad_norm": 29.179285049438477, "learning_rate": 9.99974296331271e-06, "loss": 0.64997201, "memory(GiB)": 28.47, "step": 7840, "train_speed(iter/s)": 0.432733 }, { "acc": 0.84395847, "epoch": 0.21241166436520184, "grad_norm": 11.043501853942871, "learning_rate": 9.999737257949615e-06, "loss": 0.82911501, "memory(GiB)": 28.47, "step": 7845, "train_speed(iter/s)": 0.432734 }, { "acc": 0.83045158, "epoch": 0.2125470446484174, "grad_norm": 18.464365005493164, "learning_rate": 9.999731489961248e-06, "loss": 0.93323965, "memory(GiB)": 28.47, "step": 7850, "train_speed(iter/s)": 0.432732 }, { "acc": 0.83147621, "epoch": 0.21268242493163295, "grad_norm": 7.989866733551025, "learning_rate": 9.999725659347685e-06, "loss": 0.89306135, "memory(GiB)": 28.47, "step": 7855, "train_speed(iter/s)": 0.432735 }, { "acc": 0.83065987, "epoch": 0.2128178052148485, "grad_norm": 11.419926643371582, "learning_rate": 9.999719766108995e-06, "loss": 0.83576412, "memory(GiB)": 28.47, "step": 7860, "train_speed(iter/s)": 0.432733 }, { "acc": 0.81081171, "epoch": 0.21295318549806405, "grad_norm": 20.254249572753906, "learning_rate": 9.999713810245257e-06, "loss": 0.96478462, "memory(GiB)": 28.47, "step": 7865, "train_speed(iter/s)": 0.432735 }, { "acc": 0.84086676, "epoch": 0.2130885657812796, "grad_norm": 19.504962921142578, "learning_rate": 9.99970779175654e-06, "loss": 0.88568735, "memory(GiB)": 28.47, "step": 7870, "train_speed(iter/s)": 0.432735 }, { "acc": 0.86213226, "epoch": 0.21322394606449516, "grad_norm": 10.354416847229004, "learning_rate": 9.999701710642924e-06, "loss": 0.71971712, "memory(GiB)": 28.47, "step": 7875, "train_speed(iter/s)": 0.432736 }, { "acc": 0.82945442, "epoch": 0.2133593263477107, "grad_norm": 10.950926780700684, "learning_rate": 9.99969556690448e-06, "loss": 0.93690681, "memory(GiB)": 28.47, "step": 7880, "train_speed(iter/s)": 0.432736 }, { "acc": 0.82429104, "epoch": 0.21349470663092626, "grad_norm": 12.535093307495117, "learning_rate": 9.999689360541292e-06, "loss": 0.9347353, "memory(GiB)": 28.47, "step": 7885, "train_speed(iter/s)": 0.432735 }, { "acc": 0.85130825, "epoch": 0.21363008691414181, "grad_norm": 5.250226020812988, "learning_rate": 9.999683091553432e-06, "loss": 0.69912472, "memory(GiB)": 28.47, "step": 7890, "train_speed(iter/s)": 0.432736 }, { "acc": 0.83555679, "epoch": 0.21376546719735737, "grad_norm": 22.822301864624023, "learning_rate": 9.99967675994098e-06, "loss": 0.85865364, "memory(GiB)": 28.47, "step": 7895, "train_speed(iter/s)": 0.432737 }, { "acc": 0.83623028, "epoch": 0.21390084748057292, "grad_norm": 16.02756118774414, "learning_rate": 9.999670365704017e-06, "loss": 0.77474318, "memory(GiB)": 28.47, "step": 7900, "train_speed(iter/s)": 0.432736 }, { "acc": 0.83132801, "epoch": 0.21403622776378847, "grad_norm": 11.957893371582031, "learning_rate": 9.999663908842621e-06, "loss": 0.90369968, "memory(GiB)": 28.47, "step": 7905, "train_speed(iter/s)": 0.432737 }, { "acc": 0.85432281, "epoch": 0.21417160804700403, "grad_norm": 7.380212783813477, "learning_rate": 9.999657389356874e-06, "loss": 0.70671368, "memory(GiB)": 28.47, "step": 7910, "train_speed(iter/s)": 0.432735 }, { "acc": 0.825208, "epoch": 0.21430698833021958, "grad_norm": 8.754596710205078, "learning_rate": 9.999650807246858e-06, "loss": 0.94634686, "memory(GiB)": 28.47, "step": 7915, "train_speed(iter/s)": 0.432737 }, { "acc": 0.83262606, "epoch": 0.21444236861343513, "grad_norm": 37.56325149536133, "learning_rate": 9.999644162512653e-06, "loss": 0.8458559, "memory(GiB)": 28.47, "step": 7920, "train_speed(iter/s)": 0.432739 }, { "acc": 0.83983088, "epoch": 0.21457774889665068, "grad_norm": 26.81208610534668, "learning_rate": 9.999637455154345e-06, "loss": 0.84635372, "memory(GiB)": 28.47, "step": 7925, "train_speed(iter/s)": 0.432739 }, { "acc": 0.83695879, "epoch": 0.21471312917986624, "grad_norm": 18.54741668701172, "learning_rate": 9.999630685172016e-06, "loss": 0.94147987, "memory(GiB)": 28.47, "step": 7930, "train_speed(iter/s)": 0.43274 }, { "acc": 0.82813358, "epoch": 0.2148485094630818, "grad_norm": 11.37489128112793, "learning_rate": 9.999623852565752e-06, "loss": 0.86548882, "memory(GiB)": 28.47, "step": 7935, "train_speed(iter/s)": 0.43274 }, { "acc": 0.82476139, "epoch": 0.21498388974629734, "grad_norm": 12.840019226074219, "learning_rate": 9.99961695733564e-06, "loss": 0.89922476, "memory(GiB)": 28.47, "step": 7940, "train_speed(iter/s)": 0.432742 }, { "acc": 0.83280354, "epoch": 0.2151192700295129, "grad_norm": 10.376108169555664, "learning_rate": 9.999609999481766e-06, "loss": 0.87566452, "memory(GiB)": 28.47, "step": 7945, "train_speed(iter/s)": 0.432742 }, { "acc": 0.81520519, "epoch": 0.21525465031272845, "grad_norm": 7.111582279205322, "learning_rate": 9.999602979004215e-06, "loss": 0.97740774, "memory(GiB)": 28.47, "step": 7950, "train_speed(iter/s)": 0.432741 }, { "acc": 0.83074274, "epoch": 0.215390030595944, "grad_norm": 6.358975887298584, "learning_rate": 9.999595895903075e-06, "loss": 0.86468496, "memory(GiB)": 28.47, "step": 7955, "train_speed(iter/s)": 0.432742 }, { "acc": 0.81719971, "epoch": 0.21552541087915955, "grad_norm": 16.385162353515625, "learning_rate": 9.999588750178435e-06, "loss": 1.04334688, "memory(GiB)": 28.47, "step": 7960, "train_speed(iter/s)": 0.432742 }, { "acc": 0.83332424, "epoch": 0.2156607911623751, "grad_norm": 14.33413028717041, "learning_rate": 9.999581541830386e-06, "loss": 0.86678667, "memory(GiB)": 28.47, "step": 7965, "train_speed(iter/s)": 0.432744 }, { "acc": 0.85566559, "epoch": 0.21579617144559066, "grad_norm": 9.064006805419922, "learning_rate": 9.99957427085902e-06, "loss": 0.7669157, "memory(GiB)": 28.47, "step": 7970, "train_speed(iter/s)": 0.432745 }, { "acc": 0.84730682, "epoch": 0.2159315517288062, "grad_norm": 20.029478073120117, "learning_rate": 9.999566937264421e-06, "loss": 0.81143093, "memory(GiB)": 28.47, "step": 7975, "train_speed(iter/s)": 0.432746 }, { "acc": 0.83652039, "epoch": 0.21606693201202176, "grad_norm": 13.219794273376465, "learning_rate": 9.999559541046689e-06, "loss": 0.85811405, "memory(GiB)": 28.47, "step": 7980, "train_speed(iter/s)": 0.432748 }, { "acc": 0.84373169, "epoch": 0.21620231229523731, "grad_norm": 8.816709518432617, "learning_rate": 9.999552082205912e-06, "loss": 0.79042454, "memory(GiB)": 28.47, "step": 7985, "train_speed(iter/s)": 0.432749 }, { "acc": 0.83881826, "epoch": 0.21633769257845287, "grad_norm": 10.656944274902344, "learning_rate": 9.999544560742184e-06, "loss": 0.77095447, "memory(GiB)": 28.47, "step": 7990, "train_speed(iter/s)": 0.432747 }, { "acc": 0.83956575, "epoch": 0.21647307286166842, "grad_norm": 6.456490516662598, "learning_rate": 9.9995369766556e-06, "loss": 0.84945469, "memory(GiB)": 28.47, "step": 7995, "train_speed(iter/s)": 0.432747 }, { "acc": 0.82207394, "epoch": 0.21660845314488397, "grad_norm": 11.433113098144531, "learning_rate": 9.999529329946254e-06, "loss": 0.87560997, "memory(GiB)": 28.47, "step": 8000, "train_speed(iter/s)": 0.432747 }, { "acc": 0.84420872, "epoch": 0.21674383342809952, "grad_norm": 19.753419876098633, "learning_rate": 9.999521620614246e-06, "loss": 0.78299012, "memory(GiB)": 28.47, "step": 8005, "train_speed(iter/s)": 0.432748 }, { "acc": 0.85580368, "epoch": 0.21687921371131508, "grad_norm": 13.606307029724121, "learning_rate": 9.999513848659666e-06, "loss": 0.79568691, "memory(GiB)": 28.47, "step": 8010, "train_speed(iter/s)": 0.43275 }, { "acc": 0.84631662, "epoch": 0.21701459399453063, "grad_norm": 5.426596164703369, "learning_rate": 9.999506014082617e-06, "loss": 0.75566549, "memory(GiB)": 28.47, "step": 8015, "train_speed(iter/s)": 0.432752 }, { "acc": 0.84770508, "epoch": 0.21714997427774618, "grad_norm": 20.341838836669922, "learning_rate": 9.99949811688319e-06, "loss": 0.63664446, "memory(GiB)": 28.47, "step": 8020, "train_speed(iter/s)": 0.432753 }, { "acc": 0.82730999, "epoch": 0.21728535456096174, "grad_norm": 11.976794242858887, "learning_rate": 9.999490157061492e-06, "loss": 0.86407747, "memory(GiB)": 28.47, "step": 8025, "train_speed(iter/s)": 0.432753 }, { "acc": 0.85398684, "epoch": 0.2174207348441773, "grad_norm": 5.386150360107422, "learning_rate": 9.99948213461762e-06, "loss": 0.72814646, "memory(GiB)": 28.47, "step": 8030, "train_speed(iter/s)": 0.432754 }, { "acc": 0.84140692, "epoch": 0.21755611512739284, "grad_norm": 18.13071632385254, "learning_rate": 9.999474049551672e-06, "loss": 0.76651926, "memory(GiB)": 28.47, "step": 8035, "train_speed(iter/s)": 0.432753 }, { "acc": 0.85267649, "epoch": 0.2176914954106084, "grad_norm": 5.988933086395264, "learning_rate": 9.999465901863752e-06, "loss": 0.77755542, "memory(GiB)": 28.47, "step": 8040, "train_speed(iter/s)": 0.432754 }, { "acc": 0.81689167, "epoch": 0.21782687569382395, "grad_norm": 11.068904876708984, "learning_rate": 9.999457691553958e-06, "loss": 0.97837811, "memory(GiB)": 28.47, "step": 8045, "train_speed(iter/s)": 0.432755 }, { "acc": 0.85599213, "epoch": 0.2179622559770395, "grad_norm": 8.37526798248291, "learning_rate": 9.9994494186224e-06, "loss": 0.69186769, "memory(GiB)": 28.47, "step": 8050, "train_speed(iter/s)": 0.432755 }, { "acc": 0.81780834, "epoch": 0.21809763626025505, "grad_norm": 14.055182456970215, "learning_rate": 9.999441083069177e-06, "loss": 0.99642601, "memory(GiB)": 28.47, "step": 8055, "train_speed(iter/s)": 0.432755 }, { "acc": 0.79725065, "epoch": 0.2182330165434706, "grad_norm": 12.289287567138672, "learning_rate": 9.999432684894392e-06, "loss": 1.1256053, "memory(GiB)": 28.47, "step": 8060, "train_speed(iter/s)": 0.432755 }, { "acc": 0.81532383, "epoch": 0.21836839682668616, "grad_norm": 11.584163665771484, "learning_rate": 9.999424224098153e-06, "loss": 0.94939375, "memory(GiB)": 28.47, "step": 8065, "train_speed(iter/s)": 0.432755 }, { "acc": 0.84838333, "epoch": 0.2185037771099017, "grad_norm": 6.6476874351501465, "learning_rate": 9.999415700680567e-06, "loss": 0.78426805, "memory(GiB)": 28.47, "step": 8070, "train_speed(iter/s)": 0.432755 }, { "acc": 0.83907013, "epoch": 0.21863915739311726, "grad_norm": 8.39785099029541, "learning_rate": 9.999407114641739e-06, "loss": 0.81025829, "memory(GiB)": 28.47, "step": 8075, "train_speed(iter/s)": 0.432756 }, { "acc": 0.81963558, "epoch": 0.21877453767633281, "grad_norm": 5.7945075035095215, "learning_rate": 9.999398465981775e-06, "loss": 0.92607765, "memory(GiB)": 28.47, "step": 8080, "train_speed(iter/s)": 0.432756 }, { "acc": 0.84886189, "epoch": 0.21890991795954837, "grad_norm": 29.10260772705078, "learning_rate": 9.999389754700785e-06, "loss": 0.84539557, "memory(GiB)": 28.47, "step": 8085, "train_speed(iter/s)": 0.432757 }, { "acc": 0.82106018, "epoch": 0.21904529824276392, "grad_norm": 9.22830867767334, "learning_rate": 9.999380980798879e-06, "loss": 0.85322037, "memory(GiB)": 28.47, "step": 8090, "train_speed(iter/s)": 0.432758 }, { "acc": 0.8199235, "epoch": 0.21918067852597947, "grad_norm": 31.63853645324707, "learning_rate": 9.999372144276165e-06, "loss": 1.02258625, "memory(GiB)": 28.47, "step": 8095, "train_speed(iter/s)": 0.432758 }, { "acc": 0.86338749, "epoch": 0.21931605880919502, "grad_norm": 52.68183898925781, "learning_rate": 9.999363245132755e-06, "loss": 0.64080086, "memory(GiB)": 28.47, "step": 8100, "train_speed(iter/s)": 0.43276 }, { "acc": 0.83720026, "epoch": 0.21945143909241058, "grad_norm": 11.78874397277832, "learning_rate": 9.999354283368759e-06, "loss": 0.84823418, "memory(GiB)": 28.47, "step": 8105, "train_speed(iter/s)": 0.432762 }, { "acc": 0.79561973, "epoch": 0.21958681937562613, "grad_norm": 11.075209617614746, "learning_rate": 9.999345258984292e-06, "loss": 1.04733448, "memory(GiB)": 28.47, "step": 8110, "train_speed(iter/s)": 0.432764 }, { "acc": 0.83808441, "epoch": 0.21972219965884168, "grad_norm": 12.240011215209961, "learning_rate": 9.999336171979463e-06, "loss": 0.76320276, "memory(GiB)": 28.47, "step": 8115, "train_speed(iter/s)": 0.432764 }, { "acc": 0.85886297, "epoch": 0.21985757994205724, "grad_norm": 6.842144966125488, "learning_rate": 9.999327022354392e-06, "loss": 0.74720902, "memory(GiB)": 28.47, "step": 8120, "train_speed(iter/s)": 0.432765 }, { "acc": 0.84101238, "epoch": 0.2199929602252728, "grad_norm": 12.734996795654297, "learning_rate": 9.999317810109188e-06, "loss": 0.77456675, "memory(GiB)": 28.47, "step": 8125, "train_speed(iter/s)": 0.432766 }, { "acc": 0.83620777, "epoch": 0.22012834050848834, "grad_norm": 25.424089431762695, "learning_rate": 9.999308535243966e-06, "loss": 0.88010635, "memory(GiB)": 28.47, "step": 8130, "train_speed(iter/s)": 0.432765 }, { "acc": 0.85937271, "epoch": 0.2202637207917039, "grad_norm": 5.111148834228516, "learning_rate": 9.999299197758849e-06, "loss": 0.65103989, "memory(GiB)": 28.47, "step": 8135, "train_speed(iter/s)": 0.432765 }, { "acc": 0.82609568, "epoch": 0.22039910107491945, "grad_norm": 14.557611465454102, "learning_rate": 9.999289797653946e-06, "loss": 0.91477814, "memory(GiB)": 28.47, "step": 8140, "train_speed(iter/s)": 0.432766 }, { "acc": 0.82051716, "epoch": 0.220534481358135, "grad_norm": 18.672985076904297, "learning_rate": 9.99928033492938e-06, "loss": 0.94745064, "memory(GiB)": 28.47, "step": 8145, "train_speed(iter/s)": 0.432761 }, { "acc": 0.87090092, "epoch": 0.22066986164135055, "grad_norm": 15.343202590942383, "learning_rate": 9.999270809585267e-06, "loss": 0.65422707, "memory(GiB)": 28.47, "step": 8150, "train_speed(iter/s)": 0.432761 }, { "acc": 0.83962574, "epoch": 0.2208052419245661, "grad_norm": 16.477249145507812, "learning_rate": 9.999261221621726e-06, "loss": 0.7854938, "memory(GiB)": 28.47, "step": 8155, "train_speed(iter/s)": 0.432763 }, { "acc": 0.85098391, "epoch": 0.22094062220778166, "grad_norm": 13.86974811553955, "learning_rate": 9.99925157103888e-06, "loss": 0.66520572, "memory(GiB)": 28.47, "step": 8160, "train_speed(iter/s)": 0.432764 }, { "acc": 0.83605137, "epoch": 0.2210760024909972, "grad_norm": 8.329276084899902, "learning_rate": 9.999241857836847e-06, "loss": 0.92273245, "memory(GiB)": 28.47, "step": 8165, "train_speed(iter/s)": 0.432765 }, { "acc": 0.84050312, "epoch": 0.22121138277421276, "grad_norm": 9.238499641418457, "learning_rate": 9.999232082015749e-06, "loss": 0.7592423, "memory(GiB)": 28.47, "step": 8170, "train_speed(iter/s)": 0.432766 }, { "acc": 0.8319499, "epoch": 0.22134676305742831, "grad_norm": 10.360279083251953, "learning_rate": 9.99922224357571e-06, "loss": 0.86676102, "memory(GiB)": 28.47, "step": 8175, "train_speed(iter/s)": 0.432767 }, { "acc": 0.85047207, "epoch": 0.22148214334064387, "grad_norm": 8.108928680419922, "learning_rate": 9.999212342516853e-06, "loss": 0.70872564, "memory(GiB)": 28.47, "step": 8180, "train_speed(iter/s)": 0.432769 }, { "acc": 0.84389668, "epoch": 0.22161752362385942, "grad_norm": 7.5332350730896, "learning_rate": 9.999202378839302e-06, "loss": 0.78067064, "memory(GiB)": 28.47, "step": 8185, "train_speed(iter/s)": 0.432769 }, { "acc": 0.8237812, "epoch": 0.22175290390707497, "grad_norm": 11.723163604736328, "learning_rate": 9.999192352543179e-06, "loss": 0.84617119, "memory(GiB)": 28.47, "step": 8190, "train_speed(iter/s)": 0.432771 }, { "acc": 0.8408721, "epoch": 0.22188828419029052, "grad_norm": 17.409469604492188, "learning_rate": 9.999182263628613e-06, "loss": 0.78337326, "memory(GiB)": 28.47, "step": 8195, "train_speed(iter/s)": 0.432773 }, { "acc": 0.86601381, "epoch": 0.22202366447350608, "grad_norm": 7.113701820373535, "learning_rate": 9.99917211209573e-06, "loss": 0.69789348, "memory(GiB)": 28.47, "step": 8200, "train_speed(iter/s)": 0.432773 }, { "acc": 0.88081322, "epoch": 0.22215904475672163, "grad_norm": 6.315946578979492, "learning_rate": 9.999161897944656e-06, "loss": 0.55807538, "memory(GiB)": 28.47, "step": 8205, "train_speed(iter/s)": 0.432775 }, { "acc": 0.83433437, "epoch": 0.22229442503993718, "grad_norm": 10.735146522521973, "learning_rate": 9.999151621175518e-06, "loss": 0.79512115, "memory(GiB)": 28.47, "step": 8210, "train_speed(iter/s)": 0.432774 }, { "acc": 0.8599369, "epoch": 0.22242980532315274, "grad_norm": 11.686790466308594, "learning_rate": 9.999141281788447e-06, "loss": 0.68570976, "memory(GiB)": 28.47, "step": 8215, "train_speed(iter/s)": 0.432775 }, { "acc": 0.83071241, "epoch": 0.2225651856063683, "grad_norm": 20.5842342376709, "learning_rate": 9.999130879783574e-06, "loss": 0.87105665, "memory(GiB)": 28.47, "step": 8220, "train_speed(iter/s)": 0.432772 }, { "acc": 0.82770977, "epoch": 0.22270056588958384, "grad_norm": 10.549489974975586, "learning_rate": 9.999120415161024e-06, "loss": 0.90560045, "memory(GiB)": 28.47, "step": 8225, "train_speed(iter/s)": 0.432774 }, { "acc": 0.82583494, "epoch": 0.2228359461727994, "grad_norm": 11.045021057128906, "learning_rate": 9.999109887920932e-06, "loss": 0.86293507, "memory(GiB)": 28.47, "step": 8230, "train_speed(iter/s)": 0.432774 }, { "acc": 0.82816772, "epoch": 0.22297132645601495, "grad_norm": 10.089041709899902, "learning_rate": 9.999099298063428e-06, "loss": 0.85910091, "memory(GiB)": 28.47, "step": 8235, "train_speed(iter/s)": 0.432775 }, { "acc": 0.80162401, "epoch": 0.2231067067392305, "grad_norm": 11.638565063476562, "learning_rate": 9.999088645588645e-06, "loss": 0.96885223, "memory(GiB)": 28.47, "step": 8240, "train_speed(iter/s)": 0.432776 }, { "acc": 0.83906612, "epoch": 0.22324208702244605, "grad_norm": 8.062522888183594, "learning_rate": 9.99907793049672e-06, "loss": 0.76165972, "memory(GiB)": 28.47, "step": 8245, "train_speed(iter/s)": 0.432777 }, { "acc": 0.84382524, "epoch": 0.2233774673056616, "grad_norm": 7.3917975425720215, "learning_rate": 9.999067152787783e-06, "loss": 0.77703991, "memory(GiB)": 28.47, "step": 8250, "train_speed(iter/s)": 0.432774 }, { "acc": 0.82907867, "epoch": 0.22351284758887716, "grad_norm": 13.264008522033691, "learning_rate": 9.99905631246197e-06, "loss": 0.85611572, "memory(GiB)": 28.47, "step": 8255, "train_speed(iter/s)": 0.432774 }, { "acc": 0.82389679, "epoch": 0.2236482278720927, "grad_norm": 17.835681915283203, "learning_rate": 9.999045409519417e-06, "loss": 0.8774724, "memory(GiB)": 28.47, "step": 8260, "train_speed(iter/s)": 0.432772 }, { "acc": 0.81946812, "epoch": 0.22378360815530826, "grad_norm": 8.339454650878906, "learning_rate": 9.999034443960258e-06, "loss": 0.8590065, "memory(GiB)": 28.47, "step": 8265, "train_speed(iter/s)": 0.432774 }, { "acc": 0.81246281, "epoch": 0.2239189884385238, "grad_norm": 8.95114803314209, "learning_rate": 9.999023415784635e-06, "loss": 0.86651707, "memory(GiB)": 28.47, "step": 8270, "train_speed(iter/s)": 0.432775 }, { "acc": 0.82971306, "epoch": 0.22405436872173937, "grad_norm": 9.510956764221191, "learning_rate": 9.999012324992686e-06, "loss": 0.84834929, "memory(GiB)": 28.47, "step": 8275, "train_speed(iter/s)": 0.432775 }, { "acc": 0.84932394, "epoch": 0.22418974900495492, "grad_norm": 19.97765350341797, "learning_rate": 9.999001171584546e-06, "loss": 0.82469645, "memory(GiB)": 28.47, "step": 8280, "train_speed(iter/s)": 0.432775 }, { "acc": 0.84018354, "epoch": 0.22432512928817047, "grad_norm": 22.23173713684082, "learning_rate": 9.998989955560358e-06, "loss": 0.86929522, "memory(GiB)": 28.47, "step": 8285, "train_speed(iter/s)": 0.432776 }, { "acc": 0.82606468, "epoch": 0.22446050957138602, "grad_norm": 8.805488586425781, "learning_rate": 9.998978676920261e-06, "loss": 0.90851765, "memory(GiB)": 28.47, "step": 8290, "train_speed(iter/s)": 0.432777 }, { "acc": 0.82268181, "epoch": 0.22459588985460158, "grad_norm": 10.88599681854248, "learning_rate": 9.998967335664397e-06, "loss": 0.93801184, "memory(GiB)": 28.47, "step": 8295, "train_speed(iter/s)": 0.432778 }, { "acc": 0.84049559, "epoch": 0.22473127013781713, "grad_norm": 12.240317344665527, "learning_rate": 9.998955931792906e-06, "loss": 0.74554462, "memory(GiB)": 28.47, "step": 8300, "train_speed(iter/s)": 0.432778 }, { "acc": 0.84246769, "epoch": 0.22486665042103268, "grad_norm": 14.193145751953125, "learning_rate": 9.998944465305934e-06, "loss": 0.80665922, "memory(GiB)": 28.47, "step": 8305, "train_speed(iter/s)": 0.432779 }, { "acc": 0.86471214, "epoch": 0.22500203070424823, "grad_norm": 12.161528587341309, "learning_rate": 9.998932936203623e-06, "loss": 0.68588405, "memory(GiB)": 28.47, "step": 8310, "train_speed(iter/s)": 0.432781 }, { "acc": 0.84528894, "epoch": 0.2251374109874638, "grad_norm": 4.603665828704834, "learning_rate": 9.998921344486117e-06, "loss": 0.74339328, "memory(GiB)": 28.47, "step": 8315, "train_speed(iter/s)": 0.43278 }, { "acc": 0.82409458, "epoch": 0.22527279127067934, "grad_norm": 7.563953399658203, "learning_rate": 9.998909690153561e-06, "loss": 0.82365828, "memory(GiB)": 28.47, "step": 8320, "train_speed(iter/s)": 0.43278 }, { "acc": 0.83903303, "epoch": 0.2254081715538949, "grad_norm": 6.975497722625732, "learning_rate": 9.998897973206103e-06, "loss": 0.8226162, "memory(GiB)": 28.47, "step": 8325, "train_speed(iter/s)": 0.432781 }, { "acc": 0.84021034, "epoch": 0.22554355183711045, "grad_norm": 4.724039077758789, "learning_rate": 9.998886193643888e-06, "loss": 0.79683056, "memory(GiB)": 28.47, "step": 8330, "train_speed(iter/s)": 0.432779 }, { "acc": 0.82729359, "epoch": 0.225678932120326, "grad_norm": 12.094094276428223, "learning_rate": 9.998874351467066e-06, "loss": 0.88114471, "memory(GiB)": 28.47, "step": 8335, "train_speed(iter/s)": 0.43278 }, { "acc": 0.83289471, "epoch": 0.22581431240354155, "grad_norm": 11.899755477905273, "learning_rate": 9.99886244667578e-06, "loss": 0.91369028, "memory(GiB)": 28.47, "step": 8340, "train_speed(iter/s)": 0.432781 }, { "acc": 0.82333412, "epoch": 0.2259496926867571, "grad_norm": 15.438369750976562, "learning_rate": 9.998850479270186e-06, "loss": 0.88261347, "memory(GiB)": 28.47, "step": 8345, "train_speed(iter/s)": 0.432782 }, { "acc": 0.81954031, "epoch": 0.22608507296997266, "grad_norm": 19.643266677856445, "learning_rate": 9.99883844925043e-06, "loss": 0.90834179, "memory(GiB)": 28.47, "step": 8350, "train_speed(iter/s)": 0.432783 }, { "acc": 0.8361475, "epoch": 0.2262204532531882, "grad_norm": 4.949466705322266, "learning_rate": 9.99882635661666e-06, "loss": 0.87635098, "memory(GiB)": 28.47, "step": 8355, "train_speed(iter/s)": 0.432781 }, { "acc": 0.83651657, "epoch": 0.22635583353640376, "grad_norm": 16.074934005737305, "learning_rate": 9.998814201369032e-06, "loss": 0.8696867, "memory(GiB)": 28.47, "step": 8360, "train_speed(iter/s)": 0.432782 }, { "acc": 0.82434015, "epoch": 0.2264912138196193, "grad_norm": 7.863641262054443, "learning_rate": 9.998801983507698e-06, "loss": 0.89335232, "memory(GiB)": 28.47, "step": 8365, "train_speed(iter/s)": 0.43278 }, { "acc": 0.84493599, "epoch": 0.22662659410283487, "grad_norm": 12.840778350830078, "learning_rate": 9.998789703032811e-06, "loss": 0.76794829, "memory(GiB)": 28.47, "step": 8370, "train_speed(iter/s)": 0.43278 }, { "acc": 0.86671448, "epoch": 0.22676197438605042, "grad_norm": 8.561010360717773, "learning_rate": 9.998777359944522e-06, "loss": 0.60582876, "memory(GiB)": 28.47, "step": 8375, "train_speed(iter/s)": 0.432781 }, { "acc": 0.83587255, "epoch": 0.22689735466926597, "grad_norm": 12.608460426330566, "learning_rate": 9.99876495424299e-06, "loss": 0.73852034, "memory(GiB)": 28.47, "step": 8380, "train_speed(iter/s)": 0.432781 }, { "acc": 0.83635426, "epoch": 0.22703273495248152, "grad_norm": 10.770161628723145, "learning_rate": 9.998752485928365e-06, "loss": 0.77148333, "memory(GiB)": 28.47, "step": 8385, "train_speed(iter/s)": 0.432782 }, { "acc": 0.82110405, "epoch": 0.22716811523569708, "grad_norm": 16.19117546081543, "learning_rate": 9.998739955000806e-06, "loss": 0.939608, "memory(GiB)": 28.47, "step": 8390, "train_speed(iter/s)": 0.432783 }, { "acc": 0.82049751, "epoch": 0.22730349551891263, "grad_norm": 11.033831596374512, "learning_rate": 9.998727361460473e-06, "loss": 1.0081522, "memory(GiB)": 28.47, "step": 8395, "train_speed(iter/s)": 0.432784 }, { "acc": 0.84129734, "epoch": 0.22743887580212818, "grad_norm": 13.576970100402832, "learning_rate": 9.998714705307518e-06, "loss": 0.7999856, "memory(GiB)": 28.47, "step": 8400, "train_speed(iter/s)": 0.432785 }, { "acc": 0.86006374, "epoch": 0.22757425608534373, "grad_norm": 8.303088188171387, "learning_rate": 9.998701986542104e-06, "loss": 0.67683802, "memory(GiB)": 28.47, "step": 8405, "train_speed(iter/s)": 0.432787 }, { "acc": 0.81076965, "epoch": 0.2277096363685593, "grad_norm": 8.08629035949707, "learning_rate": 9.998689205164386e-06, "loss": 0.97958736, "memory(GiB)": 28.47, "step": 8410, "train_speed(iter/s)": 0.432789 }, { "acc": 0.79649458, "epoch": 0.22784501665177484, "grad_norm": 8.607096672058105, "learning_rate": 9.998676361174531e-06, "loss": 1.07501478, "memory(GiB)": 28.47, "step": 8415, "train_speed(iter/s)": 0.432789 }, { "acc": 0.83147373, "epoch": 0.2279803969349904, "grad_norm": 12.505559921264648, "learning_rate": 9.998663454572693e-06, "loss": 0.90128593, "memory(GiB)": 28.47, "step": 8420, "train_speed(iter/s)": 0.43279 }, { "acc": 0.84916935, "epoch": 0.22811577721820595, "grad_norm": 14.628918647766113, "learning_rate": 9.998650485359036e-06, "loss": 0.82484484, "memory(GiB)": 28.47, "step": 8425, "train_speed(iter/s)": 0.432791 }, { "acc": 0.85678921, "epoch": 0.2282511575014215, "grad_norm": 9.86893081665039, "learning_rate": 9.998637453533724e-06, "loss": 0.744277, "memory(GiB)": 28.47, "step": 8430, "train_speed(iter/s)": 0.432791 }, { "acc": 0.82130489, "epoch": 0.22838653778463705, "grad_norm": 9.331374168395996, "learning_rate": 9.99862435909692e-06, "loss": 0.8571619, "memory(GiB)": 28.47, "step": 8435, "train_speed(iter/s)": 0.432792 }, { "acc": 0.82647696, "epoch": 0.2285219180678526, "grad_norm": 10.155786514282227, "learning_rate": 9.998611202048784e-06, "loss": 0.89274588, "memory(GiB)": 28.47, "step": 8440, "train_speed(iter/s)": 0.432791 }, { "acc": 0.84310055, "epoch": 0.22865729835106816, "grad_norm": 13.248878479003906, "learning_rate": 9.998597982389486e-06, "loss": 0.81880455, "memory(GiB)": 28.47, "step": 8445, "train_speed(iter/s)": 0.432791 }, { "acc": 0.80326529, "epoch": 0.2287926786342837, "grad_norm": 11.08929443359375, "learning_rate": 9.998584700119188e-06, "loss": 1.11306629, "memory(GiB)": 28.47, "step": 8450, "train_speed(iter/s)": 0.432793 }, { "acc": 0.83170223, "epoch": 0.22892805891749926, "grad_norm": 4.490206241607666, "learning_rate": 9.998571355238061e-06, "loss": 0.81565742, "memory(GiB)": 28.47, "step": 8455, "train_speed(iter/s)": 0.432795 }, { "acc": 0.81249428, "epoch": 0.2290634392007148, "grad_norm": 11.516656875610352, "learning_rate": 9.998557947746268e-06, "loss": 1.00493889, "memory(GiB)": 28.47, "step": 8460, "train_speed(iter/s)": 0.432796 }, { "acc": 0.84137907, "epoch": 0.22919881948393037, "grad_norm": 16.463682174682617, "learning_rate": 9.998544477643976e-06, "loss": 0.79051561, "memory(GiB)": 28.47, "step": 8465, "train_speed(iter/s)": 0.432796 }, { "acc": 0.83412819, "epoch": 0.22933419976714592, "grad_norm": 8.897909164428711, "learning_rate": 9.998530944931356e-06, "loss": 0.84527378, "memory(GiB)": 28.47, "step": 8470, "train_speed(iter/s)": 0.432797 }, { "acc": 0.85368271, "epoch": 0.22946958005036147, "grad_norm": 5.790576934814453, "learning_rate": 9.998517349608579e-06, "loss": 0.69543519, "memory(GiB)": 28.47, "step": 8475, "train_speed(iter/s)": 0.432799 }, { "acc": 0.82356815, "epoch": 0.22960496033357702, "grad_norm": 10.203055381774902, "learning_rate": 9.998503691675813e-06, "loss": 0.93040409, "memory(GiB)": 28.47, "step": 8480, "train_speed(iter/s)": 0.432801 }, { "acc": 0.83794489, "epoch": 0.22974034061679258, "grad_norm": 8.693636894226074, "learning_rate": 9.99848997113323e-06, "loss": 0.74058719, "memory(GiB)": 28.47, "step": 8485, "train_speed(iter/s)": 0.432801 }, { "acc": 0.83119926, "epoch": 0.22987572090000813, "grad_norm": 26.6708927154541, "learning_rate": 9.998476187981e-06, "loss": 0.84456978, "memory(GiB)": 28.47, "step": 8490, "train_speed(iter/s)": 0.432803 }, { "acc": 0.83751936, "epoch": 0.23001110118322368, "grad_norm": 22.34656524658203, "learning_rate": 9.9984623422193e-06, "loss": 0.90767555, "memory(GiB)": 28.47, "step": 8495, "train_speed(iter/s)": 0.432803 }, { "acc": 0.81803751, "epoch": 0.23014648146643923, "grad_norm": 10.016645431518555, "learning_rate": 9.998448433848299e-06, "loss": 1.01196613, "memory(GiB)": 28.47, "step": 8500, "train_speed(iter/s)": 0.432802 }, { "acc": 0.85385933, "epoch": 0.2302818617496548, "grad_norm": 12.920782089233398, "learning_rate": 9.998434462868173e-06, "loss": 0.79376974, "memory(GiB)": 28.47, "step": 8505, "train_speed(iter/s)": 0.432802 }, { "acc": 0.81980391, "epoch": 0.23041724203287034, "grad_norm": 10.647492408752441, "learning_rate": 9.998420429279097e-06, "loss": 0.9386261, "memory(GiB)": 28.47, "step": 8510, "train_speed(iter/s)": 0.432802 }, { "acc": 0.83259354, "epoch": 0.2305526223160859, "grad_norm": 7.510665416717529, "learning_rate": 9.998406333081246e-06, "loss": 0.83785944, "memory(GiB)": 28.47, "step": 8515, "train_speed(iter/s)": 0.432802 }, { "acc": 0.8453474, "epoch": 0.23068800259930144, "grad_norm": 8.837570190429688, "learning_rate": 9.998392174274798e-06, "loss": 0.81045227, "memory(GiB)": 28.47, "step": 8520, "train_speed(iter/s)": 0.432803 }, { "acc": 0.8280714, "epoch": 0.230823382882517, "grad_norm": 13.192431449890137, "learning_rate": 9.998377952859929e-06, "loss": 0.84867802, "memory(GiB)": 28.47, "step": 8525, "train_speed(iter/s)": 0.432803 }, { "acc": 0.84490442, "epoch": 0.23095876316573255, "grad_norm": 5.0646514892578125, "learning_rate": 9.99836366883682e-06, "loss": 0.71671176, "memory(GiB)": 28.47, "step": 8530, "train_speed(iter/s)": 0.432804 }, { "acc": 0.84424877, "epoch": 0.2310941434489481, "grad_norm": 11.857834815979004, "learning_rate": 9.998349322205646e-06, "loss": 0.80873508, "memory(GiB)": 28.47, "step": 8535, "train_speed(iter/s)": 0.432806 }, { "acc": 0.85897732, "epoch": 0.23122952373216366, "grad_norm": 8.723550796508789, "learning_rate": 9.998334912966588e-06, "loss": 0.68127809, "memory(GiB)": 28.47, "step": 8540, "train_speed(iter/s)": 0.432807 }, { "acc": 0.82379189, "epoch": 0.2313649040153792, "grad_norm": 19.355323791503906, "learning_rate": 9.99832044111983e-06, "loss": 0.86094275, "memory(GiB)": 28.47, "step": 8545, "train_speed(iter/s)": 0.432808 }, { "acc": 0.83064594, "epoch": 0.23150028429859476, "grad_norm": 9.932415008544922, "learning_rate": 9.998305906665548e-06, "loss": 0.88378086, "memory(GiB)": 28.47, "step": 8550, "train_speed(iter/s)": 0.432809 }, { "acc": 0.84933987, "epoch": 0.2316356645818103, "grad_norm": 7.6164937019348145, "learning_rate": 9.998291309603926e-06, "loss": 0.72380261, "memory(GiB)": 28.47, "step": 8555, "train_speed(iter/s)": 0.43281 }, { "acc": 0.84261436, "epoch": 0.23177104486502587, "grad_norm": 6.464815139770508, "learning_rate": 9.998276649935146e-06, "loss": 0.76056595, "memory(GiB)": 28.47, "step": 8560, "train_speed(iter/s)": 0.432809 }, { "acc": 0.82366791, "epoch": 0.23190642514824142, "grad_norm": 14.677176475524902, "learning_rate": 9.998261927659397e-06, "loss": 0.90538549, "memory(GiB)": 28.47, "step": 8565, "train_speed(iter/s)": 0.432808 }, { "acc": 0.8203598, "epoch": 0.23204180543145697, "grad_norm": 7.158023357391357, "learning_rate": 9.998247142776855e-06, "loss": 0.95065498, "memory(GiB)": 28.47, "step": 8570, "train_speed(iter/s)": 0.432808 }, { "acc": 0.83818665, "epoch": 0.23217718571467252, "grad_norm": 8.72464370727539, "learning_rate": 9.998232295287711e-06, "loss": 0.79367523, "memory(GiB)": 28.47, "step": 8575, "train_speed(iter/s)": 0.43281 }, { "acc": 0.84179211, "epoch": 0.23231256599788808, "grad_norm": 11.390531539916992, "learning_rate": 9.998217385192151e-06, "loss": 0.85295048, "memory(GiB)": 28.47, "step": 8580, "train_speed(iter/s)": 0.432809 }, { "acc": 0.82735863, "epoch": 0.23244794628110363, "grad_norm": 8.731085777282715, "learning_rate": 9.998202412490358e-06, "loss": 0.85426998, "memory(GiB)": 28.47, "step": 8585, "train_speed(iter/s)": 0.43281 }, { "acc": 0.86638947, "epoch": 0.23258332656431918, "grad_norm": 14.80317211151123, "learning_rate": 9.998187377182523e-06, "loss": 0.68923373, "memory(GiB)": 28.47, "step": 8590, "train_speed(iter/s)": 0.432811 }, { "acc": 0.82715092, "epoch": 0.23271870684753473, "grad_norm": 6.569695472717285, "learning_rate": 9.998172279268831e-06, "loss": 0.88009434, "memory(GiB)": 28.47, "step": 8595, "train_speed(iter/s)": 0.432809 }, { "acc": 0.84266338, "epoch": 0.2328540871307503, "grad_norm": 6.399485111236572, "learning_rate": 9.998157118749477e-06, "loss": 0.76870303, "memory(GiB)": 28.47, "step": 8600, "train_speed(iter/s)": 0.432811 }, { "acc": 0.84155664, "epoch": 0.23298946741396584, "grad_norm": 14.073763847351074, "learning_rate": 9.998141895624645e-06, "loss": 0.78772688, "memory(GiB)": 28.47, "step": 8605, "train_speed(iter/s)": 0.432813 }, { "acc": 0.85821238, "epoch": 0.2331248476971814, "grad_norm": 11.890704154968262, "learning_rate": 9.998126609894528e-06, "loss": 0.73343911, "memory(GiB)": 28.47, "step": 8610, "train_speed(iter/s)": 0.432812 }, { "acc": 0.86099205, "epoch": 0.23326022798039694, "grad_norm": 11.088118553161621, "learning_rate": 9.998111261559319e-06, "loss": 0.74597554, "memory(GiB)": 28.47, "step": 8615, "train_speed(iter/s)": 0.432813 }, { "acc": 0.82333994, "epoch": 0.2333956082636125, "grad_norm": 22.994104385375977, "learning_rate": 9.998095850619207e-06, "loss": 0.88156261, "memory(GiB)": 28.47, "step": 8620, "train_speed(iter/s)": 0.432814 }, { "acc": 0.8244462, "epoch": 0.23353098854682805, "grad_norm": 10.953325271606445, "learning_rate": 9.998080377074386e-06, "loss": 0.81250305, "memory(GiB)": 28.47, "step": 8625, "train_speed(iter/s)": 0.432814 }, { "acc": 0.83946323, "epoch": 0.2336663688300436, "grad_norm": 9.313084602355957, "learning_rate": 9.998064840925052e-06, "loss": 0.91631775, "memory(GiB)": 28.47, "step": 8630, "train_speed(iter/s)": 0.432815 }, { "acc": 0.84674168, "epoch": 0.23380174911325916, "grad_norm": 10.39704704284668, "learning_rate": 9.998049242171397e-06, "loss": 0.72190685, "memory(GiB)": 28.47, "step": 8635, "train_speed(iter/s)": 0.432815 }, { "acc": 0.8307065, "epoch": 0.2339371293964747, "grad_norm": 7.356349945068359, "learning_rate": 9.99803358081362e-06, "loss": 0.82191162, "memory(GiB)": 28.47, "step": 8640, "train_speed(iter/s)": 0.432816 }, { "acc": 0.855756, "epoch": 0.23407250967969026, "grad_norm": 27.199703216552734, "learning_rate": 9.998017856851912e-06, "loss": 0.75819674, "memory(GiB)": 28.47, "step": 8645, "train_speed(iter/s)": 0.432816 }, { "acc": 0.85097942, "epoch": 0.2342078899629058, "grad_norm": 12.24400806427002, "learning_rate": 9.998002070286474e-06, "loss": 0.79037681, "memory(GiB)": 28.47, "step": 8650, "train_speed(iter/s)": 0.432819 }, { "acc": 0.85094509, "epoch": 0.23434327024612137, "grad_norm": 7.639919281005859, "learning_rate": 9.997986221117504e-06, "loss": 0.75027027, "memory(GiB)": 28.47, "step": 8655, "train_speed(iter/s)": 0.432817 }, { "acc": 0.84268417, "epoch": 0.23447865052933692, "grad_norm": 13.845766067504883, "learning_rate": 9.997970309345197e-06, "loss": 0.79583998, "memory(GiB)": 28.47, "step": 8660, "train_speed(iter/s)": 0.432817 }, { "acc": 0.82413063, "epoch": 0.23461403081255247, "grad_norm": 9.472533226013184, "learning_rate": 9.997954334969754e-06, "loss": 0.84441204, "memory(GiB)": 28.47, "step": 8665, "train_speed(iter/s)": 0.432819 }, { "acc": 0.82378616, "epoch": 0.23474941109576802, "grad_norm": 7.2939348220825195, "learning_rate": 9.997938297991377e-06, "loss": 0.90517902, "memory(GiB)": 28.47, "step": 8670, "train_speed(iter/s)": 0.43282 }, { "acc": 0.86030636, "epoch": 0.23488479137898358, "grad_norm": 15.369363784790039, "learning_rate": 9.997922198410265e-06, "loss": 0.68935695, "memory(GiB)": 28.47, "step": 8675, "train_speed(iter/s)": 0.432822 }, { "acc": 0.82396946, "epoch": 0.23502017166219913, "grad_norm": 9.004494667053223, "learning_rate": 9.99790603622662e-06, "loss": 0.82658834, "memory(GiB)": 28.47, "step": 8680, "train_speed(iter/s)": 0.432823 }, { "acc": 0.84097691, "epoch": 0.23515555194541468, "grad_norm": 6.413897514343262, "learning_rate": 9.997889811440643e-06, "loss": 0.71228237, "memory(GiB)": 28.47, "step": 8685, "train_speed(iter/s)": 0.432824 }, { "acc": 0.84541235, "epoch": 0.23529093222863023, "grad_norm": 6.440986156463623, "learning_rate": 9.997873524052541e-06, "loss": 0.72338972, "memory(GiB)": 28.47, "step": 8690, "train_speed(iter/s)": 0.432825 }, { "acc": 0.84696503, "epoch": 0.2354263125118458, "grad_norm": 9.715967178344727, "learning_rate": 9.997857174062515e-06, "loss": 0.77335567, "memory(GiB)": 28.47, "step": 8695, "train_speed(iter/s)": 0.432825 }, { "acc": 0.85341015, "epoch": 0.23556169279506134, "grad_norm": 12.575839042663574, "learning_rate": 9.997840761470768e-06, "loss": 0.71025181, "memory(GiB)": 28.47, "step": 8700, "train_speed(iter/s)": 0.432826 }, { "acc": 0.83868237, "epoch": 0.2356970730782769, "grad_norm": 9.726304054260254, "learning_rate": 9.99782428627751e-06, "loss": 0.83454218, "memory(GiB)": 28.47, "step": 8705, "train_speed(iter/s)": 0.432827 }, { "acc": 0.85000334, "epoch": 0.23583245336149244, "grad_norm": 9.5299072265625, "learning_rate": 9.997807748482948e-06, "loss": 0.77924142, "memory(GiB)": 28.47, "step": 8710, "train_speed(iter/s)": 0.432828 }, { "acc": 0.86200161, "epoch": 0.235967833644708, "grad_norm": 6.964817047119141, "learning_rate": 9.997791148087285e-06, "loss": 0.66420364, "memory(GiB)": 28.47, "step": 8715, "train_speed(iter/s)": 0.43283 }, { "acc": 0.82375746, "epoch": 0.23610321392792355, "grad_norm": 25.423248291015625, "learning_rate": 9.99777448509073e-06, "loss": 1.05972176, "memory(GiB)": 28.47, "step": 8720, "train_speed(iter/s)": 0.432831 }, { "acc": 0.81980162, "epoch": 0.2362385942111391, "grad_norm": 14.200469970703125, "learning_rate": 9.997757759493493e-06, "loss": 0.93350391, "memory(GiB)": 28.47, "step": 8725, "train_speed(iter/s)": 0.432832 }, { "acc": 0.82068377, "epoch": 0.23637397449435466, "grad_norm": 8.756012916564941, "learning_rate": 9.997740971295783e-06, "loss": 0.91446238, "memory(GiB)": 28.47, "step": 8730, "train_speed(iter/s)": 0.432832 }, { "acc": 0.80687447, "epoch": 0.2365093547775702, "grad_norm": 9.09968090057373, "learning_rate": 9.99772412049781e-06, "loss": 1.01957436, "memory(GiB)": 28.47, "step": 8735, "train_speed(iter/s)": 0.43283 }, { "acc": 0.84610062, "epoch": 0.23664473506078576, "grad_norm": 37.05979537963867, "learning_rate": 9.997707207099786e-06, "loss": 0.71670599, "memory(GiB)": 28.47, "step": 8740, "train_speed(iter/s)": 0.432831 }, { "acc": 0.85124807, "epoch": 0.2367801153440013, "grad_norm": 4.784770965576172, "learning_rate": 9.997690231101922e-06, "loss": 0.6857688, "memory(GiB)": 28.47, "step": 8745, "train_speed(iter/s)": 0.432832 }, { "acc": 0.83755398, "epoch": 0.23691549562721687, "grad_norm": 9.82710075378418, "learning_rate": 9.997673192504431e-06, "loss": 0.75410347, "memory(GiB)": 28.47, "step": 8750, "train_speed(iter/s)": 0.432833 }, { "acc": 0.85223694, "epoch": 0.23705087591043242, "grad_norm": 12.801077842712402, "learning_rate": 9.997656091307526e-06, "loss": 0.80669861, "memory(GiB)": 28.47, "step": 8755, "train_speed(iter/s)": 0.432833 }, { "acc": 0.82757378, "epoch": 0.23718625619364797, "grad_norm": 14.015997886657715, "learning_rate": 9.99763892751142e-06, "loss": 0.91514053, "memory(GiB)": 28.47, "step": 8760, "train_speed(iter/s)": 0.432835 }, { "acc": 0.84939842, "epoch": 0.23732163647686352, "grad_norm": 8.29715633392334, "learning_rate": 9.997621701116333e-06, "loss": 0.7524611, "memory(GiB)": 28.47, "step": 8765, "train_speed(iter/s)": 0.432833 }, { "acc": 0.84045506, "epoch": 0.23745701676007905, "grad_norm": 8.799751281738281, "learning_rate": 9.997604412122476e-06, "loss": 0.75659451, "memory(GiB)": 28.47, "step": 8770, "train_speed(iter/s)": 0.432834 }, { "acc": 0.83219604, "epoch": 0.2375923970432946, "grad_norm": 7.806349277496338, "learning_rate": 9.997587060530065e-06, "loss": 0.86257496, "memory(GiB)": 28.47, "step": 8775, "train_speed(iter/s)": 0.432833 }, { "acc": 0.84595413, "epoch": 0.23772777732651015, "grad_norm": 10.54002571105957, "learning_rate": 9.997569646339323e-06, "loss": 0.71396542, "memory(GiB)": 28.47, "step": 8780, "train_speed(iter/s)": 0.432835 }, { "acc": 0.82832308, "epoch": 0.2378631576097257, "grad_norm": 13.724752426147461, "learning_rate": 9.997552169550462e-06, "loss": 0.89732628, "memory(GiB)": 28.47, "step": 8785, "train_speed(iter/s)": 0.432836 }, { "acc": 0.82187347, "epoch": 0.23799853789294126, "grad_norm": 24.59625816345215, "learning_rate": 9.997534630163704e-06, "loss": 0.94413567, "memory(GiB)": 28.47, "step": 8790, "train_speed(iter/s)": 0.432835 }, { "acc": 0.84468117, "epoch": 0.2381339181761568, "grad_norm": 8.181790351867676, "learning_rate": 9.997517028179267e-06, "loss": 0.79302301, "memory(GiB)": 28.47, "step": 8795, "train_speed(iter/s)": 0.432834 }, { "acc": 0.86035852, "epoch": 0.23826929845937236, "grad_norm": 10.062252044677734, "learning_rate": 9.997499363597372e-06, "loss": 0.65614004, "memory(GiB)": 28.47, "step": 8800, "train_speed(iter/s)": 0.432832 }, { "acc": 0.84013395, "epoch": 0.23840467874258792, "grad_norm": 33.14008331298828, "learning_rate": 9.997481636418244e-06, "loss": 0.84811993, "memory(GiB)": 28.47, "step": 8805, "train_speed(iter/s)": 0.432832 }, { "acc": 0.8527174, "epoch": 0.23854005902580347, "grad_norm": 24.310251235961914, "learning_rate": 9.9974638466421e-06, "loss": 0.66900091, "memory(GiB)": 28.47, "step": 8810, "train_speed(iter/s)": 0.432832 }, { "acc": 0.84052296, "epoch": 0.23867543930901902, "grad_norm": 9.134693145751953, "learning_rate": 9.997445994269166e-06, "loss": 0.76708913, "memory(GiB)": 28.47, "step": 8815, "train_speed(iter/s)": 0.43283 }, { "acc": 0.82068768, "epoch": 0.23881081959223457, "grad_norm": 14.10361385345459, "learning_rate": 9.997428079299662e-06, "loss": 0.95722618, "memory(GiB)": 28.47, "step": 8820, "train_speed(iter/s)": 0.43283 }, { "acc": 0.84334154, "epoch": 0.23894619987545013, "grad_norm": 12.155356407165527, "learning_rate": 9.997410101733816e-06, "loss": 0.81522064, "memory(GiB)": 28.47, "step": 8825, "train_speed(iter/s)": 0.432832 }, { "acc": 0.84597178, "epoch": 0.23908158015866568, "grad_norm": 18.281591415405273, "learning_rate": 9.997392061571853e-06, "loss": 0.78984275, "memory(GiB)": 28.47, "step": 8830, "train_speed(iter/s)": 0.432834 }, { "acc": 0.84133205, "epoch": 0.23921696044188123, "grad_norm": 12.619462013244629, "learning_rate": 9.997373958813998e-06, "loss": 0.83135662, "memory(GiB)": 28.47, "step": 8835, "train_speed(iter/s)": 0.432834 }, { "acc": 0.85597782, "epoch": 0.23935234072509678, "grad_norm": 8.251482963562012, "learning_rate": 9.997355793460479e-06, "loss": 0.79072409, "memory(GiB)": 28.47, "step": 8840, "train_speed(iter/s)": 0.432835 }, { "acc": 0.84655609, "epoch": 0.23948772100831234, "grad_norm": 5.954738140106201, "learning_rate": 9.99733756551152e-06, "loss": 0.77425084, "memory(GiB)": 28.47, "step": 8845, "train_speed(iter/s)": 0.432836 }, { "acc": 0.83770657, "epoch": 0.2396231012915279, "grad_norm": 7.9721808433532715, "learning_rate": 9.99731927496735e-06, "loss": 0.75487103, "memory(GiB)": 28.47, "step": 8850, "train_speed(iter/s)": 0.432836 }, { "acc": 0.85681543, "epoch": 0.23975848157474344, "grad_norm": 15.513980865478516, "learning_rate": 9.997300921828205e-06, "loss": 0.86201878, "memory(GiB)": 28.47, "step": 8855, "train_speed(iter/s)": 0.432836 }, { "acc": 0.8380188, "epoch": 0.239893861857959, "grad_norm": 7.153443813323975, "learning_rate": 9.997282506094306e-06, "loss": 0.84388466, "memory(GiB)": 28.47, "step": 8860, "train_speed(iter/s)": 0.432836 }, { "acc": 0.85984192, "epoch": 0.24002924214117455, "grad_norm": 14.044923782348633, "learning_rate": 9.997264027765889e-06, "loss": 0.73156223, "memory(GiB)": 28.47, "step": 8865, "train_speed(iter/s)": 0.432837 }, { "acc": 0.83110924, "epoch": 0.2401646224243901, "grad_norm": 8.278010368347168, "learning_rate": 9.997245486843182e-06, "loss": 0.85620661, "memory(GiB)": 28.47, "step": 8870, "train_speed(iter/s)": 0.432839 }, { "acc": 0.83646431, "epoch": 0.24030000270760565, "grad_norm": 6.159977436065674, "learning_rate": 9.997226883326421e-06, "loss": 0.80884819, "memory(GiB)": 28.47, "step": 8875, "train_speed(iter/s)": 0.43284 }, { "acc": 0.83874006, "epoch": 0.2404353829908212, "grad_norm": 10.8474760055542, "learning_rate": 9.997208217215838e-06, "loss": 0.87004108, "memory(GiB)": 28.47, "step": 8880, "train_speed(iter/s)": 0.43284 }, { "acc": 0.82743244, "epoch": 0.24057076327403676, "grad_norm": 7.073341369628906, "learning_rate": 9.997189488511664e-06, "loss": 0.81090822, "memory(GiB)": 28.47, "step": 8885, "train_speed(iter/s)": 0.432841 }, { "acc": 0.84619751, "epoch": 0.2407061435572523, "grad_norm": 8.856756210327148, "learning_rate": 9.997170697214136e-06, "loss": 0.72628746, "memory(GiB)": 28.47, "step": 8890, "train_speed(iter/s)": 0.432842 }, { "acc": 0.84813614, "epoch": 0.24084152384046786, "grad_norm": 9.777007102966309, "learning_rate": 9.997151843323491e-06, "loss": 0.78935752, "memory(GiB)": 28.47, "step": 8895, "train_speed(iter/s)": 0.432843 }, { "acc": 0.86361771, "epoch": 0.24097690412368342, "grad_norm": 13.949838638305664, "learning_rate": 9.997132926839962e-06, "loss": 0.65124998, "memory(GiB)": 28.47, "step": 8900, "train_speed(iter/s)": 0.432843 }, { "acc": 0.84845428, "epoch": 0.24111228440689897, "grad_norm": 20.66562843322754, "learning_rate": 9.997113947763788e-06, "loss": 0.82981472, "memory(GiB)": 28.47, "step": 8905, "train_speed(iter/s)": 0.432842 }, { "acc": 0.8242754, "epoch": 0.24124766469011452, "grad_norm": 8.034099578857422, "learning_rate": 9.997094906095204e-06, "loss": 0.88384857, "memory(GiB)": 28.47, "step": 8910, "train_speed(iter/s)": 0.432844 }, { "acc": 0.80560484, "epoch": 0.24138304497333007, "grad_norm": 11.921720504760742, "learning_rate": 9.997075801834452e-06, "loss": 0.992204, "memory(GiB)": 28.47, "step": 8915, "train_speed(iter/s)": 0.432845 }, { "acc": 0.83722515, "epoch": 0.24151842525654563, "grad_norm": 12.9072847366333, "learning_rate": 9.997056634981768e-06, "loss": 0.72106009, "memory(GiB)": 28.47, "step": 8920, "train_speed(iter/s)": 0.432847 }, { "acc": 0.85670376, "epoch": 0.24165380553976118, "grad_norm": 9.292198181152344, "learning_rate": 9.997037405537397e-06, "loss": 0.70801768, "memory(GiB)": 28.47, "step": 8925, "train_speed(iter/s)": 0.432848 }, { "acc": 0.85031796, "epoch": 0.24178918582297673, "grad_norm": 10.87301254272461, "learning_rate": 9.997018113501575e-06, "loss": 0.76559563, "memory(GiB)": 28.47, "step": 8930, "train_speed(iter/s)": 0.432847 }, { "acc": 0.83073912, "epoch": 0.24192456610619228, "grad_norm": 10.922086715698242, "learning_rate": 9.996998758874545e-06, "loss": 0.91092415, "memory(GiB)": 28.47, "step": 8935, "train_speed(iter/s)": 0.432846 }, { "acc": 0.83393021, "epoch": 0.24205994638940784, "grad_norm": 11.984174728393555, "learning_rate": 9.99697934165655e-06, "loss": 0.89133215, "memory(GiB)": 28.47, "step": 8940, "train_speed(iter/s)": 0.432848 }, { "acc": 0.83100109, "epoch": 0.2421953266726234, "grad_norm": 13.859021186828613, "learning_rate": 9.996959861847835e-06, "loss": 0.85237064, "memory(GiB)": 28.47, "step": 8945, "train_speed(iter/s)": 0.432848 }, { "acc": 0.82108221, "epoch": 0.24233070695583894, "grad_norm": 9.91993522644043, "learning_rate": 9.996940319448642e-06, "loss": 0.96026249, "memory(GiB)": 28.47, "step": 8950, "train_speed(iter/s)": 0.432848 }, { "acc": 0.86191111, "epoch": 0.2424660872390545, "grad_norm": 7.943027496337891, "learning_rate": 9.996920714459213e-06, "loss": 0.66942124, "memory(GiB)": 28.47, "step": 8955, "train_speed(iter/s)": 0.432849 }, { "acc": 0.84343567, "epoch": 0.24260146752227005, "grad_norm": 7.018692493438721, "learning_rate": 9.9969010468798e-06, "loss": 0.87830524, "memory(GiB)": 28.47, "step": 8960, "train_speed(iter/s)": 0.432848 }, { "acc": 0.85687923, "epoch": 0.2427368478054856, "grad_norm": 10.1292142868042, "learning_rate": 9.996881316710644e-06, "loss": 0.76273584, "memory(GiB)": 28.47, "step": 8965, "train_speed(iter/s)": 0.432849 }, { "acc": 0.86046295, "epoch": 0.24287222808870115, "grad_norm": 17.724672317504883, "learning_rate": 9.996861523951997e-06, "loss": 0.74219751, "memory(GiB)": 28.47, "step": 8970, "train_speed(iter/s)": 0.432851 }, { "acc": 0.83989544, "epoch": 0.2430076083719167, "grad_norm": 9.096482276916504, "learning_rate": 9.996841668604101e-06, "loss": 0.84671822, "memory(GiB)": 28.47, "step": 8975, "train_speed(iter/s)": 0.432851 }, { "acc": 0.84554462, "epoch": 0.24314298865513226, "grad_norm": 22.912952423095703, "learning_rate": 9.996821750667208e-06, "loss": 0.72615485, "memory(GiB)": 28.47, "step": 8980, "train_speed(iter/s)": 0.432853 }, { "acc": 0.83490124, "epoch": 0.2432783689383478, "grad_norm": 9.243379592895508, "learning_rate": 9.996801770141568e-06, "loss": 0.87572241, "memory(GiB)": 28.47, "step": 8985, "train_speed(iter/s)": 0.432854 }, { "acc": 0.83799334, "epoch": 0.24341374922156336, "grad_norm": 9.736285209655762, "learning_rate": 9.996781727027432e-06, "loss": 0.80839167, "memory(GiB)": 28.47, "step": 8990, "train_speed(iter/s)": 0.432849 }, { "acc": 0.83934212, "epoch": 0.24354912950477892, "grad_norm": 10.080693244934082, "learning_rate": 9.996761621325047e-06, "loss": 0.80272789, "memory(GiB)": 28.47, "step": 8995, "train_speed(iter/s)": 0.43285 }, { "acc": 0.8406168, "epoch": 0.24368450978799447, "grad_norm": 7.583650588989258, "learning_rate": 9.996741453034668e-06, "loss": 0.86249266, "memory(GiB)": 28.47, "step": 9000, "train_speed(iter/s)": 0.432852 }, { "acc": 0.83847399, "epoch": 0.24381989007121002, "grad_norm": 11.407156944274902, "learning_rate": 9.996721222156548e-06, "loss": 0.77038736, "memory(GiB)": 28.47, "step": 9005, "train_speed(iter/s)": 0.432853 }, { "acc": 0.83079853, "epoch": 0.24395527035442557, "grad_norm": 8.415976524353027, "learning_rate": 9.99670092869094e-06, "loss": 0.81252079, "memory(GiB)": 28.47, "step": 9010, "train_speed(iter/s)": 0.432853 }, { "acc": 0.83356619, "epoch": 0.24409065063764113, "grad_norm": 15.671250343322754, "learning_rate": 9.996680572638098e-06, "loss": 0.88562832, "memory(GiB)": 28.47, "step": 9015, "train_speed(iter/s)": 0.432855 }, { "acc": 0.82116241, "epoch": 0.24422603092085668, "grad_norm": 8.076355934143066, "learning_rate": 9.996660153998277e-06, "loss": 0.87242699, "memory(GiB)": 28.47, "step": 9020, "train_speed(iter/s)": 0.432855 }, { "acc": 0.84157286, "epoch": 0.24436141120407223, "grad_norm": 6.982852458953857, "learning_rate": 9.996639672771733e-06, "loss": 0.76972337, "memory(GiB)": 28.47, "step": 9025, "train_speed(iter/s)": 0.432856 }, { "acc": 0.83258457, "epoch": 0.24449679148728778, "grad_norm": 6.611621379852295, "learning_rate": 9.996619128958719e-06, "loss": 0.84278803, "memory(GiB)": 28.47, "step": 9030, "train_speed(iter/s)": 0.432858 }, { "acc": 0.84620647, "epoch": 0.24463217177050334, "grad_norm": 18.633386611938477, "learning_rate": 9.996598522559499e-06, "loss": 0.80635986, "memory(GiB)": 28.47, "step": 9035, "train_speed(iter/s)": 0.432858 }, { "acc": 0.83671646, "epoch": 0.2447675520537189, "grad_norm": 24.495771408081055, "learning_rate": 9.996577853574326e-06, "loss": 0.92017517, "memory(GiB)": 28.47, "step": 9040, "train_speed(iter/s)": 0.432858 }, { "acc": 0.82810011, "epoch": 0.24490293233693444, "grad_norm": 18.123952865600586, "learning_rate": 9.99655712200346e-06, "loss": 0.96335487, "memory(GiB)": 28.47, "step": 9045, "train_speed(iter/s)": 0.432859 }, { "acc": 0.85768795, "epoch": 0.24503831262015, "grad_norm": 7.925358295440674, "learning_rate": 9.996536327847163e-06, "loss": 0.68906198, "memory(GiB)": 28.47, "step": 9050, "train_speed(iter/s)": 0.43286 }, { "acc": 0.84503059, "epoch": 0.24517369290336555, "grad_norm": 9.020931243896484, "learning_rate": 9.996515471105694e-06, "loss": 0.7735014, "memory(GiB)": 28.47, "step": 9055, "train_speed(iter/s)": 0.432862 }, { "acc": 0.83489237, "epoch": 0.2453090731865811, "grad_norm": 8.641921997070312, "learning_rate": 9.996494551779312e-06, "loss": 0.9036869, "memory(GiB)": 28.47, "step": 9060, "train_speed(iter/s)": 0.432863 }, { "acc": 0.84842968, "epoch": 0.24544445346979665, "grad_norm": 8.315512657165527, "learning_rate": 9.996473569868283e-06, "loss": 0.7579504, "memory(GiB)": 28.47, "step": 9065, "train_speed(iter/s)": 0.432863 }, { "acc": 0.85101624, "epoch": 0.2455798337530122, "grad_norm": 6.708447456359863, "learning_rate": 9.996452525372867e-06, "loss": 0.7846714, "memory(GiB)": 28.47, "step": 9070, "train_speed(iter/s)": 0.432865 }, { "acc": 0.82795935, "epoch": 0.24571521403622776, "grad_norm": 18.32210922241211, "learning_rate": 9.99643141829333e-06, "loss": 0.87525578, "memory(GiB)": 28.47, "step": 9075, "train_speed(iter/s)": 0.432864 }, { "acc": 0.83076191, "epoch": 0.2458505943194433, "grad_norm": 20.11400604248047, "learning_rate": 9.996410248629934e-06, "loss": 0.81694536, "memory(GiB)": 28.47, "step": 9080, "train_speed(iter/s)": 0.432865 }, { "acc": 0.85000553, "epoch": 0.24598597460265886, "grad_norm": 15.155071258544922, "learning_rate": 9.996389016382945e-06, "loss": 0.7455349, "memory(GiB)": 28.47, "step": 9085, "train_speed(iter/s)": 0.432866 }, { "acc": 0.83207378, "epoch": 0.24612135488587442, "grad_norm": 8.684244155883789, "learning_rate": 9.996367721552629e-06, "loss": 0.9621253, "memory(GiB)": 28.47, "step": 9090, "train_speed(iter/s)": 0.432867 }, { "acc": 0.85986605, "epoch": 0.24625673516908997, "grad_norm": 13.997823715209961, "learning_rate": 9.996346364139253e-06, "loss": 0.66711216, "memory(GiB)": 28.47, "step": 9095, "train_speed(iter/s)": 0.432867 }, { "acc": 0.85809355, "epoch": 0.24639211545230552, "grad_norm": 15.112324714660645, "learning_rate": 9.996324944143084e-06, "loss": 0.76933885, "memory(GiB)": 28.47, "step": 9100, "train_speed(iter/s)": 0.432867 }, { "acc": 0.83305492, "epoch": 0.24652749573552107, "grad_norm": 11.674927711486816, "learning_rate": 9.996303461564393e-06, "loss": 0.7411685, "memory(GiB)": 28.47, "step": 9105, "train_speed(iter/s)": 0.432865 }, { "acc": 0.86138573, "epoch": 0.24666287601873663, "grad_norm": 5.928935527801514, "learning_rate": 9.996281916403444e-06, "loss": 0.69156265, "memory(GiB)": 28.47, "step": 9110, "train_speed(iter/s)": 0.432865 }, { "acc": 0.84011288, "epoch": 0.24679825630195218, "grad_norm": 9.421463012695312, "learning_rate": 9.996260308660512e-06, "loss": 0.83205957, "memory(GiB)": 28.47, "step": 9115, "train_speed(iter/s)": 0.432866 }, { "acc": 0.79934244, "epoch": 0.24693363658516773, "grad_norm": 8.139548301696777, "learning_rate": 9.996238638335865e-06, "loss": 1.0971199, "memory(GiB)": 28.47, "step": 9120, "train_speed(iter/s)": 0.432867 }, { "acc": 0.86689959, "epoch": 0.24706901686838328, "grad_norm": 15.069989204406738, "learning_rate": 9.996216905429773e-06, "loss": 0.63262548, "memory(GiB)": 28.47, "step": 9125, "train_speed(iter/s)": 0.432869 }, { "acc": 0.83041019, "epoch": 0.24720439715159884, "grad_norm": 13.062505722045898, "learning_rate": 9.996195109942514e-06, "loss": 0.77320437, "memory(GiB)": 28.47, "step": 9130, "train_speed(iter/s)": 0.432869 }, { "acc": 0.83022499, "epoch": 0.2473397774348144, "grad_norm": 9.129217147827148, "learning_rate": 9.996173251874353e-06, "loss": 0.80343094, "memory(GiB)": 28.47, "step": 9135, "train_speed(iter/s)": 0.43287 }, { "acc": 0.82722683, "epoch": 0.24747515771802994, "grad_norm": 12.66303825378418, "learning_rate": 9.996151331225572e-06, "loss": 0.90289488, "memory(GiB)": 28.47, "step": 9140, "train_speed(iter/s)": 0.43287 }, { "acc": 0.84945965, "epoch": 0.2476105380012455, "grad_norm": 9.270899772644043, "learning_rate": 9.996129347996439e-06, "loss": 0.7636827, "memory(GiB)": 28.47, "step": 9145, "train_speed(iter/s)": 0.432871 }, { "acc": 0.87193804, "epoch": 0.24774591828446105, "grad_norm": 6.35015869140625, "learning_rate": 9.996107302187232e-06, "loss": 0.60394897, "memory(GiB)": 28.47, "step": 9150, "train_speed(iter/s)": 0.432872 }, { "acc": 0.81057529, "epoch": 0.2478812985676766, "grad_norm": 11.82265853881836, "learning_rate": 9.996085193798228e-06, "loss": 0.98431683, "memory(GiB)": 28.47, "step": 9155, "train_speed(iter/s)": 0.432872 }, { "acc": 0.82100658, "epoch": 0.24801667885089215, "grad_norm": 74.1897201538086, "learning_rate": 9.996063022829704e-06, "loss": 0.82365313, "memory(GiB)": 28.47, "step": 9160, "train_speed(iter/s)": 0.432872 }, { "acc": 0.8003973, "epoch": 0.2481520591341077, "grad_norm": 11.35525131225586, "learning_rate": 9.996040789281936e-06, "loss": 1.02369766, "memory(GiB)": 28.47, "step": 9165, "train_speed(iter/s)": 0.432873 }, { "acc": 0.85916595, "epoch": 0.24828743941732326, "grad_norm": 7.9248552322387695, "learning_rate": 9.996018493155202e-06, "loss": 0.65107546, "memory(GiB)": 28.47, "step": 9170, "train_speed(iter/s)": 0.432873 }, { "acc": 0.85463486, "epoch": 0.2484228197005388, "grad_norm": 8.455409049987793, "learning_rate": 9.995996134449785e-06, "loss": 0.79247856, "memory(GiB)": 28.47, "step": 9175, "train_speed(iter/s)": 0.432874 }, { "acc": 0.8493062, "epoch": 0.24855819998375436, "grad_norm": 10.55017375946045, "learning_rate": 9.995973713165961e-06, "loss": 0.70745859, "memory(GiB)": 28.47, "step": 9180, "train_speed(iter/s)": 0.432876 }, { "acc": 0.82097683, "epoch": 0.24869358026696992, "grad_norm": 10.493437767028809, "learning_rate": 9.995951229304016e-06, "loss": 0.92594738, "memory(GiB)": 28.47, "step": 9185, "train_speed(iter/s)": 0.432877 }, { "acc": 0.83645811, "epoch": 0.24882896055018547, "grad_norm": 13.908914566040039, "learning_rate": 9.995928682864225e-06, "loss": 0.70812826, "memory(GiB)": 28.47, "step": 9190, "train_speed(iter/s)": 0.432877 }, { "acc": 0.82554455, "epoch": 0.24896434083340102, "grad_norm": 6.717907428741455, "learning_rate": 9.995906073846873e-06, "loss": 0.9887394, "memory(GiB)": 28.47, "step": 9195, "train_speed(iter/s)": 0.432876 }, { "acc": 0.83453312, "epoch": 0.24909972111661657, "grad_norm": 6.4943766593933105, "learning_rate": 9.995883402252247e-06, "loss": 0.83868141, "memory(GiB)": 28.47, "step": 9200, "train_speed(iter/s)": 0.432877 }, { "acc": 0.81000156, "epoch": 0.24923510139983213, "grad_norm": 19.442480087280273, "learning_rate": 9.995860668080626e-06, "loss": 1.04077234, "memory(GiB)": 28.47, "step": 9205, "train_speed(iter/s)": 0.432877 }, { "acc": 0.84030676, "epoch": 0.24937048168304768, "grad_norm": 16.198057174682617, "learning_rate": 9.995837871332298e-06, "loss": 0.73385382, "memory(GiB)": 28.47, "step": 9210, "train_speed(iter/s)": 0.432877 }, { "acc": 0.84543476, "epoch": 0.24950586196626323, "grad_norm": 8.037443161010742, "learning_rate": 9.995815012007545e-06, "loss": 0.7471633, "memory(GiB)": 28.47, "step": 9215, "train_speed(iter/s)": 0.432879 }, { "acc": 0.84522295, "epoch": 0.24964124224947878, "grad_norm": 10.490013122558594, "learning_rate": 9.995792090106658e-06, "loss": 0.79833603, "memory(GiB)": 28.47, "step": 9220, "train_speed(iter/s)": 0.43288 }, { "acc": 0.82272415, "epoch": 0.24977662253269434, "grad_norm": 9.984091758728027, "learning_rate": 9.995769105629922e-06, "loss": 0.87212391, "memory(GiB)": 28.47, "step": 9225, "train_speed(iter/s)": 0.43288 }, { "acc": 0.83379631, "epoch": 0.2499120028159099, "grad_norm": 6.4929609298706055, "learning_rate": 9.995746058577623e-06, "loss": 0.87410011, "memory(GiB)": 28.47, "step": 9230, "train_speed(iter/s)": 0.432879 }, { "acc": 0.858778, "epoch": 0.25004738309912544, "grad_norm": 10.711402893066406, "learning_rate": 9.995722948950052e-06, "loss": 0.71781702, "memory(GiB)": 28.47, "step": 9235, "train_speed(iter/s)": 0.432881 }, { "acc": 0.84864197, "epoch": 0.250182763382341, "grad_norm": 8.633685111999512, "learning_rate": 9.995699776747499e-06, "loss": 0.76503801, "memory(GiB)": 28.47, "step": 9240, "train_speed(iter/s)": 0.432882 }, { "acc": 0.86370964, "epoch": 0.25031814366555655, "grad_norm": 7.675531387329102, "learning_rate": 9.995676541970254e-06, "loss": 0.64562521, "memory(GiB)": 28.47, "step": 9245, "train_speed(iter/s)": 0.432883 }, { "acc": 0.81933956, "epoch": 0.2504535239487721, "grad_norm": 17.58249855041504, "learning_rate": 9.995653244618603e-06, "loss": 0.96274023, "memory(GiB)": 28.47, "step": 9250, "train_speed(iter/s)": 0.432883 }, { "acc": 0.82816792, "epoch": 0.25058890423198765, "grad_norm": 5.963950157165527, "learning_rate": 9.995629884692848e-06, "loss": 0.85869446, "memory(GiB)": 28.47, "step": 9255, "train_speed(iter/s)": 0.432883 }, { "acc": 0.84604645, "epoch": 0.25072428451520323, "grad_norm": 12.330341339111328, "learning_rate": 9.995606462193271e-06, "loss": 0.75865769, "memory(GiB)": 28.47, "step": 9260, "train_speed(iter/s)": 0.432883 }, { "acc": 0.86316357, "epoch": 0.25085966479841876, "grad_norm": 10.418169021606445, "learning_rate": 9.995582977120173e-06, "loss": 0.58162451, "memory(GiB)": 28.47, "step": 9265, "train_speed(iter/s)": 0.432885 }, { "acc": 0.8500535, "epoch": 0.25099504508163434, "grad_norm": 7.944705486297607, "learning_rate": 9.995559429473844e-06, "loss": 0.66566877, "memory(GiB)": 28.47, "step": 9270, "train_speed(iter/s)": 0.432884 }, { "acc": 0.8734129, "epoch": 0.25113042536484986, "grad_norm": 13.719301223754883, "learning_rate": 9.995535819254582e-06, "loss": 0.68324623, "memory(GiB)": 28.47, "step": 9275, "train_speed(iter/s)": 0.432886 }, { "acc": 0.82133007, "epoch": 0.25126580564806544, "grad_norm": 15.109112739562988, "learning_rate": 9.99551214646268e-06, "loss": 0.84186993, "memory(GiB)": 28.47, "step": 9280, "train_speed(iter/s)": 0.432886 }, { "acc": 0.86248646, "epoch": 0.25140118593128097, "grad_norm": 14.209095001220703, "learning_rate": 9.995488411098434e-06, "loss": 0.6771318, "memory(GiB)": 28.47, "step": 9285, "train_speed(iter/s)": 0.432886 }, { "acc": 0.83797426, "epoch": 0.2515365662144965, "grad_norm": 6.965208530426025, "learning_rate": 9.995464613162146e-06, "loss": 0.74522266, "memory(GiB)": 28.47, "step": 9290, "train_speed(iter/s)": 0.432887 }, { "acc": 0.83597584, "epoch": 0.2516719464977121, "grad_norm": 7.695929050445557, "learning_rate": 9.995440752654109e-06, "loss": 0.72449474, "memory(GiB)": 28.47, "step": 9295, "train_speed(iter/s)": 0.432888 }, { "acc": 0.81698933, "epoch": 0.2518073267809276, "grad_norm": 7.931333541870117, "learning_rate": 9.995416829574624e-06, "loss": 0.99008389, "memory(GiB)": 28.47, "step": 9300, "train_speed(iter/s)": 0.43289 }, { "acc": 0.82363205, "epoch": 0.2519427070641432, "grad_norm": 11.806032180786133, "learning_rate": 9.995392843923991e-06, "loss": 0.84475632, "memory(GiB)": 28.47, "step": 9305, "train_speed(iter/s)": 0.43289 }, { "acc": 0.8802639, "epoch": 0.2520780873473587, "grad_norm": 7.934165954589844, "learning_rate": 9.995368795702512e-06, "loss": 0.59023662, "memory(GiB)": 28.47, "step": 9310, "train_speed(iter/s)": 0.432886 }, { "acc": 0.86580505, "epoch": 0.2522134676305743, "grad_norm": 6.114723205566406, "learning_rate": 9.995344684910486e-06, "loss": 0.53184776, "memory(GiB)": 28.47, "step": 9315, "train_speed(iter/s)": 0.432888 }, { "acc": 0.83830242, "epoch": 0.2523488479137898, "grad_norm": 9.22684097290039, "learning_rate": 9.995320511548213e-06, "loss": 0.81605663, "memory(GiB)": 28.47, "step": 9320, "train_speed(iter/s)": 0.432888 }, { "acc": 0.85479527, "epoch": 0.2524842281970054, "grad_norm": 5.319605827331543, "learning_rate": 9.995296275616001e-06, "loss": 0.7361093, "memory(GiB)": 28.47, "step": 9325, "train_speed(iter/s)": 0.432889 }, { "acc": 0.8632926, "epoch": 0.2526196084802209, "grad_norm": 8.107354164123535, "learning_rate": 9.99527197711415e-06, "loss": 0.70820503, "memory(GiB)": 28.47, "step": 9330, "train_speed(iter/s)": 0.432888 }, { "acc": 0.84884624, "epoch": 0.2527549887634365, "grad_norm": 9.802459716796875, "learning_rate": 9.995247616042965e-06, "loss": 0.78434849, "memory(GiB)": 28.47, "step": 9335, "train_speed(iter/s)": 0.432889 }, { "acc": 0.82666721, "epoch": 0.252890369046652, "grad_norm": 19.35904884338379, "learning_rate": 9.995223192402751e-06, "loss": 0.9441885, "memory(GiB)": 28.47, "step": 9340, "train_speed(iter/s)": 0.432888 }, { "acc": 0.84362602, "epoch": 0.2530257493298676, "grad_norm": 12.487250328063965, "learning_rate": 9.995198706193814e-06, "loss": 0.77032533, "memory(GiB)": 28.47, "step": 9345, "train_speed(iter/s)": 0.432889 }, { "acc": 0.85204563, "epoch": 0.2531611296130831, "grad_norm": 9.791399002075195, "learning_rate": 9.995174157416463e-06, "loss": 0.81288815, "memory(GiB)": 28.47, "step": 9350, "train_speed(iter/s)": 0.43289 }, { "acc": 0.82806416, "epoch": 0.2532965098962987, "grad_norm": 7.724008083343506, "learning_rate": 9.995149546071004e-06, "loss": 0.94785995, "memory(GiB)": 28.47, "step": 9355, "train_speed(iter/s)": 0.432891 }, { "acc": 0.84476261, "epoch": 0.25343189017951423, "grad_norm": 9.17418384552002, "learning_rate": 9.995124872157743e-06, "loss": 0.79732571, "memory(GiB)": 28.47, "step": 9360, "train_speed(iter/s)": 0.432892 }, { "acc": 0.84225216, "epoch": 0.2535672704627298, "grad_norm": 9.326857566833496, "learning_rate": 9.99510013567699e-06, "loss": 0.81094074, "memory(GiB)": 28.47, "step": 9365, "train_speed(iter/s)": 0.432892 }, { "acc": 0.8428772, "epoch": 0.25370265074594534, "grad_norm": 8.093037605285645, "learning_rate": 9.995075336629059e-06, "loss": 0.72157822, "memory(GiB)": 28.47, "step": 9370, "train_speed(iter/s)": 0.432893 }, { "acc": 0.84423141, "epoch": 0.2538380310291609, "grad_norm": 12.602751731872559, "learning_rate": 9.995050475014256e-06, "loss": 0.81178589, "memory(GiB)": 28.47, "step": 9375, "train_speed(iter/s)": 0.432889 }, { "acc": 0.85508404, "epoch": 0.25397341131237644, "grad_norm": 7.560980796813965, "learning_rate": 9.995025550832895e-06, "loss": 0.71955795, "memory(GiB)": 28.47, "step": 9380, "train_speed(iter/s)": 0.432889 }, { "acc": 0.81788311, "epoch": 0.254108791595592, "grad_norm": 9.462220191955566, "learning_rate": 9.995000564085286e-06, "loss": 0.9666914, "memory(GiB)": 28.47, "step": 9385, "train_speed(iter/s)": 0.432888 }, { "acc": 0.83430004, "epoch": 0.25424417187880755, "grad_norm": 10.132489204406738, "learning_rate": 9.994975514771744e-06, "loss": 0.8849781, "memory(GiB)": 28.47, "step": 9390, "train_speed(iter/s)": 0.432884 }, { "acc": 0.84711313, "epoch": 0.2543795521620231, "grad_norm": 8.586897850036621, "learning_rate": 9.994950402892582e-06, "loss": 0.80241375, "memory(GiB)": 28.47, "step": 9395, "train_speed(iter/s)": 0.43288 }, { "acc": 0.83201733, "epoch": 0.25451493244523865, "grad_norm": 4.313867568969727, "learning_rate": 9.994925228448113e-06, "loss": 0.82490578, "memory(GiB)": 28.47, "step": 9400, "train_speed(iter/s)": 0.432875 }, { "acc": 0.82409716, "epoch": 0.25465031272845423, "grad_norm": 7.4447736740112305, "learning_rate": 9.994899991438657e-06, "loss": 0.88802176, "memory(GiB)": 28.47, "step": 9405, "train_speed(iter/s)": 0.432872 }, { "acc": 0.84738197, "epoch": 0.25478569301166976, "grad_norm": 32.42693328857422, "learning_rate": 9.994874691864525e-06, "loss": 0.82642298, "memory(GiB)": 28.47, "step": 9410, "train_speed(iter/s)": 0.432865 }, { "acc": 0.84521637, "epoch": 0.25492107329488534, "grad_norm": 10.976373672485352, "learning_rate": 9.994849329726037e-06, "loss": 0.76767244, "memory(GiB)": 28.47, "step": 9415, "train_speed(iter/s)": 0.432861 }, { "acc": 0.84585018, "epoch": 0.25505645357810086, "grad_norm": 11.702845573425293, "learning_rate": 9.99482390502351e-06, "loss": 0.78296103, "memory(GiB)": 28.47, "step": 9420, "train_speed(iter/s)": 0.432857 }, { "acc": 0.82108116, "epoch": 0.25519183386131644, "grad_norm": 10.643749237060547, "learning_rate": 9.99479841775726e-06, "loss": 0.98755674, "memory(GiB)": 28.47, "step": 9425, "train_speed(iter/s)": 0.432854 }, { "acc": 0.83175907, "epoch": 0.25532721414453197, "grad_norm": 19.511791229248047, "learning_rate": 9.994772867927612e-06, "loss": 0.91962814, "memory(GiB)": 28.47, "step": 9430, "train_speed(iter/s)": 0.432851 }, { "acc": 0.82661076, "epoch": 0.25546259442774755, "grad_norm": 11.095688819885254, "learning_rate": 9.99474725553488e-06, "loss": 0.83563328, "memory(GiB)": 28.47, "step": 9435, "train_speed(iter/s)": 0.432849 }, { "acc": 0.83939972, "epoch": 0.25559797471096307, "grad_norm": 8.985230445861816, "learning_rate": 9.994721580579388e-06, "loss": 0.87869091, "memory(GiB)": 28.47, "step": 9440, "train_speed(iter/s)": 0.432847 }, { "acc": 0.83047409, "epoch": 0.25573335499417865, "grad_norm": 7.8643903732299805, "learning_rate": 9.994695843061458e-06, "loss": 0.86930523, "memory(GiB)": 28.47, "step": 9445, "train_speed(iter/s)": 0.432847 }, { "acc": 0.8317585, "epoch": 0.2558687352773942, "grad_norm": 13.772309303283691, "learning_rate": 9.994670042981412e-06, "loss": 0.8806695, "memory(GiB)": 28.47, "step": 9450, "train_speed(iter/s)": 0.432849 }, { "acc": 0.83531322, "epoch": 0.25600411556060976, "grad_norm": 6.16581392288208, "learning_rate": 9.99464418033957e-06, "loss": 0.71674376, "memory(GiB)": 28.47, "step": 9455, "train_speed(iter/s)": 0.432848 }, { "acc": 0.83115101, "epoch": 0.2561394958438253, "grad_norm": 4.997987747192383, "learning_rate": 9.99461825513626e-06, "loss": 0.88436069, "memory(GiB)": 28.47, "step": 9460, "train_speed(iter/s)": 0.432849 }, { "acc": 0.83739853, "epoch": 0.25627487612704086, "grad_norm": 8.890103340148926, "learning_rate": 9.994592267371805e-06, "loss": 0.83388529, "memory(GiB)": 28.47, "step": 9465, "train_speed(iter/s)": 0.432851 }, { "acc": 0.83548441, "epoch": 0.2564102564102564, "grad_norm": 9.27535343170166, "learning_rate": 9.994566217046532e-06, "loss": 0.78441324, "memory(GiB)": 28.47, "step": 9470, "train_speed(iter/s)": 0.432852 }, { "acc": 0.85758982, "epoch": 0.25654563669347197, "grad_norm": 5.778042793273926, "learning_rate": 9.994540104160766e-06, "loss": 0.70277185, "memory(GiB)": 28.47, "step": 9475, "train_speed(iter/s)": 0.432853 }, { "acc": 0.84010725, "epoch": 0.2566810169766875, "grad_norm": 8.258101463317871, "learning_rate": 9.994513928714834e-06, "loss": 0.72636619, "memory(GiB)": 28.47, "step": 9480, "train_speed(iter/s)": 0.432853 }, { "acc": 0.8585372, "epoch": 0.2568163972599031, "grad_norm": 7.7577619552612305, "learning_rate": 9.994487690709066e-06, "loss": 0.58949237, "memory(GiB)": 28.47, "step": 9485, "train_speed(iter/s)": 0.432853 }, { "acc": 0.85282726, "epoch": 0.2569517775431186, "grad_norm": 5.5778985023498535, "learning_rate": 9.994461390143788e-06, "loss": 0.74357285, "memory(GiB)": 28.47, "step": 9490, "train_speed(iter/s)": 0.432853 }, { "acc": 0.8583415, "epoch": 0.2570871578263342, "grad_norm": 7.523184299468994, "learning_rate": 9.99443502701933e-06, "loss": 0.70260754, "memory(GiB)": 28.47, "step": 9495, "train_speed(iter/s)": 0.432854 }, { "acc": 0.85689688, "epoch": 0.2572225381095497, "grad_norm": 10.752668380737305, "learning_rate": 9.994408601336022e-06, "loss": 0.66567249, "memory(GiB)": 28.47, "step": 9500, "train_speed(iter/s)": 0.432855 }, { "acc": 0.84073067, "epoch": 0.2573579183927653, "grad_norm": 9.981781005859375, "learning_rate": 9.994382113094197e-06, "loss": 0.78554258, "memory(GiB)": 28.47, "step": 9505, "train_speed(iter/s)": 0.432856 }, { "acc": 0.78738794, "epoch": 0.2574932986759808, "grad_norm": 8.042664527893066, "learning_rate": 9.994355562294186e-06, "loss": 1.04120836, "memory(GiB)": 28.47, "step": 9510, "train_speed(iter/s)": 0.432853 }, { "acc": 0.84598856, "epoch": 0.2576286789591964, "grad_norm": 6.820955753326416, "learning_rate": 9.99432894893632e-06, "loss": 0.82488079, "memory(GiB)": 28.47, "step": 9515, "train_speed(iter/s)": 0.432853 }, { "acc": 0.8294157, "epoch": 0.2577640592424119, "grad_norm": 8.3377046585083, "learning_rate": 9.994302273020932e-06, "loss": 0.92587271, "memory(GiB)": 28.47, "step": 9520, "train_speed(iter/s)": 0.432853 }, { "acc": 0.85395956, "epoch": 0.2578994395256275, "grad_norm": 9.153865814208984, "learning_rate": 9.994275534548361e-06, "loss": 0.80362749, "memory(GiB)": 28.47, "step": 9525, "train_speed(iter/s)": 0.432854 }, { "acc": 0.84622154, "epoch": 0.258034819808843, "grad_norm": 14.510088920593262, "learning_rate": 9.994248733518938e-06, "loss": 0.75623708, "memory(GiB)": 28.47, "step": 9530, "train_speed(iter/s)": 0.432856 }, { "acc": 0.84567633, "epoch": 0.2581702000920586, "grad_norm": 18.65243148803711, "learning_rate": 9.994221869932997e-06, "loss": 0.71455374, "memory(GiB)": 28.47, "step": 9535, "train_speed(iter/s)": 0.432858 }, { "acc": 0.80671539, "epoch": 0.2583055803752741, "grad_norm": 16.972837448120117, "learning_rate": 9.99419494379088e-06, "loss": 0.94687796, "memory(GiB)": 28.47, "step": 9540, "train_speed(iter/s)": 0.432858 }, { "acc": 0.83393822, "epoch": 0.2584409606584897, "grad_norm": 11.319768905639648, "learning_rate": 9.99416795509292e-06, "loss": 0.82715988, "memory(GiB)": 28.47, "step": 9545, "train_speed(iter/s)": 0.432859 }, { "acc": 0.81326237, "epoch": 0.25857634094170523, "grad_norm": 7.772289276123047, "learning_rate": 9.994140903839455e-06, "loss": 0.85318375, "memory(GiB)": 28.47, "step": 9550, "train_speed(iter/s)": 0.432861 }, { "acc": 0.82283859, "epoch": 0.2587117212249208, "grad_norm": 11.35995864868164, "learning_rate": 9.994113790030827e-06, "loss": 0.9102644, "memory(GiB)": 28.47, "step": 9555, "train_speed(iter/s)": 0.432861 }, { "acc": 0.83964434, "epoch": 0.25884710150813633, "grad_norm": 16.058393478393555, "learning_rate": 9.994086613667372e-06, "loss": 0.70890217, "memory(GiB)": 28.47, "step": 9560, "train_speed(iter/s)": 0.432862 }, { "acc": 0.83596821, "epoch": 0.2589824817913519, "grad_norm": 9.319514274597168, "learning_rate": 9.994059374749435e-06, "loss": 0.810606, "memory(GiB)": 28.47, "step": 9565, "train_speed(iter/s)": 0.432863 }, { "acc": 0.84712601, "epoch": 0.25911786207456744, "grad_norm": 19.88065528869629, "learning_rate": 9.994032073277353e-06, "loss": 0.77849493, "memory(GiB)": 28.47, "step": 9570, "train_speed(iter/s)": 0.432863 }, { "acc": 0.83753004, "epoch": 0.259253242357783, "grad_norm": 9.969686508178711, "learning_rate": 9.99400470925147e-06, "loss": 0.88768559, "memory(GiB)": 28.47, "step": 9575, "train_speed(iter/s)": 0.432864 }, { "acc": 0.84848938, "epoch": 0.25938862264099855, "grad_norm": 16.666912078857422, "learning_rate": 9.993977282672128e-06, "loss": 0.88535547, "memory(GiB)": 28.47, "step": 9580, "train_speed(iter/s)": 0.432862 }, { "acc": 0.84900866, "epoch": 0.2595240029242141, "grad_norm": 9.825153350830078, "learning_rate": 9.99394979353967e-06, "loss": 0.75267086, "memory(GiB)": 28.47, "step": 9585, "train_speed(iter/s)": 0.432863 }, { "acc": 0.8552187, "epoch": 0.25965938320742965, "grad_norm": 8.103316307067871, "learning_rate": 9.993922241854442e-06, "loss": 0.6297883, "memory(GiB)": 28.47, "step": 9590, "train_speed(iter/s)": 0.432864 }, { "acc": 0.80937128, "epoch": 0.25979476349064523, "grad_norm": 7.862823486328125, "learning_rate": 9.993894627616788e-06, "loss": 0.99934826, "memory(GiB)": 28.47, "step": 9595, "train_speed(iter/s)": 0.432863 }, { "acc": 0.84374666, "epoch": 0.25993014377386076, "grad_norm": 6.160345077514648, "learning_rate": 9.993866950827056e-06, "loss": 0.8294466, "memory(GiB)": 28.47, "step": 9600, "train_speed(iter/s)": 0.432864 }, { "acc": 0.85896473, "epoch": 0.26006552405707634, "grad_norm": 4.390780448913574, "learning_rate": 9.993839211485588e-06, "loss": 0.72348394, "memory(GiB)": 28.47, "step": 9605, "train_speed(iter/s)": 0.432863 }, { "acc": 0.84103785, "epoch": 0.26020090434029186, "grad_norm": 14.911860466003418, "learning_rate": 9.993811409592737e-06, "loss": 0.83972731, "memory(GiB)": 28.47, "step": 9610, "train_speed(iter/s)": 0.432863 }, { "acc": 0.83370934, "epoch": 0.26033628462350744, "grad_norm": 7.78707218170166, "learning_rate": 9.993783545148847e-06, "loss": 0.84423676, "memory(GiB)": 28.47, "step": 9615, "train_speed(iter/s)": 0.432861 }, { "acc": 0.84186468, "epoch": 0.26047166490672297, "grad_norm": 9.502845764160156, "learning_rate": 9.99375561815427e-06, "loss": 0.81475735, "memory(GiB)": 28.47, "step": 9620, "train_speed(iter/s)": 0.432861 }, { "acc": 0.84618139, "epoch": 0.26060704518993855, "grad_norm": 29.684864044189453, "learning_rate": 9.993727628609355e-06, "loss": 0.78936219, "memory(GiB)": 28.47, "step": 9625, "train_speed(iter/s)": 0.432863 }, { "acc": 0.8683548, "epoch": 0.26074242547315407, "grad_norm": 8.94357967376709, "learning_rate": 9.99369957651445e-06, "loss": 0.66911259, "memory(GiB)": 28.47, "step": 9630, "train_speed(iter/s)": 0.432863 }, { "acc": 0.85024776, "epoch": 0.26087780575636965, "grad_norm": 8.616045951843262, "learning_rate": 9.99367146186991e-06, "loss": 0.80314674, "memory(GiB)": 28.47, "step": 9635, "train_speed(iter/s)": 0.432863 }, { "acc": 0.84116335, "epoch": 0.2610131860395852, "grad_norm": 5.97199010848999, "learning_rate": 9.993643284676088e-06, "loss": 0.74692693, "memory(GiB)": 28.47, "step": 9640, "train_speed(iter/s)": 0.432864 }, { "acc": 0.8155551, "epoch": 0.26114856632280076, "grad_norm": 12.414862632751465, "learning_rate": 9.993615044933332e-06, "loss": 0.96226406, "memory(GiB)": 28.47, "step": 9645, "train_speed(iter/s)": 0.432863 }, { "acc": 0.82734499, "epoch": 0.2612839466060163, "grad_norm": 13.327662467956543, "learning_rate": 9.993586742642e-06, "loss": 0.86669655, "memory(GiB)": 28.47, "step": 9650, "train_speed(iter/s)": 0.432864 }, { "acc": 0.84419861, "epoch": 0.26141932688923186, "grad_norm": 11.167251586914062, "learning_rate": 9.993558377802445e-06, "loss": 0.76992311, "memory(GiB)": 28.47, "step": 9655, "train_speed(iter/s)": 0.432865 }, { "acc": 0.82730293, "epoch": 0.2615547071724474, "grad_norm": 8.29052734375, "learning_rate": 9.993529950415022e-06, "loss": 0.87620411, "memory(GiB)": 28.47, "step": 9660, "train_speed(iter/s)": 0.432866 }, { "acc": 0.85674858, "epoch": 0.26169008745566297, "grad_norm": 6.068314552307129, "learning_rate": 9.993501460480088e-06, "loss": 0.73030062, "memory(GiB)": 28.47, "step": 9665, "train_speed(iter/s)": 0.432866 }, { "acc": 0.83983564, "epoch": 0.2618254677388785, "grad_norm": 9.781936645507812, "learning_rate": 9.993472907998e-06, "loss": 0.82422218, "memory(GiB)": 28.47, "step": 9670, "train_speed(iter/s)": 0.432868 }, { "acc": 0.85816116, "epoch": 0.2619608480220941, "grad_norm": 8.545613288879395, "learning_rate": 9.993444292969113e-06, "loss": 0.70609212, "memory(GiB)": 28.47, "step": 9675, "train_speed(iter/s)": 0.432867 }, { "acc": 0.80514193, "epoch": 0.2620962283053096, "grad_norm": 9.182432174682617, "learning_rate": 9.99341561539379e-06, "loss": 1.05225086, "memory(GiB)": 28.47, "step": 9680, "train_speed(iter/s)": 0.432867 }, { "acc": 0.85121193, "epoch": 0.2622316085885252, "grad_norm": 8.811955451965332, "learning_rate": 9.993386875272387e-06, "loss": 0.75487118, "memory(GiB)": 28.47, "step": 9685, "train_speed(iter/s)": 0.432869 }, { "acc": 0.83802242, "epoch": 0.2623669888717407, "grad_norm": 14.721817970275879, "learning_rate": 9.993358072605265e-06, "loss": 0.82926693, "memory(GiB)": 28.47, "step": 9690, "train_speed(iter/s)": 0.432869 }, { "acc": 0.83651133, "epoch": 0.2625023691549563, "grad_norm": 12.560510635375977, "learning_rate": 9.993329207392784e-06, "loss": 0.86567564, "memory(GiB)": 28.47, "step": 9695, "train_speed(iter/s)": 0.43287 }, { "acc": 0.82162228, "epoch": 0.2626377494381718, "grad_norm": 14.302886009216309, "learning_rate": 9.993300279635308e-06, "loss": 0.96003036, "memory(GiB)": 28.47, "step": 9700, "train_speed(iter/s)": 0.43287 }, { "acc": 0.87520161, "epoch": 0.2627731297213874, "grad_norm": 5.321650505065918, "learning_rate": 9.993271289333196e-06, "loss": 0.56996984, "memory(GiB)": 28.47, "step": 9705, "train_speed(iter/s)": 0.432871 }, { "acc": 0.83040867, "epoch": 0.2629085100046029, "grad_norm": 20.914962768554688, "learning_rate": 9.993242236486812e-06, "loss": 0.88904495, "memory(GiB)": 28.47, "step": 9710, "train_speed(iter/s)": 0.432872 }, { "acc": 0.85531063, "epoch": 0.2630438902878185, "grad_norm": 14.156712532043457, "learning_rate": 9.993213121096523e-06, "loss": 0.70837736, "memory(GiB)": 28.47, "step": 9715, "train_speed(iter/s)": 0.432873 }, { "acc": 0.80469885, "epoch": 0.263179270571034, "grad_norm": 16.321935653686523, "learning_rate": 9.993183943162689e-06, "loss": 1.02882795, "memory(GiB)": 28.47, "step": 9720, "train_speed(iter/s)": 0.432872 }, { "acc": 0.86180162, "epoch": 0.2633146508542496, "grad_norm": 7.372640132904053, "learning_rate": 9.99315470268568e-06, "loss": 0.72889066, "memory(GiB)": 28.47, "step": 9725, "train_speed(iter/s)": 0.432873 }, { "acc": 0.82175455, "epoch": 0.2634500311374651, "grad_norm": 18.97603988647461, "learning_rate": 9.99312539966586e-06, "loss": 1.00384607, "memory(GiB)": 28.47, "step": 9730, "train_speed(iter/s)": 0.432874 }, { "acc": 0.83056555, "epoch": 0.2635854114206807, "grad_norm": 6.421618461608887, "learning_rate": 9.993096034103596e-06, "loss": 0.83215103, "memory(GiB)": 28.47, "step": 9735, "train_speed(iter/s)": 0.432873 }, { "acc": 0.82571564, "epoch": 0.26372079170389623, "grad_norm": 19.666234970092773, "learning_rate": 9.993066605999256e-06, "loss": 0.9186491, "memory(GiB)": 28.47, "step": 9740, "train_speed(iter/s)": 0.432873 }, { "acc": 0.84222937, "epoch": 0.2638561719871118, "grad_norm": 6.642849922180176, "learning_rate": 9.993037115353211e-06, "loss": 0.7860487, "memory(GiB)": 28.47, "step": 9745, "train_speed(iter/s)": 0.432875 }, { "acc": 0.83550415, "epoch": 0.26399155227032733, "grad_norm": 23.911237716674805, "learning_rate": 9.993007562165826e-06, "loss": 0.85004177, "memory(GiB)": 28.47, "step": 9750, "train_speed(iter/s)": 0.432875 }, { "acc": 0.83343773, "epoch": 0.2641269325535429, "grad_norm": 7.116504192352295, "learning_rate": 9.992977946437474e-06, "loss": 0.81378136, "memory(GiB)": 28.47, "step": 9755, "train_speed(iter/s)": 0.432876 }, { "acc": 0.83463249, "epoch": 0.26426231283675844, "grad_norm": 8.189997673034668, "learning_rate": 9.992948268168527e-06, "loss": 0.7948761, "memory(GiB)": 28.47, "step": 9760, "train_speed(iter/s)": 0.432875 }, { "acc": 0.83764029, "epoch": 0.264397693119974, "grad_norm": 6.365671157836914, "learning_rate": 9.992918527359355e-06, "loss": 0.74392567, "memory(GiB)": 28.47, "step": 9765, "train_speed(iter/s)": 0.432876 }, { "acc": 0.85571823, "epoch": 0.26453307340318954, "grad_norm": 10.970457077026367, "learning_rate": 9.992888724010329e-06, "loss": 0.72626171, "memory(GiB)": 28.47, "step": 9770, "train_speed(iter/s)": 0.432877 }, { "acc": 0.88276672, "epoch": 0.2646684536864051, "grad_norm": 2.799288511276245, "learning_rate": 9.992858858121824e-06, "loss": 0.59302235, "memory(GiB)": 28.47, "step": 9775, "train_speed(iter/s)": 0.432878 }, { "acc": 0.83940315, "epoch": 0.26480383396962065, "grad_norm": 14.129658699035645, "learning_rate": 9.992828929694215e-06, "loss": 0.84168797, "memory(GiB)": 28.47, "step": 9780, "train_speed(iter/s)": 0.432878 }, { "acc": 0.80385933, "epoch": 0.26493921425283623, "grad_norm": 6.63055944442749, "learning_rate": 9.992798938727877e-06, "loss": 1.01695786, "memory(GiB)": 28.47, "step": 9785, "train_speed(iter/s)": 0.432878 }, { "acc": 0.83619633, "epoch": 0.26507459453605176, "grad_norm": 9.553576469421387, "learning_rate": 9.992768885223184e-06, "loss": 0.86412201, "memory(GiB)": 28.47, "step": 9790, "train_speed(iter/s)": 0.432879 }, { "acc": 0.82173138, "epoch": 0.26520997481926734, "grad_norm": 13.347848892211914, "learning_rate": 9.992738769180515e-06, "loss": 0.91832886, "memory(GiB)": 28.47, "step": 9795, "train_speed(iter/s)": 0.432879 }, { "acc": 0.84197454, "epoch": 0.26534535510248286, "grad_norm": 11.597740173339844, "learning_rate": 9.992708590600245e-06, "loss": 0.78192177, "memory(GiB)": 28.47, "step": 9800, "train_speed(iter/s)": 0.432879 }, { "acc": 0.84875336, "epoch": 0.26548073538569844, "grad_norm": 7.017856121063232, "learning_rate": 9.992678349482751e-06, "loss": 0.82167282, "memory(GiB)": 28.47, "step": 9805, "train_speed(iter/s)": 0.432878 }, { "acc": 0.84187708, "epoch": 0.26561611566891397, "grad_norm": 11.422337532043457, "learning_rate": 9.992648045828416e-06, "loss": 0.9117094, "memory(GiB)": 28.47, "step": 9810, "train_speed(iter/s)": 0.432879 }, { "acc": 0.83022938, "epoch": 0.26575149595212955, "grad_norm": 6.668945789337158, "learning_rate": 9.992617679637617e-06, "loss": 0.8169157, "memory(GiB)": 28.47, "step": 9815, "train_speed(iter/s)": 0.43288 }, { "acc": 0.83587437, "epoch": 0.26588687623534507, "grad_norm": 8.563366889953613, "learning_rate": 9.992587250910733e-06, "loss": 0.8504096, "memory(GiB)": 28.47, "step": 9820, "train_speed(iter/s)": 0.432879 }, { "acc": 0.82649689, "epoch": 0.26602225651856065, "grad_norm": 10.546082496643066, "learning_rate": 9.992556759648147e-06, "loss": 0.88345776, "memory(GiB)": 28.47, "step": 9825, "train_speed(iter/s)": 0.432878 }, { "acc": 0.86030359, "epoch": 0.2661576368017762, "grad_norm": 10.108377456665039, "learning_rate": 9.992526205850242e-06, "loss": 0.69030857, "memory(GiB)": 28.47, "step": 9830, "train_speed(iter/s)": 0.432878 }, { "acc": 0.84377289, "epoch": 0.26629301708499176, "grad_norm": 13.75832748413086, "learning_rate": 9.992495589517399e-06, "loss": 0.76014633, "memory(GiB)": 28.47, "step": 9835, "train_speed(iter/s)": 0.432879 }, { "acc": 0.83844051, "epoch": 0.2664283973682073, "grad_norm": 7.717580318450928, "learning_rate": 9.99246491065e-06, "loss": 0.83006601, "memory(GiB)": 28.47, "step": 9840, "train_speed(iter/s)": 0.432881 }, { "acc": 0.84532194, "epoch": 0.26656377765142286, "grad_norm": 13.275409698486328, "learning_rate": 9.992434169248434e-06, "loss": 0.77443986, "memory(GiB)": 28.47, "step": 9845, "train_speed(iter/s)": 0.432882 }, { "acc": 0.84164639, "epoch": 0.2666991579346384, "grad_norm": 12.65241527557373, "learning_rate": 9.992403365313082e-06, "loss": 0.93778553, "memory(GiB)": 28.47, "step": 9850, "train_speed(iter/s)": 0.432882 }, { "acc": 0.859688, "epoch": 0.26683453821785397, "grad_norm": 12.042691230773926, "learning_rate": 9.992372498844333e-06, "loss": 0.73044162, "memory(GiB)": 28.47, "step": 9855, "train_speed(iter/s)": 0.432884 }, { "acc": 0.84293385, "epoch": 0.2669699185010695, "grad_norm": 7.693385601043701, "learning_rate": 9.99234156984257e-06, "loss": 0.70768919, "memory(GiB)": 28.47, "step": 9860, "train_speed(iter/s)": 0.432885 }, { "acc": 0.81229286, "epoch": 0.2671052987842851, "grad_norm": 7.116718292236328, "learning_rate": 9.992310578308182e-06, "loss": 0.91921968, "memory(GiB)": 28.47, "step": 9865, "train_speed(iter/s)": 0.432885 }, { "acc": 0.83154488, "epoch": 0.2672406790675006, "grad_norm": 5.968506336212158, "learning_rate": 9.99227952424156e-06, "loss": 0.9221839, "memory(GiB)": 28.47, "step": 9870, "train_speed(iter/s)": 0.432882 }, { "acc": 0.83167133, "epoch": 0.2673760593507162, "grad_norm": 14.67180347442627, "learning_rate": 9.992248407643088e-06, "loss": 0.88027496, "memory(GiB)": 28.47, "step": 9875, "train_speed(iter/s)": 0.432883 }, { "acc": 0.86040592, "epoch": 0.2675114396339317, "grad_norm": 13.018559455871582, "learning_rate": 9.992217228513159e-06, "loss": 0.68235226, "memory(GiB)": 28.47, "step": 9880, "train_speed(iter/s)": 0.432883 }, { "acc": 0.87946215, "epoch": 0.2676468199171473, "grad_norm": 6.0316081047058105, "learning_rate": 9.992185986852164e-06, "loss": 0.58489075, "memory(GiB)": 28.47, "step": 9885, "train_speed(iter/s)": 0.432885 }, { "acc": 0.86453896, "epoch": 0.2677822002003628, "grad_norm": 10.12771224975586, "learning_rate": 9.992154682660493e-06, "loss": 0.67177086, "memory(GiB)": 28.47, "step": 9890, "train_speed(iter/s)": 0.432886 }, { "acc": 0.8240098, "epoch": 0.2679175804835784, "grad_norm": 11.241543769836426, "learning_rate": 9.99212331593854e-06, "loss": 0.85599899, "memory(GiB)": 28.47, "step": 9895, "train_speed(iter/s)": 0.432886 }, { "acc": 0.81948891, "epoch": 0.2680529607667939, "grad_norm": 15.659954071044922, "learning_rate": 9.992091886686694e-06, "loss": 0.97741776, "memory(GiB)": 28.47, "step": 9900, "train_speed(iter/s)": 0.432887 }, { "acc": 0.84956913, "epoch": 0.2681883410500095, "grad_norm": 25.408931732177734, "learning_rate": 9.992060394905352e-06, "loss": 0.7342104, "memory(GiB)": 28.47, "step": 9905, "train_speed(iter/s)": 0.432888 }, { "acc": 0.82943249, "epoch": 0.268323721333225, "grad_norm": 11.668146133422852, "learning_rate": 9.99202884059491e-06, "loss": 0.86226406, "memory(GiB)": 28.47, "step": 9910, "train_speed(iter/s)": 0.43289 }, { "acc": 0.8385294, "epoch": 0.2684591016164406, "grad_norm": 12.603494644165039, "learning_rate": 9.991997223755756e-06, "loss": 0.81974115, "memory(GiB)": 28.47, "step": 9915, "train_speed(iter/s)": 0.432892 }, { "acc": 0.84851341, "epoch": 0.2685944818996561, "grad_norm": 21.722166061401367, "learning_rate": 9.991965544388295e-06, "loss": 0.78690786, "memory(GiB)": 28.47, "step": 9920, "train_speed(iter/s)": 0.432892 }, { "acc": 0.82940426, "epoch": 0.2687298621828717, "grad_norm": 54.03271484375, "learning_rate": 9.991933802492918e-06, "loss": 0.98350458, "memory(GiB)": 28.47, "step": 9925, "train_speed(iter/s)": 0.432893 }, { "acc": 0.84776688, "epoch": 0.26886524246608723, "grad_norm": 37.06486892700195, "learning_rate": 9.991901998070026e-06, "loss": 0.74095507, "memory(GiB)": 28.47, "step": 9930, "train_speed(iter/s)": 0.432889 }, { "acc": 0.8535738, "epoch": 0.2690006227493028, "grad_norm": 6.593866348266602, "learning_rate": 9.991870131120014e-06, "loss": 0.70603576, "memory(GiB)": 28.47, "step": 9935, "train_speed(iter/s)": 0.432891 }, { "acc": 0.81696529, "epoch": 0.26913600303251833, "grad_norm": 10.832534790039062, "learning_rate": 9.991838201643283e-06, "loss": 0.8969902, "memory(GiB)": 28.47, "step": 9940, "train_speed(iter/s)": 0.432893 }, { "acc": 0.84650135, "epoch": 0.2692713833157339, "grad_norm": 13.076122283935547, "learning_rate": 9.991806209640234e-06, "loss": 0.768752, "memory(GiB)": 28.47, "step": 9945, "train_speed(iter/s)": 0.432895 }, { "acc": 0.84318237, "epoch": 0.26940676359894944, "grad_norm": 32.735435485839844, "learning_rate": 9.991774155111267e-06, "loss": 0.77869134, "memory(GiB)": 28.47, "step": 9950, "train_speed(iter/s)": 0.432897 }, { "acc": 0.81618309, "epoch": 0.269542143882165, "grad_norm": 8.593345642089844, "learning_rate": 9.991742038056783e-06, "loss": 0.94670715, "memory(GiB)": 28.47, "step": 9955, "train_speed(iter/s)": 0.432897 }, { "acc": 0.82083006, "epoch": 0.26967752416538054, "grad_norm": 34.05036544799805, "learning_rate": 9.991709858477185e-06, "loss": 1.00294094, "memory(GiB)": 28.47, "step": 9960, "train_speed(iter/s)": 0.432897 }, { "acc": 0.84185724, "epoch": 0.2698129044485961, "grad_norm": 10.573719024658203, "learning_rate": 9.991677616372877e-06, "loss": 0.83606853, "memory(GiB)": 28.47, "step": 9965, "train_speed(iter/s)": 0.432898 }, { "acc": 0.8355052, "epoch": 0.26994828473181165, "grad_norm": 9.902421951293945, "learning_rate": 9.99164531174426e-06, "loss": 0.8530921, "memory(GiB)": 28.47, "step": 9970, "train_speed(iter/s)": 0.432899 }, { "acc": 0.84636841, "epoch": 0.27008366501502723, "grad_norm": 8.114364624023438, "learning_rate": 9.991612944591741e-06, "loss": 0.76672211, "memory(GiB)": 28.47, "step": 9975, "train_speed(iter/s)": 0.4329 }, { "acc": 0.84094162, "epoch": 0.27021904529824275, "grad_norm": 13.245989799499512, "learning_rate": 9.991580514915724e-06, "loss": 0.84534264, "memory(GiB)": 28.47, "step": 9980, "train_speed(iter/s)": 0.432902 }, { "acc": 0.84057236, "epoch": 0.27035442558145834, "grad_norm": 13.802678108215332, "learning_rate": 9.991548022716618e-06, "loss": 0.78930316, "memory(GiB)": 28.47, "step": 9985, "train_speed(iter/s)": 0.432903 }, { "acc": 0.8235671, "epoch": 0.27048980586467386, "grad_norm": 15.33684253692627, "learning_rate": 9.991515467994826e-06, "loss": 0.93489752, "memory(GiB)": 28.47, "step": 9990, "train_speed(iter/s)": 0.432903 }, { "acc": 0.85203199, "epoch": 0.27062518614788944, "grad_norm": 7.000151634216309, "learning_rate": 9.99148285075076e-06, "loss": 0.69577074, "memory(GiB)": 28.47, "step": 9995, "train_speed(iter/s)": 0.432904 }, { "acc": 0.83874741, "epoch": 0.27076056643110497, "grad_norm": 8.012320518493652, "learning_rate": 9.991450170984824e-06, "loss": 0.8428812, "memory(GiB)": 28.47, "step": 10000, "train_speed(iter/s)": 0.432904 }, { "epoch": 0.27076056643110497, "eval_acc": 0.5521288463514217, "eval_loss": 1.218422770500183, "eval_runtime": 1299.2518, "eval_samples_per_second": 66.427, "eval_steps_per_second": 2.077, "step": 10000 }, { "acc": 0.81848774, "epoch": 0.27089594671432055, "grad_norm": 11.73974609375, "learning_rate": 9.991417428697432e-06, "loss": 0.93288403, "memory(GiB)": 34.88, "step": 10005, "train_speed(iter/s)": 0.409475 }, { "acc": 0.83941936, "epoch": 0.27103132699753607, "grad_norm": 8.524848937988281, "learning_rate": 9.99138462388899e-06, "loss": 0.8267355, "memory(GiB)": 34.88, "step": 10010, "train_speed(iter/s)": 0.409485 }, { "acc": 0.84128227, "epoch": 0.27116670728075165, "grad_norm": 6.303038597106934, "learning_rate": 9.991351756559915e-06, "loss": 0.75332246, "memory(GiB)": 34.88, "step": 10015, "train_speed(iter/s)": 0.409496 }, { "acc": 0.82071199, "epoch": 0.2713020875639672, "grad_norm": 16.744918823242188, "learning_rate": 9.99131882671061e-06, "loss": 0.99680357, "memory(GiB)": 34.88, "step": 10020, "train_speed(iter/s)": 0.409508 }, { "acc": 0.8295723, "epoch": 0.27143746784718276, "grad_norm": 8.799184799194336, "learning_rate": 9.991285834341496e-06, "loss": 0.80626974, "memory(GiB)": 34.88, "step": 10025, "train_speed(iter/s)": 0.40952 }, { "acc": 0.84518623, "epoch": 0.2715728481303983, "grad_norm": 3.8062331676483154, "learning_rate": 9.99125277945298e-06, "loss": 0.73094711, "memory(GiB)": 34.88, "step": 10030, "train_speed(iter/s)": 0.409531 }, { "acc": 0.85207577, "epoch": 0.27170822841361386, "grad_norm": 12.756465911865234, "learning_rate": 9.99121966204548e-06, "loss": 0.78079686, "memory(GiB)": 34.88, "step": 10035, "train_speed(iter/s)": 0.409542 }, { "acc": 0.80195675, "epoch": 0.2718436086968294, "grad_norm": 27.799806594848633, "learning_rate": 9.991186482119409e-06, "loss": 1.09471598, "memory(GiB)": 34.88, "step": 10040, "train_speed(iter/s)": 0.409554 }, { "acc": 0.84751396, "epoch": 0.27197898898004497, "grad_norm": 7.97413444519043, "learning_rate": 9.991153239675184e-06, "loss": 0.78479543, "memory(GiB)": 34.88, "step": 10045, "train_speed(iter/s)": 0.409567 }, { "acc": 0.8561491, "epoch": 0.2721143692632605, "grad_norm": 10.695923805236816, "learning_rate": 9.991119934713219e-06, "loss": 0.8172822, "memory(GiB)": 34.88, "step": 10050, "train_speed(iter/s)": 0.409578 }, { "acc": 0.85197334, "epoch": 0.27224974954647607, "grad_norm": 8.067151069641113, "learning_rate": 9.991086567233933e-06, "loss": 0.74605322, "memory(GiB)": 34.88, "step": 10055, "train_speed(iter/s)": 0.409591 }, { "acc": 0.82392054, "epoch": 0.2723851298296916, "grad_norm": 9.03326416015625, "learning_rate": 9.991053137237745e-06, "loss": 0.86454563, "memory(GiB)": 34.88, "step": 10060, "train_speed(iter/s)": 0.409599 }, { "acc": 0.81218815, "epoch": 0.2725205101129072, "grad_norm": 14.222941398620605, "learning_rate": 9.991019644725072e-06, "loss": 0.90894012, "memory(GiB)": 34.88, "step": 10065, "train_speed(iter/s)": 0.409609 }, { "acc": 0.83359556, "epoch": 0.2726558903961227, "grad_norm": 10.559578895568848, "learning_rate": 9.990986089696333e-06, "loss": 0.90589619, "memory(GiB)": 34.88, "step": 10070, "train_speed(iter/s)": 0.409621 }, { "acc": 0.83400459, "epoch": 0.2727912706793383, "grad_norm": 7.352718830108643, "learning_rate": 9.990952472151952e-06, "loss": 0.85149908, "memory(GiB)": 34.88, "step": 10075, "train_speed(iter/s)": 0.409632 }, { "acc": 0.84590321, "epoch": 0.2729266509625538, "grad_norm": 10.024971008300781, "learning_rate": 9.990918792092344e-06, "loss": 0.74559216, "memory(GiB)": 34.88, "step": 10080, "train_speed(iter/s)": 0.409643 }, { "acc": 0.84810228, "epoch": 0.2730620312457694, "grad_norm": 23.434001922607422, "learning_rate": 9.990885049517936e-06, "loss": 0.8434042, "memory(GiB)": 34.88, "step": 10085, "train_speed(iter/s)": 0.409651 }, { "acc": 0.85356188, "epoch": 0.2731974115289849, "grad_norm": 6.622769355773926, "learning_rate": 9.99085124442915e-06, "loss": 0.76854305, "memory(GiB)": 34.88, "step": 10090, "train_speed(iter/s)": 0.409662 }, { "acc": 0.83783703, "epoch": 0.2733327918122005, "grad_norm": 9.491442680358887, "learning_rate": 9.990817376826409e-06, "loss": 0.81932402, "memory(GiB)": 34.88, "step": 10095, "train_speed(iter/s)": 0.409673 }, { "acc": 0.84286671, "epoch": 0.273468172095416, "grad_norm": 10.891047477722168, "learning_rate": 9.990783446710136e-06, "loss": 0.72556276, "memory(GiB)": 34.88, "step": 10100, "train_speed(iter/s)": 0.409685 }, { "acc": 0.85139999, "epoch": 0.2736035523786316, "grad_norm": 6.5681023597717285, "learning_rate": 9.990749454080758e-06, "loss": 0.82060575, "memory(GiB)": 34.88, "step": 10105, "train_speed(iter/s)": 0.409696 }, { "acc": 0.82522163, "epoch": 0.2737389326618471, "grad_norm": 26.067386627197266, "learning_rate": 9.990715398938698e-06, "loss": 0.94168949, "memory(GiB)": 34.88, "step": 10110, "train_speed(iter/s)": 0.409708 }, { "acc": 0.83274899, "epoch": 0.2738743129450627, "grad_norm": 10.666775703430176, "learning_rate": 9.990681281284385e-06, "loss": 0.8663269, "memory(GiB)": 34.88, "step": 10115, "train_speed(iter/s)": 0.409719 }, { "acc": 0.86248817, "epoch": 0.27400969322827823, "grad_norm": 30.86562156677246, "learning_rate": 9.990647101118245e-06, "loss": 0.82138968, "memory(GiB)": 34.88, "step": 10120, "train_speed(iter/s)": 0.40973 }, { "acc": 0.84528351, "epoch": 0.2741450735114938, "grad_norm": 10.01025390625, "learning_rate": 9.990612858440707e-06, "loss": 0.80674038, "memory(GiB)": 34.88, "step": 10125, "train_speed(iter/s)": 0.409742 }, { "acc": 0.87897434, "epoch": 0.27428045379470933, "grad_norm": 5.596156120300293, "learning_rate": 9.990578553252203e-06, "loss": 0.65813837, "memory(GiB)": 34.88, "step": 10130, "train_speed(iter/s)": 0.409754 }, { "acc": 0.84137211, "epoch": 0.2744158340779249, "grad_norm": 5.955329418182373, "learning_rate": 9.990544185553156e-06, "loss": 0.82983112, "memory(GiB)": 34.88, "step": 10135, "train_speed(iter/s)": 0.409766 }, { "acc": 0.82555161, "epoch": 0.27455121436114044, "grad_norm": 22.920961380004883, "learning_rate": 9.990509755344002e-06, "loss": 0.88117962, "memory(GiB)": 34.88, "step": 10140, "train_speed(iter/s)": 0.409777 }, { "acc": 0.8257267, "epoch": 0.274686594644356, "grad_norm": 12.945895195007324, "learning_rate": 9.990475262625171e-06, "loss": 0.90219421, "memory(GiB)": 34.88, "step": 10145, "train_speed(iter/s)": 0.409789 }, { "acc": 0.8591629, "epoch": 0.27482197492757154, "grad_norm": 12.23098373413086, "learning_rate": 9.990440707397092e-06, "loss": 0.67868347, "memory(GiB)": 34.88, "step": 10150, "train_speed(iter/s)": 0.4098 }, { "acc": 0.82056255, "epoch": 0.2749573552107871, "grad_norm": 19.00391960144043, "learning_rate": 9.990406089660203e-06, "loss": 0.91881886, "memory(GiB)": 34.88, "step": 10155, "train_speed(iter/s)": 0.409811 }, { "acc": 0.83253975, "epoch": 0.27509273549400265, "grad_norm": 13.520402908325195, "learning_rate": 9.990371409414934e-06, "loss": 0.85327415, "memory(GiB)": 34.88, "step": 10160, "train_speed(iter/s)": 0.409823 }, { "acc": 0.81882362, "epoch": 0.27522811577721823, "grad_norm": 13.182723045349121, "learning_rate": 9.990336666661721e-06, "loss": 0.96506748, "memory(GiB)": 34.88, "step": 10165, "train_speed(iter/s)": 0.409834 }, { "acc": 0.83558502, "epoch": 0.27536349606043375, "grad_norm": 12.211753845214844, "learning_rate": 9.990301861400999e-06, "loss": 0.8262742, "memory(GiB)": 34.88, "step": 10170, "train_speed(iter/s)": 0.409846 }, { "acc": 0.84582996, "epoch": 0.27549887634364933, "grad_norm": 11.547018051147461, "learning_rate": 9.990266993633204e-06, "loss": 0.81835375, "memory(GiB)": 34.88, "step": 10175, "train_speed(iter/s)": 0.409855 }, { "acc": 0.86528549, "epoch": 0.27563425662686486, "grad_norm": 6.034268856048584, "learning_rate": 9.99023206335877e-06, "loss": 0.69547119, "memory(GiB)": 34.88, "step": 10180, "train_speed(iter/s)": 0.409868 }, { "acc": 0.85223436, "epoch": 0.27576963691008044, "grad_norm": 9.8302001953125, "learning_rate": 9.990197070578138e-06, "loss": 0.688901, "memory(GiB)": 34.88, "step": 10185, "train_speed(iter/s)": 0.40988 }, { "acc": 0.87401352, "epoch": 0.27590501719329596, "grad_norm": 6.674774169921875, "learning_rate": 9.990162015291747e-06, "loss": 0.64699755, "memory(GiB)": 34.88, "step": 10190, "train_speed(iter/s)": 0.409893 }, { "acc": 0.8606535, "epoch": 0.27604039747651155, "grad_norm": 7.4654765129089355, "learning_rate": 9.990126897500033e-06, "loss": 0.71481729, "memory(GiB)": 34.88, "step": 10195, "train_speed(iter/s)": 0.409905 }, { "acc": 0.83144646, "epoch": 0.27617577775972707, "grad_norm": 9.306281089782715, "learning_rate": 9.990091717203438e-06, "loss": 0.89374638, "memory(GiB)": 34.88, "step": 10200, "train_speed(iter/s)": 0.409915 }, { "acc": 0.82074804, "epoch": 0.27631115804294265, "grad_norm": 21.407705307006836, "learning_rate": 9.990056474402404e-06, "loss": 1.04355106, "memory(GiB)": 34.88, "step": 10205, "train_speed(iter/s)": 0.409927 }, { "acc": 0.85973072, "epoch": 0.2764465383261582, "grad_norm": 6.908925533294678, "learning_rate": 9.990021169097368e-06, "loss": 0.72565246, "memory(GiB)": 34.88, "step": 10210, "train_speed(iter/s)": 0.409938 }, { "acc": 0.84722319, "epoch": 0.27658191860937376, "grad_norm": 10.707244873046875, "learning_rate": 9.989985801288777e-06, "loss": 0.85377235, "memory(GiB)": 34.88, "step": 10215, "train_speed(iter/s)": 0.409949 }, { "acc": 0.88601694, "epoch": 0.2767172988925893, "grad_norm": 6.467896461486816, "learning_rate": 9.989950370977072e-06, "loss": 0.55421314, "memory(GiB)": 34.88, "step": 10220, "train_speed(iter/s)": 0.409961 }, { "acc": 0.82151928, "epoch": 0.27685267917580486, "grad_norm": 12.517912864685059, "learning_rate": 9.989914878162695e-06, "loss": 0.85352211, "memory(GiB)": 34.88, "step": 10225, "train_speed(iter/s)": 0.409973 }, { "acc": 0.86344013, "epoch": 0.2769880594590204, "grad_norm": 11.258623123168945, "learning_rate": 9.989879322846093e-06, "loss": 0.72798433, "memory(GiB)": 34.88, "step": 10230, "train_speed(iter/s)": 0.409981 }, { "acc": 0.87424173, "epoch": 0.27712343974223597, "grad_norm": 7.514410495758057, "learning_rate": 9.98984370502771e-06, "loss": 0.61834283, "memory(GiB)": 34.88, "step": 10235, "train_speed(iter/s)": 0.409993 }, { "acc": 0.83072386, "epoch": 0.2772588200254515, "grad_norm": 12.936874389648438, "learning_rate": 9.989808024707996e-06, "loss": 0.83011503, "memory(GiB)": 34.88, "step": 10240, "train_speed(iter/s)": 0.410004 }, { "acc": 0.85471039, "epoch": 0.27739420030866707, "grad_norm": 9.50017261505127, "learning_rate": 9.989772281887393e-06, "loss": 0.72412348, "memory(GiB)": 34.88, "step": 10245, "train_speed(iter/s)": 0.410012 }, { "acc": 0.82402382, "epoch": 0.2775295805918826, "grad_norm": 8.639623641967773, "learning_rate": 9.98973647656635e-06, "loss": 0.88397923, "memory(GiB)": 34.88, "step": 10250, "train_speed(iter/s)": 0.410023 }, { "acc": 0.83334951, "epoch": 0.2776649608750982, "grad_norm": 11.29455280303955, "learning_rate": 9.989700608745319e-06, "loss": 0.85820618, "memory(GiB)": 34.88, "step": 10255, "train_speed(iter/s)": 0.410034 }, { "acc": 0.85265112, "epoch": 0.2778003411583137, "grad_norm": 5.3350958824157715, "learning_rate": 9.989664678424746e-06, "loss": 0.63979292, "memory(GiB)": 34.88, "step": 10260, "train_speed(iter/s)": 0.410045 }, { "acc": 0.83316898, "epoch": 0.2779357214415293, "grad_norm": 10.027888298034668, "learning_rate": 9.989628685605081e-06, "loss": 0.84151688, "memory(GiB)": 34.88, "step": 10265, "train_speed(iter/s)": 0.410057 }, { "acc": 0.85438251, "epoch": 0.2780711017247448, "grad_norm": 8.64756965637207, "learning_rate": 9.989592630286775e-06, "loss": 0.79408398, "memory(GiB)": 34.88, "step": 10270, "train_speed(iter/s)": 0.410068 }, { "acc": 0.84933815, "epoch": 0.2782064820079604, "grad_norm": 7.809368133544922, "learning_rate": 9.989556512470281e-06, "loss": 0.76480069, "memory(GiB)": 34.88, "step": 10275, "train_speed(iter/s)": 0.410077 }, { "acc": 0.84266443, "epoch": 0.2783418622911759, "grad_norm": 12.244816780090332, "learning_rate": 9.98952033215605e-06, "loss": 0.79641232, "memory(GiB)": 34.88, "step": 10280, "train_speed(iter/s)": 0.410087 }, { "acc": 0.83687592, "epoch": 0.2784772425743915, "grad_norm": 10.479060173034668, "learning_rate": 9.989484089344535e-06, "loss": 0.83121586, "memory(GiB)": 34.88, "step": 10285, "train_speed(iter/s)": 0.410098 }, { "acc": 0.83432503, "epoch": 0.278612622857607, "grad_norm": 8.975525856018066, "learning_rate": 9.989447784036193e-06, "loss": 0.80488214, "memory(GiB)": 34.88, "step": 10290, "train_speed(iter/s)": 0.41011 }, { "acc": 0.86628056, "epoch": 0.2787480031408226, "grad_norm": 3.1972882747650146, "learning_rate": 9.989411416231475e-06, "loss": 0.7046073, "memory(GiB)": 34.88, "step": 10295, "train_speed(iter/s)": 0.410122 }, { "acc": 0.82685146, "epoch": 0.2788833834240381, "grad_norm": 35.776248931884766, "learning_rate": 9.98937498593084e-06, "loss": 0.93264809, "memory(GiB)": 34.88, "step": 10300, "train_speed(iter/s)": 0.410133 }, { "acc": 0.84757357, "epoch": 0.2790187637072537, "grad_norm": 11.817483901977539, "learning_rate": 9.989338493134742e-06, "loss": 0.81140413, "memory(GiB)": 34.88, "step": 10305, "train_speed(iter/s)": 0.410142 }, { "acc": 0.82474127, "epoch": 0.27915414399046923, "grad_norm": 8.810964584350586, "learning_rate": 9.989301937843638e-06, "loss": 0.87075214, "memory(GiB)": 34.88, "step": 10310, "train_speed(iter/s)": 0.410153 }, { "acc": 0.84386883, "epoch": 0.2792895242736848, "grad_norm": 10.962048530578613, "learning_rate": 9.98926532005799e-06, "loss": 0.65930138, "memory(GiB)": 34.88, "step": 10315, "train_speed(iter/s)": 0.410165 }, { "acc": 0.82365265, "epoch": 0.27942490455690033, "grad_norm": 5.291881084442139, "learning_rate": 9.989228639778248e-06, "loss": 0.80976391, "memory(GiB)": 34.88, "step": 10320, "train_speed(iter/s)": 0.410176 }, { "acc": 0.83487892, "epoch": 0.27956028484011586, "grad_norm": 9.247161865234375, "learning_rate": 9.989191897004882e-06, "loss": 0.80775146, "memory(GiB)": 34.88, "step": 10325, "train_speed(iter/s)": 0.410187 }, { "acc": 0.83348169, "epoch": 0.27969566512333144, "grad_norm": 9.114007949829102, "learning_rate": 9.989155091738345e-06, "loss": 0.94116783, "memory(GiB)": 34.88, "step": 10330, "train_speed(iter/s)": 0.410199 }, { "acc": 0.86179819, "epoch": 0.27983104540654696, "grad_norm": 5.890951156616211, "learning_rate": 9.989118223979101e-06, "loss": 0.6570713, "memory(GiB)": 34.88, "step": 10335, "train_speed(iter/s)": 0.41021 }, { "acc": 0.84077187, "epoch": 0.27996642568976254, "grad_norm": 8.193560600280762, "learning_rate": 9.989081293727611e-06, "loss": 0.74805813, "memory(GiB)": 34.88, "step": 10340, "train_speed(iter/s)": 0.410222 }, { "acc": 0.84281569, "epoch": 0.28010180597297807, "grad_norm": 6.007317066192627, "learning_rate": 9.989044300984336e-06, "loss": 0.78270435, "memory(GiB)": 34.88, "step": 10345, "train_speed(iter/s)": 0.410233 }, { "acc": 0.82168446, "epoch": 0.28023718625619365, "grad_norm": 7.965902328491211, "learning_rate": 9.989007245749743e-06, "loss": 0.90323896, "memory(GiB)": 34.88, "step": 10350, "train_speed(iter/s)": 0.410243 }, { "acc": 0.81068811, "epoch": 0.2803725665394092, "grad_norm": 9.848758697509766, "learning_rate": 9.988970128024295e-06, "loss": 0.94435215, "memory(GiB)": 34.88, "step": 10355, "train_speed(iter/s)": 0.410254 }, { "acc": 0.8440733, "epoch": 0.28050794682262475, "grad_norm": 23.310306549072266, "learning_rate": 9.988932947808454e-06, "loss": 0.81428194, "memory(GiB)": 34.88, "step": 10360, "train_speed(iter/s)": 0.410264 }, { "acc": 0.83651934, "epoch": 0.2806433271058403, "grad_norm": 13.619612693786621, "learning_rate": 9.988895705102689e-06, "loss": 0.78880229, "memory(GiB)": 34.88, "step": 10365, "train_speed(iter/s)": 0.410276 }, { "acc": 0.8359087, "epoch": 0.28077870738905586, "grad_norm": 5.556450366973877, "learning_rate": 9.988858399907467e-06, "loss": 0.86442585, "memory(GiB)": 34.88, "step": 10370, "train_speed(iter/s)": 0.410287 }, { "acc": 0.84622545, "epoch": 0.2809140876722714, "grad_norm": 8.032773971557617, "learning_rate": 9.988821032223253e-06, "loss": 0.80682964, "memory(GiB)": 34.88, "step": 10375, "train_speed(iter/s)": 0.410297 }, { "acc": 0.83402681, "epoch": 0.28104946795548696, "grad_norm": 5.373054504394531, "learning_rate": 9.988783602050514e-06, "loss": 0.83590508, "memory(GiB)": 34.88, "step": 10380, "train_speed(iter/s)": 0.410305 }, { "acc": 0.80684814, "epoch": 0.2811848482387025, "grad_norm": 10.018290519714355, "learning_rate": 9.988746109389723e-06, "loss": 1.11959324, "memory(GiB)": 34.88, "step": 10385, "train_speed(iter/s)": 0.410316 }, { "acc": 0.86195641, "epoch": 0.28132022852191807, "grad_norm": 63.200191497802734, "learning_rate": 9.988708554241347e-06, "loss": 0.70368404, "memory(GiB)": 34.88, "step": 10390, "train_speed(iter/s)": 0.410328 }, { "acc": 0.84402695, "epoch": 0.2814556088051336, "grad_norm": 7.601851940155029, "learning_rate": 9.98867093660586e-06, "loss": 0.76109667, "memory(GiB)": 34.88, "step": 10395, "train_speed(iter/s)": 0.410339 }, { "acc": 0.82300053, "epoch": 0.2815909890883492, "grad_norm": 14.154585838317871, "learning_rate": 9.988633256483726e-06, "loss": 0.89999304, "memory(GiB)": 34.88, "step": 10400, "train_speed(iter/s)": 0.410349 }, { "acc": 0.84115572, "epoch": 0.2817263693715647, "grad_norm": 38.66036605834961, "learning_rate": 9.988595513875423e-06, "loss": 0.77456217, "memory(GiB)": 34.88, "step": 10405, "train_speed(iter/s)": 0.410359 }, { "acc": 0.81765118, "epoch": 0.2818617496547803, "grad_norm": 18.422048568725586, "learning_rate": 9.988557708781422e-06, "loss": 0.91913919, "memory(GiB)": 34.88, "step": 10410, "train_speed(iter/s)": 0.41037 }, { "acc": 0.84757166, "epoch": 0.2819971299379958, "grad_norm": 10.596795082092285, "learning_rate": 9.988519841202197e-06, "loss": 0.81222744, "memory(GiB)": 34.88, "step": 10415, "train_speed(iter/s)": 0.41038 }, { "acc": 0.83760109, "epoch": 0.2821325102212114, "grad_norm": 31.548213958740234, "learning_rate": 9.98848191113822e-06, "loss": 0.81680145, "memory(GiB)": 34.88, "step": 10420, "train_speed(iter/s)": 0.410391 }, { "acc": 0.84656076, "epoch": 0.2822678905044269, "grad_norm": 7.974209308624268, "learning_rate": 9.988443918589972e-06, "loss": 0.72776647, "memory(GiB)": 34.88, "step": 10425, "train_speed(iter/s)": 0.410402 }, { "acc": 0.84002304, "epoch": 0.2824032707876425, "grad_norm": 16.50661849975586, "learning_rate": 9.988405863557923e-06, "loss": 0.87984991, "memory(GiB)": 34.88, "step": 10430, "train_speed(iter/s)": 0.410414 }, { "acc": 0.81353951, "epoch": 0.282538651070858, "grad_norm": 7.0248188972473145, "learning_rate": 9.988367746042551e-06, "loss": 0.93815088, "memory(GiB)": 34.88, "step": 10435, "train_speed(iter/s)": 0.410424 }, { "acc": 0.84524727, "epoch": 0.2826740313540736, "grad_norm": 8.777379989624023, "learning_rate": 9.988329566044338e-06, "loss": 0.76306925, "memory(GiB)": 34.88, "step": 10440, "train_speed(iter/s)": 0.410435 }, { "acc": 0.86642027, "epoch": 0.2828094116372891, "grad_norm": 7.636024475097656, "learning_rate": 9.988291323563754e-06, "loss": 0.70712457, "memory(GiB)": 34.88, "step": 10445, "train_speed(iter/s)": 0.410447 }, { "acc": 0.86638889, "epoch": 0.2829447919205047, "grad_norm": 9.3706693649292, "learning_rate": 9.988253018601284e-06, "loss": 0.58627205, "memory(GiB)": 34.88, "step": 10450, "train_speed(iter/s)": 0.410457 }, { "acc": 0.85525885, "epoch": 0.2830801722037202, "grad_norm": 8.9379301071167, "learning_rate": 9.988214651157409e-06, "loss": 0.69480257, "memory(GiB)": 34.88, "step": 10455, "train_speed(iter/s)": 0.410469 }, { "acc": 0.81762142, "epoch": 0.2832155524869358, "grad_norm": 8.919127464294434, "learning_rate": 9.988176221232605e-06, "loss": 0.93413639, "memory(GiB)": 34.88, "step": 10460, "train_speed(iter/s)": 0.410479 }, { "acc": 0.83839931, "epoch": 0.28335093277015133, "grad_norm": 15.306267738342285, "learning_rate": 9.988137728827354e-06, "loss": 0.85609131, "memory(GiB)": 34.88, "step": 10465, "train_speed(iter/s)": 0.41049 }, { "acc": 0.85329466, "epoch": 0.2834863130533669, "grad_norm": 13.090433120727539, "learning_rate": 9.988099173942143e-06, "loss": 0.6894937, "memory(GiB)": 34.88, "step": 10470, "train_speed(iter/s)": 0.4105 }, { "acc": 0.8597971, "epoch": 0.28362169333658244, "grad_norm": 9.667926788330078, "learning_rate": 9.988060556577451e-06, "loss": 0.64861894, "memory(GiB)": 34.88, "step": 10475, "train_speed(iter/s)": 0.410511 }, { "acc": 0.84673328, "epoch": 0.283757073619798, "grad_norm": 15.360066413879395, "learning_rate": 9.988021876733762e-06, "loss": 0.81296921, "memory(GiB)": 34.88, "step": 10480, "train_speed(iter/s)": 0.41052 }, { "acc": 0.86818113, "epoch": 0.28389245390301354, "grad_norm": 14.680681228637695, "learning_rate": 9.987983134411559e-06, "loss": 0.70155277, "memory(GiB)": 34.88, "step": 10485, "train_speed(iter/s)": 0.410531 }, { "acc": 0.84508734, "epoch": 0.2840278341862291, "grad_norm": 11.800613403320312, "learning_rate": 9.987944329611332e-06, "loss": 0.87518415, "memory(GiB)": 34.88, "step": 10490, "train_speed(iter/s)": 0.410542 }, { "acc": 0.84911613, "epoch": 0.28416321446944465, "grad_norm": 8.190686225891113, "learning_rate": 9.987905462333564e-06, "loss": 0.75779467, "memory(GiB)": 34.88, "step": 10495, "train_speed(iter/s)": 0.410554 }, { "acc": 0.85436487, "epoch": 0.2842985947526602, "grad_norm": 5.745087146759033, "learning_rate": 9.987866532578742e-06, "loss": 0.74156437, "memory(GiB)": 34.88, "step": 10500, "train_speed(iter/s)": 0.410564 }, { "acc": 0.83809605, "epoch": 0.28443397503587575, "grad_norm": 14.013193130493164, "learning_rate": 9.987827540347355e-06, "loss": 0.82041721, "memory(GiB)": 34.88, "step": 10505, "train_speed(iter/s)": 0.410574 }, { "acc": 0.84022446, "epoch": 0.28456935531909133, "grad_norm": 64.90460968017578, "learning_rate": 9.98778848563989e-06, "loss": 0.77178516, "memory(GiB)": 34.88, "step": 10510, "train_speed(iter/s)": 0.410585 }, { "acc": 0.83882294, "epoch": 0.28470473560230686, "grad_norm": 13.438713073730469, "learning_rate": 9.987749368456835e-06, "loss": 0.88178892, "memory(GiB)": 34.88, "step": 10515, "train_speed(iter/s)": 0.410595 }, { "acc": 0.84743462, "epoch": 0.28484011588552244, "grad_norm": 9.579560279846191, "learning_rate": 9.987710188798684e-06, "loss": 0.77141056, "memory(GiB)": 34.88, "step": 10520, "train_speed(iter/s)": 0.410606 }, { "acc": 0.84988556, "epoch": 0.28497549616873796, "grad_norm": 50.23802947998047, "learning_rate": 9.987670946665923e-06, "loss": 0.69332862, "memory(GiB)": 34.88, "step": 10525, "train_speed(iter/s)": 0.410617 }, { "acc": 0.82890682, "epoch": 0.28511087645195354, "grad_norm": 9.291526794433594, "learning_rate": 9.987631642059048e-06, "loss": 0.82163296, "memory(GiB)": 34.88, "step": 10530, "train_speed(iter/s)": 0.410628 }, { "acc": 0.84673004, "epoch": 0.28524625673516907, "grad_norm": 8.49111557006836, "learning_rate": 9.98759227497855e-06, "loss": 0.73962379, "memory(GiB)": 34.88, "step": 10535, "train_speed(iter/s)": 0.410636 }, { "acc": 0.82694025, "epoch": 0.28538163701838465, "grad_norm": 28.872333526611328, "learning_rate": 9.98755284542492e-06, "loss": 0.90414944, "memory(GiB)": 34.88, "step": 10540, "train_speed(iter/s)": 0.410647 }, { "acc": 0.82822247, "epoch": 0.2855170173016002, "grad_norm": 14.916007995605469, "learning_rate": 9.987513353398655e-06, "loss": 0.97331753, "memory(GiB)": 34.88, "step": 10545, "train_speed(iter/s)": 0.410658 }, { "acc": 0.85477238, "epoch": 0.28565239758481575, "grad_norm": 9.618305206298828, "learning_rate": 9.987473798900247e-06, "loss": 0.75874872, "memory(GiB)": 34.88, "step": 10550, "train_speed(iter/s)": 0.410668 }, { "acc": 0.8053072, "epoch": 0.2857877778680313, "grad_norm": 8.39683723449707, "learning_rate": 9.987434181930193e-06, "loss": 0.92885952, "memory(GiB)": 34.88, "step": 10555, "train_speed(iter/s)": 0.410678 }, { "acc": 0.84804573, "epoch": 0.28592315815124686, "grad_norm": 6.789478778839111, "learning_rate": 9.98739450248899e-06, "loss": 0.73714514, "memory(GiB)": 34.88, "step": 10560, "train_speed(iter/s)": 0.410689 }, { "acc": 0.83396149, "epoch": 0.2860585384344624, "grad_norm": 11.545573234558105, "learning_rate": 9.987354760577134e-06, "loss": 0.7744895, "memory(GiB)": 34.88, "step": 10565, "train_speed(iter/s)": 0.410699 }, { "acc": 0.84470882, "epoch": 0.28619391871767796, "grad_norm": 5.7676496505737305, "learning_rate": 9.987314956195121e-06, "loss": 0.71911306, "memory(GiB)": 34.88, "step": 10570, "train_speed(iter/s)": 0.410709 }, { "acc": 0.84914989, "epoch": 0.2863292990008935, "grad_norm": 18.37286376953125, "learning_rate": 9.987275089343455e-06, "loss": 0.73753958, "memory(GiB)": 34.88, "step": 10575, "train_speed(iter/s)": 0.410721 }, { "acc": 0.84208584, "epoch": 0.28646467928410907, "grad_norm": 10.839202880859375, "learning_rate": 9.98723516002263e-06, "loss": 0.87375107, "memory(GiB)": 34.88, "step": 10580, "train_speed(iter/s)": 0.410731 }, { "acc": 0.83774891, "epoch": 0.2866000595673246, "grad_norm": 10.29715633392334, "learning_rate": 9.987195168233148e-06, "loss": 0.77479439, "memory(GiB)": 34.88, "step": 10585, "train_speed(iter/s)": 0.410742 }, { "acc": 0.83646975, "epoch": 0.2867354398505402, "grad_norm": 10.05034065246582, "learning_rate": 9.98715511397551e-06, "loss": 0.93466558, "memory(GiB)": 34.88, "step": 10590, "train_speed(iter/s)": 0.410753 }, { "acc": 0.85219784, "epoch": 0.2868708201337557, "grad_norm": 6.081588268280029, "learning_rate": 9.987114997250218e-06, "loss": 0.75575056, "memory(GiB)": 34.88, "step": 10595, "train_speed(iter/s)": 0.410763 }, { "acc": 0.83974447, "epoch": 0.2870062004169713, "grad_norm": 12.313599586486816, "learning_rate": 9.987074818057777e-06, "loss": 0.87234402, "memory(GiB)": 34.88, "step": 10600, "train_speed(iter/s)": 0.410774 }, { "acc": 0.81950302, "epoch": 0.2871415807001868, "grad_norm": 24.177871704101562, "learning_rate": 9.987034576398686e-06, "loss": 0.88906956, "memory(GiB)": 34.88, "step": 10605, "train_speed(iter/s)": 0.410784 }, { "acc": 0.88070078, "epoch": 0.2872769609834024, "grad_norm": 6.022226810455322, "learning_rate": 9.98699427227345e-06, "loss": 0.62221174, "memory(GiB)": 34.88, "step": 10610, "train_speed(iter/s)": 0.410792 }, { "acc": 0.83694344, "epoch": 0.2874123412666179, "grad_norm": 9.31515121459961, "learning_rate": 9.986953905682577e-06, "loss": 0.94601507, "memory(GiB)": 34.88, "step": 10615, "train_speed(iter/s)": 0.410803 }, { "acc": 0.84339294, "epoch": 0.2875477215498335, "grad_norm": 14.007877349853516, "learning_rate": 9.986913476626567e-06, "loss": 0.7364399, "memory(GiB)": 34.88, "step": 10620, "train_speed(iter/s)": 0.410814 }, { "acc": 0.84620371, "epoch": 0.287683101833049, "grad_norm": 6.443284511566162, "learning_rate": 9.986872985105932e-06, "loss": 0.79132276, "memory(GiB)": 34.88, "step": 10625, "train_speed(iter/s)": 0.410825 }, { "acc": 0.84405937, "epoch": 0.2878184821162646, "grad_norm": 7.16602087020874, "learning_rate": 9.986832431121178e-06, "loss": 0.72104964, "memory(GiB)": 34.88, "step": 10630, "train_speed(iter/s)": 0.410836 }, { "acc": 0.8215826, "epoch": 0.2879538623994801, "grad_norm": 8.227433204650879, "learning_rate": 9.98679181467281e-06, "loss": 0.91271257, "memory(GiB)": 34.88, "step": 10635, "train_speed(iter/s)": 0.410846 }, { "acc": 0.85166492, "epoch": 0.2880892426826957, "grad_norm": 12.582742691040039, "learning_rate": 9.986751135761341e-06, "loss": 0.76871405, "memory(GiB)": 34.88, "step": 10640, "train_speed(iter/s)": 0.410855 }, { "acc": 0.83837395, "epoch": 0.2882246229659112, "grad_norm": 33.61799240112305, "learning_rate": 9.986710394387278e-06, "loss": 0.86435356, "memory(GiB)": 34.88, "step": 10645, "train_speed(iter/s)": 0.410866 }, { "acc": 0.83125725, "epoch": 0.2883600032491268, "grad_norm": 9.519457817077637, "learning_rate": 9.986669590551131e-06, "loss": 0.98384352, "memory(GiB)": 34.88, "step": 10650, "train_speed(iter/s)": 0.410877 }, { "acc": 0.84318466, "epoch": 0.28849538353234233, "grad_norm": 7.160506725311279, "learning_rate": 9.986628724253413e-06, "loss": 0.70198507, "memory(GiB)": 34.88, "step": 10655, "train_speed(iter/s)": 0.410887 }, { "acc": 0.83836136, "epoch": 0.2886307638155579, "grad_norm": 5.733874320983887, "learning_rate": 9.986587795494634e-06, "loss": 0.78884745, "memory(GiB)": 34.88, "step": 10660, "train_speed(iter/s)": 0.410896 }, { "acc": 0.82583275, "epoch": 0.28876614409877344, "grad_norm": 7.007323741912842, "learning_rate": 9.98654680427531e-06, "loss": 0.89835234, "memory(GiB)": 34.88, "step": 10665, "train_speed(iter/s)": 0.410907 }, { "acc": 0.83098192, "epoch": 0.288901524381989, "grad_norm": 5.910687446594238, "learning_rate": 9.98650575059595e-06, "loss": 0.80854464, "memory(GiB)": 34.88, "step": 10670, "train_speed(iter/s)": 0.410917 }, { "acc": 0.8496583, "epoch": 0.28903690466520454, "grad_norm": 15.815705299377441, "learning_rate": 9.986464634457071e-06, "loss": 0.73006816, "memory(GiB)": 34.88, "step": 10675, "train_speed(iter/s)": 0.410927 }, { "acc": 0.8528101, "epoch": 0.2891722849484201, "grad_norm": 15.849377632141113, "learning_rate": 9.98642345585919e-06, "loss": 0.71013002, "memory(GiB)": 34.88, "step": 10680, "train_speed(iter/s)": 0.410936 }, { "acc": 0.82881479, "epoch": 0.28930766523163565, "grad_norm": 20.678255081176758, "learning_rate": 9.986382214802816e-06, "loss": 0.86885595, "memory(GiB)": 34.88, "step": 10685, "train_speed(iter/s)": 0.410947 }, { "acc": 0.85112209, "epoch": 0.2894430455148512, "grad_norm": 6.144737243652344, "learning_rate": 9.986340911288471e-06, "loss": 0.71850772, "memory(GiB)": 34.88, "step": 10690, "train_speed(iter/s)": 0.410957 }, { "acc": 0.82877388, "epoch": 0.28957842579806675, "grad_norm": 8.592987060546875, "learning_rate": 9.986299545316673e-06, "loss": 0.83767586, "memory(GiB)": 34.88, "step": 10695, "train_speed(iter/s)": 0.410967 }, { "acc": 0.82090797, "epoch": 0.28971380608128233, "grad_norm": 11.009401321411133, "learning_rate": 9.98625811688794e-06, "loss": 0.91028252, "memory(GiB)": 34.88, "step": 10700, "train_speed(iter/s)": 0.410978 }, { "acc": 0.83404427, "epoch": 0.28984918636449786, "grad_norm": 11.961277961730957, "learning_rate": 9.986216626002788e-06, "loss": 0.93816471, "memory(GiB)": 34.88, "step": 10705, "train_speed(iter/s)": 0.410988 }, { "acc": 0.83179245, "epoch": 0.28998456664771344, "grad_norm": 28.053447723388672, "learning_rate": 9.986175072661738e-06, "loss": 0.85430422, "memory(GiB)": 34.88, "step": 10710, "train_speed(iter/s)": 0.410999 }, { "acc": 0.84963837, "epoch": 0.29011994693092896, "grad_norm": 12.75798225402832, "learning_rate": 9.986133456865309e-06, "loss": 0.80108624, "memory(GiB)": 34.88, "step": 10715, "train_speed(iter/s)": 0.411008 }, { "acc": 0.82666731, "epoch": 0.29025532721414454, "grad_norm": 11.242741584777832, "learning_rate": 9.986091778614028e-06, "loss": 0.79944963, "memory(GiB)": 34.88, "step": 10720, "train_speed(iter/s)": 0.411019 }, { "acc": 0.84099674, "epoch": 0.29039070749736007, "grad_norm": 12.48676586151123, "learning_rate": 9.98605003790841e-06, "loss": 0.7734973, "memory(GiB)": 34.88, "step": 10725, "train_speed(iter/s)": 0.411029 }, { "acc": 0.87678528, "epoch": 0.29052608778057565, "grad_norm": 5.42781925201416, "learning_rate": 9.986008234748984e-06, "loss": 0.57540855, "memory(GiB)": 34.88, "step": 10730, "train_speed(iter/s)": 0.411041 }, { "acc": 0.81669884, "epoch": 0.2906614680637912, "grad_norm": 7.321986675262451, "learning_rate": 9.98596636913627e-06, "loss": 0.96281376, "memory(GiB)": 34.88, "step": 10735, "train_speed(iter/s)": 0.41105 }, { "acc": 0.8276557, "epoch": 0.29079684834700675, "grad_norm": 18.290481567382812, "learning_rate": 9.985924441070792e-06, "loss": 0.92734318, "memory(GiB)": 34.88, "step": 10740, "train_speed(iter/s)": 0.411059 }, { "acc": 0.85141687, "epoch": 0.2909322286302223, "grad_norm": 6.436637878417969, "learning_rate": 9.985882450553077e-06, "loss": 0.73190722, "memory(GiB)": 34.88, "step": 10745, "train_speed(iter/s)": 0.411069 }, { "acc": 0.84021111, "epoch": 0.29106760891343786, "grad_norm": 9.644390106201172, "learning_rate": 9.98584039758365e-06, "loss": 0.83433685, "memory(GiB)": 34.88, "step": 10750, "train_speed(iter/s)": 0.411079 }, { "acc": 0.83706017, "epoch": 0.2912029891966534, "grad_norm": 18.901561737060547, "learning_rate": 9.98579828216304e-06, "loss": 0.75846181, "memory(GiB)": 34.88, "step": 10755, "train_speed(iter/s)": 0.411089 }, { "acc": 0.84860611, "epoch": 0.29133836947986896, "grad_norm": 7.598383903503418, "learning_rate": 9.985756104291773e-06, "loss": 0.6425622, "memory(GiB)": 34.88, "step": 10760, "train_speed(iter/s)": 0.411098 }, { "acc": 0.84444666, "epoch": 0.2914737497630845, "grad_norm": 5.429523944854736, "learning_rate": 9.985713863970376e-06, "loss": 0.75875101, "memory(GiB)": 34.88, "step": 10765, "train_speed(iter/s)": 0.411109 }, { "acc": 0.83663826, "epoch": 0.29160913004630007, "grad_norm": 8.149168968200684, "learning_rate": 9.985671561199381e-06, "loss": 0.74453316, "memory(GiB)": 34.88, "step": 10770, "train_speed(iter/s)": 0.411118 }, { "acc": 0.85128899, "epoch": 0.2917445103295156, "grad_norm": 6.523275852203369, "learning_rate": 9.985629195979313e-06, "loss": 0.72816296, "memory(GiB)": 34.88, "step": 10775, "train_speed(iter/s)": 0.411129 }, { "acc": 0.85179214, "epoch": 0.2918798906127312, "grad_norm": 9.751714706420898, "learning_rate": 9.98558676831071e-06, "loss": 0.73256059, "memory(GiB)": 34.88, "step": 10780, "train_speed(iter/s)": 0.41114 }, { "acc": 0.8313426, "epoch": 0.2920152708959467, "grad_norm": 14.970806121826172, "learning_rate": 9.985544278194097e-06, "loss": 0.86029377, "memory(GiB)": 34.88, "step": 10785, "train_speed(iter/s)": 0.411149 }, { "acc": 0.83426762, "epoch": 0.2921506511791623, "grad_norm": 13.573835372924805, "learning_rate": 9.985501725630011e-06, "loss": 0.84345198, "memory(GiB)": 34.88, "step": 10790, "train_speed(iter/s)": 0.41116 }, { "acc": 0.84822807, "epoch": 0.2922860314623778, "grad_norm": 6.152320384979248, "learning_rate": 9.98545911061898e-06, "loss": 0.72377768, "memory(GiB)": 34.88, "step": 10795, "train_speed(iter/s)": 0.411171 }, { "acc": 0.87727432, "epoch": 0.2924214117455934, "grad_norm": 8.049673080444336, "learning_rate": 9.985416433161543e-06, "loss": 0.60637331, "memory(GiB)": 34.88, "step": 10800, "train_speed(iter/s)": 0.411182 }, { "acc": 0.82211437, "epoch": 0.2925567920288089, "grad_norm": 8.48117733001709, "learning_rate": 9.985373693258233e-06, "loss": 0.91835003, "memory(GiB)": 34.88, "step": 10805, "train_speed(iter/s)": 0.411191 }, { "acc": 0.81153002, "epoch": 0.2926921723120245, "grad_norm": 15.186257362365723, "learning_rate": 9.985330890909582e-06, "loss": 0.98653917, "memory(GiB)": 34.88, "step": 10810, "train_speed(iter/s)": 0.4112 }, { "acc": 0.8214344, "epoch": 0.29282755259524, "grad_norm": 12.745603561401367, "learning_rate": 9.98528802611613e-06, "loss": 0.9304738, "memory(GiB)": 34.88, "step": 10815, "train_speed(iter/s)": 0.411211 }, { "acc": 0.82911568, "epoch": 0.2929629328784556, "grad_norm": 11.171011924743652, "learning_rate": 9.985245098878412e-06, "loss": 0.8925827, "memory(GiB)": 34.88, "step": 10820, "train_speed(iter/s)": 0.411221 }, { "acc": 0.86134071, "epoch": 0.2930983131616711, "grad_norm": 7.408243179321289, "learning_rate": 9.985202109196968e-06, "loss": 0.73309159, "memory(GiB)": 34.88, "step": 10825, "train_speed(iter/s)": 0.411232 }, { "acc": 0.83184242, "epoch": 0.2932336934448867, "grad_norm": 14.634317398071289, "learning_rate": 9.985159057072336e-06, "loss": 0.87022982, "memory(GiB)": 34.88, "step": 10830, "train_speed(iter/s)": 0.411242 }, { "acc": 0.85711288, "epoch": 0.2933690737281022, "grad_norm": 7.37150764465332, "learning_rate": 9.985115942505054e-06, "loss": 0.65554523, "memory(GiB)": 34.88, "step": 10835, "train_speed(iter/s)": 0.411253 }, { "acc": 0.83629026, "epoch": 0.2935044540113178, "grad_norm": 29.188030242919922, "learning_rate": 9.985072765495662e-06, "loss": 0.82054195, "memory(GiB)": 34.88, "step": 10840, "train_speed(iter/s)": 0.411262 }, { "acc": 0.85501232, "epoch": 0.29363983429453333, "grad_norm": 6.266002655029297, "learning_rate": 9.9850295260447e-06, "loss": 0.68681393, "memory(GiB)": 34.88, "step": 10845, "train_speed(iter/s)": 0.411272 }, { "acc": 0.840131, "epoch": 0.2937752145777489, "grad_norm": 10.238394737243652, "learning_rate": 9.984986224152713e-06, "loss": 0.84231396, "memory(GiB)": 34.88, "step": 10850, "train_speed(iter/s)": 0.411281 }, { "acc": 0.84230442, "epoch": 0.29391059486096444, "grad_norm": 11.460814476013184, "learning_rate": 9.984942859820241e-06, "loss": 0.77484512, "memory(GiB)": 34.88, "step": 10855, "train_speed(iter/s)": 0.411291 }, { "acc": 0.84387112, "epoch": 0.29404597514418, "grad_norm": 12.29874324798584, "learning_rate": 9.98489943304783e-06, "loss": 0.87437038, "memory(GiB)": 34.88, "step": 10860, "train_speed(iter/s)": 0.411302 }, { "acc": 0.852425, "epoch": 0.29418135542739554, "grad_norm": 13.1736421585083, "learning_rate": 9.984855943836018e-06, "loss": 0.76373692, "memory(GiB)": 34.88, "step": 10865, "train_speed(iter/s)": 0.41131 }, { "acc": 0.84549618, "epoch": 0.2943167357106111, "grad_norm": 4.459498405456543, "learning_rate": 9.984812392185354e-06, "loss": 0.76243043, "memory(GiB)": 34.88, "step": 10870, "train_speed(iter/s)": 0.411319 }, { "acc": 0.8364152, "epoch": 0.29445211599382665, "grad_norm": 22.34714126586914, "learning_rate": 9.984768778096385e-06, "loss": 0.85100002, "memory(GiB)": 34.88, "step": 10875, "train_speed(iter/s)": 0.411329 }, { "acc": 0.84469452, "epoch": 0.2945874962770422, "grad_norm": 18.913585662841797, "learning_rate": 9.984725101569655e-06, "loss": 0.69637213, "memory(GiB)": 34.88, "step": 10880, "train_speed(iter/s)": 0.41134 }, { "acc": 0.84963245, "epoch": 0.29472287656025775, "grad_norm": 24.075416564941406, "learning_rate": 9.984681362605712e-06, "loss": 0.71288638, "memory(GiB)": 34.88, "step": 10885, "train_speed(iter/s)": 0.41135 }, { "acc": 0.8428751, "epoch": 0.29485825684347333, "grad_norm": 8.083955764770508, "learning_rate": 9.984637561205103e-06, "loss": 0.80926151, "memory(GiB)": 34.88, "step": 10890, "train_speed(iter/s)": 0.411361 }, { "acc": 0.8551506, "epoch": 0.29499363712668886, "grad_norm": 14.503480911254883, "learning_rate": 9.984593697368377e-06, "loss": 0.75733824, "memory(GiB)": 34.88, "step": 10895, "train_speed(iter/s)": 0.41137 }, { "acc": 0.82352066, "epoch": 0.29512901740990444, "grad_norm": 7.326123237609863, "learning_rate": 9.984549771096084e-06, "loss": 0.88869677, "memory(GiB)": 34.88, "step": 10900, "train_speed(iter/s)": 0.41138 }, { "acc": 0.81716013, "epoch": 0.29526439769311996, "grad_norm": 15.20312786102295, "learning_rate": 9.984505782388774e-06, "loss": 1.00359268, "memory(GiB)": 34.88, "step": 10905, "train_speed(iter/s)": 0.411388 }, { "acc": 0.87366505, "epoch": 0.29539977797633554, "grad_norm": 15.806796073913574, "learning_rate": 9.984461731246998e-06, "loss": 0.69350791, "memory(GiB)": 34.88, "step": 10910, "train_speed(iter/s)": 0.411399 }, { "acc": 0.85212612, "epoch": 0.29553515825955107, "grad_norm": 5.613367557525635, "learning_rate": 9.984417617671308e-06, "loss": 0.65797844, "memory(GiB)": 34.88, "step": 10915, "train_speed(iter/s)": 0.411408 }, { "acc": 0.83216658, "epoch": 0.29567053854276665, "grad_norm": 10.320393562316895, "learning_rate": 9.984373441662255e-06, "loss": 0.74739213, "memory(GiB)": 34.88, "step": 10920, "train_speed(iter/s)": 0.411418 }, { "acc": 0.85520267, "epoch": 0.2958059188259822, "grad_norm": 10.71557331085205, "learning_rate": 9.984329203220397e-06, "loss": 0.78592772, "memory(GiB)": 34.88, "step": 10925, "train_speed(iter/s)": 0.411429 }, { "acc": 0.81831551, "epoch": 0.29594129910919775, "grad_norm": 9.456999778747559, "learning_rate": 9.984284902346282e-06, "loss": 1.00013447, "memory(GiB)": 34.88, "step": 10930, "train_speed(iter/s)": 0.411437 }, { "acc": 0.84820433, "epoch": 0.2960766793924133, "grad_norm": 6.4825334548950195, "learning_rate": 9.98424053904047e-06, "loss": 0.76253438, "memory(GiB)": 34.88, "step": 10935, "train_speed(iter/s)": 0.411448 }, { "acc": 0.82490368, "epoch": 0.29621205967562886, "grad_norm": 25.633150100708008, "learning_rate": 9.984196113303512e-06, "loss": 0.88380022, "memory(GiB)": 34.88, "step": 10940, "train_speed(iter/s)": 0.411458 }, { "acc": 0.86659584, "epoch": 0.2963474399588444, "grad_norm": 7.805182933807373, "learning_rate": 9.98415162513597e-06, "loss": 0.67523065, "memory(GiB)": 34.88, "step": 10945, "train_speed(iter/s)": 0.411468 }, { "acc": 0.84754782, "epoch": 0.29648282024205996, "grad_norm": 7.796731472015381, "learning_rate": 9.984107074538397e-06, "loss": 0.84014874, "memory(GiB)": 34.88, "step": 10950, "train_speed(iter/s)": 0.411478 }, { "acc": 0.82949457, "epoch": 0.2966182005252755, "grad_norm": 10.504949569702148, "learning_rate": 9.984062461511354e-06, "loss": 0.86284409, "memory(GiB)": 34.88, "step": 10955, "train_speed(iter/s)": 0.411488 }, { "acc": 0.85627775, "epoch": 0.29675358080849107, "grad_norm": 8.582497596740723, "learning_rate": 9.984017786055397e-06, "loss": 0.69821191, "memory(GiB)": 34.88, "step": 10960, "train_speed(iter/s)": 0.411497 }, { "acc": 0.85152788, "epoch": 0.2968889610917066, "grad_norm": 9.023905754089355, "learning_rate": 9.983973048171088e-06, "loss": 0.73567176, "memory(GiB)": 34.88, "step": 10965, "train_speed(iter/s)": 0.411508 }, { "acc": 0.85708742, "epoch": 0.2970243413749222, "grad_norm": 75.80860900878906, "learning_rate": 9.983928247858985e-06, "loss": 0.71110387, "memory(GiB)": 34.88, "step": 10970, "train_speed(iter/s)": 0.411517 }, { "acc": 0.83567066, "epoch": 0.2971597216581377, "grad_norm": 7.268929481506348, "learning_rate": 9.983883385119653e-06, "loss": 0.84074402, "memory(GiB)": 34.88, "step": 10975, "train_speed(iter/s)": 0.411526 }, { "acc": 0.83827038, "epoch": 0.2972951019413533, "grad_norm": 21.953771591186523, "learning_rate": 9.98383845995365e-06, "loss": 0.88581085, "memory(GiB)": 34.88, "step": 10980, "train_speed(iter/s)": 0.411537 }, { "acc": 0.85521488, "epoch": 0.2974304822245688, "grad_norm": 11.062788963317871, "learning_rate": 9.983793472361543e-06, "loss": 0.81296501, "memory(GiB)": 34.88, "step": 10985, "train_speed(iter/s)": 0.411547 }, { "acc": 0.85781908, "epoch": 0.2975658625077844, "grad_norm": 15.268569946289062, "learning_rate": 9.98374842234389e-06, "loss": 0.73075109, "memory(GiB)": 34.88, "step": 10990, "train_speed(iter/s)": 0.411557 }, { "acc": 0.86035042, "epoch": 0.2977012427909999, "grad_norm": 6.61451530456543, "learning_rate": 9.983703309901262e-06, "loss": 0.64058819, "memory(GiB)": 34.88, "step": 10995, "train_speed(iter/s)": 0.411566 }, { "acc": 0.82892323, "epoch": 0.2978366230742155, "grad_norm": 6.724303245544434, "learning_rate": 9.983658135034219e-06, "loss": 0.86285295, "memory(GiB)": 34.88, "step": 11000, "train_speed(iter/s)": 0.411576 }, { "acc": 0.84821339, "epoch": 0.297972003357431, "grad_norm": 14.58345890045166, "learning_rate": 9.983612897743327e-06, "loss": 0.69557395, "memory(GiB)": 34.88, "step": 11005, "train_speed(iter/s)": 0.411587 }, { "acc": 0.83026447, "epoch": 0.2981073836406466, "grad_norm": 6.034548282623291, "learning_rate": 9.983567598029156e-06, "loss": 0.76060262, "memory(GiB)": 34.88, "step": 11010, "train_speed(iter/s)": 0.411597 }, { "acc": 0.80726948, "epoch": 0.2982427639238621, "grad_norm": 17.598514556884766, "learning_rate": 9.983522235892271e-06, "loss": 0.98199425, "memory(GiB)": 34.88, "step": 11015, "train_speed(iter/s)": 0.411607 }, { "acc": 0.87072144, "epoch": 0.2983781442070777, "grad_norm": 6.062009334564209, "learning_rate": 9.98347681133324e-06, "loss": 0.58985901, "memory(GiB)": 34.88, "step": 11020, "train_speed(iter/s)": 0.411616 }, { "acc": 0.86369076, "epoch": 0.2985135244902932, "grad_norm": 7.649457931518555, "learning_rate": 9.983431324352634e-06, "loss": 0.70751629, "memory(GiB)": 34.88, "step": 11025, "train_speed(iter/s)": 0.411626 }, { "acc": 0.82174416, "epoch": 0.2986489047735088, "grad_norm": 5.956027030944824, "learning_rate": 9.98338577495102e-06, "loss": 0.93261576, "memory(GiB)": 34.88, "step": 11030, "train_speed(iter/s)": 0.411635 }, { "acc": 0.85493231, "epoch": 0.29878428505672433, "grad_norm": 4.065944194793701, "learning_rate": 9.983340163128972e-06, "loss": 0.7479291, "memory(GiB)": 34.88, "step": 11035, "train_speed(iter/s)": 0.411645 }, { "acc": 0.86949539, "epoch": 0.2989196653399399, "grad_norm": 8.179756164550781, "learning_rate": 9.98329448888706e-06, "loss": 0.67950182, "memory(GiB)": 34.88, "step": 11040, "train_speed(iter/s)": 0.411655 }, { "acc": 0.82747688, "epoch": 0.29905504562315544, "grad_norm": 24.86500358581543, "learning_rate": 9.983248752225856e-06, "loss": 0.85432434, "memory(GiB)": 34.88, "step": 11045, "train_speed(iter/s)": 0.411665 }, { "acc": 0.85241947, "epoch": 0.299190425906371, "grad_norm": 5.877401828765869, "learning_rate": 9.983202953145931e-06, "loss": 0.74072552, "memory(GiB)": 34.88, "step": 11050, "train_speed(iter/s)": 0.411675 }, { "acc": 0.88195152, "epoch": 0.29932580618958654, "grad_norm": 12.414798736572266, "learning_rate": 9.983157091647862e-06, "loss": 0.50322628, "memory(GiB)": 34.88, "step": 11055, "train_speed(iter/s)": 0.411685 }, { "acc": 0.85578012, "epoch": 0.2994611864728021, "grad_norm": 9.082847595214844, "learning_rate": 9.98311116773222e-06, "loss": 0.73979392, "memory(GiB)": 34.88, "step": 11060, "train_speed(iter/s)": 0.411694 }, { "acc": 0.84282475, "epoch": 0.29959656675601765, "grad_norm": 12.609400749206543, "learning_rate": 9.983065181399584e-06, "loss": 0.77705541, "memory(GiB)": 34.88, "step": 11065, "train_speed(iter/s)": 0.411704 }, { "acc": 0.84372034, "epoch": 0.2997319470392332, "grad_norm": 9.054217338562012, "learning_rate": 9.98301913265053e-06, "loss": 0.82140827, "memory(GiB)": 34.88, "step": 11070, "train_speed(iter/s)": 0.411714 }, { "acc": 0.82481956, "epoch": 0.29986732732244875, "grad_norm": 6.107285499572754, "learning_rate": 9.98297302148563e-06, "loss": 0.90961256, "memory(GiB)": 34.88, "step": 11075, "train_speed(iter/s)": 0.411723 }, { "acc": 0.82678528, "epoch": 0.30000270760566433, "grad_norm": 9.878252029418945, "learning_rate": 9.982926847905466e-06, "loss": 0.75938306, "memory(GiB)": 34.88, "step": 11080, "train_speed(iter/s)": 0.411734 }, { "acc": 0.8420373, "epoch": 0.30013808788887986, "grad_norm": 32.8321647644043, "learning_rate": 9.982880611910617e-06, "loss": 0.82845364, "memory(GiB)": 34.88, "step": 11085, "train_speed(iter/s)": 0.411743 }, { "acc": 0.83866959, "epoch": 0.30027346817209544, "grad_norm": 6.606157302856445, "learning_rate": 9.982834313501658e-06, "loss": 0.75809021, "memory(GiB)": 34.88, "step": 11090, "train_speed(iter/s)": 0.411754 }, { "acc": 0.85169334, "epoch": 0.30040884845531096, "grad_norm": 7.830959796905518, "learning_rate": 9.982787952679174e-06, "loss": 0.79781289, "memory(GiB)": 34.88, "step": 11095, "train_speed(iter/s)": 0.411762 }, { "acc": 0.84675446, "epoch": 0.30054422873852654, "grad_norm": 5.820547103881836, "learning_rate": 9.98274152944374e-06, "loss": 0.80563755, "memory(GiB)": 34.88, "step": 11100, "train_speed(iter/s)": 0.411772 }, { "acc": 0.85253086, "epoch": 0.30067960902174207, "grad_norm": 5.748239517211914, "learning_rate": 9.982695043795943e-06, "loss": 0.67995872, "memory(GiB)": 34.88, "step": 11105, "train_speed(iter/s)": 0.41178 }, { "acc": 0.83103418, "epoch": 0.30081498930495765, "grad_norm": 7.6017069816589355, "learning_rate": 9.982648495736364e-06, "loss": 0.91939344, "memory(GiB)": 34.88, "step": 11110, "train_speed(iter/s)": 0.41179 }, { "acc": 0.83015423, "epoch": 0.30095036958817317, "grad_norm": 16.39286231994629, "learning_rate": 9.982601885265583e-06, "loss": 0.82002926, "memory(GiB)": 34.88, "step": 11115, "train_speed(iter/s)": 0.411799 }, { "acc": 0.852561, "epoch": 0.30108574987138875, "grad_norm": 5.281936168670654, "learning_rate": 9.982555212384188e-06, "loss": 0.75237236, "memory(GiB)": 34.88, "step": 11120, "train_speed(iter/s)": 0.41181 }, { "acc": 0.84866676, "epoch": 0.3012211301546043, "grad_norm": 26.359024047851562, "learning_rate": 9.982508477092758e-06, "loss": 0.79883833, "memory(GiB)": 34.88, "step": 11125, "train_speed(iter/s)": 0.41182 }, { "acc": 0.85737991, "epoch": 0.30135651043781986, "grad_norm": 16.794883728027344, "learning_rate": 9.982461679391886e-06, "loss": 0.71563764, "memory(GiB)": 34.88, "step": 11130, "train_speed(iter/s)": 0.411829 }, { "acc": 0.85721865, "epoch": 0.3014918907210354, "grad_norm": 9.66382122039795, "learning_rate": 9.982414819282153e-06, "loss": 0.61863003, "memory(GiB)": 34.88, "step": 11135, "train_speed(iter/s)": 0.411839 }, { "acc": 0.84599304, "epoch": 0.30162727100425096, "grad_norm": 8.145236015319824, "learning_rate": 9.982367896764148e-06, "loss": 0.79736891, "memory(GiB)": 34.88, "step": 11140, "train_speed(iter/s)": 0.411848 }, { "acc": 0.8425992, "epoch": 0.3017626512874665, "grad_norm": 10.734066009521484, "learning_rate": 9.982320911838456e-06, "loss": 0.88532162, "memory(GiB)": 34.88, "step": 11145, "train_speed(iter/s)": 0.411859 }, { "acc": 0.84162645, "epoch": 0.30189803157068207, "grad_norm": 10.017556190490723, "learning_rate": 9.98227386450567e-06, "loss": 0.79597859, "memory(GiB)": 34.88, "step": 11150, "train_speed(iter/s)": 0.411868 }, { "acc": 0.87050514, "epoch": 0.3020334118538976, "grad_norm": 7.584836006164551, "learning_rate": 9.982226754766377e-06, "loss": 0.62387991, "memory(GiB)": 34.88, "step": 11155, "train_speed(iter/s)": 0.411877 }, { "acc": 0.86332359, "epoch": 0.3021687921371132, "grad_norm": 9.363042831420898, "learning_rate": 9.982179582621167e-06, "loss": 0.7158289, "memory(GiB)": 34.88, "step": 11160, "train_speed(iter/s)": 0.411887 }, { "acc": 0.85992336, "epoch": 0.3023041724203287, "grad_norm": 7.653534412384033, "learning_rate": 9.982132348070634e-06, "loss": 0.71043816, "memory(GiB)": 34.88, "step": 11165, "train_speed(iter/s)": 0.411897 }, { "acc": 0.84198341, "epoch": 0.3024395527035443, "grad_norm": 10.147193908691406, "learning_rate": 9.982085051115363e-06, "loss": 0.81119232, "memory(GiB)": 34.88, "step": 11170, "train_speed(iter/s)": 0.411906 }, { "acc": 0.84226398, "epoch": 0.3025749329867598, "grad_norm": 16.68094253540039, "learning_rate": 9.982037691755953e-06, "loss": 0.79146109, "memory(GiB)": 34.88, "step": 11175, "train_speed(iter/s)": 0.411917 }, { "acc": 0.86755581, "epoch": 0.3027103132699754, "grad_norm": 6.24618673324585, "learning_rate": 9.981990269992992e-06, "loss": 0.67794819, "memory(GiB)": 34.88, "step": 11180, "train_speed(iter/s)": 0.411927 }, { "acc": 0.85021591, "epoch": 0.3028456935531909, "grad_norm": 5.7481513023376465, "learning_rate": 9.981942785827078e-06, "loss": 0.8125226, "memory(GiB)": 34.88, "step": 11185, "train_speed(iter/s)": 0.411936 }, { "acc": 0.85017128, "epoch": 0.3029810738364065, "grad_norm": 7.638333320617676, "learning_rate": 9.981895239258806e-06, "loss": 0.77550035, "memory(GiB)": 34.88, "step": 11190, "train_speed(iter/s)": 0.411946 }, { "acc": 0.8746664, "epoch": 0.303116454119622, "grad_norm": 7.006137371063232, "learning_rate": 9.98184763028877e-06, "loss": 0.61187325, "memory(GiB)": 34.88, "step": 11195, "train_speed(iter/s)": 0.411956 }, { "acc": 0.82916641, "epoch": 0.3032518344028376, "grad_norm": 4.189087867736816, "learning_rate": 9.981799958917565e-06, "loss": 0.8473568, "memory(GiB)": 34.88, "step": 11200, "train_speed(iter/s)": 0.411963 }, { "acc": 0.84378843, "epoch": 0.3033872146860531, "grad_norm": 15.583980560302734, "learning_rate": 9.98175222514579e-06, "loss": 0.75519872, "memory(GiB)": 34.88, "step": 11205, "train_speed(iter/s)": 0.411972 }, { "acc": 0.8168438, "epoch": 0.3035225949692687, "grad_norm": 20.410924911499023, "learning_rate": 9.981704428974043e-06, "loss": 0.92002287, "memory(GiB)": 34.88, "step": 11210, "train_speed(iter/s)": 0.411983 }, { "acc": 0.8286705, "epoch": 0.3036579752524842, "grad_norm": 12.523185729980469, "learning_rate": 9.981656570402923e-06, "loss": 0.83376436, "memory(GiB)": 34.88, "step": 11215, "train_speed(iter/s)": 0.411991 }, { "acc": 0.85500412, "epoch": 0.3037933555356998, "grad_norm": 8.804986953735352, "learning_rate": 9.981608649433028e-06, "loss": 0.7264957, "memory(GiB)": 34.88, "step": 11220, "train_speed(iter/s)": 0.412001 }, { "acc": 0.84395971, "epoch": 0.30392873581891533, "grad_norm": 8.569748878479004, "learning_rate": 9.98156066606496e-06, "loss": 0.76800685, "memory(GiB)": 34.88, "step": 11225, "train_speed(iter/s)": 0.412011 }, { "acc": 0.86488686, "epoch": 0.3040641161021309, "grad_norm": 17.53213119506836, "learning_rate": 9.98151262029932e-06, "loss": 0.65735807, "memory(GiB)": 34.88, "step": 11230, "train_speed(iter/s)": 0.412021 }, { "acc": 0.83652582, "epoch": 0.30419949638534644, "grad_norm": 8.050326347351074, "learning_rate": 9.981464512136709e-06, "loss": 0.86979666, "memory(GiB)": 34.88, "step": 11235, "train_speed(iter/s)": 0.412028 }, { "acc": 0.86707096, "epoch": 0.304334876668562, "grad_norm": 5.362160682678223, "learning_rate": 9.98141634157773e-06, "loss": 0.6153388, "memory(GiB)": 34.88, "step": 11240, "train_speed(iter/s)": 0.412038 }, { "acc": 0.83184948, "epoch": 0.30447025695177754, "grad_norm": 10.68491268157959, "learning_rate": 9.981368108622985e-06, "loss": 0.86453896, "memory(GiB)": 34.88, "step": 11245, "train_speed(iter/s)": 0.412048 }, { "acc": 0.85443869, "epoch": 0.3046056372349931, "grad_norm": 9.190485000610352, "learning_rate": 9.981319813273081e-06, "loss": 0.63289652, "memory(GiB)": 34.88, "step": 11250, "train_speed(iter/s)": 0.412057 }, { "acc": 0.84251728, "epoch": 0.30474101751820865, "grad_norm": 6.084734916687012, "learning_rate": 9.981271455528621e-06, "loss": 0.77846203, "memory(GiB)": 34.88, "step": 11255, "train_speed(iter/s)": 0.412066 }, { "acc": 0.83646908, "epoch": 0.3048763978014242, "grad_norm": 14.599442481994629, "learning_rate": 9.981223035390212e-06, "loss": 0.85368624, "memory(GiB)": 34.88, "step": 11260, "train_speed(iter/s)": 0.412076 }, { "acc": 0.84703465, "epoch": 0.30501177808463975, "grad_norm": 14.350021362304688, "learning_rate": 9.981174552858458e-06, "loss": 0.76209145, "memory(GiB)": 34.88, "step": 11265, "train_speed(iter/s)": 0.412086 }, { "acc": 0.82341728, "epoch": 0.30514715836785533, "grad_norm": 11.695759773254395, "learning_rate": 9.98112600793397e-06, "loss": 1.01764631, "memory(GiB)": 34.88, "step": 11270, "train_speed(iter/s)": 0.412095 }, { "acc": 0.84491806, "epoch": 0.30528253865107086, "grad_norm": 8.039559364318848, "learning_rate": 9.981077400617353e-06, "loss": 0.69347858, "memory(GiB)": 34.88, "step": 11275, "train_speed(iter/s)": 0.412101 }, { "acc": 0.83168087, "epoch": 0.30541791893428644, "grad_norm": 17.821189880371094, "learning_rate": 9.981028730909218e-06, "loss": 0.92690811, "memory(GiB)": 34.88, "step": 11280, "train_speed(iter/s)": 0.412108 }, { "acc": 0.83963871, "epoch": 0.30555329921750196, "grad_norm": 12.01355266571045, "learning_rate": 9.980979998810175e-06, "loss": 0.8141489, "memory(GiB)": 34.88, "step": 11285, "train_speed(iter/s)": 0.412116 }, { "acc": 0.81772423, "epoch": 0.30568867950071754, "grad_norm": 7.529498100280762, "learning_rate": 9.980931204320831e-06, "loss": 0.96186028, "memory(GiB)": 34.88, "step": 11290, "train_speed(iter/s)": 0.41212 }, { "acc": 0.81548977, "epoch": 0.30582405978393307, "grad_norm": 11.33802318572998, "learning_rate": 9.980882347441802e-06, "loss": 1.00581398, "memory(GiB)": 34.88, "step": 11295, "train_speed(iter/s)": 0.412127 }, { "acc": 0.86643782, "epoch": 0.30595944006714865, "grad_norm": 7.091865062713623, "learning_rate": 9.980833428173696e-06, "loss": 0.6366015, "memory(GiB)": 34.88, "step": 11300, "train_speed(iter/s)": 0.412135 }, { "acc": 0.8378273, "epoch": 0.30609482035036417, "grad_norm": 25.838964462280273, "learning_rate": 9.980784446517129e-06, "loss": 0.80046768, "memory(GiB)": 34.88, "step": 11305, "train_speed(iter/s)": 0.41214 }, { "acc": 0.84136066, "epoch": 0.30623020063357975, "grad_norm": 8.195072174072266, "learning_rate": 9.980735402472712e-06, "loss": 0.7987515, "memory(GiB)": 34.88, "step": 11310, "train_speed(iter/s)": 0.412144 }, { "acc": 0.85485268, "epoch": 0.3063655809167953, "grad_norm": 9.797872543334961, "learning_rate": 9.980686296041062e-06, "loss": 0.75475969, "memory(GiB)": 34.88, "step": 11315, "train_speed(iter/s)": 0.412149 }, { "acc": 0.83380413, "epoch": 0.30650096120001086, "grad_norm": 16.32807731628418, "learning_rate": 9.980637127222789e-06, "loss": 0.79835606, "memory(GiB)": 34.88, "step": 11320, "train_speed(iter/s)": 0.412155 }, { "acc": 0.82896309, "epoch": 0.3066363414832264, "grad_norm": 7.4927191734313965, "learning_rate": 9.980587896018516e-06, "loss": 0.76681199, "memory(GiB)": 34.88, "step": 11325, "train_speed(iter/s)": 0.412158 }, { "acc": 0.84556923, "epoch": 0.30677172176644196, "grad_norm": 9.25534725189209, "learning_rate": 9.980538602428854e-06, "loss": 0.80495634, "memory(GiB)": 34.88, "step": 11330, "train_speed(iter/s)": 0.41216 }, { "acc": 0.87106943, "epoch": 0.3069071020496575, "grad_norm": 8.055158615112305, "learning_rate": 9.980489246454424e-06, "loss": 0.58879433, "memory(GiB)": 34.88, "step": 11335, "train_speed(iter/s)": 0.412165 }, { "acc": 0.850033, "epoch": 0.30704248233287307, "grad_norm": 7.989402770996094, "learning_rate": 9.980439828095842e-06, "loss": 0.7429204, "memory(GiB)": 34.88, "step": 11340, "train_speed(iter/s)": 0.412172 }, { "acc": 0.82380657, "epoch": 0.3071778626160886, "grad_norm": 8.241498947143555, "learning_rate": 9.98039034735373e-06, "loss": 0.97116737, "memory(GiB)": 34.88, "step": 11345, "train_speed(iter/s)": 0.412182 }, { "acc": 0.84614973, "epoch": 0.3073132428993041, "grad_norm": 12.346386909484863, "learning_rate": 9.980340804228704e-06, "loss": 0.83841066, "memory(GiB)": 34.88, "step": 11350, "train_speed(iter/s)": 0.412191 }, { "acc": 0.86559715, "epoch": 0.3074486231825197, "grad_norm": 6.36625862121582, "learning_rate": 9.980291198721389e-06, "loss": 0.59056706, "memory(GiB)": 34.88, "step": 11355, "train_speed(iter/s)": 0.412198 }, { "acc": 0.85721149, "epoch": 0.3075840034657352, "grad_norm": 5.488839626312256, "learning_rate": 9.980241530832402e-06, "loss": 0.60512934, "memory(GiB)": 34.88, "step": 11360, "train_speed(iter/s)": 0.412206 }, { "acc": 0.83742466, "epoch": 0.3077193837489508, "grad_norm": 9.94983196258545, "learning_rate": 9.980191800562367e-06, "loss": 0.89851809, "memory(GiB)": 34.88, "step": 11365, "train_speed(iter/s)": 0.412215 }, { "acc": 0.85002632, "epoch": 0.30785476403216633, "grad_norm": 11.006830215454102, "learning_rate": 9.980142007911905e-06, "loss": 0.77725949, "memory(GiB)": 34.88, "step": 11370, "train_speed(iter/s)": 0.412225 }, { "acc": 0.85775642, "epoch": 0.3079901443153819, "grad_norm": 14.995018005371094, "learning_rate": 9.980092152881647e-06, "loss": 0.63668585, "memory(GiB)": 34.88, "step": 11375, "train_speed(iter/s)": 0.412233 }, { "acc": 0.81982727, "epoch": 0.30812552459859743, "grad_norm": 30.748525619506836, "learning_rate": 9.980042235472207e-06, "loss": 0.91083946, "memory(GiB)": 34.88, "step": 11380, "train_speed(iter/s)": 0.412242 }, { "acc": 0.83941879, "epoch": 0.308260904881813, "grad_norm": 7.267021656036377, "learning_rate": 9.979992255684221e-06, "loss": 0.76963215, "memory(GiB)": 34.88, "step": 11385, "train_speed(iter/s)": 0.41225 }, { "acc": 0.81909924, "epoch": 0.30839628516502854, "grad_norm": 7.5313215255737305, "learning_rate": 9.979942213518306e-06, "loss": 0.91481838, "memory(GiB)": 34.88, "step": 11390, "train_speed(iter/s)": 0.41226 }, { "acc": 0.86949959, "epoch": 0.3085316654482441, "grad_norm": 8.691401481628418, "learning_rate": 9.979892108975096e-06, "loss": 0.57060652, "memory(GiB)": 34.88, "step": 11395, "train_speed(iter/s)": 0.41227 }, { "acc": 0.8342205, "epoch": 0.30866704573145964, "grad_norm": 7.06003475189209, "learning_rate": 9.979841942055213e-06, "loss": 0.95045462, "memory(GiB)": 34.88, "step": 11400, "train_speed(iter/s)": 0.41228 }, { "acc": 0.84717541, "epoch": 0.3088024260146752, "grad_norm": 8.958600997924805, "learning_rate": 9.979791712759288e-06, "loss": 0.72762165, "memory(GiB)": 34.88, "step": 11405, "train_speed(iter/s)": 0.41229 }, { "acc": 0.83136196, "epoch": 0.30893780629789075, "grad_norm": 11.058818817138672, "learning_rate": 9.97974142108795e-06, "loss": 0.82450771, "memory(GiB)": 34.88, "step": 11410, "train_speed(iter/s)": 0.412298 }, { "acc": 0.82879143, "epoch": 0.30907318658110633, "grad_norm": 9.068517684936523, "learning_rate": 9.979691067041828e-06, "loss": 0.81514053, "memory(GiB)": 34.88, "step": 11415, "train_speed(iter/s)": 0.412308 }, { "acc": 0.81161537, "epoch": 0.30920856686432185, "grad_norm": 7.18001127243042, "learning_rate": 9.979640650621558e-06, "loss": 1.01318607, "memory(GiB)": 34.88, "step": 11420, "train_speed(iter/s)": 0.412317 }, { "acc": 0.84290142, "epoch": 0.30934394714753743, "grad_norm": 4.326175212860107, "learning_rate": 9.979590171827763e-06, "loss": 0.76483827, "memory(GiB)": 34.88, "step": 11425, "train_speed(iter/s)": 0.412325 }, { "acc": 0.84057102, "epoch": 0.30947932743075296, "grad_norm": 4.994710922241211, "learning_rate": 9.979539630661081e-06, "loss": 0.83047848, "memory(GiB)": 34.88, "step": 11430, "train_speed(iter/s)": 0.412335 }, { "acc": 0.81699047, "epoch": 0.30961470771396854, "grad_norm": 7.608227252960205, "learning_rate": 9.979489027122144e-06, "loss": 0.91500568, "memory(GiB)": 34.88, "step": 11435, "train_speed(iter/s)": 0.412344 }, { "acc": 0.85648069, "epoch": 0.30975008799718406, "grad_norm": 7.47374153137207, "learning_rate": 9.979438361211585e-06, "loss": 0.7071929, "memory(GiB)": 34.88, "step": 11440, "train_speed(iter/s)": 0.412352 }, { "acc": 0.84143906, "epoch": 0.30988546828039965, "grad_norm": 9.883268356323242, "learning_rate": 9.97938763293004e-06, "loss": 0.73337288, "memory(GiB)": 34.88, "step": 11445, "train_speed(iter/s)": 0.412361 }, { "acc": 0.85114374, "epoch": 0.31002084856361517, "grad_norm": 12.073039054870605, "learning_rate": 9.979336842278144e-06, "loss": 0.79483867, "memory(GiB)": 34.88, "step": 11450, "train_speed(iter/s)": 0.41237 }, { "acc": 0.82249708, "epoch": 0.31015622884683075, "grad_norm": 6.805473327636719, "learning_rate": 9.979285989256533e-06, "loss": 0.8964118, "memory(GiB)": 34.88, "step": 11455, "train_speed(iter/s)": 0.412378 }, { "acc": 0.85016947, "epoch": 0.3102916091300463, "grad_norm": 8.887206077575684, "learning_rate": 9.979235073865845e-06, "loss": 0.81331148, "memory(GiB)": 34.88, "step": 11460, "train_speed(iter/s)": 0.412388 }, { "acc": 0.8575841, "epoch": 0.31042698941326186, "grad_norm": 10.18922233581543, "learning_rate": 9.979184096106715e-06, "loss": 0.7022192, "memory(GiB)": 34.88, "step": 11465, "train_speed(iter/s)": 0.412397 }, { "acc": 0.84684563, "epoch": 0.3105623696964774, "grad_norm": 5.763382911682129, "learning_rate": 9.979133055979787e-06, "loss": 0.75107069, "memory(GiB)": 34.88, "step": 11470, "train_speed(iter/s)": 0.412405 }, { "acc": 0.84238005, "epoch": 0.31069774997969296, "grad_norm": 10.981940269470215, "learning_rate": 9.979081953485694e-06, "loss": 0.84665756, "memory(GiB)": 34.88, "step": 11475, "train_speed(iter/s)": 0.412414 }, { "acc": 0.83440208, "epoch": 0.3108331302629085, "grad_norm": 8.47048568725586, "learning_rate": 9.97903078862508e-06, "loss": 0.81189651, "memory(GiB)": 34.88, "step": 11480, "train_speed(iter/s)": 0.412422 }, { "acc": 0.85718307, "epoch": 0.31096851054612407, "grad_norm": 4.845151424407959, "learning_rate": 9.978979561398584e-06, "loss": 0.74096432, "memory(GiB)": 34.88, "step": 11485, "train_speed(iter/s)": 0.41243 }, { "acc": 0.8519804, "epoch": 0.3111038908293396, "grad_norm": 7.817569255828857, "learning_rate": 9.97892827180685e-06, "loss": 0.74496131, "memory(GiB)": 34.88, "step": 11490, "train_speed(iter/s)": 0.41244 }, { "acc": 0.83702183, "epoch": 0.31123927111255517, "grad_norm": 69.36688995361328, "learning_rate": 9.978876919850521e-06, "loss": 0.76586084, "memory(GiB)": 34.88, "step": 11495, "train_speed(iter/s)": 0.41245 }, { "acc": 0.84186602, "epoch": 0.3113746513957707, "grad_norm": 16.498708724975586, "learning_rate": 9.978825505530236e-06, "loss": 0.73791142, "memory(GiB)": 34.88, "step": 11500, "train_speed(iter/s)": 0.412459 }, { "acc": 0.85632725, "epoch": 0.3115100316789863, "grad_norm": 8.888365745544434, "learning_rate": 9.978774028846641e-06, "loss": 0.6998414, "memory(GiB)": 34.88, "step": 11505, "train_speed(iter/s)": 0.412468 }, { "acc": 0.87460022, "epoch": 0.3116454119622018, "grad_norm": 8.24666690826416, "learning_rate": 9.978722489800385e-06, "loss": 0.65148826, "memory(GiB)": 34.88, "step": 11510, "train_speed(iter/s)": 0.412477 }, { "acc": 0.85156136, "epoch": 0.3117807922454174, "grad_norm": 14.113048553466797, "learning_rate": 9.978670888392109e-06, "loss": 0.81987019, "memory(GiB)": 34.88, "step": 11515, "train_speed(iter/s)": 0.412486 }, { "acc": 0.83123341, "epoch": 0.3119161725286329, "grad_norm": 12.033549308776855, "learning_rate": 9.978619224622458e-06, "loss": 0.82887621, "memory(GiB)": 34.88, "step": 11520, "train_speed(iter/s)": 0.412495 }, { "acc": 0.85806465, "epoch": 0.3120515528118485, "grad_norm": 6.1351776123046875, "learning_rate": 9.978567498492084e-06, "loss": 0.58938179, "memory(GiB)": 34.88, "step": 11525, "train_speed(iter/s)": 0.412504 }, { "acc": 0.84655476, "epoch": 0.312186933095064, "grad_norm": 8.303923606872559, "learning_rate": 9.978515710001631e-06, "loss": 0.83919296, "memory(GiB)": 34.88, "step": 11530, "train_speed(iter/s)": 0.412513 }, { "acc": 0.86689758, "epoch": 0.3123223133782796, "grad_norm": 8.384225845336914, "learning_rate": 9.97846385915175e-06, "loss": 0.62126999, "memory(GiB)": 34.88, "step": 11535, "train_speed(iter/s)": 0.412522 }, { "acc": 0.82377224, "epoch": 0.3124576936614951, "grad_norm": 9.498910903930664, "learning_rate": 9.978411945943091e-06, "loss": 0.83133392, "memory(GiB)": 34.88, "step": 11540, "train_speed(iter/s)": 0.412531 }, { "acc": 0.82187233, "epoch": 0.3125930739447107, "grad_norm": 6.200134754180908, "learning_rate": 9.978359970376305e-06, "loss": 0.88863354, "memory(GiB)": 34.88, "step": 11545, "train_speed(iter/s)": 0.412539 }, { "acc": 0.85501499, "epoch": 0.3127284542279262, "grad_norm": 9.029380798339844, "learning_rate": 9.978307932452037e-06, "loss": 0.68037252, "memory(GiB)": 34.88, "step": 11550, "train_speed(iter/s)": 0.412549 }, { "acc": 0.81798544, "epoch": 0.3128638345111418, "grad_norm": 16.00704002380371, "learning_rate": 9.978255832170945e-06, "loss": 0.92340775, "memory(GiB)": 34.88, "step": 11555, "train_speed(iter/s)": 0.412559 }, { "acc": 0.86161594, "epoch": 0.3129992147943573, "grad_norm": 12.710039138793945, "learning_rate": 9.978203669533682e-06, "loss": 0.81279669, "memory(GiB)": 34.88, "step": 11560, "train_speed(iter/s)": 0.412568 }, { "acc": 0.83115559, "epoch": 0.3131345950775729, "grad_norm": 5.863936901092529, "learning_rate": 9.978151444540897e-06, "loss": 0.82746811, "memory(GiB)": 34.88, "step": 11565, "train_speed(iter/s)": 0.412577 }, { "acc": 0.85452557, "epoch": 0.31326997536078843, "grad_norm": 15.166165351867676, "learning_rate": 9.978099157193248e-06, "loss": 0.73715425, "memory(GiB)": 34.88, "step": 11570, "train_speed(iter/s)": 0.412587 }, { "acc": 0.83775816, "epoch": 0.313405355644004, "grad_norm": 23.058012008666992, "learning_rate": 9.978046807491387e-06, "loss": 0.83030643, "memory(GiB)": 34.88, "step": 11575, "train_speed(iter/s)": 0.412596 }, { "acc": 0.85927677, "epoch": 0.31354073592721954, "grad_norm": 11.704646110534668, "learning_rate": 9.977994395435973e-06, "loss": 0.71711564, "memory(GiB)": 34.88, "step": 11580, "train_speed(iter/s)": 0.412606 }, { "acc": 0.86789742, "epoch": 0.3136761162104351, "grad_norm": 22.95638084411621, "learning_rate": 9.977941921027659e-06, "loss": 0.68677349, "memory(GiB)": 34.88, "step": 11585, "train_speed(iter/s)": 0.412615 }, { "acc": 0.86734161, "epoch": 0.31381149649365064, "grad_norm": 6.501792907714844, "learning_rate": 9.977889384267106e-06, "loss": 0.65705042, "memory(GiB)": 34.88, "step": 11590, "train_speed(iter/s)": 0.412624 }, { "acc": 0.83186502, "epoch": 0.3139468767768662, "grad_norm": 8.68641185760498, "learning_rate": 9.97783678515497e-06, "loss": 0.77417288, "memory(GiB)": 34.88, "step": 11595, "train_speed(iter/s)": 0.412634 }, { "acc": 0.84610081, "epoch": 0.31408225706008175, "grad_norm": 12.103214263916016, "learning_rate": 9.97778412369191e-06, "loss": 0.70459137, "memory(GiB)": 34.88, "step": 11600, "train_speed(iter/s)": 0.412643 }, { "acc": 0.86287298, "epoch": 0.31421763734329733, "grad_norm": 6.179566383361816, "learning_rate": 9.977731399878587e-06, "loss": 0.61539078, "memory(GiB)": 34.88, "step": 11605, "train_speed(iter/s)": 0.412651 }, { "acc": 0.82943363, "epoch": 0.31435301762651285, "grad_norm": 20.47307586669922, "learning_rate": 9.97767861371566e-06, "loss": 0.84382753, "memory(GiB)": 34.88, "step": 11610, "train_speed(iter/s)": 0.412658 }, { "acc": 0.86589661, "epoch": 0.31448839790972843, "grad_norm": 6.44058084487915, "learning_rate": 9.97762576520379e-06, "loss": 0.6750874, "memory(GiB)": 34.88, "step": 11615, "train_speed(iter/s)": 0.412668 }, { "acc": 0.86165028, "epoch": 0.31462377819294396, "grad_norm": 5.960505962371826, "learning_rate": 9.97757285434364e-06, "loss": 0.60903487, "memory(GiB)": 34.88, "step": 11620, "train_speed(iter/s)": 0.412677 }, { "acc": 0.83202438, "epoch": 0.31475915847615954, "grad_norm": 29.167661666870117, "learning_rate": 9.977519881135874e-06, "loss": 0.86536655, "memory(GiB)": 34.88, "step": 11625, "train_speed(iter/s)": 0.412686 }, { "acc": 0.8556694, "epoch": 0.31489453875937506, "grad_norm": 14.844968795776367, "learning_rate": 9.977466845581152e-06, "loss": 0.81397133, "memory(GiB)": 34.88, "step": 11630, "train_speed(iter/s)": 0.412695 }, { "acc": 0.84667358, "epoch": 0.31502991904259064, "grad_norm": 7.487668037414551, "learning_rate": 9.97741374768014e-06, "loss": 0.84604282, "memory(GiB)": 34.88, "step": 11635, "train_speed(iter/s)": 0.412704 }, { "acc": 0.85285492, "epoch": 0.31516529932580617, "grad_norm": 7.499426364898682, "learning_rate": 9.977360587433506e-06, "loss": 0.69858904, "memory(GiB)": 34.88, "step": 11640, "train_speed(iter/s)": 0.412712 }, { "acc": 0.81588125, "epoch": 0.31530067960902175, "grad_norm": 16.182600021362305, "learning_rate": 9.977307364841913e-06, "loss": 0.92257824, "memory(GiB)": 34.88, "step": 11645, "train_speed(iter/s)": 0.412722 }, { "acc": 0.80735989, "epoch": 0.3154360598922373, "grad_norm": 9.113747596740723, "learning_rate": 9.977254079906028e-06, "loss": 1.14120407, "memory(GiB)": 34.88, "step": 11650, "train_speed(iter/s)": 0.412731 }, { "acc": 0.84567337, "epoch": 0.31557144017545286, "grad_norm": 13.281778335571289, "learning_rate": 9.977200732626518e-06, "loss": 0.79902959, "memory(GiB)": 34.88, "step": 11655, "train_speed(iter/s)": 0.412739 }, { "acc": 0.82499199, "epoch": 0.3157068204586684, "grad_norm": 15.674338340759277, "learning_rate": 9.977147323004054e-06, "loss": 0.8799304, "memory(GiB)": 34.88, "step": 11660, "train_speed(iter/s)": 0.412748 }, { "acc": 0.85116577, "epoch": 0.31584220074188396, "grad_norm": 5.237752437591553, "learning_rate": 9.977093851039302e-06, "loss": 0.78701496, "memory(GiB)": 34.88, "step": 11665, "train_speed(iter/s)": 0.412757 }, { "acc": 0.83254681, "epoch": 0.3159775810250995, "grad_norm": 8.167045593261719, "learning_rate": 9.977040316732935e-06, "loss": 0.91504068, "memory(GiB)": 34.88, "step": 11670, "train_speed(iter/s)": 0.412765 }, { "acc": 0.82891893, "epoch": 0.31611296130831507, "grad_norm": 16.131132125854492, "learning_rate": 9.976986720085619e-06, "loss": 0.87459412, "memory(GiB)": 34.88, "step": 11675, "train_speed(iter/s)": 0.412774 }, { "acc": 0.85887775, "epoch": 0.3162483415915306, "grad_norm": 10.134491920471191, "learning_rate": 9.97693306109803e-06, "loss": 0.61906853, "memory(GiB)": 34.88, "step": 11680, "train_speed(iter/s)": 0.412783 }, { "acc": 0.82681179, "epoch": 0.31638372187474617, "grad_norm": 7.351210117340088, "learning_rate": 9.976879339770836e-06, "loss": 0.86204872, "memory(GiB)": 34.88, "step": 11685, "train_speed(iter/s)": 0.412793 }, { "acc": 0.82102098, "epoch": 0.3165191021579617, "grad_norm": 6.90273904800415, "learning_rate": 9.976825556104715e-06, "loss": 0.88366947, "memory(GiB)": 34.88, "step": 11690, "train_speed(iter/s)": 0.412802 }, { "acc": 0.82788525, "epoch": 0.3166544824411773, "grad_norm": 14.39595890045166, "learning_rate": 9.976771710100335e-06, "loss": 0.92008724, "memory(GiB)": 34.88, "step": 11695, "train_speed(iter/s)": 0.412811 }, { "acc": 0.82330971, "epoch": 0.3167898627243928, "grad_norm": 18.737817764282227, "learning_rate": 9.976717801758374e-06, "loss": 0.93982449, "memory(GiB)": 34.88, "step": 11700, "train_speed(iter/s)": 0.412819 }, { "acc": 0.8225503, "epoch": 0.3169252430076084, "grad_norm": 10.25877857208252, "learning_rate": 9.97666383107951e-06, "loss": 0.90659952, "memory(GiB)": 34.88, "step": 11705, "train_speed(iter/s)": 0.412828 }, { "acc": 0.8328125, "epoch": 0.3170606232908239, "grad_norm": 19.925323486328125, "learning_rate": 9.976609798064412e-06, "loss": 0.94827576, "memory(GiB)": 34.88, "step": 11710, "train_speed(iter/s)": 0.412836 }, { "acc": 0.85541925, "epoch": 0.3171960035740395, "grad_norm": 14.515252113342285, "learning_rate": 9.976555702713763e-06, "loss": 0.80068865, "memory(GiB)": 34.88, "step": 11715, "train_speed(iter/s)": 0.412845 }, { "acc": 0.83276501, "epoch": 0.317331383857255, "grad_norm": 9.901446342468262, "learning_rate": 9.976501545028238e-06, "loss": 0.87797432, "memory(GiB)": 34.88, "step": 11720, "train_speed(iter/s)": 0.412854 }, { "acc": 0.85116472, "epoch": 0.3174667641404706, "grad_norm": 9.555065155029297, "learning_rate": 9.976447325008516e-06, "loss": 0.72289042, "memory(GiB)": 34.88, "step": 11725, "train_speed(iter/s)": 0.412864 }, { "acc": 0.85850554, "epoch": 0.3176021444236861, "grad_norm": 4.690433979034424, "learning_rate": 9.976393042655274e-06, "loss": 0.67371416, "memory(GiB)": 34.88, "step": 11730, "train_speed(iter/s)": 0.412872 }, { "acc": 0.84617672, "epoch": 0.3177375247069017, "grad_norm": 5.743691921234131, "learning_rate": 9.976338697969196e-06, "loss": 0.70128999, "memory(GiB)": 34.88, "step": 11735, "train_speed(iter/s)": 0.412877 }, { "acc": 0.85257921, "epoch": 0.3178729049901172, "grad_norm": 7.734791278839111, "learning_rate": 9.97628429095096e-06, "loss": 0.62134314, "memory(GiB)": 34.88, "step": 11740, "train_speed(iter/s)": 0.412886 }, { "acc": 0.82621765, "epoch": 0.3180082852733328, "grad_norm": 9.694787979125977, "learning_rate": 9.97622982160125e-06, "loss": 0.92282705, "memory(GiB)": 34.88, "step": 11745, "train_speed(iter/s)": 0.412896 }, { "acc": 0.81189117, "epoch": 0.3181436655565483, "grad_norm": 6.569542407989502, "learning_rate": 9.976175289920744e-06, "loss": 0.93486214, "memory(GiB)": 34.88, "step": 11750, "train_speed(iter/s)": 0.412904 }, { "acc": 0.83421926, "epoch": 0.3182790458397639, "grad_norm": 10.66962718963623, "learning_rate": 9.976120695910129e-06, "loss": 0.81053066, "memory(GiB)": 34.88, "step": 11755, "train_speed(iter/s)": 0.412913 }, { "acc": 0.83481178, "epoch": 0.31841442612297943, "grad_norm": 28.80327033996582, "learning_rate": 9.976066039570087e-06, "loss": 0.80305309, "memory(GiB)": 34.88, "step": 11760, "train_speed(iter/s)": 0.412922 }, { "acc": 0.8425065, "epoch": 0.318549806406195, "grad_norm": 27.26694679260254, "learning_rate": 9.976011320901306e-06, "loss": 0.7384078, "memory(GiB)": 34.88, "step": 11765, "train_speed(iter/s)": 0.41293 }, { "acc": 0.84740467, "epoch": 0.31868518668941054, "grad_norm": 10.01380729675293, "learning_rate": 9.975956539904466e-06, "loss": 0.76607375, "memory(GiB)": 34.88, "step": 11770, "train_speed(iter/s)": 0.412938 }, { "acc": 0.82788496, "epoch": 0.3188205669726261, "grad_norm": 9.474139213562012, "learning_rate": 9.975901696580258e-06, "loss": 0.89653254, "memory(GiB)": 34.88, "step": 11775, "train_speed(iter/s)": 0.412947 }, { "acc": 0.81603956, "epoch": 0.31895594725584164, "grad_norm": 8.89637279510498, "learning_rate": 9.975846790929367e-06, "loss": 1.01654634, "memory(GiB)": 34.88, "step": 11780, "train_speed(iter/s)": 0.412955 }, { "acc": 0.83305893, "epoch": 0.3190913275390572, "grad_norm": 15.08579158782959, "learning_rate": 9.975791822952479e-06, "loss": 0.78040376, "memory(GiB)": 34.88, "step": 11785, "train_speed(iter/s)": 0.412963 }, { "acc": 0.84080706, "epoch": 0.31922670782227275, "grad_norm": 7.841459274291992, "learning_rate": 9.975736792650286e-06, "loss": 0.79697714, "memory(GiB)": 34.88, "step": 11790, "train_speed(iter/s)": 0.412972 }, { "acc": 0.82114305, "epoch": 0.31936208810548833, "grad_norm": 11.948630332946777, "learning_rate": 9.975681700023476e-06, "loss": 0.8672533, "memory(GiB)": 34.88, "step": 11795, "train_speed(iter/s)": 0.412981 }, { "acc": 0.84049778, "epoch": 0.31949746838870385, "grad_norm": 5.700870990753174, "learning_rate": 9.975626545072738e-06, "loss": 0.78030243, "memory(GiB)": 34.88, "step": 11800, "train_speed(iter/s)": 0.412989 }, { "acc": 0.86759148, "epoch": 0.31963284867191943, "grad_norm": 6.787700176239014, "learning_rate": 9.975571327798766e-06, "loss": 0.68051963, "memory(GiB)": 34.88, "step": 11805, "train_speed(iter/s)": 0.412997 }, { "acc": 0.82278576, "epoch": 0.31976822895513496, "grad_norm": 8.432887077331543, "learning_rate": 9.975516048202248e-06, "loss": 0.85830135, "memory(GiB)": 34.88, "step": 11810, "train_speed(iter/s)": 0.413005 }, { "acc": 0.85415974, "epoch": 0.31990360923835054, "grad_norm": 20.9739990234375, "learning_rate": 9.975460706283876e-06, "loss": 0.64887791, "memory(GiB)": 34.88, "step": 11815, "train_speed(iter/s)": 0.413014 }, { "acc": 0.83442783, "epoch": 0.32003898952156606, "grad_norm": 10.996532440185547, "learning_rate": 9.97540530204435e-06, "loss": 0.79962754, "memory(GiB)": 34.88, "step": 11820, "train_speed(iter/s)": 0.413023 }, { "acc": 0.84567003, "epoch": 0.32017436980478164, "grad_norm": 7.428646564483643, "learning_rate": 9.975349835484356e-06, "loss": 0.73354607, "memory(GiB)": 34.88, "step": 11825, "train_speed(iter/s)": 0.413032 }, { "acc": 0.85971165, "epoch": 0.32030975008799717, "grad_norm": 19.059995651245117, "learning_rate": 9.975294306604593e-06, "loss": 0.71270256, "memory(GiB)": 34.88, "step": 11830, "train_speed(iter/s)": 0.413041 }, { "acc": 0.83682194, "epoch": 0.32044513037121275, "grad_norm": 11.315130233764648, "learning_rate": 9.975238715405756e-06, "loss": 0.92194986, "memory(GiB)": 34.88, "step": 11835, "train_speed(iter/s)": 0.413048 }, { "acc": 0.85707092, "epoch": 0.3205805106544283, "grad_norm": 16.615154266357422, "learning_rate": 9.975183061888543e-06, "loss": 0.65075493, "memory(GiB)": 34.88, "step": 11840, "train_speed(iter/s)": 0.413057 }, { "acc": 0.84790449, "epoch": 0.32071589093764385, "grad_norm": 14.979126930236816, "learning_rate": 9.975127346053646e-06, "loss": 0.69269385, "memory(GiB)": 34.88, "step": 11845, "train_speed(iter/s)": 0.413064 }, { "acc": 0.83257751, "epoch": 0.3208512712208594, "grad_norm": 9.59438705444336, "learning_rate": 9.975071567901769e-06, "loss": 0.84668236, "memory(GiB)": 34.88, "step": 11850, "train_speed(iter/s)": 0.413073 }, { "acc": 0.84481916, "epoch": 0.32098665150407496, "grad_norm": 8.65325927734375, "learning_rate": 9.975015727433607e-06, "loss": 0.78998132, "memory(GiB)": 34.88, "step": 11855, "train_speed(iter/s)": 0.413081 }, { "acc": 0.82211008, "epoch": 0.3211220317872905, "grad_norm": 7.534943580627441, "learning_rate": 9.97495982464986e-06, "loss": 0.92444954, "memory(GiB)": 34.88, "step": 11860, "train_speed(iter/s)": 0.41309 }, { "acc": 0.84890575, "epoch": 0.32125741207050607, "grad_norm": 5.210320472717285, "learning_rate": 9.97490385955123e-06, "loss": 0.73184872, "memory(GiB)": 34.88, "step": 11865, "train_speed(iter/s)": 0.413099 }, { "acc": 0.85163507, "epoch": 0.3213927923537216, "grad_norm": 4.9694085121154785, "learning_rate": 9.974847832138417e-06, "loss": 0.80209942, "memory(GiB)": 34.88, "step": 11870, "train_speed(iter/s)": 0.413108 }, { "acc": 0.85973644, "epoch": 0.32152817263693717, "grad_norm": 25.26624298095703, "learning_rate": 9.974791742412122e-06, "loss": 0.71325793, "memory(GiB)": 34.88, "step": 11875, "train_speed(iter/s)": 0.413116 }, { "acc": 0.8428215, "epoch": 0.3216635529201527, "grad_norm": 6.713174819946289, "learning_rate": 9.97473559037305e-06, "loss": 0.82637024, "memory(GiB)": 34.88, "step": 11880, "train_speed(iter/s)": 0.413124 }, { "acc": 0.8294549, "epoch": 0.3217989332033683, "grad_norm": 7.9002275466918945, "learning_rate": 9.974679376021902e-06, "loss": 0.82995558, "memory(GiB)": 34.88, "step": 11885, "train_speed(iter/s)": 0.413133 }, { "acc": 0.83727512, "epoch": 0.3219343134865838, "grad_norm": 8.279404640197754, "learning_rate": 9.974623099359383e-06, "loss": 0.78720422, "memory(GiB)": 34.88, "step": 11890, "train_speed(iter/s)": 0.413142 }, { "acc": 0.83222265, "epoch": 0.3220696937697994, "grad_norm": 11.72721004486084, "learning_rate": 9.974566760386198e-06, "loss": 0.92353401, "memory(GiB)": 34.88, "step": 11895, "train_speed(iter/s)": 0.413149 }, { "acc": 0.84305363, "epoch": 0.3222050740530149, "grad_norm": 24.70831298828125, "learning_rate": 9.974510359103053e-06, "loss": 0.73947887, "memory(GiB)": 34.88, "step": 11900, "train_speed(iter/s)": 0.413158 }, { "acc": 0.86999769, "epoch": 0.3223404543362305, "grad_norm": 8.762002944946289, "learning_rate": 9.974453895510654e-06, "loss": 0.69229836, "memory(GiB)": 34.88, "step": 11905, "train_speed(iter/s)": 0.413167 }, { "acc": 0.83394566, "epoch": 0.322475834619446, "grad_norm": 9.792604446411133, "learning_rate": 9.974397369609709e-06, "loss": 0.82284851, "memory(GiB)": 34.88, "step": 11910, "train_speed(iter/s)": 0.413176 }, { "acc": 0.86368656, "epoch": 0.3226112149026616, "grad_norm": 14.250713348388672, "learning_rate": 9.974340781400924e-06, "loss": 0.59089861, "memory(GiB)": 34.88, "step": 11915, "train_speed(iter/s)": 0.413184 }, { "acc": 0.84549427, "epoch": 0.3227465951858771, "grad_norm": 10.486255645751953, "learning_rate": 9.974284130885013e-06, "loss": 0.77395191, "memory(GiB)": 34.88, "step": 11920, "train_speed(iter/s)": 0.413192 }, { "acc": 0.84718418, "epoch": 0.3228819754690927, "grad_norm": 10.022283554077148, "learning_rate": 9.97422741806268e-06, "loss": 0.8194972, "memory(GiB)": 34.88, "step": 11925, "train_speed(iter/s)": 0.413201 }, { "acc": 0.8501133, "epoch": 0.3230173557523082, "grad_norm": 10.841235160827637, "learning_rate": 9.97417064293464e-06, "loss": 0.73597136, "memory(GiB)": 34.88, "step": 11930, "train_speed(iter/s)": 0.41321 }, { "acc": 0.8612072, "epoch": 0.3231527360355238, "grad_norm": 4.363994598388672, "learning_rate": 9.9741138055016e-06, "loss": 0.74517622, "memory(GiB)": 34.88, "step": 11935, "train_speed(iter/s)": 0.413217 }, { "acc": 0.83373861, "epoch": 0.3232881163187393, "grad_norm": 10.219805717468262, "learning_rate": 9.974056905764272e-06, "loss": 0.792132, "memory(GiB)": 34.88, "step": 11940, "train_speed(iter/s)": 0.413226 }, { "acc": 0.8237112, "epoch": 0.3234234966019549, "grad_norm": 14.219948768615723, "learning_rate": 9.973999943723373e-06, "loss": 0.88675261, "memory(GiB)": 34.88, "step": 11945, "train_speed(iter/s)": 0.413234 }, { "acc": 0.83555174, "epoch": 0.32355887688517043, "grad_norm": 13.744670867919922, "learning_rate": 9.973942919379615e-06, "loss": 0.95315104, "memory(GiB)": 34.88, "step": 11950, "train_speed(iter/s)": 0.413242 }, { "acc": 0.80967693, "epoch": 0.323694257168386, "grad_norm": 9.817788124084473, "learning_rate": 9.97388583273371e-06, "loss": 1.05019245, "memory(GiB)": 34.88, "step": 11955, "train_speed(iter/s)": 0.413249 }, { "acc": 0.86312952, "epoch": 0.32382963745160154, "grad_norm": 10.525947570800781, "learning_rate": 9.973828683786377e-06, "loss": 0.69312668, "memory(GiB)": 34.88, "step": 11960, "train_speed(iter/s)": 0.413257 }, { "acc": 0.86269751, "epoch": 0.3239650177348171, "grad_norm": 8.76997184753418, "learning_rate": 9.973771472538328e-06, "loss": 0.72704945, "memory(GiB)": 34.88, "step": 11965, "train_speed(iter/s)": 0.413266 }, { "acc": 0.86242628, "epoch": 0.32410039801803264, "grad_norm": 8.131453514099121, "learning_rate": 9.973714198990281e-06, "loss": 0.64374571, "memory(GiB)": 34.88, "step": 11970, "train_speed(iter/s)": 0.413275 }, { "acc": 0.86820812, "epoch": 0.3242357783012482, "grad_norm": 7.070205211639404, "learning_rate": 9.973656863142954e-06, "loss": 0.66342773, "memory(GiB)": 34.88, "step": 11975, "train_speed(iter/s)": 0.413284 }, { "acc": 0.87584057, "epoch": 0.32437115858446375, "grad_norm": 10.647077560424805, "learning_rate": 9.973599464997066e-06, "loss": 0.64809055, "memory(GiB)": 34.88, "step": 11980, "train_speed(iter/s)": 0.413293 }, { "acc": 0.83791065, "epoch": 0.32450653886767933, "grad_norm": 11.172907829284668, "learning_rate": 9.973542004553334e-06, "loss": 0.8488555, "memory(GiB)": 34.88, "step": 11985, "train_speed(iter/s)": 0.413301 }, { "acc": 0.85008678, "epoch": 0.32464191915089485, "grad_norm": 13.0277738571167, "learning_rate": 9.97348448181248e-06, "loss": 0.73985453, "memory(GiB)": 34.88, "step": 11990, "train_speed(iter/s)": 0.41331 }, { "acc": 0.84750595, "epoch": 0.32477729943411043, "grad_norm": 10.443530082702637, "learning_rate": 9.973426896775224e-06, "loss": 0.74024181, "memory(GiB)": 34.88, "step": 11995, "train_speed(iter/s)": 0.413318 }, { "acc": 0.84807701, "epoch": 0.32491267971732596, "grad_norm": 5.966375827789307, "learning_rate": 9.973369249442284e-06, "loss": 0.75160427, "memory(GiB)": 34.88, "step": 12000, "train_speed(iter/s)": 0.413326 }, { "acc": 0.80803833, "epoch": 0.32504806000054154, "grad_norm": 12.086831092834473, "learning_rate": 9.973311539814387e-06, "loss": 1.03345366, "memory(GiB)": 34.88, "step": 12005, "train_speed(iter/s)": 0.413333 }, { "acc": 0.84568958, "epoch": 0.32518344028375706, "grad_norm": 7.347864151000977, "learning_rate": 9.973253767892253e-06, "loss": 0.86917915, "memory(GiB)": 34.88, "step": 12010, "train_speed(iter/s)": 0.413341 }, { "acc": 0.82913895, "epoch": 0.32531882056697264, "grad_norm": 7.332090854644775, "learning_rate": 9.973195933676607e-06, "loss": 0.86809521, "memory(GiB)": 34.88, "step": 12015, "train_speed(iter/s)": 0.413349 }, { "acc": 0.84132996, "epoch": 0.32545420085018817, "grad_norm": 11.094966888427734, "learning_rate": 9.973138037168173e-06, "loss": 0.86975374, "memory(GiB)": 34.88, "step": 12020, "train_speed(iter/s)": 0.413358 }, { "acc": 0.84883604, "epoch": 0.32558958113340375, "grad_norm": 9.755824089050293, "learning_rate": 9.973080078367678e-06, "loss": 0.72992516, "memory(GiB)": 34.88, "step": 12025, "train_speed(iter/s)": 0.413367 }, { "acc": 0.85622807, "epoch": 0.3257249614166193, "grad_norm": 5.92153787612915, "learning_rate": 9.973022057275846e-06, "loss": 0.73078461, "memory(GiB)": 34.88, "step": 12030, "train_speed(iter/s)": 0.413375 }, { "acc": 0.86694059, "epoch": 0.32586034169983485, "grad_norm": 5.078413486480713, "learning_rate": 9.972963973893402e-06, "loss": 0.73080149, "memory(GiB)": 34.88, "step": 12035, "train_speed(iter/s)": 0.413382 }, { "acc": 0.84253578, "epoch": 0.3259957219830504, "grad_norm": 12.841866493225098, "learning_rate": 9.972905828221078e-06, "loss": 0.81736794, "memory(GiB)": 34.88, "step": 12040, "train_speed(iter/s)": 0.413388 }, { "acc": 0.8578969, "epoch": 0.32613110226626596, "grad_norm": 10.34778881072998, "learning_rate": 9.972847620259599e-06, "loss": 0.67831144, "memory(GiB)": 34.88, "step": 12045, "train_speed(iter/s)": 0.413397 }, { "acc": 0.854006, "epoch": 0.3262664825494815, "grad_norm": 8.791006088256836, "learning_rate": 9.972789350009694e-06, "loss": 0.69616356, "memory(GiB)": 34.88, "step": 12050, "train_speed(iter/s)": 0.413405 }, { "acc": 0.83362522, "epoch": 0.32640186283269707, "grad_norm": 20.19050407409668, "learning_rate": 9.972731017472098e-06, "loss": 0.81149883, "memory(GiB)": 34.88, "step": 12055, "train_speed(iter/s)": 0.413414 }, { "acc": 0.85073223, "epoch": 0.3265372431159126, "grad_norm": 15.976546287536621, "learning_rate": 9.972672622647535e-06, "loss": 0.66899052, "memory(GiB)": 34.88, "step": 12060, "train_speed(iter/s)": 0.413422 }, { "acc": 0.86450396, "epoch": 0.32667262339912817, "grad_norm": 6.852754592895508, "learning_rate": 9.972614165536743e-06, "loss": 0.61298423, "memory(GiB)": 34.88, "step": 12065, "train_speed(iter/s)": 0.413431 }, { "acc": 0.84087009, "epoch": 0.3268080036823437, "grad_norm": 5.740365028381348, "learning_rate": 9.972555646140448e-06, "loss": 0.87452745, "memory(GiB)": 34.88, "step": 12070, "train_speed(iter/s)": 0.413439 }, { "acc": 0.86562061, "epoch": 0.3269433839655593, "grad_norm": 10.201583862304688, "learning_rate": 9.972497064459385e-06, "loss": 0.6021637, "memory(GiB)": 34.88, "step": 12075, "train_speed(iter/s)": 0.413447 }, { "acc": 0.82913494, "epoch": 0.3270787642487748, "grad_norm": 7.255077362060547, "learning_rate": 9.97243842049429e-06, "loss": 0.85951614, "memory(GiB)": 34.88, "step": 12080, "train_speed(iter/s)": 0.413455 }, { "acc": 0.8602829, "epoch": 0.3272141445319904, "grad_norm": 6.053705215454102, "learning_rate": 9.972379714245898e-06, "loss": 0.71899242, "memory(GiB)": 34.88, "step": 12085, "train_speed(iter/s)": 0.413464 }, { "acc": 0.84678202, "epoch": 0.3273495248152059, "grad_norm": 7.564810752868652, "learning_rate": 9.972320945714943e-06, "loss": 0.79544749, "memory(GiB)": 34.88, "step": 12090, "train_speed(iter/s)": 0.413472 }, { "acc": 0.83558407, "epoch": 0.3274849050984215, "grad_norm": 8.954253196716309, "learning_rate": 9.97226211490216e-06, "loss": 0.77422142, "memory(GiB)": 34.88, "step": 12095, "train_speed(iter/s)": 0.413481 }, { "acc": 0.86488819, "epoch": 0.327620285381637, "grad_norm": 7.466670036315918, "learning_rate": 9.972203221808287e-06, "loss": 0.73585682, "memory(GiB)": 34.88, "step": 12100, "train_speed(iter/s)": 0.413489 }, { "acc": 0.83493776, "epoch": 0.3277556656648526, "grad_norm": 12.43871784210205, "learning_rate": 9.972144266434062e-06, "loss": 0.89465675, "memory(GiB)": 34.88, "step": 12105, "train_speed(iter/s)": 0.413497 }, { "acc": 0.85025864, "epoch": 0.3278910459480681, "grad_norm": 10.473610877990723, "learning_rate": 9.972085248780222e-06, "loss": 0.66736956, "memory(GiB)": 34.88, "step": 12110, "train_speed(iter/s)": 0.413505 }, { "acc": 0.85709343, "epoch": 0.3280264262312837, "grad_norm": 6.587846279144287, "learning_rate": 9.972026168847509e-06, "loss": 0.81095657, "memory(GiB)": 34.88, "step": 12115, "train_speed(iter/s)": 0.413514 }, { "acc": 0.85943327, "epoch": 0.3281618065144992, "grad_norm": 10.177886962890625, "learning_rate": 9.971967026636662e-06, "loss": 0.63901753, "memory(GiB)": 34.88, "step": 12120, "train_speed(iter/s)": 0.413523 }, { "acc": 0.80247812, "epoch": 0.3282971867977148, "grad_norm": 20.396331787109375, "learning_rate": 9.97190782214842e-06, "loss": 1.11643143, "memory(GiB)": 34.88, "step": 12125, "train_speed(iter/s)": 0.413531 }, { "acc": 0.83461695, "epoch": 0.3284325670809303, "grad_norm": 10.261006355285645, "learning_rate": 9.971848555383528e-06, "loss": 0.85806427, "memory(GiB)": 34.88, "step": 12130, "train_speed(iter/s)": 0.413538 }, { "acc": 0.83782005, "epoch": 0.3285679473641459, "grad_norm": 10.158202171325684, "learning_rate": 9.971789226342724e-06, "loss": 0.69940834, "memory(GiB)": 34.88, "step": 12135, "train_speed(iter/s)": 0.413545 }, { "acc": 0.85904646, "epoch": 0.32870332764736143, "grad_norm": 12.840311050415039, "learning_rate": 9.971729835026757e-06, "loss": 0.69693308, "memory(GiB)": 34.88, "step": 12140, "train_speed(iter/s)": 0.413553 }, { "acc": 0.84974222, "epoch": 0.328838707930577, "grad_norm": 5.85114049911499, "learning_rate": 9.971670381436366e-06, "loss": 0.72032623, "memory(GiB)": 34.88, "step": 12145, "train_speed(iter/s)": 0.413562 }, { "acc": 0.86257544, "epoch": 0.32897408821379254, "grad_norm": 6.179832458496094, "learning_rate": 9.971610865572298e-06, "loss": 0.76670208, "memory(GiB)": 34.88, "step": 12150, "train_speed(iter/s)": 0.413569 }, { "acc": 0.87143383, "epoch": 0.3291094684970081, "grad_norm": 7.807018756866455, "learning_rate": 9.971551287435298e-06, "loss": 0.6007935, "memory(GiB)": 34.88, "step": 12155, "train_speed(iter/s)": 0.413578 }, { "acc": 0.84472475, "epoch": 0.32924484878022364, "grad_norm": 8.046574592590332, "learning_rate": 9.971491647026113e-06, "loss": 0.76475677, "memory(GiB)": 34.88, "step": 12160, "train_speed(iter/s)": 0.413586 }, { "acc": 0.85441875, "epoch": 0.3293802290634392, "grad_norm": 13.048227310180664, "learning_rate": 9.971431944345492e-06, "loss": 0.66711426, "memory(GiB)": 34.88, "step": 12165, "train_speed(iter/s)": 0.413593 }, { "acc": 0.82679405, "epoch": 0.32951560934665475, "grad_norm": 14.639388084411621, "learning_rate": 9.971372179394176e-06, "loss": 0.89469013, "memory(GiB)": 34.88, "step": 12170, "train_speed(iter/s)": 0.413602 }, { "acc": 0.8534646, "epoch": 0.32965098962987033, "grad_norm": 16.36181640625, "learning_rate": 9.97131235217292e-06, "loss": 0.71102295, "memory(GiB)": 34.88, "step": 12175, "train_speed(iter/s)": 0.41361 }, { "acc": 0.84653425, "epoch": 0.32978636991308585, "grad_norm": 9.71764850616455, "learning_rate": 9.971252462682472e-06, "loss": 0.79637437, "memory(GiB)": 34.88, "step": 12180, "train_speed(iter/s)": 0.413619 }, { "acc": 0.81044207, "epoch": 0.32992175019630143, "grad_norm": 8.044036865234375, "learning_rate": 9.971192510923582e-06, "loss": 0.99531879, "memory(GiB)": 34.88, "step": 12185, "train_speed(iter/s)": 0.413625 }, { "acc": 0.81526833, "epoch": 0.33005713047951696, "grad_norm": 10.066828727722168, "learning_rate": 9.971132496897e-06, "loss": 0.98508558, "memory(GiB)": 34.88, "step": 12190, "train_speed(iter/s)": 0.413632 }, { "acc": 0.85080566, "epoch": 0.33019251076273254, "grad_norm": 27.041736602783203, "learning_rate": 9.97107242060348e-06, "loss": 0.81076317, "memory(GiB)": 34.88, "step": 12195, "train_speed(iter/s)": 0.41364 }, { "acc": 0.86608429, "epoch": 0.33032789104594806, "grad_norm": 21.244585037231445, "learning_rate": 9.971012282043773e-06, "loss": 0.72700601, "memory(GiB)": 34.88, "step": 12200, "train_speed(iter/s)": 0.413648 }, { "acc": 0.84433022, "epoch": 0.33046327132916364, "grad_norm": 21.01040267944336, "learning_rate": 9.970952081218634e-06, "loss": 0.65120192, "memory(GiB)": 34.88, "step": 12205, "train_speed(iter/s)": 0.413656 }, { "acc": 0.84886084, "epoch": 0.33059865161237917, "grad_norm": 7.615201473236084, "learning_rate": 9.970891818128812e-06, "loss": 0.77384038, "memory(GiB)": 34.88, "step": 12210, "train_speed(iter/s)": 0.413664 }, { "acc": 0.85770245, "epoch": 0.33073403189559475, "grad_norm": 7.0529255867004395, "learning_rate": 9.970831492775069e-06, "loss": 0.71011791, "memory(GiB)": 34.88, "step": 12215, "train_speed(iter/s)": 0.413672 }, { "acc": 0.85217419, "epoch": 0.3308694121788103, "grad_norm": 10.780505180358887, "learning_rate": 9.970771105158156e-06, "loss": 0.71700306, "memory(GiB)": 34.88, "step": 12220, "train_speed(iter/s)": 0.413681 }, { "acc": 0.85184498, "epoch": 0.33100479246202585, "grad_norm": 16.672441482543945, "learning_rate": 9.97071065527883e-06, "loss": 0.76952229, "memory(GiB)": 34.88, "step": 12225, "train_speed(iter/s)": 0.413689 }, { "acc": 0.84142113, "epoch": 0.3311401727452414, "grad_norm": 5.683825492858887, "learning_rate": 9.97065014313785e-06, "loss": 0.85545607, "memory(GiB)": 34.88, "step": 12230, "train_speed(iter/s)": 0.413698 }, { "acc": 0.84333401, "epoch": 0.33127555302845696, "grad_norm": 10.116019248962402, "learning_rate": 9.970589568735974e-06, "loss": 0.83546066, "memory(GiB)": 34.88, "step": 12235, "train_speed(iter/s)": 0.413705 }, { "acc": 0.8404397, "epoch": 0.3314109333116725, "grad_norm": 83.47492980957031, "learning_rate": 9.970528932073958e-06, "loss": 0.90461216, "memory(GiB)": 34.88, "step": 12240, "train_speed(iter/s)": 0.413709 }, { "acc": 0.82056217, "epoch": 0.33154631359488806, "grad_norm": 8.269613265991211, "learning_rate": 9.970468233152566e-06, "loss": 0.83025932, "memory(GiB)": 34.88, "step": 12245, "train_speed(iter/s)": 0.413716 }, { "acc": 0.83791409, "epoch": 0.3316816938781036, "grad_norm": 8.282134056091309, "learning_rate": 9.970407471972555e-06, "loss": 0.84208431, "memory(GiB)": 34.88, "step": 12250, "train_speed(iter/s)": 0.413723 }, { "acc": 0.85581322, "epoch": 0.33181707416131917, "grad_norm": 19.062957763671875, "learning_rate": 9.970346648534687e-06, "loss": 0.77492523, "memory(GiB)": 34.88, "step": 12255, "train_speed(iter/s)": 0.413731 }, { "acc": 0.83225298, "epoch": 0.3319524544445347, "grad_norm": 9.191717147827148, "learning_rate": 9.970285762839721e-06, "loss": 0.87834225, "memory(GiB)": 34.88, "step": 12260, "train_speed(iter/s)": 0.413739 }, { "acc": 0.86164513, "epoch": 0.3320878347277503, "grad_norm": 15.147778511047363, "learning_rate": 9.970224814888427e-06, "loss": 0.65584197, "memory(GiB)": 34.88, "step": 12265, "train_speed(iter/s)": 0.413747 }, { "acc": 0.82008448, "epoch": 0.3322232150109658, "grad_norm": 14.859197616577148, "learning_rate": 9.970163804681563e-06, "loss": 0.98731775, "memory(GiB)": 34.88, "step": 12270, "train_speed(iter/s)": 0.413755 }, { "acc": 0.85657997, "epoch": 0.3323585952941814, "grad_norm": 17.313684463500977, "learning_rate": 9.970102732219893e-06, "loss": 0.76870008, "memory(GiB)": 34.88, "step": 12275, "train_speed(iter/s)": 0.413763 }, { "acc": 0.84550676, "epoch": 0.3324939755773969, "grad_norm": 9.60391902923584, "learning_rate": 9.970041597504185e-06, "loss": 0.75075154, "memory(GiB)": 34.88, "step": 12280, "train_speed(iter/s)": 0.413771 }, { "acc": 0.8512558, "epoch": 0.3326293558606125, "grad_norm": 4.799577236175537, "learning_rate": 9.969980400535202e-06, "loss": 0.70479841, "memory(GiB)": 34.88, "step": 12285, "train_speed(iter/s)": 0.413778 }, { "acc": 0.82019501, "epoch": 0.332764736143828, "grad_norm": 6.477686882019043, "learning_rate": 9.969919141313715e-06, "loss": 0.9450141, "memory(GiB)": 34.88, "step": 12290, "train_speed(iter/s)": 0.413786 }, { "acc": 0.83508911, "epoch": 0.3329001164270436, "grad_norm": 13.45434856414795, "learning_rate": 9.969857819840484e-06, "loss": 0.77013159, "memory(GiB)": 34.88, "step": 12295, "train_speed(iter/s)": 0.413794 }, { "acc": 0.8684269, "epoch": 0.3330354967102591, "grad_norm": 8.11359977722168, "learning_rate": 9.969796436116285e-06, "loss": 0.63186474, "memory(GiB)": 34.88, "step": 12300, "train_speed(iter/s)": 0.413803 }, { "acc": 0.83469486, "epoch": 0.3331708769934747, "grad_norm": 10.805789947509766, "learning_rate": 9.969734990141883e-06, "loss": 0.86366282, "memory(GiB)": 34.88, "step": 12305, "train_speed(iter/s)": 0.413811 }, { "acc": 0.84602184, "epoch": 0.3333062572766902, "grad_norm": 8.929265975952148, "learning_rate": 9.969673481918047e-06, "loss": 0.78653097, "memory(GiB)": 34.88, "step": 12310, "train_speed(iter/s)": 0.413816 }, { "acc": 0.86011457, "epoch": 0.3334416375599058, "grad_norm": 5.764842987060547, "learning_rate": 9.969611911445548e-06, "loss": 0.70283737, "memory(GiB)": 34.88, "step": 12315, "train_speed(iter/s)": 0.413823 }, { "acc": 0.8305953, "epoch": 0.3335770178431213, "grad_norm": 7.259006023406982, "learning_rate": 9.96955027872516e-06, "loss": 0.86873474, "memory(GiB)": 34.88, "step": 12320, "train_speed(iter/s)": 0.413831 }, { "acc": 0.82773438, "epoch": 0.3337123981263369, "grad_norm": 11.026885986328125, "learning_rate": 9.969488583757652e-06, "loss": 0.87412014, "memory(GiB)": 34.88, "step": 12325, "train_speed(iter/s)": 0.41384 }, { "acc": 0.84113579, "epoch": 0.33384777840955243, "grad_norm": 7.319948196411133, "learning_rate": 9.969426826543799e-06, "loss": 0.83535595, "memory(GiB)": 34.88, "step": 12330, "train_speed(iter/s)": 0.413847 }, { "acc": 0.86084938, "epoch": 0.333983158692768, "grad_norm": 6.316466808319092, "learning_rate": 9.969365007084373e-06, "loss": 0.6433404, "memory(GiB)": 34.88, "step": 12335, "train_speed(iter/s)": 0.413856 }, { "acc": 0.84652443, "epoch": 0.33411853897598354, "grad_norm": 14.676724433898926, "learning_rate": 9.969303125380149e-06, "loss": 0.82876863, "memory(GiB)": 34.88, "step": 12340, "train_speed(iter/s)": 0.413863 }, { "acc": 0.84207783, "epoch": 0.3342539192591991, "grad_norm": 8.339305877685547, "learning_rate": 9.9692411814319e-06, "loss": 0.8740097, "memory(GiB)": 34.88, "step": 12345, "train_speed(iter/s)": 0.413871 }, { "acc": 0.83229733, "epoch": 0.33438929954241464, "grad_norm": 7.529754638671875, "learning_rate": 9.969179175240407e-06, "loss": 0.75022998, "memory(GiB)": 34.88, "step": 12350, "train_speed(iter/s)": 0.413879 }, { "acc": 0.8513134, "epoch": 0.3345246798256302, "grad_norm": 15.88758373260498, "learning_rate": 9.969117106806442e-06, "loss": 0.68392539, "memory(GiB)": 34.88, "step": 12355, "train_speed(iter/s)": 0.413887 }, { "acc": 0.85270309, "epoch": 0.33466006010884575, "grad_norm": 7.204780101776123, "learning_rate": 9.969054976130787e-06, "loss": 0.82477694, "memory(GiB)": 34.88, "step": 12360, "train_speed(iter/s)": 0.413895 }, { "acc": 0.8563592, "epoch": 0.3347954403920613, "grad_norm": 5.3952507972717285, "learning_rate": 9.968992783214216e-06, "loss": 0.63871088, "memory(GiB)": 34.88, "step": 12365, "train_speed(iter/s)": 0.413904 }, { "acc": 0.84620333, "epoch": 0.33493082067527685, "grad_norm": 15.917093276977539, "learning_rate": 9.96893052805751e-06, "loss": 0.89531298, "memory(GiB)": 34.88, "step": 12370, "train_speed(iter/s)": 0.413912 }, { "acc": 0.85088863, "epoch": 0.33506620095849243, "grad_norm": 8.154067993164062, "learning_rate": 9.968868210661449e-06, "loss": 0.73709965, "memory(GiB)": 34.88, "step": 12375, "train_speed(iter/s)": 0.41392 }, { "acc": 0.83582973, "epoch": 0.33520158124170796, "grad_norm": 10.827610969543457, "learning_rate": 9.968805831026812e-06, "loss": 0.88622904, "memory(GiB)": 34.88, "step": 12380, "train_speed(iter/s)": 0.413928 }, { "acc": 0.87505255, "epoch": 0.3353369615249235, "grad_norm": 4.94987678527832, "learning_rate": 9.968743389154384e-06, "loss": 0.6119381, "memory(GiB)": 34.88, "step": 12385, "train_speed(iter/s)": 0.413936 }, { "acc": 0.83906879, "epoch": 0.33547234180813906, "grad_norm": 7.600219249725342, "learning_rate": 9.968680885044945e-06, "loss": 0.80453911, "memory(GiB)": 34.88, "step": 12390, "train_speed(iter/s)": 0.413943 }, { "acc": 0.84065428, "epoch": 0.3356077220913546, "grad_norm": 6.37464714050293, "learning_rate": 9.968618318699276e-06, "loss": 0.81680613, "memory(GiB)": 34.88, "step": 12395, "train_speed(iter/s)": 0.413951 }, { "acc": 0.81773758, "epoch": 0.33574310237457017, "grad_norm": 9.27695083618164, "learning_rate": 9.968555690118164e-06, "loss": 0.8835022, "memory(GiB)": 34.88, "step": 12400, "train_speed(iter/s)": 0.41396 }, { "acc": 0.84196415, "epoch": 0.3358784826577857, "grad_norm": 12.096875190734863, "learning_rate": 9.96849299930239e-06, "loss": 0.81857738, "memory(GiB)": 34.88, "step": 12405, "train_speed(iter/s)": 0.413968 }, { "acc": 0.8615016, "epoch": 0.3360138629410013, "grad_norm": 5.416208744049072, "learning_rate": 9.968430246252745e-06, "loss": 0.72096071, "memory(GiB)": 34.88, "step": 12410, "train_speed(iter/s)": 0.413976 }, { "acc": 0.82930412, "epoch": 0.3361492432242168, "grad_norm": 13.279540061950684, "learning_rate": 9.968367430970009e-06, "loss": 0.88584328, "memory(GiB)": 34.88, "step": 12415, "train_speed(iter/s)": 0.413983 }, { "acc": 0.84403934, "epoch": 0.3362846235074324, "grad_norm": 10.044189453125, "learning_rate": 9.968304553454973e-06, "loss": 0.76802168, "memory(GiB)": 34.88, "step": 12420, "train_speed(iter/s)": 0.413992 }, { "acc": 0.86460772, "epoch": 0.3364200037906479, "grad_norm": 4.22659969329834, "learning_rate": 9.968241613708423e-06, "loss": 0.63736982, "memory(GiB)": 34.88, "step": 12425, "train_speed(iter/s)": 0.414 }, { "acc": 0.85984364, "epoch": 0.3365553840738635, "grad_norm": 6.391881465911865, "learning_rate": 9.968178611731147e-06, "loss": 0.62839737, "memory(GiB)": 34.88, "step": 12430, "train_speed(iter/s)": 0.414008 }, { "acc": 0.85377722, "epoch": 0.336690764357079, "grad_norm": 7.39260196685791, "learning_rate": 9.968115547523935e-06, "loss": 0.67748241, "memory(GiB)": 34.88, "step": 12435, "train_speed(iter/s)": 0.414015 }, { "acc": 0.87354517, "epoch": 0.3368261446402946, "grad_norm": 5.693172931671143, "learning_rate": 9.968052421087579e-06, "loss": 0.61247845, "memory(GiB)": 34.88, "step": 12440, "train_speed(iter/s)": 0.414022 }, { "acc": 0.84888191, "epoch": 0.3369615249235101, "grad_norm": 8.943053245544434, "learning_rate": 9.967989232422866e-06, "loss": 0.77515125, "memory(GiB)": 34.88, "step": 12445, "train_speed(iter/s)": 0.41403 }, { "acc": 0.83463097, "epoch": 0.3370969052067257, "grad_norm": 11.105743408203125, "learning_rate": 9.96792598153059e-06, "loss": 0.82699509, "memory(GiB)": 34.88, "step": 12450, "train_speed(iter/s)": 0.414036 }, { "acc": 0.8605114, "epoch": 0.3372322854899412, "grad_norm": 7.114167213439941, "learning_rate": 9.967862668411542e-06, "loss": 0.6181057, "memory(GiB)": 34.88, "step": 12455, "train_speed(iter/s)": 0.414045 }, { "acc": 0.86159954, "epoch": 0.3373676657731568, "grad_norm": 5.3362603187561035, "learning_rate": 9.967799293066514e-06, "loss": 0.65353508, "memory(GiB)": 34.88, "step": 12460, "train_speed(iter/s)": 0.414052 }, { "acc": 0.8457159, "epoch": 0.3375030460563723, "grad_norm": 8.049091339111328, "learning_rate": 9.967735855496302e-06, "loss": 0.83748283, "memory(GiB)": 34.88, "step": 12465, "train_speed(iter/s)": 0.41406 }, { "acc": 0.86420784, "epoch": 0.3376384263395879, "grad_norm": 7.041547775268555, "learning_rate": 9.967672355701704e-06, "loss": 0.71068153, "memory(GiB)": 34.88, "step": 12470, "train_speed(iter/s)": 0.414068 }, { "acc": 0.81953106, "epoch": 0.33777380662280343, "grad_norm": 11.330534934997559, "learning_rate": 9.967608793683508e-06, "loss": 0.8851944, "memory(GiB)": 34.88, "step": 12475, "train_speed(iter/s)": 0.414075 }, { "acc": 0.8409296, "epoch": 0.337909186906019, "grad_norm": 4.664676666259766, "learning_rate": 9.967545169442516e-06, "loss": 0.84333553, "memory(GiB)": 34.88, "step": 12480, "train_speed(iter/s)": 0.414082 }, { "acc": 0.83664694, "epoch": 0.33804456718923453, "grad_norm": 23.948854446411133, "learning_rate": 9.967481482979524e-06, "loss": 0.96619864, "memory(GiB)": 34.88, "step": 12485, "train_speed(iter/s)": 0.414091 }, { "acc": 0.84959879, "epoch": 0.3381799474724501, "grad_norm": 13.518426895141602, "learning_rate": 9.967417734295325e-06, "loss": 0.73724532, "memory(GiB)": 34.88, "step": 12490, "train_speed(iter/s)": 0.414097 }, { "acc": 0.83113403, "epoch": 0.33831532775566564, "grad_norm": 15.00344467163086, "learning_rate": 9.967353923390725e-06, "loss": 0.88239908, "memory(GiB)": 34.88, "step": 12495, "train_speed(iter/s)": 0.414105 }, { "acc": 0.82730045, "epoch": 0.3384507080388812, "grad_norm": 13.044384002685547, "learning_rate": 9.967290050266518e-06, "loss": 0.93467245, "memory(GiB)": 34.88, "step": 12500, "train_speed(iter/s)": 0.414114 }, { "acc": 0.86045628, "epoch": 0.33858608832209675, "grad_norm": 4.869438171386719, "learning_rate": 9.967226114923505e-06, "loss": 0.71040263, "memory(GiB)": 34.88, "step": 12505, "train_speed(iter/s)": 0.414121 }, { "acc": 0.82036381, "epoch": 0.3387214686053123, "grad_norm": 7.6227521896362305, "learning_rate": 9.967162117362491e-06, "loss": 0.92969828, "memory(GiB)": 34.88, "step": 12510, "train_speed(iter/s)": 0.414129 }, { "acc": 0.86742725, "epoch": 0.33885684888852785, "grad_norm": 18.057655334472656, "learning_rate": 9.967098057584272e-06, "loss": 0.5172637, "memory(GiB)": 34.88, "step": 12515, "train_speed(iter/s)": 0.414137 }, { "acc": 0.84694195, "epoch": 0.33899222917174343, "grad_norm": 7.467538833618164, "learning_rate": 9.967033935589652e-06, "loss": 0.80461788, "memory(GiB)": 34.88, "step": 12520, "train_speed(iter/s)": 0.414145 }, { "acc": 0.81847363, "epoch": 0.33912760945495896, "grad_norm": 8.058876991271973, "learning_rate": 9.966969751379439e-06, "loss": 0.92346439, "memory(GiB)": 34.88, "step": 12525, "train_speed(iter/s)": 0.414153 }, { "acc": 0.83784618, "epoch": 0.33926298973817454, "grad_norm": 11.852364540100098, "learning_rate": 9.966905504954432e-06, "loss": 0.73920512, "memory(GiB)": 34.88, "step": 12530, "train_speed(iter/s)": 0.41416 }, { "acc": 0.85796652, "epoch": 0.33939837002139006, "grad_norm": 11.441020965576172, "learning_rate": 9.966841196315435e-06, "loss": 0.77520313, "memory(GiB)": 34.88, "step": 12535, "train_speed(iter/s)": 0.414167 }, { "acc": 0.83631067, "epoch": 0.33953375030460564, "grad_norm": 8.761919975280762, "learning_rate": 9.966776825463257e-06, "loss": 0.85997486, "memory(GiB)": 34.88, "step": 12540, "train_speed(iter/s)": 0.414174 }, { "acc": 0.83761816, "epoch": 0.33966913058782117, "grad_norm": 16.056119918823242, "learning_rate": 9.966712392398703e-06, "loss": 0.80709524, "memory(GiB)": 34.88, "step": 12545, "train_speed(iter/s)": 0.414183 }, { "acc": 0.85321178, "epoch": 0.33980451087103675, "grad_norm": 7.816837310791016, "learning_rate": 9.96664789712258e-06, "loss": 0.77208991, "memory(GiB)": 34.88, "step": 12550, "train_speed(iter/s)": 0.414191 }, { "acc": 0.86042061, "epoch": 0.33993989115425227, "grad_norm": 8.215755462646484, "learning_rate": 9.966583339635696e-06, "loss": 0.65533228, "memory(GiB)": 34.88, "step": 12555, "train_speed(iter/s)": 0.414198 }, { "acc": 0.85240383, "epoch": 0.34007527143746785, "grad_norm": 10.83594799041748, "learning_rate": 9.966518719938862e-06, "loss": 0.69572673, "memory(GiB)": 34.88, "step": 12560, "train_speed(iter/s)": 0.414206 }, { "acc": 0.8347681, "epoch": 0.3402106517206834, "grad_norm": 11.733366012573242, "learning_rate": 9.966454038032882e-06, "loss": 0.83294621, "memory(GiB)": 34.88, "step": 12565, "train_speed(iter/s)": 0.414214 }, { "acc": 0.86041527, "epoch": 0.34034603200389896, "grad_norm": 4.3899407386779785, "learning_rate": 9.96638929391857e-06, "loss": 0.65055041, "memory(GiB)": 34.88, "step": 12570, "train_speed(iter/s)": 0.414222 }, { "acc": 0.85578251, "epoch": 0.3404814122871145, "grad_norm": 10.136338233947754, "learning_rate": 9.966324487596739e-06, "loss": 0.81274948, "memory(GiB)": 34.88, "step": 12575, "train_speed(iter/s)": 0.41423 }, { "acc": 0.84229155, "epoch": 0.34061679257033006, "grad_norm": 5.82584285736084, "learning_rate": 9.966259619068196e-06, "loss": 0.73879423, "memory(GiB)": 34.88, "step": 12580, "train_speed(iter/s)": 0.414238 }, { "acc": 0.81666298, "epoch": 0.3407521728535456, "grad_norm": 12.962389945983887, "learning_rate": 9.966194688333759e-06, "loss": 0.97885342, "memory(GiB)": 34.88, "step": 12585, "train_speed(iter/s)": 0.414244 }, { "acc": 0.83844595, "epoch": 0.34088755313676117, "grad_norm": 10.936473846435547, "learning_rate": 9.966129695394236e-06, "loss": 0.76889458, "memory(GiB)": 34.88, "step": 12590, "train_speed(iter/s)": 0.414251 }, { "acc": 0.84552555, "epoch": 0.3410229334199767, "grad_norm": 9.763404846191406, "learning_rate": 9.966064640250444e-06, "loss": 0.84289932, "memory(GiB)": 34.88, "step": 12595, "train_speed(iter/s)": 0.414259 }, { "acc": 0.82000036, "epoch": 0.3411583137031923, "grad_norm": 6.01605224609375, "learning_rate": 9.965999522903198e-06, "loss": 0.93711519, "memory(GiB)": 34.88, "step": 12600, "train_speed(iter/s)": 0.414266 }, { "acc": 0.82269373, "epoch": 0.3412936939864078, "grad_norm": 12.141623497009277, "learning_rate": 9.965934343353311e-06, "loss": 0.9798914, "memory(GiB)": 34.88, "step": 12605, "train_speed(iter/s)": 0.414274 }, { "acc": 0.85238867, "epoch": 0.3414290742696234, "grad_norm": 10.082499504089355, "learning_rate": 9.965869101601606e-06, "loss": 0.77441664, "memory(GiB)": 34.88, "step": 12610, "train_speed(iter/s)": 0.414282 }, { "acc": 0.85455856, "epoch": 0.3415644545528389, "grad_norm": 14.103257179260254, "learning_rate": 9.965803797648894e-06, "loss": 0.70546227, "memory(GiB)": 34.88, "step": 12615, "train_speed(iter/s)": 0.41429 }, { "acc": 0.86079159, "epoch": 0.3416998348360545, "grad_norm": 11.787793159484863, "learning_rate": 9.965738431495996e-06, "loss": 0.69741039, "memory(GiB)": 34.88, "step": 12620, "train_speed(iter/s)": 0.414296 }, { "acc": 0.8494524, "epoch": 0.34183521511927, "grad_norm": 12.2712984085083, "learning_rate": 9.965673003143727e-06, "loss": 0.75018139, "memory(GiB)": 34.88, "step": 12625, "train_speed(iter/s)": 0.414304 }, { "acc": 0.82860718, "epoch": 0.3419705954024856, "grad_norm": 8.324909210205078, "learning_rate": 9.965607512592912e-06, "loss": 0.91097431, "memory(GiB)": 34.88, "step": 12630, "train_speed(iter/s)": 0.414312 }, { "acc": 0.87869339, "epoch": 0.3421059756857011, "grad_norm": 8.592035293579102, "learning_rate": 9.96554195984437e-06, "loss": 0.64640636, "memory(GiB)": 34.88, "step": 12635, "train_speed(iter/s)": 0.414319 }, { "acc": 0.81171093, "epoch": 0.3422413559689167, "grad_norm": 8.583832740783691, "learning_rate": 9.965476344898918e-06, "loss": 1.01723003, "memory(GiB)": 34.88, "step": 12640, "train_speed(iter/s)": 0.414326 }, { "acc": 0.85514774, "epoch": 0.3423767362521322, "grad_norm": 5.655497074127197, "learning_rate": 9.965410667757384e-06, "loss": 0.73637094, "memory(GiB)": 34.88, "step": 12645, "train_speed(iter/s)": 0.414334 }, { "acc": 0.84051628, "epoch": 0.3425121165353478, "grad_norm": 8.394091606140137, "learning_rate": 9.965344928420587e-06, "loss": 0.70001583, "memory(GiB)": 34.88, "step": 12650, "train_speed(iter/s)": 0.414342 }, { "acc": 0.83619118, "epoch": 0.3426474968185633, "grad_norm": 9.676844596862793, "learning_rate": 9.96527912688935e-06, "loss": 0.76624784, "memory(GiB)": 34.88, "step": 12655, "train_speed(iter/s)": 0.414348 }, { "acc": 0.85321159, "epoch": 0.3427828771017789, "grad_norm": 14.850098609924316, "learning_rate": 9.9652132631645e-06, "loss": 0.85419865, "memory(GiB)": 34.88, "step": 12660, "train_speed(iter/s)": 0.414356 }, { "acc": 0.84750433, "epoch": 0.34291825738499443, "grad_norm": 6.23555326461792, "learning_rate": 9.965147337246862e-06, "loss": 0.7581738, "memory(GiB)": 34.88, "step": 12665, "train_speed(iter/s)": 0.414363 }, { "acc": 0.82751055, "epoch": 0.34305363766821, "grad_norm": 9.050498962402344, "learning_rate": 9.965081349137259e-06, "loss": 0.92296829, "memory(GiB)": 34.88, "step": 12670, "train_speed(iter/s)": 0.414371 }, { "acc": 0.87483196, "epoch": 0.34318901795142553, "grad_norm": 6.218601703643799, "learning_rate": 9.965015298836518e-06, "loss": 0.62131529, "memory(GiB)": 34.88, "step": 12675, "train_speed(iter/s)": 0.414378 }, { "acc": 0.87651091, "epoch": 0.3433243982346411, "grad_norm": 7.08363676071167, "learning_rate": 9.96494918634547e-06, "loss": 0.58372316, "memory(GiB)": 34.88, "step": 12680, "train_speed(iter/s)": 0.414385 }, { "acc": 0.85762081, "epoch": 0.34345977851785664, "grad_norm": 9.512899398803711, "learning_rate": 9.96488301166494e-06, "loss": 0.69222751, "memory(GiB)": 34.88, "step": 12685, "train_speed(iter/s)": 0.414392 }, { "acc": 0.84237299, "epoch": 0.3435951588010722, "grad_norm": 6.866676330566406, "learning_rate": 9.964816774795757e-06, "loss": 0.7928689, "memory(GiB)": 34.88, "step": 12690, "train_speed(iter/s)": 0.414399 }, { "acc": 0.87360916, "epoch": 0.34373053908428775, "grad_norm": 15.45537281036377, "learning_rate": 9.964750475738753e-06, "loss": 0.72131834, "memory(GiB)": 34.88, "step": 12695, "train_speed(iter/s)": 0.414407 }, { "acc": 0.83918457, "epoch": 0.3438659193675033, "grad_norm": 14.784083366394043, "learning_rate": 9.964684114494756e-06, "loss": 0.7124608, "memory(GiB)": 34.88, "step": 12700, "train_speed(iter/s)": 0.414413 }, { "acc": 0.83935757, "epoch": 0.34400129965071885, "grad_norm": 11.483681678771973, "learning_rate": 9.964617691064597e-06, "loss": 0.84588289, "memory(GiB)": 34.88, "step": 12705, "train_speed(iter/s)": 0.414421 }, { "acc": 0.85686569, "epoch": 0.34413667993393443, "grad_norm": 10.093513488769531, "learning_rate": 9.964551205449112e-06, "loss": 0.85158024, "memory(GiB)": 34.88, "step": 12710, "train_speed(iter/s)": 0.414429 }, { "acc": 0.86755276, "epoch": 0.34427206021714996, "grad_norm": 7.0871453285217285, "learning_rate": 9.96448465764913e-06, "loss": 0.74058466, "memory(GiB)": 34.88, "step": 12715, "train_speed(iter/s)": 0.414435 }, { "acc": 0.8458951, "epoch": 0.34440744050036554, "grad_norm": 12.581209182739258, "learning_rate": 9.964418047665486e-06, "loss": 0.78393407, "memory(GiB)": 34.88, "step": 12720, "train_speed(iter/s)": 0.414443 }, { "acc": 0.8402482, "epoch": 0.34454282078358106, "grad_norm": 7.442634582519531, "learning_rate": 9.964351375499016e-06, "loss": 0.75480194, "memory(GiB)": 34.88, "step": 12725, "train_speed(iter/s)": 0.41445 }, { "acc": 0.84191723, "epoch": 0.34467820106679664, "grad_norm": 7.252184867858887, "learning_rate": 9.964284641150553e-06, "loss": 0.76390166, "memory(GiB)": 34.88, "step": 12730, "train_speed(iter/s)": 0.414458 }, { "acc": 0.86016388, "epoch": 0.34481358135001217, "grad_norm": 15.426004409790039, "learning_rate": 9.964217844620931e-06, "loss": 0.73513975, "memory(GiB)": 34.88, "step": 12735, "train_speed(iter/s)": 0.414466 }, { "acc": 0.86016417, "epoch": 0.34494896163322775, "grad_norm": 6.026913166046143, "learning_rate": 9.96415098591099e-06, "loss": 0.68306875, "memory(GiB)": 34.88, "step": 12740, "train_speed(iter/s)": 0.414473 }, { "acc": 0.85520067, "epoch": 0.34508434191644327, "grad_norm": 4.991118907928467, "learning_rate": 9.964084065021569e-06, "loss": 0.73700705, "memory(GiB)": 34.88, "step": 12745, "train_speed(iter/s)": 0.41448 }, { "acc": 0.85930824, "epoch": 0.34521972219965885, "grad_norm": 9.915872573852539, "learning_rate": 9.964017081953504e-06, "loss": 0.68192515, "memory(GiB)": 34.88, "step": 12750, "train_speed(iter/s)": 0.414487 }, { "acc": 0.85530624, "epoch": 0.3453551024828744, "grad_norm": 14.811481475830078, "learning_rate": 9.963950036707634e-06, "loss": 0.7998786, "memory(GiB)": 34.88, "step": 12755, "train_speed(iter/s)": 0.414495 }, { "acc": 0.83594599, "epoch": 0.34549048276608996, "grad_norm": 11.4273681640625, "learning_rate": 9.963882929284797e-06, "loss": 0.864713, "memory(GiB)": 34.88, "step": 12760, "train_speed(iter/s)": 0.414503 }, { "acc": 0.84764528, "epoch": 0.3456258630493055, "grad_norm": 8.73405647277832, "learning_rate": 9.963815759685836e-06, "loss": 0.76044035, "memory(GiB)": 34.88, "step": 12765, "train_speed(iter/s)": 0.414511 }, { "acc": 0.84932833, "epoch": 0.34576124333252106, "grad_norm": 5.339521884918213, "learning_rate": 9.963748527911596e-06, "loss": 0.73353567, "memory(GiB)": 34.88, "step": 12770, "train_speed(iter/s)": 0.414518 }, { "acc": 0.85008717, "epoch": 0.3458966236157366, "grad_norm": 11.040338516235352, "learning_rate": 9.963681233962911e-06, "loss": 0.76128511, "memory(GiB)": 34.88, "step": 12775, "train_speed(iter/s)": 0.414527 }, { "acc": 0.81304436, "epoch": 0.34603200389895217, "grad_norm": 51.51664733886719, "learning_rate": 9.963613877840631e-06, "loss": 0.94885178, "memory(GiB)": 34.88, "step": 12780, "train_speed(iter/s)": 0.414534 }, { "acc": 0.83431625, "epoch": 0.3461673841821677, "grad_norm": 34.0367431640625, "learning_rate": 9.963546459545596e-06, "loss": 0.81973162, "memory(GiB)": 34.88, "step": 12785, "train_speed(iter/s)": 0.414542 }, { "acc": 0.83303423, "epoch": 0.3463027644653833, "grad_norm": 15.768654823303223, "learning_rate": 9.963478979078652e-06, "loss": 0.9222209, "memory(GiB)": 34.88, "step": 12790, "train_speed(iter/s)": 0.414549 }, { "acc": 0.8532032, "epoch": 0.3464381447485988, "grad_norm": 9.737622261047363, "learning_rate": 9.963411436440645e-06, "loss": 0.74999576, "memory(GiB)": 34.88, "step": 12795, "train_speed(iter/s)": 0.414557 }, { "acc": 0.83623009, "epoch": 0.3465735250318144, "grad_norm": 18.784414291381836, "learning_rate": 9.963343831632419e-06, "loss": 0.80725412, "memory(GiB)": 34.88, "step": 12800, "train_speed(iter/s)": 0.414565 }, { "acc": 0.85772276, "epoch": 0.3467089053150299, "grad_norm": 6.5038275718688965, "learning_rate": 9.963276164654822e-06, "loss": 0.77786632, "memory(GiB)": 34.88, "step": 12805, "train_speed(iter/s)": 0.414571 }, { "acc": 0.86948042, "epoch": 0.3468442855982455, "grad_norm": 18.976680755615234, "learning_rate": 9.963208435508703e-06, "loss": 0.75742865, "memory(GiB)": 34.88, "step": 12810, "train_speed(iter/s)": 0.414579 }, { "acc": 0.86407452, "epoch": 0.346979665881461, "grad_norm": 5.534581661224365, "learning_rate": 9.963140644194908e-06, "loss": 0.68282747, "memory(GiB)": 34.88, "step": 12815, "train_speed(iter/s)": 0.414586 }, { "acc": 0.85314903, "epoch": 0.3471150461646766, "grad_norm": 7.7489237785339355, "learning_rate": 9.963072790714287e-06, "loss": 0.72069364, "memory(GiB)": 34.88, "step": 12820, "train_speed(iter/s)": 0.414594 }, { "acc": 0.85365562, "epoch": 0.3472504264478921, "grad_norm": 10.89233684539795, "learning_rate": 9.963004875067692e-06, "loss": 0.76028748, "memory(GiB)": 34.88, "step": 12825, "train_speed(iter/s)": 0.414602 }, { "acc": 0.85301266, "epoch": 0.3473858067311077, "grad_norm": 9.140440940856934, "learning_rate": 9.962936897255971e-06, "loss": 0.68607035, "memory(GiB)": 34.88, "step": 12830, "train_speed(iter/s)": 0.414609 }, { "acc": 0.85983839, "epoch": 0.3475211870143232, "grad_norm": 7.992785453796387, "learning_rate": 9.962868857279976e-06, "loss": 0.69677963, "memory(GiB)": 34.88, "step": 12835, "train_speed(iter/s)": 0.414617 }, { "acc": 0.87040186, "epoch": 0.3476565672975388, "grad_norm": 8.523137092590332, "learning_rate": 9.962800755140561e-06, "loss": 0.59393854, "memory(GiB)": 34.88, "step": 12840, "train_speed(iter/s)": 0.414624 }, { "acc": 0.84731464, "epoch": 0.3477919475807543, "grad_norm": 11.07679271697998, "learning_rate": 9.962732590838579e-06, "loss": 0.86132832, "memory(GiB)": 34.88, "step": 12845, "train_speed(iter/s)": 0.414632 }, { "acc": 0.84195719, "epoch": 0.3479273278639699, "grad_norm": 14.204333305358887, "learning_rate": 9.962664364374882e-06, "loss": 0.80736942, "memory(GiB)": 34.88, "step": 12850, "train_speed(iter/s)": 0.414639 }, { "acc": 0.829776, "epoch": 0.34806270814718543, "grad_norm": 6.905834197998047, "learning_rate": 9.962596075750327e-06, "loss": 0.80870085, "memory(GiB)": 34.88, "step": 12855, "train_speed(iter/s)": 0.414647 }, { "acc": 0.81870422, "epoch": 0.348198088430401, "grad_norm": 10.910698890686035, "learning_rate": 9.962527724965764e-06, "loss": 0.90613661, "memory(GiB)": 34.88, "step": 12860, "train_speed(iter/s)": 0.414654 }, { "acc": 0.85629158, "epoch": 0.34833346871361653, "grad_norm": 7.680506706237793, "learning_rate": 9.962459312022056e-06, "loss": 0.75682802, "memory(GiB)": 34.88, "step": 12865, "train_speed(iter/s)": 0.414661 }, { "acc": 0.84573059, "epoch": 0.3484688489968321, "grad_norm": 8.229752540588379, "learning_rate": 9.962390836920059e-06, "loss": 0.8195159, "memory(GiB)": 34.88, "step": 12870, "train_speed(iter/s)": 0.414669 }, { "acc": 0.85338001, "epoch": 0.34860422928004764, "grad_norm": 6.286904811859131, "learning_rate": 9.962322299660629e-06, "loss": 0.71058912, "memory(GiB)": 34.88, "step": 12875, "train_speed(iter/s)": 0.414676 }, { "acc": 0.86765232, "epoch": 0.3487396095632632, "grad_norm": 9.622171401977539, "learning_rate": 9.962253700244624e-06, "loss": 0.67252541, "memory(GiB)": 34.88, "step": 12880, "train_speed(iter/s)": 0.414684 }, { "acc": 0.84157619, "epoch": 0.34887498984647874, "grad_norm": 22.49934196472168, "learning_rate": 9.962185038672902e-06, "loss": 0.82025967, "memory(GiB)": 34.88, "step": 12885, "train_speed(iter/s)": 0.41469 }, { "acc": 0.82753754, "epoch": 0.3490103701296943, "grad_norm": 8.773846626281738, "learning_rate": 9.962116314946326e-06, "loss": 0.90686102, "memory(GiB)": 34.88, "step": 12890, "train_speed(iter/s)": 0.414696 }, { "acc": 0.86582947, "epoch": 0.34914575041290985, "grad_norm": 5.57519006729126, "learning_rate": 9.962047529065758e-06, "loss": 0.57602921, "memory(GiB)": 34.88, "step": 12895, "train_speed(iter/s)": 0.414703 }, { "acc": 0.86008015, "epoch": 0.34928113069612543, "grad_norm": 13.923742294311523, "learning_rate": 9.961978681032057e-06, "loss": 0.78741493, "memory(GiB)": 34.88, "step": 12900, "train_speed(iter/s)": 0.41471 }, { "acc": 0.87231674, "epoch": 0.34941651097934096, "grad_norm": 12.13086986541748, "learning_rate": 9.961909770846084e-06, "loss": 0.58678465, "memory(GiB)": 34.88, "step": 12905, "train_speed(iter/s)": 0.414717 }, { "acc": 0.86315231, "epoch": 0.34955189126255654, "grad_norm": 11.03141975402832, "learning_rate": 9.961840798508708e-06, "loss": 0.68146038, "memory(GiB)": 34.88, "step": 12910, "train_speed(iter/s)": 0.414725 }, { "acc": 0.83309326, "epoch": 0.34968727154577206, "grad_norm": 9.487088203430176, "learning_rate": 9.961771764020787e-06, "loss": 0.83093376, "memory(GiB)": 34.88, "step": 12915, "train_speed(iter/s)": 0.414733 }, { "acc": 0.86542521, "epoch": 0.34982265182898764, "grad_norm": 8.731656074523926, "learning_rate": 9.96170266738319e-06, "loss": 0.71789689, "memory(GiB)": 34.88, "step": 12920, "train_speed(iter/s)": 0.414739 }, { "acc": 0.87010136, "epoch": 0.34995803211220317, "grad_norm": 6.66771936416626, "learning_rate": 9.96163350859678e-06, "loss": 0.72437048, "memory(GiB)": 34.88, "step": 12925, "train_speed(iter/s)": 0.414747 }, { "acc": 0.84473104, "epoch": 0.35009341239541875, "grad_norm": 12.926685333251953, "learning_rate": 9.961564287662424e-06, "loss": 0.8415349, "memory(GiB)": 34.88, "step": 12930, "train_speed(iter/s)": 0.414754 }, { "acc": 0.81805496, "epoch": 0.35022879267863427, "grad_norm": 9.050384521484375, "learning_rate": 9.961495004580989e-06, "loss": 0.94270134, "memory(GiB)": 34.88, "step": 12935, "train_speed(iter/s)": 0.41476 }, { "acc": 0.85883389, "epoch": 0.35036417296184985, "grad_norm": 11.395153999328613, "learning_rate": 9.961425659353344e-06, "loss": 0.72349691, "memory(GiB)": 34.88, "step": 12940, "train_speed(iter/s)": 0.414766 }, { "acc": 0.85937576, "epoch": 0.3504995532450654, "grad_norm": 7.775598049163818, "learning_rate": 9.961356251980356e-06, "loss": 0.74481182, "memory(GiB)": 34.88, "step": 12945, "train_speed(iter/s)": 0.414774 }, { "acc": 0.86890182, "epoch": 0.35063493352828096, "grad_norm": 4.844309329986572, "learning_rate": 9.961286782462896e-06, "loss": 0.65723848, "memory(GiB)": 34.88, "step": 12950, "train_speed(iter/s)": 0.414781 }, { "acc": 0.88467884, "epoch": 0.3507703138114965, "grad_norm": 9.409358024597168, "learning_rate": 9.961217250801835e-06, "loss": 0.60894661, "memory(GiB)": 34.88, "step": 12955, "train_speed(iter/s)": 0.414789 }, { "acc": 0.85119534, "epoch": 0.35090569409471206, "grad_norm": 14.551095008850098, "learning_rate": 9.96114765699804e-06, "loss": 0.73980179, "memory(GiB)": 34.88, "step": 12960, "train_speed(iter/s)": 0.414797 }, { "acc": 0.83381844, "epoch": 0.3510410743779276, "grad_norm": 12.759977340698242, "learning_rate": 9.961078001052386e-06, "loss": 0.86982384, "memory(GiB)": 34.88, "step": 12965, "train_speed(iter/s)": 0.414804 }, { "acc": 0.84797611, "epoch": 0.35117645466114317, "grad_norm": 10.573930740356445, "learning_rate": 9.961008282965746e-06, "loss": 0.72535372, "memory(GiB)": 34.88, "step": 12970, "train_speed(iter/s)": 0.414812 }, { "acc": 0.84888229, "epoch": 0.3513118349443587, "grad_norm": 7.817122936248779, "learning_rate": 9.960938502738993e-06, "loss": 0.78553801, "memory(GiB)": 34.88, "step": 12975, "train_speed(iter/s)": 0.414819 }, { "acc": 0.85581894, "epoch": 0.35144721522757427, "grad_norm": 36.98019027709961, "learning_rate": 9.960868660373e-06, "loss": 0.68369722, "memory(GiB)": 34.88, "step": 12980, "train_speed(iter/s)": 0.414827 }, { "acc": 0.82142544, "epoch": 0.3515825955107898, "grad_norm": 19.32792091369629, "learning_rate": 9.960798755868642e-06, "loss": 1.00024014, "memory(GiB)": 34.88, "step": 12985, "train_speed(iter/s)": 0.414835 }, { "acc": 0.86373711, "epoch": 0.3517179757940054, "grad_norm": 5.721247673034668, "learning_rate": 9.960728789226795e-06, "loss": 0.63600149, "memory(GiB)": 34.88, "step": 12990, "train_speed(iter/s)": 0.414843 }, { "acc": 0.82315159, "epoch": 0.3518533560772209, "grad_norm": 20.823408126831055, "learning_rate": 9.960658760448336e-06, "loss": 0.95995255, "memory(GiB)": 34.88, "step": 12995, "train_speed(iter/s)": 0.41485 }, { "acc": 0.87225761, "epoch": 0.3519887363604365, "grad_norm": 10.382650375366211, "learning_rate": 9.960588669534142e-06, "loss": 0.67570405, "memory(GiB)": 34.88, "step": 13000, "train_speed(iter/s)": 0.414858 }, { "acc": 0.8219286, "epoch": 0.352124116643652, "grad_norm": 11.712568283081055, "learning_rate": 9.960518516485089e-06, "loss": 0.95775108, "memory(GiB)": 34.88, "step": 13005, "train_speed(iter/s)": 0.414864 }, { "acc": 0.85809383, "epoch": 0.3522594969268676, "grad_norm": 14.205649375915527, "learning_rate": 9.960448301302059e-06, "loss": 0.79077439, "memory(GiB)": 34.88, "step": 13010, "train_speed(iter/s)": 0.41487 }, { "acc": 0.82937813, "epoch": 0.3523948772100831, "grad_norm": 9.048693656921387, "learning_rate": 9.960378023985932e-06, "loss": 0.92702122, "memory(GiB)": 34.88, "step": 13015, "train_speed(iter/s)": 0.414876 }, { "acc": 0.81277409, "epoch": 0.3525302574932987, "grad_norm": 7.1113996505737305, "learning_rate": 9.960307684537585e-06, "loss": 0.9234108, "memory(GiB)": 34.88, "step": 13020, "train_speed(iter/s)": 0.414884 }, { "acc": 0.83508606, "epoch": 0.3526656377765142, "grad_norm": 5.591046333312988, "learning_rate": 9.960237282957901e-06, "loss": 0.79758911, "memory(GiB)": 34.88, "step": 13025, "train_speed(iter/s)": 0.414891 }, { "acc": 0.84375582, "epoch": 0.3528010180597298, "grad_norm": 7.30837345123291, "learning_rate": 9.96016681924776e-06, "loss": 0.799683, "memory(GiB)": 34.88, "step": 13030, "train_speed(iter/s)": 0.414899 }, { "acc": 0.82706289, "epoch": 0.3529363983429453, "grad_norm": 11.831618309020996, "learning_rate": 9.960096293408048e-06, "loss": 0.86489735, "memory(GiB)": 34.88, "step": 13035, "train_speed(iter/s)": 0.414905 }, { "acc": 0.87393427, "epoch": 0.3530717786261609, "grad_norm": 12.009665489196777, "learning_rate": 9.960025705439646e-06, "loss": 0.64452314, "memory(GiB)": 34.88, "step": 13040, "train_speed(iter/s)": 0.414912 }, { "acc": 0.84526062, "epoch": 0.35320715890937643, "grad_norm": 10.866283416748047, "learning_rate": 9.95995505534344e-06, "loss": 0.77861533, "memory(GiB)": 34.88, "step": 13045, "train_speed(iter/s)": 0.414919 }, { "acc": 0.85328703, "epoch": 0.353342539192592, "grad_norm": 17.567378997802734, "learning_rate": 9.959884343120313e-06, "loss": 0.75868993, "memory(GiB)": 34.88, "step": 13050, "train_speed(iter/s)": 0.414926 }, { "acc": 0.84155188, "epoch": 0.35347791947580753, "grad_norm": 11.116765022277832, "learning_rate": 9.959813568771155e-06, "loss": 0.78691959, "memory(GiB)": 34.88, "step": 13055, "train_speed(iter/s)": 0.414932 }, { "acc": 0.8584219, "epoch": 0.3536132997590231, "grad_norm": 5.541955471038818, "learning_rate": 9.959742732296846e-06, "loss": 0.67159128, "memory(GiB)": 34.88, "step": 13060, "train_speed(iter/s)": 0.414939 }, { "acc": 0.85186081, "epoch": 0.35374868004223864, "grad_norm": 8.505581855773926, "learning_rate": 9.959671833698278e-06, "loss": 0.76648932, "memory(GiB)": 34.88, "step": 13065, "train_speed(iter/s)": 0.414945 }, { "acc": 0.8217392, "epoch": 0.3538840603254542, "grad_norm": 11.693995475769043, "learning_rate": 9.959600872976339e-06, "loss": 0.93565216, "memory(GiB)": 34.88, "step": 13070, "train_speed(iter/s)": 0.414952 }, { "acc": 0.82745361, "epoch": 0.35401944060866974, "grad_norm": 5.993411540985107, "learning_rate": 9.959529850131915e-06, "loss": 0.85048857, "memory(GiB)": 34.88, "step": 13075, "train_speed(iter/s)": 0.414959 }, { "acc": 0.85806618, "epoch": 0.3541548208918853, "grad_norm": 9.842734336853027, "learning_rate": 9.959458765165899e-06, "loss": 0.70218568, "memory(GiB)": 34.88, "step": 13080, "train_speed(iter/s)": 0.414966 }, { "acc": 0.85867519, "epoch": 0.35429020117510085, "grad_norm": 8.549528121948242, "learning_rate": 9.959387618079179e-06, "loss": 0.69310288, "memory(GiB)": 34.88, "step": 13085, "train_speed(iter/s)": 0.414973 }, { "acc": 0.85273361, "epoch": 0.35442558145831643, "grad_norm": 8.639396667480469, "learning_rate": 9.95931640887265e-06, "loss": 0.80404606, "memory(GiB)": 34.88, "step": 13090, "train_speed(iter/s)": 0.414981 }, { "acc": 0.86442013, "epoch": 0.35456096174153195, "grad_norm": 8.678727149963379, "learning_rate": 9.9592451375472e-06, "loss": 0.72609949, "memory(GiB)": 34.88, "step": 13095, "train_speed(iter/s)": 0.414988 }, { "acc": 0.83143406, "epoch": 0.35469634202474754, "grad_norm": 20.585899353027344, "learning_rate": 9.959173804103722e-06, "loss": 0.82255917, "memory(GiB)": 34.88, "step": 13100, "train_speed(iter/s)": 0.414995 }, { "acc": 0.84968367, "epoch": 0.35483172230796306, "grad_norm": 13.18131160736084, "learning_rate": 9.959102408543113e-06, "loss": 0.67156982, "memory(GiB)": 34.88, "step": 13105, "train_speed(iter/s)": 0.415002 }, { "acc": 0.8544198, "epoch": 0.35496710259117864, "grad_norm": 13.003220558166504, "learning_rate": 9.959030950866264e-06, "loss": 0.77934971, "memory(GiB)": 34.88, "step": 13110, "train_speed(iter/s)": 0.415009 }, { "acc": 0.86455717, "epoch": 0.35510248287439417, "grad_norm": 16.008190155029297, "learning_rate": 9.95895943107407e-06, "loss": 0.71008153, "memory(GiB)": 34.88, "step": 13115, "train_speed(iter/s)": 0.415017 }, { "acc": 0.87264547, "epoch": 0.35523786315760975, "grad_norm": 13.313244819641113, "learning_rate": 9.95888784916743e-06, "loss": 0.57335949, "memory(GiB)": 34.88, "step": 13120, "train_speed(iter/s)": 0.415025 }, { "acc": 0.86345673, "epoch": 0.35537324344082527, "grad_norm": 8.158185958862305, "learning_rate": 9.958816205147239e-06, "loss": 0.73367634, "memory(GiB)": 34.88, "step": 13125, "train_speed(iter/s)": 0.415032 }, { "acc": 0.84121475, "epoch": 0.35550862372404085, "grad_norm": 7.879458427429199, "learning_rate": 9.958744499014393e-06, "loss": 0.79941502, "memory(GiB)": 34.88, "step": 13130, "train_speed(iter/s)": 0.415039 }, { "acc": 0.84264774, "epoch": 0.3556440040072564, "grad_norm": 8.574749946594238, "learning_rate": 9.958672730769791e-06, "loss": 0.78715019, "memory(GiB)": 34.88, "step": 13135, "train_speed(iter/s)": 0.415046 }, { "acc": 0.84773445, "epoch": 0.35577938429047196, "grad_norm": 6.202514171600342, "learning_rate": 9.958600900414336e-06, "loss": 0.80696039, "memory(GiB)": 34.88, "step": 13140, "train_speed(iter/s)": 0.415053 }, { "acc": 0.84707708, "epoch": 0.3559147645736875, "grad_norm": 8.581645011901855, "learning_rate": 9.958529007948922e-06, "loss": 0.730056, "memory(GiB)": 34.88, "step": 13145, "train_speed(iter/s)": 0.41506 }, { "acc": 0.83160696, "epoch": 0.35605014485690306, "grad_norm": 10.650262832641602, "learning_rate": 9.958457053374455e-06, "loss": 0.841994, "memory(GiB)": 34.88, "step": 13150, "train_speed(iter/s)": 0.415067 }, { "acc": 0.85437632, "epoch": 0.3561855251401186, "grad_norm": 9.185823440551758, "learning_rate": 9.95838503669183e-06, "loss": 0.77576623, "memory(GiB)": 34.88, "step": 13155, "train_speed(iter/s)": 0.415074 }, { "acc": 0.83389587, "epoch": 0.35632090542333417, "grad_norm": 6.10341739654541, "learning_rate": 9.958312957901954e-06, "loss": 0.81787338, "memory(GiB)": 34.88, "step": 13160, "train_speed(iter/s)": 0.41508 }, { "acc": 0.85627413, "epoch": 0.3564562857065497, "grad_norm": 46.03801727294922, "learning_rate": 9.95824081700573e-06, "loss": 0.68797045, "memory(GiB)": 34.88, "step": 13165, "train_speed(iter/s)": 0.415087 }, { "acc": 0.84580355, "epoch": 0.35659166598976527, "grad_norm": 6.223284721374512, "learning_rate": 9.95816861400406e-06, "loss": 0.73834829, "memory(GiB)": 34.88, "step": 13170, "train_speed(iter/s)": 0.415094 }, { "acc": 0.85918961, "epoch": 0.3567270462729808, "grad_norm": 5.43124532699585, "learning_rate": 9.958096348897848e-06, "loss": 0.67515974, "memory(GiB)": 34.88, "step": 13175, "train_speed(iter/s)": 0.415101 }, { "acc": 0.86005764, "epoch": 0.3568624265561964, "grad_norm": 5.800882816314697, "learning_rate": 9.958024021688002e-06, "loss": 0.76562953, "memory(GiB)": 34.88, "step": 13180, "train_speed(iter/s)": 0.415109 }, { "acc": 0.84241228, "epoch": 0.3569978068394119, "grad_norm": 10.264715194702148, "learning_rate": 9.957951632375423e-06, "loss": 0.82444506, "memory(GiB)": 34.88, "step": 13185, "train_speed(iter/s)": 0.415115 }, { "acc": 0.8590374, "epoch": 0.3571331871226275, "grad_norm": 10.262811660766602, "learning_rate": 9.957879180961023e-06, "loss": 0.7022975, "memory(GiB)": 34.88, "step": 13190, "train_speed(iter/s)": 0.415122 }, { "acc": 0.86206188, "epoch": 0.357268567405843, "grad_norm": 16.000991821289062, "learning_rate": 9.957806667445706e-06, "loss": 0.79083414, "memory(GiB)": 34.88, "step": 13195, "train_speed(iter/s)": 0.415129 }, { "acc": 0.83616352, "epoch": 0.3574039476890586, "grad_norm": 28.873838424682617, "learning_rate": 9.957734091830382e-06, "loss": 0.85386591, "memory(GiB)": 34.88, "step": 13200, "train_speed(iter/s)": 0.415135 }, { "acc": 0.85099487, "epoch": 0.3575393279722741, "grad_norm": 8.638751029968262, "learning_rate": 9.957661454115962e-06, "loss": 0.70505676, "memory(GiB)": 34.88, "step": 13205, "train_speed(iter/s)": 0.415142 }, { "acc": 0.85007162, "epoch": 0.3576747082554897, "grad_norm": 6.8697991371154785, "learning_rate": 9.957588754303353e-06, "loss": 0.74999309, "memory(GiB)": 34.88, "step": 13210, "train_speed(iter/s)": 0.41515 }, { "acc": 0.86387043, "epoch": 0.3578100885387052, "grad_norm": 7.026995658874512, "learning_rate": 9.957515992393466e-06, "loss": 0.71545935, "memory(GiB)": 34.88, "step": 13215, "train_speed(iter/s)": 0.415157 }, { "acc": 0.83714886, "epoch": 0.3579454688219208, "grad_norm": 8.393933296203613, "learning_rate": 9.957443168387212e-06, "loss": 0.79320598, "memory(GiB)": 34.88, "step": 13220, "train_speed(iter/s)": 0.415163 }, { "acc": 0.87130671, "epoch": 0.3580808491051363, "grad_norm": 5.648645877838135, "learning_rate": 9.957370282285507e-06, "loss": 0.62594719, "memory(GiB)": 34.88, "step": 13225, "train_speed(iter/s)": 0.415171 }, { "acc": 0.84958611, "epoch": 0.3582162293883519, "grad_norm": 6.626574516296387, "learning_rate": 9.957297334089261e-06, "loss": 0.62190051, "memory(GiB)": 34.88, "step": 13230, "train_speed(iter/s)": 0.415178 }, { "acc": 0.83090343, "epoch": 0.35835160967156743, "grad_norm": 11.400418281555176, "learning_rate": 9.957224323799389e-06, "loss": 0.90033674, "memory(GiB)": 34.88, "step": 13235, "train_speed(iter/s)": 0.415184 }, { "acc": 0.85202246, "epoch": 0.358486989954783, "grad_norm": 7.44652795791626, "learning_rate": 9.957151251416802e-06, "loss": 0.77118168, "memory(GiB)": 34.88, "step": 13240, "train_speed(iter/s)": 0.415191 }, { "acc": 0.83110142, "epoch": 0.35862237023799853, "grad_norm": 15.26997184753418, "learning_rate": 9.95707811694242e-06, "loss": 0.87492599, "memory(GiB)": 34.88, "step": 13245, "train_speed(iter/s)": 0.415198 }, { "acc": 0.87237492, "epoch": 0.3587577505212141, "grad_norm": 9.269657135009766, "learning_rate": 9.957004920377156e-06, "loss": 0.62290425, "memory(GiB)": 34.88, "step": 13250, "train_speed(iter/s)": 0.415206 }, { "acc": 0.85375032, "epoch": 0.35889313080442964, "grad_norm": 5.678566932678223, "learning_rate": 9.956931661721931e-06, "loss": 0.73132939, "memory(GiB)": 34.88, "step": 13255, "train_speed(iter/s)": 0.415213 }, { "acc": 0.86662025, "epoch": 0.3590285110876452, "grad_norm": 6.9521164894104, "learning_rate": 9.95685834097766e-06, "loss": 0.67796159, "memory(GiB)": 34.88, "step": 13260, "train_speed(iter/s)": 0.41522 }, { "acc": 0.863906, "epoch": 0.35916389137086074, "grad_norm": 13.970701217651367, "learning_rate": 9.95678495814526e-06, "loss": 0.64712248, "memory(GiB)": 34.88, "step": 13265, "train_speed(iter/s)": 0.415227 }, { "acc": 0.85739822, "epoch": 0.3592992716540763, "grad_norm": 13.281990051269531, "learning_rate": 9.956711513225653e-06, "loss": 0.75416265, "memory(GiB)": 34.88, "step": 13270, "train_speed(iter/s)": 0.415235 }, { "acc": 0.84811916, "epoch": 0.35943465193729185, "grad_norm": 11.307942390441895, "learning_rate": 9.956638006219757e-06, "loss": 0.84321556, "memory(GiB)": 34.88, "step": 13275, "train_speed(iter/s)": 0.41524 }, { "acc": 0.83244419, "epoch": 0.35957003222050743, "grad_norm": 9.659806251525879, "learning_rate": 9.956564437128493e-06, "loss": 0.90854626, "memory(GiB)": 34.88, "step": 13280, "train_speed(iter/s)": 0.415247 }, { "acc": 0.8550066, "epoch": 0.35970541250372295, "grad_norm": 7.46968936920166, "learning_rate": 9.956490805952784e-06, "loss": 0.69104228, "memory(GiB)": 34.88, "step": 13285, "train_speed(iter/s)": 0.415254 }, { "acc": 0.8508358, "epoch": 0.35984079278693853, "grad_norm": 6.9117350578308105, "learning_rate": 9.956417112693553e-06, "loss": 0.72886996, "memory(GiB)": 34.88, "step": 13290, "train_speed(iter/s)": 0.415259 }, { "acc": 0.83754721, "epoch": 0.35997617307015406, "grad_norm": 7.20635461807251, "learning_rate": 9.95634335735172e-06, "loss": 0.80768309, "memory(GiB)": 34.88, "step": 13295, "train_speed(iter/s)": 0.415266 }, { "acc": 0.86682034, "epoch": 0.36011155335336964, "grad_norm": 17.797706604003906, "learning_rate": 9.956269539928211e-06, "loss": 0.68056707, "memory(GiB)": 34.88, "step": 13300, "train_speed(iter/s)": 0.415272 }, { "acc": 0.8580883, "epoch": 0.36024693363658516, "grad_norm": 10.629154205322266, "learning_rate": 9.956195660423951e-06, "loss": 0.70445986, "memory(GiB)": 34.88, "step": 13305, "train_speed(iter/s)": 0.415279 }, { "acc": 0.82731876, "epoch": 0.36038231391980075, "grad_norm": 16.0904483795166, "learning_rate": 9.956121718839864e-06, "loss": 1.01689577, "memory(GiB)": 34.88, "step": 13310, "train_speed(iter/s)": 0.415286 }, { "acc": 0.86880207, "epoch": 0.36051769420301627, "grad_norm": 7.499566555023193, "learning_rate": 9.956047715176878e-06, "loss": 0.64785662, "memory(GiB)": 34.88, "step": 13315, "train_speed(iter/s)": 0.415293 }, { "acc": 0.79687958, "epoch": 0.36065307448623185, "grad_norm": 10.111271858215332, "learning_rate": 9.95597364943592e-06, "loss": 1.1069521, "memory(GiB)": 34.88, "step": 13320, "train_speed(iter/s)": 0.415298 }, { "acc": 0.85516853, "epoch": 0.3607884547694474, "grad_norm": 9.140352249145508, "learning_rate": 9.955899521617915e-06, "loss": 0.63376102, "memory(GiB)": 34.88, "step": 13325, "train_speed(iter/s)": 0.415305 }, { "acc": 0.88580055, "epoch": 0.36092383505266296, "grad_norm": 6.344193458557129, "learning_rate": 9.955825331723795e-06, "loss": 0.44295678, "memory(GiB)": 34.88, "step": 13330, "train_speed(iter/s)": 0.415312 }, { "acc": 0.87686367, "epoch": 0.3610592153358785, "grad_norm": 8.326949119567871, "learning_rate": 9.955751079754486e-06, "loss": 0.569349, "memory(GiB)": 34.88, "step": 13335, "train_speed(iter/s)": 0.415319 }, { "acc": 0.84815769, "epoch": 0.36119459561909406, "grad_norm": 16.692407608032227, "learning_rate": 9.955676765710923e-06, "loss": 0.77233839, "memory(GiB)": 34.88, "step": 13340, "train_speed(iter/s)": 0.415326 }, { "acc": 0.86044798, "epoch": 0.3613299759023096, "grad_norm": 14.684491157531738, "learning_rate": 9.955602389594033e-06, "loss": 0.72071414, "memory(GiB)": 34.88, "step": 13345, "train_speed(iter/s)": 0.415333 }, { "acc": 0.8368104, "epoch": 0.36146535618552517, "grad_norm": 8.525323867797852, "learning_rate": 9.955527951404749e-06, "loss": 0.84187107, "memory(GiB)": 34.88, "step": 13350, "train_speed(iter/s)": 0.415337 }, { "acc": 0.83838863, "epoch": 0.3616007364687407, "grad_norm": 22.278202056884766, "learning_rate": 9.955453451144005e-06, "loss": 0.78929715, "memory(GiB)": 34.88, "step": 13355, "train_speed(iter/s)": 0.415343 }, { "acc": 0.86331406, "epoch": 0.36173611675195627, "grad_norm": 8.512104034423828, "learning_rate": 9.95537888881273e-06, "loss": 0.59486632, "memory(GiB)": 34.88, "step": 13360, "train_speed(iter/s)": 0.41535 }, { "acc": 0.83712397, "epoch": 0.3618714970351718, "grad_norm": 28.009695053100586, "learning_rate": 9.95530426441186e-06, "loss": 0.90963726, "memory(GiB)": 34.88, "step": 13365, "train_speed(iter/s)": 0.415356 }, { "acc": 0.87433224, "epoch": 0.3620068773183874, "grad_norm": 10.741913795471191, "learning_rate": 9.955229577942332e-06, "loss": 0.70251045, "memory(GiB)": 34.88, "step": 13370, "train_speed(iter/s)": 0.415363 }, { "acc": 0.84298306, "epoch": 0.3621422576016029, "grad_norm": 7.840688228607178, "learning_rate": 9.955154829405079e-06, "loss": 0.80105057, "memory(GiB)": 34.88, "step": 13375, "train_speed(iter/s)": 0.41537 }, { "acc": 0.85983191, "epoch": 0.3622776378848185, "grad_norm": 7.955355644226074, "learning_rate": 9.955080018801039e-06, "loss": 0.68622036, "memory(GiB)": 34.88, "step": 13380, "train_speed(iter/s)": 0.415378 }, { "acc": 0.84779243, "epoch": 0.362413018168034, "grad_norm": 6.931581497192383, "learning_rate": 9.95500514613115e-06, "loss": 0.71616235, "memory(GiB)": 34.88, "step": 13385, "train_speed(iter/s)": 0.415384 }, { "acc": 0.84459324, "epoch": 0.3625483984512496, "grad_norm": 9.227110862731934, "learning_rate": 9.954930211396346e-06, "loss": 0.82123375, "memory(GiB)": 34.88, "step": 13390, "train_speed(iter/s)": 0.415391 }, { "acc": 0.86382389, "epoch": 0.3626837787344651, "grad_norm": 15.994699478149414, "learning_rate": 9.954855214597571e-06, "loss": 0.71410799, "memory(GiB)": 34.88, "step": 13395, "train_speed(iter/s)": 0.415399 }, { "acc": 0.79796495, "epoch": 0.3628191590176807, "grad_norm": 10.225112915039062, "learning_rate": 9.95478015573576e-06, "loss": 1.04950085, "memory(GiB)": 34.88, "step": 13400, "train_speed(iter/s)": 0.415406 }, { "acc": 0.85187378, "epoch": 0.3629545393008962, "grad_norm": 14.606189727783203, "learning_rate": 9.954705034811853e-06, "loss": 0.77264962, "memory(GiB)": 34.88, "step": 13405, "train_speed(iter/s)": 0.415413 }, { "acc": 0.82644529, "epoch": 0.3630899195841118, "grad_norm": 10.184464454650879, "learning_rate": 9.954629851826795e-06, "loss": 0.92480068, "memory(GiB)": 34.88, "step": 13410, "train_speed(iter/s)": 0.41542 }, { "acc": 0.88614616, "epoch": 0.3632252998673273, "grad_norm": 10.192591667175293, "learning_rate": 9.954554606781525e-06, "loss": 0.59039292, "memory(GiB)": 34.88, "step": 13415, "train_speed(iter/s)": 0.415425 }, { "acc": 0.83635025, "epoch": 0.36336068015054285, "grad_norm": 17.825159072875977, "learning_rate": 9.954479299676987e-06, "loss": 0.81531658, "memory(GiB)": 34.88, "step": 13420, "train_speed(iter/s)": 0.415432 }, { "acc": 0.84508343, "epoch": 0.36349606043375843, "grad_norm": 10.27291488647461, "learning_rate": 9.954403930514125e-06, "loss": 0.81688175, "memory(GiB)": 34.88, "step": 13425, "train_speed(iter/s)": 0.415438 }, { "acc": 0.83057232, "epoch": 0.36363144071697395, "grad_norm": 5.581355094909668, "learning_rate": 9.95432849929388e-06, "loss": 0.80899715, "memory(GiB)": 34.88, "step": 13430, "train_speed(iter/s)": 0.415445 }, { "acc": 0.84449167, "epoch": 0.36376682100018953, "grad_norm": 7.466704368591309, "learning_rate": 9.954253006017199e-06, "loss": 0.79462409, "memory(GiB)": 34.88, "step": 13435, "train_speed(iter/s)": 0.415451 }, { "acc": 0.84382401, "epoch": 0.36390220128340506, "grad_norm": 6.6646528244018555, "learning_rate": 9.954177450685029e-06, "loss": 0.74349766, "memory(GiB)": 34.88, "step": 13440, "train_speed(iter/s)": 0.415458 }, { "acc": 0.84810457, "epoch": 0.36403758156662064, "grad_norm": 15.029886245727539, "learning_rate": 9.954101833298313e-06, "loss": 0.78973703, "memory(GiB)": 34.88, "step": 13445, "train_speed(iter/s)": 0.415464 }, { "acc": 0.86353159, "epoch": 0.36417296184983616, "grad_norm": 10.199933052062988, "learning_rate": 9.954026153858001e-06, "loss": 0.69493642, "memory(GiB)": 34.88, "step": 13450, "train_speed(iter/s)": 0.415471 }, { "acc": 0.85564251, "epoch": 0.36430834213305174, "grad_norm": 16.620590209960938, "learning_rate": 9.95395041236504e-06, "loss": 0.69907103, "memory(GiB)": 34.88, "step": 13455, "train_speed(iter/s)": 0.415478 }, { "acc": 0.86755066, "epoch": 0.36444372241626727, "grad_norm": 18.785110473632812, "learning_rate": 9.95387460882038e-06, "loss": 0.5758255, "memory(GiB)": 34.88, "step": 13460, "train_speed(iter/s)": 0.415485 }, { "acc": 0.84588051, "epoch": 0.36457910269948285, "grad_norm": 9.907966613769531, "learning_rate": 9.953798743224969e-06, "loss": 0.83495684, "memory(GiB)": 34.88, "step": 13465, "train_speed(iter/s)": 0.41549 }, { "acc": 0.84305201, "epoch": 0.3647144829826984, "grad_norm": 9.529802322387695, "learning_rate": 9.953722815579759e-06, "loss": 0.7681304, "memory(GiB)": 34.88, "step": 13470, "train_speed(iter/s)": 0.415496 }, { "acc": 0.83903637, "epoch": 0.36484986326591395, "grad_norm": 4.304239749908447, "learning_rate": 9.953646825885697e-06, "loss": 0.8403904, "memory(GiB)": 34.88, "step": 13475, "train_speed(iter/s)": 0.415503 }, { "acc": 0.8562665, "epoch": 0.3649852435491295, "grad_norm": 20.328500747680664, "learning_rate": 9.95357077414374e-06, "loss": 0.75531511, "memory(GiB)": 34.88, "step": 13480, "train_speed(iter/s)": 0.415509 }, { "acc": 0.84550381, "epoch": 0.36512062383234506, "grad_norm": 13.802319526672363, "learning_rate": 9.95349466035484e-06, "loss": 0.81059647, "memory(GiB)": 34.88, "step": 13485, "train_speed(iter/s)": 0.415516 }, { "acc": 0.85129261, "epoch": 0.3652560041155606, "grad_norm": 19.27220344543457, "learning_rate": 9.953418484519947e-06, "loss": 0.72648726, "memory(GiB)": 34.88, "step": 13490, "train_speed(iter/s)": 0.415523 }, { "acc": 0.82620106, "epoch": 0.36539138439877616, "grad_norm": 20.630624771118164, "learning_rate": 9.953342246640019e-06, "loss": 0.89413195, "memory(GiB)": 34.88, "step": 13495, "train_speed(iter/s)": 0.41553 }, { "acc": 0.85738716, "epoch": 0.3655267646819917, "grad_norm": 8.793779373168945, "learning_rate": 9.95326594671601e-06, "loss": 0.79896212, "memory(GiB)": 34.88, "step": 13500, "train_speed(iter/s)": 0.415537 }, { "acc": 0.85125322, "epoch": 0.36566214496520727, "grad_norm": 5.322384834289551, "learning_rate": 9.953189584748876e-06, "loss": 0.69769621, "memory(GiB)": 34.88, "step": 13505, "train_speed(iter/s)": 0.415544 }, { "acc": 0.85187397, "epoch": 0.3657975252484228, "grad_norm": 6.91472864151001, "learning_rate": 9.95311316073957e-06, "loss": 0.7205308, "memory(GiB)": 34.88, "step": 13510, "train_speed(iter/s)": 0.415548 }, { "acc": 0.8277297, "epoch": 0.3659329055316384, "grad_norm": 15.157258033752441, "learning_rate": 9.953036674689055e-06, "loss": 0.88660965, "memory(GiB)": 34.88, "step": 13515, "train_speed(iter/s)": 0.415555 }, { "acc": 0.84905128, "epoch": 0.3660682858148539, "grad_norm": 9.298842430114746, "learning_rate": 9.952960126598285e-06, "loss": 0.79521303, "memory(GiB)": 34.88, "step": 13520, "train_speed(iter/s)": 0.415562 }, { "acc": 0.87961664, "epoch": 0.3662036660980695, "grad_norm": 6.166615009307861, "learning_rate": 9.952883516468222e-06, "loss": 0.60495157, "memory(GiB)": 34.88, "step": 13525, "train_speed(iter/s)": 0.41557 }, { "acc": 0.84250145, "epoch": 0.366339046381285, "grad_norm": 15.187457084655762, "learning_rate": 9.952806844299821e-06, "loss": 0.86255655, "memory(GiB)": 34.88, "step": 13530, "train_speed(iter/s)": 0.415577 }, { "acc": 0.85336018, "epoch": 0.3664744266645006, "grad_norm": 10.37413501739502, "learning_rate": 9.952730110094048e-06, "loss": 0.66292782, "memory(GiB)": 34.88, "step": 13535, "train_speed(iter/s)": 0.415584 }, { "acc": 0.83655434, "epoch": 0.3666098069477161, "grad_norm": 14.282005310058594, "learning_rate": 9.952653313851862e-06, "loss": 0.82303276, "memory(GiB)": 34.88, "step": 13540, "train_speed(iter/s)": 0.415591 }, { "acc": 0.84441366, "epoch": 0.3667451872309317, "grad_norm": 7.971316337585449, "learning_rate": 9.952576455574224e-06, "loss": 0.73275571, "memory(GiB)": 34.88, "step": 13545, "train_speed(iter/s)": 0.415598 }, { "acc": 0.85609713, "epoch": 0.3668805675141472, "grad_norm": 13.502260208129883, "learning_rate": 9.952499535262095e-06, "loss": 0.79323173, "memory(GiB)": 34.88, "step": 13550, "train_speed(iter/s)": 0.415605 }, { "acc": 0.86207428, "epoch": 0.3670159477973628, "grad_norm": 8.392202377319336, "learning_rate": 9.952422552916445e-06, "loss": 0.70037417, "memory(GiB)": 34.88, "step": 13555, "train_speed(iter/s)": 0.415612 }, { "acc": 0.82870016, "epoch": 0.3671513280805783, "grad_norm": 14.772851943969727, "learning_rate": 9.952345508538232e-06, "loss": 0.86816893, "memory(GiB)": 34.88, "step": 13560, "train_speed(iter/s)": 0.415617 }, { "acc": 0.84854965, "epoch": 0.3672867083637939, "grad_norm": 24.88905906677246, "learning_rate": 9.952268402128426e-06, "loss": 0.79501896, "memory(GiB)": 34.88, "step": 13565, "train_speed(iter/s)": 0.415625 }, { "acc": 0.84759216, "epoch": 0.3674220886470094, "grad_norm": 12.027935981750488, "learning_rate": 9.952191233687987e-06, "loss": 0.76039481, "memory(GiB)": 34.88, "step": 13570, "train_speed(iter/s)": 0.415632 }, { "acc": 0.86097622, "epoch": 0.367557468930225, "grad_norm": 52.35308837890625, "learning_rate": 9.952114003217889e-06, "loss": 0.76132827, "memory(GiB)": 34.88, "step": 13575, "train_speed(iter/s)": 0.415639 }, { "acc": 0.85525703, "epoch": 0.36769284921344053, "grad_norm": 6.916192054748535, "learning_rate": 9.952036710719094e-06, "loss": 0.66751556, "memory(GiB)": 34.88, "step": 13580, "train_speed(iter/s)": 0.415645 }, { "acc": 0.83398666, "epoch": 0.3678282294966561, "grad_norm": 8.252933502197266, "learning_rate": 9.951959356192574e-06, "loss": 0.87276869, "memory(GiB)": 34.88, "step": 13585, "train_speed(iter/s)": 0.415652 }, { "acc": 0.83748655, "epoch": 0.36796360977987164, "grad_norm": 10.15969467163086, "learning_rate": 9.951881939639293e-06, "loss": 0.86317835, "memory(GiB)": 34.88, "step": 13590, "train_speed(iter/s)": 0.415659 }, { "acc": 0.83498402, "epoch": 0.3680989900630872, "grad_norm": 6.575142860412598, "learning_rate": 9.951804461060224e-06, "loss": 0.85769958, "memory(GiB)": 34.88, "step": 13595, "train_speed(iter/s)": 0.415665 }, { "acc": 0.84220181, "epoch": 0.36823437034630274, "grad_norm": 10.838096618652344, "learning_rate": 9.95172692045634e-06, "loss": 0.69797916, "memory(GiB)": 34.88, "step": 13600, "train_speed(iter/s)": 0.415671 }, { "acc": 0.82633324, "epoch": 0.3683697506295183, "grad_norm": 7.427011966705322, "learning_rate": 9.951649317828607e-06, "loss": 0.92852974, "memory(GiB)": 34.88, "step": 13605, "train_speed(iter/s)": 0.415677 }, { "acc": 0.84347496, "epoch": 0.36850513091273385, "grad_norm": 8.123883247375488, "learning_rate": 9.951571653177999e-06, "loss": 0.7240468, "memory(GiB)": 34.88, "step": 13610, "train_speed(iter/s)": 0.415684 }, { "acc": 0.85315504, "epoch": 0.3686405111959494, "grad_norm": 6.212808132171631, "learning_rate": 9.951493926505494e-06, "loss": 0.71880493, "memory(GiB)": 34.88, "step": 13615, "train_speed(iter/s)": 0.415691 }, { "acc": 0.85609207, "epoch": 0.36877589147916495, "grad_norm": 17.938264846801758, "learning_rate": 9.951416137812057e-06, "loss": 0.71810627, "memory(GiB)": 34.88, "step": 13620, "train_speed(iter/s)": 0.415699 }, { "acc": 0.8469346, "epoch": 0.36891127176238053, "grad_norm": 50.826690673828125, "learning_rate": 9.951338287098668e-06, "loss": 0.79620972, "memory(GiB)": 34.88, "step": 13625, "train_speed(iter/s)": 0.415705 }, { "acc": 0.86489925, "epoch": 0.36904665204559606, "grad_norm": 10.544095039367676, "learning_rate": 9.951260374366302e-06, "loss": 0.71555657, "memory(GiB)": 34.88, "step": 13630, "train_speed(iter/s)": 0.415711 }, { "acc": 0.85801554, "epoch": 0.36918203232881164, "grad_norm": 11.150018692016602, "learning_rate": 9.951182399615934e-06, "loss": 0.73805933, "memory(GiB)": 34.88, "step": 13635, "train_speed(iter/s)": 0.415717 }, { "acc": 0.84123983, "epoch": 0.36931741261202716, "grad_norm": 8.244757652282715, "learning_rate": 9.951104362848541e-06, "loss": 0.79976645, "memory(GiB)": 34.88, "step": 13640, "train_speed(iter/s)": 0.415721 }, { "acc": 0.88159904, "epoch": 0.36945279289524274, "grad_norm": 10.276311874389648, "learning_rate": 9.951026264065101e-06, "loss": 0.55751753, "memory(GiB)": 34.88, "step": 13645, "train_speed(iter/s)": 0.415728 }, { "acc": 0.82313824, "epoch": 0.36958817317845827, "grad_norm": 16.280406951904297, "learning_rate": 9.95094810326659e-06, "loss": 0.95689955, "memory(GiB)": 34.88, "step": 13650, "train_speed(iter/s)": 0.415735 }, { "acc": 0.82907619, "epoch": 0.36972355346167385, "grad_norm": 25.628768920898438, "learning_rate": 9.950869880453991e-06, "loss": 0.83545914, "memory(GiB)": 34.88, "step": 13655, "train_speed(iter/s)": 0.415739 }, { "acc": 0.85284204, "epoch": 0.3698589337448894, "grad_norm": 15.805920600891113, "learning_rate": 9.95079159562828e-06, "loss": 0.70423479, "memory(GiB)": 34.88, "step": 13660, "train_speed(iter/s)": 0.415744 }, { "acc": 0.85905476, "epoch": 0.36999431402810495, "grad_norm": 6.893500804901123, "learning_rate": 9.95071324879044e-06, "loss": 0.74806828, "memory(GiB)": 34.88, "step": 13665, "train_speed(iter/s)": 0.415752 }, { "acc": 0.85746613, "epoch": 0.3701296943113205, "grad_norm": 4.875969409942627, "learning_rate": 9.950634839941452e-06, "loss": 0.6765347, "memory(GiB)": 34.88, "step": 13670, "train_speed(iter/s)": 0.415758 }, { "acc": 0.8397646, "epoch": 0.37026507459453606, "grad_norm": 9.428768157958984, "learning_rate": 9.950556369082298e-06, "loss": 0.77882404, "memory(GiB)": 34.88, "step": 13675, "train_speed(iter/s)": 0.415763 }, { "acc": 0.87466459, "epoch": 0.3704004548777516, "grad_norm": 20.174560546875, "learning_rate": 9.950477836213961e-06, "loss": 0.63565397, "memory(GiB)": 34.88, "step": 13680, "train_speed(iter/s)": 0.415766 }, { "acc": 0.84941568, "epoch": 0.37053583516096716, "grad_norm": 10.897109985351562, "learning_rate": 9.950399241337425e-06, "loss": 0.81742258, "memory(GiB)": 34.88, "step": 13685, "train_speed(iter/s)": 0.415772 }, { "acc": 0.82936172, "epoch": 0.3706712154441827, "grad_norm": 30.3575439453125, "learning_rate": 9.950320584453674e-06, "loss": 0.88313112, "memory(GiB)": 34.88, "step": 13690, "train_speed(iter/s)": 0.415779 }, { "acc": 0.85603828, "epoch": 0.37080659572739827, "grad_norm": 7.635587692260742, "learning_rate": 9.950241865563694e-06, "loss": 0.79800954, "memory(GiB)": 34.88, "step": 13695, "train_speed(iter/s)": 0.415784 }, { "acc": 0.87944651, "epoch": 0.3709419760106138, "grad_norm": 6.977133274078369, "learning_rate": 9.950163084668472e-06, "loss": 0.66492653, "memory(GiB)": 34.88, "step": 13700, "train_speed(iter/s)": 0.415788 }, { "acc": 0.85347691, "epoch": 0.3710773562938294, "grad_norm": 15.450064659118652, "learning_rate": 9.950084241768993e-06, "loss": 0.77181029, "memory(GiB)": 34.88, "step": 13705, "train_speed(iter/s)": 0.415794 }, { "acc": 0.87458487, "epoch": 0.3712127365770449, "grad_norm": 5.691470623016357, "learning_rate": 9.950005336866247e-06, "loss": 0.577318, "memory(GiB)": 34.88, "step": 13710, "train_speed(iter/s)": 0.415798 }, { "acc": 0.85588436, "epoch": 0.3713481168602605, "grad_norm": 22.837890625, "learning_rate": 9.949926369961219e-06, "loss": 0.74825172, "memory(GiB)": 34.88, "step": 13715, "train_speed(iter/s)": 0.415802 }, { "acc": 0.84205227, "epoch": 0.371483497143476, "grad_norm": 19.968660354614258, "learning_rate": 9.9498473410549e-06, "loss": 0.82159576, "memory(GiB)": 34.88, "step": 13720, "train_speed(iter/s)": 0.415809 }, { "acc": 0.86319847, "epoch": 0.3716188774266916, "grad_norm": 18.13020133972168, "learning_rate": 9.94976825014828e-06, "loss": 0.68662834, "memory(GiB)": 34.88, "step": 13725, "train_speed(iter/s)": 0.415811 }, { "acc": 0.83647604, "epoch": 0.3717542577099071, "grad_norm": 8.368297576904297, "learning_rate": 9.949689097242351e-06, "loss": 0.7446341, "memory(GiB)": 34.88, "step": 13730, "train_speed(iter/s)": 0.415816 }, { "acc": 0.84830914, "epoch": 0.3718896379931227, "grad_norm": 31.732357025146484, "learning_rate": 9.949609882338102e-06, "loss": 0.85420485, "memory(GiB)": 34.88, "step": 13735, "train_speed(iter/s)": 0.415821 }, { "acc": 0.82113686, "epoch": 0.3720250182763382, "grad_norm": 5.788206100463867, "learning_rate": 9.949530605436529e-06, "loss": 0.85641108, "memory(GiB)": 34.88, "step": 13740, "train_speed(iter/s)": 0.415827 }, { "acc": 0.84681845, "epoch": 0.3721603985595538, "grad_norm": 6.726228713989258, "learning_rate": 9.949451266538622e-06, "loss": 0.90721188, "memory(GiB)": 34.88, "step": 13745, "train_speed(iter/s)": 0.415833 }, { "acc": 0.85423107, "epoch": 0.3722957788427693, "grad_norm": 4.691497325897217, "learning_rate": 9.949371865645374e-06, "loss": 0.73281741, "memory(GiB)": 34.88, "step": 13750, "train_speed(iter/s)": 0.415839 }, { "acc": 0.86426268, "epoch": 0.3724311591259849, "grad_norm": 5.891979694366455, "learning_rate": 9.949292402757783e-06, "loss": 0.66161671, "memory(GiB)": 34.88, "step": 13755, "train_speed(iter/s)": 0.415846 }, { "acc": 0.83972511, "epoch": 0.3725665394092004, "grad_norm": 10.965431213378906, "learning_rate": 9.949212877876843e-06, "loss": 0.88564053, "memory(GiB)": 34.88, "step": 13760, "train_speed(iter/s)": 0.415852 }, { "acc": 0.83154755, "epoch": 0.372701919692416, "grad_norm": 7.999239921569824, "learning_rate": 9.949133291003549e-06, "loss": 0.83642998, "memory(GiB)": 34.88, "step": 13765, "train_speed(iter/s)": 0.415859 }, { "acc": 0.86294556, "epoch": 0.37283729997563153, "grad_norm": 60.492008209228516, "learning_rate": 9.949053642138899e-06, "loss": 0.58920889, "memory(GiB)": 34.88, "step": 13770, "train_speed(iter/s)": 0.415865 }, { "acc": 0.84325199, "epoch": 0.3729726802588471, "grad_norm": 10.61450481414795, "learning_rate": 9.948973931283891e-06, "loss": 0.76593962, "memory(GiB)": 34.88, "step": 13775, "train_speed(iter/s)": 0.415871 }, { "acc": 0.8399147, "epoch": 0.37310806054206264, "grad_norm": 6.076504230499268, "learning_rate": 9.948894158439524e-06, "loss": 0.81298676, "memory(GiB)": 34.88, "step": 13780, "train_speed(iter/s)": 0.415877 }, { "acc": 0.85237856, "epoch": 0.3732434408252782, "grad_norm": 16.232500076293945, "learning_rate": 9.948814323606795e-06, "loss": 0.78745866, "memory(GiB)": 34.88, "step": 13785, "train_speed(iter/s)": 0.415884 }, { "acc": 0.82372437, "epoch": 0.37337882110849374, "grad_norm": 7.800540924072266, "learning_rate": 9.948734426786708e-06, "loss": 0.94949389, "memory(GiB)": 34.88, "step": 13790, "train_speed(iter/s)": 0.415889 }, { "acc": 0.84569836, "epoch": 0.3735142013917093, "grad_norm": 7.375394821166992, "learning_rate": 9.94865446798026e-06, "loss": 0.77583237, "memory(GiB)": 34.88, "step": 13795, "train_speed(iter/s)": 0.415896 }, { "acc": 0.8541441, "epoch": 0.37364958167492485, "grad_norm": 11.38244915008545, "learning_rate": 9.948574447188453e-06, "loss": 0.7658402, "memory(GiB)": 34.88, "step": 13800, "train_speed(iter/s)": 0.415903 }, { "acc": 0.86370449, "epoch": 0.3737849619581404, "grad_norm": 11.10831069946289, "learning_rate": 9.948494364412293e-06, "loss": 0.71859741, "memory(GiB)": 34.88, "step": 13805, "train_speed(iter/s)": 0.41591 }, { "acc": 0.84484539, "epoch": 0.37392034224135595, "grad_norm": 11.691322326660156, "learning_rate": 9.94841421965278e-06, "loss": 0.95238628, "memory(GiB)": 34.88, "step": 13810, "train_speed(iter/s)": 0.415917 }, { "acc": 0.83972406, "epoch": 0.37405572252457153, "grad_norm": 9.65725040435791, "learning_rate": 9.948334012910919e-06, "loss": 0.75427589, "memory(GiB)": 34.88, "step": 13815, "train_speed(iter/s)": 0.415924 }, { "acc": 0.86288433, "epoch": 0.37419110280778706, "grad_norm": 9.079848289489746, "learning_rate": 9.948253744187714e-06, "loss": 0.59358654, "memory(GiB)": 34.88, "step": 13820, "train_speed(iter/s)": 0.41593 }, { "acc": 0.88076096, "epoch": 0.37432648309100264, "grad_norm": 12.44869613647461, "learning_rate": 9.948173413484168e-06, "loss": 0.57460637, "memory(GiB)": 34.88, "step": 13825, "train_speed(iter/s)": 0.415936 }, { "acc": 0.853088, "epoch": 0.37446186337421816, "grad_norm": 20.743892669677734, "learning_rate": 9.948093020801296e-06, "loss": 0.74186363, "memory(GiB)": 34.88, "step": 13830, "train_speed(iter/s)": 0.415943 }, { "acc": 0.83986559, "epoch": 0.37459724365743374, "grad_norm": 7.411739349365234, "learning_rate": 9.948012566140096e-06, "loss": 0.74035068, "memory(GiB)": 34.88, "step": 13835, "train_speed(iter/s)": 0.415949 }, { "acc": 0.87638073, "epoch": 0.37473262394064927, "grad_norm": 7.0201802253723145, "learning_rate": 9.947932049501582e-06, "loss": 0.6593236, "memory(GiB)": 34.88, "step": 13840, "train_speed(iter/s)": 0.415955 }, { "acc": 0.87111168, "epoch": 0.37486800422386485, "grad_norm": 11.823450088500977, "learning_rate": 9.947851470886757e-06, "loss": 0.58952241, "memory(GiB)": 34.88, "step": 13845, "train_speed(iter/s)": 0.415962 }, { "acc": 0.85245476, "epoch": 0.3750033845070804, "grad_norm": 7.081539154052734, "learning_rate": 9.947770830296634e-06, "loss": 0.72559423, "memory(GiB)": 34.88, "step": 13850, "train_speed(iter/s)": 0.415967 }, { "acc": 0.83257256, "epoch": 0.37513876479029595, "grad_norm": 4.021728515625, "learning_rate": 9.947690127732223e-06, "loss": 0.82662868, "memory(GiB)": 34.88, "step": 13855, "train_speed(iter/s)": 0.415974 }, { "acc": 0.84661026, "epoch": 0.3752741450735115, "grad_norm": 11.612344741821289, "learning_rate": 9.947609363194535e-06, "loss": 0.7552742, "memory(GiB)": 34.88, "step": 13860, "train_speed(iter/s)": 0.41598 }, { "acc": 0.82100019, "epoch": 0.37540952535672706, "grad_norm": 24.59475326538086, "learning_rate": 9.947528536684579e-06, "loss": 0.95158749, "memory(GiB)": 34.88, "step": 13865, "train_speed(iter/s)": 0.415986 }, { "acc": 0.84099636, "epoch": 0.3755449056399426, "grad_norm": 5.844103813171387, "learning_rate": 9.947447648203372e-06, "loss": 0.80281506, "memory(GiB)": 34.88, "step": 13870, "train_speed(iter/s)": 0.415992 }, { "acc": 0.83555784, "epoch": 0.37568028592315816, "grad_norm": 11.115216255187988, "learning_rate": 9.947366697751923e-06, "loss": 0.87862759, "memory(GiB)": 34.88, "step": 13875, "train_speed(iter/s)": 0.415998 }, { "acc": 0.83154011, "epoch": 0.3758156662063737, "grad_norm": 7.16306209564209, "learning_rate": 9.94728568533125e-06, "loss": 0.82288599, "memory(GiB)": 34.88, "step": 13880, "train_speed(iter/s)": 0.416005 }, { "acc": 0.87364807, "epoch": 0.37595104648958927, "grad_norm": 9.075300216674805, "learning_rate": 9.947204610942365e-06, "loss": 0.62602873, "memory(GiB)": 34.88, "step": 13885, "train_speed(iter/s)": 0.416012 }, { "acc": 0.85534391, "epoch": 0.3760864267728048, "grad_norm": 7.902523040771484, "learning_rate": 9.947123474586285e-06, "loss": 0.79036508, "memory(GiB)": 34.88, "step": 13890, "train_speed(iter/s)": 0.416018 }, { "acc": 0.84210625, "epoch": 0.3762218070560204, "grad_norm": 13.872002601623535, "learning_rate": 9.947042276264024e-06, "loss": 0.88486538, "memory(GiB)": 34.88, "step": 13895, "train_speed(iter/s)": 0.416024 }, { "acc": 0.83954277, "epoch": 0.3763571873392359, "grad_norm": 8.607937812805176, "learning_rate": 9.946961015976603e-06, "loss": 0.81112881, "memory(GiB)": 34.88, "step": 13900, "train_speed(iter/s)": 0.416031 }, { "acc": 0.84908848, "epoch": 0.3764925676224515, "grad_norm": 7.201023101806641, "learning_rate": 9.946879693725038e-06, "loss": 0.72381339, "memory(GiB)": 34.88, "step": 13905, "train_speed(iter/s)": 0.416037 }, { "acc": 0.85771465, "epoch": 0.376627947905667, "grad_norm": 6.439648151397705, "learning_rate": 9.946798309510346e-06, "loss": 0.6797142, "memory(GiB)": 34.88, "step": 13910, "train_speed(iter/s)": 0.416044 }, { "acc": 0.86169586, "epoch": 0.3767633281888826, "grad_norm": 5.4318084716796875, "learning_rate": 9.94671686333355e-06, "loss": 0.67941728, "memory(GiB)": 34.88, "step": 13915, "train_speed(iter/s)": 0.416051 }, { "acc": 0.83031502, "epoch": 0.3768987084720981, "grad_norm": 12.917632102966309, "learning_rate": 9.946635355195667e-06, "loss": 0.94814339, "memory(GiB)": 34.88, "step": 13920, "train_speed(iter/s)": 0.416058 }, { "acc": 0.85012865, "epoch": 0.3770340887553137, "grad_norm": 10.343340873718262, "learning_rate": 9.94655378509772e-06, "loss": 0.77539854, "memory(GiB)": 34.88, "step": 13925, "train_speed(iter/s)": 0.416064 }, { "acc": 0.85088367, "epoch": 0.3771694690385292, "grad_norm": 8.152581214904785, "learning_rate": 9.946472153040733e-06, "loss": 0.83475809, "memory(GiB)": 34.88, "step": 13930, "train_speed(iter/s)": 0.41607 }, { "acc": 0.84473143, "epoch": 0.3773048493217448, "grad_norm": 9.189153671264648, "learning_rate": 9.946390459025722e-06, "loss": 0.81909237, "memory(GiB)": 34.88, "step": 13935, "train_speed(iter/s)": 0.416077 }, { "acc": 0.84866467, "epoch": 0.3774402296049603, "grad_norm": 10.255990982055664, "learning_rate": 9.946308703053716e-06, "loss": 0.8428606, "memory(GiB)": 34.88, "step": 13940, "train_speed(iter/s)": 0.416084 }, { "acc": 0.84965019, "epoch": 0.3775756098881759, "grad_norm": 9.863027572631836, "learning_rate": 9.946226885125738e-06, "loss": 0.67554502, "memory(GiB)": 34.88, "step": 13945, "train_speed(iter/s)": 0.41609 }, { "acc": 0.83632698, "epoch": 0.3777109901713914, "grad_norm": 6.559389591217041, "learning_rate": 9.946145005242811e-06, "loss": 0.84531069, "memory(GiB)": 34.88, "step": 13950, "train_speed(iter/s)": 0.416095 }, { "acc": 0.84206104, "epoch": 0.377846370454607, "grad_norm": 10.900979042053223, "learning_rate": 9.946063063405964e-06, "loss": 0.81158714, "memory(GiB)": 34.88, "step": 13955, "train_speed(iter/s)": 0.416102 }, { "acc": 0.85741005, "epoch": 0.37798175073782253, "grad_norm": 18.607606887817383, "learning_rate": 9.945981059616221e-06, "loss": 0.72535839, "memory(GiB)": 34.88, "step": 13960, "train_speed(iter/s)": 0.416108 }, { "acc": 0.8595892, "epoch": 0.3781171310210381, "grad_norm": 7.336759567260742, "learning_rate": 9.94589899387461e-06, "loss": 0.65013695, "memory(GiB)": 34.88, "step": 13965, "train_speed(iter/s)": 0.416115 }, { "acc": 0.82689495, "epoch": 0.37825251130425364, "grad_norm": 27.69306755065918, "learning_rate": 9.945816866182158e-06, "loss": 0.95021486, "memory(GiB)": 34.88, "step": 13970, "train_speed(iter/s)": 0.416122 }, { "acc": 0.85352926, "epoch": 0.3783878915874692, "grad_norm": 10.240683555603027, "learning_rate": 9.945734676539896e-06, "loss": 0.71398926, "memory(GiB)": 34.88, "step": 13975, "train_speed(iter/s)": 0.416129 }, { "acc": 0.84168072, "epoch": 0.37852327187068474, "grad_norm": 12.583931922912598, "learning_rate": 9.94565242494885e-06, "loss": 0.8198103, "memory(GiB)": 34.88, "step": 13980, "train_speed(iter/s)": 0.416135 }, { "acc": 0.83468533, "epoch": 0.3786586521539003, "grad_norm": 19.29922103881836, "learning_rate": 9.945570111410054e-06, "loss": 0.94865494, "memory(GiB)": 34.88, "step": 13985, "train_speed(iter/s)": 0.416141 }, { "acc": 0.8594223, "epoch": 0.37879403243711585, "grad_norm": 9.17668628692627, "learning_rate": 9.945487735924538e-06, "loss": 0.66341081, "memory(GiB)": 34.88, "step": 13990, "train_speed(iter/s)": 0.416144 }, { "acc": 0.82599354, "epoch": 0.3789294127203314, "grad_norm": 12.62450122833252, "learning_rate": 9.945405298493335e-06, "loss": 0.92363062, "memory(GiB)": 34.88, "step": 13995, "train_speed(iter/s)": 0.416151 }, { "acc": 0.83934221, "epoch": 0.37906479300354695, "grad_norm": 22.197160720825195, "learning_rate": 9.945322799117474e-06, "loss": 0.86453428, "memory(GiB)": 34.88, "step": 14000, "train_speed(iter/s)": 0.416157 }, { "acc": 0.83759279, "epoch": 0.37920017328676253, "grad_norm": 11.412677764892578, "learning_rate": 9.945240237797992e-06, "loss": 0.85756807, "memory(GiB)": 34.88, "step": 14005, "train_speed(iter/s)": 0.416164 }, { "acc": 0.85708456, "epoch": 0.37933555356997806, "grad_norm": 7.836611270904541, "learning_rate": 9.945157614535922e-06, "loss": 0.7077425, "memory(GiB)": 34.88, "step": 14010, "train_speed(iter/s)": 0.41617 }, { "acc": 0.84755554, "epoch": 0.37947093385319364, "grad_norm": 11.252543449401855, "learning_rate": 9.9450749293323e-06, "loss": 0.68303051, "memory(GiB)": 34.88, "step": 14015, "train_speed(iter/s)": 0.416176 }, { "acc": 0.83667717, "epoch": 0.37960631413640916, "grad_norm": 8.775111198425293, "learning_rate": 9.94499218218816e-06, "loss": 0.79463997, "memory(GiB)": 34.88, "step": 14020, "train_speed(iter/s)": 0.416182 }, { "acc": 0.82527475, "epoch": 0.37974169441962474, "grad_norm": 9.221898078918457, "learning_rate": 9.94490937310454e-06, "loss": 0.88152618, "memory(GiB)": 34.88, "step": 14025, "train_speed(iter/s)": 0.416189 }, { "acc": 0.84955235, "epoch": 0.37987707470284027, "grad_norm": 5.104402542114258, "learning_rate": 9.944826502082477e-06, "loss": 0.75219984, "memory(GiB)": 34.88, "step": 14030, "train_speed(iter/s)": 0.416195 }, { "acc": 0.84225826, "epoch": 0.38001245498605585, "grad_norm": 9.183113098144531, "learning_rate": 9.944743569123008e-06, "loss": 0.74093394, "memory(GiB)": 34.88, "step": 14035, "train_speed(iter/s)": 0.4162 }, { "acc": 0.85541, "epoch": 0.3801478352692714, "grad_norm": 9.227968215942383, "learning_rate": 9.944660574227173e-06, "loss": 0.74482479, "memory(GiB)": 34.88, "step": 14040, "train_speed(iter/s)": 0.416207 }, { "acc": 0.83917837, "epoch": 0.38028321555248695, "grad_norm": 14.80331802368164, "learning_rate": 9.944577517396012e-06, "loss": 0.87282782, "memory(GiB)": 34.88, "step": 14045, "train_speed(iter/s)": 0.416214 }, { "acc": 0.83565521, "epoch": 0.3804185958357025, "grad_norm": 5.020143985748291, "learning_rate": 9.944494398630566e-06, "loss": 0.74746561, "memory(GiB)": 34.88, "step": 14050, "train_speed(iter/s)": 0.416218 }, { "acc": 0.82307663, "epoch": 0.38055397611891806, "grad_norm": 17.04095458984375, "learning_rate": 9.944411217931873e-06, "loss": 0.8364131, "memory(GiB)": 34.88, "step": 14055, "train_speed(iter/s)": 0.416225 }, { "acc": 0.85447025, "epoch": 0.3806893564021336, "grad_norm": 15.381400108337402, "learning_rate": 9.944327975300979e-06, "loss": 0.74396753, "memory(GiB)": 34.88, "step": 14060, "train_speed(iter/s)": 0.416231 }, { "acc": 0.86161947, "epoch": 0.38082473668534916, "grad_norm": 8.6677827835083, "learning_rate": 9.944244670738924e-06, "loss": 0.67198386, "memory(GiB)": 34.88, "step": 14065, "train_speed(iter/s)": 0.416237 }, { "acc": 0.85669174, "epoch": 0.3809601169685647, "grad_norm": 8.163957595825195, "learning_rate": 9.944161304246754e-06, "loss": 0.71477733, "memory(GiB)": 34.88, "step": 14070, "train_speed(iter/s)": 0.416243 }, { "acc": 0.85550785, "epoch": 0.38109549725178027, "grad_norm": 11.03270435333252, "learning_rate": 9.94407787582551e-06, "loss": 0.67527285, "memory(GiB)": 34.88, "step": 14075, "train_speed(iter/s)": 0.416249 }, { "acc": 0.84608097, "epoch": 0.3812308775349958, "grad_norm": 10.540143013000488, "learning_rate": 9.94399438547624e-06, "loss": 0.82363625, "memory(GiB)": 34.88, "step": 14080, "train_speed(iter/s)": 0.416255 }, { "acc": 0.81818542, "epoch": 0.3813662578182114, "grad_norm": 11.391768455505371, "learning_rate": 9.943910833199988e-06, "loss": 0.90833931, "memory(GiB)": 34.88, "step": 14085, "train_speed(iter/s)": 0.41626 }, { "acc": 0.8579689, "epoch": 0.3815016381014269, "grad_norm": 8.223971366882324, "learning_rate": 9.943827218997804e-06, "loss": 0.7269999, "memory(GiB)": 34.88, "step": 14090, "train_speed(iter/s)": 0.416266 }, { "acc": 0.86014652, "epoch": 0.3816370183846425, "grad_norm": 9.926589012145996, "learning_rate": 9.943743542870731e-06, "loss": 0.68994598, "memory(GiB)": 34.88, "step": 14095, "train_speed(iter/s)": 0.416273 }, { "acc": 0.82473822, "epoch": 0.381772398667858, "grad_norm": 9.690874099731445, "learning_rate": 9.943659804819819e-06, "loss": 0.89333124, "memory(GiB)": 34.88, "step": 14100, "train_speed(iter/s)": 0.416279 }, { "acc": 0.86296577, "epoch": 0.3819077789510736, "grad_norm": 8.380240440368652, "learning_rate": 9.943576004846118e-06, "loss": 0.72777081, "memory(GiB)": 34.88, "step": 14105, "train_speed(iter/s)": 0.416285 }, { "acc": 0.84100494, "epoch": 0.3820431592342891, "grad_norm": 11.017704010009766, "learning_rate": 9.943492142950676e-06, "loss": 0.81721897, "memory(GiB)": 34.88, "step": 14110, "train_speed(iter/s)": 0.416291 }, { "acc": 0.85218706, "epoch": 0.3821785395175047, "grad_norm": 9.211284637451172, "learning_rate": 9.943408219134546e-06, "loss": 0.72125044, "memory(GiB)": 34.88, "step": 14115, "train_speed(iter/s)": 0.416298 }, { "acc": 0.83682547, "epoch": 0.3823139198007202, "grad_norm": 8.2269926071167, "learning_rate": 9.943324233398776e-06, "loss": 0.8405839, "memory(GiB)": 34.88, "step": 14120, "train_speed(iter/s)": 0.416304 }, { "acc": 0.85258446, "epoch": 0.3824493000839358, "grad_norm": 14.09207534790039, "learning_rate": 9.943240185744421e-06, "loss": 0.70589247, "memory(GiB)": 34.88, "step": 14125, "train_speed(iter/s)": 0.416309 }, { "acc": 0.84642143, "epoch": 0.3825846803671513, "grad_norm": 5.167172908782959, "learning_rate": 9.943156076172532e-06, "loss": 0.75083151, "memory(GiB)": 34.88, "step": 14130, "train_speed(iter/s)": 0.416315 }, { "acc": 0.84289818, "epoch": 0.3827200606503669, "grad_norm": 8.700247764587402, "learning_rate": 9.943071904684163e-06, "loss": 0.81805811, "memory(GiB)": 34.88, "step": 14135, "train_speed(iter/s)": 0.416321 }, { "acc": 0.82617273, "epoch": 0.3828554409335824, "grad_norm": 9.596917152404785, "learning_rate": 9.942987671280369e-06, "loss": 0.90452595, "memory(GiB)": 34.88, "step": 14140, "train_speed(iter/s)": 0.416327 }, { "acc": 0.833601, "epoch": 0.382990821216798, "grad_norm": 11.247903823852539, "learning_rate": 9.942903375962207e-06, "loss": 0.86004038, "memory(GiB)": 34.88, "step": 14145, "train_speed(iter/s)": 0.416332 }, { "acc": 0.84260197, "epoch": 0.38312620150001353, "grad_norm": 7.512250900268555, "learning_rate": 9.942819018730729e-06, "loss": 0.78107061, "memory(GiB)": 34.88, "step": 14150, "train_speed(iter/s)": 0.416339 }, { "acc": 0.83341446, "epoch": 0.3832615817832291, "grad_norm": 8.620563507080078, "learning_rate": 9.942734599586993e-06, "loss": 0.86455784, "memory(GiB)": 34.88, "step": 14155, "train_speed(iter/s)": 0.416345 }, { "acc": 0.81522789, "epoch": 0.38339696206644464, "grad_norm": 10.76666259765625, "learning_rate": 9.942650118532058e-06, "loss": 0.88328352, "memory(GiB)": 34.88, "step": 14160, "train_speed(iter/s)": 0.416351 }, { "acc": 0.82840691, "epoch": 0.3835323423496602, "grad_norm": 8.921262741088867, "learning_rate": 9.942565575566981e-06, "loss": 0.89769344, "memory(GiB)": 34.88, "step": 14165, "train_speed(iter/s)": 0.416357 }, { "acc": 0.86181259, "epoch": 0.38366772263287574, "grad_norm": 22.588882446289062, "learning_rate": 9.942480970692822e-06, "loss": 0.70197096, "memory(GiB)": 34.88, "step": 14170, "train_speed(iter/s)": 0.416364 }, { "acc": 0.87184439, "epoch": 0.3838031029160913, "grad_norm": 8.03677749633789, "learning_rate": 9.942396303910641e-06, "loss": 0.68866596, "memory(GiB)": 34.88, "step": 14175, "train_speed(iter/s)": 0.41637 }, { "acc": 0.87122316, "epoch": 0.38393848319930685, "grad_norm": 11.497886657714844, "learning_rate": 9.942311575221497e-06, "loss": 0.70437422, "memory(GiB)": 34.88, "step": 14180, "train_speed(iter/s)": 0.416376 }, { "acc": 0.84195375, "epoch": 0.3840738634825224, "grad_norm": 15.91991138458252, "learning_rate": 9.942226784626454e-06, "loss": 0.8750843, "memory(GiB)": 34.88, "step": 14185, "train_speed(iter/s)": 0.416382 }, { "acc": 0.8528409, "epoch": 0.38420924376573795, "grad_norm": 4.255519866943359, "learning_rate": 9.94214193212657e-06, "loss": 0.70339308, "memory(GiB)": 34.88, "step": 14190, "train_speed(iter/s)": 0.416388 }, { "acc": 0.82630863, "epoch": 0.38434462404895353, "grad_norm": 7.081652641296387, "learning_rate": 9.942057017722912e-06, "loss": 0.88355579, "memory(GiB)": 34.88, "step": 14195, "train_speed(iter/s)": 0.416394 }, { "acc": 0.86665392, "epoch": 0.38448000433216906, "grad_norm": 9.587410926818848, "learning_rate": 9.941972041416542e-06, "loss": 0.62016478, "memory(GiB)": 34.88, "step": 14200, "train_speed(iter/s)": 0.416401 }, { "acc": 0.83389835, "epoch": 0.38461538461538464, "grad_norm": 12.332745552062988, "learning_rate": 9.941887003208525e-06, "loss": 0.7708519, "memory(GiB)": 34.88, "step": 14205, "train_speed(iter/s)": 0.416407 }, { "acc": 0.84809341, "epoch": 0.38475076489860016, "grad_norm": 6.724606990814209, "learning_rate": 9.941801903099926e-06, "loss": 0.78901405, "memory(GiB)": 34.88, "step": 14210, "train_speed(iter/s)": 0.416414 }, { "acc": 0.84454622, "epoch": 0.38488614518181574, "grad_norm": 7.427396297454834, "learning_rate": 9.94171674109181e-06, "loss": 0.82758179, "memory(GiB)": 34.88, "step": 14215, "train_speed(iter/s)": 0.41642 }, { "acc": 0.86721134, "epoch": 0.38502152546503127, "grad_norm": 10.211889266967773, "learning_rate": 9.941631517185246e-06, "loss": 0.68007002, "memory(GiB)": 34.88, "step": 14220, "train_speed(iter/s)": 0.416427 }, { "acc": 0.84709349, "epoch": 0.38515690574824685, "grad_norm": 9.530041694641113, "learning_rate": 9.941546231381299e-06, "loss": 0.73423643, "memory(GiB)": 34.88, "step": 14225, "train_speed(iter/s)": 0.416433 }, { "acc": 0.84136047, "epoch": 0.38529228603146237, "grad_norm": 7.246343612670898, "learning_rate": 9.941460883681041e-06, "loss": 0.71077151, "memory(GiB)": 34.88, "step": 14230, "train_speed(iter/s)": 0.416439 }, { "acc": 0.84891024, "epoch": 0.38542766631467795, "grad_norm": 11.435317039489746, "learning_rate": 9.941375474085538e-06, "loss": 0.70675626, "memory(GiB)": 34.88, "step": 14235, "train_speed(iter/s)": 0.416445 }, { "acc": 0.8502636, "epoch": 0.3855630465978935, "grad_norm": 12.23030948638916, "learning_rate": 9.94129000259586e-06, "loss": 0.74697123, "memory(GiB)": 34.88, "step": 14240, "train_speed(iter/s)": 0.41645 }, { "acc": 0.82967739, "epoch": 0.38569842688110906, "grad_norm": 7.040680885314941, "learning_rate": 9.94120446921308e-06, "loss": 0.88214302, "memory(GiB)": 34.88, "step": 14245, "train_speed(iter/s)": 0.416456 }, { "acc": 0.86381512, "epoch": 0.3858338071643246, "grad_norm": 19.233922958374023, "learning_rate": 9.941118873938266e-06, "loss": 0.67660627, "memory(GiB)": 34.88, "step": 14250, "train_speed(iter/s)": 0.416462 }, { "acc": 0.84364719, "epoch": 0.38596918744754016, "grad_norm": 9.108282089233398, "learning_rate": 9.941033216772493e-06, "loss": 0.76452856, "memory(GiB)": 34.88, "step": 14255, "train_speed(iter/s)": 0.416468 }, { "acc": 0.83546543, "epoch": 0.3861045677307557, "grad_norm": 9.036186218261719, "learning_rate": 9.940947497716834e-06, "loss": 0.89075985, "memory(GiB)": 34.88, "step": 14260, "train_speed(iter/s)": 0.416474 }, { "acc": 0.86560879, "epoch": 0.38623994801397127, "grad_norm": 36.00193786621094, "learning_rate": 9.940861716772363e-06, "loss": 0.70560112, "memory(GiB)": 34.88, "step": 14265, "train_speed(iter/s)": 0.416481 }, { "acc": 0.83774843, "epoch": 0.3863753282971868, "grad_norm": 9.189203262329102, "learning_rate": 9.940775873940153e-06, "loss": 0.77013416, "memory(GiB)": 34.88, "step": 14270, "train_speed(iter/s)": 0.416487 }, { "acc": 0.84701405, "epoch": 0.3865107085804024, "grad_norm": 10.53861141204834, "learning_rate": 9.94068996922128e-06, "loss": 0.83258915, "memory(GiB)": 34.88, "step": 14275, "train_speed(iter/s)": 0.416492 }, { "acc": 0.84666548, "epoch": 0.3866460888636179, "grad_norm": 17.785520553588867, "learning_rate": 9.940604002616819e-06, "loss": 0.70264883, "memory(GiB)": 34.88, "step": 14280, "train_speed(iter/s)": 0.416498 }, { "acc": 0.86173344, "epoch": 0.3867814691468335, "grad_norm": 16.306642532348633, "learning_rate": 9.94051797412785e-06, "loss": 0.79201236, "memory(GiB)": 34.88, "step": 14285, "train_speed(iter/s)": 0.416504 }, { "acc": 0.80525742, "epoch": 0.386916849430049, "grad_norm": 12.136886596679688, "learning_rate": 9.940431883755448e-06, "loss": 1.07525692, "memory(GiB)": 34.88, "step": 14290, "train_speed(iter/s)": 0.41651 }, { "acc": 0.84632988, "epoch": 0.3870522297132646, "grad_norm": 13.91075325012207, "learning_rate": 9.94034573150069e-06, "loss": 0.79953113, "memory(GiB)": 34.88, "step": 14295, "train_speed(iter/s)": 0.416516 }, { "acc": 0.85398159, "epoch": 0.3871876099964801, "grad_norm": 7.187673568725586, "learning_rate": 9.940259517364661e-06, "loss": 0.71528215, "memory(GiB)": 34.88, "step": 14300, "train_speed(iter/s)": 0.416521 }, { "acc": 0.83669758, "epoch": 0.3873229902796957, "grad_norm": 9.08411979675293, "learning_rate": 9.940173241348435e-06, "loss": 0.79946437, "memory(GiB)": 34.88, "step": 14305, "train_speed(iter/s)": 0.416526 }, { "acc": 0.84534054, "epoch": 0.3874583705629112, "grad_norm": 9.91476058959961, "learning_rate": 9.940086903453099e-06, "loss": 0.82028503, "memory(GiB)": 34.88, "step": 14310, "train_speed(iter/s)": 0.416532 }, { "acc": 0.84831848, "epoch": 0.3875937508461268, "grad_norm": 13.3692626953125, "learning_rate": 9.940000503679726e-06, "loss": 0.78468885, "memory(GiB)": 34.88, "step": 14315, "train_speed(iter/s)": 0.416538 }, { "acc": 0.84442911, "epoch": 0.3877291311293423, "grad_norm": 8.008395195007324, "learning_rate": 9.939914042029405e-06, "loss": 0.77695885, "memory(GiB)": 34.88, "step": 14320, "train_speed(iter/s)": 0.416543 }, { "acc": 0.87215786, "epoch": 0.3878645114125579, "grad_norm": 7.1736907958984375, "learning_rate": 9.939827518503217e-06, "loss": 0.61697941, "memory(GiB)": 34.88, "step": 14325, "train_speed(iter/s)": 0.416549 }, { "acc": 0.8454668, "epoch": 0.3879998916957734, "grad_norm": 3.4611334800720215, "learning_rate": 9.939740933102247e-06, "loss": 0.75039911, "memory(GiB)": 34.88, "step": 14330, "train_speed(iter/s)": 0.416555 }, { "acc": 0.84979038, "epoch": 0.388135271978989, "grad_norm": 14.173559188842773, "learning_rate": 9.939654285827577e-06, "loss": 0.86585999, "memory(GiB)": 34.88, "step": 14335, "train_speed(iter/s)": 0.416561 }, { "acc": 0.85435677, "epoch": 0.38827065226220453, "grad_norm": 8.104135513305664, "learning_rate": 9.939567576680295e-06, "loss": 0.75733466, "memory(GiB)": 34.88, "step": 14340, "train_speed(iter/s)": 0.416567 }, { "acc": 0.84756432, "epoch": 0.3884060325454201, "grad_norm": 12.959490776062012, "learning_rate": 9.939480805661487e-06, "loss": 0.84640646, "memory(GiB)": 34.88, "step": 14345, "train_speed(iter/s)": 0.416573 }, { "acc": 0.8686039, "epoch": 0.38854141282863564, "grad_norm": 57.35679626464844, "learning_rate": 9.939393972772238e-06, "loss": 0.62165632, "memory(GiB)": 34.88, "step": 14350, "train_speed(iter/s)": 0.416579 }, { "acc": 0.850383, "epoch": 0.3886767931118512, "grad_norm": 8.728538513183594, "learning_rate": 9.93930707801364e-06, "loss": 0.70719757, "memory(GiB)": 34.88, "step": 14355, "train_speed(iter/s)": 0.416584 }, { "acc": 0.84626074, "epoch": 0.38881217339506674, "grad_norm": 4.884619235992432, "learning_rate": 9.939220121386775e-06, "loss": 0.79968681, "memory(GiB)": 34.88, "step": 14360, "train_speed(iter/s)": 0.41659 }, { "acc": 0.83515205, "epoch": 0.3889475536782823, "grad_norm": 10.519438743591309, "learning_rate": 9.939133102892737e-06, "loss": 0.80474014, "memory(GiB)": 34.88, "step": 14365, "train_speed(iter/s)": 0.416596 }, { "acc": 0.83682518, "epoch": 0.38908293396149785, "grad_norm": 15.943095207214355, "learning_rate": 9.939046022532616e-06, "loss": 0.8854744, "memory(GiB)": 34.88, "step": 14370, "train_speed(iter/s)": 0.416599 }, { "acc": 0.84937439, "epoch": 0.3892183142447134, "grad_norm": 10.09359359741211, "learning_rate": 9.938958880307503e-06, "loss": 0.69965677, "memory(GiB)": 34.88, "step": 14375, "train_speed(iter/s)": 0.416604 }, { "acc": 0.82661467, "epoch": 0.38935369452792895, "grad_norm": 8.936787605285645, "learning_rate": 9.938871676218484e-06, "loss": 0.85422411, "memory(GiB)": 34.88, "step": 14380, "train_speed(iter/s)": 0.41661 }, { "acc": 0.86116104, "epoch": 0.38948907481114453, "grad_norm": 4.95327091217041, "learning_rate": 9.938784410266659e-06, "loss": 0.66151328, "memory(GiB)": 34.88, "step": 14385, "train_speed(iter/s)": 0.416615 }, { "acc": 0.84310808, "epoch": 0.38962445509436006, "grad_norm": 9.625924110412598, "learning_rate": 9.938697082453118e-06, "loss": 0.77756581, "memory(GiB)": 34.88, "step": 14390, "train_speed(iter/s)": 0.416621 }, { "acc": 0.86785383, "epoch": 0.38975983537757564, "grad_norm": 8.223012924194336, "learning_rate": 9.938609692778954e-06, "loss": 0.63444848, "memory(GiB)": 34.88, "step": 14395, "train_speed(iter/s)": 0.416627 }, { "acc": 0.84558954, "epoch": 0.38989521566079116, "grad_norm": 5.6029438972473145, "learning_rate": 9.938522241245263e-06, "loss": 0.77832088, "memory(GiB)": 34.88, "step": 14400, "train_speed(iter/s)": 0.416633 }, { "acc": 0.85016842, "epoch": 0.39003059594400674, "grad_norm": 12.825072288513184, "learning_rate": 9.93843472785314e-06, "loss": 0.76251669, "memory(GiB)": 34.88, "step": 14405, "train_speed(iter/s)": 0.416639 }, { "acc": 0.86414318, "epoch": 0.39016597622722227, "grad_norm": 11.741963386535645, "learning_rate": 9.938347152603683e-06, "loss": 0.73489237, "memory(GiB)": 34.88, "step": 14410, "train_speed(iter/s)": 0.416645 }, { "acc": 0.83961411, "epoch": 0.39030135651043785, "grad_norm": 7.915651321411133, "learning_rate": 9.938259515497988e-06, "loss": 0.81686783, "memory(GiB)": 34.88, "step": 14415, "train_speed(iter/s)": 0.416651 }, { "acc": 0.80866632, "epoch": 0.39043673679365337, "grad_norm": 14.054607391357422, "learning_rate": 9.93817181653715e-06, "loss": 1.04250851, "memory(GiB)": 34.88, "step": 14420, "train_speed(iter/s)": 0.416657 }, { "acc": 0.84862022, "epoch": 0.39057211707686895, "grad_norm": 9.242899894714355, "learning_rate": 9.938084055722271e-06, "loss": 0.80984879, "memory(GiB)": 34.88, "step": 14425, "train_speed(iter/s)": 0.416662 }, { "acc": 0.82253122, "epoch": 0.3907074973600845, "grad_norm": 16.39820098876953, "learning_rate": 9.93799623305445e-06, "loss": 0.9262989, "memory(GiB)": 34.88, "step": 14430, "train_speed(iter/s)": 0.416668 }, { "acc": 0.85890245, "epoch": 0.39084287764330006, "grad_norm": 14.272700309753418, "learning_rate": 9.937908348534786e-06, "loss": 0.75555043, "memory(GiB)": 34.88, "step": 14435, "train_speed(iter/s)": 0.416673 }, { "acc": 0.83625183, "epoch": 0.3909782579265156, "grad_norm": 6.666476249694824, "learning_rate": 9.93782040216438e-06, "loss": 0.85184307, "memory(GiB)": 34.88, "step": 14440, "train_speed(iter/s)": 0.416678 }, { "acc": 0.86041012, "epoch": 0.3911136382097311, "grad_norm": 8.835479736328125, "learning_rate": 9.937732393944333e-06, "loss": 0.74488649, "memory(GiB)": 34.88, "step": 14445, "train_speed(iter/s)": 0.416683 }, { "acc": 0.82353268, "epoch": 0.3912490184929467, "grad_norm": 6.007210731506348, "learning_rate": 9.937644323875751e-06, "loss": 0.84251499, "memory(GiB)": 34.88, "step": 14450, "train_speed(iter/s)": 0.416689 }, { "acc": 0.86466599, "epoch": 0.3913843987761622, "grad_norm": 9.220449447631836, "learning_rate": 9.937556191959735e-06, "loss": 0.79060564, "memory(GiB)": 34.88, "step": 14455, "train_speed(iter/s)": 0.416696 }, { "acc": 0.83777332, "epoch": 0.3915197790593778, "grad_norm": 9.439513206481934, "learning_rate": 9.937467998197389e-06, "loss": 0.77803516, "memory(GiB)": 34.88, "step": 14460, "train_speed(iter/s)": 0.416702 }, { "acc": 0.84289589, "epoch": 0.3916551593425933, "grad_norm": 10.724453926086426, "learning_rate": 9.937379742589814e-06, "loss": 0.88471565, "memory(GiB)": 34.88, "step": 14465, "train_speed(iter/s)": 0.416708 }, { "acc": 0.83499842, "epoch": 0.3917905396258089, "grad_norm": 8.623035430908203, "learning_rate": 9.937291425138124e-06, "loss": 0.8172698, "memory(GiB)": 34.88, "step": 14470, "train_speed(iter/s)": 0.416714 }, { "acc": 0.85857306, "epoch": 0.3919259199090244, "grad_norm": 7.644922733306885, "learning_rate": 9.937203045843417e-06, "loss": 0.77523756, "memory(GiB)": 34.88, "step": 14475, "train_speed(iter/s)": 0.41672 }, { "acc": 0.84555178, "epoch": 0.39206130019224, "grad_norm": 8.271774291992188, "learning_rate": 9.937114604706806e-06, "loss": 0.8282259, "memory(GiB)": 34.88, "step": 14480, "train_speed(iter/s)": 0.416725 }, { "acc": 0.82570353, "epoch": 0.39219668047545553, "grad_norm": 9.57357406616211, "learning_rate": 9.937026101729395e-06, "loss": 0.86796761, "memory(GiB)": 34.88, "step": 14485, "train_speed(iter/s)": 0.41673 }, { "acc": 0.85031433, "epoch": 0.3923320607586711, "grad_norm": 11.656693458557129, "learning_rate": 9.936937536912297e-06, "loss": 0.80036182, "memory(GiB)": 34.88, "step": 14490, "train_speed(iter/s)": 0.416736 }, { "acc": 0.8605732, "epoch": 0.39246744104188663, "grad_norm": 7.184442043304443, "learning_rate": 9.936848910256615e-06, "loss": 0.68812284, "memory(GiB)": 34.88, "step": 14495, "train_speed(iter/s)": 0.416742 }, { "acc": 0.87197933, "epoch": 0.3926028213251022, "grad_norm": 4.726782321929932, "learning_rate": 9.936760221763466e-06, "loss": 0.60286293, "memory(GiB)": 34.88, "step": 14500, "train_speed(iter/s)": 0.416748 }, { "acc": 0.8549984, "epoch": 0.39273820160831774, "grad_norm": 15.356874465942383, "learning_rate": 9.936671471433957e-06, "loss": 0.72202511, "memory(GiB)": 34.88, "step": 14505, "train_speed(iter/s)": 0.416755 }, { "acc": 0.83296871, "epoch": 0.3928735818915333, "grad_norm": 9.11727523803711, "learning_rate": 9.9365826592692e-06, "loss": 0.8961772, "memory(GiB)": 34.88, "step": 14510, "train_speed(iter/s)": 0.41676 }, { "acc": 0.8393508, "epoch": 0.39300896217474884, "grad_norm": 36.976844787597656, "learning_rate": 9.93649378527031e-06, "loss": 0.93963528, "memory(GiB)": 34.88, "step": 14515, "train_speed(iter/s)": 0.416765 }, { "acc": 0.84289761, "epoch": 0.3931443424579644, "grad_norm": 4.042428493499756, "learning_rate": 9.936404849438398e-06, "loss": 0.73165007, "memory(GiB)": 34.88, "step": 14520, "train_speed(iter/s)": 0.41677 }, { "acc": 0.83349895, "epoch": 0.39327972274117995, "grad_norm": 22.39597511291504, "learning_rate": 9.936315851774578e-06, "loss": 0.75290122, "memory(GiB)": 34.88, "step": 14525, "train_speed(iter/s)": 0.416776 }, { "acc": 0.85513792, "epoch": 0.39341510302439553, "grad_norm": 17.003814697265625, "learning_rate": 9.936226792279966e-06, "loss": 0.73030977, "memory(GiB)": 34.88, "step": 14530, "train_speed(iter/s)": 0.416781 }, { "acc": 0.83506031, "epoch": 0.39355048330761105, "grad_norm": 6.188597202301025, "learning_rate": 9.936137670955676e-06, "loss": 0.8281579, "memory(GiB)": 34.88, "step": 14535, "train_speed(iter/s)": 0.416788 }, { "acc": 0.87237473, "epoch": 0.39368586359082663, "grad_norm": 8.251985549926758, "learning_rate": 9.93604848780283e-06, "loss": 0.60797391, "memory(GiB)": 34.88, "step": 14540, "train_speed(iter/s)": 0.416793 }, { "acc": 0.83718081, "epoch": 0.39382124387404216, "grad_norm": 12.12171745300293, "learning_rate": 9.935959242822535e-06, "loss": 0.92537155, "memory(GiB)": 34.88, "step": 14545, "train_speed(iter/s)": 0.416799 }, { "acc": 0.86291943, "epoch": 0.39395662415725774, "grad_norm": 8.088618278503418, "learning_rate": 9.935869936015918e-06, "loss": 0.62497044, "memory(GiB)": 34.88, "step": 14550, "train_speed(iter/s)": 0.416806 }, { "acc": 0.81387997, "epoch": 0.39409200444047326, "grad_norm": 8.021262168884277, "learning_rate": 9.935780567384094e-06, "loss": 0.90039062, "memory(GiB)": 34.88, "step": 14555, "train_speed(iter/s)": 0.416811 }, { "acc": 0.85992012, "epoch": 0.39422738472368885, "grad_norm": 6.801705360412598, "learning_rate": 9.935691136928184e-06, "loss": 0.71074462, "memory(GiB)": 34.88, "step": 14560, "train_speed(iter/s)": 0.416816 }, { "acc": 0.84898176, "epoch": 0.39436276500690437, "grad_norm": 6.815413475036621, "learning_rate": 9.935601644649306e-06, "loss": 0.75208511, "memory(GiB)": 34.88, "step": 14565, "train_speed(iter/s)": 0.416822 }, { "acc": 0.86093969, "epoch": 0.39449814529011995, "grad_norm": 15.99086856842041, "learning_rate": 9.935512090548584e-06, "loss": 0.68882961, "memory(GiB)": 34.88, "step": 14570, "train_speed(iter/s)": 0.416828 }, { "acc": 0.84819489, "epoch": 0.3946335255733355, "grad_norm": 7.439855575561523, "learning_rate": 9.935422474627136e-06, "loss": 0.77131453, "memory(GiB)": 34.88, "step": 14575, "train_speed(iter/s)": 0.416834 }, { "acc": 0.85049381, "epoch": 0.39476890585655106, "grad_norm": 12.408477783203125, "learning_rate": 9.93533279688609e-06, "loss": 0.80701456, "memory(GiB)": 34.88, "step": 14580, "train_speed(iter/s)": 0.416838 }, { "acc": 0.8469347, "epoch": 0.3949042861397666, "grad_norm": 9.066854476928711, "learning_rate": 9.935243057326563e-06, "loss": 0.72537813, "memory(GiB)": 34.88, "step": 14585, "train_speed(iter/s)": 0.416845 }, { "acc": 0.87403088, "epoch": 0.39503966642298216, "grad_norm": 4.64716100692749, "learning_rate": 9.935153255949685e-06, "loss": 0.57986226, "memory(GiB)": 34.88, "step": 14590, "train_speed(iter/s)": 0.416851 }, { "acc": 0.84954395, "epoch": 0.3951750467061977, "grad_norm": 8.52739143371582, "learning_rate": 9.935063392756577e-06, "loss": 0.72392416, "memory(GiB)": 34.88, "step": 14595, "train_speed(iter/s)": 0.416857 }, { "acc": 0.84769211, "epoch": 0.39531042698941327, "grad_norm": 9.537177085876465, "learning_rate": 9.934973467748366e-06, "loss": 0.87135363, "memory(GiB)": 34.88, "step": 14600, "train_speed(iter/s)": 0.416863 }, { "acc": 0.84402494, "epoch": 0.3954458072726288, "grad_norm": 13.293854713439941, "learning_rate": 9.93488348092618e-06, "loss": 0.82731638, "memory(GiB)": 34.88, "step": 14605, "train_speed(iter/s)": 0.416867 }, { "acc": 0.84482727, "epoch": 0.39558118755584437, "grad_norm": 11.154311180114746, "learning_rate": 9.934793432291143e-06, "loss": 0.76125326, "memory(GiB)": 34.88, "step": 14610, "train_speed(iter/s)": 0.416873 }, { "acc": 0.86926842, "epoch": 0.3957165678390599, "grad_norm": 5.916108131408691, "learning_rate": 9.934703321844386e-06, "loss": 0.67867579, "memory(GiB)": 34.88, "step": 14615, "train_speed(iter/s)": 0.416878 }, { "acc": 0.8577282, "epoch": 0.3958519481222755, "grad_norm": 8.571475982666016, "learning_rate": 9.934613149587035e-06, "loss": 0.80076313, "memory(GiB)": 34.88, "step": 14620, "train_speed(iter/s)": 0.416884 }, { "acc": 0.85352221, "epoch": 0.395987328405491, "grad_norm": 11.413241386413574, "learning_rate": 9.934522915520224e-06, "loss": 0.74084754, "memory(GiB)": 34.88, "step": 14625, "train_speed(iter/s)": 0.41689 }, { "acc": 0.85795784, "epoch": 0.3961227086887066, "grad_norm": 9.949962615966797, "learning_rate": 9.93443261964508e-06, "loss": 0.7704195, "memory(GiB)": 34.88, "step": 14630, "train_speed(iter/s)": 0.416897 }, { "acc": 0.87722988, "epoch": 0.3962580889719221, "grad_norm": 12.149606704711914, "learning_rate": 9.934342261962734e-06, "loss": 0.5658422, "memory(GiB)": 34.88, "step": 14635, "train_speed(iter/s)": 0.416902 }, { "acc": 0.82511692, "epoch": 0.3963934692551377, "grad_norm": 21.93450927734375, "learning_rate": 9.93425184247432e-06, "loss": 0.88048973, "memory(GiB)": 34.88, "step": 14640, "train_speed(iter/s)": 0.416908 }, { "acc": 0.83476114, "epoch": 0.3965288495383532, "grad_norm": 6.756059646606445, "learning_rate": 9.934161361180966e-06, "loss": 0.88198338, "memory(GiB)": 34.88, "step": 14645, "train_speed(iter/s)": 0.416913 }, { "acc": 0.84944553, "epoch": 0.3966642298215688, "grad_norm": 7.457823276519775, "learning_rate": 9.934070818083812e-06, "loss": 0.83605261, "memory(GiB)": 34.88, "step": 14650, "train_speed(iter/s)": 0.416919 }, { "acc": 0.85580416, "epoch": 0.3967996101047843, "grad_norm": 12.9188814163208, "learning_rate": 9.933980213183988e-06, "loss": 0.75808487, "memory(GiB)": 34.88, "step": 14655, "train_speed(iter/s)": 0.416925 }, { "acc": 0.8566946, "epoch": 0.3969349903879999, "grad_norm": 7.992600917816162, "learning_rate": 9.933889546482632e-06, "loss": 0.77688456, "memory(GiB)": 34.88, "step": 14660, "train_speed(iter/s)": 0.416932 }, { "acc": 0.85143986, "epoch": 0.3970703706712154, "grad_norm": 8.332718849182129, "learning_rate": 9.933798817980875e-06, "loss": 0.73176069, "memory(GiB)": 34.88, "step": 14665, "train_speed(iter/s)": 0.416937 }, { "acc": 0.83671017, "epoch": 0.397205750954431, "grad_norm": 9.770042419433594, "learning_rate": 9.933708027679857e-06, "loss": 0.78504786, "memory(GiB)": 34.88, "step": 14670, "train_speed(iter/s)": 0.416943 }, { "acc": 0.86190853, "epoch": 0.3973411312376465, "grad_norm": 10.071475982666016, "learning_rate": 9.933617175580715e-06, "loss": 0.6314888, "memory(GiB)": 34.88, "step": 14675, "train_speed(iter/s)": 0.416947 }, { "acc": 0.84573555, "epoch": 0.3974765115208621, "grad_norm": 9.705499649047852, "learning_rate": 9.933526261684587e-06, "loss": 0.86839218, "memory(GiB)": 34.88, "step": 14680, "train_speed(iter/s)": 0.416953 }, { "acc": 0.87175636, "epoch": 0.39761189180407763, "grad_norm": 9.512992858886719, "learning_rate": 9.933435285992612e-06, "loss": 0.65236411, "memory(GiB)": 34.88, "step": 14685, "train_speed(iter/s)": 0.416958 }, { "acc": 0.84871502, "epoch": 0.3977472720872932, "grad_norm": 12.187167167663574, "learning_rate": 9.933344248505928e-06, "loss": 0.69877987, "memory(GiB)": 34.88, "step": 14690, "train_speed(iter/s)": 0.416963 }, { "acc": 0.81971836, "epoch": 0.39788265237050874, "grad_norm": 9.27974796295166, "learning_rate": 9.933253149225679e-06, "loss": 0.94475346, "memory(GiB)": 34.88, "step": 14695, "train_speed(iter/s)": 0.416968 }, { "acc": 0.82946911, "epoch": 0.3980180326537243, "grad_norm": 13.762006759643555, "learning_rate": 9.933161988153002e-06, "loss": 0.89588289, "memory(GiB)": 34.88, "step": 14700, "train_speed(iter/s)": 0.416974 }, { "acc": 0.86270924, "epoch": 0.39815341293693984, "grad_norm": 9.141373634338379, "learning_rate": 9.933070765289043e-06, "loss": 0.74768615, "memory(GiB)": 34.88, "step": 14705, "train_speed(iter/s)": 0.41698 }, { "acc": 0.83191242, "epoch": 0.3982887932201554, "grad_norm": 10.059118270874023, "learning_rate": 9.932979480634942e-06, "loss": 0.74475527, "memory(GiB)": 34.88, "step": 14710, "train_speed(iter/s)": 0.416985 }, { "acc": 0.84915657, "epoch": 0.39842417350337095, "grad_norm": 8.262664794921875, "learning_rate": 9.932888134191842e-06, "loss": 0.82745895, "memory(GiB)": 34.88, "step": 14715, "train_speed(iter/s)": 0.416991 }, { "acc": 0.87351704, "epoch": 0.39855955378658653, "grad_norm": 11.522454261779785, "learning_rate": 9.932796725960888e-06, "loss": 0.74990005, "memory(GiB)": 34.88, "step": 14720, "train_speed(iter/s)": 0.416998 }, { "acc": 0.8390728, "epoch": 0.39869493406980205, "grad_norm": 20.6271915435791, "learning_rate": 9.932705255943229e-06, "loss": 0.78671355, "memory(GiB)": 34.88, "step": 14725, "train_speed(iter/s)": 0.417003 }, { "acc": 0.85464411, "epoch": 0.39883031435301763, "grad_norm": 9.307877540588379, "learning_rate": 9.932613724140002e-06, "loss": 0.7768611, "memory(GiB)": 34.88, "step": 14730, "train_speed(iter/s)": 0.417009 }, { "acc": 0.85155802, "epoch": 0.39896569463623316, "grad_norm": 4.468197822570801, "learning_rate": 9.932522130552364e-06, "loss": 0.66735563, "memory(GiB)": 34.88, "step": 14735, "train_speed(iter/s)": 0.417015 }, { "acc": 0.8329793, "epoch": 0.39910107491944874, "grad_norm": 11.392106056213379, "learning_rate": 9.932430475181455e-06, "loss": 0.83101139, "memory(GiB)": 34.88, "step": 14740, "train_speed(iter/s)": 0.417019 }, { "acc": 0.85538902, "epoch": 0.39923645520266426, "grad_norm": 44.9201545715332, "learning_rate": 9.932338758028426e-06, "loss": 0.66888132, "memory(GiB)": 34.88, "step": 14745, "train_speed(iter/s)": 0.417025 }, { "acc": 0.84250298, "epoch": 0.39937183548587984, "grad_norm": 14.317628860473633, "learning_rate": 9.932246979094425e-06, "loss": 0.85163078, "memory(GiB)": 34.88, "step": 14750, "train_speed(iter/s)": 0.41703 }, { "acc": 0.83590088, "epoch": 0.39950721576909537, "grad_norm": 8.202058792114258, "learning_rate": 9.932155138380602e-06, "loss": 0.86315908, "memory(GiB)": 34.88, "step": 14755, "train_speed(iter/s)": 0.417034 }, { "acc": 0.88094482, "epoch": 0.39964259605231095, "grad_norm": 11.948177337646484, "learning_rate": 9.93206323588811e-06, "loss": 0.61705289, "memory(GiB)": 34.88, "step": 14760, "train_speed(iter/s)": 0.417041 }, { "acc": 0.88346109, "epoch": 0.3997779763355265, "grad_norm": 6.433692932128906, "learning_rate": 9.931971271618096e-06, "loss": 0.61139145, "memory(GiB)": 34.88, "step": 14765, "train_speed(iter/s)": 0.417047 }, { "acc": 0.85450382, "epoch": 0.39991335661874206, "grad_norm": 19.256832122802734, "learning_rate": 9.931879245571714e-06, "loss": 0.75794191, "memory(GiB)": 34.88, "step": 14770, "train_speed(iter/s)": 0.417052 }, { "acc": 0.84366684, "epoch": 0.4000487369019576, "grad_norm": 10.229222297668457, "learning_rate": 9.931787157750118e-06, "loss": 0.82264175, "memory(GiB)": 34.88, "step": 14775, "train_speed(iter/s)": 0.417058 }, { "acc": 0.85189743, "epoch": 0.40018411718517316, "grad_norm": 9.22686767578125, "learning_rate": 9.931695008154459e-06, "loss": 0.73000822, "memory(GiB)": 34.88, "step": 14780, "train_speed(iter/s)": 0.417064 }, { "acc": 0.85361061, "epoch": 0.4003194974683887, "grad_norm": 8.535470008850098, "learning_rate": 9.931602796785894e-06, "loss": 0.68209572, "memory(GiB)": 34.88, "step": 14785, "train_speed(iter/s)": 0.417068 }, { "acc": 0.82563515, "epoch": 0.40045487775160427, "grad_norm": 12.729523658752441, "learning_rate": 9.931510523645575e-06, "loss": 0.91784439, "memory(GiB)": 34.88, "step": 14790, "train_speed(iter/s)": 0.417074 }, { "acc": 0.83507757, "epoch": 0.4005902580348198, "grad_norm": 9.594342231750488, "learning_rate": 9.931418188734662e-06, "loss": 0.89586372, "memory(GiB)": 34.88, "step": 14795, "train_speed(iter/s)": 0.417079 }, { "acc": 0.86513443, "epoch": 0.40072563831803537, "grad_norm": 10.286025047302246, "learning_rate": 9.931325792054308e-06, "loss": 0.65763335, "memory(GiB)": 34.88, "step": 14800, "train_speed(iter/s)": 0.417083 }, { "acc": 0.83071251, "epoch": 0.4008610186012509, "grad_norm": 17.528356552124023, "learning_rate": 9.931233333605672e-06, "loss": 0.88298626, "memory(GiB)": 34.88, "step": 14805, "train_speed(iter/s)": 0.417089 }, { "acc": 0.84053965, "epoch": 0.4009963988844665, "grad_norm": 8.816692352294922, "learning_rate": 9.931140813389915e-06, "loss": 0.78953452, "memory(GiB)": 34.88, "step": 14810, "train_speed(iter/s)": 0.417095 }, { "acc": 0.85289564, "epoch": 0.401131779167682, "grad_norm": 7.064871311187744, "learning_rate": 9.931048231408188e-06, "loss": 0.74548135, "memory(GiB)": 34.88, "step": 14815, "train_speed(iter/s)": 0.4171 }, { "acc": 0.84822083, "epoch": 0.4012671594508976, "grad_norm": 8.104216575622559, "learning_rate": 9.930955587661661e-06, "loss": 0.67275381, "memory(GiB)": 34.88, "step": 14820, "train_speed(iter/s)": 0.417105 }, { "acc": 0.85632915, "epoch": 0.4014025397341131, "grad_norm": 8.200675010681152, "learning_rate": 9.930862882151487e-06, "loss": 0.7477169, "memory(GiB)": 34.88, "step": 14825, "train_speed(iter/s)": 0.417111 }, { "acc": 0.85551796, "epoch": 0.4015379200173287, "grad_norm": 20.20564079284668, "learning_rate": 9.930770114878828e-06, "loss": 0.74684114, "memory(GiB)": 34.88, "step": 14830, "train_speed(iter/s)": 0.417117 }, { "acc": 0.85735455, "epoch": 0.4016733003005442, "grad_norm": 6.076716899871826, "learning_rate": 9.930677285844851e-06, "loss": 0.76497245, "memory(GiB)": 34.88, "step": 14835, "train_speed(iter/s)": 0.417122 }, { "acc": 0.85167904, "epoch": 0.4018086805837598, "grad_norm": 4.93585729598999, "learning_rate": 9.930584395050712e-06, "loss": 0.78278112, "memory(GiB)": 34.88, "step": 14840, "train_speed(iter/s)": 0.417127 }, { "acc": 0.85495453, "epoch": 0.4019440608669753, "grad_norm": 10.40556526184082, "learning_rate": 9.930491442497582e-06, "loss": 0.77550631, "memory(GiB)": 34.88, "step": 14845, "train_speed(iter/s)": 0.417132 }, { "acc": 0.87923203, "epoch": 0.4020794411501909, "grad_norm": 5.575555324554443, "learning_rate": 9.93039842818662e-06, "loss": 0.62665701, "memory(GiB)": 34.88, "step": 14850, "train_speed(iter/s)": 0.417136 }, { "acc": 0.84653196, "epoch": 0.4022148214334064, "grad_norm": 13.156060218811035, "learning_rate": 9.930305352118993e-06, "loss": 0.82481022, "memory(GiB)": 34.88, "step": 14855, "train_speed(iter/s)": 0.417142 }, { "acc": 0.87190952, "epoch": 0.402350201716622, "grad_norm": 10.012383460998535, "learning_rate": 9.930212214295868e-06, "loss": 0.59487543, "memory(GiB)": 34.88, "step": 14860, "train_speed(iter/s)": 0.417148 }, { "acc": 0.83026562, "epoch": 0.4024855819998375, "grad_norm": 5.738522052764893, "learning_rate": 9.930119014718409e-06, "loss": 0.87410774, "memory(GiB)": 34.88, "step": 14865, "train_speed(iter/s)": 0.417153 }, { "acc": 0.86540146, "epoch": 0.4026209622830531, "grad_norm": 10.40644645690918, "learning_rate": 9.930025753387786e-06, "loss": 0.6988081, "memory(GiB)": 34.88, "step": 14870, "train_speed(iter/s)": 0.417159 }, { "acc": 0.83568802, "epoch": 0.40275634256626863, "grad_norm": 6.8030195236206055, "learning_rate": 9.929932430305167e-06, "loss": 0.85487909, "memory(GiB)": 34.88, "step": 14875, "train_speed(iter/s)": 0.417165 }, { "acc": 0.87345963, "epoch": 0.4028917228494842, "grad_norm": 10.824352264404297, "learning_rate": 9.92983904547172e-06, "loss": 0.60265012, "memory(GiB)": 34.88, "step": 14880, "train_speed(iter/s)": 0.41717 }, { "acc": 0.84907684, "epoch": 0.40302710313269974, "grad_norm": 25.055702209472656, "learning_rate": 9.929745598888617e-06, "loss": 0.75312696, "memory(GiB)": 34.88, "step": 14885, "train_speed(iter/s)": 0.417176 }, { "acc": 0.86019707, "epoch": 0.4031624834159153, "grad_norm": 4.940569877624512, "learning_rate": 9.929652090557024e-06, "loss": 0.67536039, "memory(GiB)": 34.88, "step": 14890, "train_speed(iter/s)": 0.417182 }, { "acc": 0.85137768, "epoch": 0.40329786369913084, "grad_norm": 9.496589660644531, "learning_rate": 9.929558520478115e-06, "loss": 0.79436874, "memory(GiB)": 34.88, "step": 14895, "train_speed(iter/s)": 0.417187 }, { "acc": 0.84870071, "epoch": 0.4034332439823464, "grad_norm": 7.6746826171875, "learning_rate": 9.929464888653065e-06, "loss": 0.7858304, "memory(GiB)": 34.88, "step": 14900, "train_speed(iter/s)": 0.417192 }, { "acc": 0.85959768, "epoch": 0.40356862426556195, "grad_norm": 7.472569942474365, "learning_rate": 9.929371195083042e-06, "loss": 0.71995249, "memory(GiB)": 34.88, "step": 14905, "train_speed(iter/s)": 0.417197 }, { "acc": 0.85349903, "epoch": 0.40370400454877753, "grad_norm": 4.558986186981201, "learning_rate": 9.929277439769225e-06, "loss": 0.73260512, "memory(GiB)": 34.88, "step": 14910, "train_speed(iter/s)": 0.417203 }, { "acc": 0.84614649, "epoch": 0.40383938483199305, "grad_norm": 7.858669757843018, "learning_rate": 9.929183622712783e-06, "loss": 0.80381746, "memory(GiB)": 34.88, "step": 14915, "train_speed(iter/s)": 0.417208 }, { "acc": 0.83969812, "epoch": 0.40397476511520863, "grad_norm": 11.34156322479248, "learning_rate": 9.929089743914895e-06, "loss": 0.75634704, "memory(GiB)": 34.88, "step": 14920, "train_speed(iter/s)": 0.417214 }, { "acc": 0.85718994, "epoch": 0.40411014539842416, "grad_norm": 11.114236831665039, "learning_rate": 9.928995803376736e-06, "loss": 0.72887087, "memory(GiB)": 34.88, "step": 14925, "train_speed(iter/s)": 0.41722 }, { "acc": 0.85892248, "epoch": 0.40424552568163974, "grad_norm": 15.69001579284668, "learning_rate": 9.928901801099482e-06, "loss": 0.73750219, "memory(GiB)": 34.88, "step": 14930, "train_speed(iter/s)": 0.417226 }, { "acc": 0.83969193, "epoch": 0.40438090596485526, "grad_norm": 7.765518665313721, "learning_rate": 9.928807737084311e-06, "loss": 0.84413481, "memory(GiB)": 34.88, "step": 14935, "train_speed(iter/s)": 0.417231 }, { "acc": 0.8530529, "epoch": 0.40451628624807084, "grad_norm": 7.206544876098633, "learning_rate": 9.928713611332404e-06, "loss": 0.66287947, "memory(GiB)": 34.88, "step": 14940, "train_speed(iter/s)": 0.417236 }, { "acc": 0.83907452, "epoch": 0.40465166653128637, "grad_norm": 10.547680854797363, "learning_rate": 9.928619423844935e-06, "loss": 0.83399353, "memory(GiB)": 34.88, "step": 14945, "train_speed(iter/s)": 0.417242 }, { "acc": 0.87410507, "epoch": 0.40478704681450195, "grad_norm": 21.874576568603516, "learning_rate": 9.928525174623086e-06, "loss": 0.5452775, "memory(GiB)": 34.88, "step": 14950, "train_speed(iter/s)": 0.417247 }, { "acc": 0.84317875, "epoch": 0.4049224270977175, "grad_norm": 8.162652969360352, "learning_rate": 9.928430863668037e-06, "loss": 0.77678919, "memory(GiB)": 34.88, "step": 14955, "train_speed(iter/s)": 0.417252 }, { "acc": 0.84656487, "epoch": 0.40505780738093305, "grad_norm": 14.352901458740234, "learning_rate": 9.928336490980974e-06, "loss": 0.85731907, "memory(GiB)": 34.88, "step": 14960, "train_speed(iter/s)": 0.417257 }, { "acc": 0.83669968, "epoch": 0.4051931876641486, "grad_norm": 10.196084022521973, "learning_rate": 9.928242056563074e-06, "loss": 0.77083387, "memory(GiB)": 34.88, "step": 14965, "train_speed(iter/s)": 0.41726 }, { "acc": 0.84509249, "epoch": 0.40532856794736416, "grad_norm": 7.145989418029785, "learning_rate": 9.928147560415523e-06, "loss": 0.82509251, "memory(GiB)": 34.88, "step": 14970, "train_speed(iter/s)": 0.417265 }, { "acc": 0.8607728, "epoch": 0.4054639482305797, "grad_norm": 8.44675064086914, "learning_rate": 9.928053002539504e-06, "loss": 0.7206975, "memory(GiB)": 34.88, "step": 14975, "train_speed(iter/s)": 0.417271 }, { "acc": 0.84934998, "epoch": 0.40559932851379527, "grad_norm": 7.62518835067749, "learning_rate": 9.927958382936196e-06, "loss": 0.65226874, "memory(GiB)": 34.88, "step": 14980, "train_speed(iter/s)": 0.417277 }, { "acc": 0.84391422, "epoch": 0.4057347087970108, "grad_norm": 6.855854511260986, "learning_rate": 9.927863701606795e-06, "loss": 0.78254046, "memory(GiB)": 34.88, "step": 14985, "train_speed(iter/s)": 0.417282 }, { "acc": 0.84449291, "epoch": 0.40587008908022637, "grad_norm": 7.134693145751953, "learning_rate": 9.927768958552478e-06, "loss": 0.80077343, "memory(GiB)": 34.88, "step": 14990, "train_speed(iter/s)": 0.417288 }, { "acc": 0.86329975, "epoch": 0.4060054693634419, "grad_norm": 17.87375259399414, "learning_rate": 9.927674153774438e-06, "loss": 0.67977223, "memory(GiB)": 34.88, "step": 14995, "train_speed(iter/s)": 0.417293 }, { "acc": 0.88439407, "epoch": 0.4061408496466575, "grad_norm": 12.207440376281738, "learning_rate": 9.927579287273857e-06, "loss": 0.48129683, "memory(GiB)": 34.88, "step": 15000, "train_speed(iter/s)": 0.417299 }, { "acc": 0.86701746, "epoch": 0.406276229929873, "grad_norm": 6.755083084106445, "learning_rate": 9.927484359051927e-06, "loss": 0.60148501, "memory(GiB)": 34.88, "step": 15005, "train_speed(iter/s)": 0.417305 }, { "acc": 0.86020813, "epoch": 0.4064116102130886, "grad_norm": 7.059751987457275, "learning_rate": 9.927389369109837e-06, "loss": 0.66627512, "memory(GiB)": 34.88, "step": 15010, "train_speed(iter/s)": 0.417311 }, { "acc": 0.82907648, "epoch": 0.4065469904963041, "grad_norm": 13.333553314208984, "learning_rate": 9.927294317448778e-06, "loss": 0.88922806, "memory(GiB)": 34.88, "step": 15015, "train_speed(iter/s)": 0.417316 }, { "acc": 0.86955423, "epoch": 0.4066823707795197, "grad_norm": 6.176367282867432, "learning_rate": 9.927199204069937e-06, "loss": 0.64107332, "memory(GiB)": 34.88, "step": 15020, "train_speed(iter/s)": 0.417321 }, { "acc": 0.86598873, "epoch": 0.4068177510627352, "grad_norm": 15.63505744934082, "learning_rate": 9.927104028974506e-06, "loss": 0.66169968, "memory(GiB)": 34.88, "step": 15025, "train_speed(iter/s)": 0.417325 }, { "acc": 0.87343445, "epoch": 0.4069531313459508, "grad_norm": 14.369864463806152, "learning_rate": 9.927008792163681e-06, "loss": 0.6625689, "memory(GiB)": 34.88, "step": 15030, "train_speed(iter/s)": 0.417331 }, { "acc": 0.84002724, "epoch": 0.4070885116291663, "grad_norm": 7.694993019104004, "learning_rate": 9.926913493638654e-06, "loss": 0.8316349, "memory(GiB)": 34.88, "step": 15035, "train_speed(iter/s)": 0.417336 }, { "acc": 0.86682653, "epoch": 0.4072238919123819, "grad_norm": 8.974102020263672, "learning_rate": 9.926818133400617e-06, "loss": 0.58885069, "memory(GiB)": 34.88, "step": 15040, "train_speed(iter/s)": 0.417341 }, { "acc": 0.84817181, "epoch": 0.4073592721955974, "grad_norm": 6.778641700744629, "learning_rate": 9.926722711450763e-06, "loss": 0.69972987, "memory(GiB)": 34.88, "step": 15045, "train_speed(iter/s)": 0.417347 }, { "acc": 0.83697023, "epoch": 0.407494652478813, "grad_norm": 6.2611308097839355, "learning_rate": 9.926627227790292e-06, "loss": 0.8573122, "memory(GiB)": 34.88, "step": 15050, "train_speed(iter/s)": 0.41735 }, { "acc": 0.84879379, "epoch": 0.4076300327620285, "grad_norm": 39.97855758666992, "learning_rate": 9.926531682420395e-06, "loss": 0.81233788, "memory(GiB)": 34.88, "step": 15055, "train_speed(iter/s)": 0.417356 }, { "acc": 0.85221367, "epoch": 0.4077654130452441, "grad_norm": 7.4357733726501465, "learning_rate": 9.926436075342273e-06, "loss": 0.77843142, "memory(GiB)": 34.88, "step": 15060, "train_speed(iter/s)": 0.417361 }, { "acc": 0.8354723, "epoch": 0.40790079332845963, "grad_norm": 11.641487121582031, "learning_rate": 9.926340406557124e-06, "loss": 0.8670928, "memory(GiB)": 34.88, "step": 15065, "train_speed(iter/s)": 0.417366 }, { "acc": 0.88125648, "epoch": 0.4080361736116752, "grad_norm": 22.859636306762695, "learning_rate": 9.926244676066142e-06, "loss": 0.6241991, "memory(GiB)": 34.88, "step": 15070, "train_speed(iter/s)": 0.417372 }, { "acc": 0.8653574, "epoch": 0.40817155389489074, "grad_norm": 6.478321075439453, "learning_rate": 9.926148883870528e-06, "loss": 0.62049513, "memory(GiB)": 34.88, "step": 15075, "train_speed(iter/s)": 0.417378 }, { "acc": 0.85561218, "epoch": 0.4083069341781063, "grad_norm": 6.468529224395752, "learning_rate": 9.926053029971486e-06, "loss": 0.79042554, "memory(GiB)": 34.88, "step": 15080, "train_speed(iter/s)": 0.417384 }, { "acc": 0.84247723, "epoch": 0.40844231446132184, "grad_norm": 7.1132121086120605, "learning_rate": 9.92595711437021e-06, "loss": 0.78421593, "memory(GiB)": 34.88, "step": 15085, "train_speed(iter/s)": 0.417389 }, { "acc": 0.8441885, "epoch": 0.4085776947445374, "grad_norm": 5.306155204772949, "learning_rate": 9.92586113706791e-06, "loss": 0.78687119, "memory(GiB)": 34.88, "step": 15090, "train_speed(iter/s)": 0.417394 }, { "acc": 0.8494091, "epoch": 0.40871307502775295, "grad_norm": 7.152450084686279, "learning_rate": 9.92576509806578e-06, "loss": 0.79380584, "memory(GiB)": 34.88, "step": 15095, "train_speed(iter/s)": 0.417397 }, { "acc": 0.85424118, "epoch": 0.40884845531096853, "grad_norm": 7.461332321166992, "learning_rate": 9.925668997365027e-06, "loss": 0.75272899, "memory(GiB)": 34.88, "step": 15100, "train_speed(iter/s)": 0.417403 }, { "acc": 0.85945597, "epoch": 0.40898383559418405, "grad_norm": 4.1811041831970215, "learning_rate": 9.925572834966855e-06, "loss": 0.6513011, "memory(GiB)": 34.88, "step": 15105, "train_speed(iter/s)": 0.417408 }, { "acc": 0.84746685, "epoch": 0.40911921587739963, "grad_norm": 9.634078025817871, "learning_rate": 9.925476610872467e-06, "loss": 0.71411009, "memory(GiB)": 34.88, "step": 15110, "train_speed(iter/s)": 0.417414 }, { "acc": 0.85242748, "epoch": 0.40925459616061516, "grad_norm": 7.1584086418151855, "learning_rate": 9.92538032508307e-06, "loss": 0.73327808, "memory(GiB)": 34.88, "step": 15115, "train_speed(iter/s)": 0.41742 }, { "acc": 0.88091803, "epoch": 0.40938997644383074, "grad_norm": 7.886937141418457, "learning_rate": 9.925283977599869e-06, "loss": 0.60306315, "memory(GiB)": 34.88, "step": 15120, "train_speed(iter/s)": 0.417426 }, { "acc": 0.83625002, "epoch": 0.40952535672704626, "grad_norm": 12.497305870056152, "learning_rate": 9.925187568424072e-06, "loss": 0.81141491, "memory(GiB)": 34.88, "step": 15125, "train_speed(iter/s)": 0.417432 }, { "acc": 0.85238285, "epoch": 0.40966073701026184, "grad_norm": 9.73519515991211, "learning_rate": 9.925091097556885e-06, "loss": 0.70630217, "memory(GiB)": 34.88, "step": 15130, "train_speed(iter/s)": 0.417437 }, { "acc": 0.83746033, "epoch": 0.40979611729347737, "grad_norm": 7.4574875831604, "learning_rate": 9.92499456499952e-06, "loss": 0.79322138, "memory(GiB)": 34.88, "step": 15135, "train_speed(iter/s)": 0.417442 }, { "acc": 0.82676945, "epoch": 0.40993149757669295, "grad_norm": 10.130250930786133, "learning_rate": 9.924897970753184e-06, "loss": 0.85881042, "memory(GiB)": 34.88, "step": 15140, "train_speed(iter/s)": 0.417448 }, { "acc": 0.83750267, "epoch": 0.4100668778599085, "grad_norm": 34.681129455566406, "learning_rate": 9.924801314819087e-06, "loss": 0.86648483, "memory(GiB)": 34.88, "step": 15145, "train_speed(iter/s)": 0.417452 }, { "acc": 0.85247993, "epoch": 0.41020225814312405, "grad_norm": 4.928511142730713, "learning_rate": 9.924704597198437e-06, "loss": 0.65107908, "memory(GiB)": 34.88, "step": 15150, "train_speed(iter/s)": 0.417458 }, { "acc": 0.84717598, "epoch": 0.4103376384263396, "grad_norm": 14.512205123901367, "learning_rate": 9.92460781789245e-06, "loss": 0.79936609, "memory(GiB)": 34.88, "step": 15155, "train_speed(iter/s)": 0.417463 }, { "acc": 0.84203749, "epoch": 0.41047301870955516, "grad_norm": 9.30715274810791, "learning_rate": 9.924510976902338e-06, "loss": 0.77560267, "memory(GiB)": 34.88, "step": 15160, "train_speed(iter/s)": 0.417468 }, { "acc": 0.82320766, "epoch": 0.4106083989927707, "grad_norm": 9.86804485321045, "learning_rate": 9.92441407422931e-06, "loss": 0.91886616, "memory(GiB)": 34.88, "step": 15165, "train_speed(iter/s)": 0.417474 }, { "acc": 0.86997919, "epoch": 0.41074377927598626, "grad_norm": 24.472612380981445, "learning_rate": 9.924317109874585e-06, "loss": 0.65682011, "memory(GiB)": 34.88, "step": 15170, "train_speed(iter/s)": 0.417479 }, { "acc": 0.88651562, "epoch": 0.4108791595592018, "grad_norm": 5.240652084350586, "learning_rate": 9.924220083839376e-06, "loss": 0.53176112, "memory(GiB)": 34.88, "step": 15175, "train_speed(iter/s)": 0.417484 }, { "acc": 0.85071507, "epoch": 0.41101453984241737, "grad_norm": 9.834959983825684, "learning_rate": 9.924122996124895e-06, "loss": 0.79587498, "memory(GiB)": 34.88, "step": 15180, "train_speed(iter/s)": 0.41749 }, { "acc": 0.87323008, "epoch": 0.4111499201256329, "grad_norm": 8.950082778930664, "learning_rate": 9.924025846732364e-06, "loss": 0.59085331, "memory(GiB)": 34.88, "step": 15185, "train_speed(iter/s)": 0.417495 }, { "acc": 0.85969906, "epoch": 0.4112853004088485, "grad_norm": 6.464903354644775, "learning_rate": 9.923928635662995e-06, "loss": 0.66883111, "memory(GiB)": 34.88, "step": 15190, "train_speed(iter/s)": 0.4175 }, { "acc": 0.86481075, "epoch": 0.411420680692064, "grad_norm": 10.526714324951172, "learning_rate": 9.923831362918008e-06, "loss": 0.69940004, "memory(GiB)": 34.88, "step": 15195, "train_speed(iter/s)": 0.417505 }, { "acc": 0.85000696, "epoch": 0.4115560609752796, "grad_norm": 9.262223243713379, "learning_rate": 9.923734028498623e-06, "loss": 0.76460052, "memory(GiB)": 34.88, "step": 15200, "train_speed(iter/s)": 0.417511 }, { "acc": 0.86513929, "epoch": 0.4116914412584951, "grad_norm": 17.726713180541992, "learning_rate": 9.923636632406057e-06, "loss": 0.65083032, "memory(GiB)": 34.88, "step": 15205, "train_speed(iter/s)": 0.417517 }, { "acc": 0.86149073, "epoch": 0.4118268215417107, "grad_norm": 6.240720748901367, "learning_rate": 9.923539174641532e-06, "loss": 0.64447908, "memory(GiB)": 34.88, "step": 15210, "train_speed(iter/s)": 0.417522 }, { "acc": 0.85192966, "epoch": 0.4119622018249262, "grad_norm": 6.159252166748047, "learning_rate": 9.923441655206264e-06, "loss": 0.73320112, "memory(GiB)": 34.88, "step": 15215, "train_speed(iter/s)": 0.417527 }, { "acc": 0.8454689, "epoch": 0.4120975821081418, "grad_norm": 7.2613396644592285, "learning_rate": 9.92334407410148e-06, "loss": 0.82488737, "memory(GiB)": 34.88, "step": 15220, "train_speed(iter/s)": 0.417533 }, { "acc": 0.85947704, "epoch": 0.4122329623913573, "grad_norm": 9.935113906860352, "learning_rate": 9.9232464313284e-06, "loss": 0.75778189, "memory(GiB)": 34.88, "step": 15225, "train_speed(iter/s)": 0.417538 }, { "acc": 0.83812027, "epoch": 0.4123683426745729, "grad_norm": 14.889809608459473, "learning_rate": 9.92314872688825e-06, "loss": 0.81691561, "memory(GiB)": 34.88, "step": 15230, "train_speed(iter/s)": 0.417544 }, { "acc": 0.81827717, "epoch": 0.4125037229577884, "grad_norm": 12.632916450500488, "learning_rate": 9.92305096078225e-06, "loss": 0.94659901, "memory(GiB)": 34.88, "step": 15235, "train_speed(iter/s)": 0.417549 }, { "acc": 0.85920982, "epoch": 0.412639103241004, "grad_norm": 22.337974548339844, "learning_rate": 9.922953133011626e-06, "loss": 0.72549515, "memory(GiB)": 34.88, "step": 15240, "train_speed(iter/s)": 0.417554 }, { "acc": 0.82547874, "epoch": 0.4127744835242195, "grad_norm": 10.171690940856934, "learning_rate": 9.922855243577605e-06, "loss": 0.96840515, "memory(GiB)": 34.88, "step": 15245, "train_speed(iter/s)": 0.41756 }, { "acc": 0.8367691, "epoch": 0.4129098638074351, "grad_norm": 4.71132755279541, "learning_rate": 9.922757292481412e-06, "loss": 0.8890789, "memory(GiB)": 34.88, "step": 15250, "train_speed(iter/s)": 0.417565 }, { "acc": 0.86080856, "epoch": 0.41304524409065063, "grad_norm": 7.723493576049805, "learning_rate": 9.922659279724273e-06, "loss": 0.71677046, "memory(GiB)": 34.88, "step": 15255, "train_speed(iter/s)": 0.417571 }, { "acc": 0.86589813, "epoch": 0.4131806243738662, "grad_norm": 7.367323398590088, "learning_rate": 9.922561205307419e-06, "loss": 0.62127638, "memory(GiB)": 34.88, "step": 15260, "train_speed(iter/s)": 0.417576 }, { "acc": 0.84255886, "epoch": 0.41331600465708174, "grad_norm": 13.162353515625, "learning_rate": 9.922463069232076e-06, "loss": 0.81369867, "memory(GiB)": 34.88, "step": 15265, "train_speed(iter/s)": 0.417582 }, { "acc": 0.85647945, "epoch": 0.4134513849402973, "grad_norm": 8.97662353515625, "learning_rate": 9.922364871499474e-06, "loss": 0.79372497, "memory(GiB)": 34.88, "step": 15270, "train_speed(iter/s)": 0.417588 }, { "acc": 0.82628088, "epoch": 0.41358676522351284, "grad_norm": 7.938100337982178, "learning_rate": 9.922266612110841e-06, "loss": 0.85831242, "memory(GiB)": 34.88, "step": 15275, "train_speed(iter/s)": 0.417593 }, { "acc": 0.84697132, "epoch": 0.4137221455067284, "grad_norm": 8.440220832824707, "learning_rate": 9.922168291067413e-06, "loss": 0.84204712, "memory(GiB)": 34.88, "step": 15280, "train_speed(iter/s)": 0.417598 }, { "acc": 0.84351368, "epoch": 0.41385752578994395, "grad_norm": 9.35611343383789, "learning_rate": 9.922069908370416e-06, "loss": 0.81281929, "memory(GiB)": 34.88, "step": 15285, "train_speed(iter/s)": 0.417603 }, { "acc": 0.82982388, "epoch": 0.41399290607315953, "grad_norm": 48.03872299194336, "learning_rate": 9.921971464021086e-06, "loss": 0.88983803, "memory(GiB)": 34.88, "step": 15290, "train_speed(iter/s)": 0.417609 }, { "acc": 0.85765181, "epoch": 0.41412828635637505, "grad_norm": 6.36954402923584, "learning_rate": 9.921872958020654e-06, "loss": 0.76638327, "memory(GiB)": 34.88, "step": 15295, "train_speed(iter/s)": 0.417614 }, { "acc": 0.84862099, "epoch": 0.41426366663959063, "grad_norm": 15.584587097167969, "learning_rate": 9.921774390370358e-06, "loss": 0.73513689, "memory(GiB)": 34.88, "step": 15300, "train_speed(iter/s)": 0.41762 }, { "acc": 0.83286819, "epoch": 0.41439904692280616, "grad_norm": 9.029135704040527, "learning_rate": 9.921675761071428e-06, "loss": 0.89908123, "memory(GiB)": 34.88, "step": 15305, "train_speed(iter/s)": 0.417625 }, { "acc": 0.88775597, "epoch": 0.41453442720602174, "grad_norm": 17.499059677124023, "learning_rate": 9.921577070125103e-06, "loss": 0.47516909, "memory(GiB)": 34.88, "step": 15310, "train_speed(iter/s)": 0.417631 }, { "acc": 0.87269869, "epoch": 0.41466980748923726, "grad_norm": 8.212188720703125, "learning_rate": 9.921478317532614e-06, "loss": 0.60781016, "memory(GiB)": 34.88, "step": 15315, "train_speed(iter/s)": 0.417637 }, { "acc": 0.84456644, "epoch": 0.41480518777245284, "grad_norm": 10.111594200134277, "learning_rate": 9.921379503295206e-06, "loss": 0.81155729, "memory(GiB)": 34.88, "step": 15320, "train_speed(iter/s)": 0.417642 }, { "acc": 0.8536252, "epoch": 0.41494056805566837, "grad_norm": 26.819414138793945, "learning_rate": 9.92128062741411e-06, "loss": 0.6981082, "memory(GiB)": 34.88, "step": 15325, "train_speed(iter/s)": 0.417647 }, { "acc": 0.85325727, "epoch": 0.41507594833888395, "grad_norm": 7.063305377960205, "learning_rate": 9.92118168989057e-06, "loss": 0.77071552, "memory(GiB)": 34.88, "step": 15330, "train_speed(iter/s)": 0.417652 }, { "acc": 0.85040617, "epoch": 0.4152113286220995, "grad_norm": 5.736580848693848, "learning_rate": 9.92108269072582e-06, "loss": 0.81984892, "memory(GiB)": 34.88, "step": 15335, "train_speed(iter/s)": 0.417657 }, { "acc": 0.84577446, "epoch": 0.41534670890531505, "grad_norm": 12.187152862548828, "learning_rate": 9.920983629921105e-06, "loss": 0.83232203, "memory(GiB)": 34.88, "step": 15340, "train_speed(iter/s)": 0.417663 }, { "acc": 0.82787809, "epoch": 0.4154820891885306, "grad_norm": 9.746193885803223, "learning_rate": 9.920884507477662e-06, "loss": 0.88070545, "memory(GiB)": 34.88, "step": 15345, "train_speed(iter/s)": 0.417668 }, { "acc": 0.86365671, "epoch": 0.41561746947174616, "grad_norm": 6.312047958374023, "learning_rate": 9.920785323396736e-06, "loss": 0.63476086, "memory(GiB)": 34.88, "step": 15350, "train_speed(iter/s)": 0.417672 }, { "acc": 0.83864002, "epoch": 0.4157528497549617, "grad_norm": 10.315839767456055, "learning_rate": 9.920686077679569e-06, "loss": 0.77083435, "memory(GiB)": 34.88, "step": 15355, "train_speed(iter/s)": 0.417677 }, { "acc": 0.8507658, "epoch": 0.41588823003817726, "grad_norm": 12.005118370056152, "learning_rate": 9.920586770327402e-06, "loss": 0.74508905, "memory(GiB)": 34.88, "step": 15360, "train_speed(iter/s)": 0.417682 }, { "acc": 0.83151855, "epoch": 0.4160236103213928, "grad_norm": 18.10087776184082, "learning_rate": 9.920487401341478e-06, "loss": 0.88332024, "memory(GiB)": 34.88, "step": 15365, "train_speed(iter/s)": 0.417688 }, { "acc": 0.8541976, "epoch": 0.41615899060460837, "grad_norm": 8.73265552520752, "learning_rate": 9.920387970723048e-06, "loss": 0.72633858, "memory(GiB)": 34.88, "step": 15370, "train_speed(iter/s)": 0.417693 }, { "acc": 0.85636501, "epoch": 0.4162943708878239, "grad_norm": 11.76880168914795, "learning_rate": 9.92028847847335e-06, "loss": 0.77952466, "memory(GiB)": 34.88, "step": 15375, "train_speed(iter/s)": 0.417697 }, { "acc": 0.83040428, "epoch": 0.4164297511710395, "grad_norm": 9.491561889648438, "learning_rate": 9.920188924593636e-06, "loss": 0.94493179, "memory(GiB)": 34.88, "step": 15380, "train_speed(iter/s)": 0.417703 }, { "acc": 0.83477173, "epoch": 0.416565131454255, "grad_norm": 13.4846773147583, "learning_rate": 9.92008930908515e-06, "loss": 0.85197468, "memory(GiB)": 34.88, "step": 15385, "train_speed(iter/s)": 0.417707 }, { "acc": 0.85621004, "epoch": 0.4167005117374706, "grad_norm": 24.02951431274414, "learning_rate": 9.919989631949144e-06, "loss": 0.75241327, "memory(GiB)": 34.88, "step": 15390, "train_speed(iter/s)": 0.417712 }, { "acc": 0.84824076, "epoch": 0.4168358920206861, "grad_norm": 7.148147106170654, "learning_rate": 9.91988989318686e-06, "loss": 0.69756637, "memory(GiB)": 34.88, "step": 15395, "train_speed(iter/s)": 0.417718 }, { "acc": 0.86149635, "epoch": 0.4169712723039017, "grad_norm": 7.239749431610107, "learning_rate": 9.919790092799551e-06, "loss": 0.63448505, "memory(GiB)": 34.88, "step": 15400, "train_speed(iter/s)": 0.417723 }, { "acc": 0.86227608, "epoch": 0.4171066525871172, "grad_norm": 50.64037322998047, "learning_rate": 9.919690230788469e-06, "loss": 0.74984527, "memory(GiB)": 34.88, "step": 15405, "train_speed(iter/s)": 0.417727 }, { "acc": 0.84842854, "epoch": 0.4172420328703328, "grad_norm": 9.972333908081055, "learning_rate": 9.919590307154862e-06, "loss": 0.76106005, "memory(GiB)": 34.88, "step": 15410, "train_speed(iter/s)": 0.417732 }, { "acc": 0.84672012, "epoch": 0.4173774131535483, "grad_norm": 26.507328033447266, "learning_rate": 9.919490321899984e-06, "loss": 0.77545757, "memory(GiB)": 34.88, "step": 15415, "train_speed(iter/s)": 0.417737 }, { "acc": 0.86077156, "epoch": 0.4175127934367639, "grad_norm": 5.522613525390625, "learning_rate": 9.919390275025087e-06, "loss": 0.75660114, "memory(GiB)": 34.88, "step": 15420, "train_speed(iter/s)": 0.417741 }, { "acc": 0.87270985, "epoch": 0.4176481737199794, "grad_norm": 6.187155246734619, "learning_rate": 9.919290166531422e-06, "loss": 0.62483797, "memory(GiB)": 34.88, "step": 15425, "train_speed(iter/s)": 0.417746 }, { "acc": 0.83445702, "epoch": 0.417783554003195, "grad_norm": 8.004698753356934, "learning_rate": 9.919189996420245e-06, "loss": 0.86803532, "memory(GiB)": 34.88, "step": 15430, "train_speed(iter/s)": 0.417752 }, { "acc": 0.83780651, "epoch": 0.4179189342864105, "grad_norm": 5.492273330688477, "learning_rate": 9.919089764692811e-06, "loss": 0.8923686, "memory(GiB)": 34.88, "step": 15435, "train_speed(iter/s)": 0.417757 }, { "acc": 0.84146013, "epoch": 0.4180543145696261, "grad_norm": 10.037893295288086, "learning_rate": 9.918989471350375e-06, "loss": 0.79915218, "memory(GiB)": 34.88, "step": 15440, "train_speed(iter/s)": 0.417761 }, { "acc": 0.87719784, "epoch": 0.41818969485284163, "grad_norm": 7.419378280639648, "learning_rate": 9.918889116394193e-06, "loss": 0.58714023, "memory(GiB)": 34.88, "step": 15445, "train_speed(iter/s)": 0.417767 }, { "acc": 0.84882822, "epoch": 0.4183250751360572, "grad_norm": 7.125757694244385, "learning_rate": 9.918788699825524e-06, "loss": 0.82425213, "memory(GiB)": 34.88, "step": 15450, "train_speed(iter/s)": 0.417772 }, { "acc": 0.84715853, "epoch": 0.41846045541927274, "grad_norm": 6.266937732696533, "learning_rate": 9.918688221645625e-06, "loss": 0.745432, "memory(GiB)": 34.88, "step": 15455, "train_speed(iter/s)": 0.417776 }, { "acc": 0.84401741, "epoch": 0.4185958357024883, "grad_norm": 14.18823528289795, "learning_rate": 9.918587681855752e-06, "loss": 0.91669779, "memory(GiB)": 34.88, "step": 15460, "train_speed(iter/s)": 0.417781 }, { "acc": 0.85169039, "epoch": 0.41873121598570384, "grad_norm": 9.224616050720215, "learning_rate": 9.918487080457167e-06, "loss": 0.77567759, "memory(GiB)": 34.88, "step": 15465, "train_speed(iter/s)": 0.417786 }, { "acc": 0.86244574, "epoch": 0.4188665962689194, "grad_norm": 10.242584228515625, "learning_rate": 9.918386417451131e-06, "loss": 0.70107908, "memory(GiB)": 34.88, "step": 15470, "train_speed(iter/s)": 0.41779 }, { "acc": 0.85667009, "epoch": 0.41900197655213495, "grad_norm": 8.329750061035156, "learning_rate": 9.918285692838905e-06, "loss": 0.75404644, "memory(GiB)": 34.88, "step": 15475, "train_speed(iter/s)": 0.417795 }, { "acc": 0.84807816, "epoch": 0.41913735683535047, "grad_norm": 11.198434829711914, "learning_rate": 9.918184906621748e-06, "loss": 0.74012976, "memory(GiB)": 34.88, "step": 15480, "train_speed(iter/s)": 0.4178 }, { "acc": 0.86037226, "epoch": 0.41927273711856605, "grad_norm": 6.3491530418396, "learning_rate": 9.918084058800926e-06, "loss": 0.72449322, "memory(GiB)": 34.88, "step": 15485, "train_speed(iter/s)": 0.417804 }, { "acc": 0.87743855, "epoch": 0.4194081174017816, "grad_norm": 6.918399333953857, "learning_rate": 9.917983149377698e-06, "loss": 0.59599028, "memory(GiB)": 34.88, "step": 15490, "train_speed(iter/s)": 0.41781 }, { "acc": 0.84376965, "epoch": 0.41954349768499716, "grad_norm": 8.214424133300781, "learning_rate": 9.917882178353334e-06, "loss": 0.88936062, "memory(GiB)": 34.88, "step": 15495, "train_speed(iter/s)": 0.417815 }, { "acc": 0.85535488, "epoch": 0.4196788779682127, "grad_norm": 6.272881031036377, "learning_rate": 9.917781145729094e-06, "loss": 0.76321201, "memory(GiB)": 34.88, "step": 15500, "train_speed(iter/s)": 0.417821 }, { "acc": 0.84732656, "epoch": 0.41981425825142826, "grad_norm": 4.234492778778076, "learning_rate": 9.917680051506246e-06, "loss": 0.84478645, "memory(GiB)": 34.88, "step": 15505, "train_speed(iter/s)": 0.417825 }, { "acc": 0.87821808, "epoch": 0.4199496385346438, "grad_norm": 17.738340377807617, "learning_rate": 9.917578895686053e-06, "loss": 0.68190169, "memory(GiB)": 34.88, "step": 15510, "train_speed(iter/s)": 0.417831 }, { "acc": 0.83918085, "epoch": 0.42008501881785937, "grad_norm": 19.561460494995117, "learning_rate": 9.917477678269788e-06, "loss": 0.75567765, "memory(GiB)": 34.88, "step": 15515, "train_speed(iter/s)": 0.417837 }, { "acc": 0.84578209, "epoch": 0.4202203991010749, "grad_norm": 14.316682815551758, "learning_rate": 9.917376399258714e-06, "loss": 0.76033802, "memory(GiB)": 34.88, "step": 15520, "train_speed(iter/s)": 0.417841 }, { "acc": 0.86573095, "epoch": 0.4203557793842905, "grad_norm": 9.69442367553711, "learning_rate": 9.917275058654102e-06, "loss": 0.75460911, "memory(GiB)": 34.88, "step": 15525, "train_speed(iter/s)": 0.417846 }, { "acc": 0.85111904, "epoch": 0.420491159667506, "grad_norm": 7.4538044929504395, "learning_rate": 9.917173656457219e-06, "loss": 0.78177481, "memory(GiB)": 34.88, "step": 15530, "train_speed(iter/s)": 0.417852 }, { "acc": 0.85525446, "epoch": 0.4206265399507216, "grad_norm": 11.380988121032715, "learning_rate": 9.91707219266934e-06, "loss": 0.77240729, "memory(GiB)": 34.88, "step": 15535, "train_speed(iter/s)": 0.417857 }, { "acc": 0.8619689, "epoch": 0.4207619202339371, "grad_norm": 7.962784767150879, "learning_rate": 9.916970667291732e-06, "loss": 0.70286312, "memory(GiB)": 34.88, "step": 15540, "train_speed(iter/s)": 0.417863 }, { "acc": 0.85368176, "epoch": 0.4208973005171527, "grad_norm": 9.167207717895508, "learning_rate": 9.91686908032567e-06, "loss": 0.76675243, "memory(GiB)": 34.88, "step": 15545, "train_speed(iter/s)": 0.417868 }, { "acc": 0.83448706, "epoch": 0.4210326808003682, "grad_norm": 8.631641387939453, "learning_rate": 9.91676743177242e-06, "loss": 0.85248041, "memory(GiB)": 34.88, "step": 15550, "train_speed(iter/s)": 0.417872 }, { "acc": 0.84254255, "epoch": 0.4211680610835838, "grad_norm": 7.9730224609375, "learning_rate": 9.916665721633265e-06, "loss": 0.73108282, "memory(GiB)": 34.88, "step": 15555, "train_speed(iter/s)": 0.417877 }, { "acc": 0.8728157, "epoch": 0.4213034413667993, "grad_norm": 9.668437957763672, "learning_rate": 9.91656394990947e-06, "loss": 0.62399335, "memory(GiB)": 34.88, "step": 15560, "train_speed(iter/s)": 0.417883 }, { "acc": 0.83099957, "epoch": 0.4214388216500149, "grad_norm": 15.552199363708496, "learning_rate": 9.916462116602317e-06, "loss": 0.83658037, "memory(GiB)": 34.88, "step": 15565, "train_speed(iter/s)": 0.417887 }, { "acc": 0.87640533, "epoch": 0.4215742019332304, "grad_norm": 12.940287590026855, "learning_rate": 9.916360221713077e-06, "loss": 0.59215689, "memory(GiB)": 34.88, "step": 15570, "train_speed(iter/s)": 0.417892 }, { "acc": 0.85020094, "epoch": 0.421709582216446, "grad_norm": 11.346540451049805, "learning_rate": 9.91625826524303e-06, "loss": 0.74794478, "memory(GiB)": 34.88, "step": 15575, "train_speed(iter/s)": 0.417898 }, { "acc": 0.8417964, "epoch": 0.4218449624996615, "grad_norm": 7.851156234741211, "learning_rate": 9.91615624719345e-06, "loss": 0.81490374, "memory(GiB)": 34.88, "step": 15580, "train_speed(iter/s)": 0.417901 }, { "acc": 0.89063778, "epoch": 0.4219803427828771, "grad_norm": 18.290054321289062, "learning_rate": 9.916054167565617e-06, "loss": 0.61980109, "memory(GiB)": 34.88, "step": 15585, "train_speed(iter/s)": 0.417906 }, { "acc": 0.84358635, "epoch": 0.42211572306609263, "grad_norm": 8.583155632019043, "learning_rate": 9.915952026360807e-06, "loss": 0.80595894, "memory(GiB)": 34.88, "step": 15590, "train_speed(iter/s)": 0.41791 }, { "acc": 0.86355495, "epoch": 0.4222511033493082, "grad_norm": 7.033026695251465, "learning_rate": 9.915849823580302e-06, "loss": 0.58646688, "memory(GiB)": 34.88, "step": 15595, "train_speed(iter/s)": 0.417915 }, { "acc": 0.84817581, "epoch": 0.42238648363252373, "grad_norm": 8.6682710647583, "learning_rate": 9.915747559225383e-06, "loss": 0.80452671, "memory(GiB)": 34.88, "step": 15600, "train_speed(iter/s)": 0.417919 }, { "acc": 0.83032808, "epoch": 0.4225218639157393, "grad_norm": 6.964125156402588, "learning_rate": 9.91564523329733e-06, "loss": 0.78547673, "memory(GiB)": 34.88, "step": 15605, "train_speed(iter/s)": 0.417924 }, { "acc": 0.82859507, "epoch": 0.42265724419895484, "grad_norm": 8.906601905822754, "learning_rate": 9.915542845797423e-06, "loss": 0.94104195, "memory(GiB)": 34.88, "step": 15610, "train_speed(iter/s)": 0.417929 }, { "acc": 0.8548027, "epoch": 0.4227926244821704, "grad_norm": 7.592791557312012, "learning_rate": 9.915440396726947e-06, "loss": 0.7556715, "memory(GiB)": 34.88, "step": 15615, "train_speed(iter/s)": 0.417933 }, { "acc": 0.87141447, "epoch": 0.42292800476538595, "grad_norm": 6.124741554260254, "learning_rate": 9.915337886087186e-06, "loss": 0.65226345, "memory(GiB)": 34.88, "step": 15620, "train_speed(iter/s)": 0.417936 }, { "acc": 0.88125677, "epoch": 0.4230633850486015, "grad_norm": 46.735904693603516, "learning_rate": 9.915235313879421e-06, "loss": 0.60089436, "memory(GiB)": 34.88, "step": 15625, "train_speed(iter/s)": 0.417942 }, { "acc": 0.85760098, "epoch": 0.42319876533181705, "grad_norm": 6.6817474365234375, "learning_rate": 9.915132680104939e-06, "loss": 0.71907115, "memory(GiB)": 34.88, "step": 15630, "train_speed(iter/s)": 0.417947 }, { "acc": 0.8525588, "epoch": 0.42333414561503263, "grad_norm": 6.43205451965332, "learning_rate": 9.915029984765026e-06, "loss": 0.71346016, "memory(GiB)": 34.88, "step": 15635, "train_speed(iter/s)": 0.417952 }, { "acc": 0.85836496, "epoch": 0.42346952589824816, "grad_norm": 15.473915100097656, "learning_rate": 9.91492722786097e-06, "loss": 0.72784796, "memory(GiB)": 34.88, "step": 15640, "train_speed(iter/s)": 0.417957 }, { "acc": 0.85239296, "epoch": 0.42360490618146374, "grad_norm": 5.485229015350342, "learning_rate": 9.914824409394054e-06, "loss": 0.71126566, "memory(GiB)": 34.88, "step": 15645, "train_speed(iter/s)": 0.417962 }, { "acc": 0.8563158, "epoch": 0.42374028646467926, "grad_norm": 9.73657512664795, "learning_rate": 9.914721529365567e-06, "loss": 0.70209923, "memory(GiB)": 34.88, "step": 15650, "train_speed(iter/s)": 0.417968 }, { "acc": 0.85794086, "epoch": 0.42387566674789484, "grad_norm": 10.166505813598633, "learning_rate": 9.9146185877768e-06, "loss": 0.70299459, "memory(GiB)": 34.88, "step": 15655, "train_speed(iter/s)": 0.417973 }, { "acc": 0.84859695, "epoch": 0.42401104703111037, "grad_norm": 8.427055358886719, "learning_rate": 9.914515584629043e-06, "loss": 0.78439822, "memory(GiB)": 34.88, "step": 15660, "train_speed(iter/s)": 0.417979 }, { "acc": 0.86767464, "epoch": 0.42414642731432595, "grad_norm": 6.820342540740967, "learning_rate": 9.914412519923582e-06, "loss": 0.56488647, "memory(GiB)": 34.88, "step": 15665, "train_speed(iter/s)": 0.417984 }, { "acc": 0.86362801, "epoch": 0.42428180759754147, "grad_norm": 7.572638511657715, "learning_rate": 9.914309393661714e-06, "loss": 0.66147261, "memory(GiB)": 34.88, "step": 15670, "train_speed(iter/s)": 0.417989 }, { "acc": 0.87453804, "epoch": 0.42441718788075705, "grad_norm": 9.882423400878906, "learning_rate": 9.914206205844726e-06, "loss": 0.60743146, "memory(GiB)": 34.88, "step": 15675, "train_speed(iter/s)": 0.417994 }, { "acc": 0.84294338, "epoch": 0.4245525681639726, "grad_norm": 14.787633895874023, "learning_rate": 9.914102956473911e-06, "loss": 0.80329056, "memory(GiB)": 34.88, "step": 15680, "train_speed(iter/s)": 0.418 }, { "acc": 0.86249237, "epoch": 0.42468794844718816, "grad_norm": 8.228656768798828, "learning_rate": 9.913999645550566e-06, "loss": 0.65896788, "memory(GiB)": 34.88, "step": 15685, "train_speed(iter/s)": 0.418005 }, { "acc": 0.84217758, "epoch": 0.4248233287304037, "grad_norm": 6.841190814971924, "learning_rate": 9.913896273075984e-06, "loss": 0.73120718, "memory(GiB)": 34.88, "step": 15690, "train_speed(iter/s)": 0.418009 }, { "acc": 0.86855793, "epoch": 0.42495870901361926, "grad_norm": 6.41954231262207, "learning_rate": 9.913792839051455e-06, "loss": 0.62973709, "memory(GiB)": 34.88, "step": 15695, "train_speed(iter/s)": 0.418013 }, { "acc": 0.87257175, "epoch": 0.4250940892968348, "grad_norm": 8.794146537780762, "learning_rate": 9.913689343478282e-06, "loss": 0.74266901, "memory(GiB)": 34.88, "step": 15700, "train_speed(iter/s)": 0.418017 }, { "acc": 0.8671402, "epoch": 0.42522946958005037, "grad_norm": 11.4400053024292, "learning_rate": 9.913585786357756e-06, "loss": 0.72152395, "memory(GiB)": 34.88, "step": 15705, "train_speed(iter/s)": 0.418023 }, { "acc": 0.86856022, "epoch": 0.4253648498632659, "grad_norm": 5.818039417266846, "learning_rate": 9.913482167691177e-06, "loss": 0.67299824, "memory(GiB)": 34.88, "step": 15710, "train_speed(iter/s)": 0.418029 }, { "acc": 0.855509, "epoch": 0.4255002301464815, "grad_norm": 44.731300354003906, "learning_rate": 9.913378487479842e-06, "loss": 0.70158882, "memory(GiB)": 34.88, "step": 15715, "train_speed(iter/s)": 0.418033 }, { "acc": 0.82167282, "epoch": 0.425635610429697, "grad_norm": 37.49224853515625, "learning_rate": 9.91327474572505e-06, "loss": 0.87064819, "memory(GiB)": 34.88, "step": 15720, "train_speed(iter/s)": 0.418038 }, { "acc": 0.81693687, "epoch": 0.4257709907129126, "grad_norm": 8.46139907836914, "learning_rate": 9.913170942428102e-06, "loss": 1.04380054, "memory(GiB)": 34.88, "step": 15725, "train_speed(iter/s)": 0.418042 }, { "acc": 0.85004807, "epoch": 0.4259063709961281, "grad_norm": 7.950524806976318, "learning_rate": 9.913067077590296e-06, "loss": 0.80096445, "memory(GiB)": 34.88, "step": 15730, "train_speed(iter/s)": 0.418047 }, { "acc": 0.85409651, "epoch": 0.4260417512793437, "grad_norm": 7.859570503234863, "learning_rate": 9.912963151212935e-06, "loss": 0.74822817, "memory(GiB)": 34.88, "step": 15735, "train_speed(iter/s)": 0.418052 }, { "acc": 0.87392569, "epoch": 0.4261771315625592, "grad_norm": 7.849178314208984, "learning_rate": 9.912859163297318e-06, "loss": 0.55534921, "memory(GiB)": 34.88, "step": 15740, "train_speed(iter/s)": 0.418057 }, { "acc": 0.85333195, "epoch": 0.4263125118457748, "grad_norm": 7.178965091705322, "learning_rate": 9.912755113844751e-06, "loss": 0.71235938, "memory(GiB)": 34.88, "step": 15745, "train_speed(iter/s)": 0.418059 }, { "acc": 0.83166704, "epoch": 0.4264478921289903, "grad_norm": 8.880760192871094, "learning_rate": 9.912651002856537e-06, "loss": 0.95824223, "memory(GiB)": 34.88, "step": 15750, "train_speed(iter/s)": 0.418065 }, { "acc": 0.86201887, "epoch": 0.4265832724122059, "grad_norm": 14.334354400634766, "learning_rate": 9.912546830333979e-06, "loss": 0.64973421, "memory(GiB)": 34.88, "step": 15755, "train_speed(iter/s)": 0.41807 }, { "acc": 0.85039959, "epoch": 0.4267186526954214, "grad_norm": 8.045787811279297, "learning_rate": 9.912442596278383e-06, "loss": 0.80423546, "memory(GiB)": 34.88, "step": 15760, "train_speed(iter/s)": 0.418074 }, { "acc": 0.81869946, "epoch": 0.426854032978637, "grad_norm": 8.86459732055664, "learning_rate": 9.912338300691052e-06, "loss": 0.98233089, "memory(GiB)": 34.88, "step": 15765, "train_speed(iter/s)": 0.418079 }, { "acc": 0.84518452, "epoch": 0.4269894132618525, "grad_norm": 13.345843315124512, "learning_rate": 9.912233943573295e-06, "loss": 0.8187109, "memory(GiB)": 34.88, "step": 15770, "train_speed(iter/s)": 0.418083 }, { "acc": 0.85436325, "epoch": 0.4271247935450681, "grad_norm": 7.459954261779785, "learning_rate": 9.91212952492642e-06, "loss": 0.73996978, "memory(GiB)": 34.88, "step": 15775, "train_speed(iter/s)": 0.418088 }, { "acc": 0.84348564, "epoch": 0.42726017382828363, "grad_norm": 9.121651649475098, "learning_rate": 9.912025044751735e-06, "loss": 0.79032049, "memory(GiB)": 34.88, "step": 15780, "train_speed(iter/s)": 0.418094 }, { "acc": 0.87223082, "epoch": 0.4273955541114992, "grad_norm": 8.428061485290527, "learning_rate": 9.911920503050545e-06, "loss": 0.64962077, "memory(GiB)": 34.88, "step": 15785, "train_speed(iter/s)": 0.418099 }, { "acc": 0.86165524, "epoch": 0.42753093439471473, "grad_norm": 8.383810997009277, "learning_rate": 9.911815899824164e-06, "loss": 0.64046183, "memory(GiB)": 34.88, "step": 15790, "train_speed(iter/s)": 0.418103 }, { "acc": 0.8472065, "epoch": 0.4276663146779303, "grad_norm": 14.453338623046875, "learning_rate": 9.911711235073898e-06, "loss": 0.79975305, "memory(GiB)": 34.88, "step": 15795, "train_speed(iter/s)": 0.418107 }, { "acc": 0.84945078, "epoch": 0.42780169496114584, "grad_norm": 8.211772918701172, "learning_rate": 9.911606508801063e-06, "loss": 0.73953075, "memory(GiB)": 34.88, "step": 15800, "train_speed(iter/s)": 0.418112 }, { "acc": 0.86838722, "epoch": 0.4279370752443614, "grad_norm": 12.647174835205078, "learning_rate": 9.911501721006968e-06, "loss": 0.63084431, "memory(GiB)": 34.88, "step": 15805, "train_speed(iter/s)": 0.418117 }, { "acc": 0.8326973, "epoch": 0.42807245552757694, "grad_norm": 7.3275580406188965, "learning_rate": 9.911396871692926e-06, "loss": 0.87797565, "memory(GiB)": 34.88, "step": 15810, "train_speed(iter/s)": 0.418122 }, { "acc": 0.84468288, "epoch": 0.4282078358107925, "grad_norm": 8.50445556640625, "learning_rate": 9.911291960860253e-06, "loss": 0.66023989, "memory(GiB)": 34.88, "step": 15815, "train_speed(iter/s)": 0.418127 }, { "acc": 0.8412178, "epoch": 0.42834321609400805, "grad_norm": 8.10206127166748, "learning_rate": 9.91118698851026e-06, "loss": 0.84071608, "memory(GiB)": 34.88, "step": 15820, "train_speed(iter/s)": 0.418132 }, { "acc": 0.87502689, "epoch": 0.42847859637722363, "grad_norm": 9.816679000854492, "learning_rate": 9.91108195464426e-06, "loss": 0.60509562, "memory(GiB)": 34.88, "step": 15825, "train_speed(iter/s)": 0.418137 }, { "acc": 0.83377094, "epoch": 0.42861397666043916, "grad_norm": 7.437356472015381, "learning_rate": 9.910976859263574e-06, "loss": 0.83722687, "memory(GiB)": 34.88, "step": 15830, "train_speed(iter/s)": 0.418142 }, { "acc": 0.84880104, "epoch": 0.42874935694365474, "grad_norm": 10.030547142028809, "learning_rate": 9.91087170236952e-06, "loss": 0.82077837, "memory(GiB)": 34.88, "step": 15835, "train_speed(iter/s)": 0.418146 }, { "acc": 0.86307487, "epoch": 0.42888473722687026, "grad_norm": 8.893820762634277, "learning_rate": 9.910766483963405e-06, "loss": 0.72920437, "memory(GiB)": 34.88, "step": 15840, "train_speed(iter/s)": 0.41815 }, { "acc": 0.83229122, "epoch": 0.42902011751008584, "grad_norm": 11.506762504577637, "learning_rate": 9.910661204046557e-06, "loss": 0.82281675, "memory(GiB)": 34.88, "step": 15845, "train_speed(iter/s)": 0.418156 }, { "acc": 0.84946861, "epoch": 0.42915549779330137, "grad_norm": 6.181024551391602, "learning_rate": 9.910555862620291e-06, "loss": 0.78128424, "memory(GiB)": 34.88, "step": 15850, "train_speed(iter/s)": 0.418161 }, { "acc": 0.84096222, "epoch": 0.42929087807651695, "grad_norm": 5.667087078094482, "learning_rate": 9.910450459685928e-06, "loss": 0.75288663, "memory(GiB)": 34.88, "step": 15855, "train_speed(iter/s)": 0.418166 }, { "acc": 0.87184372, "epoch": 0.42942625835973247, "grad_norm": 10.820662498474121, "learning_rate": 9.910344995244786e-06, "loss": 0.62482715, "memory(GiB)": 34.88, "step": 15860, "train_speed(iter/s)": 0.418171 }, { "acc": 0.84900875, "epoch": 0.42956163864294805, "grad_norm": 4.756882667541504, "learning_rate": 9.910239469298188e-06, "loss": 0.78989334, "memory(GiB)": 34.88, "step": 15865, "train_speed(iter/s)": 0.418176 }, { "acc": 0.83004093, "epoch": 0.4296970189261636, "grad_norm": 10.283409118652344, "learning_rate": 9.910133881847455e-06, "loss": 0.91046906, "memory(GiB)": 34.88, "step": 15870, "train_speed(iter/s)": 0.418181 }, { "acc": 0.87055483, "epoch": 0.42983239920937916, "grad_norm": 15.094269752502441, "learning_rate": 9.910028232893913e-06, "loss": 0.70501323, "memory(GiB)": 34.88, "step": 15875, "train_speed(iter/s)": 0.418186 }, { "acc": 0.84485455, "epoch": 0.4299677794925947, "grad_norm": 11.573758125305176, "learning_rate": 9.90992252243888e-06, "loss": 0.84969997, "memory(GiB)": 34.88, "step": 15880, "train_speed(iter/s)": 0.418191 }, { "acc": 0.83842354, "epoch": 0.43010315977581026, "grad_norm": 8.731921195983887, "learning_rate": 9.909816750483683e-06, "loss": 0.88478832, "memory(GiB)": 34.88, "step": 15885, "train_speed(iter/s)": 0.418194 }, { "acc": 0.87711926, "epoch": 0.4302385400590258, "grad_norm": 7.840826511383057, "learning_rate": 9.90971091702965e-06, "loss": 0.73671131, "memory(GiB)": 34.88, "step": 15890, "train_speed(iter/s)": 0.4182 }, { "acc": 0.8407156, "epoch": 0.43037392034224137, "grad_norm": 10.166820526123047, "learning_rate": 9.9096050220781e-06, "loss": 0.80978508, "memory(GiB)": 34.88, "step": 15895, "train_speed(iter/s)": 0.418206 }, { "acc": 0.84672375, "epoch": 0.4305093006254569, "grad_norm": 6.7438154220581055, "learning_rate": 9.909499065630364e-06, "loss": 0.85385971, "memory(GiB)": 34.88, "step": 15900, "train_speed(iter/s)": 0.41821 }, { "acc": 0.84591093, "epoch": 0.4306446809086725, "grad_norm": 14.041548728942871, "learning_rate": 9.909393047687768e-06, "loss": 0.86969929, "memory(GiB)": 34.88, "step": 15905, "train_speed(iter/s)": 0.418215 }, { "acc": 0.86090689, "epoch": 0.430780061191888, "grad_norm": 6.257266521453857, "learning_rate": 9.909286968251643e-06, "loss": 0.69666452, "memory(GiB)": 34.88, "step": 15910, "train_speed(iter/s)": 0.41822 }, { "acc": 0.85354881, "epoch": 0.4309154414751036, "grad_norm": 10.539613723754883, "learning_rate": 9.909180827323314e-06, "loss": 0.73854804, "memory(GiB)": 34.88, "step": 15915, "train_speed(iter/s)": 0.418225 }, { "acc": 0.86662703, "epoch": 0.4310508217583191, "grad_norm": 11.201306343078613, "learning_rate": 9.909074624904112e-06, "loss": 0.62837791, "memory(GiB)": 34.88, "step": 15920, "train_speed(iter/s)": 0.418231 }, { "acc": 0.8473176, "epoch": 0.4311862020415347, "grad_norm": 6.362327575683594, "learning_rate": 9.908968360995366e-06, "loss": 0.77838163, "memory(GiB)": 34.88, "step": 15925, "train_speed(iter/s)": 0.418236 }, { "acc": 0.86027956, "epoch": 0.4313215823247502, "grad_norm": 8.66061782836914, "learning_rate": 9.908862035598412e-06, "loss": 0.69960384, "memory(GiB)": 34.88, "step": 15930, "train_speed(iter/s)": 0.418241 }, { "acc": 0.84379768, "epoch": 0.4314569626079658, "grad_norm": 7.329399108886719, "learning_rate": 9.908755648714575e-06, "loss": 0.77720747, "memory(GiB)": 34.88, "step": 15935, "train_speed(iter/s)": 0.418246 }, { "acc": 0.86765928, "epoch": 0.4315923428911813, "grad_norm": 27.211313247680664, "learning_rate": 9.908649200345194e-06, "loss": 0.66653976, "memory(GiB)": 34.88, "step": 15940, "train_speed(iter/s)": 0.418251 }, { "acc": 0.85355167, "epoch": 0.4317277231743969, "grad_norm": 6.480060577392578, "learning_rate": 9.908542690491599e-06, "loss": 0.78720341, "memory(GiB)": 34.88, "step": 15945, "train_speed(iter/s)": 0.418256 }, { "acc": 0.84535236, "epoch": 0.4318631034576124, "grad_norm": 6.995625019073486, "learning_rate": 9.908436119155125e-06, "loss": 0.79272161, "memory(GiB)": 34.88, "step": 15950, "train_speed(iter/s)": 0.418261 }, { "acc": 0.84843903, "epoch": 0.431998483740828, "grad_norm": 10.041280746459961, "learning_rate": 9.908329486337106e-06, "loss": 0.66332469, "memory(GiB)": 34.88, "step": 15955, "train_speed(iter/s)": 0.418266 }, { "acc": 0.83416042, "epoch": 0.4321338640240435, "grad_norm": 18.42619514465332, "learning_rate": 9.90822279203888e-06, "loss": 0.92989235, "memory(GiB)": 34.88, "step": 15960, "train_speed(iter/s)": 0.418271 }, { "acc": 0.87413521, "epoch": 0.4322692443072591, "grad_norm": 5.707126617431641, "learning_rate": 9.908116036261782e-06, "loss": 0.63042693, "memory(GiB)": 34.88, "step": 15965, "train_speed(iter/s)": 0.418276 }, { "acc": 0.87968998, "epoch": 0.43240462459047463, "grad_norm": 7.885061264038086, "learning_rate": 9.90800921900715e-06, "loss": 0.65848675, "memory(GiB)": 34.88, "step": 15970, "train_speed(iter/s)": 0.418281 }, { "acc": 0.85966053, "epoch": 0.4325400048736902, "grad_norm": 9.909308433532715, "learning_rate": 9.907902340276323e-06, "loss": 0.74909978, "memory(GiB)": 34.88, "step": 15975, "train_speed(iter/s)": 0.418287 }, { "acc": 0.85944109, "epoch": 0.43267538515690573, "grad_norm": 7.405905246734619, "learning_rate": 9.907795400070637e-06, "loss": 0.7756166, "memory(GiB)": 34.88, "step": 15980, "train_speed(iter/s)": 0.418291 }, { "acc": 0.84991961, "epoch": 0.4328107654401213, "grad_norm": 7.685985565185547, "learning_rate": 9.907688398391432e-06, "loss": 0.71301723, "memory(GiB)": 34.88, "step": 15985, "train_speed(iter/s)": 0.418296 }, { "acc": 0.86383533, "epoch": 0.43294614572333684, "grad_norm": 4.668724536895752, "learning_rate": 9.907581335240053e-06, "loss": 0.63854837, "memory(GiB)": 34.88, "step": 15990, "train_speed(iter/s)": 0.418301 }, { "acc": 0.86397877, "epoch": 0.4330815260065524, "grad_norm": 15.127851486206055, "learning_rate": 9.907474210617836e-06, "loss": 0.74320364, "memory(GiB)": 34.88, "step": 15995, "train_speed(iter/s)": 0.418306 }, { "acc": 0.86347008, "epoch": 0.43321690628976794, "grad_norm": 5.488097190856934, "learning_rate": 9.907367024526124e-06, "loss": 0.64761782, "memory(GiB)": 34.88, "step": 16000, "train_speed(iter/s)": 0.418311 }, { "acc": 0.83654127, "epoch": 0.4333522865729835, "grad_norm": 9.017145156860352, "learning_rate": 9.907259776966262e-06, "loss": 0.79135089, "memory(GiB)": 34.88, "step": 16005, "train_speed(iter/s)": 0.418315 }, { "acc": 0.83704472, "epoch": 0.43348766685619905, "grad_norm": 5.452642917633057, "learning_rate": 9.907152467939592e-06, "loss": 0.81168137, "memory(GiB)": 34.88, "step": 16010, "train_speed(iter/s)": 0.418318 }, { "acc": 0.86590347, "epoch": 0.43362304713941463, "grad_norm": 9.520729064941406, "learning_rate": 9.907045097447458e-06, "loss": 0.68925071, "memory(GiB)": 34.88, "step": 16015, "train_speed(iter/s)": 0.418322 }, { "acc": 0.86860867, "epoch": 0.43375842742263016, "grad_norm": 6.781006813049316, "learning_rate": 9.906937665491204e-06, "loss": 0.6287509, "memory(GiB)": 34.88, "step": 16020, "train_speed(iter/s)": 0.418328 }, { "acc": 0.83235588, "epoch": 0.43389380770584574, "grad_norm": 9.072530746459961, "learning_rate": 9.906830172072179e-06, "loss": 0.853899, "memory(GiB)": 34.88, "step": 16025, "train_speed(iter/s)": 0.418332 }, { "acc": 0.86992855, "epoch": 0.43402918798906126, "grad_norm": 6.351261615753174, "learning_rate": 9.906722617191727e-06, "loss": 0.63137803, "memory(GiB)": 34.88, "step": 16030, "train_speed(iter/s)": 0.418337 }, { "acc": 0.84714041, "epoch": 0.43416456827227684, "grad_norm": 9.02021598815918, "learning_rate": 9.906615000851196e-06, "loss": 0.80816393, "memory(GiB)": 34.88, "step": 16035, "train_speed(iter/s)": 0.418341 }, { "acc": 0.894205, "epoch": 0.43429994855549237, "grad_norm": 10.846662521362305, "learning_rate": 9.906507323051934e-06, "loss": 0.5125061, "memory(GiB)": 34.88, "step": 16040, "train_speed(iter/s)": 0.418346 }, { "acc": 0.85008907, "epoch": 0.43443532883870795, "grad_norm": 17.52163314819336, "learning_rate": 9.906399583795292e-06, "loss": 0.78626952, "memory(GiB)": 34.88, "step": 16045, "train_speed(iter/s)": 0.418349 }, { "acc": 0.84585371, "epoch": 0.43457070912192347, "grad_norm": 13.413948059082031, "learning_rate": 9.906291783082614e-06, "loss": 0.87879639, "memory(GiB)": 34.88, "step": 16050, "train_speed(iter/s)": 0.418352 }, { "acc": 0.85951004, "epoch": 0.43470608940513905, "grad_norm": 12.845685958862305, "learning_rate": 9.906183920915256e-06, "loss": 0.71494446, "memory(GiB)": 34.88, "step": 16055, "train_speed(iter/s)": 0.418356 }, { "acc": 0.87156429, "epoch": 0.4348414696883546, "grad_norm": 7.3094305992126465, "learning_rate": 9.906075997294568e-06, "loss": 0.60538449, "memory(GiB)": 34.88, "step": 16060, "train_speed(iter/s)": 0.41836 }, { "acc": 0.81829357, "epoch": 0.43497684997157016, "grad_norm": 7.230463027954102, "learning_rate": 9.905968012221898e-06, "loss": 0.86242752, "memory(GiB)": 34.88, "step": 16065, "train_speed(iter/s)": 0.418363 }, { "acc": 0.85871792, "epoch": 0.4351122302547857, "grad_norm": 5.135807991027832, "learning_rate": 9.905859965698605e-06, "loss": 0.69216309, "memory(GiB)": 34.88, "step": 16070, "train_speed(iter/s)": 0.418366 }, { "acc": 0.8418972, "epoch": 0.43524761053800126, "grad_norm": 9.725813865661621, "learning_rate": 9.905751857726039e-06, "loss": 0.8855298, "memory(GiB)": 34.88, "step": 16075, "train_speed(iter/s)": 0.418366 }, { "acc": 0.86454668, "epoch": 0.4353829908212168, "grad_norm": 11.15245246887207, "learning_rate": 9.905643688305554e-06, "loss": 0.6809577, "memory(GiB)": 34.88, "step": 16080, "train_speed(iter/s)": 0.418369 }, { "acc": 0.86249676, "epoch": 0.43551837110443237, "grad_norm": 5.808885097503662, "learning_rate": 9.905535457438506e-06, "loss": 0.68268127, "memory(GiB)": 34.88, "step": 16085, "train_speed(iter/s)": 0.418372 }, { "acc": 0.8660984, "epoch": 0.4356537513876479, "grad_norm": 12.432766914367676, "learning_rate": 9.90542716512625e-06, "loss": 0.62576971, "memory(GiB)": 34.88, "step": 16090, "train_speed(iter/s)": 0.418376 }, { "acc": 0.87887363, "epoch": 0.43578913167086347, "grad_norm": 3.9410083293914795, "learning_rate": 9.905318811370144e-06, "loss": 0.56539288, "memory(GiB)": 34.88, "step": 16095, "train_speed(iter/s)": 0.418381 }, { "acc": 0.86770439, "epoch": 0.435924511954079, "grad_norm": 5.087372303009033, "learning_rate": 9.905210396171543e-06, "loss": 0.61005249, "memory(GiB)": 34.88, "step": 16100, "train_speed(iter/s)": 0.418382 }, { "acc": 0.88396206, "epoch": 0.4360598922372946, "grad_norm": 11.092918395996094, "learning_rate": 9.905101919531807e-06, "loss": 0.59910064, "memory(GiB)": 34.88, "step": 16105, "train_speed(iter/s)": 0.418384 }, { "acc": 0.86813097, "epoch": 0.4361952725205101, "grad_norm": 7.929873466491699, "learning_rate": 9.904993381452296e-06, "loss": 0.65344219, "memory(GiB)": 34.88, "step": 16110, "train_speed(iter/s)": 0.418388 }, { "acc": 0.8280097, "epoch": 0.4363306528037257, "grad_norm": 4.973623752593994, "learning_rate": 9.904884781934368e-06, "loss": 0.92848358, "memory(GiB)": 34.88, "step": 16115, "train_speed(iter/s)": 0.41839 }, { "acc": 0.83571272, "epoch": 0.4364660330869412, "grad_norm": 6.155476093292236, "learning_rate": 9.904776120979383e-06, "loss": 0.85617809, "memory(GiB)": 34.88, "step": 16120, "train_speed(iter/s)": 0.418395 }, { "acc": 0.85595722, "epoch": 0.4366014133701568, "grad_norm": 8.491201400756836, "learning_rate": 9.9046673985887e-06, "loss": 0.73204937, "memory(GiB)": 34.88, "step": 16125, "train_speed(iter/s)": 0.4184 }, { "acc": 0.84594393, "epoch": 0.4367367936533723, "grad_norm": 5.455490589141846, "learning_rate": 9.904558614763687e-06, "loss": 0.78425975, "memory(GiB)": 34.88, "step": 16130, "train_speed(iter/s)": 0.418405 }, { "acc": 0.84854841, "epoch": 0.4368721739365879, "grad_norm": 10.290606498718262, "learning_rate": 9.904449769505702e-06, "loss": 0.88273087, "memory(GiB)": 34.88, "step": 16135, "train_speed(iter/s)": 0.41841 }, { "acc": 0.87250004, "epoch": 0.4370075542198034, "grad_norm": 8.266313552856445, "learning_rate": 9.90434086281611e-06, "loss": 0.63640261, "memory(GiB)": 34.88, "step": 16140, "train_speed(iter/s)": 0.418414 }, { "acc": 0.85686445, "epoch": 0.437142934503019, "grad_norm": 15.963072776794434, "learning_rate": 9.904231894696276e-06, "loss": 0.75004997, "memory(GiB)": 34.88, "step": 16145, "train_speed(iter/s)": 0.418419 }, { "acc": 0.86451416, "epoch": 0.4372783147862345, "grad_norm": 12.390120506286621, "learning_rate": 9.904122865147564e-06, "loss": 0.76140251, "memory(GiB)": 34.88, "step": 16150, "train_speed(iter/s)": 0.418422 }, { "acc": 0.85258217, "epoch": 0.4374136950694501, "grad_norm": 10.06130313873291, "learning_rate": 9.904013774171338e-06, "loss": 0.686901, "memory(GiB)": 34.88, "step": 16155, "train_speed(iter/s)": 0.418426 }, { "acc": 0.84653778, "epoch": 0.43754907535266563, "grad_norm": 11.95380973815918, "learning_rate": 9.90390462176897e-06, "loss": 0.85607586, "memory(GiB)": 34.88, "step": 16160, "train_speed(iter/s)": 0.418432 }, { "acc": 0.8588275, "epoch": 0.4376844556358812, "grad_norm": 8.478264808654785, "learning_rate": 9.903795407941821e-06, "loss": 0.6886416, "memory(GiB)": 34.88, "step": 16165, "train_speed(iter/s)": 0.418437 }, { "acc": 0.87884798, "epoch": 0.43781983591909673, "grad_norm": 7.864799499511719, "learning_rate": 9.903686132691262e-06, "loss": 0.65812883, "memory(GiB)": 34.88, "step": 16170, "train_speed(iter/s)": 0.418441 }, { "acc": 0.85204926, "epoch": 0.4379552162023123, "grad_norm": 7.5473103523254395, "learning_rate": 9.903576796018664e-06, "loss": 0.72764778, "memory(GiB)": 34.88, "step": 16175, "train_speed(iter/s)": 0.418446 }, { "acc": 0.83063183, "epoch": 0.43809059648552784, "grad_norm": 16.157562255859375, "learning_rate": 9.903467397925395e-06, "loss": 0.88967648, "memory(GiB)": 34.88, "step": 16180, "train_speed(iter/s)": 0.418451 }, { "acc": 0.86613417, "epoch": 0.4382259767687434, "grad_norm": 7.378592014312744, "learning_rate": 9.903357938412823e-06, "loss": 0.68578138, "memory(GiB)": 34.88, "step": 16185, "train_speed(iter/s)": 0.418454 }, { "acc": 0.84376078, "epoch": 0.43836135705195894, "grad_norm": 5.132785320281982, "learning_rate": 9.903248417482321e-06, "loss": 0.84080582, "memory(GiB)": 34.88, "step": 16190, "train_speed(iter/s)": 0.418458 }, { "acc": 0.84022217, "epoch": 0.4384967373351745, "grad_norm": 9.312392234802246, "learning_rate": 9.903138835135262e-06, "loss": 0.7825057, "memory(GiB)": 34.88, "step": 16195, "train_speed(iter/s)": 0.418462 }, { "acc": 0.85881958, "epoch": 0.43863211761839005, "grad_norm": 9.409192085266113, "learning_rate": 9.90302919137302e-06, "loss": 0.66065335, "memory(GiB)": 34.88, "step": 16200, "train_speed(iter/s)": 0.418467 }, { "acc": 0.83623447, "epoch": 0.43876749790160563, "grad_norm": 12.605180740356445, "learning_rate": 9.902919486196964e-06, "loss": 0.87428627, "memory(GiB)": 34.88, "step": 16205, "train_speed(iter/s)": 0.418472 }, { "acc": 0.82246637, "epoch": 0.43890287818482115, "grad_norm": 8.739625930786133, "learning_rate": 9.902809719608471e-06, "loss": 0.81143522, "memory(GiB)": 34.88, "step": 16210, "train_speed(iter/s)": 0.418476 }, { "acc": 0.82925949, "epoch": 0.43903825846803674, "grad_norm": 8.064397811889648, "learning_rate": 9.902699891608915e-06, "loss": 0.8523922, "memory(GiB)": 34.88, "step": 16215, "train_speed(iter/s)": 0.418481 }, { "acc": 0.86067181, "epoch": 0.43917363875125226, "grad_norm": 18.297327041625977, "learning_rate": 9.902590002199675e-06, "loss": 0.70166159, "memory(GiB)": 34.88, "step": 16220, "train_speed(iter/s)": 0.418486 }, { "acc": 0.83751059, "epoch": 0.43930901903446784, "grad_norm": 11.263941764831543, "learning_rate": 9.902480051382122e-06, "loss": 0.83425331, "memory(GiB)": 34.88, "step": 16225, "train_speed(iter/s)": 0.41849 }, { "acc": 0.87628641, "epoch": 0.43944439931768337, "grad_norm": 11.502517700195312, "learning_rate": 9.90237003915764e-06, "loss": 0.6954998, "memory(GiB)": 34.88, "step": 16230, "train_speed(iter/s)": 0.418495 }, { "acc": 0.86132841, "epoch": 0.43957977960089895, "grad_norm": 7.25650691986084, "learning_rate": 9.902259965527601e-06, "loss": 0.75011463, "memory(GiB)": 34.88, "step": 16235, "train_speed(iter/s)": 0.4185 }, { "acc": 0.86843338, "epoch": 0.43971515988411447, "grad_norm": 7.659195899963379, "learning_rate": 9.902149830493388e-06, "loss": 0.63808646, "memory(GiB)": 34.88, "step": 16240, "train_speed(iter/s)": 0.418504 }, { "acc": 0.87134647, "epoch": 0.43985054016733005, "grad_norm": 9.949504852294922, "learning_rate": 9.902039634056377e-06, "loss": 0.64395885, "memory(GiB)": 34.88, "step": 16245, "train_speed(iter/s)": 0.418508 }, { "acc": 0.85536585, "epoch": 0.4399859204505456, "grad_norm": 14.573184967041016, "learning_rate": 9.90192937621795e-06, "loss": 0.73542776, "memory(GiB)": 34.88, "step": 16250, "train_speed(iter/s)": 0.418513 }, { "acc": 0.84049244, "epoch": 0.44012130073376116, "grad_norm": 12.047385215759277, "learning_rate": 9.901819056979494e-06, "loss": 0.75403042, "memory(GiB)": 34.88, "step": 16255, "train_speed(iter/s)": 0.418518 }, { "acc": 0.87138424, "epoch": 0.4402566810169767, "grad_norm": 12.933326721191406, "learning_rate": 9.90170867634238e-06, "loss": 0.7491137, "memory(GiB)": 34.88, "step": 16260, "train_speed(iter/s)": 0.418522 }, { "acc": 0.85767403, "epoch": 0.44039206130019226, "grad_norm": 6.826445579528809, "learning_rate": 9.901598234308e-06, "loss": 0.72811699, "memory(GiB)": 34.88, "step": 16265, "train_speed(iter/s)": 0.418526 }, { "acc": 0.84002848, "epoch": 0.4405274415834078, "grad_norm": 3.9938175678253174, "learning_rate": 9.901487730877733e-06, "loss": 0.76306448, "memory(GiB)": 34.88, "step": 16270, "train_speed(iter/s)": 0.41853 }, { "acc": 0.84217949, "epoch": 0.44066282186662337, "grad_norm": 9.71411418914795, "learning_rate": 9.901377166052964e-06, "loss": 0.76096649, "memory(GiB)": 34.88, "step": 16275, "train_speed(iter/s)": 0.418535 }, { "acc": 0.82997751, "epoch": 0.4407982021498389, "grad_norm": 14.700896263122559, "learning_rate": 9.901266539835078e-06, "loss": 0.94865665, "memory(GiB)": 34.88, "step": 16280, "train_speed(iter/s)": 0.418541 }, { "acc": 0.84240131, "epoch": 0.44093358243305447, "grad_norm": 6.315280437469482, "learning_rate": 9.901155852225462e-06, "loss": 0.81605301, "memory(GiB)": 34.88, "step": 16285, "train_speed(iter/s)": 0.418544 }, { "acc": 0.85506382, "epoch": 0.44106896271627, "grad_norm": 14.42294692993164, "learning_rate": 9.901045103225502e-06, "loss": 0.78038168, "memory(GiB)": 34.88, "step": 16290, "train_speed(iter/s)": 0.418549 }, { "acc": 0.83322515, "epoch": 0.4412043429994856, "grad_norm": 17.095046997070312, "learning_rate": 9.900934292836583e-06, "loss": 0.87417707, "memory(GiB)": 34.88, "step": 16295, "train_speed(iter/s)": 0.418554 }, { "acc": 0.8368639, "epoch": 0.4413397232827011, "grad_norm": 7.999344348907471, "learning_rate": 9.900823421060098e-06, "loss": 0.83668118, "memory(GiB)": 34.88, "step": 16300, "train_speed(iter/s)": 0.41856 }, { "acc": 0.84203711, "epoch": 0.4414751035659167, "grad_norm": 10.495545387268066, "learning_rate": 9.90071248789743e-06, "loss": 0.8251585, "memory(GiB)": 34.88, "step": 16305, "train_speed(iter/s)": 0.418564 }, { "acc": 0.85202198, "epoch": 0.4416104838491322, "grad_norm": 10.772012710571289, "learning_rate": 9.900601493349974e-06, "loss": 0.7333755, "memory(GiB)": 34.88, "step": 16310, "train_speed(iter/s)": 0.418569 }, { "acc": 0.87305889, "epoch": 0.4417458641323478, "grad_norm": 20.79774284362793, "learning_rate": 9.900490437419116e-06, "loss": 0.61020823, "memory(GiB)": 34.88, "step": 16315, "train_speed(iter/s)": 0.418573 }, { "acc": 0.85760994, "epoch": 0.4418812444155633, "grad_norm": 8.697537422180176, "learning_rate": 9.900379320106251e-06, "loss": 0.68728738, "memory(GiB)": 34.88, "step": 16320, "train_speed(iter/s)": 0.418578 }, { "acc": 0.87480221, "epoch": 0.4420166246987789, "grad_norm": 6.5478692054748535, "learning_rate": 9.900268141412769e-06, "loss": 0.56145802, "memory(GiB)": 34.88, "step": 16325, "train_speed(iter/s)": 0.418583 }, { "acc": 0.87119083, "epoch": 0.4421520049819944, "grad_norm": 37.426334381103516, "learning_rate": 9.900156901340062e-06, "loss": 0.71004286, "memory(GiB)": 34.88, "step": 16330, "train_speed(iter/s)": 0.418587 }, { "acc": 0.86892357, "epoch": 0.44228738526521, "grad_norm": 9.527496337890625, "learning_rate": 9.900045599889526e-06, "loss": 0.69492903, "memory(GiB)": 34.88, "step": 16335, "train_speed(iter/s)": 0.418591 }, { "acc": 0.84175234, "epoch": 0.4424227655484255, "grad_norm": 6.299315929412842, "learning_rate": 9.899934237062555e-06, "loss": 0.92399588, "memory(GiB)": 34.88, "step": 16340, "train_speed(iter/s)": 0.418596 }, { "acc": 0.88210516, "epoch": 0.4425581458316411, "grad_norm": 6.002391815185547, "learning_rate": 9.89982281286054e-06, "loss": 0.57368393, "memory(GiB)": 34.88, "step": 16345, "train_speed(iter/s)": 0.418602 }, { "acc": 0.86803379, "epoch": 0.44269352611485663, "grad_norm": 11.02129077911377, "learning_rate": 9.89971132728488e-06, "loss": 0.60012622, "memory(GiB)": 34.88, "step": 16350, "train_speed(iter/s)": 0.418606 }, { "acc": 0.83467159, "epoch": 0.4428289063980722, "grad_norm": 11.292014122009277, "learning_rate": 9.89959978033697e-06, "loss": 0.84362431, "memory(GiB)": 34.88, "step": 16355, "train_speed(iter/s)": 0.41861 }, { "acc": 0.86705246, "epoch": 0.44296428668128773, "grad_norm": 6.559081554412842, "learning_rate": 9.899488172018212e-06, "loss": 0.61052494, "memory(GiB)": 34.88, "step": 16360, "train_speed(iter/s)": 0.418615 }, { "acc": 0.86886177, "epoch": 0.4430996669645033, "grad_norm": 10.586743354797363, "learning_rate": 9.899376502329999e-06, "loss": 0.63582449, "memory(GiB)": 34.88, "step": 16365, "train_speed(iter/s)": 0.418618 }, { "acc": 0.86735306, "epoch": 0.44323504724771884, "grad_norm": 6.061983585357666, "learning_rate": 9.899264771273731e-06, "loss": 0.56762152, "memory(GiB)": 34.88, "step": 16370, "train_speed(iter/s)": 0.418623 }, { "acc": 0.85909624, "epoch": 0.4433704275309344, "grad_norm": 8.70751953125, "learning_rate": 9.899152978850809e-06, "loss": 0.67310677, "memory(GiB)": 34.88, "step": 16375, "train_speed(iter/s)": 0.418627 }, { "acc": 0.80009279, "epoch": 0.44350580781414994, "grad_norm": 39.83460998535156, "learning_rate": 9.899041125062633e-06, "loss": 1.09578772, "memory(GiB)": 34.88, "step": 16380, "train_speed(iter/s)": 0.418631 }, { "acc": 0.85135059, "epoch": 0.4436411880973655, "grad_norm": 6.35891580581665, "learning_rate": 9.8989292099106e-06, "loss": 0.71379786, "memory(GiB)": 34.88, "step": 16385, "train_speed(iter/s)": 0.418636 }, { "acc": 0.8728137, "epoch": 0.44377656838058105, "grad_norm": 6.7271809577941895, "learning_rate": 9.898817233396121e-06, "loss": 0.57552805, "memory(GiB)": 34.88, "step": 16390, "train_speed(iter/s)": 0.418639 }, { "acc": 0.8491518, "epoch": 0.44391194866379663, "grad_norm": 9.861136436462402, "learning_rate": 9.89870519552059e-06, "loss": 0.68851209, "memory(GiB)": 34.88, "step": 16395, "train_speed(iter/s)": 0.418644 }, { "acc": 0.84895592, "epoch": 0.44404732894701215, "grad_norm": 8.190834999084473, "learning_rate": 9.898593096285414e-06, "loss": 0.75801735, "memory(GiB)": 34.88, "step": 16400, "train_speed(iter/s)": 0.418649 }, { "acc": 0.83353767, "epoch": 0.44418270923022773, "grad_norm": 14.961222648620605, "learning_rate": 9.898480935692002e-06, "loss": 0.92282248, "memory(GiB)": 34.88, "step": 16405, "train_speed(iter/s)": 0.418653 }, { "acc": 0.83015232, "epoch": 0.44431808951344326, "grad_norm": 12.905511856079102, "learning_rate": 9.898368713741748e-06, "loss": 0.89668064, "memory(GiB)": 34.88, "step": 16410, "train_speed(iter/s)": 0.418656 }, { "acc": 0.84785919, "epoch": 0.44445346979665884, "grad_norm": 5.194571018218994, "learning_rate": 9.898256430436068e-06, "loss": 0.86261063, "memory(GiB)": 34.88, "step": 16415, "train_speed(iter/s)": 0.418661 }, { "acc": 0.82505169, "epoch": 0.44458885007987436, "grad_norm": 8.759536743164062, "learning_rate": 9.898144085776363e-06, "loss": 0.94179707, "memory(GiB)": 34.88, "step": 16420, "train_speed(iter/s)": 0.418664 }, { "acc": 0.84241753, "epoch": 0.44472423036308995, "grad_norm": 11.337821006774902, "learning_rate": 9.898031679764043e-06, "loss": 1.01333818, "memory(GiB)": 34.88, "step": 16425, "train_speed(iter/s)": 0.41867 }, { "acc": 0.86219997, "epoch": 0.44485961064630547, "grad_norm": 3.4764227867126465, "learning_rate": 9.897919212400514e-06, "loss": 0.64141703, "memory(GiB)": 34.88, "step": 16430, "train_speed(iter/s)": 0.418674 }, { "acc": 0.84552107, "epoch": 0.44499499092952105, "grad_norm": 4.96597146987915, "learning_rate": 9.897806683687187e-06, "loss": 0.83590145, "memory(GiB)": 34.88, "step": 16435, "train_speed(iter/s)": 0.418679 }, { "acc": 0.86019106, "epoch": 0.4451303712127366, "grad_norm": 11.864638328552246, "learning_rate": 9.89769409362547e-06, "loss": 0.7431232, "memory(GiB)": 34.88, "step": 16440, "train_speed(iter/s)": 0.418684 }, { "acc": 0.85692444, "epoch": 0.44526575149595216, "grad_norm": 7.250304698944092, "learning_rate": 9.897581442216772e-06, "loss": 0.73952475, "memory(GiB)": 34.88, "step": 16445, "train_speed(iter/s)": 0.418689 }, { "acc": 0.83668003, "epoch": 0.4454011317791677, "grad_norm": 10.433551788330078, "learning_rate": 9.89746872946251e-06, "loss": 0.80121489, "memory(GiB)": 34.88, "step": 16450, "train_speed(iter/s)": 0.418693 }, { "acc": 0.84192448, "epoch": 0.44553651206238326, "grad_norm": 17.581247329711914, "learning_rate": 9.89735595536409e-06, "loss": 0.87220974, "memory(GiB)": 34.88, "step": 16455, "train_speed(iter/s)": 0.418697 }, { "acc": 0.86080542, "epoch": 0.4456718923455988, "grad_norm": 8.879831314086914, "learning_rate": 9.897243119922926e-06, "loss": 0.67727547, "memory(GiB)": 34.88, "step": 16460, "train_speed(iter/s)": 0.418702 }, { "acc": 0.85440893, "epoch": 0.44580727262881437, "grad_norm": 9.13719367980957, "learning_rate": 9.897130223140434e-06, "loss": 0.71585398, "memory(GiB)": 34.88, "step": 16465, "train_speed(iter/s)": 0.418706 }, { "acc": 0.84807053, "epoch": 0.4459426529120299, "grad_norm": 5.8365936279296875, "learning_rate": 9.897017265018027e-06, "loss": 0.76642385, "memory(GiB)": 34.88, "step": 16470, "train_speed(iter/s)": 0.418711 }, { "acc": 0.84511528, "epoch": 0.44607803319524547, "grad_norm": 13.476400375366211, "learning_rate": 9.896904245557119e-06, "loss": 0.82518864, "memory(GiB)": 34.88, "step": 16475, "train_speed(iter/s)": 0.418715 }, { "acc": 0.86878567, "epoch": 0.446213413478461, "grad_norm": 11.63062858581543, "learning_rate": 9.896791164759125e-06, "loss": 0.66034231, "memory(GiB)": 34.88, "step": 16480, "train_speed(iter/s)": 0.41872 }, { "acc": 0.8590723, "epoch": 0.4463487937616766, "grad_norm": 8.251368522644043, "learning_rate": 9.896678022625464e-06, "loss": 0.69723701, "memory(GiB)": 34.88, "step": 16485, "train_speed(iter/s)": 0.418725 }, { "acc": 0.8677412, "epoch": 0.4464841740448921, "grad_norm": 7.781922817230225, "learning_rate": 9.896564819157553e-06, "loss": 0.63341141, "memory(GiB)": 34.88, "step": 16490, "train_speed(iter/s)": 0.418729 }, { "acc": 0.85494633, "epoch": 0.4466195543281077, "grad_norm": 5.183620929718018, "learning_rate": 9.896451554356808e-06, "loss": 0.71907215, "memory(GiB)": 34.88, "step": 16495, "train_speed(iter/s)": 0.418734 }, { "acc": 0.85246305, "epoch": 0.4467549346113232, "grad_norm": 10.886341094970703, "learning_rate": 9.896338228224652e-06, "loss": 0.75332212, "memory(GiB)": 34.88, "step": 16500, "train_speed(iter/s)": 0.418738 }, { "acc": 0.87495251, "epoch": 0.4468903148945388, "grad_norm": 22.241186141967773, "learning_rate": 9.896224840762499e-06, "loss": 0.62877569, "memory(GiB)": 34.88, "step": 16505, "train_speed(iter/s)": 0.418742 }, { "acc": 0.8656991, "epoch": 0.4470256951777543, "grad_norm": 9.685700416564941, "learning_rate": 9.896111391971772e-06, "loss": 0.69748521, "memory(GiB)": 34.88, "step": 16510, "train_speed(iter/s)": 0.418747 }, { "acc": 0.86800404, "epoch": 0.44716107546096984, "grad_norm": 6.642758846282959, "learning_rate": 9.895997881853896e-06, "loss": 0.67385874, "memory(GiB)": 34.88, "step": 16515, "train_speed(iter/s)": 0.418752 }, { "acc": 0.85359707, "epoch": 0.4472964557441854, "grad_norm": 6.361993312835693, "learning_rate": 9.895884310410285e-06, "loss": 0.70209332, "memory(GiB)": 34.88, "step": 16520, "train_speed(iter/s)": 0.418756 }, { "acc": 0.88502617, "epoch": 0.44743183602740094, "grad_norm": 6.55271577835083, "learning_rate": 9.895770677642369e-06, "loss": 0.55995688, "memory(GiB)": 34.88, "step": 16525, "train_speed(iter/s)": 0.41876 }, { "acc": 0.84996471, "epoch": 0.4475672163106165, "grad_norm": 11.10836124420166, "learning_rate": 9.895656983551567e-06, "loss": 0.8185461, "memory(GiB)": 34.88, "step": 16530, "train_speed(iter/s)": 0.418765 }, { "acc": 0.84545135, "epoch": 0.44770259659383205, "grad_norm": 5.6822662353515625, "learning_rate": 9.895543228139306e-06, "loss": 0.74848943, "memory(GiB)": 34.88, "step": 16535, "train_speed(iter/s)": 0.41877 }, { "acc": 0.86857786, "epoch": 0.4478379768770476, "grad_norm": 7.325137615203857, "learning_rate": 9.89542941140701e-06, "loss": 0.65014114, "memory(GiB)": 34.88, "step": 16540, "train_speed(iter/s)": 0.418775 }, { "acc": 0.84543819, "epoch": 0.44797335716026315, "grad_norm": 10.410146713256836, "learning_rate": 9.895315533356104e-06, "loss": 0.75342121, "memory(GiB)": 34.88, "step": 16545, "train_speed(iter/s)": 0.418779 }, { "acc": 0.87658062, "epoch": 0.44810873744347873, "grad_norm": 6.03975248336792, "learning_rate": 9.895201593988015e-06, "loss": 0.59213934, "memory(GiB)": 34.88, "step": 16550, "train_speed(iter/s)": 0.418783 }, { "acc": 0.83846292, "epoch": 0.44824411772669426, "grad_norm": 16.932266235351562, "learning_rate": 9.895087593304171e-06, "loss": 0.80341272, "memory(GiB)": 34.88, "step": 16555, "train_speed(iter/s)": 0.418786 }, { "acc": 0.84091444, "epoch": 0.44837949800990984, "grad_norm": 23.51144027709961, "learning_rate": 9.894973531305997e-06, "loss": 0.8288826, "memory(GiB)": 34.88, "step": 16560, "train_speed(iter/s)": 0.41879 }, { "acc": 0.83385143, "epoch": 0.44851487829312536, "grad_norm": 8.470457077026367, "learning_rate": 9.894859407994927e-06, "loss": 0.88498554, "memory(GiB)": 34.88, "step": 16565, "train_speed(iter/s)": 0.418794 }, { "acc": 0.86423035, "epoch": 0.44865025857634094, "grad_norm": 7.576759338378906, "learning_rate": 9.894745223372388e-06, "loss": 0.63535128, "memory(GiB)": 34.88, "step": 16570, "train_speed(iter/s)": 0.418797 }, { "acc": 0.84525566, "epoch": 0.44878563885955647, "grad_norm": 8.696273803710938, "learning_rate": 9.894630977439811e-06, "loss": 0.81535978, "memory(GiB)": 34.88, "step": 16575, "train_speed(iter/s)": 0.418802 }, { "acc": 0.83375187, "epoch": 0.44892101914277205, "grad_norm": 44.094051361083984, "learning_rate": 9.894516670198625e-06, "loss": 0.90260811, "memory(GiB)": 34.88, "step": 16580, "train_speed(iter/s)": 0.418806 }, { "acc": 0.87008991, "epoch": 0.4490563994259876, "grad_norm": 3.996608257293701, "learning_rate": 9.894402301650264e-06, "loss": 0.58911343, "memory(GiB)": 34.88, "step": 16585, "train_speed(iter/s)": 0.41881 }, { "acc": 0.8799861, "epoch": 0.44919177970920315, "grad_norm": 7.812189102172852, "learning_rate": 9.894287871796161e-06, "loss": 0.62716961, "memory(GiB)": 34.88, "step": 16590, "train_speed(iter/s)": 0.418815 }, { "acc": 0.86392183, "epoch": 0.4493271599924187, "grad_norm": 16.076101303100586, "learning_rate": 9.894173380637747e-06, "loss": 0.74671435, "memory(GiB)": 34.88, "step": 16595, "train_speed(iter/s)": 0.418818 }, { "acc": 0.82705078, "epoch": 0.44946254027563426, "grad_norm": 13.209068298339844, "learning_rate": 9.89405882817646e-06, "loss": 0.91835499, "memory(GiB)": 34.88, "step": 16600, "train_speed(iter/s)": 0.418823 }, { "acc": 0.84566917, "epoch": 0.4495979205588498, "grad_norm": 7.368173599243164, "learning_rate": 9.893944214413731e-06, "loss": 0.79191036, "memory(GiB)": 34.88, "step": 16605, "train_speed(iter/s)": 0.418827 }, { "acc": 0.86041927, "epoch": 0.44973330084206536, "grad_norm": 4.919780731201172, "learning_rate": 9.893829539350997e-06, "loss": 0.64865208, "memory(GiB)": 34.88, "step": 16610, "train_speed(iter/s)": 0.418832 }, { "acc": 0.8648201, "epoch": 0.4498686811252809, "grad_norm": 6.69362735748291, "learning_rate": 9.893714802989697e-06, "loss": 0.67656074, "memory(GiB)": 34.88, "step": 16615, "train_speed(iter/s)": 0.418838 }, { "acc": 0.85433826, "epoch": 0.45000406140849647, "grad_norm": 8.094328880310059, "learning_rate": 9.893600005331266e-06, "loss": 0.72973237, "memory(GiB)": 34.88, "step": 16620, "train_speed(iter/s)": 0.418842 }, { "acc": 0.86125126, "epoch": 0.450139441691712, "grad_norm": 33.2290153503418, "learning_rate": 9.893485146377143e-06, "loss": 0.68973532, "memory(GiB)": 34.88, "step": 16625, "train_speed(iter/s)": 0.418847 }, { "acc": 0.85399399, "epoch": 0.4502748219749276, "grad_norm": 18.955341339111328, "learning_rate": 9.893370226128763e-06, "loss": 0.73451395, "memory(GiB)": 34.88, "step": 16630, "train_speed(iter/s)": 0.41885 }, { "acc": 0.84850082, "epoch": 0.4504102022581431, "grad_norm": 7.937717437744141, "learning_rate": 9.893255244587573e-06, "loss": 0.82592297, "memory(GiB)": 34.88, "step": 16635, "train_speed(iter/s)": 0.418855 }, { "acc": 0.82477169, "epoch": 0.4505455825413587, "grad_norm": 10.303083419799805, "learning_rate": 9.893140201755008e-06, "loss": 0.94182262, "memory(GiB)": 34.88, "step": 16640, "train_speed(iter/s)": 0.418859 }, { "acc": 0.85378666, "epoch": 0.4506809628245742, "grad_norm": 9.437880516052246, "learning_rate": 9.893025097632509e-06, "loss": 0.63850651, "memory(GiB)": 34.88, "step": 16645, "train_speed(iter/s)": 0.418863 }, { "acc": 0.86629143, "epoch": 0.4508163431077898, "grad_norm": 11.13740348815918, "learning_rate": 9.892909932221522e-06, "loss": 0.64513979, "memory(GiB)": 34.88, "step": 16650, "train_speed(iter/s)": 0.418868 }, { "acc": 0.85839043, "epoch": 0.4509517233910053, "grad_norm": 8.533589363098145, "learning_rate": 9.892794705523486e-06, "loss": 0.69602785, "memory(GiB)": 34.88, "step": 16655, "train_speed(iter/s)": 0.418873 }, { "acc": 0.87446899, "epoch": 0.4510871036742209, "grad_norm": 10.269721031188965, "learning_rate": 9.892679417539845e-06, "loss": 0.68289928, "memory(GiB)": 34.88, "step": 16660, "train_speed(iter/s)": 0.418877 }, { "acc": 0.85766773, "epoch": 0.4512224839574364, "grad_norm": 16.990400314331055, "learning_rate": 9.892564068272044e-06, "loss": 0.71846828, "memory(GiB)": 34.88, "step": 16665, "train_speed(iter/s)": 0.418881 }, { "acc": 0.83494816, "epoch": 0.451357864240652, "grad_norm": 7.675314426422119, "learning_rate": 9.892448657721526e-06, "loss": 0.8982008, "memory(GiB)": 34.88, "step": 16670, "train_speed(iter/s)": 0.418885 }, { "acc": 0.85610409, "epoch": 0.4514932445238675, "grad_norm": 6.346210956573486, "learning_rate": 9.892333185889743e-06, "loss": 0.71379986, "memory(GiB)": 34.88, "step": 16675, "train_speed(iter/s)": 0.41889 }, { "acc": 0.85169325, "epoch": 0.4516286248070831, "grad_norm": 11.073882102966309, "learning_rate": 9.892217652778131e-06, "loss": 0.7954896, "memory(GiB)": 34.88, "step": 16680, "train_speed(iter/s)": 0.418894 }, { "acc": 0.86674805, "epoch": 0.4517640050902986, "grad_norm": 6.501344680786133, "learning_rate": 9.892102058388146e-06, "loss": 0.57389126, "memory(GiB)": 34.88, "step": 16685, "train_speed(iter/s)": 0.418899 }, { "acc": 0.85239639, "epoch": 0.4518993853735142, "grad_norm": 6.313263416290283, "learning_rate": 9.891986402721233e-06, "loss": 0.81393375, "memory(GiB)": 34.88, "step": 16690, "train_speed(iter/s)": 0.418903 }, { "acc": 0.84454536, "epoch": 0.45203476565672973, "grad_norm": 5.543473720550537, "learning_rate": 9.891870685778842e-06, "loss": 0.80978508, "memory(GiB)": 34.88, "step": 16695, "train_speed(iter/s)": 0.418907 }, { "acc": 0.86053886, "epoch": 0.4521701459399453, "grad_norm": 7.023774147033691, "learning_rate": 9.891754907562421e-06, "loss": 0.69278765, "memory(GiB)": 34.88, "step": 16700, "train_speed(iter/s)": 0.41891 }, { "acc": 0.87563381, "epoch": 0.45230552622316084, "grad_norm": 12.313946723937988, "learning_rate": 9.891639068073421e-06, "loss": 0.71612287, "memory(GiB)": 34.88, "step": 16705, "train_speed(iter/s)": 0.418915 }, { "acc": 0.8441267, "epoch": 0.4524409065063764, "grad_norm": 9.592320442199707, "learning_rate": 9.891523167313291e-06, "loss": 0.85039625, "memory(GiB)": 34.88, "step": 16710, "train_speed(iter/s)": 0.41892 }, { "acc": 0.87772493, "epoch": 0.45257628678959194, "grad_norm": 20.753681182861328, "learning_rate": 9.891407205283489e-06, "loss": 0.57259893, "memory(GiB)": 34.88, "step": 16715, "train_speed(iter/s)": 0.418924 }, { "acc": 0.86204271, "epoch": 0.4527116670728075, "grad_norm": 10.127181053161621, "learning_rate": 9.891291181985462e-06, "loss": 0.73459425, "memory(GiB)": 34.88, "step": 16720, "train_speed(iter/s)": 0.418929 }, { "acc": 0.87516842, "epoch": 0.45284704735602305, "grad_norm": 8.824049949645996, "learning_rate": 9.891175097420664e-06, "loss": 0.62427626, "memory(GiB)": 34.88, "step": 16725, "train_speed(iter/s)": 0.418934 }, { "acc": 0.84095688, "epoch": 0.4529824276392386, "grad_norm": 5.362615585327148, "learning_rate": 9.891058951590551e-06, "loss": 0.77493095, "memory(GiB)": 34.88, "step": 16730, "train_speed(iter/s)": 0.418938 }, { "acc": 0.87811365, "epoch": 0.45311780792245415, "grad_norm": 56.018585205078125, "learning_rate": 9.890942744496577e-06, "loss": 0.65095077, "memory(GiB)": 34.88, "step": 16735, "train_speed(iter/s)": 0.418943 }, { "acc": 0.86001472, "epoch": 0.45325318820566973, "grad_norm": 9.633461952209473, "learning_rate": 9.890826476140198e-06, "loss": 0.66993027, "memory(GiB)": 34.88, "step": 16740, "train_speed(iter/s)": 0.418948 }, { "acc": 0.83181, "epoch": 0.45338856848888526, "grad_norm": 6.708169937133789, "learning_rate": 9.89071014652287e-06, "loss": 0.95740185, "memory(GiB)": 34.88, "step": 16745, "train_speed(iter/s)": 0.418952 }, { "acc": 0.8480401, "epoch": 0.45352394877210084, "grad_norm": 14.836182594299316, "learning_rate": 9.890593755646052e-06, "loss": 0.74490814, "memory(GiB)": 34.88, "step": 16750, "train_speed(iter/s)": 0.418957 }, { "acc": 0.8491518, "epoch": 0.45365932905531636, "grad_norm": 10.3935546875, "learning_rate": 9.8904773035112e-06, "loss": 0.74076328, "memory(GiB)": 34.88, "step": 16755, "train_speed(iter/s)": 0.418961 }, { "acc": 0.85750732, "epoch": 0.45379470933853194, "grad_norm": 5.712055683135986, "learning_rate": 9.890360790119773e-06, "loss": 0.73221092, "memory(GiB)": 34.88, "step": 16760, "train_speed(iter/s)": 0.418966 }, { "acc": 0.82816839, "epoch": 0.45393008962174747, "grad_norm": 7.647499084472656, "learning_rate": 9.890244215473231e-06, "loss": 0.86504478, "memory(GiB)": 34.88, "step": 16765, "train_speed(iter/s)": 0.418969 }, { "acc": 0.83237448, "epoch": 0.45406546990496305, "grad_norm": 7.960300445556641, "learning_rate": 9.890127579573035e-06, "loss": 0.84258766, "memory(GiB)": 34.88, "step": 16770, "train_speed(iter/s)": 0.418974 }, { "acc": 0.85849075, "epoch": 0.4542008501881786, "grad_norm": 10.539230346679688, "learning_rate": 9.890010882420645e-06, "loss": 0.77850046, "memory(GiB)": 34.88, "step": 16775, "train_speed(iter/s)": 0.418979 }, { "acc": 0.86075087, "epoch": 0.45433623047139415, "grad_norm": 7.667275905609131, "learning_rate": 9.889894124017525e-06, "loss": 0.74603229, "memory(GiB)": 34.88, "step": 16780, "train_speed(iter/s)": 0.418983 }, { "acc": 0.86608267, "epoch": 0.4544716107546097, "grad_norm": 11.899262428283691, "learning_rate": 9.889777304365133e-06, "loss": 0.71756063, "memory(GiB)": 34.88, "step": 16785, "train_speed(iter/s)": 0.418988 }, { "acc": 0.83812504, "epoch": 0.45460699103782526, "grad_norm": 13.164468765258789, "learning_rate": 9.889660423464937e-06, "loss": 0.84157486, "memory(GiB)": 34.88, "step": 16790, "train_speed(iter/s)": 0.418992 }, { "acc": 0.85019608, "epoch": 0.4547423713210408, "grad_norm": 9.959195137023926, "learning_rate": 9.8895434813184e-06, "loss": 0.77897463, "memory(GiB)": 34.88, "step": 16795, "train_speed(iter/s)": 0.418997 }, { "acc": 0.85110941, "epoch": 0.45487775160425636, "grad_norm": 16.789703369140625, "learning_rate": 9.889426477926985e-06, "loss": 0.7947288, "memory(GiB)": 34.88, "step": 16800, "train_speed(iter/s)": 0.419002 }, { "acc": 0.8610755, "epoch": 0.4550131318874719, "grad_norm": 8.403373718261719, "learning_rate": 9.889309413292159e-06, "loss": 0.69658494, "memory(GiB)": 34.88, "step": 16805, "train_speed(iter/s)": 0.419006 }, { "acc": 0.86464624, "epoch": 0.45514851217068747, "grad_norm": 6.52646541595459, "learning_rate": 9.88919228741539e-06, "loss": 0.66671362, "memory(GiB)": 34.88, "step": 16810, "train_speed(iter/s)": 0.419011 }, { "acc": 0.86236067, "epoch": 0.455283892453903, "grad_norm": 10.282819747924805, "learning_rate": 9.889075100298145e-06, "loss": 0.64250822, "memory(GiB)": 34.88, "step": 16815, "train_speed(iter/s)": 0.419015 }, { "acc": 0.84768515, "epoch": 0.4554192727371186, "grad_norm": 9.971925735473633, "learning_rate": 9.888957851941888e-06, "loss": 0.69250731, "memory(GiB)": 34.88, "step": 16820, "train_speed(iter/s)": 0.419019 }, { "acc": 0.86450386, "epoch": 0.4555546530203341, "grad_norm": 9.806297302246094, "learning_rate": 9.888840542348093e-06, "loss": 0.76121206, "memory(GiB)": 34.88, "step": 16825, "train_speed(iter/s)": 0.419025 }, { "acc": 0.86148319, "epoch": 0.4556900333035497, "grad_norm": 6.303301811218262, "learning_rate": 9.888723171518226e-06, "loss": 0.70425034, "memory(GiB)": 34.88, "step": 16830, "train_speed(iter/s)": 0.419028 }, { "acc": 0.8560483, "epoch": 0.4558254135867652, "grad_norm": 10.358926773071289, "learning_rate": 9.888605739453757e-06, "loss": 0.75486364, "memory(GiB)": 34.88, "step": 16835, "train_speed(iter/s)": 0.419032 }, { "acc": 0.86824055, "epoch": 0.4559607938699808, "grad_norm": 7.563216686248779, "learning_rate": 9.888488246156161e-06, "loss": 0.68188477, "memory(GiB)": 34.88, "step": 16840, "train_speed(iter/s)": 0.419036 }, { "acc": 0.85931311, "epoch": 0.4560961741531963, "grad_norm": 11.67724895477295, "learning_rate": 9.888370691626906e-06, "loss": 0.72816477, "memory(GiB)": 34.88, "step": 16845, "train_speed(iter/s)": 0.419041 }, { "acc": 0.87611427, "epoch": 0.4562315544364119, "grad_norm": 7.575107574462891, "learning_rate": 9.888253075867465e-06, "loss": 0.52300529, "memory(GiB)": 34.88, "step": 16850, "train_speed(iter/s)": 0.419045 }, { "acc": 0.85477772, "epoch": 0.4563669347196274, "grad_norm": 12.403491020202637, "learning_rate": 9.888135398879314e-06, "loss": 0.7909955, "memory(GiB)": 34.88, "step": 16855, "train_speed(iter/s)": 0.41905 }, { "acc": 0.87133102, "epoch": 0.456502315002843, "grad_norm": 5.514827728271484, "learning_rate": 9.888017660663924e-06, "loss": 0.61130867, "memory(GiB)": 34.88, "step": 16860, "train_speed(iter/s)": 0.419055 }, { "acc": 0.8377327, "epoch": 0.4566376952860585, "grad_norm": 12.465641021728516, "learning_rate": 9.887899861222773e-06, "loss": 0.84169617, "memory(GiB)": 34.88, "step": 16865, "train_speed(iter/s)": 0.41906 }, { "acc": 0.85043774, "epoch": 0.4567730755692741, "grad_norm": 31.213809967041016, "learning_rate": 9.887782000557334e-06, "loss": 0.79663358, "memory(GiB)": 34.88, "step": 16870, "train_speed(iter/s)": 0.419063 }, { "acc": 0.86565685, "epoch": 0.4569084558524896, "grad_norm": 6.290955066680908, "learning_rate": 9.887664078669086e-06, "loss": 0.65137424, "memory(GiB)": 34.88, "step": 16875, "train_speed(iter/s)": 0.419067 }, { "acc": 0.82948723, "epoch": 0.4570438361357052, "grad_norm": 15.59825325012207, "learning_rate": 9.887546095559503e-06, "loss": 0.85440693, "memory(GiB)": 34.88, "step": 16880, "train_speed(iter/s)": 0.419072 }, { "acc": 0.84904575, "epoch": 0.45717921641892073, "grad_norm": 6.9529829025268555, "learning_rate": 9.887428051230065e-06, "loss": 0.77442999, "memory(GiB)": 34.88, "step": 16885, "train_speed(iter/s)": 0.419073 }, { "acc": 0.84927816, "epoch": 0.4573145967021363, "grad_norm": 10.639188766479492, "learning_rate": 9.88730994568225e-06, "loss": 0.86885862, "memory(GiB)": 34.88, "step": 16890, "train_speed(iter/s)": 0.419077 }, { "acc": 0.81954193, "epoch": 0.45744997698535184, "grad_norm": 10.97003173828125, "learning_rate": 9.887191778917537e-06, "loss": 0.99728718, "memory(GiB)": 34.88, "step": 16895, "train_speed(iter/s)": 0.419081 }, { "acc": 0.86429472, "epoch": 0.4575853572685674, "grad_norm": 5.2902140617370605, "learning_rate": 9.88707355093741e-06, "loss": 0.6804697, "memory(GiB)": 34.88, "step": 16900, "train_speed(iter/s)": 0.419086 }, { "acc": 0.85920181, "epoch": 0.45772073755178294, "grad_norm": 5.9666643142700195, "learning_rate": 9.886955261743346e-06, "loss": 0.65563941, "memory(GiB)": 34.88, "step": 16905, "train_speed(iter/s)": 0.41909 }, { "acc": 0.8583252, "epoch": 0.4578561178349985, "grad_norm": 12.509734153747559, "learning_rate": 9.886836911336827e-06, "loss": 0.70673432, "memory(GiB)": 34.88, "step": 16910, "train_speed(iter/s)": 0.419094 }, { "acc": 0.88356762, "epoch": 0.45799149811821405, "grad_norm": 5.734221935272217, "learning_rate": 9.886718499719336e-06, "loss": 0.60245442, "memory(GiB)": 34.88, "step": 16915, "train_speed(iter/s)": 0.419098 }, { "acc": 0.855867, "epoch": 0.4581268784014296, "grad_norm": 3.295647382736206, "learning_rate": 9.886600026892359e-06, "loss": 0.68780403, "memory(GiB)": 34.88, "step": 16920, "train_speed(iter/s)": 0.419102 }, { "acc": 0.86452618, "epoch": 0.45826225868464515, "grad_norm": 6.402105331420898, "learning_rate": 9.886481492857376e-06, "loss": 0.65503783, "memory(GiB)": 34.88, "step": 16925, "train_speed(iter/s)": 0.419106 }, { "acc": 0.82719059, "epoch": 0.45839763896786073, "grad_norm": 9.560547828674316, "learning_rate": 9.886362897615875e-06, "loss": 0.87224693, "memory(GiB)": 34.88, "step": 16930, "train_speed(iter/s)": 0.419109 }, { "acc": 0.85500546, "epoch": 0.45853301925107626, "grad_norm": 8.081880569458008, "learning_rate": 9.886244241169342e-06, "loss": 0.7483799, "memory(GiB)": 34.88, "step": 16935, "train_speed(iter/s)": 0.419114 }, { "acc": 0.84758015, "epoch": 0.45866839953429184, "grad_norm": 12.907608985900879, "learning_rate": 9.88612552351926e-06, "loss": 0.69063158, "memory(GiB)": 34.88, "step": 16940, "train_speed(iter/s)": 0.419118 }, { "acc": 0.86123447, "epoch": 0.45880377981750736, "grad_norm": 6.8069939613342285, "learning_rate": 9.886006744667118e-06, "loss": 0.69543061, "memory(GiB)": 34.88, "step": 16945, "train_speed(iter/s)": 0.419123 }, { "acc": 0.83778162, "epoch": 0.45893916010072294, "grad_norm": 25.54588508605957, "learning_rate": 9.885887904614404e-06, "loss": 0.88430147, "memory(GiB)": 34.88, "step": 16950, "train_speed(iter/s)": 0.419126 }, { "acc": 0.873596, "epoch": 0.45907454038393847, "grad_norm": 9.837459564208984, "learning_rate": 9.885769003362607e-06, "loss": 0.69088535, "memory(GiB)": 34.88, "step": 16955, "train_speed(iter/s)": 0.419131 }, { "acc": 0.82526331, "epoch": 0.45920992066715405, "grad_norm": 12.926740646362305, "learning_rate": 9.885650040913217e-06, "loss": 0.93383846, "memory(GiB)": 34.88, "step": 16960, "train_speed(iter/s)": 0.419135 }, { "acc": 0.87947769, "epoch": 0.4593453009503696, "grad_norm": 4.109163761138916, "learning_rate": 9.885531017267722e-06, "loss": 0.58935776, "memory(GiB)": 34.88, "step": 16965, "train_speed(iter/s)": 0.41914 }, { "acc": 0.86326561, "epoch": 0.45948068123358515, "grad_norm": 7.060670375823975, "learning_rate": 9.885411932427616e-06, "loss": 0.74105163, "memory(GiB)": 34.88, "step": 16970, "train_speed(iter/s)": 0.419144 }, { "acc": 0.8540144, "epoch": 0.4596160615168007, "grad_norm": 7.428506374359131, "learning_rate": 9.885292786394389e-06, "loss": 0.67054329, "memory(GiB)": 34.88, "step": 16975, "train_speed(iter/s)": 0.419148 }, { "acc": 0.85546093, "epoch": 0.45975144180001626, "grad_norm": 9.593230247497559, "learning_rate": 9.885173579169533e-06, "loss": 0.77127762, "memory(GiB)": 34.88, "step": 16980, "train_speed(iter/s)": 0.419152 }, { "acc": 0.85944748, "epoch": 0.4598868220832318, "grad_norm": 11.525705337524414, "learning_rate": 9.88505431075454e-06, "loss": 0.71225109, "memory(GiB)": 34.88, "step": 16985, "train_speed(iter/s)": 0.419157 }, { "acc": 0.88206129, "epoch": 0.46002220236644736, "grad_norm": 5.596494674682617, "learning_rate": 9.884934981150908e-06, "loss": 0.51789289, "memory(GiB)": 34.88, "step": 16990, "train_speed(iter/s)": 0.419162 }, { "acc": 0.843715, "epoch": 0.4601575826496629, "grad_norm": 7.245397090911865, "learning_rate": 9.884815590360131e-06, "loss": 0.85869102, "memory(GiB)": 34.88, "step": 16995, "train_speed(iter/s)": 0.419166 }, { "acc": 0.8501667, "epoch": 0.46029296293287847, "grad_norm": 9.068352699279785, "learning_rate": 9.884696138383704e-06, "loss": 0.70040836, "memory(GiB)": 34.88, "step": 17000, "train_speed(iter/s)": 0.41917 }, { "acc": 0.83975, "epoch": 0.460428343216094, "grad_norm": 27.82855796813965, "learning_rate": 9.88457662522312e-06, "loss": 0.90022449, "memory(GiB)": 34.88, "step": 17005, "train_speed(iter/s)": 0.419174 }, { "acc": 0.84497051, "epoch": 0.4605637234993096, "grad_norm": 28.029495239257812, "learning_rate": 9.88445705087988e-06, "loss": 0.87415752, "memory(GiB)": 34.88, "step": 17010, "train_speed(iter/s)": 0.419179 }, { "acc": 0.85512981, "epoch": 0.4606991037825251, "grad_norm": 4.063507556915283, "learning_rate": 9.884337415355483e-06, "loss": 0.70930672, "memory(GiB)": 34.88, "step": 17015, "train_speed(iter/s)": 0.419183 }, { "acc": 0.83188534, "epoch": 0.4608344840657407, "grad_norm": 9.027463912963867, "learning_rate": 9.884217718651425e-06, "loss": 0.85150585, "memory(GiB)": 34.88, "step": 17020, "train_speed(iter/s)": 0.419187 }, { "acc": 0.83779163, "epoch": 0.4609698643489562, "grad_norm": 7.341042518615723, "learning_rate": 9.884097960769207e-06, "loss": 0.75267477, "memory(GiB)": 34.88, "step": 17025, "train_speed(iter/s)": 0.41919 }, { "acc": 0.8397295, "epoch": 0.4611052446321718, "grad_norm": 13.661757469177246, "learning_rate": 9.883978141710327e-06, "loss": 0.74518337, "memory(GiB)": 34.88, "step": 17030, "train_speed(iter/s)": 0.419194 }, { "acc": 0.87769794, "epoch": 0.4612406249153873, "grad_norm": 9.485307693481445, "learning_rate": 9.883858261476286e-06, "loss": 0.54191961, "memory(GiB)": 34.88, "step": 17035, "train_speed(iter/s)": 0.419198 }, { "acc": 0.85525894, "epoch": 0.4613760051986029, "grad_norm": 17.27496910095215, "learning_rate": 9.883738320068588e-06, "loss": 0.79758463, "memory(GiB)": 34.88, "step": 17040, "train_speed(iter/s)": 0.419202 }, { "acc": 0.88040161, "epoch": 0.4615113854818184, "grad_norm": 6.439296245574951, "learning_rate": 9.883618317488734e-06, "loss": 0.54451418, "memory(GiB)": 34.88, "step": 17045, "train_speed(iter/s)": 0.419207 }, { "acc": 0.85004854, "epoch": 0.461646765765034, "grad_norm": 5.491984844207764, "learning_rate": 9.883498253738229e-06, "loss": 0.81197643, "memory(GiB)": 34.88, "step": 17050, "train_speed(iter/s)": 0.419211 }, { "acc": 0.85167665, "epoch": 0.4617821460482495, "grad_norm": 11.060602188110352, "learning_rate": 9.883378128818576e-06, "loss": 0.77206264, "memory(GiB)": 34.88, "step": 17055, "train_speed(iter/s)": 0.419215 }, { "acc": 0.84990788, "epoch": 0.4619175263314651, "grad_norm": 4.4641571044921875, "learning_rate": 9.883257942731277e-06, "loss": 0.77191954, "memory(GiB)": 34.88, "step": 17060, "train_speed(iter/s)": 0.419219 }, { "acc": 0.83183804, "epoch": 0.4620529066146806, "grad_norm": 4.820712089538574, "learning_rate": 9.883137695477843e-06, "loss": 0.89358101, "memory(GiB)": 34.88, "step": 17065, "train_speed(iter/s)": 0.419224 }, { "acc": 0.84830999, "epoch": 0.4621882868978962, "grad_norm": 4.97428560256958, "learning_rate": 9.883017387059777e-06, "loss": 0.80454445, "memory(GiB)": 34.88, "step": 17070, "train_speed(iter/s)": 0.419228 }, { "acc": 0.84347897, "epoch": 0.46232366718111173, "grad_norm": 19.072792053222656, "learning_rate": 9.882897017478586e-06, "loss": 0.81910515, "memory(GiB)": 34.88, "step": 17075, "train_speed(iter/s)": 0.419232 }, { "acc": 0.84970226, "epoch": 0.4624590474643273, "grad_norm": 9.762726783752441, "learning_rate": 9.882776586735778e-06, "loss": 0.73358126, "memory(GiB)": 34.88, "step": 17080, "train_speed(iter/s)": 0.419237 }, { "acc": 0.8546526, "epoch": 0.46259442774754284, "grad_norm": 12.848697662353516, "learning_rate": 9.882656094832863e-06, "loss": 0.75241251, "memory(GiB)": 34.88, "step": 17085, "train_speed(iter/s)": 0.41924 }, { "acc": 0.85311928, "epoch": 0.4627298080307584, "grad_norm": 10.362931251525879, "learning_rate": 9.882535541771348e-06, "loss": 0.77228899, "memory(GiB)": 34.88, "step": 17090, "train_speed(iter/s)": 0.419245 }, { "acc": 0.83531742, "epoch": 0.46286518831397394, "grad_norm": 7.344670295715332, "learning_rate": 9.882414927552747e-06, "loss": 0.86687717, "memory(GiB)": 34.88, "step": 17095, "train_speed(iter/s)": 0.419249 }, { "acc": 0.86488438, "epoch": 0.4630005685971895, "grad_norm": 11.70888900756836, "learning_rate": 9.882294252178567e-06, "loss": 0.71859674, "memory(GiB)": 34.88, "step": 17100, "train_speed(iter/s)": 0.419254 }, { "acc": 0.88999538, "epoch": 0.46313594888040505, "grad_norm": 9.810014724731445, "learning_rate": 9.88217351565032e-06, "loss": 0.49335222, "memory(GiB)": 34.88, "step": 17105, "train_speed(iter/s)": 0.419259 }, { "acc": 0.86114826, "epoch": 0.4632713291636206, "grad_norm": 12.054177284240723, "learning_rate": 9.882052717969523e-06, "loss": 0.71331601, "memory(GiB)": 34.88, "step": 17110, "train_speed(iter/s)": 0.419263 }, { "acc": 0.86972809, "epoch": 0.46340670944683615, "grad_norm": 10.231781005859375, "learning_rate": 9.881931859137682e-06, "loss": 0.69964609, "memory(GiB)": 34.88, "step": 17115, "train_speed(iter/s)": 0.419268 }, { "acc": 0.84440575, "epoch": 0.46354208973005173, "grad_norm": 10.157248497009277, "learning_rate": 9.881810939156318e-06, "loss": 0.87875242, "memory(GiB)": 34.88, "step": 17120, "train_speed(iter/s)": 0.419272 }, { "acc": 0.8772398, "epoch": 0.46367747001326726, "grad_norm": 5.0647969245910645, "learning_rate": 9.881689958026942e-06, "loss": 0.55304289, "memory(GiB)": 34.88, "step": 17125, "train_speed(iter/s)": 0.419277 }, { "acc": 0.84807692, "epoch": 0.46381285029648284, "grad_norm": 9.235189437866211, "learning_rate": 9.881568915751071e-06, "loss": 0.77270718, "memory(GiB)": 34.88, "step": 17130, "train_speed(iter/s)": 0.419281 }, { "acc": 0.85834961, "epoch": 0.46394823057969836, "grad_norm": 5.68452787399292, "learning_rate": 9.881447812330221e-06, "loss": 0.68720808, "memory(GiB)": 34.88, "step": 17135, "train_speed(iter/s)": 0.419285 }, { "acc": 0.83676624, "epoch": 0.46408361086291394, "grad_norm": 12.834009170532227, "learning_rate": 9.881326647765906e-06, "loss": 0.90519533, "memory(GiB)": 34.88, "step": 17140, "train_speed(iter/s)": 0.419287 }, { "acc": 0.85245857, "epoch": 0.46421899114612947, "grad_norm": 7.048721790313721, "learning_rate": 9.881205422059648e-06, "loss": 0.73953533, "memory(GiB)": 34.88, "step": 17145, "train_speed(iter/s)": 0.419289 }, { "acc": 0.86505575, "epoch": 0.46435437142934505, "grad_norm": 8.779961585998535, "learning_rate": 9.881084135212964e-06, "loss": 0.73466749, "memory(GiB)": 34.88, "step": 17150, "train_speed(iter/s)": 0.419294 }, { "acc": 0.86096144, "epoch": 0.4644897517125606, "grad_norm": 28.6795654296875, "learning_rate": 9.880962787227375e-06, "loss": 0.76002116, "memory(GiB)": 34.88, "step": 17155, "train_speed(iter/s)": 0.419298 }, { "acc": 0.83989429, "epoch": 0.46462513199577615, "grad_norm": 7.5551981925964355, "learning_rate": 9.880841378104395e-06, "loss": 0.88049812, "memory(GiB)": 34.88, "step": 17160, "train_speed(iter/s)": 0.419301 }, { "acc": 0.84349823, "epoch": 0.4647605122789917, "grad_norm": 20.893430709838867, "learning_rate": 9.880719907845554e-06, "loss": 0.78301439, "memory(GiB)": 34.88, "step": 17165, "train_speed(iter/s)": 0.419305 }, { "acc": 0.84886913, "epoch": 0.46489589256220726, "grad_norm": 8.571855545043945, "learning_rate": 9.880598376452366e-06, "loss": 0.74805269, "memory(GiB)": 34.88, "step": 17170, "train_speed(iter/s)": 0.419309 }, { "acc": 0.87549925, "epoch": 0.4650312728454228, "grad_norm": 7.3442535400390625, "learning_rate": 9.880476783926359e-06, "loss": 0.61476192, "memory(GiB)": 34.88, "step": 17175, "train_speed(iter/s)": 0.419313 }, { "acc": 0.85084877, "epoch": 0.46516665312863836, "grad_norm": 12.139594078063965, "learning_rate": 9.88035513026905e-06, "loss": 0.90627823, "memory(GiB)": 34.88, "step": 17180, "train_speed(iter/s)": 0.419317 }, { "acc": 0.81748981, "epoch": 0.4653020334118539, "grad_norm": 8.596298217773438, "learning_rate": 9.88023341548197e-06, "loss": 1.0626276, "memory(GiB)": 34.88, "step": 17185, "train_speed(iter/s)": 0.419321 }, { "acc": 0.83624458, "epoch": 0.46543741369506947, "grad_norm": 8.930519104003906, "learning_rate": 9.880111639566639e-06, "loss": 0.91042652, "memory(GiB)": 34.88, "step": 17190, "train_speed(iter/s)": 0.419325 }, { "acc": 0.86108704, "epoch": 0.465572793978285, "grad_norm": 6.7584381103515625, "learning_rate": 9.879989802524583e-06, "loss": 0.76381264, "memory(GiB)": 34.88, "step": 17195, "train_speed(iter/s)": 0.419329 }, { "acc": 0.85050697, "epoch": 0.4657081742615006, "grad_norm": 7.705173015594482, "learning_rate": 9.879867904357332e-06, "loss": 0.77558818, "memory(GiB)": 34.88, "step": 17200, "train_speed(iter/s)": 0.419333 }, { "acc": 0.85796776, "epoch": 0.4658435545447161, "grad_norm": 11.094324111938477, "learning_rate": 9.879745945066407e-06, "loss": 0.70306988, "memory(GiB)": 34.88, "step": 17205, "train_speed(iter/s)": 0.419337 }, { "acc": 0.84496622, "epoch": 0.4659789348279317, "grad_norm": 6.954055309295654, "learning_rate": 9.87962392465334e-06, "loss": 0.71890731, "memory(GiB)": 34.88, "step": 17210, "train_speed(iter/s)": 0.419342 }, { "acc": 0.87959061, "epoch": 0.4661143151111472, "grad_norm": 8.618383407592773, "learning_rate": 9.879501843119659e-06, "loss": 0.65275488, "memory(GiB)": 34.88, "step": 17215, "train_speed(iter/s)": 0.419345 }, { "acc": 0.8677845, "epoch": 0.4662496953943628, "grad_norm": 11.732812881469727, "learning_rate": 9.87937970046689e-06, "loss": 0.65869393, "memory(GiB)": 34.88, "step": 17220, "train_speed(iter/s)": 0.419349 }, { "acc": 0.82485695, "epoch": 0.4663850756775783, "grad_norm": 7.153081893920898, "learning_rate": 9.879257496696569e-06, "loss": 0.92872353, "memory(GiB)": 34.88, "step": 17225, "train_speed(iter/s)": 0.419352 }, { "acc": 0.89466581, "epoch": 0.4665204559607939, "grad_norm": 7.327387809753418, "learning_rate": 9.879135231810225e-06, "loss": 0.58067932, "memory(GiB)": 34.88, "step": 17230, "train_speed(iter/s)": 0.419357 }, { "acc": 0.86526585, "epoch": 0.4666558362440094, "grad_norm": 10.88687515258789, "learning_rate": 9.879012905809383e-06, "loss": 0.69864635, "memory(GiB)": 34.88, "step": 17235, "train_speed(iter/s)": 0.419361 }, { "acc": 0.87985954, "epoch": 0.466791216527225, "grad_norm": 7.183938503265381, "learning_rate": 9.878890518695583e-06, "loss": 0.58483334, "memory(GiB)": 34.88, "step": 17240, "train_speed(iter/s)": 0.419362 }, { "acc": 0.88151283, "epoch": 0.4669265968104405, "grad_norm": 6.055560111999512, "learning_rate": 9.878768070470356e-06, "loss": 0.51801147, "memory(GiB)": 34.88, "step": 17245, "train_speed(iter/s)": 0.419366 }, { "acc": 0.84425726, "epoch": 0.4670619770936561, "grad_norm": 7.3453168869018555, "learning_rate": 9.878645561135237e-06, "loss": 0.76027808, "memory(GiB)": 34.88, "step": 17250, "train_speed(iter/s)": 0.419371 }, { "acc": 0.84229727, "epoch": 0.4671973573768716, "grad_norm": 10.05084228515625, "learning_rate": 9.878522990691757e-06, "loss": 0.81405058, "memory(GiB)": 34.88, "step": 17255, "train_speed(iter/s)": 0.419374 }, { "acc": 0.85990219, "epoch": 0.4673327376600872, "grad_norm": 19.678953170776367, "learning_rate": 9.878400359141455e-06, "loss": 0.65882072, "memory(GiB)": 34.88, "step": 17260, "train_speed(iter/s)": 0.419378 }, { "acc": 0.83237181, "epoch": 0.46746811794330273, "grad_norm": 8.900043487548828, "learning_rate": 9.878277666485864e-06, "loss": 0.85150881, "memory(GiB)": 34.88, "step": 17265, "train_speed(iter/s)": 0.419382 }, { "acc": 0.83936634, "epoch": 0.4676034982265183, "grad_norm": 5.778773307800293, "learning_rate": 9.878154912726526e-06, "loss": 0.80844383, "memory(GiB)": 34.88, "step": 17270, "train_speed(iter/s)": 0.419385 }, { "acc": 0.83461714, "epoch": 0.46773887850973384, "grad_norm": 7.743055820465088, "learning_rate": 9.878032097864974e-06, "loss": 0.90547228, "memory(GiB)": 34.88, "step": 17275, "train_speed(iter/s)": 0.419389 }, { "acc": 0.86878567, "epoch": 0.4678742587929494, "grad_norm": 6.453896999359131, "learning_rate": 9.877909221902748e-06, "loss": 0.60524554, "memory(GiB)": 34.88, "step": 17280, "train_speed(iter/s)": 0.419393 }, { "acc": 0.84823532, "epoch": 0.46800963907616494, "grad_norm": 9.401926040649414, "learning_rate": 9.877786284841387e-06, "loss": 0.846947, "memory(GiB)": 34.88, "step": 17285, "train_speed(iter/s)": 0.419397 }, { "acc": 0.84271965, "epoch": 0.4681450193593805, "grad_norm": 6.514658451080322, "learning_rate": 9.877663286682434e-06, "loss": 0.84745016, "memory(GiB)": 34.88, "step": 17290, "train_speed(iter/s)": 0.4194 }, { "acc": 0.85538349, "epoch": 0.46828039964259605, "grad_norm": 12.435771942138672, "learning_rate": 9.877540227427424e-06, "loss": 0.77967639, "memory(GiB)": 34.88, "step": 17295, "train_speed(iter/s)": 0.419405 }, { "acc": 0.82112103, "epoch": 0.4684157799258116, "grad_norm": 9.732022285461426, "learning_rate": 9.877417107077903e-06, "loss": 0.98228693, "memory(GiB)": 34.88, "step": 17300, "train_speed(iter/s)": 0.419408 }, { "acc": 0.85656128, "epoch": 0.46855116020902715, "grad_norm": 5.592350959777832, "learning_rate": 9.877293925635411e-06, "loss": 0.66608262, "memory(GiB)": 34.88, "step": 17305, "train_speed(iter/s)": 0.419413 }, { "acc": 0.84955807, "epoch": 0.46868654049224273, "grad_norm": 16.464197158813477, "learning_rate": 9.877170683101495e-06, "loss": 0.67091165, "memory(GiB)": 34.88, "step": 17310, "train_speed(iter/s)": 0.419417 }, { "acc": 0.85307713, "epoch": 0.46882192077545826, "grad_norm": 7.649644374847412, "learning_rate": 9.877047379477695e-06, "loss": 0.7926136, "memory(GiB)": 34.88, "step": 17315, "train_speed(iter/s)": 0.419421 }, { "acc": 0.85505333, "epoch": 0.46895730105867384, "grad_norm": 7.67596960067749, "learning_rate": 9.876924014765557e-06, "loss": 0.81525183, "memory(GiB)": 34.88, "step": 17320, "train_speed(iter/s)": 0.419424 }, { "acc": 0.86963158, "epoch": 0.46909268134188936, "grad_norm": 7.615268230438232, "learning_rate": 9.876800588966623e-06, "loss": 0.65910482, "memory(GiB)": 34.88, "step": 17325, "train_speed(iter/s)": 0.419429 }, { "acc": 0.87041779, "epoch": 0.46922806162510494, "grad_norm": 5.3433518409729, "learning_rate": 9.876677102082444e-06, "loss": 0.61092811, "memory(GiB)": 34.88, "step": 17330, "train_speed(iter/s)": 0.419433 }, { "acc": 0.85618038, "epoch": 0.46936344190832047, "grad_norm": 16.575830459594727, "learning_rate": 9.876553554114567e-06, "loss": 0.72287436, "memory(GiB)": 34.88, "step": 17335, "train_speed(iter/s)": 0.419437 }, { "acc": 0.86040697, "epoch": 0.46949882219153605, "grad_norm": 12.709665298461914, "learning_rate": 9.876429945064536e-06, "loss": 0.74603133, "memory(GiB)": 34.88, "step": 17340, "train_speed(iter/s)": 0.419442 }, { "acc": 0.85890217, "epoch": 0.46963420247475157, "grad_norm": 6.544720649719238, "learning_rate": 9.876306274933902e-06, "loss": 0.68223615, "memory(GiB)": 34.88, "step": 17345, "train_speed(iter/s)": 0.419446 }, { "acc": 0.8817955, "epoch": 0.46976958275796715, "grad_norm": 8.35822868347168, "learning_rate": 9.876182543724213e-06, "loss": 0.53742104, "memory(GiB)": 34.88, "step": 17350, "train_speed(iter/s)": 0.41945 }, { "acc": 0.87088814, "epoch": 0.4699049630411827, "grad_norm": 11.41092586517334, "learning_rate": 9.87605875143702e-06, "loss": 0.68369107, "memory(GiB)": 34.88, "step": 17355, "train_speed(iter/s)": 0.419455 }, { "acc": 0.85183678, "epoch": 0.47004034332439826, "grad_norm": 10.283082962036133, "learning_rate": 9.875934898073873e-06, "loss": 0.79117584, "memory(GiB)": 34.88, "step": 17360, "train_speed(iter/s)": 0.419458 }, { "acc": 0.84871368, "epoch": 0.4701757236076138, "grad_norm": 9.380682945251465, "learning_rate": 9.875810983636321e-06, "loss": 0.92074556, "memory(GiB)": 34.88, "step": 17365, "train_speed(iter/s)": 0.419462 }, { "acc": 0.8509079, "epoch": 0.47031110389082936, "grad_norm": 7.382125377655029, "learning_rate": 9.875687008125921e-06, "loss": 0.82559624, "memory(GiB)": 34.88, "step": 17370, "train_speed(iter/s)": 0.419467 }, { "acc": 0.85201588, "epoch": 0.4704464841740449, "grad_norm": 8.68907356262207, "learning_rate": 9.875562971544226e-06, "loss": 0.71978369, "memory(GiB)": 34.88, "step": 17375, "train_speed(iter/s)": 0.419471 }, { "acc": 0.8486475, "epoch": 0.47058186445726047, "grad_norm": 5.078396320343018, "learning_rate": 9.875438873892787e-06, "loss": 0.72770376, "memory(GiB)": 34.88, "step": 17380, "train_speed(iter/s)": 0.419475 }, { "acc": 0.85005236, "epoch": 0.470717244740476, "grad_norm": 4.121246337890625, "learning_rate": 9.875314715173158e-06, "loss": 0.72762718, "memory(GiB)": 34.88, "step": 17385, "train_speed(iter/s)": 0.419479 }, { "acc": 0.84191828, "epoch": 0.4708526250236916, "grad_norm": 10.899587631225586, "learning_rate": 9.875190495386897e-06, "loss": 0.84365578, "memory(GiB)": 34.88, "step": 17390, "train_speed(iter/s)": 0.419483 }, { "acc": 0.84833374, "epoch": 0.4709880053069071, "grad_norm": 6.891962051391602, "learning_rate": 9.875066214535556e-06, "loss": 0.7922718, "memory(GiB)": 34.88, "step": 17395, "train_speed(iter/s)": 0.419487 }, { "acc": 0.83216267, "epoch": 0.4711233855901227, "grad_norm": 17.434953689575195, "learning_rate": 9.874941872620698e-06, "loss": 0.80792732, "memory(GiB)": 34.88, "step": 17400, "train_speed(iter/s)": 0.419492 }, { "acc": 0.86011333, "epoch": 0.4712587658733382, "grad_norm": 6.4152350425720215, "learning_rate": 9.874817469643876e-06, "loss": 0.67129474, "memory(GiB)": 34.88, "step": 17405, "train_speed(iter/s)": 0.419495 }, { "acc": 0.84884939, "epoch": 0.4713941461565538, "grad_norm": 11.423596382141113, "learning_rate": 9.87469300560665e-06, "loss": 0.67028503, "memory(GiB)": 34.88, "step": 17410, "train_speed(iter/s)": 0.4195 }, { "acc": 0.86350269, "epoch": 0.4715295264397693, "grad_norm": 15.486968994140625, "learning_rate": 9.874568480510577e-06, "loss": 0.78118749, "memory(GiB)": 34.88, "step": 17415, "train_speed(iter/s)": 0.419505 }, { "acc": 0.84709854, "epoch": 0.4716649067229849, "grad_norm": 13.917011260986328, "learning_rate": 9.874443894357221e-06, "loss": 0.79878006, "memory(GiB)": 34.88, "step": 17420, "train_speed(iter/s)": 0.419509 }, { "acc": 0.84746456, "epoch": 0.4718002870062004, "grad_norm": 11.539769172668457, "learning_rate": 9.874319247148139e-06, "loss": 0.83235245, "memory(GiB)": 34.88, "step": 17425, "train_speed(iter/s)": 0.419513 }, { "acc": 0.85971651, "epoch": 0.471935667289416, "grad_norm": 8.330109596252441, "learning_rate": 9.874194538884895e-06, "loss": 0.76789312, "memory(GiB)": 34.88, "step": 17430, "train_speed(iter/s)": 0.419517 }, { "acc": 0.85002995, "epoch": 0.4720710475726315, "grad_norm": 4.060052871704102, "learning_rate": 9.87406976956905e-06, "loss": 0.78515267, "memory(GiB)": 34.88, "step": 17435, "train_speed(iter/s)": 0.419521 }, { "acc": 0.87565269, "epoch": 0.4722064278558471, "grad_norm": 9.105657577514648, "learning_rate": 9.873944939202166e-06, "loss": 0.60958757, "memory(GiB)": 34.88, "step": 17440, "train_speed(iter/s)": 0.419525 }, { "acc": 0.85554304, "epoch": 0.4723418081390626, "grad_norm": 10.743717193603516, "learning_rate": 9.873820047785808e-06, "loss": 0.65609245, "memory(GiB)": 34.88, "step": 17445, "train_speed(iter/s)": 0.419529 }, { "acc": 0.88094816, "epoch": 0.4724771884222782, "grad_norm": 26.197370529174805, "learning_rate": 9.873695095321542e-06, "loss": 0.65999742, "memory(GiB)": 34.88, "step": 17450, "train_speed(iter/s)": 0.419534 }, { "acc": 0.86611195, "epoch": 0.47261256870549373, "grad_norm": 7.53648567199707, "learning_rate": 9.87357008181093e-06, "loss": 0.62140646, "memory(GiB)": 34.88, "step": 17455, "train_speed(iter/s)": 0.419538 }, { "acc": 0.84957829, "epoch": 0.4727479489887093, "grad_norm": 11.884073257446289, "learning_rate": 9.87344500725554e-06, "loss": 0.84565439, "memory(GiB)": 34.88, "step": 17460, "train_speed(iter/s)": 0.419542 }, { "acc": 0.85567627, "epoch": 0.47288332927192483, "grad_norm": 14.751023292541504, "learning_rate": 9.873319871656938e-06, "loss": 0.70538425, "memory(GiB)": 34.88, "step": 17465, "train_speed(iter/s)": 0.419546 }, { "acc": 0.87032185, "epoch": 0.4730187095551404, "grad_norm": 6.813783645629883, "learning_rate": 9.873194675016694e-06, "loss": 0.65673046, "memory(GiB)": 34.88, "step": 17470, "train_speed(iter/s)": 0.41955 }, { "acc": 0.85167789, "epoch": 0.47315408983835594, "grad_norm": 5.790561199188232, "learning_rate": 9.873069417336373e-06, "loss": 0.7052866, "memory(GiB)": 34.88, "step": 17475, "train_speed(iter/s)": 0.419554 }, { "acc": 0.87090912, "epoch": 0.4732894701215715, "grad_norm": 7.521660804748535, "learning_rate": 9.872944098617546e-06, "loss": 0.71716781, "memory(GiB)": 34.88, "step": 17480, "train_speed(iter/s)": 0.419558 }, { "acc": 0.86922264, "epoch": 0.47342485040478705, "grad_norm": 6.195257663726807, "learning_rate": 9.872818718861782e-06, "loss": 0.61548605, "memory(GiB)": 34.88, "step": 17485, "train_speed(iter/s)": 0.419562 }, { "acc": 0.8712698, "epoch": 0.4735602306880026, "grad_norm": 9.05091381072998, "learning_rate": 9.872693278070653e-06, "loss": 0.66870136, "memory(GiB)": 34.88, "step": 17490, "train_speed(iter/s)": 0.419566 }, { "acc": 0.86774054, "epoch": 0.47369561097121815, "grad_norm": 8.865904808044434, "learning_rate": 9.872567776245727e-06, "loss": 0.55944071, "memory(GiB)": 34.88, "step": 17495, "train_speed(iter/s)": 0.419571 }, { "acc": 0.86347351, "epoch": 0.47383099125443373, "grad_norm": 8.121015548706055, "learning_rate": 9.872442213388577e-06, "loss": 0.7161397, "memory(GiB)": 34.88, "step": 17500, "train_speed(iter/s)": 0.419575 }, { "acc": 0.81586952, "epoch": 0.47396637153764926, "grad_norm": 6.850768089294434, "learning_rate": 9.872316589500781e-06, "loss": 1.00645313, "memory(GiB)": 34.88, "step": 17505, "train_speed(iter/s)": 0.419579 }, { "acc": 0.85543633, "epoch": 0.47410175182086484, "grad_norm": 8.782227516174316, "learning_rate": 9.872190904583909e-06, "loss": 0.79020524, "memory(GiB)": 34.88, "step": 17510, "train_speed(iter/s)": 0.419583 }, { "acc": 0.87469578, "epoch": 0.47423713210408036, "grad_norm": 6.657199859619141, "learning_rate": 9.872065158639534e-06, "loss": 0.66548123, "memory(GiB)": 34.88, "step": 17515, "train_speed(iter/s)": 0.419587 }, { "acc": 0.84923048, "epoch": 0.47437251238729594, "grad_norm": 8.703073501586914, "learning_rate": 9.871939351669234e-06, "loss": 0.77094536, "memory(GiB)": 34.88, "step": 17520, "train_speed(iter/s)": 0.419592 }, { "acc": 0.83541641, "epoch": 0.47450789267051147, "grad_norm": 6.269249439239502, "learning_rate": 9.871813483674585e-06, "loss": 0.88302193, "memory(GiB)": 34.88, "step": 17525, "train_speed(iter/s)": 0.419595 }, { "acc": 0.86763592, "epoch": 0.47464327295372705, "grad_norm": 7.384477138519287, "learning_rate": 9.87168755465716e-06, "loss": 0.66695309, "memory(GiB)": 34.88, "step": 17530, "train_speed(iter/s)": 0.4196 }, { "acc": 0.86727867, "epoch": 0.47477865323694257, "grad_norm": 11.929986953735352, "learning_rate": 9.871561564618541e-06, "loss": 0.56009326, "memory(GiB)": 34.88, "step": 17535, "train_speed(iter/s)": 0.419604 }, { "acc": 0.87365894, "epoch": 0.4749140335201581, "grad_norm": 16.44692039489746, "learning_rate": 9.871435513560302e-06, "loss": 0.61581774, "memory(GiB)": 34.88, "step": 17540, "train_speed(iter/s)": 0.419608 }, { "acc": 0.87071438, "epoch": 0.4750494138033737, "grad_norm": 17.748674392700195, "learning_rate": 9.871309401484025e-06, "loss": 0.60162516, "memory(GiB)": 34.88, "step": 17545, "train_speed(iter/s)": 0.419612 }, { "acc": 0.86587849, "epoch": 0.4751847940865892, "grad_norm": 10.168584823608398, "learning_rate": 9.87118322839129e-06, "loss": 0.68306541, "memory(GiB)": 34.88, "step": 17550, "train_speed(iter/s)": 0.419616 }, { "acc": 0.86011868, "epoch": 0.4753201743698048, "grad_norm": 4.221535682678223, "learning_rate": 9.871056994283675e-06, "loss": 0.76144714, "memory(GiB)": 34.88, "step": 17555, "train_speed(iter/s)": 0.41962 }, { "acc": 0.84942112, "epoch": 0.4754555546530203, "grad_norm": 4.643216609954834, "learning_rate": 9.870930699162764e-06, "loss": 0.79625835, "memory(GiB)": 34.88, "step": 17560, "train_speed(iter/s)": 0.419624 }, { "acc": 0.86720905, "epoch": 0.4755909349362359, "grad_norm": 9.361969947814941, "learning_rate": 9.870804343030139e-06, "loss": 0.68887577, "memory(GiB)": 34.88, "step": 17565, "train_speed(iter/s)": 0.419628 }, { "acc": 0.8544466, "epoch": 0.4757263152194514, "grad_norm": 8.142678260803223, "learning_rate": 9.870677925887382e-06, "loss": 0.69475956, "memory(GiB)": 34.88, "step": 17570, "train_speed(iter/s)": 0.419632 }, { "acc": 0.86987133, "epoch": 0.475861695502667, "grad_norm": 6.930013179779053, "learning_rate": 9.870551447736076e-06, "loss": 0.66986055, "memory(GiB)": 34.88, "step": 17575, "train_speed(iter/s)": 0.419637 }, { "acc": 0.86896029, "epoch": 0.4759970757858825, "grad_norm": 10.34201431274414, "learning_rate": 9.870424908577806e-06, "loss": 0.72646065, "memory(GiB)": 34.88, "step": 17580, "train_speed(iter/s)": 0.41964 }, { "acc": 0.84421482, "epoch": 0.4761324560690981, "grad_norm": 18.998109817504883, "learning_rate": 9.870298308414156e-06, "loss": 0.84390373, "memory(GiB)": 34.88, "step": 17585, "train_speed(iter/s)": 0.419644 }, { "acc": 0.85732384, "epoch": 0.4762678363523136, "grad_norm": 8.872379302978516, "learning_rate": 9.870171647246715e-06, "loss": 0.74229236, "memory(GiB)": 34.88, "step": 17590, "train_speed(iter/s)": 0.419649 }, { "acc": 0.87037783, "epoch": 0.4764032166355292, "grad_norm": 5.674380302429199, "learning_rate": 9.870044925077066e-06, "loss": 0.6827981, "memory(GiB)": 34.88, "step": 17595, "train_speed(iter/s)": 0.419653 }, { "acc": 0.85825014, "epoch": 0.47653859691874473, "grad_norm": 8.576272964477539, "learning_rate": 9.8699181419068e-06, "loss": 0.7483727, "memory(GiB)": 34.88, "step": 17600, "train_speed(iter/s)": 0.419657 }, { "acc": 0.85407896, "epoch": 0.4766739772019603, "grad_norm": 9.355534553527832, "learning_rate": 9.869791297737502e-06, "loss": 0.79084725, "memory(GiB)": 34.88, "step": 17605, "train_speed(iter/s)": 0.419661 }, { "acc": 0.85373726, "epoch": 0.47680935748517583, "grad_norm": 6.356855869293213, "learning_rate": 9.869664392570763e-06, "loss": 0.70196743, "memory(GiB)": 34.88, "step": 17610, "train_speed(iter/s)": 0.419665 }, { "acc": 0.8479002, "epoch": 0.4769447377683914, "grad_norm": 7.6345744132995605, "learning_rate": 9.869537426408172e-06, "loss": 0.74900475, "memory(GiB)": 34.88, "step": 17615, "train_speed(iter/s)": 0.419669 }, { "acc": 0.86895208, "epoch": 0.47708011805160694, "grad_norm": 10.106806755065918, "learning_rate": 9.86941039925132e-06, "loss": 0.85736332, "memory(GiB)": 34.88, "step": 17620, "train_speed(iter/s)": 0.419674 }, { "acc": 0.82439928, "epoch": 0.4772154983348225, "grad_norm": 16.62520980834961, "learning_rate": 9.869283311101797e-06, "loss": 0.90574341, "memory(GiB)": 34.88, "step": 17625, "train_speed(iter/s)": 0.419678 }, { "acc": 0.86582756, "epoch": 0.47735087861803804, "grad_norm": 6.287466049194336, "learning_rate": 9.869156161961197e-06, "loss": 0.67493815, "memory(GiB)": 34.88, "step": 17630, "train_speed(iter/s)": 0.419681 }, { "acc": 0.86323357, "epoch": 0.4774862589012536, "grad_norm": 8.615264892578125, "learning_rate": 9.86902895183111e-06, "loss": 0.73847914, "memory(GiB)": 34.88, "step": 17635, "train_speed(iter/s)": 0.419686 }, { "acc": 0.838974, "epoch": 0.47762163918446915, "grad_norm": 13.815069198608398, "learning_rate": 9.868901680713133e-06, "loss": 0.84141502, "memory(GiB)": 34.88, "step": 17640, "train_speed(iter/s)": 0.419689 }, { "acc": 0.87887344, "epoch": 0.47775701946768473, "grad_norm": 9.222084999084473, "learning_rate": 9.868774348608857e-06, "loss": 0.61925406, "memory(GiB)": 34.88, "step": 17645, "train_speed(iter/s)": 0.419693 }, { "acc": 0.85536327, "epoch": 0.47789239975090025, "grad_norm": 7.121380805969238, "learning_rate": 9.86864695551988e-06, "loss": 0.70353222, "memory(GiB)": 34.88, "step": 17650, "train_speed(iter/s)": 0.419697 }, { "acc": 0.84845963, "epoch": 0.47802778003411583, "grad_norm": 5.341585159301758, "learning_rate": 9.868519501447795e-06, "loss": 0.78366413, "memory(GiB)": 34.88, "step": 17655, "train_speed(iter/s)": 0.419701 }, { "acc": 0.84660587, "epoch": 0.47816316031733136, "grad_norm": 8.497039794921875, "learning_rate": 9.868391986394201e-06, "loss": 0.71409025, "memory(GiB)": 34.88, "step": 17660, "train_speed(iter/s)": 0.419705 }, { "acc": 0.83673592, "epoch": 0.47829854060054694, "grad_norm": 10.393622398376465, "learning_rate": 9.868264410360695e-06, "loss": 0.85325089, "memory(GiB)": 34.88, "step": 17665, "train_speed(iter/s)": 0.41971 }, { "acc": 0.83690548, "epoch": 0.47843392088376246, "grad_norm": 5.740002155303955, "learning_rate": 9.868136773348873e-06, "loss": 0.8038559, "memory(GiB)": 34.88, "step": 17670, "train_speed(iter/s)": 0.419713 }, { "acc": 0.85505447, "epoch": 0.47856930116697805, "grad_norm": 11.970423698425293, "learning_rate": 9.868009075360336e-06, "loss": 0.69125376, "memory(GiB)": 34.88, "step": 17675, "train_speed(iter/s)": 0.419716 }, { "acc": 0.88662205, "epoch": 0.47870468145019357, "grad_norm": 5.726503849029541, "learning_rate": 9.867881316396685e-06, "loss": 0.62725792, "memory(GiB)": 34.88, "step": 17680, "train_speed(iter/s)": 0.419721 }, { "acc": 0.82817526, "epoch": 0.47884006173340915, "grad_norm": 16.427698135375977, "learning_rate": 9.867753496459518e-06, "loss": 0.80537434, "memory(GiB)": 34.88, "step": 17685, "train_speed(iter/s)": 0.419724 }, { "acc": 0.86262207, "epoch": 0.4789754420166247, "grad_norm": 19.031850814819336, "learning_rate": 9.867625615550434e-06, "loss": 0.82507648, "memory(GiB)": 34.88, "step": 17690, "train_speed(iter/s)": 0.419728 }, { "acc": 0.8524868, "epoch": 0.47911082229984026, "grad_norm": 6.0144243240356445, "learning_rate": 9.867497673671041e-06, "loss": 0.68166747, "memory(GiB)": 34.88, "step": 17695, "train_speed(iter/s)": 0.419732 }, { "acc": 0.85574102, "epoch": 0.4792462025830558, "grad_norm": 9.097885131835938, "learning_rate": 9.867369670822937e-06, "loss": 0.83430586, "memory(GiB)": 34.88, "step": 17700, "train_speed(iter/s)": 0.419737 }, { "acc": 0.88155804, "epoch": 0.47938158286627136, "grad_norm": 14.272222518920898, "learning_rate": 9.867241607007728e-06, "loss": 0.57674265, "memory(GiB)": 34.88, "step": 17705, "train_speed(iter/s)": 0.41974 }, { "acc": 0.87432594, "epoch": 0.4795169631494869, "grad_norm": 4.5955095291137695, "learning_rate": 9.867113482227017e-06, "loss": 0.65768309, "memory(GiB)": 34.88, "step": 17710, "train_speed(iter/s)": 0.419744 }, { "acc": 0.86696701, "epoch": 0.47965234343270247, "grad_norm": 8.798728942871094, "learning_rate": 9.86698529648241e-06, "loss": 0.74465499, "memory(GiB)": 34.88, "step": 17715, "train_speed(iter/s)": 0.419748 }, { "acc": 0.8527338, "epoch": 0.479787723715918, "grad_norm": 5.944244384765625, "learning_rate": 9.86685704977551e-06, "loss": 0.72584991, "memory(GiB)": 34.88, "step": 17720, "train_speed(iter/s)": 0.419752 }, { "acc": 0.83805141, "epoch": 0.47992310399913357, "grad_norm": 9.788920402526855, "learning_rate": 9.866728742107927e-06, "loss": 0.89596643, "memory(GiB)": 34.88, "step": 17725, "train_speed(iter/s)": 0.419756 }, { "acc": 0.85784855, "epoch": 0.4800584842823491, "grad_norm": 6.043364524841309, "learning_rate": 9.866600373481266e-06, "loss": 0.79008741, "memory(GiB)": 34.88, "step": 17730, "train_speed(iter/s)": 0.41976 }, { "acc": 0.8415678, "epoch": 0.4801938645655647, "grad_norm": 8.234039306640625, "learning_rate": 9.866471943897137e-06, "loss": 0.82517366, "memory(GiB)": 34.88, "step": 17735, "train_speed(iter/s)": 0.419764 }, { "acc": 0.84460964, "epoch": 0.4803292448487802, "grad_norm": 5.93577241897583, "learning_rate": 9.866343453357146e-06, "loss": 0.74095879, "memory(GiB)": 34.88, "step": 17740, "train_speed(iter/s)": 0.419768 }, { "acc": 0.85779753, "epoch": 0.4804646251319958, "grad_norm": 7.732983112335205, "learning_rate": 9.866214901862907e-06, "loss": 0.84354534, "memory(GiB)": 34.88, "step": 17745, "train_speed(iter/s)": 0.419772 }, { "acc": 0.87509832, "epoch": 0.4806000054152113, "grad_norm": 13.197344779968262, "learning_rate": 9.866086289416025e-06, "loss": 0.63114271, "memory(GiB)": 34.88, "step": 17750, "train_speed(iter/s)": 0.419777 }, { "acc": 0.85058975, "epoch": 0.4807353856984269, "grad_norm": 11.006730079650879, "learning_rate": 9.865957616018115e-06, "loss": 0.73090191, "memory(GiB)": 34.88, "step": 17755, "train_speed(iter/s)": 0.41978 }, { "acc": 0.8661726, "epoch": 0.4808707659816424, "grad_norm": 11.606260299682617, "learning_rate": 9.865828881670789e-06, "loss": 0.68043251, "memory(GiB)": 34.88, "step": 17760, "train_speed(iter/s)": 0.419784 }, { "acc": 0.85149574, "epoch": 0.481006146264858, "grad_norm": 6.345789909362793, "learning_rate": 9.865700086375657e-06, "loss": 0.82098875, "memory(GiB)": 34.88, "step": 17765, "train_speed(iter/s)": 0.419788 }, { "acc": 0.86413002, "epoch": 0.4811415265480735, "grad_norm": 6.740135192871094, "learning_rate": 9.865571230134334e-06, "loss": 0.66190095, "memory(GiB)": 34.88, "step": 17770, "train_speed(iter/s)": 0.419792 }, { "acc": 0.87177992, "epoch": 0.4812769068312891, "grad_norm": 17.076963424682617, "learning_rate": 9.865442312948435e-06, "loss": 0.67054114, "memory(GiB)": 34.88, "step": 17775, "train_speed(iter/s)": 0.419796 }, { "acc": 0.85798054, "epoch": 0.4814122871145046, "grad_norm": 6.991623878479004, "learning_rate": 9.865313334819572e-06, "loss": 0.73097863, "memory(GiB)": 34.88, "step": 17780, "train_speed(iter/s)": 0.4198 }, { "acc": 0.84577751, "epoch": 0.4815476673977202, "grad_norm": 23.942590713500977, "learning_rate": 9.865184295749364e-06, "loss": 0.88631516, "memory(GiB)": 34.88, "step": 17785, "train_speed(iter/s)": 0.419803 }, { "acc": 0.87471848, "epoch": 0.4816830476809357, "grad_norm": 4.869440078735352, "learning_rate": 9.865055195739426e-06, "loss": 0.59227991, "memory(GiB)": 34.88, "step": 17790, "train_speed(iter/s)": 0.419807 }, { "acc": 0.83964443, "epoch": 0.4818184279641513, "grad_norm": 9.81409740447998, "learning_rate": 9.864926034791374e-06, "loss": 0.8465867, "memory(GiB)": 34.88, "step": 17795, "train_speed(iter/s)": 0.419811 }, { "acc": 0.82631311, "epoch": 0.48195380824736683, "grad_norm": 7.021780014038086, "learning_rate": 9.86479681290683e-06, "loss": 0.88234615, "memory(GiB)": 34.88, "step": 17800, "train_speed(iter/s)": 0.419815 }, { "acc": 0.85295258, "epoch": 0.4820891885305824, "grad_norm": 8.827539443969727, "learning_rate": 9.864667530087408e-06, "loss": 0.71927228, "memory(GiB)": 34.88, "step": 17805, "train_speed(iter/s)": 0.419818 }, { "acc": 0.83590527, "epoch": 0.48222456881379794, "grad_norm": 7.862199306488037, "learning_rate": 9.86453818633473e-06, "loss": 0.90104637, "memory(GiB)": 34.88, "step": 17810, "train_speed(iter/s)": 0.419821 }, { "acc": 0.84697037, "epoch": 0.4823599490970135, "grad_norm": 7.319089412689209, "learning_rate": 9.864408781650417e-06, "loss": 0.72751021, "memory(GiB)": 34.88, "step": 17815, "train_speed(iter/s)": 0.419825 }, { "acc": 0.87015114, "epoch": 0.48249532938022904, "grad_norm": 9.522855758666992, "learning_rate": 9.864279316036087e-06, "loss": 0.61932859, "memory(GiB)": 34.88, "step": 17820, "train_speed(iter/s)": 0.41983 }, { "acc": 0.85196972, "epoch": 0.4826307096634446, "grad_norm": 17.003366470336914, "learning_rate": 9.864149789493366e-06, "loss": 0.6879015, "memory(GiB)": 34.88, "step": 17825, "train_speed(iter/s)": 0.419834 }, { "acc": 0.87258816, "epoch": 0.48276608994666015, "grad_norm": 4.936910629272461, "learning_rate": 9.864020202023874e-06, "loss": 0.65516911, "memory(GiB)": 34.88, "step": 17830, "train_speed(iter/s)": 0.419838 }, { "acc": 0.86143665, "epoch": 0.48290147022987573, "grad_norm": 14.040456771850586, "learning_rate": 9.863890553629233e-06, "loss": 0.67188225, "memory(GiB)": 34.88, "step": 17835, "train_speed(iter/s)": 0.419842 }, { "acc": 0.88064032, "epoch": 0.48303685051309125, "grad_norm": 8.012097358703613, "learning_rate": 9.86376084431107e-06, "loss": 0.61507025, "memory(GiB)": 34.88, "step": 17840, "train_speed(iter/s)": 0.419846 }, { "acc": 0.86755247, "epoch": 0.48317223079630683, "grad_norm": 7.817777633666992, "learning_rate": 9.863631074071007e-06, "loss": 0.68157749, "memory(GiB)": 34.88, "step": 17845, "train_speed(iter/s)": 0.41985 }, { "acc": 0.85575581, "epoch": 0.48330761107952236, "grad_norm": 9.006749153137207, "learning_rate": 9.863501242910673e-06, "loss": 0.68797655, "memory(GiB)": 34.88, "step": 17850, "train_speed(iter/s)": 0.419854 }, { "acc": 0.84693279, "epoch": 0.48344299136273794, "grad_norm": 5.581990718841553, "learning_rate": 9.863371350831692e-06, "loss": 0.73148131, "memory(GiB)": 34.88, "step": 17855, "train_speed(iter/s)": 0.419858 }, { "acc": 0.87187004, "epoch": 0.48357837164595346, "grad_norm": 12.615009307861328, "learning_rate": 9.863241397835693e-06, "loss": 0.66551824, "memory(GiB)": 34.88, "step": 17860, "train_speed(iter/s)": 0.419862 }, { "acc": 0.81748934, "epoch": 0.48371375192916904, "grad_norm": 13.250532150268555, "learning_rate": 9.863111383924301e-06, "loss": 1.04957333, "memory(GiB)": 34.88, "step": 17865, "train_speed(iter/s)": 0.419866 }, { "acc": 0.86404572, "epoch": 0.48384913221238457, "grad_norm": 9.376349449157715, "learning_rate": 9.862981309099146e-06, "loss": 0.69095325, "memory(GiB)": 34.88, "step": 17870, "train_speed(iter/s)": 0.419869 }, { "acc": 0.86104431, "epoch": 0.48398451249560015, "grad_norm": 7.0289106369018555, "learning_rate": 9.86285117336186e-06, "loss": 0.73489876, "memory(GiB)": 34.88, "step": 17875, "train_speed(iter/s)": 0.419873 }, { "acc": 0.86379776, "epoch": 0.4841198927788157, "grad_norm": 6.495458126068115, "learning_rate": 9.86272097671407e-06, "loss": 0.60823402, "memory(GiB)": 34.88, "step": 17880, "train_speed(iter/s)": 0.419877 }, { "acc": 0.8381052, "epoch": 0.48425527306203126, "grad_norm": 17.43063735961914, "learning_rate": 9.862590719157409e-06, "loss": 0.85770082, "memory(GiB)": 34.88, "step": 17885, "train_speed(iter/s)": 0.419882 }, { "acc": 0.85110817, "epoch": 0.4843906533452468, "grad_norm": 16.000999450683594, "learning_rate": 9.862460400693506e-06, "loss": 0.74178886, "memory(GiB)": 34.88, "step": 17890, "train_speed(iter/s)": 0.419885 }, { "acc": 0.83999071, "epoch": 0.48452603362846236, "grad_norm": 8.89329719543457, "learning_rate": 9.862330021323996e-06, "loss": 0.83414078, "memory(GiB)": 34.88, "step": 17895, "train_speed(iter/s)": 0.419889 }, { "acc": 0.84978924, "epoch": 0.4846614139116779, "grad_norm": 7.916197776794434, "learning_rate": 9.862199581050511e-06, "loss": 0.75338898, "memory(GiB)": 34.88, "step": 17900, "train_speed(iter/s)": 0.419892 }, { "acc": 0.8348031, "epoch": 0.48479679419489347, "grad_norm": 7.573126792907715, "learning_rate": 9.862069079874687e-06, "loss": 0.84090996, "memory(GiB)": 34.88, "step": 17905, "train_speed(iter/s)": 0.419897 }, { "acc": 0.8571619, "epoch": 0.484932174478109, "grad_norm": 6.473740100860596, "learning_rate": 9.861938517798156e-06, "loss": 0.64140973, "memory(GiB)": 34.88, "step": 17910, "train_speed(iter/s)": 0.4199 }, { "acc": 0.87124672, "epoch": 0.48506755476132457, "grad_norm": 5.12910270690918, "learning_rate": 9.861807894822555e-06, "loss": 0.69997511, "memory(GiB)": 34.88, "step": 17915, "train_speed(iter/s)": 0.419904 }, { "acc": 0.87743912, "epoch": 0.4852029350445401, "grad_norm": 10.857758522033691, "learning_rate": 9.86167721094952e-06, "loss": 0.52315717, "memory(GiB)": 34.88, "step": 17920, "train_speed(iter/s)": 0.419908 }, { "acc": 0.82800922, "epoch": 0.4853383153277557, "grad_norm": 8.157116889953613, "learning_rate": 9.86154646618069e-06, "loss": 0.79094534, "memory(GiB)": 34.88, "step": 17925, "train_speed(iter/s)": 0.419912 }, { "acc": 0.86825361, "epoch": 0.4854736956109712, "grad_norm": 6.7937846183776855, "learning_rate": 9.861415660517698e-06, "loss": 0.66571884, "memory(GiB)": 34.88, "step": 17930, "train_speed(iter/s)": 0.419915 }, { "acc": 0.84628773, "epoch": 0.4856090758941868, "grad_norm": 3.0458920001983643, "learning_rate": 9.861284793962189e-06, "loss": 0.83264408, "memory(GiB)": 34.88, "step": 17935, "train_speed(iter/s)": 0.41992 }, { "acc": 0.85932808, "epoch": 0.4857444561774023, "grad_norm": 6.850589275360107, "learning_rate": 9.861153866515795e-06, "loss": 0.67975655, "memory(GiB)": 34.88, "step": 17940, "train_speed(iter/s)": 0.419924 }, { "acc": 0.83644161, "epoch": 0.4858798364606179, "grad_norm": 5.6941423416137695, "learning_rate": 9.861022878180163e-06, "loss": 0.73667355, "memory(GiB)": 34.88, "step": 17945, "train_speed(iter/s)": 0.419928 }, { "acc": 0.87200193, "epoch": 0.4860152167438334, "grad_norm": 7.009241580963135, "learning_rate": 9.860891828956932e-06, "loss": 0.60515785, "memory(GiB)": 34.88, "step": 17950, "train_speed(iter/s)": 0.419932 }, { "acc": 0.87376747, "epoch": 0.486150597027049, "grad_norm": 12.449165344238281, "learning_rate": 9.86076071884774e-06, "loss": 0.67089295, "memory(GiB)": 34.88, "step": 17955, "train_speed(iter/s)": 0.419936 }, { "acc": 0.86864204, "epoch": 0.4862859773102645, "grad_norm": 10.601043701171875, "learning_rate": 9.860629547854233e-06, "loss": 0.71907973, "memory(GiB)": 34.88, "step": 17960, "train_speed(iter/s)": 0.41994 }, { "acc": 0.83854742, "epoch": 0.4864213575934801, "grad_norm": 13.812369346618652, "learning_rate": 9.860498315978054e-06, "loss": 0.84198856, "memory(GiB)": 34.88, "step": 17965, "train_speed(iter/s)": 0.419943 }, { "acc": 0.86487532, "epoch": 0.4865567378766956, "grad_norm": 6.782001972198486, "learning_rate": 9.860367023220847e-06, "loss": 0.60050988, "memory(GiB)": 34.88, "step": 17970, "train_speed(iter/s)": 0.419946 }, { "acc": 0.84956064, "epoch": 0.4866921181599112, "grad_norm": 3.736668109893799, "learning_rate": 9.860235669584254e-06, "loss": 0.77571664, "memory(GiB)": 34.88, "step": 17975, "train_speed(iter/s)": 0.419949 }, { "acc": 0.86046734, "epoch": 0.4868274984431267, "grad_norm": 17.824216842651367, "learning_rate": 9.860104255069923e-06, "loss": 0.73186464, "memory(GiB)": 34.88, "step": 17980, "train_speed(iter/s)": 0.419953 }, { "acc": 0.83994722, "epoch": 0.4869628787263423, "grad_norm": 3.8107030391693115, "learning_rate": 9.859972779679498e-06, "loss": 0.8992487, "memory(GiB)": 34.88, "step": 17985, "train_speed(iter/s)": 0.419957 }, { "acc": 0.85108519, "epoch": 0.48709825900955783, "grad_norm": 7.1871747970581055, "learning_rate": 9.85984124341463e-06, "loss": 0.76445999, "memory(GiB)": 34.88, "step": 17990, "train_speed(iter/s)": 0.419961 }, { "acc": 0.89520054, "epoch": 0.4872336392927734, "grad_norm": 4.661371231079102, "learning_rate": 9.859709646276967e-06, "loss": 0.54551692, "memory(GiB)": 34.88, "step": 17995, "train_speed(iter/s)": 0.419966 }, { "acc": 0.83519707, "epoch": 0.48736901957598894, "grad_norm": 8.9371976852417, "learning_rate": 9.85957798826815e-06, "loss": 0.82254457, "memory(GiB)": 34.88, "step": 18000, "train_speed(iter/s)": 0.419969 }, { "acc": 0.83759298, "epoch": 0.4875043998592045, "grad_norm": 8.746129035949707, "learning_rate": 9.859446269389833e-06, "loss": 0.94888039, "memory(GiB)": 34.88, "step": 18005, "train_speed(iter/s)": 0.419973 }, { "acc": 0.84961586, "epoch": 0.48763978014242004, "grad_norm": 7.700508117675781, "learning_rate": 9.859314489643669e-06, "loss": 0.72191939, "memory(GiB)": 34.88, "step": 18010, "train_speed(iter/s)": 0.419977 }, { "acc": 0.82418242, "epoch": 0.4877751604256356, "grad_norm": 7.48177433013916, "learning_rate": 9.859182649031304e-06, "loss": 0.85545578, "memory(GiB)": 34.88, "step": 18015, "train_speed(iter/s)": 0.41998 }, { "acc": 0.83751163, "epoch": 0.48791054070885115, "grad_norm": 7.74905252456665, "learning_rate": 9.859050747554393e-06, "loss": 0.68575306, "memory(GiB)": 34.88, "step": 18020, "train_speed(iter/s)": 0.419985 }, { "acc": 0.87096481, "epoch": 0.48804592099206673, "grad_norm": 10.902579307556152, "learning_rate": 9.858918785214584e-06, "loss": 0.66797643, "memory(GiB)": 34.88, "step": 18025, "train_speed(iter/s)": 0.419989 }, { "acc": 0.86733131, "epoch": 0.48818130127528225, "grad_norm": 10.287949562072754, "learning_rate": 9.858786762013538e-06, "loss": 0.68042946, "memory(GiB)": 34.88, "step": 18030, "train_speed(iter/s)": 0.419993 }, { "acc": 0.83884964, "epoch": 0.48831668155849783, "grad_norm": 12.084737777709961, "learning_rate": 9.858654677952901e-06, "loss": 0.85236111, "memory(GiB)": 34.88, "step": 18035, "train_speed(iter/s)": 0.419996 }, { "acc": 0.85469055, "epoch": 0.48845206184171336, "grad_norm": 7.869819641113281, "learning_rate": 9.858522533034328e-06, "loss": 0.74392214, "memory(GiB)": 34.88, "step": 18040, "train_speed(iter/s)": 0.42 }, { "acc": 0.82939491, "epoch": 0.48858744212492894, "grad_norm": 12.071586608886719, "learning_rate": 9.85839032725948e-06, "loss": 1.04051037, "memory(GiB)": 34.88, "step": 18045, "train_speed(iter/s)": 0.420004 }, { "acc": 0.9016777, "epoch": 0.48872282240814446, "grad_norm": 6.0110931396484375, "learning_rate": 9.858258060630009e-06, "loss": 0.446979, "memory(GiB)": 34.88, "step": 18050, "train_speed(iter/s)": 0.420008 }, { "acc": 0.86402779, "epoch": 0.48885820269136004, "grad_norm": 7.604532241821289, "learning_rate": 9.858125733147572e-06, "loss": 0.72403226, "memory(GiB)": 34.88, "step": 18055, "train_speed(iter/s)": 0.420012 }, { "acc": 0.86002407, "epoch": 0.48899358297457557, "grad_norm": 12.88875675201416, "learning_rate": 9.85799334481383e-06, "loss": 0.69855156, "memory(GiB)": 34.88, "step": 18060, "train_speed(iter/s)": 0.420016 }, { "acc": 0.84356346, "epoch": 0.48912896325779115, "grad_norm": 8.359898567199707, "learning_rate": 9.857860895630438e-06, "loss": 0.81688643, "memory(GiB)": 34.88, "step": 18065, "train_speed(iter/s)": 0.42002 }, { "acc": 0.86543694, "epoch": 0.4892643435410067, "grad_norm": 10.403951644897461, "learning_rate": 9.857728385599054e-06, "loss": 0.65001135, "memory(GiB)": 34.88, "step": 18070, "train_speed(iter/s)": 0.420024 }, { "acc": 0.86020899, "epoch": 0.48939972382422225, "grad_norm": 10.020648002624512, "learning_rate": 9.85759581472134e-06, "loss": 0.61225939, "memory(GiB)": 34.88, "step": 18075, "train_speed(iter/s)": 0.420027 }, { "acc": 0.86833458, "epoch": 0.4895351041074378, "grad_norm": 10.101728439331055, "learning_rate": 9.85746318299896e-06, "loss": 0.66686602, "memory(GiB)": 34.88, "step": 18080, "train_speed(iter/s)": 0.420031 }, { "acc": 0.86068087, "epoch": 0.48967048439065336, "grad_norm": 25.332304000854492, "learning_rate": 9.85733049043357e-06, "loss": 0.71592751, "memory(GiB)": 34.88, "step": 18085, "train_speed(iter/s)": 0.420034 }, { "acc": 0.85072441, "epoch": 0.4898058646738689, "grad_norm": 15.469481468200684, "learning_rate": 9.857197737026833e-06, "loss": 0.74387784, "memory(GiB)": 34.88, "step": 18090, "train_speed(iter/s)": 0.420038 }, { "acc": 0.85309696, "epoch": 0.48994124495708447, "grad_norm": 7.648516654968262, "learning_rate": 9.857064922780418e-06, "loss": 0.72057848, "memory(GiB)": 34.88, "step": 18095, "train_speed(iter/s)": 0.420043 }, { "acc": 0.87790251, "epoch": 0.4900766252403, "grad_norm": 7.925242900848389, "learning_rate": 9.85693204769598e-06, "loss": 0.60959883, "memory(GiB)": 34.88, "step": 18100, "train_speed(iter/s)": 0.420047 }, { "acc": 0.86002884, "epoch": 0.49021200552351557, "grad_norm": 6.764688014984131, "learning_rate": 9.856799111775189e-06, "loss": 0.74380345, "memory(GiB)": 34.88, "step": 18105, "train_speed(iter/s)": 0.420051 }, { "acc": 0.87987051, "epoch": 0.4903473858067311, "grad_norm": 11.957676887512207, "learning_rate": 9.856666115019709e-06, "loss": 0.59217939, "memory(GiB)": 34.88, "step": 18110, "train_speed(iter/s)": 0.420055 }, { "acc": 0.89139652, "epoch": 0.4904827660899467, "grad_norm": 6.189092636108398, "learning_rate": 9.856533057431205e-06, "loss": 0.58822446, "memory(GiB)": 34.88, "step": 18115, "train_speed(iter/s)": 0.420058 }, { "acc": 0.86262264, "epoch": 0.4906181463731622, "grad_norm": 5.231683731079102, "learning_rate": 9.856399939011347e-06, "loss": 0.6873445, "memory(GiB)": 34.88, "step": 18120, "train_speed(iter/s)": 0.420062 }, { "acc": 0.84565315, "epoch": 0.4907535266563778, "grad_norm": 9.557893753051758, "learning_rate": 9.856266759761799e-06, "loss": 0.77927132, "memory(GiB)": 34.88, "step": 18125, "train_speed(iter/s)": 0.420066 }, { "acc": 0.85859461, "epoch": 0.4908889069395933, "grad_norm": 5.622358322143555, "learning_rate": 9.856133519684231e-06, "loss": 0.74292407, "memory(GiB)": 34.88, "step": 18130, "train_speed(iter/s)": 0.42007 }, { "acc": 0.8543047, "epoch": 0.4910242872228089, "grad_norm": 8.202179908752441, "learning_rate": 9.856000218780311e-06, "loss": 0.74297919, "memory(GiB)": 34.88, "step": 18135, "train_speed(iter/s)": 0.420074 }, { "acc": 0.85966177, "epoch": 0.4911596675060244, "grad_norm": 6.105876922607422, "learning_rate": 9.85586685705171e-06, "loss": 0.75183105, "memory(GiB)": 34.88, "step": 18140, "train_speed(iter/s)": 0.420078 }, { "acc": 0.86205215, "epoch": 0.49129504778924, "grad_norm": 6.8934760093688965, "learning_rate": 9.8557334345001e-06, "loss": 0.67456021, "memory(GiB)": 34.88, "step": 18145, "train_speed(iter/s)": 0.420081 }, { "acc": 0.86203442, "epoch": 0.4914304280724555, "grad_norm": 6.816761016845703, "learning_rate": 9.85559995112715e-06, "loss": 0.71825247, "memory(GiB)": 34.88, "step": 18150, "train_speed(iter/s)": 0.420085 }, { "acc": 0.86872959, "epoch": 0.4915658083556711, "grad_norm": 7.7375030517578125, "learning_rate": 9.855466406934534e-06, "loss": 0.62825041, "memory(GiB)": 34.88, "step": 18155, "train_speed(iter/s)": 0.420088 }, { "acc": 0.84582968, "epoch": 0.4917011886388866, "grad_norm": 8.150115013122559, "learning_rate": 9.855332801923923e-06, "loss": 0.76431856, "memory(GiB)": 34.88, "step": 18160, "train_speed(iter/s)": 0.420092 }, { "acc": 0.86924801, "epoch": 0.4918365689221022, "grad_norm": 29.75484275817871, "learning_rate": 9.85519913609699e-06, "loss": 0.72303476, "memory(GiB)": 34.88, "step": 18165, "train_speed(iter/s)": 0.420096 }, { "acc": 0.8733408, "epoch": 0.4919719492053177, "grad_norm": 9.349445343017578, "learning_rate": 9.855065409455412e-06, "loss": 0.63800459, "memory(GiB)": 34.88, "step": 18170, "train_speed(iter/s)": 0.4201 }, { "acc": 0.84744215, "epoch": 0.4921073294885333, "grad_norm": 13.645318984985352, "learning_rate": 9.854931622000863e-06, "loss": 0.80647793, "memory(GiB)": 34.88, "step": 18175, "train_speed(iter/s)": 0.420104 }, { "acc": 0.86692486, "epoch": 0.49224270977174883, "grad_norm": 4.036535739898682, "learning_rate": 9.854797773735018e-06, "loss": 0.63468699, "memory(GiB)": 34.88, "step": 18180, "train_speed(iter/s)": 0.420107 }, { "acc": 0.88254623, "epoch": 0.4923780900549644, "grad_norm": 10.454529762268066, "learning_rate": 9.854663864659557e-06, "loss": 0.50613422, "memory(GiB)": 34.88, "step": 18185, "train_speed(iter/s)": 0.420112 }, { "acc": 0.87059364, "epoch": 0.49251347033817994, "grad_norm": 22.995994567871094, "learning_rate": 9.854529894776154e-06, "loss": 0.66975574, "memory(GiB)": 34.88, "step": 18190, "train_speed(iter/s)": 0.420115 }, { "acc": 0.8551815, "epoch": 0.4926488506213955, "grad_norm": 6.971369743347168, "learning_rate": 9.854395864086487e-06, "loss": 0.69455166, "memory(GiB)": 34.88, "step": 18195, "train_speed(iter/s)": 0.420119 }, { "acc": 0.84495392, "epoch": 0.49278423090461104, "grad_norm": 17.806203842163086, "learning_rate": 9.854261772592237e-06, "loss": 0.7969367, "memory(GiB)": 34.88, "step": 18200, "train_speed(iter/s)": 0.420124 }, { "acc": 0.84320898, "epoch": 0.4929196111878266, "grad_norm": 7.849282741546631, "learning_rate": 9.854127620295085e-06, "loss": 0.84595642, "memory(GiB)": 34.88, "step": 18205, "train_speed(iter/s)": 0.420128 }, { "acc": 0.84544125, "epoch": 0.49305499147104215, "grad_norm": 10.738306045532227, "learning_rate": 9.853993407196708e-06, "loss": 0.81422663, "memory(GiB)": 34.88, "step": 18210, "train_speed(iter/s)": 0.420131 }, { "acc": 0.85708551, "epoch": 0.49319037175425773, "grad_norm": 5.271306991577148, "learning_rate": 9.853859133298788e-06, "loss": 0.7000598, "memory(GiB)": 34.88, "step": 18215, "train_speed(iter/s)": 0.420134 }, { "acc": 0.81944666, "epoch": 0.49332575203747325, "grad_norm": 9.064322471618652, "learning_rate": 9.853724798603011e-06, "loss": 0.99060364, "memory(GiB)": 34.88, "step": 18220, "train_speed(iter/s)": 0.420138 }, { "acc": 0.81371536, "epoch": 0.49346113232068883, "grad_norm": 13.39400577545166, "learning_rate": 9.853590403111054e-06, "loss": 1.08978615, "memory(GiB)": 34.88, "step": 18225, "train_speed(iter/s)": 0.420141 }, { "acc": 0.83933449, "epoch": 0.49359651260390436, "grad_norm": 14.653223991394043, "learning_rate": 9.853455946824604e-06, "loss": 0.82497721, "memory(GiB)": 34.88, "step": 18230, "train_speed(iter/s)": 0.420145 }, { "acc": 0.8760128, "epoch": 0.49373189288711994, "grad_norm": 12.609265327453613, "learning_rate": 9.853321429745346e-06, "loss": 0.7091785, "memory(GiB)": 34.88, "step": 18235, "train_speed(iter/s)": 0.420149 }, { "acc": 0.86820927, "epoch": 0.49386727317033546, "grad_norm": 7.713386058807373, "learning_rate": 9.853186851874965e-06, "loss": 0.74330397, "memory(GiB)": 34.88, "step": 18240, "train_speed(iter/s)": 0.420153 }, { "acc": 0.88127403, "epoch": 0.49400265345355104, "grad_norm": 6.543214797973633, "learning_rate": 9.853052213215143e-06, "loss": 0.57802773, "memory(GiB)": 34.88, "step": 18245, "train_speed(iter/s)": 0.420157 }, { "acc": 0.8650794, "epoch": 0.49413803373676657, "grad_norm": 7.262876510620117, "learning_rate": 9.85291751376757e-06, "loss": 0.67854333, "memory(GiB)": 34.88, "step": 18250, "train_speed(iter/s)": 0.420161 }, { "acc": 0.82802458, "epoch": 0.49427341401998215, "grad_norm": 10.763872146606445, "learning_rate": 9.852782753533933e-06, "loss": 0.91427898, "memory(GiB)": 34.88, "step": 18255, "train_speed(iter/s)": 0.420164 }, { "acc": 0.82684317, "epoch": 0.4944087943031977, "grad_norm": 15.618874549865723, "learning_rate": 9.85264793251592e-06, "loss": 0.86017208, "memory(GiB)": 34.88, "step": 18260, "train_speed(iter/s)": 0.420168 }, { "acc": 0.83224936, "epoch": 0.49454417458641325, "grad_norm": 15.896154403686523, "learning_rate": 9.852513050715218e-06, "loss": 0.91623268, "memory(GiB)": 34.88, "step": 18265, "train_speed(iter/s)": 0.420172 }, { "acc": 0.85365934, "epoch": 0.4946795548696288, "grad_norm": 16.612577438354492, "learning_rate": 9.852378108133521e-06, "loss": 0.78589754, "memory(GiB)": 34.88, "step": 18270, "train_speed(iter/s)": 0.420174 }, { "acc": 0.86020308, "epoch": 0.49481493515284436, "grad_norm": 7.293888568878174, "learning_rate": 9.852243104772515e-06, "loss": 0.67613707, "memory(GiB)": 34.88, "step": 18275, "train_speed(iter/s)": 0.420178 }, { "acc": 0.86679382, "epoch": 0.4949503154360599, "grad_norm": 25.0267391204834, "learning_rate": 9.852108040633894e-06, "loss": 0.68192558, "memory(GiB)": 34.88, "step": 18280, "train_speed(iter/s)": 0.420181 }, { "acc": 0.88357716, "epoch": 0.49508569571927546, "grad_norm": 12.242366790771484, "learning_rate": 9.851972915719347e-06, "loss": 0.5314044, "memory(GiB)": 34.88, "step": 18285, "train_speed(iter/s)": 0.420185 }, { "acc": 0.86732197, "epoch": 0.495221076002491, "grad_norm": 8.584891319274902, "learning_rate": 9.85183773003057e-06, "loss": 0.62120686, "memory(GiB)": 34.88, "step": 18290, "train_speed(iter/s)": 0.420189 }, { "acc": 0.85800447, "epoch": 0.49535645628570657, "grad_norm": 6.7103776931762695, "learning_rate": 9.851702483569256e-06, "loss": 0.68601627, "memory(GiB)": 34.88, "step": 18295, "train_speed(iter/s)": 0.420193 }, { "acc": 0.83583393, "epoch": 0.4954918365689221, "grad_norm": 12.679923057556152, "learning_rate": 9.851567176337096e-06, "loss": 0.85411091, "memory(GiB)": 34.88, "step": 18300, "train_speed(iter/s)": 0.420197 }, { "acc": 0.86757574, "epoch": 0.4956272168521377, "grad_norm": 4.90451717376709, "learning_rate": 9.851431808335788e-06, "loss": 0.65695424, "memory(GiB)": 34.88, "step": 18305, "train_speed(iter/s)": 0.4202 }, { "acc": 0.88275585, "epoch": 0.4957625971353532, "grad_norm": 7.686281681060791, "learning_rate": 9.85129637956703e-06, "loss": 0.56064482, "memory(GiB)": 34.88, "step": 18310, "train_speed(iter/s)": 0.420204 }, { "acc": 0.85553064, "epoch": 0.4958979774185688, "grad_norm": 15.271509170532227, "learning_rate": 9.85116089003251e-06, "loss": 0.71720533, "memory(GiB)": 34.88, "step": 18315, "train_speed(iter/s)": 0.420207 }, { "acc": 0.85732937, "epoch": 0.4960333577017843, "grad_norm": 11.599699974060059, "learning_rate": 9.851025339733935e-06, "loss": 0.82087965, "memory(GiB)": 34.88, "step": 18320, "train_speed(iter/s)": 0.420211 }, { "acc": 0.85275145, "epoch": 0.4961687379849999, "grad_norm": 27.458438873291016, "learning_rate": 9.850889728672999e-06, "loss": 0.75549464, "memory(GiB)": 34.88, "step": 18325, "train_speed(iter/s)": 0.420215 }, { "acc": 0.85629501, "epoch": 0.4963041182682154, "grad_norm": 9.690463066101074, "learning_rate": 9.850754056851398e-06, "loss": 0.76324167, "memory(GiB)": 34.88, "step": 18330, "train_speed(iter/s)": 0.420219 }, { "acc": 0.86893435, "epoch": 0.496439498551431, "grad_norm": 13.44332504272461, "learning_rate": 9.850618324270836e-06, "loss": 0.64911437, "memory(GiB)": 34.88, "step": 18335, "train_speed(iter/s)": 0.420223 }, { "acc": 0.86835375, "epoch": 0.4965748788346465, "grad_norm": 8.417776107788086, "learning_rate": 9.850482530933012e-06, "loss": 0.65673738, "memory(GiB)": 34.88, "step": 18340, "train_speed(iter/s)": 0.420227 }, { "acc": 0.84028635, "epoch": 0.4967102591178621, "grad_norm": 7.073190212249756, "learning_rate": 9.850346676839626e-06, "loss": 0.81522217, "memory(GiB)": 34.88, "step": 18345, "train_speed(iter/s)": 0.42023 }, { "acc": 0.85775709, "epoch": 0.4968456394010776, "grad_norm": 6.301894664764404, "learning_rate": 9.85021076199238e-06, "loss": 0.77288079, "memory(GiB)": 34.88, "step": 18350, "train_speed(iter/s)": 0.420234 }, { "acc": 0.86041536, "epoch": 0.4969810196842932, "grad_norm": 5.531222820281982, "learning_rate": 9.850074786392976e-06, "loss": 0.7064661, "memory(GiB)": 34.88, "step": 18355, "train_speed(iter/s)": 0.420237 }, { "acc": 0.82673006, "epoch": 0.4971163999675087, "grad_norm": 8.379087448120117, "learning_rate": 9.84993875004312e-06, "loss": 0.86410599, "memory(GiB)": 34.88, "step": 18360, "train_speed(iter/s)": 0.420241 }, { "acc": 0.84055405, "epoch": 0.4972517802507243, "grad_norm": 16.677988052368164, "learning_rate": 9.849802652944513e-06, "loss": 0.91651831, "memory(GiB)": 34.88, "step": 18365, "train_speed(iter/s)": 0.420244 }, { "acc": 0.86510792, "epoch": 0.49738716053393983, "grad_norm": 10.017643928527832, "learning_rate": 9.849666495098862e-06, "loss": 0.72811685, "memory(GiB)": 34.88, "step": 18370, "train_speed(iter/s)": 0.420248 }, { "acc": 0.87510529, "epoch": 0.4975225408171554, "grad_norm": 8.214746475219727, "learning_rate": 9.849530276507873e-06, "loss": 0.52723083, "memory(GiB)": 34.88, "step": 18375, "train_speed(iter/s)": 0.420251 }, { "acc": 0.87500629, "epoch": 0.49765792110037094, "grad_norm": 4.356014251708984, "learning_rate": 9.849393997173252e-06, "loss": 0.56533461, "memory(GiB)": 34.88, "step": 18380, "train_speed(iter/s)": 0.420255 }, { "acc": 0.86445293, "epoch": 0.4977933013835865, "grad_norm": 25.727252960205078, "learning_rate": 9.849257657096707e-06, "loss": 0.82588434, "memory(GiB)": 34.88, "step": 18385, "train_speed(iter/s)": 0.420259 }, { "acc": 0.85876637, "epoch": 0.49792868166680204, "grad_norm": 11.195433616638184, "learning_rate": 9.84912125627994e-06, "loss": 0.70838814, "memory(GiB)": 34.88, "step": 18390, "train_speed(iter/s)": 0.420262 }, { "acc": 0.86624937, "epoch": 0.4980640619500176, "grad_norm": 5.673129081726074, "learning_rate": 9.848984794724666e-06, "loss": 0.6577282, "memory(GiB)": 34.88, "step": 18395, "train_speed(iter/s)": 0.420266 }, { "acc": 0.8624342, "epoch": 0.49819944223323315, "grad_norm": 19.362394332885742, "learning_rate": 9.848848272432594e-06, "loss": 0.70339842, "memory(GiB)": 34.88, "step": 18400, "train_speed(iter/s)": 0.42027 }, { "acc": 0.86414728, "epoch": 0.4983348225164487, "grad_norm": 5.87000846862793, "learning_rate": 9.848711689405433e-06, "loss": 0.69421186, "memory(GiB)": 34.88, "step": 18405, "train_speed(iter/s)": 0.420274 }, { "acc": 0.83806295, "epoch": 0.49847020279966425, "grad_norm": 7.011607646942139, "learning_rate": 9.848575045644892e-06, "loss": 0.80919065, "memory(GiB)": 34.88, "step": 18410, "train_speed(iter/s)": 0.420278 }, { "acc": 0.88663425, "epoch": 0.49860558308287983, "grad_norm": 5.336743354797363, "learning_rate": 9.848438341152685e-06, "loss": 0.58381529, "memory(GiB)": 34.88, "step": 18415, "train_speed(iter/s)": 0.420282 }, { "acc": 0.87787018, "epoch": 0.49874096336609536, "grad_norm": 17.507484436035156, "learning_rate": 9.848301575930526e-06, "loss": 0.59754639, "memory(GiB)": 34.88, "step": 18420, "train_speed(iter/s)": 0.420286 }, { "acc": 0.88565941, "epoch": 0.49887634364931094, "grad_norm": 5.129770278930664, "learning_rate": 9.848164749980124e-06, "loss": 0.54213915, "memory(GiB)": 34.88, "step": 18425, "train_speed(iter/s)": 0.42029 }, { "acc": 0.82462711, "epoch": 0.49901172393252646, "grad_norm": 8.876436233520508, "learning_rate": 9.848027863303197e-06, "loss": 0.9623745, "memory(GiB)": 34.88, "step": 18430, "train_speed(iter/s)": 0.420293 }, { "acc": 0.83691425, "epoch": 0.49914710421574204, "grad_norm": 5.908947944641113, "learning_rate": 9.847890915901456e-06, "loss": 0.82855949, "memory(GiB)": 34.88, "step": 18435, "train_speed(iter/s)": 0.420297 }, { "acc": 0.84994602, "epoch": 0.49928248449895757, "grad_norm": 9.991643905639648, "learning_rate": 9.84775390777662e-06, "loss": 0.70888462, "memory(GiB)": 34.88, "step": 18440, "train_speed(iter/s)": 0.420298 }, { "acc": 0.84353724, "epoch": 0.49941786478217315, "grad_norm": 9.237373352050781, "learning_rate": 9.847616838930405e-06, "loss": 0.73360815, "memory(GiB)": 34.88, "step": 18445, "train_speed(iter/s)": 0.420302 }, { "acc": 0.84204426, "epoch": 0.4995532450653887, "grad_norm": 7.996246814727783, "learning_rate": 9.847479709364526e-06, "loss": 0.73458495, "memory(GiB)": 34.88, "step": 18450, "train_speed(iter/s)": 0.420302 }, { "acc": 0.87074738, "epoch": 0.49968862534860425, "grad_norm": 7.08854341506958, "learning_rate": 9.847342519080702e-06, "loss": 0.7111414, "memory(GiB)": 34.88, "step": 18455, "train_speed(iter/s)": 0.420303 }, { "acc": 0.8628746, "epoch": 0.4998240056318198, "grad_norm": 18.476972579956055, "learning_rate": 9.847205268080652e-06, "loss": 0.72768784, "memory(GiB)": 34.88, "step": 18460, "train_speed(iter/s)": 0.420303 }, { "acc": 0.87183409, "epoch": 0.49995938591503536, "grad_norm": 9.118171691894531, "learning_rate": 9.847067956366094e-06, "loss": 0.64878421, "memory(GiB)": 34.88, "step": 18465, "train_speed(iter/s)": 0.420305 }, { "acc": 0.86667347, "epoch": 0.5000947661982509, "grad_norm": 13.171309471130371, "learning_rate": 9.846930583938748e-06, "loss": 0.70060163, "memory(GiB)": 34.88, "step": 18470, "train_speed(iter/s)": 0.420307 }, { "acc": 0.86810589, "epoch": 0.5002301464814665, "grad_norm": 7.433750152587891, "learning_rate": 9.846793150800338e-06, "loss": 0.6595295, "memory(GiB)": 34.88, "step": 18475, "train_speed(iter/s)": 0.420308 }, { "acc": 0.85345707, "epoch": 0.500365526764682, "grad_norm": 12.35423755645752, "learning_rate": 9.846655656952581e-06, "loss": 0.72746434, "memory(GiB)": 34.88, "step": 18480, "train_speed(iter/s)": 0.42031 }, { "acc": 0.82904663, "epoch": 0.5005009070478975, "grad_norm": 13.028337478637695, "learning_rate": 9.846518102397204e-06, "loss": 0.78585339, "memory(GiB)": 34.88, "step": 18485, "train_speed(iter/s)": 0.420309 }, { "acc": 0.85204659, "epoch": 0.5006362873311131, "grad_norm": 4.538125514984131, "learning_rate": 9.846380487135927e-06, "loss": 0.74512973, "memory(GiB)": 34.88, "step": 18490, "train_speed(iter/s)": 0.42031 }, { "acc": 0.85875778, "epoch": 0.5007716676143287, "grad_norm": 25.850788116455078, "learning_rate": 9.846242811170476e-06, "loss": 0.68376141, "memory(GiB)": 34.88, "step": 18495, "train_speed(iter/s)": 0.420312 }, { "acc": 0.84220409, "epoch": 0.5009070478975443, "grad_norm": 9.580185890197754, "learning_rate": 9.846105074502571e-06, "loss": 0.81322794, "memory(GiB)": 34.88, "step": 18500, "train_speed(iter/s)": 0.420316 }, { "acc": 0.86892023, "epoch": 0.5010424281807597, "grad_norm": 6.891530513763428, "learning_rate": 9.845967277133944e-06, "loss": 0.6009757, "memory(GiB)": 34.88, "step": 18505, "train_speed(iter/s)": 0.42032 }, { "acc": 0.86988754, "epoch": 0.5011778084639753, "grad_norm": 7.028924465179443, "learning_rate": 9.845829419066316e-06, "loss": 0.73981957, "memory(GiB)": 34.88, "step": 18510, "train_speed(iter/s)": 0.420324 }, { "acc": 0.86874504, "epoch": 0.5013131887471909, "grad_norm": 14.974630355834961, "learning_rate": 9.845691500301416e-06, "loss": 0.59622231, "memory(GiB)": 34.88, "step": 18515, "train_speed(iter/s)": 0.420326 }, { "acc": 0.87749329, "epoch": 0.5014485690304065, "grad_norm": 5.76995849609375, "learning_rate": 9.845553520840972e-06, "loss": 0.61134372, "memory(GiB)": 34.88, "step": 18520, "train_speed(iter/s)": 0.420329 }, { "acc": 0.86163483, "epoch": 0.5015839493136219, "grad_norm": 8.692530632019043, "learning_rate": 9.845415480686713e-06, "loss": 0.74987984, "memory(GiB)": 34.88, "step": 18525, "train_speed(iter/s)": 0.420332 }, { "acc": 0.86664066, "epoch": 0.5017193295968375, "grad_norm": 6.090693473815918, "learning_rate": 9.845277379840367e-06, "loss": 0.60035543, "memory(GiB)": 34.88, "step": 18530, "train_speed(iter/s)": 0.420336 }, { "acc": 0.86909761, "epoch": 0.5018547098800531, "grad_norm": 10.254898071289062, "learning_rate": 9.845139218303664e-06, "loss": 0.61673574, "memory(GiB)": 34.88, "step": 18535, "train_speed(iter/s)": 0.42034 }, { "acc": 0.84741421, "epoch": 0.5019900901632687, "grad_norm": 8.486043930053711, "learning_rate": 9.845000996078337e-06, "loss": 0.77995901, "memory(GiB)": 34.88, "step": 18540, "train_speed(iter/s)": 0.420344 }, { "acc": 0.84682655, "epoch": 0.5021254704464841, "grad_norm": 8.848515510559082, "learning_rate": 9.844862713166113e-06, "loss": 0.74751549, "memory(GiB)": 34.88, "step": 18545, "train_speed(iter/s)": 0.420348 }, { "acc": 0.86912403, "epoch": 0.5022608507296997, "grad_norm": 10.408453941345215, "learning_rate": 9.844724369568726e-06, "loss": 0.64165678, "memory(GiB)": 34.88, "step": 18550, "train_speed(iter/s)": 0.420351 }, { "acc": 0.88691082, "epoch": 0.5023962310129153, "grad_norm": 8.355170249938965, "learning_rate": 9.844585965287915e-06, "loss": 0.52999864, "memory(GiB)": 34.88, "step": 18555, "train_speed(iter/s)": 0.420355 }, { "acc": 0.85007982, "epoch": 0.5025316112961309, "grad_norm": 6.311234474182129, "learning_rate": 9.844447500325403e-06, "loss": 0.82397795, "memory(GiB)": 34.88, "step": 18560, "train_speed(iter/s)": 0.420359 }, { "acc": 0.86136599, "epoch": 0.5026669915793464, "grad_norm": 17.878618240356445, "learning_rate": 9.844308974682932e-06, "loss": 0.69317489, "memory(GiB)": 34.88, "step": 18565, "train_speed(iter/s)": 0.420363 }, { "acc": 0.85970898, "epoch": 0.5028023718625619, "grad_norm": 18.677621841430664, "learning_rate": 9.844170388362237e-06, "loss": 0.63961515, "memory(GiB)": 34.88, "step": 18570, "train_speed(iter/s)": 0.420366 }, { "acc": 0.88558273, "epoch": 0.5029377521457775, "grad_norm": 5.712883949279785, "learning_rate": 9.844031741365053e-06, "loss": 0.61260066, "memory(GiB)": 34.88, "step": 18575, "train_speed(iter/s)": 0.420369 }, { "acc": 0.84550705, "epoch": 0.503073132428993, "grad_norm": 11.043224334716797, "learning_rate": 9.843893033693114e-06, "loss": 0.83795023, "memory(GiB)": 34.88, "step": 18580, "train_speed(iter/s)": 0.420372 }, { "acc": 0.88344297, "epoch": 0.5032085127122086, "grad_norm": 18.707914352416992, "learning_rate": 9.843754265348161e-06, "loss": 0.59108801, "memory(GiB)": 34.88, "step": 18585, "train_speed(iter/s)": 0.420376 }, { "acc": 0.84625721, "epoch": 0.5033438929954241, "grad_norm": 13.844317436218262, "learning_rate": 9.84361543633193e-06, "loss": 0.82913742, "memory(GiB)": 34.88, "step": 18590, "train_speed(iter/s)": 0.420379 }, { "acc": 0.85585022, "epoch": 0.5034792732786397, "grad_norm": 5.061917304992676, "learning_rate": 9.843476546646164e-06, "loss": 0.72704268, "memory(GiB)": 34.88, "step": 18595, "train_speed(iter/s)": 0.420383 }, { "acc": 0.84027138, "epoch": 0.5036146535618552, "grad_norm": 6.2265625, "learning_rate": 9.8433375962926e-06, "loss": 0.81708813, "memory(GiB)": 34.88, "step": 18600, "train_speed(iter/s)": 0.420386 }, { "acc": 0.86924858, "epoch": 0.5037500338450708, "grad_norm": 3.8269896507263184, "learning_rate": 9.843198585272979e-06, "loss": 0.68222694, "memory(GiB)": 34.88, "step": 18605, "train_speed(iter/s)": 0.42039 }, { "acc": 0.82702436, "epoch": 0.5038854141282864, "grad_norm": 9.15846061706543, "learning_rate": 9.843059513589041e-06, "loss": 0.9918993, "memory(GiB)": 34.88, "step": 18610, "train_speed(iter/s)": 0.420394 }, { "acc": 0.8659421, "epoch": 0.5040207944115019, "grad_norm": 6.011538028717041, "learning_rate": 9.84292038124253e-06, "loss": 0.65229964, "memory(GiB)": 34.88, "step": 18615, "train_speed(iter/s)": 0.420398 }, { "acc": 0.88481922, "epoch": 0.5041561746947174, "grad_norm": 9.822585105895996, "learning_rate": 9.842781188235188e-06, "loss": 0.53774805, "memory(GiB)": 34.88, "step": 18620, "train_speed(iter/s)": 0.420402 }, { "acc": 0.83451109, "epoch": 0.504291554977933, "grad_norm": 6.011683940887451, "learning_rate": 9.84264193456876e-06, "loss": 0.8079607, "memory(GiB)": 34.88, "step": 18625, "train_speed(iter/s)": 0.420406 }, { "acc": 0.85111284, "epoch": 0.5044269352611486, "grad_norm": 9.423924446105957, "learning_rate": 9.84250262024499e-06, "loss": 0.73037119, "memory(GiB)": 34.88, "step": 18630, "train_speed(iter/s)": 0.420409 }, { "acc": 0.85464134, "epoch": 0.5045623155443641, "grad_norm": 8.576181411743164, "learning_rate": 9.84236324526562e-06, "loss": 0.72404299, "memory(GiB)": 34.88, "step": 18635, "train_speed(iter/s)": 0.420413 }, { "acc": 0.82892542, "epoch": 0.5046976958275796, "grad_norm": 6.622706890106201, "learning_rate": 9.8422238096324e-06, "loss": 0.83562717, "memory(GiB)": 34.88, "step": 18640, "train_speed(iter/s)": 0.420416 }, { "acc": 0.87077131, "epoch": 0.5048330761107952, "grad_norm": 10.750322341918945, "learning_rate": 9.842084313347075e-06, "loss": 0.55645075, "memory(GiB)": 34.88, "step": 18645, "train_speed(iter/s)": 0.42042 }, { "acc": 0.8428463, "epoch": 0.5049684563940108, "grad_norm": 9.443477630615234, "learning_rate": 9.841944756411394e-06, "loss": 0.81639061, "memory(GiB)": 34.88, "step": 18650, "train_speed(iter/s)": 0.420423 }, { "acc": 0.85207596, "epoch": 0.5051038366772264, "grad_norm": 4.882994651794434, "learning_rate": 9.841805138827102e-06, "loss": 0.72437806, "memory(GiB)": 34.88, "step": 18655, "train_speed(iter/s)": 0.420427 }, { "acc": 0.84903431, "epoch": 0.5052392169604418, "grad_norm": 12.047292709350586, "learning_rate": 9.841665460595953e-06, "loss": 0.77108517, "memory(GiB)": 34.88, "step": 18660, "train_speed(iter/s)": 0.42043 }, { "acc": 0.85915451, "epoch": 0.5053745972436574, "grad_norm": 6.279339790344238, "learning_rate": 9.841525721719692e-06, "loss": 0.622651, "memory(GiB)": 34.88, "step": 18665, "train_speed(iter/s)": 0.420433 }, { "acc": 0.85323935, "epoch": 0.505509977526873, "grad_norm": 9.906854629516602, "learning_rate": 9.84138592220007e-06, "loss": 0.79032288, "memory(GiB)": 34.88, "step": 18670, "train_speed(iter/s)": 0.420436 }, { "acc": 0.84708538, "epoch": 0.5056453578100886, "grad_norm": 7.063328742980957, "learning_rate": 9.84124606203884e-06, "loss": 0.70334482, "memory(GiB)": 34.88, "step": 18675, "train_speed(iter/s)": 0.42044 }, { "acc": 0.86374903, "epoch": 0.505780738093304, "grad_norm": 10.155852317810059, "learning_rate": 9.841106141237754e-06, "loss": 0.81562052, "memory(GiB)": 34.88, "step": 18680, "train_speed(iter/s)": 0.420444 }, { "acc": 0.85150719, "epoch": 0.5059161183765196, "grad_norm": 9.089815139770508, "learning_rate": 9.840966159798566e-06, "loss": 0.76739025, "memory(GiB)": 34.88, "step": 18685, "train_speed(iter/s)": 0.420447 }, { "acc": 0.8668829, "epoch": 0.5060514986597352, "grad_norm": 5.847381591796875, "learning_rate": 9.840826117723026e-06, "loss": 0.59437623, "memory(GiB)": 34.88, "step": 18690, "train_speed(iter/s)": 0.420451 }, { "acc": 0.86766005, "epoch": 0.5061868789429508, "grad_norm": 8.658241271972656, "learning_rate": 9.840686015012892e-06, "loss": 0.72391958, "memory(GiB)": 34.88, "step": 18695, "train_speed(iter/s)": 0.420454 }, { "acc": 0.86515055, "epoch": 0.5063222592261662, "grad_norm": 9.504681587219238, "learning_rate": 9.840545851669915e-06, "loss": 0.55355105, "memory(GiB)": 34.88, "step": 18700, "train_speed(iter/s)": 0.420458 }, { "acc": 0.85391235, "epoch": 0.5064576395093818, "grad_norm": 10.859723091125488, "learning_rate": 9.840405627695855e-06, "loss": 0.83633022, "memory(GiB)": 34.88, "step": 18705, "train_speed(iter/s)": 0.420462 }, { "acc": 0.88661766, "epoch": 0.5065930197925974, "grad_norm": 4.39744234085083, "learning_rate": 9.840265343092464e-06, "loss": 0.57314816, "memory(GiB)": 34.88, "step": 18710, "train_speed(iter/s)": 0.420466 }, { "acc": 0.86782169, "epoch": 0.506728400075813, "grad_norm": 5.864882946014404, "learning_rate": 9.840124997861504e-06, "loss": 0.67560024, "memory(GiB)": 34.88, "step": 18715, "train_speed(iter/s)": 0.42047 }, { "acc": 0.84335012, "epoch": 0.5068637803590285, "grad_norm": 10.77628231048584, "learning_rate": 9.83998459200473e-06, "loss": 0.79413233, "memory(GiB)": 34.88, "step": 18720, "train_speed(iter/s)": 0.420473 }, { "acc": 0.86008968, "epoch": 0.506999160642244, "grad_norm": 9.484700202941895, "learning_rate": 9.839844125523904e-06, "loss": 0.704109, "memory(GiB)": 34.88, "step": 18725, "train_speed(iter/s)": 0.420477 }, { "acc": 0.8572751, "epoch": 0.5071345409254596, "grad_norm": 9.478559494018555, "learning_rate": 9.83970359842078e-06, "loss": 0.71151972, "memory(GiB)": 34.88, "step": 18730, "train_speed(iter/s)": 0.420481 }, { "acc": 0.87006664, "epoch": 0.5072699212086752, "grad_norm": 45.38307189941406, "learning_rate": 9.839563010697123e-06, "loss": 0.64267178, "memory(GiB)": 34.88, "step": 18735, "train_speed(iter/s)": 0.420485 }, { "acc": 0.85688839, "epoch": 0.5074053014918907, "grad_norm": 5.069864749908447, "learning_rate": 9.839422362354696e-06, "loss": 0.78044815, "memory(GiB)": 34.88, "step": 18740, "train_speed(iter/s)": 0.420488 }, { "acc": 0.87566423, "epoch": 0.5075406817751063, "grad_norm": 10.42398738861084, "learning_rate": 9.839281653395255e-06, "loss": 0.62951493, "memory(GiB)": 34.88, "step": 18745, "train_speed(iter/s)": 0.420492 }, { "acc": 0.86995392, "epoch": 0.5076760620583218, "grad_norm": 6.979913711547852, "learning_rate": 9.839140883820567e-06, "loss": 0.66767268, "memory(GiB)": 34.88, "step": 18750, "train_speed(iter/s)": 0.420494 }, { "acc": 0.86866016, "epoch": 0.5078114423415374, "grad_norm": 16.08103370666504, "learning_rate": 9.839000053632393e-06, "loss": 0.69878035, "memory(GiB)": 34.88, "step": 18755, "train_speed(iter/s)": 0.420498 }, { "acc": 0.81743889, "epoch": 0.5079468226247529, "grad_norm": 11.907859802246094, "learning_rate": 9.838859162832499e-06, "loss": 1.0277957, "memory(GiB)": 34.88, "step": 18760, "train_speed(iter/s)": 0.420501 }, { "acc": 0.84937458, "epoch": 0.5080822029079685, "grad_norm": 10.311161994934082, "learning_rate": 9.838718211422648e-06, "loss": 0.76511149, "memory(GiB)": 34.88, "step": 18765, "train_speed(iter/s)": 0.420505 }, { "acc": 0.86025944, "epoch": 0.508217583191184, "grad_norm": 6.124335289001465, "learning_rate": 9.838577199404608e-06, "loss": 0.77110577, "memory(GiB)": 34.88, "step": 18770, "train_speed(iter/s)": 0.420509 }, { "acc": 0.84471531, "epoch": 0.5083529634743996, "grad_norm": 9.509286880493164, "learning_rate": 9.838436126780145e-06, "loss": 0.8063035, "memory(GiB)": 34.88, "step": 18775, "train_speed(iter/s)": 0.420513 }, { "acc": 0.85064774, "epoch": 0.5084883437576151, "grad_norm": 13.515314102172852, "learning_rate": 9.838294993551025e-06, "loss": 0.87851334, "memory(GiB)": 34.88, "step": 18780, "train_speed(iter/s)": 0.420517 }, { "acc": 0.86268044, "epoch": 0.5086237240408307, "grad_norm": 7.146676063537598, "learning_rate": 9.838153799719013e-06, "loss": 0.66791768, "memory(GiB)": 34.88, "step": 18785, "train_speed(iter/s)": 0.42052 }, { "acc": 0.86377687, "epoch": 0.5087591043240463, "grad_norm": 9.64727783203125, "learning_rate": 9.838012545285885e-06, "loss": 0.81254463, "memory(GiB)": 34.88, "step": 18790, "train_speed(iter/s)": 0.420523 }, { "acc": 0.86770363, "epoch": 0.5088944846072618, "grad_norm": 4.443589210510254, "learning_rate": 9.837871230253406e-06, "loss": 0.64068851, "memory(GiB)": 34.88, "step": 18795, "train_speed(iter/s)": 0.420527 }, { "acc": 0.85248966, "epoch": 0.5090298648904773, "grad_norm": 7.158492565155029, "learning_rate": 9.837729854623347e-06, "loss": 0.77967782, "memory(GiB)": 34.88, "step": 18800, "train_speed(iter/s)": 0.420531 }, { "acc": 0.87199516, "epoch": 0.5091652451736929, "grad_norm": 17.61591339111328, "learning_rate": 9.837588418397478e-06, "loss": 0.7103354, "memory(GiB)": 34.88, "step": 18805, "train_speed(iter/s)": 0.420533 }, { "acc": 0.86499805, "epoch": 0.5093006254569085, "grad_norm": 5.503126621246338, "learning_rate": 9.837446921577572e-06, "loss": 0.72400174, "memory(GiB)": 34.88, "step": 18810, "train_speed(iter/s)": 0.420537 }, { "acc": 0.84776173, "epoch": 0.509436005740124, "grad_norm": 12.094684600830078, "learning_rate": 9.837305364165402e-06, "loss": 0.76225147, "memory(GiB)": 34.88, "step": 18815, "train_speed(iter/s)": 0.42054 }, { "acc": 0.8635273, "epoch": 0.5095713860233395, "grad_norm": 11.225913047790527, "learning_rate": 9.83716374616274e-06, "loss": 0.74354863, "memory(GiB)": 34.88, "step": 18820, "train_speed(iter/s)": 0.420543 }, { "acc": 0.85031757, "epoch": 0.5097067663065551, "grad_norm": 10.336024284362793, "learning_rate": 9.837022067571362e-06, "loss": 0.78619413, "memory(GiB)": 34.88, "step": 18825, "train_speed(iter/s)": 0.420546 }, { "acc": 0.87617064, "epoch": 0.5098421465897707, "grad_norm": 6.149856090545654, "learning_rate": 9.836880328393038e-06, "loss": 0.56242504, "memory(GiB)": 34.88, "step": 18830, "train_speed(iter/s)": 0.42055 }, { "acc": 0.8664156, "epoch": 0.5099775268729863, "grad_norm": 7.106505870819092, "learning_rate": 9.836738528629548e-06, "loss": 0.72941127, "memory(GiB)": 34.88, "step": 18835, "train_speed(iter/s)": 0.420554 }, { "acc": 0.86481676, "epoch": 0.5101129071562017, "grad_norm": 6.8254899978637695, "learning_rate": 9.836596668282669e-06, "loss": 0.6822876, "memory(GiB)": 34.88, "step": 18840, "train_speed(iter/s)": 0.420558 }, { "acc": 0.84224052, "epoch": 0.5102482874394173, "grad_norm": 8.661417961120605, "learning_rate": 9.836454747354175e-06, "loss": 0.82358837, "memory(GiB)": 34.88, "step": 18845, "train_speed(iter/s)": 0.420561 }, { "acc": 0.86246815, "epoch": 0.5103836677226329, "grad_norm": 8.263345718383789, "learning_rate": 9.836312765845846e-06, "loss": 0.69835715, "memory(GiB)": 34.88, "step": 18850, "train_speed(iter/s)": 0.420565 }, { "acc": 0.8656538, "epoch": 0.5105190480058485, "grad_norm": 14.866500854492188, "learning_rate": 9.836170723759459e-06, "loss": 0.73290625, "memory(GiB)": 34.88, "step": 18855, "train_speed(iter/s)": 0.420569 }, { "acc": 0.86070156, "epoch": 0.5106544282890639, "grad_norm": 9.299628257751465, "learning_rate": 9.836028621096794e-06, "loss": 0.66139021, "memory(GiB)": 34.88, "step": 18860, "train_speed(iter/s)": 0.420571 }, { "acc": 0.88245564, "epoch": 0.5107898085722795, "grad_norm": 6.89825963973999, "learning_rate": 9.835886457859631e-06, "loss": 0.58519011, "memory(GiB)": 34.88, "step": 18865, "train_speed(iter/s)": 0.420575 }, { "acc": 0.84898472, "epoch": 0.5109251888554951, "grad_norm": 17.312541961669922, "learning_rate": 9.835744234049752e-06, "loss": 0.67904005, "memory(GiB)": 34.88, "step": 18870, "train_speed(iter/s)": 0.420579 }, { "acc": 0.8758357, "epoch": 0.5110605691387107, "grad_norm": 5.940486907958984, "learning_rate": 9.835601949668938e-06, "loss": 0.56652193, "memory(GiB)": 34.88, "step": 18875, "train_speed(iter/s)": 0.420582 }, { "acc": 0.84460669, "epoch": 0.5111959494219261, "grad_norm": 6.397396564483643, "learning_rate": 9.835459604718971e-06, "loss": 0.82915554, "memory(GiB)": 34.88, "step": 18880, "train_speed(iter/s)": 0.420585 }, { "acc": 0.85673256, "epoch": 0.5113313297051417, "grad_norm": 11.3116455078125, "learning_rate": 9.835317199201633e-06, "loss": 0.71120024, "memory(GiB)": 34.88, "step": 18885, "train_speed(iter/s)": 0.420588 }, { "acc": 0.86348076, "epoch": 0.5114667099883573, "grad_norm": 14.921628952026367, "learning_rate": 9.835174733118709e-06, "loss": 0.69889479, "memory(GiB)": 34.88, "step": 18890, "train_speed(iter/s)": 0.420592 }, { "acc": 0.86185789, "epoch": 0.5116020902715729, "grad_norm": 7.513664722442627, "learning_rate": 9.835032206471986e-06, "loss": 0.7362936, "memory(GiB)": 34.88, "step": 18895, "train_speed(iter/s)": 0.420594 }, { "acc": 0.85866623, "epoch": 0.5117374705547884, "grad_norm": 20.40793228149414, "learning_rate": 9.834889619263246e-06, "loss": 0.69954343, "memory(GiB)": 34.88, "step": 18900, "train_speed(iter/s)": 0.420598 }, { "acc": 0.85951777, "epoch": 0.5118728508380039, "grad_norm": 23.979135513305664, "learning_rate": 9.834746971494276e-06, "loss": 0.75564737, "memory(GiB)": 34.88, "step": 18905, "train_speed(iter/s)": 0.420602 }, { "acc": 0.84409351, "epoch": 0.5120082311212195, "grad_norm": 10.773344993591309, "learning_rate": 9.834604263166865e-06, "loss": 0.77751193, "memory(GiB)": 34.88, "step": 18910, "train_speed(iter/s)": 0.420605 }, { "acc": 0.85051785, "epoch": 0.5121436114044351, "grad_norm": 8.962924003601074, "learning_rate": 9.834461494282797e-06, "loss": 0.87898149, "memory(GiB)": 34.88, "step": 18915, "train_speed(iter/s)": 0.420609 }, { "acc": 0.8789793, "epoch": 0.5122789916876506, "grad_norm": 18.1564998626709, "learning_rate": 9.834318664843864e-06, "loss": 0.60409799, "memory(GiB)": 34.88, "step": 18920, "train_speed(iter/s)": 0.420612 }, { "acc": 0.8431879, "epoch": 0.5124143719708661, "grad_norm": 6.043392658233643, "learning_rate": 9.834175774851854e-06, "loss": 0.75721216, "memory(GiB)": 34.88, "step": 18925, "train_speed(iter/s)": 0.420615 }, { "acc": 0.85950279, "epoch": 0.5125497522540817, "grad_norm": 11.965985298156738, "learning_rate": 9.834032824308556e-06, "loss": 0.79368849, "memory(GiB)": 34.88, "step": 18930, "train_speed(iter/s)": 0.420619 }, { "acc": 0.81759396, "epoch": 0.5126851325372973, "grad_norm": 7.890401363372803, "learning_rate": 9.83388981321576e-06, "loss": 0.92291012, "memory(GiB)": 34.88, "step": 18935, "train_speed(iter/s)": 0.420622 }, { "acc": 0.85800896, "epoch": 0.5128205128205128, "grad_norm": 5.300321578979492, "learning_rate": 9.833746741575263e-06, "loss": 0.79460311, "memory(GiB)": 34.88, "step": 18940, "train_speed(iter/s)": 0.420626 }, { "acc": 0.84332485, "epoch": 0.5129558931037284, "grad_norm": 5.180970191955566, "learning_rate": 9.83360360938885e-06, "loss": 0.89688177, "memory(GiB)": 34.88, "step": 18945, "train_speed(iter/s)": 0.420629 }, { "acc": 0.89426193, "epoch": 0.5130912733869439, "grad_norm": 12.912434577941895, "learning_rate": 9.83346041665832e-06, "loss": 0.52405806, "memory(GiB)": 34.88, "step": 18950, "train_speed(iter/s)": 0.420633 }, { "acc": 0.8483366, "epoch": 0.5132266536701595, "grad_norm": 11.694401741027832, "learning_rate": 9.83331716338546e-06, "loss": 0.78587012, "memory(GiB)": 34.88, "step": 18955, "train_speed(iter/s)": 0.420636 }, { "acc": 0.87458096, "epoch": 0.513362033953375, "grad_norm": 7.933924674987793, "learning_rate": 9.833173849572071e-06, "loss": 0.62301502, "memory(GiB)": 34.88, "step": 18960, "train_speed(iter/s)": 0.420639 }, { "acc": 0.86256781, "epoch": 0.5134974142365906, "grad_norm": 6.318724155426025, "learning_rate": 9.833030475219947e-06, "loss": 0.78612323, "memory(GiB)": 34.88, "step": 18965, "train_speed(iter/s)": 0.420643 }, { "acc": 0.84257746, "epoch": 0.5136327945198061, "grad_norm": 9.678140640258789, "learning_rate": 9.83288704033088e-06, "loss": 0.76053495, "memory(GiB)": 34.88, "step": 18970, "train_speed(iter/s)": 0.420646 }, { "acc": 0.86168222, "epoch": 0.5137681748030217, "grad_norm": 7.763301372528076, "learning_rate": 9.832743544906672e-06, "loss": 0.62720146, "memory(GiB)": 34.88, "step": 18975, "train_speed(iter/s)": 0.420649 }, { "acc": 0.87444277, "epoch": 0.5139035550862372, "grad_norm": 10.08972454071045, "learning_rate": 9.832599988949118e-06, "loss": 0.64266109, "memory(GiB)": 34.88, "step": 18980, "train_speed(iter/s)": 0.420653 }, { "acc": 0.84573603, "epoch": 0.5140389353694528, "grad_norm": 9.029200553894043, "learning_rate": 9.832456372460015e-06, "loss": 0.73080578, "memory(GiB)": 34.88, "step": 18985, "train_speed(iter/s)": 0.420656 }, { "acc": 0.86474895, "epoch": 0.5141743156526684, "grad_norm": 5.9888997077941895, "learning_rate": 9.832312695441165e-06, "loss": 0.64099178, "memory(GiB)": 34.88, "step": 18990, "train_speed(iter/s)": 0.42066 }, { "acc": 0.85471802, "epoch": 0.5143096959358839, "grad_norm": 6.7373576164245605, "learning_rate": 9.832168957894367e-06, "loss": 0.73484039, "memory(GiB)": 34.88, "step": 18995, "train_speed(iter/s)": 0.420664 }, { "acc": 0.88226128, "epoch": 0.5144450762190994, "grad_norm": 2.870469808578491, "learning_rate": 9.83202515982142e-06, "loss": 0.55257778, "memory(GiB)": 34.88, "step": 19000, "train_speed(iter/s)": 0.420668 }, { "acc": 0.85442276, "epoch": 0.514580456502315, "grad_norm": 8.831314086914062, "learning_rate": 9.831881301224126e-06, "loss": 0.69534779, "memory(GiB)": 34.88, "step": 19005, "train_speed(iter/s)": 0.420671 }, { "acc": 0.860499, "epoch": 0.5147158367855306, "grad_norm": 7.259422779083252, "learning_rate": 9.83173738210429e-06, "loss": 0.64707313, "memory(GiB)": 34.88, "step": 19010, "train_speed(iter/s)": 0.420675 }, { "acc": 0.86855574, "epoch": 0.5148512170687461, "grad_norm": 4.816897869110107, "learning_rate": 9.83159340246371e-06, "loss": 0.63783598, "memory(GiB)": 34.88, "step": 19015, "train_speed(iter/s)": 0.420678 }, { "acc": 0.86595478, "epoch": 0.5149865973519616, "grad_norm": 8.330568313598633, "learning_rate": 9.831449362304192e-06, "loss": 0.76867247, "memory(GiB)": 34.88, "step": 19020, "train_speed(iter/s)": 0.420682 }, { "acc": 0.83147602, "epoch": 0.5151219776351772, "grad_norm": 8.772618293762207, "learning_rate": 9.831305261627541e-06, "loss": 0.92287931, "memory(GiB)": 34.88, "step": 19025, "train_speed(iter/s)": 0.420685 }, { "acc": 0.85968132, "epoch": 0.5152573579183928, "grad_norm": 6.282960891723633, "learning_rate": 9.831161100435563e-06, "loss": 0.59286227, "memory(GiB)": 34.88, "step": 19030, "train_speed(iter/s)": 0.420688 }, { "acc": 0.86437635, "epoch": 0.5153927382016084, "grad_norm": 9.699106216430664, "learning_rate": 9.83101687873006e-06, "loss": 0.69225969, "memory(GiB)": 34.88, "step": 19035, "train_speed(iter/s)": 0.42069 }, { "acc": 0.83527622, "epoch": 0.5155281184848238, "grad_norm": 11.283163070678711, "learning_rate": 9.830872596512842e-06, "loss": 0.81195755, "memory(GiB)": 34.88, "step": 19040, "train_speed(iter/s)": 0.420694 }, { "acc": 0.86589432, "epoch": 0.5156634987680394, "grad_norm": 14.385202407836914, "learning_rate": 9.830728253785717e-06, "loss": 0.64751921, "memory(GiB)": 34.88, "step": 19045, "train_speed(iter/s)": 0.420697 }, { "acc": 0.86408491, "epoch": 0.515798879051255, "grad_norm": 6.794650554656982, "learning_rate": 9.830583850550491e-06, "loss": 0.78977814, "memory(GiB)": 34.88, "step": 19050, "train_speed(iter/s)": 0.420701 }, { "acc": 0.8506732, "epoch": 0.5159342593344706, "grad_norm": 9.838750839233398, "learning_rate": 9.830439386808973e-06, "loss": 0.70730748, "memory(GiB)": 34.88, "step": 19055, "train_speed(iter/s)": 0.420704 }, { "acc": 0.84507961, "epoch": 0.516069639617686, "grad_norm": 7.820568084716797, "learning_rate": 9.830294862562973e-06, "loss": 0.77659259, "memory(GiB)": 34.88, "step": 19060, "train_speed(iter/s)": 0.420708 }, { "acc": 0.86753445, "epoch": 0.5162050199009016, "grad_norm": 8.569157600402832, "learning_rate": 9.830150277814304e-06, "loss": 0.74018564, "memory(GiB)": 34.88, "step": 19065, "train_speed(iter/s)": 0.420711 }, { "acc": 0.86952705, "epoch": 0.5163404001841172, "grad_norm": 6.480945110321045, "learning_rate": 9.830005632564774e-06, "loss": 0.66050844, "memory(GiB)": 34.88, "step": 19070, "train_speed(iter/s)": 0.420714 }, { "acc": 0.86489515, "epoch": 0.5164757804673328, "grad_norm": 17.478071212768555, "learning_rate": 9.829860926816195e-06, "loss": 0.66459641, "memory(GiB)": 34.88, "step": 19075, "train_speed(iter/s)": 0.420718 }, { "acc": 0.82564716, "epoch": 0.5166111607505482, "grad_norm": 10.008819580078125, "learning_rate": 9.829716160570381e-06, "loss": 0.82159557, "memory(GiB)": 34.88, "step": 19080, "train_speed(iter/s)": 0.420722 }, { "acc": 0.86243668, "epoch": 0.5167465410337638, "grad_norm": 7.64700984954834, "learning_rate": 9.829571333829146e-06, "loss": 0.78030376, "memory(GiB)": 34.88, "step": 19085, "train_speed(iter/s)": 0.420725 }, { "acc": 0.84432621, "epoch": 0.5168819213169794, "grad_norm": 10.983717918395996, "learning_rate": 9.829426446594302e-06, "loss": 0.8054122, "memory(GiB)": 34.88, "step": 19090, "train_speed(iter/s)": 0.420728 }, { "acc": 0.85236607, "epoch": 0.517017301600195, "grad_norm": 30.756633758544922, "learning_rate": 9.829281498867668e-06, "loss": 0.83860111, "memory(GiB)": 34.88, "step": 19095, "train_speed(iter/s)": 0.420732 }, { "acc": 0.87303438, "epoch": 0.5171526818834105, "grad_norm": 9.027273178100586, "learning_rate": 9.829136490651054e-06, "loss": 0.61870289, "memory(GiB)": 34.88, "step": 19100, "train_speed(iter/s)": 0.420736 }, { "acc": 0.86396561, "epoch": 0.517288062166626, "grad_norm": 6.91870641708374, "learning_rate": 9.828991421946282e-06, "loss": 0.70161152, "memory(GiB)": 34.88, "step": 19105, "train_speed(iter/s)": 0.420739 }, { "acc": 0.86899548, "epoch": 0.5174234424498416, "grad_norm": 21.58272933959961, "learning_rate": 9.828846292755167e-06, "loss": 0.62699165, "memory(GiB)": 34.88, "step": 19110, "train_speed(iter/s)": 0.420743 }, { "acc": 0.827672, "epoch": 0.5175588227330572, "grad_norm": 16.75841522216797, "learning_rate": 9.828701103079526e-06, "loss": 0.91802654, "memory(GiB)": 34.88, "step": 19115, "train_speed(iter/s)": 0.420747 }, { "acc": 0.87091208, "epoch": 0.5176942030162727, "grad_norm": 7.288631916046143, "learning_rate": 9.82855585292118e-06, "loss": 0.66721926, "memory(GiB)": 34.88, "step": 19120, "train_speed(iter/s)": 0.42075 }, { "acc": 0.85769367, "epoch": 0.5178295832994882, "grad_norm": 10.441856384277344, "learning_rate": 9.828410542281946e-06, "loss": 0.73779268, "memory(GiB)": 34.88, "step": 19125, "train_speed(iter/s)": 0.420753 }, { "acc": 0.86832495, "epoch": 0.5179649635827038, "grad_norm": 7.954782009124756, "learning_rate": 9.828265171163645e-06, "loss": 0.64554453, "memory(GiB)": 34.88, "step": 19130, "train_speed(iter/s)": 0.420756 }, { "acc": 0.86873569, "epoch": 0.5181003438659194, "grad_norm": 9.53095817565918, "learning_rate": 9.828119739568099e-06, "loss": 0.65036435, "memory(GiB)": 34.88, "step": 19135, "train_speed(iter/s)": 0.420759 }, { "acc": 0.85237427, "epoch": 0.5182357241491349, "grad_norm": 9.42170524597168, "learning_rate": 9.82797424749713e-06, "loss": 0.73663082, "memory(GiB)": 34.88, "step": 19140, "train_speed(iter/s)": 0.420763 }, { "acc": 0.88651581, "epoch": 0.5183711044323505, "grad_norm": 5.974720001220703, "learning_rate": 9.82782869495256e-06, "loss": 0.55251875, "memory(GiB)": 34.88, "step": 19145, "train_speed(iter/s)": 0.420767 }, { "acc": 0.85524302, "epoch": 0.518506484715566, "grad_norm": 25.742706298828125, "learning_rate": 9.827683081936215e-06, "loss": 0.813346, "memory(GiB)": 34.88, "step": 19150, "train_speed(iter/s)": 0.42077 }, { "acc": 0.88331232, "epoch": 0.5186418649987816, "grad_norm": 4.589117527008057, "learning_rate": 9.827537408449914e-06, "loss": 0.59637151, "memory(GiB)": 34.88, "step": 19155, "train_speed(iter/s)": 0.420773 }, { "acc": 0.8545579, "epoch": 0.5187772452819971, "grad_norm": 14.229848861694336, "learning_rate": 9.827391674495484e-06, "loss": 0.73596907, "memory(GiB)": 34.88, "step": 19160, "train_speed(iter/s)": 0.420777 }, { "acc": 0.86690035, "epoch": 0.5189126255652127, "grad_norm": 8.300703048706055, "learning_rate": 9.827245880074752e-06, "loss": 0.70184932, "memory(GiB)": 34.88, "step": 19165, "train_speed(iter/s)": 0.42078 }, { "acc": 0.88622055, "epoch": 0.5190480058484283, "grad_norm": 5.337629318237305, "learning_rate": 9.827100025189541e-06, "loss": 0.5887136, "memory(GiB)": 34.88, "step": 19170, "train_speed(iter/s)": 0.420784 }, { "acc": 0.86298103, "epoch": 0.5191833861316438, "grad_norm": 7.606146335601807, "learning_rate": 9.826954109841683e-06, "loss": 0.67787495, "memory(GiB)": 34.88, "step": 19175, "train_speed(iter/s)": 0.420786 }, { "acc": 0.85287285, "epoch": 0.5193187664148593, "grad_norm": 21.336589813232422, "learning_rate": 9.826808134033002e-06, "loss": 0.76701012, "memory(GiB)": 34.88, "step": 19180, "train_speed(iter/s)": 0.42079 }, { "acc": 0.85796051, "epoch": 0.5194541466980749, "grad_norm": 6.498367786407471, "learning_rate": 9.826662097765328e-06, "loss": 0.76212988, "memory(GiB)": 34.88, "step": 19185, "train_speed(iter/s)": 0.420793 }, { "acc": 0.87408428, "epoch": 0.5195895269812905, "grad_norm": 13.438541412353516, "learning_rate": 9.826516001040492e-06, "loss": 0.61239023, "memory(GiB)": 34.88, "step": 19190, "train_speed(iter/s)": 0.420796 }, { "acc": 0.8621644, "epoch": 0.519724907264506, "grad_norm": 7.881662845611572, "learning_rate": 9.826369843860323e-06, "loss": 0.70115614, "memory(GiB)": 34.88, "step": 19195, "train_speed(iter/s)": 0.4208 }, { "acc": 0.87723026, "epoch": 0.5198602875477215, "grad_norm": 6.260354995727539, "learning_rate": 9.826223626226647e-06, "loss": 0.65812573, "memory(GiB)": 34.88, "step": 19200, "train_speed(iter/s)": 0.420803 }, { "acc": 0.83352785, "epoch": 0.5199956678309371, "grad_norm": 7.323879718780518, "learning_rate": 9.826077348141305e-06, "loss": 0.95106812, "memory(GiB)": 34.88, "step": 19205, "train_speed(iter/s)": 0.420807 }, { "acc": 0.85649176, "epoch": 0.5201310481141527, "grad_norm": 7.264333248138428, "learning_rate": 9.825931009606121e-06, "loss": 0.76506248, "memory(GiB)": 34.88, "step": 19210, "train_speed(iter/s)": 0.42081 }, { "acc": 0.85813961, "epoch": 0.5202664283973683, "grad_norm": 8.251653671264648, "learning_rate": 9.825784610622932e-06, "loss": 0.70151873, "memory(GiB)": 34.88, "step": 19215, "train_speed(iter/s)": 0.420814 }, { "acc": 0.8475997, "epoch": 0.5204018086805837, "grad_norm": 9.546117782592773, "learning_rate": 9.825638151193574e-06, "loss": 0.74325914, "memory(GiB)": 34.88, "step": 19220, "train_speed(iter/s)": 0.420817 }, { "acc": 0.88713284, "epoch": 0.5205371889637993, "grad_norm": 10.301459312438965, "learning_rate": 9.825491631319876e-06, "loss": 0.56509457, "memory(GiB)": 34.88, "step": 19225, "train_speed(iter/s)": 0.42082 }, { "acc": 0.85042381, "epoch": 0.5206725692470149, "grad_norm": 9.805486679077148, "learning_rate": 9.825345051003681e-06, "loss": 0.77951818, "memory(GiB)": 34.88, "step": 19230, "train_speed(iter/s)": 0.420824 }, { "acc": 0.85322428, "epoch": 0.5208079495302305, "grad_norm": 9.815888404846191, "learning_rate": 9.825198410246818e-06, "loss": 0.72670183, "memory(GiB)": 34.88, "step": 19235, "train_speed(iter/s)": 0.420828 }, { "acc": 0.8410677, "epoch": 0.5209433298134459, "grad_norm": 11.46562385559082, "learning_rate": 9.825051709051127e-06, "loss": 0.78878388, "memory(GiB)": 34.88, "step": 19240, "train_speed(iter/s)": 0.420831 }, { "acc": 0.86658258, "epoch": 0.5210787100966615, "grad_norm": 3.7514901161193848, "learning_rate": 9.824904947418446e-06, "loss": 0.60999985, "memory(GiB)": 34.88, "step": 19245, "train_speed(iter/s)": 0.420834 }, { "acc": 0.85807791, "epoch": 0.5212140903798771, "grad_norm": 9.33813190460205, "learning_rate": 9.824758125350613e-06, "loss": 0.78091202, "memory(GiB)": 34.88, "step": 19250, "train_speed(iter/s)": 0.420837 }, { "acc": 0.84246521, "epoch": 0.5213494706630927, "grad_norm": 9.136470794677734, "learning_rate": 9.824611242849466e-06, "loss": 0.77872753, "memory(GiB)": 34.88, "step": 19255, "train_speed(iter/s)": 0.42084 }, { "acc": 0.87466278, "epoch": 0.5214848509463081, "grad_norm": 7.563362121582031, "learning_rate": 9.824464299916849e-06, "loss": 0.67540226, "memory(GiB)": 34.88, "step": 19260, "train_speed(iter/s)": 0.420843 }, { "acc": 0.89046726, "epoch": 0.5216202312295237, "grad_norm": 8.191082954406738, "learning_rate": 9.824317296554598e-06, "loss": 0.48946176, "memory(GiB)": 34.88, "step": 19265, "train_speed(iter/s)": 0.420846 }, { "acc": 0.83801241, "epoch": 0.5217556115127393, "grad_norm": 18.770069122314453, "learning_rate": 9.824170232764557e-06, "loss": 0.87040415, "memory(GiB)": 34.88, "step": 19270, "train_speed(iter/s)": 0.420849 }, { "acc": 0.85827475, "epoch": 0.5218909917959549, "grad_norm": 6.990488052368164, "learning_rate": 9.824023108548569e-06, "loss": 0.71617632, "memory(GiB)": 34.88, "step": 19275, "train_speed(iter/s)": 0.420853 }, { "acc": 0.85272083, "epoch": 0.5220263720791704, "grad_norm": 13.709575653076172, "learning_rate": 9.823875923908474e-06, "loss": 0.75159287, "memory(GiB)": 34.88, "step": 19280, "train_speed(iter/s)": 0.420856 }, { "acc": 0.85877075, "epoch": 0.5221617523623859, "grad_norm": 5.981345176696777, "learning_rate": 9.82372867884612e-06, "loss": 0.67624235, "memory(GiB)": 34.88, "step": 19285, "train_speed(iter/s)": 0.42086 }, { "acc": 0.86373158, "epoch": 0.5222971326456015, "grad_norm": 13.936206817626953, "learning_rate": 9.823581373363346e-06, "loss": 0.64390879, "memory(GiB)": 34.88, "step": 19290, "train_speed(iter/s)": 0.420864 }, { "acc": 0.85848207, "epoch": 0.5224325129288171, "grad_norm": 7.158227443695068, "learning_rate": 9.823434007462002e-06, "loss": 0.6800818, "memory(GiB)": 34.88, "step": 19295, "train_speed(iter/s)": 0.420868 }, { "acc": 0.86057329, "epoch": 0.5225678932120326, "grad_norm": 6.060482501983643, "learning_rate": 9.823286581143935e-06, "loss": 0.70958467, "memory(GiB)": 34.88, "step": 19300, "train_speed(iter/s)": 0.42087 }, { "acc": 0.87949715, "epoch": 0.5227032734952481, "grad_norm": 7.460309982299805, "learning_rate": 9.823139094410986e-06, "loss": 0.55520239, "memory(GiB)": 34.88, "step": 19305, "train_speed(iter/s)": 0.420874 }, { "acc": 0.87401514, "epoch": 0.5228386537784637, "grad_norm": 9.349018096923828, "learning_rate": 9.822991547265007e-06, "loss": 0.71886015, "memory(GiB)": 34.88, "step": 19310, "train_speed(iter/s)": 0.420878 }, { "acc": 0.87626171, "epoch": 0.5229740340616793, "grad_norm": 6.347671031951904, "learning_rate": 9.822843939707846e-06, "loss": 0.67414026, "memory(GiB)": 34.88, "step": 19315, "train_speed(iter/s)": 0.420881 }, { "acc": 0.87390766, "epoch": 0.5231094143448948, "grad_norm": 6.165119171142578, "learning_rate": 9.82269627174135e-06, "loss": 0.64646249, "memory(GiB)": 34.88, "step": 19320, "train_speed(iter/s)": 0.420885 }, { "acc": 0.89004564, "epoch": 0.5232447946281104, "grad_norm": 6.622564315795898, "learning_rate": 9.822548543367371e-06, "loss": 0.53459778, "memory(GiB)": 34.88, "step": 19325, "train_speed(iter/s)": 0.420888 }, { "acc": 0.86022654, "epoch": 0.5233801749113259, "grad_norm": 6.953200817108154, "learning_rate": 9.822400754587759e-06, "loss": 0.63820677, "memory(GiB)": 34.88, "step": 19330, "train_speed(iter/s)": 0.420891 }, { "acc": 0.83738995, "epoch": 0.5235155551945415, "grad_norm": 8.092358589172363, "learning_rate": 9.822252905404362e-06, "loss": 0.95346985, "memory(GiB)": 34.88, "step": 19335, "train_speed(iter/s)": 0.420894 }, { "acc": 0.87136726, "epoch": 0.523650935477757, "grad_norm": 7.8341169357299805, "learning_rate": 9.822104995819038e-06, "loss": 0.61296911, "memory(GiB)": 34.88, "step": 19340, "train_speed(iter/s)": 0.420898 }, { "acc": 0.84701233, "epoch": 0.5237863157609726, "grad_norm": 12.731243133544922, "learning_rate": 9.821957025833637e-06, "loss": 0.82565088, "memory(GiB)": 34.88, "step": 19345, "train_speed(iter/s)": 0.420901 }, { "acc": 0.85362463, "epoch": 0.5239216960441881, "grad_norm": 8.849563598632812, "learning_rate": 9.821808995450011e-06, "loss": 0.75580521, "memory(GiB)": 34.88, "step": 19350, "train_speed(iter/s)": 0.420904 }, { "acc": 0.86000671, "epoch": 0.5240570763274037, "grad_norm": 7.372122287750244, "learning_rate": 9.821660904670019e-06, "loss": 0.62177534, "memory(GiB)": 34.88, "step": 19355, "train_speed(iter/s)": 0.420907 }, { "acc": 0.8739109, "epoch": 0.5241924566106192, "grad_norm": 7.705411911010742, "learning_rate": 9.82151275349551e-06, "loss": 0.60719233, "memory(GiB)": 34.88, "step": 19360, "train_speed(iter/s)": 0.42091 }, { "acc": 0.86796837, "epoch": 0.5243278368938348, "grad_norm": 6.658170223236084, "learning_rate": 9.821364541928344e-06, "loss": 0.68220563, "memory(GiB)": 34.88, "step": 19365, "train_speed(iter/s)": 0.420914 }, { "acc": 0.85279884, "epoch": 0.5244632171770504, "grad_norm": 11.168987274169922, "learning_rate": 9.821216269970377e-06, "loss": 0.77540126, "memory(GiB)": 34.88, "step": 19370, "train_speed(iter/s)": 0.420917 }, { "acc": 0.84907961, "epoch": 0.5245985974602659, "grad_norm": 5.483128547668457, "learning_rate": 9.821067937623466e-06, "loss": 0.75149426, "memory(GiB)": 34.88, "step": 19375, "train_speed(iter/s)": 0.42092 }, { "acc": 0.85011969, "epoch": 0.5247339777434814, "grad_norm": 12.977256774902344, "learning_rate": 9.820919544889468e-06, "loss": 0.74383144, "memory(GiB)": 34.88, "step": 19380, "train_speed(iter/s)": 0.420924 }, { "acc": 0.8549614, "epoch": 0.524869358026697, "grad_norm": 13.739730834960938, "learning_rate": 9.820771091770245e-06, "loss": 0.70066538, "memory(GiB)": 34.88, "step": 19385, "train_speed(iter/s)": 0.420928 }, { "acc": 0.87189465, "epoch": 0.5250047383099126, "grad_norm": 11.889148712158203, "learning_rate": 9.820622578267652e-06, "loss": 0.56300478, "memory(GiB)": 34.88, "step": 19390, "train_speed(iter/s)": 0.420931 }, { "acc": 0.86825399, "epoch": 0.5251401185931281, "grad_norm": 7.529813766479492, "learning_rate": 9.820474004383554e-06, "loss": 0.71127548, "memory(GiB)": 34.88, "step": 19395, "train_speed(iter/s)": 0.420934 }, { "acc": 0.84771957, "epoch": 0.5252754988763436, "grad_norm": 8.479971885681152, "learning_rate": 9.820325370119811e-06, "loss": 0.73642526, "memory(GiB)": 34.88, "step": 19400, "train_speed(iter/s)": 0.420938 }, { "acc": 0.86425037, "epoch": 0.5254108791595592, "grad_norm": 7.949577331542969, "learning_rate": 9.820176675478285e-06, "loss": 0.68857279, "memory(GiB)": 34.88, "step": 19405, "train_speed(iter/s)": 0.420941 }, { "acc": 0.85150681, "epoch": 0.5255462594427748, "grad_norm": 18.369272232055664, "learning_rate": 9.820027920460836e-06, "loss": 0.71497021, "memory(GiB)": 34.88, "step": 19410, "train_speed(iter/s)": 0.420945 }, { "acc": 0.88093634, "epoch": 0.5256816397259904, "grad_norm": 7.372071743011475, "learning_rate": 9.81987910506933e-06, "loss": 0.50584474, "memory(GiB)": 34.88, "step": 19415, "train_speed(iter/s)": 0.420948 }, { "acc": 0.85517101, "epoch": 0.5258170200092058, "grad_norm": 14.076654434204102, "learning_rate": 9.819730229305631e-06, "loss": 0.86934834, "memory(GiB)": 34.88, "step": 19420, "train_speed(iter/s)": 0.420952 }, { "acc": 0.84162369, "epoch": 0.5259524002924214, "grad_norm": 16.381946563720703, "learning_rate": 9.819581293171603e-06, "loss": 0.83098907, "memory(GiB)": 34.88, "step": 19425, "train_speed(iter/s)": 0.420956 }, { "acc": 0.86687775, "epoch": 0.526087780575637, "grad_norm": 6.6579389572143555, "learning_rate": 9.819432296669113e-06, "loss": 0.69119163, "memory(GiB)": 34.88, "step": 19430, "train_speed(iter/s)": 0.420959 }, { "acc": 0.86492977, "epoch": 0.5262231608588526, "grad_norm": 60.500144958496094, "learning_rate": 9.819283239800026e-06, "loss": 0.72404594, "memory(GiB)": 34.88, "step": 19435, "train_speed(iter/s)": 0.420962 }, { "acc": 0.87684212, "epoch": 0.526358541142068, "grad_norm": 20.691743850708008, "learning_rate": 9.81913412256621e-06, "loss": 0.5763113, "memory(GiB)": 34.88, "step": 19440, "train_speed(iter/s)": 0.420964 }, { "acc": 0.89167557, "epoch": 0.5264939214252836, "grad_norm": 2.789085865020752, "learning_rate": 9.818984944969532e-06, "loss": 0.51432877, "memory(GiB)": 34.88, "step": 19445, "train_speed(iter/s)": 0.420967 }, { "acc": 0.84925022, "epoch": 0.5266293017084992, "grad_norm": 8.357131958007812, "learning_rate": 9.818835707011864e-06, "loss": 0.72000799, "memory(GiB)": 34.88, "step": 19450, "train_speed(iter/s)": 0.420971 }, { "acc": 0.87643614, "epoch": 0.5267646819917148, "grad_norm": 7.571173667907715, "learning_rate": 9.818686408695072e-06, "loss": 0.57764826, "memory(GiB)": 34.88, "step": 19455, "train_speed(iter/s)": 0.420975 }, { "acc": 0.86456356, "epoch": 0.5269000622749302, "grad_norm": 9.276979446411133, "learning_rate": 9.81853705002103e-06, "loss": 0.61875496, "memory(GiB)": 34.88, "step": 19460, "train_speed(iter/s)": 0.420979 }, { "acc": 0.87397432, "epoch": 0.5270354425581458, "grad_norm": 12.608470916748047, "learning_rate": 9.818387630991604e-06, "loss": 0.66242495, "memory(GiB)": 34.88, "step": 19465, "train_speed(iter/s)": 0.420982 }, { "acc": 0.87764053, "epoch": 0.5271708228413614, "grad_norm": 12.622873306274414, "learning_rate": 9.81823815160867e-06, "loss": 0.50167494, "memory(GiB)": 34.88, "step": 19470, "train_speed(iter/s)": 0.420986 }, { "acc": 0.86262169, "epoch": 0.527306203124577, "grad_norm": 9.303487777709961, "learning_rate": 9.818088611874098e-06, "loss": 0.63001461, "memory(GiB)": 34.88, "step": 19475, "train_speed(iter/s)": 0.420989 }, { "acc": 0.86671877, "epoch": 0.5274415834077925, "grad_norm": 7.386744499206543, "learning_rate": 9.81793901178976e-06, "loss": 0.63400908, "memory(GiB)": 34.88, "step": 19480, "train_speed(iter/s)": 0.420993 }, { "acc": 0.87045822, "epoch": 0.527576963691008, "grad_norm": 7.218787670135498, "learning_rate": 9.817789351357535e-06, "loss": 0.67153397, "memory(GiB)": 34.88, "step": 19485, "train_speed(iter/s)": 0.420996 }, { "acc": 0.85677443, "epoch": 0.5277123439742236, "grad_norm": 8.989781379699707, "learning_rate": 9.817639630579292e-06, "loss": 0.71837893, "memory(GiB)": 34.88, "step": 19490, "train_speed(iter/s)": 0.421 }, { "acc": 0.86698456, "epoch": 0.5278477242574392, "grad_norm": 14.138588905334473, "learning_rate": 9.817489849456912e-06, "loss": 0.61412706, "memory(GiB)": 34.88, "step": 19495, "train_speed(iter/s)": 0.421003 }, { "acc": 0.88098707, "epoch": 0.5279831045406547, "grad_norm": 8.631270408630371, "learning_rate": 9.817340007992267e-06, "loss": 0.57241044, "memory(GiB)": 34.88, "step": 19500, "train_speed(iter/s)": 0.421007 }, { "acc": 0.85134239, "epoch": 0.5281184848238702, "grad_norm": 17.10683822631836, "learning_rate": 9.817190106187238e-06, "loss": 0.82681751, "memory(GiB)": 34.88, "step": 19505, "train_speed(iter/s)": 0.421009 }, { "acc": 0.84689627, "epoch": 0.5282538651070858, "grad_norm": 8.686392784118652, "learning_rate": 9.8170401440437e-06, "loss": 0.82551346, "memory(GiB)": 34.88, "step": 19510, "train_speed(iter/s)": 0.421012 }, { "acc": 0.86203604, "epoch": 0.5283892453903014, "grad_norm": 7.807919502258301, "learning_rate": 9.816890121563531e-06, "loss": 0.68213158, "memory(GiB)": 34.88, "step": 19515, "train_speed(iter/s)": 0.421016 }, { "acc": 0.86877155, "epoch": 0.5285246256735169, "grad_norm": 7.143519878387451, "learning_rate": 9.816740038748611e-06, "loss": 0.59391165, "memory(GiB)": 34.88, "step": 19520, "train_speed(iter/s)": 0.421019 }, { "acc": 0.86005802, "epoch": 0.5286600059567325, "grad_norm": 14.276223182678223, "learning_rate": 9.816589895600822e-06, "loss": 0.73927522, "memory(GiB)": 34.88, "step": 19525, "train_speed(iter/s)": 0.421023 }, { "acc": 0.86728878, "epoch": 0.528795386239948, "grad_norm": 9.146607398986816, "learning_rate": 9.816439692122044e-06, "loss": 0.68094397, "memory(GiB)": 34.88, "step": 19530, "train_speed(iter/s)": 0.421026 }, { "acc": 0.85797215, "epoch": 0.5289307665231636, "grad_norm": 9.119551658630371, "learning_rate": 9.816289428314157e-06, "loss": 0.73158159, "memory(GiB)": 34.88, "step": 19535, "train_speed(iter/s)": 0.42103 }, { "acc": 0.86552429, "epoch": 0.5290661468063791, "grad_norm": 14.873555183410645, "learning_rate": 9.816139104179044e-06, "loss": 0.65607872, "memory(GiB)": 34.88, "step": 19540, "train_speed(iter/s)": 0.421032 }, { "acc": 0.85766621, "epoch": 0.5292015270895947, "grad_norm": 9.427056312561035, "learning_rate": 9.81598871971859e-06, "loss": 0.72028704, "memory(GiB)": 34.88, "step": 19545, "train_speed(iter/s)": 0.421035 }, { "acc": 0.84346571, "epoch": 0.5293369073728103, "grad_norm": 7.98598575592041, "learning_rate": 9.815838274934678e-06, "loss": 0.8123662, "memory(GiB)": 34.88, "step": 19550, "train_speed(iter/s)": 0.421039 }, { "acc": 0.87618275, "epoch": 0.5294722876560258, "grad_norm": 5.073436260223389, "learning_rate": 9.815687769829188e-06, "loss": 0.62169323, "memory(GiB)": 34.88, "step": 19555, "train_speed(iter/s)": 0.421042 }, { "acc": 0.85150709, "epoch": 0.5296076679392413, "grad_norm": 14.67457389831543, "learning_rate": 9.815537204404015e-06, "loss": 0.80556879, "memory(GiB)": 34.88, "step": 19560, "train_speed(iter/s)": 0.421046 }, { "acc": 0.84888096, "epoch": 0.5297430482224569, "grad_norm": 22.90509796142578, "learning_rate": 9.815386578661035e-06, "loss": 0.68207359, "memory(GiB)": 34.88, "step": 19565, "train_speed(iter/s)": 0.421049 }, { "acc": 0.87475395, "epoch": 0.5298784285056725, "grad_norm": 6.476648330688477, "learning_rate": 9.81523589260214e-06, "loss": 0.66217546, "memory(GiB)": 34.88, "step": 19570, "train_speed(iter/s)": 0.421053 }, { "acc": 0.8750371, "epoch": 0.530013808788888, "grad_norm": 5.430109024047852, "learning_rate": 9.815085146229218e-06, "loss": 0.57021475, "memory(GiB)": 34.88, "step": 19575, "train_speed(iter/s)": 0.421056 }, { "acc": 0.85313263, "epoch": 0.5301491890721035, "grad_norm": 8.560614585876465, "learning_rate": 9.814934339544156e-06, "loss": 0.70387239, "memory(GiB)": 34.88, "step": 19580, "train_speed(iter/s)": 0.42106 }, { "acc": 0.85520821, "epoch": 0.5302845693553191, "grad_norm": 8.997124671936035, "learning_rate": 9.814783472548844e-06, "loss": 0.74104185, "memory(GiB)": 34.88, "step": 19585, "train_speed(iter/s)": 0.421063 }, { "acc": 0.87589359, "epoch": 0.5304199496385347, "grad_norm": 6.977688789367676, "learning_rate": 9.814632545245173e-06, "loss": 0.59132357, "memory(GiB)": 34.88, "step": 19590, "train_speed(iter/s)": 0.421067 }, { "acc": 0.85336533, "epoch": 0.5305553299217503, "grad_norm": 10.23696517944336, "learning_rate": 9.814481557635029e-06, "loss": 0.82081051, "memory(GiB)": 34.88, "step": 19595, "train_speed(iter/s)": 0.421071 }, { "acc": 0.82775049, "epoch": 0.5306907102049657, "grad_norm": 14.075553894042969, "learning_rate": 9.814330509720308e-06, "loss": 1.00447321, "memory(GiB)": 34.88, "step": 19600, "train_speed(iter/s)": 0.421074 }, { "acc": 0.86271133, "epoch": 0.5308260904881813, "grad_norm": 16.80362892150879, "learning_rate": 9.814179401502901e-06, "loss": 0.70361161, "memory(GiB)": 34.88, "step": 19605, "train_speed(iter/s)": 0.421076 }, { "acc": 0.85543156, "epoch": 0.5309614707713969, "grad_norm": 9.873324394226074, "learning_rate": 9.814028232984699e-06, "loss": 0.79096451, "memory(GiB)": 34.88, "step": 19610, "train_speed(iter/s)": 0.42108 }, { "acc": 0.86144085, "epoch": 0.5310968510546124, "grad_norm": 9.79393196105957, "learning_rate": 9.813877004167598e-06, "loss": 0.70270438, "memory(GiB)": 34.88, "step": 19615, "train_speed(iter/s)": 0.421083 }, { "acc": 0.83583164, "epoch": 0.5312322313378279, "grad_norm": 12.054673194885254, "learning_rate": 9.813725715053493e-06, "loss": 0.83711042, "memory(GiB)": 34.88, "step": 19620, "train_speed(iter/s)": 0.421085 }, { "acc": 0.8782629, "epoch": 0.5313676116210435, "grad_norm": 7.77802848815918, "learning_rate": 9.813574365644277e-06, "loss": 0.47888103, "memory(GiB)": 34.88, "step": 19625, "train_speed(iter/s)": 0.421089 }, { "acc": 0.8268816, "epoch": 0.5315029919042591, "grad_norm": 10.571003913879395, "learning_rate": 9.813422955941848e-06, "loss": 0.91043291, "memory(GiB)": 34.88, "step": 19630, "train_speed(iter/s)": 0.421092 }, { "acc": 0.85781603, "epoch": 0.5316383721874746, "grad_norm": 12.66012954711914, "learning_rate": 9.8132714859481e-06, "loss": 0.7212256, "memory(GiB)": 34.88, "step": 19635, "train_speed(iter/s)": 0.421095 }, { "acc": 0.87348633, "epoch": 0.5317737524706901, "grad_norm": 4.056346416473389, "learning_rate": 9.813119955664934e-06, "loss": 0.62196035, "memory(GiB)": 34.88, "step": 19640, "train_speed(iter/s)": 0.421098 }, { "acc": 0.8733057, "epoch": 0.5319091327539057, "grad_norm": 10.926881790161133, "learning_rate": 9.812968365094245e-06, "loss": 0.57474194, "memory(GiB)": 34.88, "step": 19645, "train_speed(iter/s)": 0.421101 }, { "acc": 0.89058561, "epoch": 0.5320445130371213, "grad_norm": 11.80569076538086, "learning_rate": 9.812816714237933e-06, "loss": 0.56390934, "memory(GiB)": 34.88, "step": 19650, "train_speed(iter/s)": 0.421105 }, { "acc": 0.85777435, "epoch": 0.5321798933203368, "grad_norm": 6.630864143371582, "learning_rate": 9.812665003097897e-06, "loss": 0.64368067, "memory(GiB)": 34.88, "step": 19655, "train_speed(iter/s)": 0.421108 }, { "acc": 0.87480335, "epoch": 0.5323152736035524, "grad_norm": 12.14991569519043, "learning_rate": 9.81251323167604e-06, "loss": 0.67970181, "memory(GiB)": 34.88, "step": 19660, "train_speed(iter/s)": 0.421112 }, { "acc": 0.87058239, "epoch": 0.5324506538867679, "grad_norm": 5.340123653411865, "learning_rate": 9.812361399974261e-06, "loss": 0.61619596, "memory(GiB)": 34.88, "step": 19665, "train_speed(iter/s)": 0.421115 }, { "acc": 0.85158854, "epoch": 0.5325860341699835, "grad_norm": 4.741554260253906, "learning_rate": 9.812209507994465e-06, "loss": 0.75956159, "memory(GiB)": 34.88, "step": 19670, "train_speed(iter/s)": 0.421118 }, { "acc": 0.84228439, "epoch": 0.532721414453199, "grad_norm": 4.250974655151367, "learning_rate": 9.812057555738548e-06, "loss": 0.79053621, "memory(GiB)": 34.88, "step": 19675, "train_speed(iter/s)": 0.42112 }, { "acc": 0.84800787, "epoch": 0.5328567947364146, "grad_norm": 7.589141368865967, "learning_rate": 9.811905543208421e-06, "loss": 0.79690084, "memory(GiB)": 34.88, "step": 19680, "train_speed(iter/s)": 0.421123 }, { "acc": 0.87188969, "epoch": 0.5329921750196301, "grad_norm": 24.12001609802246, "learning_rate": 9.811753470405986e-06, "loss": 0.61645474, "memory(GiB)": 34.88, "step": 19685, "train_speed(iter/s)": 0.421127 }, { "acc": 0.85924253, "epoch": 0.5331275553028457, "grad_norm": 7.300251007080078, "learning_rate": 9.811601337333145e-06, "loss": 0.71540418, "memory(GiB)": 34.88, "step": 19690, "train_speed(iter/s)": 0.42113 }, { "acc": 0.85159988, "epoch": 0.5332629355860612, "grad_norm": 7.297908306121826, "learning_rate": 9.811449143991808e-06, "loss": 0.72524333, "memory(GiB)": 34.88, "step": 19695, "train_speed(iter/s)": 0.421133 }, { "acc": 0.83454008, "epoch": 0.5333983158692768, "grad_norm": 5.268502712249756, "learning_rate": 9.81129689038388e-06, "loss": 0.82627344, "memory(GiB)": 34.88, "step": 19700, "train_speed(iter/s)": 0.421135 }, { "acc": 0.89791937, "epoch": 0.5335336961524924, "grad_norm": 6.066640377044678, "learning_rate": 9.811144576511265e-06, "loss": 0.49711366, "memory(GiB)": 34.88, "step": 19705, "train_speed(iter/s)": 0.421139 }, { "acc": 0.84640951, "epoch": 0.5336690764357079, "grad_norm": 23.00495719909668, "learning_rate": 9.810992202375873e-06, "loss": 0.81406307, "memory(GiB)": 34.88, "step": 19710, "train_speed(iter/s)": 0.421142 }, { "acc": 0.85855474, "epoch": 0.5338044567189234, "grad_norm": 15.719365119934082, "learning_rate": 9.810839767979616e-06, "loss": 0.77723904, "memory(GiB)": 34.88, "step": 19715, "train_speed(iter/s)": 0.421146 }, { "acc": 0.85202408, "epoch": 0.533939837002139, "grad_norm": 4.485607147216797, "learning_rate": 9.8106872733244e-06, "loss": 0.7518301, "memory(GiB)": 34.88, "step": 19720, "train_speed(iter/s)": 0.421149 }, { "acc": 0.85652199, "epoch": 0.5340752172853546, "grad_norm": 29.33757972717285, "learning_rate": 9.810534718412136e-06, "loss": 0.76553659, "memory(GiB)": 34.88, "step": 19725, "train_speed(iter/s)": 0.421153 }, { "acc": 0.87429848, "epoch": 0.5342105975685701, "grad_norm": 6.533641338348389, "learning_rate": 9.810382103244737e-06, "loss": 0.70955105, "memory(GiB)": 34.88, "step": 19730, "train_speed(iter/s)": 0.421156 }, { "acc": 0.85118675, "epoch": 0.5343459778517856, "grad_norm": 10.494531631469727, "learning_rate": 9.810229427824113e-06, "loss": 0.75525851, "memory(GiB)": 34.88, "step": 19735, "train_speed(iter/s)": 0.421159 }, { "acc": 0.85176306, "epoch": 0.5344813581350012, "grad_norm": 10.705741882324219, "learning_rate": 9.810076692152175e-06, "loss": 0.84453125, "memory(GiB)": 34.88, "step": 19740, "train_speed(iter/s)": 0.421163 }, { "acc": 0.87285051, "epoch": 0.5346167384182168, "grad_norm": 9.777936935424805, "learning_rate": 9.80992389623084e-06, "loss": 0.59166269, "memory(GiB)": 34.88, "step": 19745, "train_speed(iter/s)": 0.421165 }, { "acc": 0.8641366, "epoch": 0.5347521187014324, "grad_norm": 9.100300788879395, "learning_rate": 9.80977104006202e-06, "loss": 0.66841459, "memory(GiB)": 34.88, "step": 19750, "train_speed(iter/s)": 0.421168 }, { "acc": 0.86830482, "epoch": 0.5348874989846478, "grad_norm": 11.912684440612793, "learning_rate": 9.80961812364763e-06, "loss": 0.64599361, "memory(GiB)": 34.88, "step": 19755, "train_speed(iter/s)": 0.421171 }, { "acc": 0.86261406, "epoch": 0.5350228792678634, "grad_norm": 5.873701095581055, "learning_rate": 9.809465146989584e-06, "loss": 0.69354954, "memory(GiB)": 34.88, "step": 19760, "train_speed(iter/s)": 0.421174 }, { "acc": 0.84599171, "epoch": 0.535158259551079, "grad_norm": 6.0549702644348145, "learning_rate": 9.809312110089801e-06, "loss": 0.72356081, "memory(GiB)": 34.88, "step": 19765, "train_speed(iter/s)": 0.421178 }, { "acc": 0.87098522, "epoch": 0.5352936398342946, "grad_norm": 7.969841957092285, "learning_rate": 9.809159012950199e-06, "loss": 0.68086386, "memory(GiB)": 34.88, "step": 19770, "train_speed(iter/s)": 0.42118 }, { "acc": 0.85758553, "epoch": 0.53542902011751, "grad_norm": 10.705230712890625, "learning_rate": 9.809005855572692e-06, "loss": 0.78260193, "memory(GiB)": 34.88, "step": 19775, "train_speed(iter/s)": 0.421184 }, { "acc": 0.85906839, "epoch": 0.5355644004007256, "grad_norm": 4.648877143859863, "learning_rate": 9.8088526379592e-06, "loss": 0.69120822, "memory(GiB)": 34.88, "step": 19780, "train_speed(iter/s)": 0.421187 }, { "acc": 0.87186451, "epoch": 0.5356997806839412, "grad_norm": 6.382997035980225, "learning_rate": 9.808699360111645e-06, "loss": 0.59470263, "memory(GiB)": 34.88, "step": 19785, "train_speed(iter/s)": 0.42119 }, { "acc": 0.84465599, "epoch": 0.5358351609671568, "grad_norm": 10.433247566223145, "learning_rate": 9.808546022031942e-06, "loss": 0.79549732, "memory(GiB)": 34.88, "step": 19790, "train_speed(iter/s)": 0.421193 }, { "acc": 0.86657448, "epoch": 0.5359705412503722, "grad_norm": 9.703766822814941, "learning_rate": 9.808392623722017e-06, "loss": 0.68411722, "memory(GiB)": 34.88, "step": 19795, "train_speed(iter/s)": 0.421197 }, { "acc": 0.8755188, "epoch": 0.5361059215335878, "grad_norm": 15.305314064025879, "learning_rate": 9.808239165183787e-06, "loss": 0.5942627, "memory(GiB)": 34.88, "step": 19800, "train_speed(iter/s)": 0.4212 }, { "acc": 0.84961576, "epoch": 0.5362413018168034, "grad_norm": 7.898400783538818, "learning_rate": 9.808085646419178e-06, "loss": 0.79757948, "memory(GiB)": 34.88, "step": 19805, "train_speed(iter/s)": 0.421204 }, { "acc": 0.8438653, "epoch": 0.536376682100019, "grad_norm": 9.104391098022461, "learning_rate": 9.807932067430113e-06, "loss": 0.84245567, "memory(GiB)": 34.88, "step": 19810, "train_speed(iter/s)": 0.421207 }, { "acc": 0.85500498, "epoch": 0.5365120623832345, "grad_norm": 10.502467155456543, "learning_rate": 9.807778428218514e-06, "loss": 0.79306788, "memory(GiB)": 34.88, "step": 19815, "train_speed(iter/s)": 0.42121 }, { "acc": 0.85022182, "epoch": 0.53664744266645, "grad_norm": 9.363059043884277, "learning_rate": 9.807624728786307e-06, "loss": 0.77564955, "memory(GiB)": 34.88, "step": 19820, "train_speed(iter/s)": 0.421213 }, { "acc": 0.86835537, "epoch": 0.5367828229496656, "grad_norm": 7.02459192276001, "learning_rate": 9.807470969135415e-06, "loss": 0.67558413, "memory(GiB)": 34.88, "step": 19825, "train_speed(iter/s)": 0.421217 }, { "acc": 0.86383038, "epoch": 0.5369182032328812, "grad_norm": 5.66875696182251, "learning_rate": 9.807317149267768e-06, "loss": 0.77158847, "memory(GiB)": 34.88, "step": 19830, "train_speed(iter/s)": 0.42122 }, { "acc": 0.84667082, "epoch": 0.5370535835160967, "grad_norm": 9.735119819641113, "learning_rate": 9.807163269185289e-06, "loss": 0.81812878, "memory(GiB)": 34.88, "step": 19835, "train_speed(iter/s)": 0.421224 }, { "acc": 0.86495514, "epoch": 0.5371889637993122, "grad_norm": 7.904820442199707, "learning_rate": 9.807009328889907e-06, "loss": 0.71032476, "memory(GiB)": 34.88, "step": 19840, "train_speed(iter/s)": 0.421227 }, { "acc": 0.85841341, "epoch": 0.5373243440825278, "grad_norm": 6.8166584968566895, "learning_rate": 9.806855328383555e-06, "loss": 0.70676932, "memory(GiB)": 34.88, "step": 19845, "train_speed(iter/s)": 0.421231 }, { "acc": 0.84420319, "epoch": 0.5374597243657434, "grad_norm": 8.108226776123047, "learning_rate": 9.806701267668154e-06, "loss": 0.868958, "memory(GiB)": 34.88, "step": 19850, "train_speed(iter/s)": 0.421235 }, { "acc": 0.83292103, "epoch": 0.5375951046489589, "grad_norm": 6.4738335609436035, "learning_rate": 9.806547146745638e-06, "loss": 0.97796116, "memory(GiB)": 34.88, "step": 19855, "train_speed(iter/s)": 0.421238 }, { "acc": 0.85526295, "epoch": 0.5377304849321745, "grad_norm": 8.25489616394043, "learning_rate": 9.80639296561794e-06, "loss": 0.7873827, "memory(GiB)": 34.88, "step": 19860, "train_speed(iter/s)": 0.42124 }, { "acc": 0.85088062, "epoch": 0.53786586521539, "grad_norm": 6.333637714385986, "learning_rate": 9.806238724286989e-06, "loss": 0.77134528, "memory(GiB)": 34.88, "step": 19865, "train_speed(iter/s)": 0.421244 }, { "acc": 0.84127512, "epoch": 0.5380012454986056, "grad_norm": 12.365283966064453, "learning_rate": 9.806084422754715e-06, "loss": 0.83902025, "memory(GiB)": 34.88, "step": 19870, "train_speed(iter/s)": 0.421247 }, { "acc": 0.86480942, "epoch": 0.5381366257818211, "grad_norm": 16.300678253173828, "learning_rate": 9.805930061023056e-06, "loss": 0.72843962, "memory(GiB)": 34.88, "step": 19875, "train_speed(iter/s)": 0.421251 }, { "acc": 0.87197323, "epoch": 0.5382720060650367, "grad_norm": 10.179540634155273, "learning_rate": 9.805775639093941e-06, "loss": 0.63293409, "memory(GiB)": 34.88, "step": 19880, "train_speed(iter/s)": 0.421253 }, { "acc": 0.85494938, "epoch": 0.5384073863482522, "grad_norm": 6.083385944366455, "learning_rate": 9.805621156969305e-06, "loss": 0.69825468, "memory(GiB)": 34.88, "step": 19885, "train_speed(iter/s)": 0.421256 }, { "acc": 0.84248552, "epoch": 0.5385427666314678, "grad_norm": 12.856338500976562, "learning_rate": 9.805466614651086e-06, "loss": 0.93329077, "memory(GiB)": 34.88, "step": 19890, "train_speed(iter/s)": 0.421259 }, { "acc": 0.86570215, "epoch": 0.5386781469146833, "grad_norm": 11.392653465270996, "learning_rate": 9.805312012141216e-06, "loss": 0.81124935, "memory(GiB)": 34.88, "step": 19895, "train_speed(iter/s)": 0.421263 }, { "acc": 0.85406246, "epoch": 0.5388135271978989, "grad_norm": 14.411778450012207, "learning_rate": 9.805157349441638e-06, "loss": 0.7638876, "memory(GiB)": 34.88, "step": 19900, "train_speed(iter/s)": 0.421266 }, { "acc": 0.84652014, "epoch": 0.5389489074811145, "grad_norm": 7.54341459274292, "learning_rate": 9.805002626554285e-06, "loss": 0.76862097, "memory(GiB)": 34.88, "step": 19905, "train_speed(iter/s)": 0.421269 }, { "acc": 0.84287491, "epoch": 0.53908428776433, "grad_norm": 7.232206344604492, "learning_rate": 9.804847843481094e-06, "loss": 0.74735513, "memory(GiB)": 34.88, "step": 19910, "train_speed(iter/s)": 0.421272 }, { "acc": 0.84334822, "epoch": 0.5392196680475455, "grad_norm": 6.01292610168457, "learning_rate": 9.804693000224007e-06, "loss": 0.75322747, "memory(GiB)": 34.88, "step": 19915, "train_speed(iter/s)": 0.421274 }, { "acc": 0.861059, "epoch": 0.5393550483307611, "grad_norm": 7.090649127960205, "learning_rate": 9.80453809678496e-06, "loss": 0.67903485, "memory(GiB)": 34.88, "step": 19920, "train_speed(iter/s)": 0.421277 }, { "acc": 0.84293232, "epoch": 0.5394904286139767, "grad_norm": 24.73075294494629, "learning_rate": 9.8043831331659e-06, "loss": 0.91939211, "memory(GiB)": 34.88, "step": 19925, "train_speed(iter/s)": 0.42128 }, { "acc": 0.85907373, "epoch": 0.5396258088971922, "grad_norm": 7.850395679473877, "learning_rate": 9.804228109368762e-06, "loss": 0.65870342, "memory(GiB)": 34.88, "step": 19930, "train_speed(iter/s)": 0.421283 }, { "acc": 0.84680576, "epoch": 0.5397611891804077, "grad_norm": 9.932194709777832, "learning_rate": 9.804073025395489e-06, "loss": 0.90717087, "memory(GiB)": 34.88, "step": 19935, "train_speed(iter/s)": 0.421286 }, { "acc": 0.85866032, "epoch": 0.5398965694636233, "grad_norm": 10.37386703491211, "learning_rate": 9.803917881248026e-06, "loss": 0.70712852, "memory(GiB)": 34.88, "step": 19940, "train_speed(iter/s)": 0.421289 }, { "acc": 0.85790091, "epoch": 0.5400319497468389, "grad_norm": 27.83890724182129, "learning_rate": 9.803762676928315e-06, "loss": 0.74057932, "memory(GiB)": 34.88, "step": 19945, "train_speed(iter/s)": 0.421292 }, { "acc": 0.84467983, "epoch": 0.5401673300300545, "grad_norm": 9.777619361877441, "learning_rate": 9.8036074124383e-06, "loss": 0.79944181, "memory(GiB)": 34.88, "step": 19950, "train_speed(iter/s)": 0.421295 }, { "acc": 0.85443363, "epoch": 0.5403027103132699, "grad_norm": 5.206300258636475, "learning_rate": 9.803452087779929e-06, "loss": 0.83353786, "memory(GiB)": 34.88, "step": 19955, "train_speed(iter/s)": 0.421298 }, { "acc": 0.85163155, "epoch": 0.5404380905964855, "grad_norm": 9.026618957519531, "learning_rate": 9.803296702955142e-06, "loss": 0.7939539, "memory(GiB)": 34.88, "step": 19960, "train_speed(iter/s)": 0.421302 }, { "acc": 0.86725273, "epoch": 0.5405734708797011, "grad_norm": 16.62590980529785, "learning_rate": 9.803141257965893e-06, "loss": 0.64464087, "memory(GiB)": 34.88, "step": 19965, "train_speed(iter/s)": 0.421305 }, { "acc": 0.84858522, "epoch": 0.5407088511629167, "grad_norm": 10.03516674041748, "learning_rate": 9.80298575281412e-06, "loss": 0.77728686, "memory(GiB)": 34.88, "step": 19970, "train_speed(iter/s)": 0.421309 }, { "acc": 0.88394794, "epoch": 0.5408442314461321, "grad_norm": 3.6993205547332764, "learning_rate": 9.802830187501779e-06, "loss": 0.53809109, "memory(GiB)": 34.88, "step": 19975, "train_speed(iter/s)": 0.42131 }, { "acc": 0.88328562, "epoch": 0.5409796117293477, "grad_norm": 4.648527145385742, "learning_rate": 9.802674562030817e-06, "loss": 0.54547734, "memory(GiB)": 34.88, "step": 19980, "train_speed(iter/s)": 0.421313 }, { "acc": 0.8505435, "epoch": 0.5411149920125633, "grad_norm": 9.769984245300293, "learning_rate": 9.80251887640318e-06, "loss": 0.74556475, "memory(GiB)": 34.88, "step": 19985, "train_speed(iter/s)": 0.421316 }, { "acc": 0.87742767, "epoch": 0.5412503722957789, "grad_norm": 5.352456092834473, "learning_rate": 9.802363130620824e-06, "loss": 0.65795298, "memory(GiB)": 34.88, "step": 19990, "train_speed(iter/s)": 0.421318 }, { "acc": 0.87314587, "epoch": 0.5413857525789944, "grad_norm": 3.499903917312622, "learning_rate": 9.802207324685692e-06, "loss": 0.60278659, "memory(GiB)": 34.88, "step": 19995, "train_speed(iter/s)": 0.421321 }, { "acc": 0.84438276, "epoch": 0.5415211328622099, "grad_norm": 10.783670425415039, "learning_rate": 9.802051458599743e-06, "loss": 0.84199429, "memory(GiB)": 34.88, "step": 20000, "train_speed(iter/s)": 0.421325 }, { "epoch": 0.5415211328622099, "eval_acc": 0.5749523314125676, "eval_loss": 1.0812546014785767, "eval_runtime": 1297.658, "eval_samples_per_second": 66.508, "eval_steps_per_second": 2.079, "step": 20000 }, { "acc": 0.87932053, "epoch": 0.5416565131454255, "grad_norm": 7.632540225982666, "learning_rate": 9.801895532364925e-06, "loss": 0.59612408, "memory(GiB)": 34.88, "step": 20005, "train_speed(iter/s)": 0.409946 }, { "acc": 0.86098537, "epoch": 0.5417918934286411, "grad_norm": 8.1634521484375, "learning_rate": 9.801739545983197e-06, "loss": 0.70320668, "memory(GiB)": 34.88, "step": 20010, "train_speed(iter/s)": 0.409952 }, { "acc": 0.86172848, "epoch": 0.5419272737118566, "grad_norm": 6.381533145904541, "learning_rate": 9.801583499456506e-06, "loss": 0.68179207, "memory(GiB)": 34.88, "step": 20015, "train_speed(iter/s)": 0.409957 }, { "acc": 0.86265411, "epoch": 0.5420626539950721, "grad_norm": 24.368120193481445, "learning_rate": 9.80142739278681e-06, "loss": 0.75104313, "memory(GiB)": 34.88, "step": 20020, "train_speed(iter/s)": 0.409963 }, { "acc": 0.84769011, "epoch": 0.5421980342782877, "grad_norm": 11.257024765014648, "learning_rate": 9.801271225976067e-06, "loss": 0.8513464, "memory(GiB)": 34.88, "step": 20025, "train_speed(iter/s)": 0.409969 }, { "acc": 0.87109699, "epoch": 0.5423334145615033, "grad_norm": 7.2188591957092285, "learning_rate": 9.801114999026228e-06, "loss": 0.67449765, "memory(GiB)": 34.88, "step": 20030, "train_speed(iter/s)": 0.409975 }, { "acc": 0.8829155, "epoch": 0.5424687948447188, "grad_norm": 4.916101455688477, "learning_rate": 9.800958711939256e-06, "loss": 0.58089752, "memory(GiB)": 34.88, "step": 20035, "train_speed(iter/s)": 0.409981 }, { "acc": 0.84858942, "epoch": 0.5426041751279344, "grad_norm": 4.329159259796143, "learning_rate": 9.800802364717105e-06, "loss": 0.74471302, "memory(GiB)": 34.88, "step": 20040, "train_speed(iter/s)": 0.409987 }, { "acc": 0.86245327, "epoch": 0.5427395554111499, "grad_norm": 6.4162797927856445, "learning_rate": 9.800645957361732e-06, "loss": 0.69829121, "memory(GiB)": 34.88, "step": 20045, "train_speed(iter/s)": 0.409993 }, { "acc": 0.86102161, "epoch": 0.5428749356943655, "grad_norm": 9.675399780273438, "learning_rate": 9.800489489875102e-06, "loss": 0.66831112, "memory(GiB)": 34.88, "step": 20050, "train_speed(iter/s)": 0.409998 }, { "acc": 0.86341953, "epoch": 0.543010315977581, "grad_norm": 12.19606876373291, "learning_rate": 9.80033296225917e-06, "loss": 0.74190316, "memory(GiB)": 34.88, "step": 20055, "train_speed(iter/s)": 0.410003 }, { "acc": 0.84321299, "epoch": 0.5431456962607966, "grad_norm": 11.675067901611328, "learning_rate": 9.800176374515898e-06, "loss": 0.86561928, "memory(GiB)": 34.88, "step": 20060, "train_speed(iter/s)": 0.410008 }, { "acc": 0.83233099, "epoch": 0.5432810765440121, "grad_norm": 10.7365083694458, "learning_rate": 9.800019726647246e-06, "loss": 0.87037067, "memory(GiB)": 34.88, "step": 20065, "train_speed(iter/s)": 0.410014 }, { "acc": 0.85821152, "epoch": 0.5434164568272277, "grad_norm": 15.705143928527832, "learning_rate": 9.799863018655182e-06, "loss": 0.71081848, "memory(GiB)": 34.88, "step": 20070, "train_speed(iter/s)": 0.41002 }, { "acc": 0.86866989, "epoch": 0.5435518371104432, "grad_norm": 6.291865825653076, "learning_rate": 9.799706250541665e-06, "loss": 0.60084777, "memory(GiB)": 34.88, "step": 20075, "train_speed(iter/s)": 0.410025 }, { "acc": 0.865028, "epoch": 0.5436872173936588, "grad_norm": 9.593670845031738, "learning_rate": 9.799549422308658e-06, "loss": 0.7357626, "memory(GiB)": 34.88, "step": 20080, "train_speed(iter/s)": 0.410031 }, { "acc": 0.85387535, "epoch": 0.5438225976768744, "grad_norm": 6.330348491668701, "learning_rate": 9.799392533958128e-06, "loss": 0.73901858, "memory(GiB)": 34.88, "step": 20085, "train_speed(iter/s)": 0.410037 }, { "acc": 0.87940235, "epoch": 0.5439579779600899, "grad_norm": 6.525903701782227, "learning_rate": 9.799235585492038e-06, "loss": 0.55172586, "memory(GiB)": 34.88, "step": 20090, "train_speed(iter/s)": 0.410043 }, { "acc": 0.86277981, "epoch": 0.5440933582433054, "grad_norm": 7.596674919128418, "learning_rate": 9.799078576912357e-06, "loss": 0.71308703, "memory(GiB)": 34.88, "step": 20095, "train_speed(iter/s)": 0.410049 }, { "acc": 0.8739069, "epoch": 0.544228738526521, "grad_norm": 6.174765586853027, "learning_rate": 9.798921508221048e-06, "loss": 0.65544105, "memory(GiB)": 34.88, "step": 20100, "train_speed(iter/s)": 0.410055 }, { "acc": 0.83740463, "epoch": 0.5443641188097366, "grad_norm": 13.020439147949219, "learning_rate": 9.798764379420082e-06, "loss": 0.83605175, "memory(GiB)": 34.88, "step": 20105, "train_speed(iter/s)": 0.41006 }, { "acc": 0.86685505, "epoch": 0.5444994990929521, "grad_norm": 11.60455322265625, "learning_rate": 9.798607190511427e-06, "loss": 0.62728195, "memory(GiB)": 34.88, "step": 20110, "train_speed(iter/s)": 0.410066 }, { "acc": 0.87411194, "epoch": 0.5446348793761676, "grad_norm": 12.958128929138184, "learning_rate": 9.79844994149705e-06, "loss": 0.6367631, "memory(GiB)": 34.88, "step": 20115, "train_speed(iter/s)": 0.410072 }, { "acc": 0.84007568, "epoch": 0.5447702596593832, "grad_norm": 6.610434532165527, "learning_rate": 9.79829263237892e-06, "loss": 0.7958045, "memory(GiB)": 34.88, "step": 20120, "train_speed(iter/s)": 0.410077 }, { "acc": 0.86008787, "epoch": 0.5449056399425988, "grad_norm": 10.056429862976074, "learning_rate": 9.798135263159013e-06, "loss": 0.62260351, "memory(GiB)": 34.88, "step": 20125, "train_speed(iter/s)": 0.410083 }, { "acc": 0.89318533, "epoch": 0.5450410202258144, "grad_norm": 3.785327434539795, "learning_rate": 9.797977833839295e-06, "loss": 0.46733017, "memory(GiB)": 34.88, "step": 20130, "train_speed(iter/s)": 0.410089 }, { "acc": 0.86815453, "epoch": 0.5451764005090298, "grad_norm": 9.891918182373047, "learning_rate": 9.79782034442174e-06, "loss": 0.73656521, "memory(GiB)": 34.88, "step": 20135, "train_speed(iter/s)": 0.410095 }, { "acc": 0.85882549, "epoch": 0.5453117807922454, "grad_norm": 13.358968734741211, "learning_rate": 9.797662794908324e-06, "loss": 0.67461538, "memory(GiB)": 34.88, "step": 20140, "train_speed(iter/s)": 0.410101 }, { "acc": 0.85538149, "epoch": 0.545447161075461, "grad_norm": 8.059294700622559, "learning_rate": 9.797505185301015e-06, "loss": 0.75678706, "memory(GiB)": 34.88, "step": 20145, "train_speed(iter/s)": 0.410106 }, { "acc": 0.86011238, "epoch": 0.5455825413586766, "grad_norm": 7.27199649810791, "learning_rate": 9.79734751560179e-06, "loss": 0.67748356, "memory(GiB)": 34.88, "step": 20150, "train_speed(iter/s)": 0.410111 }, { "acc": 0.86856365, "epoch": 0.545717921641892, "grad_norm": 5.153161525726318, "learning_rate": 9.797189785812624e-06, "loss": 0.68578892, "memory(GiB)": 34.88, "step": 20155, "train_speed(iter/s)": 0.410117 }, { "acc": 0.87353249, "epoch": 0.5458533019251076, "grad_norm": 8.649246215820312, "learning_rate": 9.797031995935493e-06, "loss": 0.60796204, "memory(GiB)": 34.88, "step": 20160, "train_speed(iter/s)": 0.410122 }, { "acc": 0.8685236, "epoch": 0.5459886822083232, "grad_norm": 15.807604789733887, "learning_rate": 9.796874145972374e-06, "loss": 0.6227756, "memory(GiB)": 34.88, "step": 20165, "train_speed(iter/s)": 0.410127 }, { "acc": 0.86078386, "epoch": 0.5461240624915388, "grad_norm": 8.418336868286133, "learning_rate": 9.796716235925245e-06, "loss": 0.71402869, "memory(GiB)": 34.88, "step": 20170, "train_speed(iter/s)": 0.410133 }, { "acc": 0.84845867, "epoch": 0.5462594427747542, "grad_norm": 6.136288166046143, "learning_rate": 9.796558265796081e-06, "loss": 0.7598464, "memory(GiB)": 34.88, "step": 20175, "train_speed(iter/s)": 0.410138 }, { "acc": 0.85757389, "epoch": 0.5463948230579698, "grad_norm": 11.112028121948242, "learning_rate": 9.796400235586864e-06, "loss": 0.74012794, "memory(GiB)": 34.88, "step": 20180, "train_speed(iter/s)": 0.410144 }, { "acc": 0.87300692, "epoch": 0.5465302033411854, "grad_norm": 6.848942279815674, "learning_rate": 9.796242145299572e-06, "loss": 0.57618198, "memory(GiB)": 34.88, "step": 20185, "train_speed(iter/s)": 0.410149 }, { "acc": 0.87580814, "epoch": 0.546665583624401, "grad_norm": 8.58360481262207, "learning_rate": 9.796083994936187e-06, "loss": 0.67048807, "memory(GiB)": 34.88, "step": 20190, "train_speed(iter/s)": 0.410156 }, { "acc": 0.85287075, "epoch": 0.5468009639076165, "grad_norm": 7.6241774559021, "learning_rate": 9.795925784498689e-06, "loss": 0.72934351, "memory(GiB)": 34.88, "step": 20195, "train_speed(iter/s)": 0.410161 }, { "acc": 0.86615353, "epoch": 0.546936344190832, "grad_norm": 8.691046714782715, "learning_rate": 9.795767513989059e-06, "loss": 0.69664922, "memory(GiB)": 34.88, "step": 20200, "train_speed(iter/s)": 0.410167 }, { "acc": 0.87897139, "epoch": 0.5470717244740476, "grad_norm": 2.233762264251709, "learning_rate": 9.795609183409282e-06, "loss": 0.5974967, "memory(GiB)": 34.88, "step": 20205, "train_speed(iter/s)": 0.410173 }, { "acc": 0.85101166, "epoch": 0.5472071047572632, "grad_norm": 9.234845161437988, "learning_rate": 9.79545079276134e-06, "loss": 0.79361925, "memory(GiB)": 34.88, "step": 20210, "train_speed(iter/s)": 0.410179 }, { "acc": 0.87011185, "epoch": 0.5473424850404787, "grad_norm": 5.8776116371154785, "learning_rate": 9.795292342047217e-06, "loss": 0.6199688, "memory(GiB)": 34.88, "step": 20215, "train_speed(iter/s)": 0.410185 }, { "acc": 0.84891691, "epoch": 0.5474778653236942, "grad_norm": 9.44606876373291, "learning_rate": 9.795133831268897e-06, "loss": 0.94894161, "memory(GiB)": 34.88, "step": 20220, "train_speed(iter/s)": 0.410189 }, { "acc": 0.86261692, "epoch": 0.5476132456069098, "grad_norm": 7.00400447845459, "learning_rate": 9.794975260428369e-06, "loss": 0.70457745, "memory(GiB)": 34.88, "step": 20225, "train_speed(iter/s)": 0.410194 }, { "acc": 0.86721783, "epoch": 0.5477486258901254, "grad_norm": 9.697824478149414, "learning_rate": 9.794816629527614e-06, "loss": 0.73104677, "memory(GiB)": 34.88, "step": 20230, "train_speed(iter/s)": 0.410199 }, { "acc": 0.88615446, "epoch": 0.5478840061733409, "grad_norm": 11.576656341552734, "learning_rate": 9.794657938568627e-06, "loss": 0.53470793, "memory(GiB)": 34.88, "step": 20235, "train_speed(iter/s)": 0.410205 }, { "acc": 0.8498333, "epoch": 0.5480193864565565, "grad_norm": 6.918018341064453, "learning_rate": 9.794499187553387e-06, "loss": 0.81119289, "memory(GiB)": 34.88, "step": 20240, "train_speed(iter/s)": 0.41021 }, { "acc": 0.85757751, "epoch": 0.548154766739772, "grad_norm": 35.70751190185547, "learning_rate": 9.79434037648389e-06, "loss": 0.75640326, "memory(GiB)": 34.88, "step": 20245, "train_speed(iter/s)": 0.410216 }, { "acc": 0.83378468, "epoch": 0.5482901470229876, "grad_norm": 11.002665519714355, "learning_rate": 9.794181505362121e-06, "loss": 0.88755713, "memory(GiB)": 34.88, "step": 20250, "train_speed(iter/s)": 0.410222 }, { "acc": 0.8378191, "epoch": 0.5484255273062031, "grad_norm": 14.87975025177002, "learning_rate": 9.794022574190072e-06, "loss": 0.85611286, "memory(GiB)": 34.88, "step": 20255, "train_speed(iter/s)": 0.410227 }, { "acc": 0.87932434, "epoch": 0.5485609075894187, "grad_norm": 9.155869483947754, "learning_rate": 9.793863582969733e-06, "loss": 0.57244329, "memory(GiB)": 34.88, "step": 20260, "train_speed(iter/s)": 0.410232 }, { "acc": 0.86928034, "epoch": 0.5486962878726342, "grad_norm": 6.901664733886719, "learning_rate": 9.793704531703096e-06, "loss": 0.67254853, "memory(GiB)": 34.88, "step": 20265, "train_speed(iter/s)": 0.410238 }, { "acc": 0.86377726, "epoch": 0.5488316681558498, "grad_norm": 6.4745941162109375, "learning_rate": 9.793545420392155e-06, "loss": 0.68036876, "memory(GiB)": 34.88, "step": 20270, "train_speed(iter/s)": 0.410244 }, { "acc": 0.86176605, "epoch": 0.5489670484390653, "grad_norm": 8.81794548034668, "learning_rate": 9.793386249038901e-06, "loss": 0.75417576, "memory(GiB)": 34.88, "step": 20275, "train_speed(iter/s)": 0.410249 }, { "acc": 0.86688175, "epoch": 0.5491024287222809, "grad_norm": 16.906734466552734, "learning_rate": 9.793227017645328e-06, "loss": 0.68292828, "memory(GiB)": 34.88, "step": 20280, "train_speed(iter/s)": 0.410255 }, { "acc": 0.84461956, "epoch": 0.5492378090054965, "grad_norm": 7.953946113586426, "learning_rate": 9.793067726213434e-06, "loss": 0.74397736, "memory(GiB)": 34.88, "step": 20285, "train_speed(iter/s)": 0.41026 }, { "acc": 0.85594759, "epoch": 0.549373189288712, "grad_norm": 6.652230739593506, "learning_rate": 9.792908374745209e-06, "loss": 0.686868, "memory(GiB)": 34.88, "step": 20290, "train_speed(iter/s)": 0.410264 }, { "acc": 0.85416203, "epoch": 0.5495085695719275, "grad_norm": 9.342848777770996, "learning_rate": 9.792748963242652e-06, "loss": 0.76780491, "memory(GiB)": 34.88, "step": 20295, "train_speed(iter/s)": 0.410269 }, { "acc": 0.86836662, "epoch": 0.5496439498551431, "grad_norm": 7.359367847442627, "learning_rate": 9.79258949170776e-06, "loss": 0.745926, "memory(GiB)": 34.88, "step": 20300, "train_speed(iter/s)": 0.410274 }, { "acc": 0.87979069, "epoch": 0.5497793301383587, "grad_norm": 5.003194332122803, "learning_rate": 9.792429960142533e-06, "loss": 0.5845005, "memory(GiB)": 34.88, "step": 20305, "train_speed(iter/s)": 0.41028 }, { "acc": 0.85962524, "epoch": 0.5499147104215742, "grad_norm": 7.1845245361328125, "learning_rate": 9.792270368548966e-06, "loss": 0.55683427, "memory(GiB)": 34.88, "step": 20310, "train_speed(iter/s)": 0.410284 }, { "acc": 0.858992, "epoch": 0.5500500907047897, "grad_norm": 11.085155487060547, "learning_rate": 9.792110716929059e-06, "loss": 0.73933382, "memory(GiB)": 34.88, "step": 20315, "train_speed(iter/s)": 0.41029 }, { "acc": 0.88124123, "epoch": 0.5501854709880053, "grad_norm": 4.802183628082275, "learning_rate": 9.791951005284813e-06, "loss": 0.65344572, "memory(GiB)": 34.88, "step": 20320, "train_speed(iter/s)": 0.410295 }, { "acc": 0.85496006, "epoch": 0.5503208512712209, "grad_norm": 16.097475051879883, "learning_rate": 9.791791233618225e-06, "loss": 0.77850075, "memory(GiB)": 34.88, "step": 20325, "train_speed(iter/s)": 0.410298 }, { "acc": 0.87138252, "epoch": 0.5504562315544365, "grad_norm": 9.853652954101562, "learning_rate": 9.791631401931304e-06, "loss": 0.65072355, "memory(GiB)": 34.88, "step": 20330, "train_speed(iter/s)": 0.4103 }, { "acc": 0.86065083, "epoch": 0.5505916118376519, "grad_norm": 9.746665954589844, "learning_rate": 9.791471510226044e-06, "loss": 0.85760393, "memory(GiB)": 34.88, "step": 20335, "train_speed(iter/s)": 0.410305 }, { "acc": 0.84109888, "epoch": 0.5507269921208675, "grad_norm": 11.120645523071289, "learning_rate": 9.791311558504453e-06, "loss": 0.84690456, "memory(GiB)": 34.88, "step": 20340, "train_speed(iter/s)": 0.410309 }, { "acc": 0.83599987, "epoch": 0.5508623724040831, "grad_norm": 24.10748291015625, "learning_rate": 9.791151546768534e-06, "loss": 0.93241415, "memory(GiB)": 34.88, "step": 20345, "train_speed(iter/s)": 0.410312 }, { "acc": 0.86941071, "epoch": 0.5509977526872987, "grad_norm": 7.364782333374023, "learning_rate": 9.79099147502029e-06, "loss": 0.67787185, "memory(GiB)": 34.88, "step": 20350, "train_speed(iter/s)": 0.410318 }, { "acc": 0.84321747, "epoch": 0.5511331329705141, "grad_norm": 13.33559513092041, "learning_rate": 9.790831343261726e-06, "loss": 0.80251036, "memory(GiB)": 34.88, "step": 20355, "train_speed(iter/s)": 0.410324 }, { "acc": 0.85951757, "epoch": 0.5512685132537297, "grad_norm": 16.761777877807617, "learning_rate": 9.790671151494851e-06, "loss": 0.75786533, "memory(GiB)": 34.88, "step": 20360, "train_speed(iter/s)": 0.410329 }, { "acc": 0.86976204, "epoch": 0.5514038935369453, "grad_norm": 6.774224758148193, "learning_rate": 9.790510899721667e-06, "loss": 0.65327878, "memory(GiB)": 34.88, "step": 20365, "train_speed(iter/s)": 0.410334 }, { "acc": 0.87325592, "epoch": 0.5515392738201609, "grad_norm": 12.630633354187012, "learning_rate": 9.790350587944187e-06, "loss": 0.66865916, "memory(GiB)": 34.88, "step": 20370, "train_speed(iter/s)": 0.410338 }, { "acc": 0.84361973, "epoch": 0.5516746541033763, "grad_norm": 13.81874942779541, "learning_rate": 9.790190216164415e-06, "loss": 0.77640653, "memory(GiB)": 34.88, "step": 20375, "train_speed(iter/s)": 0.410343 }, { "acc": 0.86691809, "epoch": 0.5518100343865919, "grad_norm": 9.21068000793457, "learning_rate": 9.790029784384361e-06, "loss": 0.68136868, "memory(GiB)": 34.88, "step": 20380, "train_speed(iter/s)": 0.410348 }, { "acc": 0.86424942, "epoch": 0.5519454146698075, "grad_norm": 15.691572189331055, "learning_rate": 9.789869292606035e-06, "loss": 0.64340277, "memory(GiB)": 34.88, "step": 20385, "train_speed(iter/s)": 0.410354 }, { "acc": 0.86379757, "epoch": 0.5520807949530231, "grad_norm": 15.847015380859375, "learning_rate": 9.789708740831449e-06, "loss": 0.71798902, "memory(GiB)": 34.88, "step": 20390, "train_speed(iter/s)": 0.410359 }, { "acc": 0.86194839, "epoch": 0.5522161752362386, "grad_norm": 5.790842056274414, "learning_rate": 9.789548129062611e-06, "loss": 0.70705862, "memory(GiB)": 34.88, "step": 20395, "train_speed(iter/s)": 0.410364 }, { "acc": 0.85447693, "epoch": 0.5523515555194541, "grad_norm": 10.760978698730469, "learning_rate": 9.789387457301534e-06, "loss": 0.67411461, "memory(GiB)": 34.88, "step": 20400, "train_speed(iter/s)": 0.410369 }, { "acc": 0.85894547, "epoch": 0.5524869358026697, "grad_norm": 9.857504844665527, "learning_rate": 9.789226725550234e-06, "loss": 0.76976681, "memory(GiB)": 34.88, "step": 20405, "train_speed(iter/s)": 0.41037 }, { "acc": 0.87016487, "epoch": 0.5526223160858853, "grad_norm": 9.23982048034668, "learning_rate": 9.78906593381072e-06, "loss": 0.71468363, "memory(GiB)": 34.88, "step": 20410, "train_speed(iter/s)": 0.410374 }, { "acc": 0.86648064, "epoch": 0.5527576963691008, "grad_norm": 8.375171661376953, "learning_rate": 9.78890508208501e-06, "loss": 0.60294542, "memory(GiB)": 34.88, "step": 20415, "train_speed(iter/s)": 0.41038 }, { "acc": 0.88071079, "epoch": 0.5528930766523164, "grad_norm": 17.510936737060547, "learning_rate": 9.788744170375116e-06, "loss": 0.65083809, "memory(GiB)": 34.88, "step": 20420, "train_speed(iter/s)": 0.410385 }, { "acc": 0.84759369, "epoch": 0.5530284569355319, "grad_norm": 8.603476524353027, "learning_rate": 9.788583198683055e-06, "loss": 0.72992196, "memory(GiB)": 34.88, "step": 20425, "train_speed(iter/s)": 0.41039 }, { "acc": 0.87008038, "epoch": 0.5531638372187475, "grad_norm": 7.773343563079834, "learning_rate": 9.788422167010843e-06, "loss": 0.75440102, "memory(GiB)": 34.88, "step": 20430, "train_speed(iter/s)": 0.410396 }, { "acc": 0.8502634, "epoch": 0.553299217501963, "grad_norm": 12.844236373901367, "learning_rate": 9.788261075360498e-06, "loss": 0.70678883, "memory(GiB)": 34.88, "step": 20435, "train_speed(iter/s)": 0.410401 }, { "acc": 0.88809929, "epoch": 0.5534345977851786, "grad_norm": 8.338385581970215, "learning_rate": 9.788099923734036e-06, "loss": 0.58999004, "memory(GiB)": 34.88, "step": 20440, "train_speed(iter/s)": 0.410405 }, { "acc": 0.8238369, "epoch": 0.5535699780683941, "grad_norm": 10.200430870056152, "learning_rate": 9.787938712133481e-06, "loss": 0.99390373, "memory(GiB)": 34.88, "step": 20445, "train_speed(iter/s)": 0.41041 }, { "acc": 0.84363041, "epoch": 0.5537053583516097, "grad_norm": 8.798730850219727, "learning_rate": 9.787777440560847e-06, "loss": 0.78933425, "memory(GiB)": 34.88, "step": 20450, "train_speed(iter/s)": 0.410416 }, { "acc": 0.85026655, "epoch": 0.5538407386348252, "grad_norm": 15.55961799621582, "learning_rate": 9.787616109018156e-06, "loss": 0.83199291, "memory(GiB)": 34.88, "step": 20455, "train_speed(iter/s)": 0.410422 }, { "acc": 0.85699425, "epoch": 0.5539761189180408, "grad_norm": 10.210975646972656, "learning_rate": 9.787454717507428e-06, "loss": 0.73904743, "memory(GiB)": 34.88, "step": 20460, "train_speed(iter/s)": 0.410427 }, { "acc": 0.8517992, "epoch": 0.5541114992012564, "grad_norm": 15.452583312988281, "learning_rate": 9.787293266030687e-06, "loss": 0.71577878, "memory(GiB)": 34.88, "step": 20465, "train_speed(iter/s)": 0.410432 }, { "acc": 0.84686651, "epoch": 0.5542468794844719, "grad_norm": 5.855714797973633, "learning_rate": 9.787131754589955e-06, "loss": 0.6923202, "memory(GiB)": 34.88, "step": 20470, "train_speed(iter/s)": 0.410437 }, { "acc": 0.89009514, "epoch": 0.5543822597676874, "grad_norm": 14.96107006072998, "learning_rate": 9.786970183187252e-06, "loss": 0.48218875, "memory(GiB)": 34.88, "step": 20475, "train_speed(iter/s)": 0.410442 }, { "acc": 0.83605814, "epoch": 0.554517640050903, "grad_norm": 13.23825454711914, "learning_rate": 9.786808551824605e-06, "loss": 0.82596722, "memory(GiB)": 34.88, "step": 20480, "train_speed(iter/s)": 0.410447 }, { "acc": 0.88236189, "epoch": 0.5546530203341186, "grad_norm": 13.328871726989746, "learning_rate": 9.78664686050404e-06, "loss": 0.4490716, "memory(GiB)": 34.88, "step": 20485, "train_speed(iter/s)": 0.410453 }, { "acc": 0.85982685, "epoch": 0.5547884006173341, "grad_norm": 4.67385196685791, "learning_rate": 9.786485109227581e-06, "loss": 0.63287897, "memory(GiB)": 34.88, "step": 20490, "train_speed(iter/s)": 0.410459 }, { "acc": 0.85451059, "epoch": 0.5549237809005496, "grad_norm": 5.723808288574219, "learning_rate": 9.786323297997252e-06, "loss": 0.7387259, "memory(GiB)": 34.88, "step": 20495, "train_speed(iter/s)": 0.410464 }, { "acc": 0.86323681, "epoch": 0.5550591611837652, "grad_norm": 6.603364944458008, "learning_rate": 9.786161426815085e-06, "loss": 0.62291498, "memory(GiB)": 34.88, "step": 20500, "train_speed(iter/s)": 0.41047 }, { "acc": 0.8532877, "epoch": 0.5551945414669808, "grad_norm": 19.647056579589844, "learning_rate": 9.785999495683103e-06, "loss": 0.76298313, "memory(GiB)": 34.88, "step": 20505, "train_speed(iter/s)": 0.410476 }, { "acc": 0.85308075, "epoch": 0.5553299217501964, "grad_norm": 4.735311031341553, "learning_rate": 9.785837504603336e-06, "loss": 0.72303982, "memory(GiB)": 34.88, "step": 20510, "train_speed(iter/s)": 0.410481 }, { "acc": 0.86538391, "epoch": 0.5554653020334118, "grad_norm": 10.854740142822266, "learning_rate": 9.785675453577814e-06, "loss": 0.72005472, "memory(GiB)": 34.88, "step": 20515, "train_speed(iter/s)": 0.410487 }, { "acc": 0.8682827, "epoch": 0.5556006823166274, "grad_norm": 3.7906787395477295, "learning_rate": 9.785513342608566e-06, "loss": 0.65766077, "memory(GiB)": 34.88, "step": 20520, "train_speed(iter/s)": 0.410493 }, { "acc": 0.85666723, "epoch": 0.555736062599843, "grad_norm": 7.600958824157715, "learning_rate": 9.785351171697624e-06, "loss": 0.82824383, "memory(GiB)": 34.88, "step": 20525, "train_speed(iter/s)": 0.410498 }, { "acc": 0.87332916, "epoch": 0.5558714428830586, "grad_norm": 6.252861499786377, "learning_rate": 9.78518894084702e-06, "loss": 0.63753052, "memory(GiB)": 34.88, "step": 20530, "train_speed(iter/s)": 0.410503 }, { "acc": 0.85391369, "epoch": 0.556006823166274, "grad_norm": 14.31906795501709, "learning_rate": 9.785026650058783e-06, "loss": 0.70156145, "memory(GiB)": 34.88, "step": 20535, "train_speed(iter/s)": 0.410508 }, { "acc": 0.84474516, "epoch": 0.5561422034494896, "grad_norm": 11.481414794921875, "learning_rate": 9.78486429933495e-06, "loss": 0.77487602, "memory(GiB)": 34.88, "step": 20540, "train_speed(iter/s)": 0.410513 }, { "acc": 0.87698803, "epoch": 0.5562775837327052, "grad_norm": 7.25154447555542, "learning_rate": 9.784701888677552e-06, "loss": 0.59136553, "memory(GiB)": 34.88, "step": 20545, "train_speed(iter/s)": 0.410519 }, { "acc": 0.86829157, "epoch": 0.5564129640159208, "grad_norm": 6.009446620941162, "learning_rate": 9.784539418088625e-06, "loss": 0.67395954, "memory(GiB)": 34.88, "step": 20550, "train_speed(iter/s)": 0.410525 }, { "acc": 0.84767246, "epoch": 0.5565483442991362, "grad_norm": 8.86365795135498, "learning_rate": 9.784376887570201e-06, "loss": 0.83857574, "memory(GiB)": 34.88, "step": 20555, "train_speed(iter/s)": 0.410531 }, { "acc": 0.87395878, "epoch": 0.5566837245823518, "grad_norm": 9.264730453491211, "learning_rate": 9.784214297124322e-06, "loss": 0.67691541, "memory(GiB)": 34.88, "step": 20560, "train_speed(iter/s)": 0.410537 }, { "acc": 0.85761814, "epoch": 0.5568191048655674, "grad_norm": 6.731354236602783, "learning_rate": 9.784051646753021e-06, "loss": 0.71299596, "memory(GiB)": 34.88, "step": 20565, "train_speed(iter/s)": 0.410542 }, { "acc": 0.87713575, "epoch": 0.556954485148783, "grad_norm": 7.640605926513672, "learning_rate": 9.783888936458336e-06, "loss": 0.64506979, "memory(GiB)": 34.88, "step": 20570, "train_speed(iter/s)": 0.410547 }, { "acc": 0.87789421, "epoch": 0.5570898654319985, "grad_norm": 5.359803676605225, "learning_rate": 9.783726166242303e-06, "loss": 0.66790028, "memory(GiB)": 34.88, "step": 20575, "train_speed(iter/s)": 0.410553 }, { "acc": 0.84072094, "epoch": 0.557225245715214, "grad_norm": 8.130912780761719, "learning_rate": 9.783563336106966e-06, "loss": 0.80056486, "memory(GiB)": 34.88, "step": 20580, "train_speed(iter/s)": 0.410558 }, { "acc": 0.85534163, "epoch": 0.5573606259984296, "grad_norm": 9.527637481689453, "learning_rate": 9.78340044605436e-06, "loss": 0.79558396, "memory(GiB)": 34.88, "step": 20585, "train_speed(iter/s)": 0.410563 }, { "acc": 0.85448399, "epoch": 0.5574960062816452, "grad_norm": 10.199590682983398, "learning_rate": 9.783237496086527e-06, "loss": 0.62358351, "memory(GiB)": 34.88, "step": 20590, "train_speed(iter/s)": 0.410569 }, { "acc": 0.88578796, "epoch": 0.5576313865648607, "grad_norm": 11.904385566711426, "learning_rate": 9.783074486205511e-06, "loss": 0.56727133, "memory(GiB)": 34.88, "step": 20595, "train_speed(iter/s)": 0.410575 }, { "acc": 0.88095598, "epoch": 0.5577667668480762, "grad_norm": 6.541842460632324, "learning_rate": 9.782911416413351e-06, "loss": 0.60214372, "memory(GiB)": 34.88, "step": 20600, "train_speed(iter/s)": 0.41058 }, { "acc": 0.87246246, "epoch": 0.5579021471312918, "grad_norm": 5.381481647491455, "learning_rate": 9.78274828671209e-06, "loss": 0.64771147, "memory(GiB)": 34.88, "step": 20605, "train_speed(iter/s)": 0.410585 }, { "acc": 0.8592947, "epoch": 0.5580375274145074, "grad_norm": 8.718403816223145, "learning_rate": 9.782585097103773e-06, "loss": 0.78578024, "memory(GiB)": 34.88, "step": 20610, "train_speed(iter/s)": 0.41059 }, { "acc": 0.86184168, "epoch": 0.5581729076977229, "grad_norm": 6.870345592498779, "learning_rate": 9.782421847590443e-06, "loss": 0.68403039, "memory(GiB)": 34.88, "step": 20615, "train_speed(iter/s)": 0.410595 }, { "acc": 0.83765297, "epoch": 0.5583082879809385, "grad_norm": 7.713947772979736, "learning_rate": 9.782258538174145e-06, "loss": 0.87420607, "memory(GiB)": 34.88, "step": 20620, "train_speed(iter/s)": 0.410601 }, { "acc": 0.84355497, "epoch": 0.558443668264154, "grad_norm": 10.576641082763672, "learning_rate": 9.782095168856925e-06, "loss": 0.92910728, "memory(GiB)": 34.88, "step": 20625, "train_speed(iter/s)": 0.410605 }, { "acc": 0.85350819, "epoch": 0.5585790485473696, "grad_norm": 6.076261043548584, "learning_rate": 9.781931739640832e-06, "loss": 0.76605144, "memory(GiB)": 34.88, "step": 20630, "train_speed(iter/s)": 0.41061 }, { "acc": 0.86584892, "epoch": 0.5587144288305851, "grad_norm": 10.466280937194824, "learning_rate": 9.781768250527906e-06, "loss": 0.72239456, "memory(GiB)": 34.88, "step": 20635, "train_speed(iter/s)": 0.410616 }, { "acc": 0.84552526, "epoch": 0.5588498091138007, "grad_norm": 7.2318501472473145, "learning_rate": 9.781604701520206e-06, "loss": 0.85305386, "memory(GiB)": 34.88, "step": 20640, "train_speed(iter/s)": 0.410621 }, { "acc": 0.85562668, "epoch": 0.5589851893970162, "grad_norm": 11.081694602966309, "learning_rate": 9.781441092619771e-06, "loss": 0.78213725, "memory(GiB)": 34.88, "step": 20645, "train_speed(iter/s)": 0.410627 }, { "acc": 0.8316597, "epoch": 0.5591205696802317, "grad_norm": 10.906920433044434, "learning_rate": 9.781277423828656e-06, "loss": 0.83619347, "memory(GiB)": 34.88, "step": 20650, "train_speed(iter/s)": 0.410632 }, { "acc": 0.84432888, "epoch": 0.5592559499634473, "grad_norm": 6.269920825958252, "learning_rate": 9.781113695148909e-06, "loss": 0.81979074, "memory(GiB)": 34.88, "step": 20655, "train_speed(iter/s)": 0.410637 }, { "acc": 0.86655006, "epoch": 0.5593913302466629, "grad_norm": 11.628018379211426, "learning_rate": 9.780949906582581e-06, "loss": 0.7148304, "memory(GiB)": 34.88, "step": 20660, "train_speed(iter/s)": 0.410643 }, { "acc": 0.87093277, "epoch": 0.5595267105298785, "grad_norm": 6.739948272705078, "learning_rate": 9.780786058131723e-06, "loss": 0.59228282, "memory(GiB)": 34.88, "step": 20665, "train_speed(iter/s)": 0.410648 }, { "acc": 0.87884922, "epoch": 0.5596620908130939, "grad_norm": 7.155500888824463, "learning_rate": 9.780622149798391e-06, "loss": 0.58875074, "memory(GiB)": 34.88, "step": 20670, "train_speed(iter/s)": 0.410653 }, { "acc": 0.84570208, "epoch": 0.5597974710963095, "grad_norm": 7.272172927856445, "learning_rate": 9.780458181584637e-06, "loss": 0.75175705, "memory(GiB)": 34.88, "step": 20675, "train_speed(iter/s)": 0.410659 }, { "acc": 0.86949797, "epoch": 0.5599328513795251, "grad_norm": 6.283803462982178, "learning_rate": 9.780294153492513e-06, "loss": 0.62825947, "memory(GiB)": 34.88, "step": 20680, "train_speed(iter/s)": 0.410664 }, { "acc": 0.86944847, "epoch": 0.5600682316627407, "grad_norm": 8.13438606262207, "learning_rate": 9.780130065524074e-06, "loss": 0.61265087, "memory(GiB)": 34.88, "step": 20685, "train_speed(iter/s)": 0.410669 }, { "acc": 0.85996399, "epoch": 0.5602036119459561, "grad_norm": 7.908982276916504, "learning_rate": 9.779965917681377e-06, "loss": 0.74459496, "memory(GiB)": 34.88, "step": 20690, "train_speed(iter/s)": 0.410675 }, { "acc": 0.85742416, "epoch": 0.5603389922291717, "grad_norm": 16.619903564453125, "learning_rate": 9.779801709966477e-06, "loss": 0.7508935, "memory(GiB)": 34.88, "step": 20695, "train_speed(iter/s)": 0.410679 }, { "acc": 0.89052801, "epoch": 0.5604743725123873, "grad_norm": 7.740045547485352, "learning_rate": 9.779637442381434e-06, "loss": 0.48754244, "memory(GiB)": 34.88, "step": 20700, "train_speed(iter/s)": 0.410685 }, { "acc": 0.86127567, "epoch": 0.5606097527956029, "grad_norm": 19.944461822509766, "learning_rate": 9.779473114928303e-06, "loss": 0.70910349, "memory(GiB)": 34.88, "step": 20705, "train_speed(iter/s)": 0.41069 }, { "acc": 0.84931421, "epoch": 0.5607451330788183, "grad_norm": 5.910139560699463, "learning_rate": 9.779308727609142e-06, "loss": 0.80624695, "memory(GiB)": 34.88, "step": 20710, "train_speed(iter/s)": 0.410695 }, { "acc": 0.85869732, "epoch": 0.5608805133620339, "grad_norm": 18.512426376342773, "learning_rate": 9.779144280426012e-06, "loss": 0.76182947, "memory(GiB)": 34.88, "step": 20715, "train_speed(iter/s)": 0.410701 }, { "acc": 0.87971401, "epoch": 0.5610158936452495, "grad_norm": 9.647372245788574, "learning_rate": 9.778979773380971e-06, "loss": 0.61003523, "memory(GiB)": 34.88, "step": 20720, "train_speed(iter/s)": 0.410706 }, { "acc": 0.85938606, "epoch": 0.5611512739284651, "grad_norm": 10.401695251464844, "learning_rate": 9.778815206476083e-06, "loss": 0.60193405, "memory(GiB)": 34.88, "step": 20725, "train_speed(iter/s)": 0.410711 }, { "acc": 0.85605984, "epoch": 0.5612866542116806, "grad_norm": 13.85120677947998, "learning_rate": 9.778650579713406e-06, "loss": 0.73844852, "memory(GiB)": 34.88, "step": 20730, "train_speed(iter/s)": 0.410717 }, { "acc": 0.85903931, "epoch": 0.5614220344948961, "grad_norm": 7.472280502319336, "learning_rate": 9.778485893095006e-06, "loss": 0.71006041, "memory(GiB)": 34.88, "step": 20735, "train_speed(iter/s)": 0.410722 }, { "acc": 0.84504385, "epoch": 0.5615574147781117, "grad_norm": 7.331173896789551, "learning_rate": 9.778321146622943e-06, "loss": 0.77687469, "memory(GiB)": 34.88, "step": 20740, "train_speed(iter/s)": 0.410728 }, { "acc": 0.85017776, "epoch": 0.5616927950613273, "grad_norm": 9.433606147766113, "learning_rate": 9.778156340299282e-06, "loss": 0.69656591, "memory(GiB)": 34.88, "step": 20745, "train_speed(iter/s)": 0.410734 }, { "acc": 0.87836819, "epoch": 0.5618281753445428, "grad_norm": 6.361776828765869, "learning_rate": 9.777991474126086e-06, "loss": 0.62476039, "memory(GiB)": 34.88, "step": 20750, "train_speed(iter/s)": 0.410739 }, { "acc": 0.85881004, "epoch": 0.5619635556277583, "grad_norm": 11.21274471282959, "learning_rate": 9.777826548105423e-06, "loss": 0.79919696, "memory(GiB)": 34.88, "step": 20755, "train_speed(iter/s)": 0.410744 }, { "acc": 0.85731812, "epoch": 0.5620989359109739, "grad_norm": 9.61496639251709, "learning_rate": 9.777661562239357e-06, "loss": 0.74055376, "memory(GiB)": 34.88, "step": 20760, "train_speed(iter/s)": 0.410749 }, { "acc": 0.84859734, "epoch": 0.5622343161941895, "grad_norm": 8.303009033203125, "learning_rate": 9.777496516529956e-06, "loss": 0.7214313, "memory(GiB)": 34.88, "step": 20765, "train_speed(iter/s)": 0.410755 }, { "acc": 0.88545551, "epoch": 0.562369696477405, "grad_norm": 6.519348621368408, "learning_rate": 9.777331410979285e-06, "loss": 0.54193277, "memory(GiB)": 34.88, "step": 20770, "train_speed(iter/s)": 0.410761 }, { "acc": 0.86007061, "epoch": 0.5625050767606206, "grad_norm": 13.833438873291016, "learning_rate": 9.777166245589414e-06, "loss": 0.66696677, "memory(GiB)": 34.88, "step": 20775, "train_speed(iter/s)": 0.410765 }, { "acc": 0.85217457, "epoch": 0.5626404570438361, "grad_norm": 6.927941799163818, "learning_rate": 9.777001020362414e-06, "loss": 0.70738764, "memory(GiB)": 34.88, "step": 20780, "train_speed(iter/s)": 0.410771 }, { "acc": 0.86807156, "epoch": 0.5627758373270517, "grad_norm": 13.801663398742676, "learning_rate": 9.776835735300354e-06, "loss": 0.66237268, "memory(GiB)": 34.88, "step": 20785, "train_speed(iter/s)": 0.410777 }, { "acc": 0.84128437, "epoch": 0.5629112176102672, "grad_norm": 13.10226821899414, "learning_rate": 9.776670390405299e-06, "loss": 0.80593414, "memory(GiB)": 34.88, "step": 20790, "train_speed(iter/s)": 0.410783 }, { "acc": 0.88053455, "epoch": 0.5630465978934828, "grad_norm": 9.29199504852295, "learning_rate": 9.77650498567933e-06, "loss": 0.52485509, "memory(GiB)": 34.88, "step": 20795, "train_speed(iter/s)": 0.410788 }, { "acc": 0.86759129, "epoch": 0.5631819781766984, "grad_norm": 7.446674823760986, "learning_rate": 9.77633952112451e-06, "loss": 0.67612867, "memory(GiB)": 34.88, "step": 20800, "train_speed(iter/s)": 0.410793 }, { "acc": 0.88532257, "epoch": 0.5633173584599139, "grad_norm": 10.702444076538086, "learning_rate": 9.776173996742916e-06, "loss": 0.54472389, "memory(GiB)": 34.88, "step": 20805, "train_speed(iter/s)": 0.410798 }, { "acc": 0.86474438, "epoch": 0.5634527387431294, "grad_norm": 24.39348793029785, "learning_rate": 9.776008412536622e-06, "loss": 0.66065536, "memory(GiB)": 34.88, "step": 20810, "train_speed(iter/s)": 0.410803 }, { "acc": 0.8723259, "epoch": 0.563588119026345, "grad_norm": 6.902629852294922, "learning_rate": 9.775842768507701e-06, "loss": 0.66391187, "memory(GiB)": 34.88, "step": 20815, "train_speed(iter/s)": 0.410809 }, { "acc": 0.82682171, "epoch": 0.5637234993095606, "grad_norm": 7.789656639099121, "learning_rate": 9.775677064658229e-06, "loss": 0.9791379, "memory(GiB)": 34.88, "step": 20820, "train_speed(iter/s)": 0.410813 }, { "acc": 0.8759819, "epoch": 0.5638588795927761, "grad_norm": 5.980949401855469, "learning_rate": 9.77551130099028e-06, "loss": 0.68350205, "memory(GiB)": 34.88, "step": 20825, "train_speed(iter/s)": 0.410819 }, { "acc": 0.84240446, "epoch": 0.5639942598759916, "grad_norm": 5.694472312927246, "learning_rate": 9.775345477505931e-06, "loss": 0.93857765, "memory(GiB)": 34.88, "step": 20830, "train_speed(iter/s)": 0.410824 }, { "acc": 0.84330845, "epoch": 0.5641296401592072, "grad_norm": 12.267706871032715, "learning_rate": 9.775179594207262e-06, "loss": 0.82140446, "memory(GiB)": 34.88, "step": 20835, "train_speed(iter/s)": 0.410829 }, { "acc": 0.85854683, "epoch": 0.5642650204424228, "grad_norm": 49.636573791503906, "learning_rate": 9.775013651096347e-06, "loss": 0.77799425, "memory(GiB)": 34.88, "step": 20840, "train_speed(iter/s)": 0.410834 }, { "acc": 0.87743664, "epoch": 0.5644004007256384, "grad_norm": 3.7915568351745605, "learning_rate": 9.774847648175268e-06, "loss": 0.52695503, "memory(GiB)": 34.88, "step": 20845, "train_speed(iter/s)": 0.410839 }, { "acc": 0.84447193, "epoch": 0.5645357810088538, "grad_norm": 10.314929962158203, "learning_rate": 9.774681585446101e-06, "loss": 0.86751604, "memory(GiB)": 34.88, "step": 20850, "train_speed(iter/s)": 0.410845 }, { "acc": 0.86955757, "epoch": 0.5646711612920694, "grad_norm": 8.621872901916504, "learning_rate": 9.774515462910931e-06, "loss": 0.60784431, "memory(GiB)": 34.88, "step": 20855, "train_speed(iter/s)": 0.41085 }, { "acc": 0.86246262, "epoch": 0.564806541575285, "grad_norm": 6.107457637786865, "learning_rate": 9.774349280571835e-06, "loss": 0.66266866, "memory(GiB)": 34.88, "step": 20860, "train_speed(iter/s)": 0.410856 }, { "acc": 0.83709278, "epoch": 0.5649419218585006, "grad_norm": 11.103362083435059, "learning_rate": 9.774183038430895e-06, "loss": 0.83078918, "memory(GiB)": 34.88, "step": 20865, "train_speed(iter/s)": 0.410861 }, { "acc": 0.88678589, "epoch": 0.565077302141716, "grad_norm": 4.45075798034668, "learning_rate": 9.774016736490196e-06, "loss": 0.51736498, "memory(GiB)": 34.88, "step": 20870, "train_speed(iter/s)": 0.410867 }, { "acc": 0.84947348, "epoch": 0.5652126824249316, "grad_norm": 13.161428451538086, "learning_rate": 9.773850374751819e-06, "loss": 0.79458838, "memory(GiB)": 34.88, "step": 20875, "train_speed(iter/s)": 0.410872 }, { "acc": 0.88150444, "epoch": 0.5653480627081472, "grad_norm": 14.831478118896484, "learning_rate": 9.77368395321785e-06, "loss": 0.58510647, "memory(GiB)": 34.88, "step": 20880, "train_speed(iter/s)": 0.410877 }, { "acc": 0.87325392, "epoch": 0.5654834429913628, "grad_norm": 5.96509313583374, "learning_rate": 9.773517471890373e-06, "loss": 0.65274529, "memory(GiB)": 34.88, "step": 20885, "train_speed(iter/s)": 0.410883 }, { "acc": 0.87025099, "epoch": 0.5656188232745782, "grad_norm": 9.849081993103027, "learning_rate": 9.773350930771472e-06, "loss": 0.60924788, "memory(GiB)": 34.88, "step": 20890, "train_speed(iter/s)": 0.410888 }, { "acc": 0.87625771, "epoch": 0.5657542035577938, "grad_norm": 7.035236835479736, "learning_rate": 9.773184329863235e-06, "loss": 0.72891588, "memory(GiB)": 34.88, "step": 20895, "train_speed(iter/s)": 0.410894 }, { "acc": 0.83805447, "epoch": 0.5658895838410094, "grad_norm": 12.989825248718262, "learning_rate": 9.77301766916775e-06, "loss": 0.89825468, "memory(GiB)": 34.88, "step": 20900, "train_speed(iter/s)": 0.410899 }, { "acc": 0.85268927, "epoch": 0.566024964124225, "grad_norm": 21.257701873779297, "learning_rate": 9.7728509486871e-06, "loss": 0.7462697, "memory(GiB)": 34.88, "step": 20905, "train_speed(iter/s)": 0.410904 }, { "acc": 0.87766953, "epoch": 0.5661603444074405, "grad_norm": 5.818478584289551, "learning_rate": 9.772684168423379e-06, "loss": 0.63835087, "memory(GiB)": 34.88, "step": 20910, "train_speed(iter/s)": 0.410909 }, { "acc": 0.84621792, "epoch": 0.566295724690656, "grad_norm": 5.865543842315674, "learning_rate": 9.772517328378675e-06, "loss": 0.75278397, "memory(GiB)": 34.88, "step": 20915, "train_speed(iter/s)": 0.410914 }, { "acc": 0.8739356, "epoch": 0.5664311049738716, "grad_norm": 7.585170745849609, "learning_rate": 9.772350428555076e-06, "loss": 0.75783243, "memory(GiB)": 34.88, "step": 20920, "train_speed(iter/s)": 0.410919 }, { "acc": 0.86344404, "epoch": 0.5665664852570872, "grad_norm": 4.918925762176514, "learning_rate": 9.772183468954673e-06, "loss": 0.72609468, "memory(GiB)": 34.88, "step": 20925, "train_speed(iter/s)": 0.410925 }, { "acc": 0.82582407, "epoch": 0.5667018655403027, "grad_norm": 7.338334083557129, "learning_rate": 9.772016449579558e-06, "loss": 0.92108545, "memory(GiB)": 34.88, "step": 20930, "train_speed(iter/s)": 0.41093 }, { "acc": 0.88250246, "epoch": 0.5668372458235182, "grad_norm": 37.93569564819336, "learning_rate": 9.771849370431824e-06, "loss": 0.60166225, "memory(GiB)": 34.88, "step": 20935, "train_speed(iter/s)": 0.410936 }, { "acc": 0.84228764, "epoch": 0.5669726261067338, "grad_norm": 10.236199378967285, "learning_rate": 9.771682231513563e-06, "loss": 0.82189827, "memory(GiB)": 34.88, "step": 20940, "train_speed(iter/s)": 0.410941 }, { "acc": 0.84514742, "epoch": 0.5671080063899494, "grad_norm": 7.6726460456848145, "learning_rate": 9.771515032826871e-06, "loss": 0.78848228, "memory(GiB)": 34.88, "step": 20945, "train_speed(iter/s)": 0.410946 }, { "acc": 0.85029736, "epoch": 0.5672433866731649, "grad_norm": 14.418761253356934, "learning_rate": 9.771347774373839e-06, "loss": 0.82235212, "memory(GiB)": 34.88, "step": 20950, "train_speed(iter/s)": 0.410951 }, { "acc": 0.84615126, "epoch": 0.5673787669563805, "grad_norm": 12.623988151550293, "learning_rate": 9.771180456156562e-06, "loss": 0.81259375, "memory(GiB)": 34.88, "step": 20955, "train_speed(iter/s)": 0.410956 }, { "acc": 0.84427309, "epoch": 0.567514147239596, "grad_norm": 6.950362682342529, "learning_rate": 9.77101307817714e-06, "loss": 0.80817146, "memory(GiB)": 34.88, "step": 20960, "train_speed(iter/s)": 0.410961 }, { "acc": 0.85714684, "epoch": 0.5676495275228116, "grad_norm": 9.744222640991211, "learning_rate": 9.770845640437669e-06, "loss": 0.68461838, "memory(GiB)": 34.88, "step": 20965, "train_speed(iter/s)": 0.410966 }, { "acc": 0.87475529, "epoch": 0.5677849078060271, "grad_norm": 10.408045768737793, "learning_rate": 9.770678142940243e-06, "loss": 0.65468922, "memory(GiB)": 34.88, "step": 20970, "train_speed(iter/s)": 0.410971 }, { "acc": 0.86407356, "epoch": 0.5679202880892427, "grad_norm": 64.09368896484375, "learning_rate": 9.770510585686964e-06, "loss": 0.72380328, "memory(GiB)": 34.88, "step": 20975, "train_speed(iter/s)": 0.410976 }, { "acc": 0.88457375, "epoch": 0.5680556683724582, "grad_norm": 9.292304039001465, "learning_rate": 9.770342968679928e-06, "loss": 0.55140858, "memory(GiB)": 34.88, "step": 20980, "train_speed(iter/s)": 0.410981 }, { "acc": 0.8541337, "epoch": 0.5681910486556738, "grad_norm": 17.037382125854492, "learning_rate": 9.770175291921237e-06, "loss": 0.73321347, "memory(GiB)": 34.88, "step": 20985, "train_speed(iter/s)": 0.410986 }, { "acc": 0.87123766, "epoch": 0.5683264289388893, "grad_norm": 7.305670738220215, "learning_rate": 9.77000755541299e-06, "loss": 0.64912124, "memory(GiB)": 34.88, "step": 20990, "train_speed(iter/s)": 0.410991 }, { "acc": 0.90825462, "epoch": 0.5684618092221049, "grad_norm": 4.1601409912109375, "learning_rate": 9.76983975915729e-06, "loss": 0.44311576, "memory(GiB)": 34.88, "step": 20995, "train_speed(iter/s)": 0.410996 }, { "acc": 0.87162685, "epoch": 0.5685971895053205, "grad_norm": 7.286228179931641, "learning_rate": 9.769671903156235e-06, "loss": 0.57144184, "memory(GiB)": 34.88, "step": 21000, "train_speed(iter/s)": 0.411002 }, { "acc": 0.86719341, "epoch": 0.568732569788536, "grad_norm": 23.511112213134766, "learning_rate": 9.769503987411933e-06, "loss": 0.7018014, "memory(GiB)": 34.88, "step": 21005, "train_speed(iter/s)": 0.411007 }, { "acc": 0.88429289, "epoch": 0.5688679500717515, "grad_norm": 17.600980758666992, "learning_rate": 9.769336011926485e-06, "loss": 0.57081928, "memory(GiB)": 34.88, "step": 21010, "train_speed(iter/s)": 0.411013 }, { "acc": 0.86214142, "epoch": 0.5690033303549671, "grad_norm": 7.902711868286133, "learning_rate": 9.769167976701995e-06, "loss": 0.74462519, "memory(GiB)": 34.88, "step": 21015, "train_speed(iter/s)": 0.411018 }, { "acc": 0.85463085, "epoch": 0.5691387106381827, "grad_norm": 11.67481803894043, "learning_rate": 9.768999881740566e-06, "loss": 0.76344056, "memory(GiB)": 34.88, "step": 21020, "train_speed(iter/s)": 0.411023 }, { "acc": 0.84873219, "epoch": 0.5692740909213982, "grad_norm": 11.32702350616455, "learning_rate": 9.768831727044308e-06, "loss": 0.81209688, "memory(GiB)": 34.88, "step": 21025, "train_speed(iter/s)": 0.411029 }, { "acc": 0.84802227, "epoch": 0.5694094712046137, "grad_norm": 10.18635368347168, "learning_rate": 9.768663512615326e-06, "loss": 0.78959923, "memory(GiB)": 34.88, "step": 21030, "train_speed(iter/s)": 0.411034 }, { "acc": 0.85115376, "epoch": 0.5695448514878293, "grad_norm": 13.118824005126953, "learning_rate": 9.768495238455725e-06, "loss": 0.7850287, "memory(GiB)": 34.88, "step": 21035, "train_speed(iter/s)": 0.411039 }, { "acc": 0.83982878, "epoch": 0.5696802317710449, "grad_norm": 19.665918350219727, "learning_rate": 9.768326904567615e-06, "loss": 0.85116711, "memory(GiB)": 34.88, "step": 21040, "train_speed(iter/s)": 0.411044 }, { "acc": 0.85789766, "epoch": 0.5698156120542605, "grad_norm": 9.102876663208008, "learning_rate": 9.768158510953104e-06, "loss": 0.7514163, "memory(GiB)": 34.88, "step": 21045, "train_speed(iter/s)": 0.411049 }, { "acc": 0.87256851, "epoch": 0.5699509923374759, "grad_norm": 4.257805347442627, "learning_rate": 9.767990057614303e-06, "loss": 0.60234556, "memory(GiB)": 34.88, "step": 21050, "train_speed(iter/s)": 0.411054 }, { "acc": 0.87297096, "epoch": 0.5700863726206915, "grad_norm": 12.443281173706055, "learning_rate": 9.767821544553318e-06, "loss": 0.74863615, "memory(GiB)": 34.88, "step": 21055, "train_speed(iter/s)": 0.411059 }, { "acc": 0.85066233, "epoch": 0.5702217529039071, "grad_norm": 24.14336585998535, "learning_rate": 9.767652971772266e-06, "loss": 0.841541, "memory(GiB)": 34.88, "step": 21060, "train_speed(iter/s)": 0.411063 }, { "acc": 0.86051216, "epoch": 0.5703571331871227, "grad_norm": 7.051966667175293, "learning_rate": 9.767484339273254e-06, "loss": 0.66714621, "memory(GiB)": 34.88, "step": 21065, "train_speed(iter/s)": 0.411068 }, { "acc": 0.84455357, "epoch": 0.5704925134703381, "grad_norm": 9.44965934753418, "learning_rate": 9.767315647058397e-06, "loss": 0.86823368, "memory(GiB)": 34.88, "step": 21070, "train_speed(iter/s)": 0.411073 }, { "acc": 0.84181929, "epoch": 0.5706278937535537, "grad_norm": 14.175169944763184, "learning_rate": 9.767146895129808e-06, "loss": 0.84711037, "memory(GiB)": 34.88, "step": 21075, "train_speed(iter/s)": 0.411078 }, { "acc": 0.87106867, "epoch": 0.5707632740367693, "grad_norm": 7.805712699890137, "learning_rate": 9.766978083489598e-06, "loss": 0.69588919, "memory(GiB)": 34.88, "step": 21080, "train_speed(iter/s)": 0.411083 }, { "acc": 0.85046539, "epoch": 0.5708986543199849, "grad_norm": 22.828413009643555, "learning_rate": 9.766809212139885e-06, "loss": 0.67171946, "memory(GiB)": 34.88, "step": 21085, "train_speed(iter/s)": 0.411088 }, { "acc": 0.82935648, "epoch": 0.5710340346032003, "grad_norm": 8.235912322998047, "learning_rate": 9.766640281082783e-06, "loss": 0.87293034, "memory(GiB)": 34.88, "step": 21090, "train_speed(iter/s)": 0.411093 }, { "acc": 0.863974, "epoch": 0.5711694148864159, "grad_norm": 13.683464050292969, "learning_rate": 9.76647129032041e-06, "loss": 0.68234744, "memory(GiB)": 34.88, "step": 21095, "train_speed(iter/s)": 0.411099 }, { "acc": 0.86224203, "epoch": 0.5713047951696315, "grad_norm": 10.182071685791016, "learning_rate": 9.76630223985488e-06, "loss": 0.72748823, "memory(GiB)": 34.88, "step": 21100, "train_speed(iter/s)": 0.411104 }, { "acc": 0.87905483, "epoch": 0.5714401754528471, "grad_norm": 5.622429847717285, "learning_rate": 9.766133129688311e-06, "loss": 0.60174227, "memory(GiB)": 34.88, "step": 21105, "train_speed(iter/s)": 0.411109 }, { "acc": 0.86537418, "epoch": 0.5715755557360626, "grad_norm": 7.494642734527588, "learning_rate": 9.765963959822824e-06, "loss": 0.65164528, "memory(GiB)": 34.88, "step": 21110, "train_speed(iter/s)": 0.411115 }, { "acc": 0.86874847, "epoch": 0.5717109360192781, "grad_norm": 8.577204704284668, "learning_rate": 9.765794730260537e-06, "loss": 0.59537668, "memory(GiB)": 34.88, "step": 21115, "train_speed(iter/s)": 0.411119 }, { "acc": 0.84216366, "epoch": 0.5718463163024937, "grad_norm": 9.49777603149414, "learning_rate": 9.765625441003569e-06, "loss": 0.79248452, "memory(GiB)": 34.88, "step": 21120, "train_speed(iter/s)": 0.411124 }, { "acc": 0.86819077, "epoch": 0.5719816965857093, "grad_norm": 4.984625816345215, "learning_rate": 9.765456092054043e-06, "loss": 0.68677855, "memory(GiB)": 34.88, "step": 21125, "train_speed(iter/s)": 0.41113 }, { "acc": 0.85844479, "epoch": 0.5721170768689248, "grad_norm": 6.98812198638916, "learning_rate": 9.765286683414076e-06, "loss": 0.64419994, "memory(GiB)": 34.88, "step": 21130, "train_speed(iter/s)": 0.411134 }, { "acc": 0.87184334, "epoch": 0.5722524571521403, "grad_norm": 5.652944087982178, "learning_rate": 9.765117215085796e-06, "loss": 0.7294961, "memory(GiB)": 34.88, "step": 21135, "train_speed(iter/s)": 0.411139 }, { "acc": 0.85520248, "epoch": 0.5723878374353559, "grad_norm": 7.458493232727051, "learning_rate": 9.764947687071321e-06, "loss": 0.68297977, "memory(GiB)": 34.88, "step": 21140, "train_speed(iter/s)": 0.411144 }, { "acc": 0.8541111, "epoch": 0.5725232177185715, "grad_norm": 9.680530548095703, "learning_rate": 9.764778099372777e-06, "loss": 0.66960306, "memory(GiB)": 34.88, "step": 21145, "train_speed(iter/s)": 0.411149 }, { "acc": 0.88510437, "epoch": 0.572658598001787, "grad_norm": 8.237251281738281, "learning_rate": 9.764608451992288e-06, "loss": 0.54317126, "memory(GiB)": 34.88, "step": 21150, "train_speed(iter/s)": 0.411154 }, { "acc": 0.89346371, "epoch": 0.5727939782850026, "grad_norm": 8.341268539428711, "learning_rate": 9.764438744931978e-06, "loss": 0.57186985, "memory(GiB)": 34.88, "step": 21155, "train_speed(iter/s)": 0.41116 }, { "acc": 0.87568836, "epoch": 0.5729293585682181, "grad_norm": 4.357461929321289, "learning_rate": 9.764268978193978e-06, "loss": 0.60989738, "memory(GiB)": 34.88, "step": 21160, "train_speed(iter/s)": 0.411165 }, { "acc": 0.88263311, "epoch": 0.5730647388514337, "grad_norm": 22.660968780517578, "learning_rate": 9.764099151780407e-06, "loss": 0.59107866, "memory(GiB)": 34.88, "step": 21165, "train_speed(iter/s)": 0.41117 }, { "acc": 0.90715723, "epoch": 0.5732001191346492, "grad_norm": 9.15515422821045, "learning_rate": 9.763929265693398e-06, "loss": 0.46733475, "memory(GiB)": 34.88, "step": 21170, "train_speed(iter/s)": 0.411174 }, { "acc": 0.84473476, "epoch": 0.5733354994178648, "grad_norm": 11.7953462600708, "learning_rate": 9.763759319935077e-06, "loss": 0.76214948, "memory(GiB)": 34.88, "step": 21175, "train_speed(iter/s)": 0.411179 }, { "acc": 0.84542484, "epoch": 0.5734708797010803, "grad_norm": 7.622002124786377, "learning_rate": 9.763589314507573e-06, "loss": 0.83588161, "memory(GiB)": 34.88, "step": 21180, "train_speed(iter/s)": 0.411185 }, { "acc": 0.86324043, "epoch": 0.5736062599842959, "grad_norm": 55.91973876953125, "learning_rate": 9.763419249413016e-06, "loss": 0.76044712, "memory(GiB)": 34.88, "step": 21185, "train_speed(iter/s)": 0.41119 }, { "acc": 0.857055, "epoch": 0.5737416402675114, "grad_norm": 9.64315414428711, "learning_rate": 9.763249124653535e-06, "loss": 0.8176712, "memory(GiB)": 34.88, "step": 21190, "train_speed(iter/s)": 0.411196 }, { "acc": 0.87138672, "epoch": 0.573877020550727, "grad_norm": 11.731551170349121, "learning_rate": 9.763078940231264e-06, "loss": 0.66992121, "memory(GiB)": 34.88, "step": 21195, "train_speed(iter/s)": 0.411202 }, { "acc": 0.85258179, "epoch": 0.5740124008339426, "grad_norm": 5.803168773651123, "learning_rate": 9.762908696148334e-06, "loss": 0.6095645, "memory(GiB)": 34.88, "step": 21200, "train_speed(iter/s)": 0.411206 }, { "acc": 0.85660162, "epoch": 0.5741477811171581, "grad_norm": 13.106351852416992, "learning_rate": 9.762738392406876e-06, "loss": 0.70406179, "memory(GiB)": 34.88, "step": 21205, "train_speed(iter/s)": 0.411212 }, { "acc": 0.85219975, "epoch": 0.5742831614003736, "grad_norm": 22.715194702148438, "learning_rate": 9.762568029009025e-06, "loss": 0.65772643, "memory(GiB)": 34.88, "step": 21210, "train_speed(iter/s)": 0.411217 }, { "acc": 0.86971197, "epoch": 0.5744185416835892, "grad_norm": 4.184216022491455, "learning_rate": 9.762397605956913e-06, "loss": 0.53784161, "memory(GiB)": 34.88, "step": 21215, "train_speed(iter/s)": 0.411222 }, { "acc": 0.85671673, "epoch": 0.5745539219668048, "grad_norm": 5.954128265380859, "learning_rate": 9.762227123252676e-06, "loss": 0.79606562, "memory(GiB)": 34.88, "step": 21220, "train_speed(iter/s)": 0.411228 }, { "acc": 0.87744751, "epoch": 0.5746893022500204, "grad_norm": 13.837067604064941, "learning_rate": 9.762056580898452e-06, "loss": 0.65512466, "memory(GiB)": 34.88, "step": 21225, "train_speed(iter/s)": 0.411233 }, { "acc": 0.87598114, "epoch": 0.5748246825332358, "grad_norm": 5.044790267944336, "learning_rate": 9.761885978896375e-06, "loss": 0.66661968, "memory(GiB)": 34.88, "step": 21230, "train_speed(iter/s)": 0.411238 }, { "acc": 0.84521332, "epoch": 0.5749600628164514, "grad_norm": 18.206619262695312, "learning_rate": 9.76171531724858e-06, "loss": 0.88147144, "memory(GiB)": 34.88, "step": 21235, "train_speed(iter/s)": 0.411243 }, { "acc": 0.8509222, "epoch": 0.575095443099667, "grad_norm": 8.479092597961426, "learning_rate": 9.76154459595721e-06, "loss": 0.77832203, "memory(GiB)": 34.88, "step": 21240, "train_speed(iter/s)": 0.411248 }, { "acc": 0.87342901, "epoch": 0.5752308233828826, "grad_norm": 15.217414855957031, "learning_rate": 9.7613738150244e-06, "loss": 0.6460463, "memory(GiB)": 34.88, "step": 21245, "train_speed(iter/s)": 0.411253 }, { "acc": 0.84698877, "epoch": 0.575366203666098, "grad_norm": 4.110657215118408, "learning_rate": 9.76120297445229e-06, "loss": 0.75346141, "memory(GiB)": 34.88, "step": 21250, "train_speed(iter/s)": 0.411258 }, { "acc": 0.8667305, "epoch": 0.5755015839493136, "grad_norm": 13.796823501586914, "learning_rate": 9.761032074243022e-06, "loss": 0.70308366, "memory(GiB)": 34.88, "step": 21255, "train_speed(iter/s)": 0.411263 }, { "acc": 0.85646896, "epoch": 0.5756369642325292, "grad_norm": 9.260367393493652, "learning_rate": 9.760861114398735e-06, "loss": 0.69810576, "memory(GiB)": 34.88, "step": 21260, "train_speed(iter/s)": 0.411268 }, { "acc": 0.85610809, "epoch": 0.5757723445157448, "grad_norm": 7.069746971130371, "learning_rate": 9.76069009492157e-06, "loss": 0.81638851, "memory(GiB)": 34.88, "step": 21265, "train_speed(iter/s)": 0.411274 }, { "acc": 0.82391205, "epoch": 0.5759077247989602, "grad_norm": 9.114675521850586, "learning_rate": 9.760519015813671e-06, "loss": 0.90209904, "memory(GiB)": 34.88, "step": 21270, "train_speed(iter/s)": 0.411279 }, { "acc": 0.86226482, "epoch": 0.5760431050821758, "grad_norm": 13.80744743347168, "learning_rate": 9.76034787707718e-06, "loss": 0.71174111, "memory(GiB)": 34.88, "step": 21275, "train_speed(iter/s)": 0.411284 }, { "acc": 0.88036509, "epoch": 0.5761784853653914, "grad_norm": 10.379631996154785, "learning_rate": 9.760176678714242e-06, "loss": 0.67514715, "memory(GiB)": 34.88, "step": 21280, "train_speed(iter/s)": 0.411289 }, { "acc": 0.85456905, "epoch": 0.576313865648607, "grad_norm": 11.071320533752441, "learning_rate": 9.760005420727e-06, "loss": 0.83363428, "memory(GiB)": 34.88, "step": 21285, "train_speed(iter/s)": 0.411294 }, { "acc": 0.87516918, "epoch": 0.5764492459318225, "grad_norm": 5.9803361892700195, "learning_rate": 9.759834103117602e-06, "loss": 0.57859783, "memory(GiB)": 34.88, "step": 21290, "train_speed(iter/s)": 0.411299 }, { "acc": 0.82038231, "epoch": 0.576584626215038, "grad_norm": 10.544154167175293, "learning_rate": 9.75966272588819e-06, "loss": 0.99432249, "memory(GiB)": 34.88, "step": 21295, "train_speed(iter/s)": 0.411304 }, { "acc": 0.85563774, "epoch": 0.5767200064982536, "grad_norm": 8.202710151672363, "learning_rate": 9.759491289040914e-06, "loss": 0.65222425, "memory(GiB)": 34.88, "step": 21300, "train_speed(iter/s)": 0.411309 }, { "acc": 0.88220654, "epoch": 0.5768553867814692, "grad_norm": 14.958955764770508, "learning_rate": 9.759319792577921e-06, "loss": 0.56511259, "memory(GiB)": 34.88, "step": 21305, "train_speed(iter/s)": 0.411313 }, { "acc": 0.86956282, "epoch": 0.5769907670646847, "grad_norm": 11.495992660522461, "learning_rate": 9.759148236501357e-06, "loss": 0.63936758, "memory(GiB)": 34.88, "step": 21310, "train_speed(iter/s)": 0.411319 }, { "acc": 0.83462372, "epoch": 0.5771261473479002, "grad_norm": 10.662992477416992, "learning_rate": 9.758976620813375e-06, "loss": 0.80468206, "memory(GiB)": 34.88, "step": 21315, "train_speed(iter/s)": 0.411324 }, { "acc": 0.85404425, "epoch": 0.5772615276311158, "grad_norm": 9.103094100952148, "learning_rate": 9.758804945516121e-06, "loss": 0.75263114, "memory(GiB)": 34.88, "step": 21320, "train_speed(iter/s)": 0.411329 }, { "acc": 0.87092419, "epoch": 0.5773969079143314, "grad_norm": 6.385693073272705, "learning_rate": 9.758633210611751e-06, "loss": 0.61331282, "memory(GiB)": 34.88, "step": 21325, "train_speed(iter/s)": 0.411333 }, { "acc": 0.85919685, "epoch": 0.5775322881975469, "grad_norm": 22.836469650268555, "learning_rate": 9.75846141610241e-06, "loss": 0.79252782, "memory(GiB)": 34.88, "step": 21330, "train_speed(iter/s)": 0.411337 }, { "acc": 0.85458345, "epoch": 0.5776676684807625, "grad_norm": 10.00804328918457, "learning_rate": 9.758289561990254e-06, "loss": 0.65707226, "memory(GiB)": 34.88, "step": 21335, "train_speed(iter/s)": 0.411342 }, { "acc": 0.86419239, "epoch": 0.577803048763978, "grad_norm": 10.993785858154297, "learning_rate": 9.758117648277436e-06, "loss": 0.64912863, "memory(GiB)": 34.88, "step": 21340, "train_speed(iter/s)": 0.411348 }, { "acc": 0.84937935, "epoch": 0.5779384290471936, "grad_norm": 9.879923820495605, "learning_rate": 9.757945674966107e-06, "loss": 0.71320758, "memory(GiB)": 34.88, "step": 21345, "train_speed(iter/s)": 0.411353 }, { "acc": 0.89687672, "epoch": 0.5780738093304091, "grad_norm": 4.854494571685791, "learning_rate": 9.757773642058422e-06, "loss": 0.57117877, "memory(GiB)": 34.88, "step": 21350, "train_speed(iter/s)": 0.411359 }, { "acc": 0.87105999, "epoch": 0.5782091896136247, "grad_norm": 8.531072616577148, "learning_rate": 9.757601549556535e-06, "loss": 0.63718529, "memory(GiB)": 34.88, "step": 21355, "train_speed(iter/s)": 0.411363 }, { "acc": 0.83781719, "epoch": 0.5783445698968402, "grad_norm": 15.446675300598145, "learning_rate": 9.757429397462606e-06, "loss": 0.79303246, "memory(GiB)": 34.88, "step": 21360, "train_speed(iter/s)": 0.411369 }, { "acc": 0.88594418, "epoch": 0.5784799501800558, "grad_norm": 8.513187408447266, "learning_rate": 9.757257185778789e-06, "loss": 0.54625654, "memory(GiB)": 34.88, "step": 21365, "train_speed(iter/s)": 0.411374 }, { "acc": 0.85037718, "epoch": 0.5786153304632713, "grad_norm": 10.636856079101562, "learning_rate": 9.757084914507243e-06, "loss": 0.80054035, "memory(GiB)": 34.88, "step": 21370, "train_speed(iter/s)": 0.411379 }, { "acc": 0.88471737, "epoch": 0.5787507107464869, "grad_norm": 4.970668792724609, "learning_rate": 9.75691258365012e-06, "loss": 0.51780133, "memory(GiB)": 34.88, "step": 21375, "train_speed(iter/s)": 0.411384 }, { "acc": 0.86326818, "epoch": 0.5788860910297025, "grad_norm": 8.129195213317871, "learning_rate": 9.756740193209584e-06, "loss": 0.64022069, "memory(GiB)": 34.88, "step": 21380, "train_speed(iter/s)": 0.411389 }, { "acc": 0.88862858, "epoch": 0.579021471312918, "grad_norm": 11.092673301696777, "learning_rate": 9.756567743187796e-06, "loss": 0.55338607, "memory(GiB)": 34.88, "step": 21385, "train_speed(iter/s)": 0.411394 }, { "acc": 0.87213602, "epoch": 0.5791568515961335, "grad_norm": 5.985458850860596, "learning_rate": 9.756395233586911e-06, "loss": 0.63201694, "memory(GiB)": 34.88, "step": 21390, "train_speed(iter/s)": 0.411399 }, { "acc": 0.82728558, "epoch": 0.5792922318793491, "grad_norm": 5.661097526550293, "learning_rate": 9.756222664409095e-06, "loss": 0.90968943, "memory(GiB)": 34.88, "step": 21395, "train_speed(iter/s)": 0.411404 }, { "acc": 0.85098791, "epoch": 0.5794276121625647, "grad_norm": 9.426063537597656, "learning_rate": 9.756050035656507e-06, "loss": 0.73416133, "memory(GiB)": 34.88, "step": 21400, "train_speed(iter/s)": 0.41141 }, { "acc": 0.85340309, "epoch": 0.5795629924457802, "grad_norm": 5.777005195617676, "learning_rate": 9.75587734733131e-06, "loss": 0.78469243, "memory(GiB)": 34.88, "step": 21405, "train_speed(iter/s)": 0.411415 }, { "acc": 0.86032648, "epoch": 0.5796983727289957, "grad_norm": 6.523833274841309, "learning_rate": 9.755704599435667e-06, "loss": 0.77411404, "memory(GiB)": 34.88, "step": 21410, "train_speed(iter/s)": 0.411419 }, { "acc": 0.83046131, "epoch": 0.5798337530122113, "grad_norm": 5.871509075164795, "learning_rate": 9.755531791971745e-06, "loss": 0.85491114, "memory(GiB)": 34.88, "step": 21415, "train_speed(iter/s)": 0.411424 }, { "acc": 0.86891232, "epoch": 0.5799691332954269, "grad_norm": 6.308656692504883, "learning_rate": 9.755358924941703e-06, "loss": 0.77115526, "memory(GiB)": 34.88, "step": 21420, "train_speed(iter/s)": 0.411429 }, { "acc": 0.85706787, "epoch": 0.5801045135786425, "grad_norm": 8.93892765045166, "learning_rate": 9.755185998347711e-06, "loss": 0.68240881, "memory(GiB)": 34.88, "step": 21425, "train_speed(iter/s)": 0.411433 }, { "acc": 0.8693512, "epoch": 0.5802398938618579, "grad_norm": 17.79960060119629, "learning_rate": 9.755013012191933e-06, "loss": 0.67225184, "memory(GiB)": 34.88, "step": 21430, "train_speed(iter/s)": 0.411438 }, { "acc": 0.86703453, "epoch": 0.5803752741450735, "grad_norm": 6.966145038604736, "learning_rate": 9.754839966476538e-06, "loss": 0.73936453, "memory(GiB)": 34.88, "step": 21435, "train_speed(iter/s)": 0.411443 }, { "acc": 0.8518692, "epoch": 0.5805106544282891, "grad_norm": 13.209954261779785, "learning_rate": 9.754666861203692e-06, "loss": 0.83332767, "memory(GiB)": 34.88, "step": 21440, "train_speed(iter/s)": 0.411448 }, { "acc": 0.85741787, "epoch": 0.5806460347115047, "grad_norm": 21.83780288696289, "learning_rate": 9.754493696375565e-06, "loss": 0.67949147, "memory(GiB)": 34.88, "step": 21445, "train_speed(iter/s)": 0.411453 }, { "acc": 0.84677849, "epoch": 0.5807814149947201, "grad_norm": 10.99454402923584, "learning_rate": 9.754320471994325e-06, "loss": 0.84450092, "memory(GiB)": 34.88, "step": 21450, "train_speed(iter/s)": 0.411458 }, { "acc": 0.8722208, "epoch": 0.5809167952779357, "grad_norm": 15.222094535827637, "learning_rate": 9.75414718806214e-06, "loss": 0.6178319, "memory(GiB)": 34.88, "step": 21455, "train_speed(iter/s)": 0.411463 }, { "acc": 0.85790653, "epoch": 0.5810521755611513, "grad_norm": 8.327117919921875, "learning_rate": 9.753973844581188e-06, "loss": 0.61788726, "memory(GiB)": 34.88, "step": 21460, "train_speed(iter/s)": 0.411468 }, { "acc": 0.8495059, "epoch": 0.5811875558443669, "grad_norm": 20.2409725189209, "learning_rate": 9.753800441553632e-06, "loss": 0.83611021, "memory(GiB)": 34.88, "step": 21465, "train_speed(iter/s)": 0.411474 }, { "acc": 0.87891226, "epoch": 0.5813229361275823, "grad_norm": 10.214709281921387, "learning_rate": 9.753626978981648e-06, "loss": 0.5292407, "memory(GiB)": 34.88, "step": 21470, "train_speed(iter/s)": 0.411478 }, { "acc": 0.84272709, "epoch": 0.5814583164107979, "grad_norm": 8.436189651489258, "learning_rate": 9.75345345686741e-06, "loss": 0.82058296, "memory(GiB)": 34.88, "step": 21475, "train_speed(iter/s)": 0.411483 }, { "acc": 0.85869265, "epoch": 0.5815936966940135, "grad_norm": 8.266321182250977, "learning_rate": 9.75327987521309e-06, "loss": 0.67066154, "memory(GiB)": 34.88, "step": 21480, "train_speed(iter/s)": 0.411488 }, { "acc": 0.84963379, "epoch": 0.5817290769772291, "grad_norm": 6.440505504608154, "learning_rate": 9.753106234020861e-06, "loss": 0.78268957, "memory(GiB)": 34.88, "step": 21485, "train_speed(iter/s)": 0.411493 }, { "acc": 0.86737175, "epoch": 0.5818644572604446, "grad_norm": 7.83242130279541, "learning_rate": 9.752932533292901e-06, "loss": 0.60512128, "memory(GiB)": 34.88, "step": 21490, "train_speed(iter/s)": 0.411498 }, { "acc": 0.87113008, "epoch": 0.5819998375436601, "grad_norm": 15.077770233154297, "learning_rate": 9.752758773031386e-06, "loss": 0.7110116, "memory(GiB)": 34.88, "step": 21495, "train_speed(iter/s)": 0.411504 }, { "acc": 0.86329651, "epoch": 0.5821352178268757, "grad_norm": 10.62781047821045, "learning_rate": 9.752584953238494e-06, "loss": 0.66370726, "memory(GiB)": 34.88, "step": 21500, "train_speed(iter/s)": 0.411509 }, { "acc": 0.85447807, "epoch": 0.5822705981100913, "grad_norm": 21.790010452270508, "learning_rate": 9.752411073916396e-06, "loss": 0.71231651, "memory(GiB)": 34.88, "step": 21505, "train_speed(iter/s)": 0.411513 }, { "acc": 0.87620506, "epoch": 0.5824059783933068, "grad_norm": 21.93366050720215, "learning_rate": 9.752237135067275e-06, "loss": 0.67303486, "memory(GiB)": 34.88, "step": 21510, "train_speed(iter/s)": 0.411518 }, { "acc": 0.87776909, "epoch": 0.5825413586765223, "grad_norm": 11.003511428833008, "learning_rate": 9.752063136693311e-06, "loss": 0.53880291, "memory(GiB)": 34.88, "step": 21515, "train_speed(iter/s)": 0.411523 }, { "acc": 0.85972157, "epoch": 0.5826767389597379, "grad_norm": 9.629796981811523, "learning_rate": 9.75188907879668e-06, "loss": 0.73271551, "memory(GiB)": 34.88, "step": 21520, "train_speed(iter/s)": 0.411529 }, { "acc": 0.85630407, "epoch": 0.5828121192429535, "grad_norm": 8.081506729125977, "learning_rate": 9.751714961379566e-06, "loss": 0.69921303, "memory(GiB)": 34.88, "step": 21525, "train_speed(iter/s)": 0.411534 }, { "acc": 0.86050196, "epoch": 0.582947499526169, "grad_norm": 10.144571304321289, "learning_rate": 9.751540784444147e-06, "loss": 0.668297, "memory(GiB)": 34.88, "step": 21530, "train_speed(iter/s)": 0.411539 }, { "acc": 0.87441692, "epoch": 0.5830828798093846, "grad_norm": 4.456355094909668, "learning_rate": 9.751366547992607e-06, "loss": 0.56616068, "memory(GiB)": 34.88, "step": 21535, "train_speed(iter/s)": 0.411544 }, { "acc": 0.87461576, "epoch": 0.5832182600926001, "grad_norm": 14.565765380859375, "learning_rate": 9.751192252027129e-06, "loss": 0.66435308, "memory(GiB)": 34.88, "step": 21540, "train_speed(iter/s)": 0.411549 }, { "acc": 0.87679195, "epoch": 0.5833536403758157, "grad_norm": 10.393213272094727, "learning_rate": 9.751017896549894e-06, "loss": 0.64483347, "memory(GiB)": 34.88, "step": 21545, "train_speed(iter/s)": 0.411555 }, { "acc": 0.8582653, "epoch": 0.5834890206590312, "grad_norm": 7.384261131286621, "learning_rate": 9.750843481563088e-06, "loss": 0.850138, "memory(GiB)": 34.88, "step": 21550, "train_speed(iter/s)": 0.41156 }, { "acc": 0.84845676, "epoch": 0.5836244009422468, "grad_norm": 6.060776233673096, "learning_rate": 9.750669007068896e-06, "loss": 0.80459957, "memory(GiB)": 34.88, "step": 21555, "train_speed(iter/s)": 0.411565 }, { "acc": 0.86845646, "epoch": 0.5837597812254623, "grad_norm": 5.929738521575928, "learning_rate": 9.750494473069503e-06, "loss": 0.65770407, "memory(GiB)": 34.88, "step": 21560, "train_speed(iter/s)": 0.411571 }, { "acc": 0.88366594, "epoch": 0.5838951615086779, "grad_norm": 9.17078971862793, "learning_rate": 9.750319879567094e-06, "loss": 0.63153667, "memory(GiB)": 34.88, "step": 21565, "train_speed(iter/s)": 0.411576 }, { "acc": 0.84567451, "epoch": 0.5840305417918934, "grad_norm": 11.343087196350098, "learning_rate": 9.750145226563861e-06, "loss": 0.88051348, "memory(GiB)": 34.88, "step": 21570, "train_speed(iter/s)": 0.411581 }, { "acc": 0.86702003, "epoch": 0.584165922075109, "grad_norm": 7.83619499206543, "learning_rate": 9.749970514061984e-06, "loss": 0.56720767, "memory(GiB)": 34.88, "step": 21575, "train_speed(iter/s)": 0.411585 }, { "acc": 0.86704407, "epoch": 0.5843013023583246, "grad_norm": 4.841269016265869, "learning_rate": 9.74979574206366e-06, "loss": 0.68920107, "memory(GiB)": 34.88, "step": 21580, "train_speed(iter/s)": 0.41159 }, { "acc": 0.83857441, "epoch": 0.5844366826415401, "grad_norm": 5.264341831207275, "learning_rate": 9.749620910571073e-06, "loss": 0.89886036, "memory(GiB)": 34.88, "step": 21585, "train_speed(iter/s)": 0.411595 }, { "acc": 0.88502703, "epoch": 0.5845720629247556, "grad_norm": 10.208706855773926, "learning_rate": 9.749446019586415e-06, "loss": 0.64659114, "memory(GiB)": 34.88, "step": 21590, "train_speed(iter/s)": 0.4116 }, { "acc": 0.88790321, "epoch": 0.5847074432079712, "grad_norm": 9.81545639038086, "learning_rate": 9.749271069111877e-06, "loss": 0.56210737, "memory(GiB)": 34.88, "step": 21595, "train_speed(iter/s)": 0.411605 }, { "acc": 0.85927124, "epoch": 0.5848428234911868, "grad_norm": 7.7525105476379395, "learning_rate": 9.749096059149648e-06, "loss": 0.66397514, "memory(GiB)": 34.88, "step": 21600, "train_speed(iter/s)": 0.41161 }, { "acc": 0.84895706, "epoch": 0.5849782037744023, "grad_norm": 6.301987171173096, "learning_rate": 9.748920989701922e-06, "loss": 0.8937748, "memory(GiB)": 34.88, "step": 21605, "train_speed(iter/s)": 0.411615 }, { "acc": 0.8388134, "epoch": 0.5851135840576178, "grad_norm": 10.831082344055176, "learning_rate": 9.748745860770895e-06, "loss": 0.84720211, "memory(GiB)": 34.88, "step": 21610, "train_speed(iter/s)": 0.411619 }, { "acc": 0.84857521, "epoch": 0.5852489643408334, "grad_norm": 11.590347290039062, "learning_rate": 9.748570672358756e-06, "loss": 0.76083078, "memory(GiB)": 34.88, "step": 21615, "train_speed(iter/s)": 0.411624 }, { "acc": 0.85930119, "epoch": 0.585384344624049, "grad_norm": 6.873790740966797, "learning_rate": 9.748395424467705e-06, "loss": 0.69083848, "memory(GiB)": 34.88, "step": 21620, "train_speed(iter/s)": 0.411629 }, { "acc": 0.86145458, "epoch": 0.5855197249072646, "grad_norm": 11.146748542785645, "learning_rate": 9.748220117099932e-06, "loss": 0.71090837, "memory(GiB)": 34.88, "step": 21625, "train_speed(iter/s)": 0.411634 }, { "acc": 0.86356773, "epoch": 0.58565510519048, "grad_norm": 4.480288505554199, "learning_rate": 9.748044750257637e-06, "loss": 0.57936096, "memory(GiB)": 34.88, "step": 21630, "train_speed(iter/s)": 0.411639 }, { "acc": 0.88601894, "epoch": 0.5857904854736956, "grad_norm": 7.305234432220459, "learning_rate": 9.747869323943013e-06, "loss": 0.5059505, "memory(GiB)": 34.88, "step": 21635, "train_speed(iter/s)": 0.411644 }, { "acc": 0.86542959, "epoch": 0.5859258657569112, "grad_norm": 5.082758903503418, "learning_rate": 9.747693838158259e-06, "loss": 0.62316775, "memory(GiB)": 34.88, "step": 21640, "train_speed(iter/s)": 0.411649 }, { "acc": 0.87855053, "epoch": 0.5860612460401268, "grad_norm": 8.191995620727539, "learning_rate": 9.747518292905576e-06, "loss": 0.53721714, "memory(GiB)": 34.88, "step": 21645, "train_speed(iter/s)": 0.411654 }, { "acc": 0.87379093, "epoch": 0.5861966263233422, "grad_norm": 12.454850196838379, "learning_rate": 9.74734268818716e-06, "loss": 0.66153774, "memory(GiB)": 34.88, "step": 21650, "train_speed(iter/s)": 0.41166 }, { "acc": 0.86118355, "epoch": 0.5863320066065578, "grad_norm": 18.415143966674805, "learning_rate": 9.747167024005214e-06, "loss": 0.71194468, "memory(GiB)": 34.88, "step": 21655, "train_speed(iter/s)": 0.411664 }, { "acc": 0.85890932, "epoch": 0.5864673868897734, "grad_norm": 12.137621879577637, "learning_rate": 9.746991300361934e-06, "loss": 0.71913066, "memory(GiB)": 34.88, "step": 21660, "train_speed(iter/s)": 0.411669 }, { "acc": 0.84236708, "epoch": 0.5866027671729889, "grad_norm": 15.092557907104492, "learning_rate": 9.746815517259523e-06, "loss": 0.76011314, "memory(GiB)": 34.88, "step": 21665, "train_speed(iter/s)": 0.411674 }, { "acc": 0.87062731, "epoch": 0.5867381474562045, "grad_norm": 9.202138900756836, "learning_rate": 9.746639674700185e-06, "loss": 0.71582127, "memory(GiB)": 34.88, "step": 21670, "train_speed(iter/s)": 0.411679 }, { "acc": 0.84700947, "epoch": 0.58687352773942, "grad_norm": 10.282122611999512, "learning_rate": 9.746463772686122e-06, "loss": 0.78224497, "memory(GiB)": 34.88, "step": 21675, "train_speed(iter/s)": 0.411683 }, { "acc": 0.87266884, "epoch": 0.5870089080226356, "grad_norm": 4.155386924743652, "learning_rate": 9.746287811219537e-06, "loss": 0.67312102, "memory(GiB)": 34.88, "step": 21680, "train_speed(iter/s)": 0.411689 }, { "acc": 0.85184031, "epoch": 0.5871442883058511, "grad_norm": 9.927861213684082, "learning_rate": 9.746111790302633e-06, "loss": 0.80023518, "memory(GiB)": 34.88, "step": 21685, "train_speed(iter/s)": 0.411694 }, { "acc": 0.84058285, "epoch": 0.5872796685890667, "grad_norm": 27.181182861328125, "learning_rate": 9.745935709937615e-06, "loss": 0.85605049, "memory(GiB)": 34.88, "step": 21690, "train_speed(iter/s)": 0.411699 }, { "acc": 0.84891205, "epoch": 0.5874150488722822, "grad_norm": 6.686404228210449, "learning_rate": 9.745759570126691e-06, "loss": 0.83789291, "memory(GiB)": 34.88, "step": 21695, "train_speed(iter/s)": 0.411704 }, { "acc": 0.88233051, "epoch": 0.5875504291554978, "grad_norm": 4.019846439361572, "learning_rate": 9.745583370872069e-06, "loss": 0.58625135, "memory(GiB)": 34.88, "step": 21700, "train_speed(iter/s)": 0.411709 }, { "acc": 0.86681175, "epoch": 0.5876858094387133, "grad_norm": 8.697393417358398, "learning_rate": 9.74540711217595e-06, "loss": 0.70245256, "memory(GiB)": 34.88, "step": 21705, "train_speed(iter/s)": 0.411713 }, { "acc": 0.85152206, "epoch": 0.5878211897219289, "grad_norm": 12.703353881835938, "learning_rate": 9.745230794040548e-06, "loss": 0.76660967, "memory(GiB)": 34.88, "step": 21710, "train_speed(iter/s)": 0.411718 }, { "acc": 0.84742928, "epoch": 0.5879565700051445, "grad_norm": 19.467487335205078, "learning_rate": 9.745054416468069e-06, "loss": 0.84743328, "memory(GiB)": 34.88, "step": 21715, "train_speed(iter/s)": 0.411723 }, { "acc": 0.90068092, "epoch": 0.58809195028836, "grad_norm": 17.9534969329834, "learning_rate": 9.744877979460722e-06, "loss": 0.56059608, "memory(GiB)": 34.88, "step": 21720, "train_speed(iter/s)": 0.411728 }, { "acc": 0.86064434, "epoch": 0.5882273305715755, "grad_norm": 9.083358764648438, "learning_rate": 9.744701483020719e-06, "loss": 0.69316025, "memory(GiB)": 34.88, "step": 21725, "train_speed(iter/s)": 0.411733 }, { "acc": 0.86678085, "epoch": 0.5883627108547911, "grad_norm": 10.118457794189453, "learning_rate": 9.74452492715027e-06, "loss": 0.69004965, "memory(GiB)": 34.88, "step": 21730, "train_speed(iter/s)": 0.411738 }, { "acc": 0.85499039, "epoch": 0.5884980911380067, "grad_norm": 9.505946159362793, "learning_rate": 9.744348311851589e-06, "loss": 0.84954548, "memory(GiB)": 34.88, "step": 21735, "train_speed(iter/s)": 0.411743 }, { "acc": 0.85893631, "epoch": 0.5886334714212222, "grad_norm": 10.73692798614502, "learning_rate": 9.744171637126883e-06, "loss": 0.76155691, "memory(GiB)": 34.88, "step": 21740, "train_speed(iter/s)": 0.411748 }, { "acc": 0.88540239, "epoch": 0.5887688517044377, "grad_norm": 18.80516815185547, "learning_rate": 9.74399490297837e-06, "loss": 0.63439016, "memory(GiB)": 34.88, "step": 21745, "train_speed(iter/s)": 0.411753 }, { "acc": 0.86768398, "epoch": 0.5889042319876533, "grad_norm": 9.612578392028809, "learning_rate": 9.74381810940826e-06, "loss": 0.66750398, "memory(GiB)": 34.88, "step": 21750, "train_speed(iter/s)": 0.411758 }, { "acc": 0.84251575, "epoch": 0.5890396122708689, "grad_norm": 13.064826965332031, "learning_rate": 9.743641256418774e-06, "loss": 0.83802853, "memory(GiB)": 34.88, "step": 21755, "train_speed(iter/s)": 0.411763 }, { "acc": 0.85646477, "epoch": 0.5891749925540845, "grad_norm": 12.51478385925293, "learning_rate": 9.74346434401212e-06, "loss": 0.76469936, "memory(GiB)": 34.88, "step": 21760, "train_speed(iter/s)": 0.411767 }, { "acc": 0.86827126, "epoch": 0.5893103728372999, "grad_norm": 6.258970260620117, "learning_rate": 9.74328737219052e-06, "loss": 0.63696241, "memory(GiB)": 34.88, "step": 21765, "train_speed(iter/s)": 0.411772 }, { "acc": 0.86757851, "epoch": 0.5894457531205155, "grad_norm": 12.136690139770508, "learning_rate": 9.743110340956186e-06, "loss": 0.61272607, "memory(GiB)": 34.88, "step": 21770, "train_speed(iter/s)": 0.411777 }, { "acc": 0.87159481, "epoch": 0.5895811334037311, "grad_norm": 5.921748161315918, "learning_rate": 9.742933250311341e-06, "loss": 0.52641973, "memory(GiB)": 34.88, "step": 21775, "train_speed(iter/s)": 0.411782 }, { "acc": 0.8681385, "epoch": 0.5897165136869467, "grad_norm": 8.663596153259277, "learning_rate": 9.7427561002582e-06, "loss": 0.74402676, "memory(GiB)": 34.88, "step": 21780, "train_speed(iter/s)": 0.411787 }, { "acc": 0.84368019, "epoch": 0.5898518939701621, "grad_norm": 10.756707191467285, "learning_rate": 9.742578890798982e-06, "loss": 0.81767025, "memory(GiB)": 34.88, "step": 21785, "train_speed(iter/s)": 0.411791 }, { "acc": 0.87233677, "epoch": 0.5899872742533777, "grad_norm": 6.8835225105285645, "learning_rate": 9.742401621935908e-06, "loss": 0.62653494, "memory(GiB)": 34.88, "step": 21790, "train_speed(iter/s)": 0.411797 }, { "acc": 0.86181469, "epoch": 0.5901226545365933, "grad_norm": 6.905663967132568, "learning_rate": 9.7422242936712e-06, "loss": 0.68579102, "memory(GiB)": 34.88, "step": 21795, "train_speed(iter/s)": 0.411801 }, { "acc": 0.87440166, "epoch": 0.5902580348198089, "grad_norm": 7.967150688171387, "learning_rate": 9.742046906007076e-06, "loss": 0.613943, "memory(GiB)": 34.88, "step": 21800, "train_speed(iter/s)": 0.411806 }, { "acc": 0.84971304, "epoch": 0.5903934151030243, "grad_norm": 9.989053726196289, "learning_rate": 9.74186945894576e-06, "loss": 0.80751953, "memory(GiB)": 34.88, "step": 21805, "train_speed(iter/s)": 0.411811 }, { "acc": 0.8819088, "epoch": 0.5905287953862399, "grad_norm": 5.676482677459717, "learning_rate": 9.741691952489475e-06, "loss": 0.5647172, "memory(GiB)": 34.88, "step": 21810, "train_speed(iter/s)": 0.411816 }, { "acc": 0.85316162, "epoch": 0.5906641756694555, "grad_norm": 8.120585441589355, "learning_rate": 9.741514386640443e-06, "loss": 0.75794053, "memory(GiB)": 34.88, "step": 21815, "train_speed(iter/s)": 0.411821 }, { "acc": 0.83794365, "epoch": 0.5907995559526711, "grad_norm": 11.095184326171875, "learning_rate": 9.741336761400892e-06, "loss": 0.88161011, "memory(GiB)": 34.88, "step": 21820, "train_speed(iter/s)": 0.411826 }, { "acc": 0.86438303, "epoch": 0.5909349362358866, "grad_norm": 7.047887325286865, "learning_rate": 9.741159076773044e-06, "loss": 0.64681292, "memory(GiB)": 34.88, "step": 21825, "train_speed(iter/s)": 0.41183 }, { "acc": 0.85385408, "epoch": 0.5910703165191021, "grad_norm": 10.81308364868164, "learning_rate": 9.740981332759125e-06, "loss": 0.79755116, "memory(GiB)": 34.88, "step": 21830, "train_speed(iter/s)": 0.411835 }, { "acc": 0.86916027, "epoch": 0.5912056968023177, "grad_norm": 29.45272445678711, "learning_rate": 9.740803529361363e-06, "loss": 0.65957718, "memory(GiB)": 34.88, "step": 21835, "train_speed(iter/s)": 0.41184 }, { "acc": 0.84577579, "epoch": 0.5913410770855333, "grad_norm": 12.722545623779297, "learning_rate": 9.740625666581985e-06, "loss": 0.8966938, "memory(GiB)": 34.88, "step": 21840, "train_speed(iter/s)": 0.411845 }, { "acc": 0.84545546, "epoch": 0.5914764573687488, "grad_norm": 21.88652992248535, "learning_rate": 9.740447744423218e-06, "loss": 0.78799024, "memory(GiB)": 34.88, "step": 21845, "train_speed(iter/s)": 0.41185 }, { "acc": 0.85271664, "epoch": 0.5916118376519643, "grad_norm": 8.489776611328125, "learning_rate": 9.740269762887292e-06, "loss": 0.74647341, "memory(GiB)": 34.88, "step": 21850, "train_speed(iter/s)": 0.411855 }, { "acc": 0.8588335, "epoch": 0.5917472179351799, "grad_norm": 7.183720111846924, "learning_rate": 9.740091721976436e-06, "loss": 0.66095304, "memory(GiB)": 34.88, "step": 21855, "train_speed(iter/s)": 0.411859 }, { "acc": 0.87536249, "epoch": 0.5918825982183955, "grad_norm": 8.987931251525879, "learning_rate": 9.73991362169288e-06, "loss": 0.62304883, "memory(GiB)": 34.88, "step": 21860, "train_speed(iter/s)": 0.411864 }, { "acc": 0.83835135, "epoch": 0.592017978501611, "grad_norm": 9.397680282592773, "learning_rate": 9.739735462038856e-06, "loss": 0.86228418, "memory(GiB)": 34.88, "step": 21865, "train_speed(iter/s)": 0.411869 }, { "acc": 0.8606925, "epoch": 0.5921533587848266, "grad_norm": 22.284347534179688, "learning_rate": 9.739557243016593e-06, "loss": 0.70803471, "memory(GiB)": 34.88, "step": 21870, "train_speed(iter/s)": 0.411874 }, { "acc": 0.8603981, "epoch": 0.5922887390680421, "grad_norm": 11.05002498626709, "learning_rate": 9.739378964628329e-06, "loss": 0.74843912, "memory(GiB)": 34.88, "step": 21875, "train_speed(iter/s)": 0.411879 }, { "acc": 0.846385, "epoch": 0.5924241193512577, "grad_norm": 12.939947128295898, "learning_rate": 9.739200626876292e-06, "loss": 0.73941369, "memory(GiB)": 34.88, "step": 21880, "train_speed(iter/s)": 0.411884 }, { "acc": 0.85352669, "epoch": 0.5925594996344732, "grad_norm": 13.7546968460083, "learning_rate": 9.73902222976272e-06, "loss": 0.77336884, "memory(GiB)": 34.88, "step": 21885, "train_speed(iter/s)": 0.411889 }, { "acc": 0.86884737, "epoch": 0.5926948799176888, "grad_norm": 8.799738883972168, "learning_rate": 9.738843773289844e-06, "loss": 0.71342506, "memory(GiB)": 34.88, "step": 21890, "train_speed(iter/s)": 0.411893 }, { "acc": 0.88367348, "epoch": 0.5928302602009043, "grad_norm": 6.163087368011475, "learning_rate": 9.738665257459903e-06, "loss": 0.62516761, "memory(GiB)": 34.88, "step": 21895, "train_speed(iter/s)": 0.411897 }, { "acc": 0.85062294, "epoch": 0.5929656404841199, "grad_norm": 11.53322696685791, "learning_rate": 9.73848668227513e-06, "loss": 0.78069344, "memory(GiB)": 34.88, "step": 21900, "train_speed(iter/s)": 0.411901 }, { "acc": 0.88132362, "epoch": 0.5931010207673354, "grad_norm": 8.717767715454102, "learning_rate": 9.738308047737764e-06, "loss": 0.52817621, "memory(GiB)": 34.88, "step": 21905, "train_speed(iter/s)": 0.411906 }, { "acc": 0.85846939, "epoch": 0.593236401050551, "grad_norm": 15.988336563110352, "learning_rate": 9.738129353850045e-06, "loss": 0.74078245, "memory(GiB)": 34.88, "step": 21910, "train_speed(iter/s)": 0.411911 }, { "acc": 0.86907177, "epoch": 0.5933717813337666, "grad_norm": 13.117207527160645, "learning_rate": 9.737950600614207e-06, "loss": 0.73509116, "memory(GiB)": 34.88, "step": 21915, "train_speed(iter/s)": 0.411916 }, { "acc": 0.88188572, "epoch": 0.5935071616169821, "grad_norm": 10.427327156066895, "learning_rate": 9.73777178803249e-06, "loss": 0.50115972, "memory(GiB)": 34.88, "step": 21920, "train_speed(iter/s)": 0.411921 }, { "acc": 0.87398949, "epoch": 0.5936425419001976, "grad_norm": 16.30850601196289, "learning_rate": 9.737592916107137e-06, "loss": 0.6731595, "memory(GiB)": 34.88, "step": 21925, "train_speed(iter/s)": 0.411925 }, { "acc": 0.86471882, "epoch": 0.5937779221834132, "grad_norm": 8.834920883178711, "learning_rate": 9.737413984840386e-06, "loss": 0.70207243, "memory(GiB)": 34.88, "step": 21930, "train_speed(iter/s)": 0.41193 }, { "acc": 0.87061777, "epoch": 0.5939133024666288, "grad_norm": 3.5262339115142822, "learning_rate": 9.737234994234478e-06, "loss": 0.63546176, "memory(GiB)": 34.88, "step": 21935, "train_speed(iter/s)": 0.411935 }, { "acc": 0.86599874, "epoch": 0.5940486827498443, "grad_norm": 14.864945411682129, "learning_rate": 9.73705594429166e-06, "loss": 0.78232965, "memory(GiB)": 34.88, "step": 21940, "train_speed(iter/s)": 0.41194 }, { "acc": 0.85256186, "epoch": 0.5941840630330598, "grad_norm": 7.993470191955566, "learning_rate": 9.736876835014167e-06, "loss": 0.70822601, "memory(GiB)": 34.88, "step": 21945, "train_speed(iter/s)": 0.411945 }, { "acc": 0.86996269, "epoch": 0.5943194433162754, "grad_norm": 10.846500396728516, "learning_rate": 9.73669766640425e-06, "loss": 0.67821302, "memory(GiB)": 34.88, "step": 21950, "train_speed(iter/s)": 0.41195 }, { "acc": 0.87038574, "epoch": 0.594454823599491, "grad_norm": 11.491059303283691, "learning_rate": 9.73651843846415e-06, "loss": 0.622017, "memory(GiB)": 34.88, "step": 21955, "train_speed(iter/s)": 0.411955 }, { "acc": 0.87857723, "epoch": 0.5945902038827066, "grad_norm": 12.350236892700195, "learning_rate": 9.736339151196115e-06, "loss": 0.63824725, "memory(GiB)": 34.88, "step": 21960, "train_speed(iter/s)": 0.411961 }, { "acc": 0.8433094, "epoch": 0.594725584165922, "grad_norm": 12.835089683532715, "learning_rate": 9.736159804602385e-06, "loss": 0.8290844, "memory(GiB)": 34.88, "step": 21965, "train_speed(iter/s)": 0.411965 }, { "acc": 0.86401024, "epoch": 0.5948609644491376, "grad_norm": 6.506509780883789, "learning_rate": 9.735980398685213e-06, "loss": 0.63447695, "memory(GiB)": 34.88, "step": 21970, "train_speed(iter/s)": 0.411969 }, { "acc": 0.8705246, "epoch": 0.5949963447323532, "grad_norm": 8.422738075256348, "learning_rate": 9.735800933446843e-06, "loss": 0.74582586, "memory(GiB)": 34.88, "step": 21975, "train_speed(iter/s)": 0.411974 }, { "acc": 0.86958771, "epoch": 0.5951317250155688, "grad_norm": 7.354498386383057, "learning_rate": 9.735621408889524e-06, "loss": 0.64182544, "memory(GiB)": 34.88, "step": 21980, "train_speed(iter/s)": 0.411978 }, { "acc": 0.85100441, "epoch": 0.5952671052987842, "grad_norm": 12.881317138671875, "learning_rate": 9.735441825015504e-06, "loss": 0.78110528, "memory(GiB)": 34.88, "step": 21985, "train_speed(iter/s)": 0.411982 }, { "acc": 0.83415184, "epoch": 0.5954024855819998, "grad_norm": 8.588547706604004, "learning_rate": 9.735262181827034e-06, "loss": 0.85033188, "memory(GiB)": 34.88, "step": 21990, "train_speed(iter/s)": 0.411987 }, { "acc": 0.86917362, "epoch": 0.5955378658652154, "grad_norm": 5.471446514129639, "learning_rate": 9.735082479326365e-06, "loss": 0.60841155, "memory(GiB)": 34.88, "step": 21995, "train_speed(iter/s)": 0.411992 }, { "acc": 0.86310215, "epoch": 0.595673246148431, "grad_norm": 8.365926742553711, "learning_rate": 9.734902717515746e-06, "loss": 0.76540012, "memory(GiB)": 34.88, "step": 22000, "train_speed(iter/s)": 0.411997 }, { "acc": 0.83974495, "epoch": 0.5958086264316464, "grad_norm": 16.140169143676758, "learning_rate": 9.73472289639743e-06, "loss": 0.87945747, "memory(GiB)": 34.88, "step": 22005, "train_speed(iter/s)": 0.412002 }, { "acc": 0.85647354, "epoch": 0.595944006714862, "grad_norm": 9.371394157409668, "learning_rate": 9.73454301597367e-06, "loss": 0.76725702, "memory(GiB)": 34.88, "step": 22010, "train_speed(iter/s)": 0.412007 }, { "acc": 0.868713, "epoch": 0.5960793869980776, "grad_norm": 6.787662029266357, "learning_rate": 9.734363076246718e-06, "loss": 0.65639534, "memory(GiB)": 34.88, "step": 22015, "train_speed(iter/s)": 0.412011 }, { "acc": 0.86955929, "epoch": 0.5962147672812932, "grad_norm": 8.93356704711914, "learning_rate": 9.734183077218828e-06, "loss": 0.65607796, "memory(GiB)": 34.88, "step": 22020, "train_speed(iter/s)": 0.412016 }, { "acc": 0.86227436, "epoch": 0.5963501475645087, "grad_norm": 6.480292797088623, "learning_rate": 9.734003018892257e-06, "loss": 0.70654211, "memory(GiB)": 34.88, "step": 22025, "train_speed(iter/s)": 0.412021 }, { "acc": 0.86139746, "epoch": 0.5964855278477242, "grad_norm": 18.32906150817871, "learning_rate": 9.73382290126926e-06, "loss": 0.68205552, "memory(GiB)": 34.88, "step": 22030, "train_speed(iter/s)": 0.412026 }, { "acc": 0.86950302, "epoch": 0.5966209081309398, "grad_norm": 4.841209411621094, "learning_rate": 9.733642724352093e-06, "loss": 0.70911384, "memory(GiB)": 34.88, "step": 22035, "train_speed(iter/s)": 0.412031 }, { "acc": 0.86826839, "epoch": 0.5967562884141554, "grad_norm": 6.790355205535889, "learning_rate": 9.733462488143011e-06, "loss": 0.63683038, "memory(GiB)": 34.88, "step": 22040, "train_speed(iter/s)": 0.412036 }, { "acc": 0.85990314, "epoch": 0.5968916686973709, "grad_norm": 8.366984367370605, "learning_rate": 9.733282192644274e-06, "loss": 0.70547581, "memory(GiB)": 34.88, "step": 22045, "train_speed(iter/s)": 0.41204 }, { "acc": 0.85893955, "epoch": 0.5970270489805864, "grad_norm": 12.318827629089355, "learning_rate": 9.733101837858142e-06, "loss": 0.75691786, "memory(GiB)": 34.88, "step": 22050, "train_speed(iter/s)": 0.412044 }, { "acc": 0.8491993, "epoch": 0.597162429263802, "grad_norm": 12.820459365844727, "learning_rate": 9.73292142378687e-06, "loss": 0.80920916, "memory(GiB)": 34.88, "step": 22055, "train_speed(iter/s)": 0.412049 }, { "acc": 0.85872765, "epoch": 0.5972978095470176, "grad_norm": 8.426801681518555, "learning_rate": 9.732740950432723e-06, "loss": 0.67700872, "memory(GiB)": 34.88, "step": 22060, "train_speed(iter/s)": 0.412054 }, { "acc": 0.86636143, "epoch": 0.5974331898302331, "grad_norm": 13.023542404174805, "learning_rate": 9.732560417797957e-06, "loss": 0.67633905, "memory(GiB)": 34.88, "step": 22065, "train_speed(iter/s)": 0.412058 }, { "acc": 0.87411737, "epoch": 0.5975685701134487, "grad_norm": 5.5263776779174805, "learning_rate": 9.732379825884836e-06, "loss": 0.63374834, "memory(GiB)": 34.88, "step": 22070, "train_speed(iter/s)": 0.412064 }, { "acc": 0.85836411, "epoch": 0.5977039503966642, "grad_norm": 8.013446807861328, "learning_rate": 9.732199174695623e-06, "loss": 0.7531949, "memory(GiB)": 34.88, "step": 22075, "train_speed(iter/s)": 0.412068 }, { "acc": 0.85573959, "epoch": 0.5978393306798798, "grad_norm": 8.627091407775879, "learning_rate": 9.732018464232579e-06, "loss": 0.77713623, "memory(GiB)": 34.88, "step": 22080, "train_speed(iter/s)": 0.412073 }, { "acc": 0.86197414, "epoch": 0.5979747109630953, "grad_norm": 19.93497657775879, "learning_rate": 9.73183769449797e-06, "loss": 0.70708833, "memory(GiB)": 34.88, "step": 22085, "train_speed(iter/s)": 0.412077 }, { "acc": 0.85018253, "epoch": 0.5981100912463109, "grad_norm": 23.044662475585938, "learning_rate": 9.731656865494057e-06, "loss": 0.79951029, "memory(GiB)": 34.88, "step": 22090, "train_speed(iter/s)": 0.412081 }, { "acc": 0.85514441, "epoch": 0.5982454715295265, "grad_norm": 10.714164733886719, "learning_rate": 9.731475977223112e-06, "loss": 0.69030023, "memory(GiB)": 34.88, "step": 22095, "train_speed(iter/s)": 0.412086 }, { "acc": 0.87102566, "epoch": 0.598380851812742, "grad_norm": 10.631110191345215, "learning_rate": 9.731295029687394e-06, "loss": 0.61179948, "memory(GiB)": 34.88, "step": 22100, "train_speed(iter/s)": 0.412091 }, { "acc": 0.87961216, "epoch": 0.5985162320959575, "grad_norm": 9.564170837402344, "learning_rate": 9.731114022889172e-06, "loss": 0.5736949, "memory(GiB)": 34.88, "step": 22105, "train_speed(iter/s)": 0.412097 }, { "acc": 0.87699013, "epoch": 0.5986516123791731, "grad_norm": 11.070319175720215, "learning_rate": 9.730932956830714e-06, "loss": 0.61252236, "memory(GiB)": 34.88, "step": 22110, "train_speed(iter/s)": 0.412102 }, { "acc": 0.84501629, "epoch": 0.5987869926623887, "grad_norm": 13.581894874572754, "learning_rate": 9.730751831514287e-06, "loss": 0.82056093, "memory(GiB)": 34.88, "step": 22115, "train_speed(iter/s)": 0.412105 }, { "acc": 0.86971693, "epoch": 0.5989223729456042, "grad_norm": 11.566381454467773, "learning_rate": 9.730570646942162e-06, "loss": 0.69273124, "memory(GiB)": 34.88, "step": 22120, "train_speed(iter/s)": 0.412109 }, { "acc": 0.85779696, "epoch": 0.5990577532288197, "grad_norm": 10.470978736877441, "learning_rate": 9.730389403116607e-06, "loss": 0.77816944, "memory(GiB)": 34.88, "step": 22125, "train_speed(iter/s)": 0.412114 }, { "acc": 0.8719593, "epoch": 0.5991931335120353, "grad_norm": 9.197070121765137, "learning_rate": 9.730208100039894e-06, "loss": 0.57827625, "memory(GiB)": 34.88, "step": 22130, "train_speed(iter/s)": 0.412119 }, { "acc": 0.84168701, "epoch": 0.5993285137952509, "grad_norm": 8.056314468383789, "learning_rate": 9.730026737714292e-06, "loss": 0.81781616, "memory(GiB)": 34.88, "step": 22135, "train_speed(iter/s)": 0.412124 }, { "acc": 0.86859293, "epoch": 0.5994638940784665, "grad_norm": 8.422447204589844, "learning_rate": 9.729845316142075e-06, "loss": 0.59749203, "memory(GiB)": 34.88, "step": 22140, "train_speed(iter/s)": 0.412129 }, { "acc": 0.84443388, "epoch": 0.5995992743616819, "grad_norm": 8.274569511413574, "learning_rate": 9.729663835325515e-06, "loss": 0.73482966, "memory(GiB)": 34.88, "step": 22145, "train_speed(iter/s)": 0.412133 }, { "acc": 0.87035637, "epoch": 0.5997346546448975, "grad_norm": 13.637701988220215, "learning_rate": 9.729482295266883e-06, "loss": 0.61421618, "memory(GiB)": 34.88, "step": 22150, "train_speed(iter/s)": 0.412138 }, { "acc": 0.85140924, "epoch": 0.5998700349281131, "grad_norm": 70.13433837890625, "learning_rate": 9.729300695968458e-06, "loss": 0.76650825, "memory(GiB)": 34.88, "step": 22155, "train_speed(iter/s)": 0.412142 }, { "acc": 0.854671, "epoch": 0.6000054152113287, "grad_norm": 7.009624004364014, "learning_rate": 9.72911903743251e-06, "loss": 0.77072649, "memory(GiB)": 34.88, "step": 22160, "train_speed(iter/s)": 0.412147 }, { "acc": 0.84494247, "epoch": 0.6001407954945441, "grad_norm": 9.697738647460938, "learning_rate": 9.728937319661319e-06, "loss": 0.7491622, "memory(GiB)": 34.88, "step": 22165, "train_speed(iter/s)": 0.412152 }, { "acc": 0.84925079, "epoch": 0.6002761757777597, "grad_norm": 12.16601276397705, "learning_rate": 9.728755542657158e-06, "loss": 0.75589118, "memory(GiB)": 34.88, "step": 22170, "train_speed(iter/s)": 0.412157 }, { "acc": 0.85438566, "epoch": 0.6004115560609753, "grad_norm": 7.97317361831665, "learning_rate": 9.728573706422307e-06, "loss": 0.81853237, "memory(GiB)": 34.88, "step": 22175, "train_speed(iter/s)": 0.412162 }, { "acc": 0.86277781, "epoch": 0.6005469363441909, "grad_norm": 13.883281707763672, "learning_rate": 9.728391810959037e-06, "loss": 0.71272993, "memory(GiB)": 34.88, "step": 22180, "train_speed(iter/s)": 0.412167 }, { "acc": 0.85337334, "epoch": 0.6006823166274063, "grad_norm": 13.469477653503418, "learning_rate": 9.728209856269636e-06, "loss": 0.75605793, "memory(GiB)": 34.88, "step": 22185, "train_speed(iter/s)": 0.412172 }, { "acc": 0.85191278, "epoch": 0.6008176969106219, "grad_norm": 11.036376953125, "learning_rate": 9.72802784235638e-06, "loss": 0.90040035, "memory(GiB)": 34.88, "step": 22190, "train_speed(iter/s)": 0.412177 }, { "acc": 0.8695159, "epoch": 0.6009530771938375, "grad_norm": 6.267772197723389, "learning_rate": 9.727845769221546e-06, "loss": 0.63890333, "memory(GiB)": 34.88, "step": 22195, "train_speed(iter/s)": 0.412181 }, { "acc": 0.86967144, "epoch": 0.6010884574770531, "grad_norm": 6.016086578369141, "learning_rate": 9.727663636867417e-06, "loss": 0.66815209, "memory(GiB)": 34.88, "step": 22200, "train_speed(iter/s)": 0.412186 }, { "acc": 0.87887392, "epoch": 0.6012238377602686, "grad_norm": 8.100043296813965, "learning_rate": 9.727481445296277e-06, "loss": 0.57058644, "memory(GiB)": 34.88, "step": 22205, "train_speed(iter/s)": 0.412191 }, { "acc": 0.86072483, "epoch": 0.6013592180434841, "grad_norm": 4.70306921005249, "learning_rate": 9.727299194510403e-06, "loss": 0.58156195, "memory(GiB)": 34.88, "step": 22210, "train_speed(iter/s)": 0.412196 }, { "acc": 0.86450653, "epoch": 0.6014945983266997, "grad_norm": 6.342692852020264, "learning_rate": 9.727116884512084e-06, "loss": 0.7712369, "memory(GiB)": 34.88, "step": 22215, "train_speed(iter/s)": 0.4122 }, { "acc": 0.85435143, "epoch": 0.6016299786099153, "grad_norm": 16.14917755126953, "learning_rate": 9.726934515303598e-06, "loss": 0.81428337, "memory(GiB)": 34.88, "step": 22220, "train_speed(iter/s)": 0.412205 }, { "acc": 0.87713432, "epoch": 0.6017653588931308, "grad_norm": 8.905774116516113, "learning_rate": 9.726752086887232e-06, "loss": 0.72077084, "memory(GiB)": 34.88, "step": 22225, "train_speed(iter/s)": 0.41221 }, { "acc": 0.88895187, "epoch": 0.6019007391763463, "grad_norm": 10.286892890930176, "learning_rate": 9.726569599265274e-06, "loss": 0.53851099, "memory(GiB)": 34.88, "step": 22230, "train_speed(iter/s)": 0.412214 }, { "acc": 0.85573893, "epoch": 0.6020361194595619, "grad_norm": 6.036299705505371, "learning_rate": 9.726387052440005e-06, "loss": 0.74998188, "memory(GiB)": 34.88, "step": 22235, "train_speed(iter/s)": 0.412219 }, { "acc": 0.8456934, "epoch": 0.6021714997427775, "grad_norm": 8.626570701599121, "learning_rate": 9.726204446413717e-06, "loss": 0.851653, "memory(GiB)": 34.88, "step": 22240, "train_speed(iter/s)": 0.412223 }, { "acc": 0.86306725, "epoch": 0.602306880025993, "grad_norm": 17.45191764831543, "learning_rate": 9.726021781188693e-06, "loss": 0.72671242, "memory(GiB)": 34.88, "step": 22245, "train_speed(iter/s)": 0.412227 }, { "acc": 0.88540344, "epoch": 0.6024422603092086, "grad_norm": 6.658954620361328, "learning_rate": 9.725839056767224e-06, "loss": 0.60213094, "memory(GiB)": 34.88, "step": 22250, "train_speed(iter/s)": 0.412232 }, { "acc": 0.85798826, "epoch": 0.6025776405924241, "grad_norm": 4.624879360198975, "learning_rate": 9.725656273151597e-06, "loss": 0.76795721, "memory(GiB)": 34.88, "step": 22255, "train_speed(iter/s)": 0.412236 }, { "acc": 0.85495167, "epoch": 0.6027130208756397, "grad_norm": 13.295284271240234, "learning_rate": 9.725473430344103e-06, "loss": 0.73564715, "memory(GiB)": 34.88, "step": 22260, "train_speed(iter/s)": 0.412241 }, { "acc": 0.85422831, "epoch": 0.6028484011588552, "grad_norm": 12.492012977600098, "learning_rate": 9.725290528347034e-06, "loss": 0.88777809, "memory(GiB)": 34.88, "step": 22265, "train_speed(iter/s)": 0.412246 }, { "acc": 0.85424051, "epoch": 0.6029837814420708, "grad_norm": 13.125666618347168, "learning_rate": 9.725107567162676e-06, "loss": 0.6944416, "memory(GiB)": 34.88, "step": 22270, "train_speed(iter/s)": 0.412251 }, { "acc": 0.83327112, "epoch": 0.6031191617252863, "grad_norm": 8.115076065063477, "learning_rate": 9.724924546793327e-06, "loss": 0.87362804, "memory(GiB)": 34.88, "step": 22275, "train_speed(iter/s)": 0.412256 }, { "acc": 0.85312939, "epoch": 0.6032545420085019, "grad_norm": 5.741227626800537, "learning_rate": 9.724741467241277e-06, "loss": 0.78066225, "memory(GiB)": 34.88, "step": 22280, "train_speed(iter/s)": 0.41226 }, { "acc": 0.88871164, "epoch": 0.6033899222917174, "grad_norm": 10.651610374450684, "learning_rate": 9.724558328508818e-06, "loss": 0.53317027, "memory(GiB)": 34.88, "step": 22285, "train_speed(iter/s)": 0.412264 }, { "acc": 0.87251081, "epoch": 0.603525302574933, "grad_norm": 6.217031478881836, "learning_rate": 9.724375130598246e-06, "loss": 0.69460802, "memory(GiB)": 34.88, "step": 22290, "train_speed(iter/s)": 0.412269 }, { "acc": 0.86241474, "epoch": 0.6036606828581486, "grad_norm": 11.251970291137695, "learning_rate": 9.724191873511857e-06, "loss": 0.69791021, "memory(GiB)": 34.88, "step": 22295, "train_speed(iter/s)": 0.412275 }, { "acc": 0.84708481, "epoch": 0.6037960631413641, "grad_norm": 8.076518058776855, "learning_rate": 9.724008557251943e-06, "loss": 0.83347225, "memory(GiB)": 34.88, "step": 22300, "train_speed(iter/s)": 0.412278 }, { "acc": 0.88318415, "epoch": 0.6039314434245796, "grad_norm": 6.064067363739014, "learning_rate": 9.723825181820804e-06, "loss": 0.64004526, "memory(GiB)": 34.88, "step": 22305, "train_speed(iter/s)": 0.412283 }, { "acc": 0.85120096, "epoch": 0.6040668237077952, "grad_norm": 9.000828742980957, "learning_rate": 9.723641747220738e-06, "loss": 0.78887596, "memory(GiB)": 34.88, "step": 22310, "train_speed(iter/s)": 0.412287 }, { "acc": 0.85885735, "epoch": 0.6042022039910108, "grad_norm": 9.52517318725586, "learning_rate": 9.723458253454038e-06, "loss": 0.85568695, "memory(GiB)": 34.88, "step": 22315, "train_speed(iter/s)": 0.412292 }, { "acc": 0.86964569, "epoch": 0.6043375842742263, "grad_norm": 4.849233150482178, "learning_rate": 9.723274700523006e-06, "loss": 0.74205618, "memory(GiB)": 34.88, "step": 22320, "train_speed(iter/s)": 0.412296 }, { "acc": 0.8715951, "epoch": 0.6044729645574418, "grad_norm": 4.372901916503906, "learning_rate": 9.723091088429939e-06, "loss": 0.696064, "memory(GiB)": 34.88, "step": 22325, "train_speed(iter/s)": 0.4123 }, { "acc": 0.89311523, "epoch": 0.6046083448406574, "grad_norm": 4.342551231384277, "learning_rate": 9.72290741717714e-06, "loss": 0.47352133, "memory(GiB)": 34.88, "step": 22330, "train_speed(iter/s)": 0.412305 }, { "acc": 0.85646038, "epoch": 0.604743725123873, "grad_norm": 7.313433647155762, "learning_rate": 9.722723686766911e-06, "loss": 0.80996637, "memory(GiB)": 34.88, "step": 22335, "train_speed(iter/s)": 0.412309 }, { "acc": 0.87519026, "epoch": 0.6048791054070886, "grad_norm": 5.617393493652344, "learning_rate": 9.722539897201549e-06, "loss": 0.53793697, "memory(GiB)": 34.88, "step": 22340, "train_speed(iter/s)": 0.412314 }, { "acc": 0.85200596, "epoch": 0.605014485690304, "grad_norm": 16.105295181274414, "learning_rate": 9.722356048483356e-06, "loss": 0.82187433, "memory(GiB)": 34.88, "step": 22345, "train_speed(iter/s)": 0.412319 }, { "acc": 0.82850304, "epoch": 0.6051498659735196, "grad_norm": 6.757955551147461, "learning_rate": 9.72217214061464e-06, "loss": 0.84767294, "memory(GiB)": 34.88, "step": 22350, "train_speed(iter/s)": 0.412324 }, { "acc": 0.88991127, "epoch": 0.6052852462567352, "grad_norm": 10.319618225097656, "learning_rate": 9.721988173597703e-06, "loss": 0.5513341, "memory(GiB)": 34.88, "step": 22355, "train_speed(iter/s)": 0.412328 }, { "acc": 0.8625082, "epoch": 0.6054206265399508, "grad_norm": 5.949467182159424, "learning_rate": 9.72180414743485e-06, "loss": 0.70448952, "memory(GiB)": 34.88, "step": 22360, "train_speed(iter/s)": 0.412333 }, { "acc": 0.86069374, "epoch": 0.6055560068231662, "grad_norm": 14.301105499267578, "learning_rate": 9.721620062128383e-06, "loss": 0.74252663, "memory(GiB)": 34.88, "step": 22365, "train_speed(iter/s)": 0.412338 }, { "acc": 0.87021179, "epoch": 0.6056913871063818, "grad_norm": 10.638093948364258, "learning_rate": 9.72143591768061e-06, "loss": 0.72087293, "memory(GiB)": 34.88, "step": 22370, "train_speed(iter/s)": 0.412343 }, { "acc": 0.86176777, "epoch": 0.6058267673895974, "grad_norm": 8.62405014038086, "learning_rate": 9.721251714093839e-06, "loss": 0.59903202, "memory(GiB)": 34.88, "step": 22375, "train_speed(iter/s)": 0.412348 }, { "acc": 0.85137091, "epoch": 0.605962147672813, "grad_norm": 11.53415584564209, "learning_rate": 9.721067451370377e-06, "loss": 0.75746603, "memory(GiB)": 34.88, "step": 22380, "train_speed(iter/s)": 0.412353 }, { "acc": 0.85037518, "epoch": 0.6060975279560284, "grad_norm": 14.272228240966797, "learning_rate": 9.720883129512529e-06, "loss": 0.73573332, "memory(GiB)": 34.88, "step": 22385, "train_speed(iter/s)": 0.412357 }, { "acc": 0.86753092, "epoch": 0.606232908239244, "grad_norm": 6.1761603355407715, "learning_rate": 9.720698748522608e-06, "loss": 0.58158875, "memory(GiB)": 34.88, "step": 22390, "train_speed(iter/s)": 0.412362 }, { "acc": 0.87220869, "epoch": 0.6063682885224596, "grad_norm": 6.978998184204102, "learning_rate": 9.720514308402925e-06, "loss": 0.66878662, "memory(GiB)": 34.88, "step": 22395, "train_speed(iter/s)": 0.412366 }, { "acc": 0.85299444, "epoch": 0.6065036688056752, "grad_norm": 8.791929244995117, "learning_rate": 9.720329809155787e-06, "loss": 0.66277905, "memory(GiB)": 34.88, "step": 22400, "train_speed(iter/s)": 0.412371 }, { "acc": 0.8720211, "epoch": 0.6066390490888907, "grad_norm": 11.23330307006836, "learning_rate": 9.720145250783505e-06, "loss": 0.66618485, "memory(GiB)": 34.88, "step": 22405, "train_speed(iter/s)": 0.412375 }, { "acc": 0.88108044, "epoch": 0.6067744293721062, "grad_norm": 35.32891845703125, "learning_rate": 9.719960633288392e-06, "loss": 0.58843431, "memory(GiB)": 34.88, "step": 22410, "train_speed(iter/s)": 0.41238 }, { "acc": 0.86749229, "epoch": 0.6069098096553218, "grad_norm": 13.012079238891602, "learning_rate": 9.719775956672761e-06, "loss": 0.68088427, "memory(GiB)": 34.88, "step": 22415, "train_speed(iter/s)": 0.412384 }, { "acc": 0.86545115, "epoch": 0.6070451899385374, "grad_norm": 31.278610229492188, "learning_rate": 9.719591220938927e-06, "loss": 0.71708293, "memory(GiB)": 34.88, "step": 22420, "train_speed(iter/s)": 0.412389 }, { "acc": 0.85008163, "epoch": 0.6071805702217529, "grad_norm": 8.883119583129883, "learning_rate": 9.719406426089201e-06, "loss": 0.79947977, "memory(GiB)": 34.88, "step": 22425, "train_speed(iter/s)": 0.412393 }, { "acc": 0.84646511, "epoch": 0.6073159505049684, "grad_norm": 8.922768592834473, "learning_rate": 9.719221572125898e-06, "loss": 0.76485405, "memory(GiB)": 34.88, "step": 22430, "train_speed(iter/s)": 0.412397 }, { "acc": 0.84889679, "epoch": 0.607451330788184, "grad_norm": 10.079662322998047, "learning_rate": 9.719036659051337e-06, "loss": 0.85076313, "memory(GiB)": 34.88, "step": 22435, "train_speed(iter/s)": 0.412402 }, { "acc": 0.85444336, "epoch": 0.6075867110713996, "grad_norm": 5.973265171051025, "learning_rate": 9.718851686867833e-06, "loss": 0.78092442, "memory(GiB)": 34.88, "step": 22440, "train_speed(iter/s)": 0.412407 }, { "acc": 0.85184526, "epoch": 0.6077220913546151, "grad_norm": 5.854508876800537, "learning_rate": 9.718666655577702e-06, "loss": 0.7740633, "memory(GiB)": 34.88, "step": 22445, "train_speed(iter/s)": 0.412412 }, { "acc": 0.86700029, "epoch": 0.6078574716378307, "grad_norm": 6.927515029907227, "learning_rate": 9.71848156518326e-06, "loss": 0.75477009, "memory(GiB)": 34.88, "step": 22450, "train_speed(iter/s)": 0.412416 }, { "acc": 0.84885044, "epoch": 0.6079928519210462, "grad_norm": 13.094107627868652, "learning_rate": 9.71829641568683e-06, "loss": 0.82471771, "memory(GiB)": 34.88, "step": 22455, "train_speed(iter/s)": 0.412421 }, { "acc": 0.84623966, "epoch": 0.6081282322042618, "grad_norm": 9.499364852905273, "learning_rate": 9.71811120709073e-06, "loss": 0.78295689, "memory(GiB)": 34.88, "step": 22460, "train_speed(iter/s)": 0.412426 }, { "acc": 0.86359272, "epoch": 0.6082636124874773, "grad_norm": 5.445176124572754, "learning_rate": 9.717925939397281e-06, "loss": 0.65565476, "memory(GiB)": 34.88, "step": 22465, "train_speed(iter/s)": 0.41243 }, { "acc": 0.85602207, "epoch": 0.6083989927706929, "grad_norm": 10.297831535339355, "learning_rate": 9.7177406126088e-06, "loss": 0.72376556, "memory(GiB)": 34.88, "step": 22470, "train_speed(iter/s)": 0.412435 }, { "acc": 0.86602669, "epoch": 0.6085343730539085, "grad_norm": 10.16653060913086, "learning_rate": 9.717555226727611e-06, "loss": 0.73022418, "memory(GiB)": 34.88, "step": 22475, "train_speed(iter/s)": 0.41244 }, { "acc": 0.83534374, "epoch": 0.608669753337124, "grad_norm": 11.653706550598145, "learning_rate": 9.717369781756038e-06, "loss": 0.7445878, "memory(GiB)": 34.88, "step": 22480, "train_speed(iter/s)": 0.412444 }, { "acc": 0.8738636, "epoch": 0.6088051336203395, "grad_norm": 8.100259780883789, "learning_rate": 9.717184277696401e-06, "loss": 0.62891531, "memory(GiB)": 34.88, "step": 22485, "train_speed(iter/s)": 0.412448 }, { "acc": 0.85684175, "epoch": 0.6089405139035551, "grad_norm": 9.113733291625977, "learning_rate": 9.716998714551024e-06, "loss": 0.71133614, "memory(GiB)": 34.88, "step": 22490, "train_speed(iter/s)": 0.412452 }, { "acc": 0.89038391, "epoch": 0.6090758941867707, "grad_norm": 5.374849796295166, "learning_rate": 9.716813092322235e-06, "loss": 0.5490633, "memory(GiB)": 34.88, "step": 22495, "train_speed(iter/s)": 0.412457 }, { "acc": 0.86776104, "epoch": 0.6092112744699862, "grad_norm": 11.690303802490234, "learning_rate": 9.716627411012356e-06, "loss": 0.62390022, "memory(GiB)": 34.88, "step": 22500, "train_speed(iter/s)": 0.412461 }, { "acc": 0.88900547, "epoch": 0.6093466547532017, "grad_norm": 5.791740417480469, "learning_rate": 9.716441670623712e-06, "loss": 0.5121953, "memory(GiB)": 34.88, "step": 22505, "train_speed(iter/s)": 0.412465 }, { "acc": 0.85706148, "epoch": 0.6094820350364173, "grad_norm": 15.444964408874512, "learning_rate": 9.716255871158635e-06, "loss": 0.74594841, "memory(GiB)": 34.88, "step": 22510, "train_speed(iter/s)": 0.41247 }, { "acc": 0.87338047, "epoch": 0.6096174153196329, "grad_norm": 9.687832832336426, "learning_rate": 9.716070012619449e-06, "loss": 0.64958973, "memory(GiB)": 34.88, "step": 22515, "train_speed(iter/s)": 0.412475 }, { "acc": 0.86151505, "epoch": 0.6097527956028485, "grad_norm": 11.134687423706055, "learning_rate": 9.71588409500848e-06, "loss": 0.6974308, "memory(GiB)": 34.88, "step": 22520, "train_speed(iter/s)": 0.41248 }, { "acc": 0.88995609, "epoch": 0.6098881758860639, "grad_norm": 6.3391194343566895, "learning_rate": 9.715698118328062e-06, "loss": 0.52186403, "memory(GiB)": 34.88, "step": 22525, "train_speed(iter/s)": 0.412484 }, { "acc": 0.86687689, "epoch": 0.6100235561692795, "grad_norm": 4.536931991577148, "learning_rate": 9.71551208258052e-06, "loss": 0.7018568, "memory(GiB)": 34.88, "step": 22530, "train_speed(iter/s)": 0.412489 }, { "acc": 0.85805569, "epoch": 0.6101589364524951, "grad_norm": 6.2120161056518555, "learning_rate": 9.71532598776819e-06, "loss": 0.71441917, "memory(GiB)": 34.88, "step": 22535, "train_speed(iter/s)": 0.412494 }, { "acc": 0.83671875, "epoch": 0.6102943167357107, "grad_norm": 7.6926774978637695, "learning_rate": 9.715139833893396e-06, "loss": 0.80888577, "memory(GiB)": 34.88, "step": 22540, "train_speed(iter/s)": 0.412498 }, { "acc": 0.82837906, "epoch": 0.6104296970189261, "grad_norm": 6.455086708068848, "learning_rate": 9.714953620958475e-06, "loss": 0.89909286, "memory(GiB)": 34.88, "step": 22545, "train_speed(iter/s)": 0.412503 }, { "acc": 0.86310482, "epoch": 0.6105650773021417, "grad_norm": 12.228429794311523, "learning_rate": 9.71476734896576e-06, "loss": 0.67854681, "memory(GiB)": 34.88, "step": 22550, "train_speed(iter/s)": 0.412504 }, { "acc": 0.88116541, "epoch": 0.6107004575853573, "grad_norm": 7.4477033615112305, "learning_rate": 9.714581017917584e-06, "loss": 0.60078387, "memory(GiB)": 34.88, "step": 22555, "train_speed(iter/s)": 0.412509 }, { "acc": 0.87102528, "epoch": 0.6108358378685729, "grad_norm": 17.778600692749023, "learning_rate": 9.714394627816277e-06, "loss": 0.65638075, "memory(GiB)": 34.88, "step": 22560, "train_speed(iter/s)": 0.412514 }, { "acc": 0.8686224, "epoch": 0.6109712181517883, "grad_norm": 6.96485710144043, "learning_rate": 9.714208178664178e-06, "loss": 0.65265679, "memory(GiB)": 34.88, "step": 22565, "train_speed(iter/s)": 0.412518 }, { "acc": 0.87162428, "epoch": 0.6111065984350039, "grad_norm": 6.326142311096191, "learning_rate": 9.714021670463621e-06, "loss": 0.69543576, "memory(GiB)": 34.88, "step": 22570, "train_speed(iter/s)": 0.412523 }, { "acc": 0.85392666, "epoch": 0.6112419787182195, "grad_norm": 15.158754348754883, "learning_rate": 9.713835103216944e-06, "loss": 0.80799704, "memory(GiB)": 34.88, "step": 22575, "train_speed(iter/s)": 0.412527 }, { "acc": 0.85709629, "epoch": 0.6113773590014351, "grad_norm": 9.509648323059082, "learning_rate": 9.713648476926484e-06, "loss": 0.73456206, "memory(GiB)": 34.88, "step": 22580, "train_speed(iter/s)": 0.412532 }, { "acc": 0.83553009, "epoch": 0.6115127392846506, "grad_norm": 9.568656921386719, "learning_rate": 9.713461791594578e-06, "loss": 0.87958031, "memory(GiB)": 34.88, "step": 22585, "train_speed(iter/s)": 0.412537 }, { "acc": 0.8568924, "epoch": 0.6116481195678661, "grad_norm": 29.316755294799805, "learning_rate": 9.713275047223563e-06, "loss": 0.70973649, "memory(GiB)": 34.88, "step": 22590, "train_speed(iter/s)": 0.41254 }, { "acc": 0.86259518, "epoch": 0.6117834998510817, "grad_norm": 11.81561279296875, "learning_rate": 9.71308824381578e-06, "loss": 0.71675439, "memory(GiB)": 34.88, "step": 22595, "train_speed(iter/s)": 0.412545 }, { "acc": 0.86362619, "epoch": 0.6119188801342973, "grad_norm": 7.04831075668335, "learning_rate": 9.71290138137357e-06, "loss": 0.69044552, "memory(GiB)": 34.88, "step": 22600, "train_speed(iter/s)": 0.412548 }, { "acc": 0.86022549, "epoch": 0.6120542604175128, "grad_norm": 17.247676849365234, "learning_rate": 9.712714459899272e-06, "loss": 0.62679615, "memory(GiB)": 34.88, "step": 22605, "train_speed(iter/s)": 0.412553 }, { "acc": 0.85406752, "epoch": 0.6121896407007283, "grad_norm": 20.65338897705078, "learning_rate": 9.71252747939523e-06, "loss": 0.8008049, "memory(GiB)": 34.88, "step": 22610, "train_speed(iter/s)": 0.412558 }, { "acc": 0.86421547, "epoch": 0.6123250209839439, "grad_norm": 8.965126991271973, "learning_rate": 9.712340439863782e-06, "loss": 0.7455348, "memory(GiB)": 34.88, "step": 22615, "train_speed(iter/s)": 0.412562 }, { "acc": 0.83512573, "epoch": 0.6124604012671595, "grad_norm": 17.427778244018555, "learning_rate": 9.712153341307276e-06, "loss": 0.81534901, "memory(GiB)": 34.88, "step": 22620, "train_speed(iter/s)": 0.412567 }, { "acc": 0.87105818, "epoch": 0.612595781550375, "grad_norm": 8.859451293945312, "learning_rate": 9.711966183728051e-06, "loss": 0.60048141, "memory(GiB)": 34.88, "step": 22625, "train_speed(iter/s)": 0.412572 }, { "acc": 0.86746988, "epoch": 0.6127311618335906, "grad_norm": 10.76685905456543, "learning_rate": 9.711778967128457e-06, "loss": 0.67615399, "memory(GiB)": 34.88, "step": 22630, "train_speed(iter/s)": 0.412576 }, { "acc": 0.86948986, "epoch": 0.6128665421168061, "grad_norm": 6.444762229919434, "learning_rate": 9.711591691510836e-06, "loss": 0.59165783, "memory(GiB)": 34.88, "step": 22635, "train_speed(iter/s)": 0.412581 }, { "acc": 0.8350771, "epoch": 0.6130019224000217, "grad_norm": 6.4481329917907715, "learning_rate": 9.711404356877533e-06, "loss": 0.84179497, "memory(GiB)": 34.88, "step": 22640, "train_speed(iter/s)": 0.412585 }, { "acc": 0.86547108, "epoch": 0.6131373026832372, "grad_norm": 6.66871452331543, "learning_rate": 9.711216963230895e-06, "loss": 0.71802874, "memory(GiB)": 34.88, "step": 22645, "train_speed(iter/s)": 0.41259 }, { "acc": 0.86862888, "epoch": 0.6132726829664528, "grad_norm": 6.063541889190674, "learning_rate": 9.711029510573273e-06, "loss": 0.62864571, "memory(GiB)": 34.88, "step": 22650, "train_speed(iter/s)": 0.412595 }, { "acc": 0.88658752, "epoch": 0.6134080632496683, "grad_norm": 8.980278015136719, "learning_rate": 9.710841998907013e-06, "loss": 0.56105509, "memory(GiB)": 34.88, "step": 22655, "train_speed(iter/s)": 0.4126 }, { "acc": 0.88324299, "epoch": 0.6135434435328839, "grad_norm": 10.554795265197754, "learning_rate": 9.710654428234462e-06, "loss": 0.58138609, "memory(GiB)": 34.88, "step": 22660, "train_speed(iter/s)": 0.412604 }, { "acc": 0.86913662, "epoch": 0.6136788238160994, "grad_norm": 7.324384689331055, "learning_rate": 9.71046679855797e-06, "loss": 0.65184784, "memory(GiB)": 34.88, "step": 22665, "train_speed(iter/s)": 0.412608 }, { "acc": 0.85268755, "epoch": 0.613814204099315, "grad_norm": 14.240771293640137, "learning_rate": 9.71027910987989e-06, "loss": 0.78725119, "memory(GiB)": 34.88, "step": 22670, "train_speed(iter/s)": 0.412613 }, { "acc": 0.86624975, "epoch": 0.6139495843825306, "grad_norm": 6.216892242431641, "learning_rate": 9.710091362202573e-06, "loss": 0.61272111, "memory(GiB)": 34.88, "step": 22675, "train_speed(iter/s)": 0.412618 }, { "acc": 0.85281649, "epoch": 0.6140849646657461, "grad_norm": 7.240054130554199, "learning_rate": 9.70990355552837e-06, "loss": 0.70979362, "memory(GiB)": 34.88, "step": 22680, "train_speed(iter/s)": 0.412623 }, { "acc": 0.86869793, "epoch": 0.6142203449489616, "grad_norm": 6.67476749420166, "learning_rate": 9.709715689859633e-06, "loss": 0.61970387, "memory(GiB)": 34.88, "step": 22685, "train_speed(iter/s)": 0.412628 }, { "acc": 0.85836849, "epoch": 0.6143557252321772, "grad_norm": 8.145281791687012, "learning_rate": 9.709527765198715e-06, "loss": 0.78380637, "memory(GiB)": 34.88, "step": 22690, "train_speed(iter/s)": 0.412632 }, { "acc": 0.85520096, "epoch": 0.6144911055153928, "grad_norm": 9.541190147399902, "learning_rate": 9.709339781547972e-06, "loss": 0.74655495, "memory(GiB)": 34.88, "step": 22695, "train_speed(iter/s)": 0.412636 }, { "acc": 0.85490122, "epoch": 0.6146264857986082, "grad_norm": 11.785778999328613, "learning_rate": 9.709151738909758e-06, "loss": 0.7398942, "memory(GiB)": 34.88, "step": 22700, "train_speed(iter/s)": 0.412641 }, { "acc": 0.86071253, "epoch": 0.6147618660818238, "grad_norm": 9.683727264404297, "learning_rate": 9.708963637286427e-06, "loss": 0.5913949, "memory(GiB)": 34.88, "step": 22705, "train_speed(iter/s)": 0.412646 }, { "acc": 0.85398216, "epoch": 0.6148972463650394, "grad_norm": 7.70077657699585, "learning_rate": 9.708775476680338e-06, "loss": 0.70970697, "memory(GiB)": 34.88, "step": 22710, "train_speed(iter/s)": 0.412649 }, { "acc": 0.87836895, "epoch": 0.615032626648255, "grad_norm": 8.247889518737793, "learning_rate": 9.708587257093847e-06, "loss": 0.60984774, "memory(GiB)": 34.88, "step": 22715, "train_speed(iter/s)": 0.412652 }, { "acc": 0.87984915, "epoch": 0.6151680069314704, "grad_norm": 13.316827774047852, "learning_rate": 9.708398978529311e-06, "loss": 0.59078865, "memory(GiB)": 34.88, "step": 22720, "train_speed(iter/s)": 0.412654 }, { "acc": 0.85880146, "epoch": 0.615303387214686, "grad_norm": 10.180693626403809, "learning_rate": 9.708210640989092e-06, "loss": 0.64337626, "memory(GiB)": 34.88, "step": 22725, "train_speed(iter/s)": 0.412658 }, { "acc": 0.87146282, "epoch": 0.6154387674979016, "grad_norm": 11.373709678649902, "learning_rate": 9.708022244475543e-06, "loss": 0.64610338, "memory(GiB)": 34.88, "step": 22730, "train_speed(iter/s)": 0.412662 }, { "acc": 0.8795104, "epoch": 0.6155741477811172, "grad_norm": 6.452949523925781, "learning_rate": 9.707833788991028e-06, "loss": 0.57918415, "memory(GiB)": 34.88, "step": 22735, "train_speed(iter/s)": 0.412665 }, { "acc": 0.84450903, "epoch": 0.6157095280643327, "grad_norm": 28.791135787963867, "learning_rate": 9.70764527453791e-06, "loss": 0.73369074, "memory(GiB)": 34.88, "step": 22740, "train_speed(iter/s)": 0.412669 }, { "acc": 0.86922951, "epoch": 0.6158449083475482, "grad_norm": 12.092944145202637, "learning_rate": 9.707456701118545e-06, "loss": 0.71922736, "memory(GiB)": 34.88, "step": 22745, "train_speed(iter/s)": 0.412673 }, { "acc": 0.89300747, "epoch": 0.6159802886307638, "grad_norm": 6.934541702270508, "learning_rate": 9.7072680687353e-06, "loss": 0.49577131, "memory(GiB)": 34.88, "step": 22750, "train_speed(iter/s)": 0.412678 }, { "acc": 0.87094059, "epoch": 0.6161156689139794, "grad_norm": 22.218952178955078, "learning_rate": 9.707079377390535e-06, "loss": 0.5668767, "memory(GiB)": 34.88, "step": 22755, "train_speed(iter/s)": 0.412682 }, { "acc": 0.85634747, "epoch": 0.6162510491971949, "grad_norm": 11.420659065246582, "learning_rate": 9.706890627086615e-06, "loss": 0.72928424, "memory(GiB)": 34.88, "step": 22760, "train_speed(iter/s)": 0.412686 }, { "acc": 0.88420267, "epoch": 0.6163864294804104, "grad_norm": 4.186727046966553, "learning_rate": 9.706701817825905e-06, "loss": 0.60345716, "memory(GiB)": 34.88, "step": 22765, "train_speed(iter/s)": 0.412689 }, { "acc": 0.86890602, "epoch": 0.616521809763626, "grad_norm": 7.485956192016602, "learning_rate": 9.70651294961077e-06, "loss": 0.71258516, "memory(GiB)": 34.88, "step": 22770, "train_speed(iter/s)": 0.412693 }, { "acc": 0.87344275, "epoch": 0.6166571900468416, "grad_norm": 9.366456985473633, "learning_rate": 9.706324022443574e-06, "loss": 0.64227371, "memory(GiB)": 34.88, "step": 22775, "train_speed(iter/s)": 0.412696 }, { "acc": 0.85396137, "epoch": 0.6167925703300571, "grad_norm": 10.691436767578125, "learning_rate": 9.706135036326688e-06, "loss": 0.69619484, "memory(GiB)": 34.88, "step": 22780, "train_speed(iter/s)": 0.412699 }, { "acc": 0.86369162, "epoch": 0.6169279506132727, "grad_norm": 18.348806381225586, "learning_rate": 9.705945991262475e-06, "loss": 0.71486945, "memory(GiB)": 34.88, "step": 22785, "train_speed(iter/s)": 0.412702 }, { "acc": 0.88524246, "epoch": 0.6170633308964882, "grad_norm": 13.638080596923828, "learning_rate": 9.705756887253303e-06, "loss": 0.54103532, "memory(GiB)": 34.88, "step": 22790, "train_speed(iter/s)": 0.412706 }, { "acc": 0.8715682, "epoch": 0.6171987111797038, "grad_norm": 6.590390682220459, "learning_rate": 9.705567724301546e-06, "loss": 0.76856647, "memory(GiB)": 34.88, "step": 22795, "train_speed(iter/s)": 0.41271 }, { "acc": 0.87891426, "epoch": 0.6173340914629193, "grad_norm": 9.124168395996094, "learning_rate": 9.705378502409566e-06, "loss": 0.73594131, "memory(GiB)": 34.88, "step": 22800, "train_speed(iter/s)": 0.412714 }, { "acc": 0.8882369, "epoch": 0.6174694717461349, "grad_norm": 10.796951293945312, "learning_rate": 9.705189221579741e-06, "loss": 0.50764799, "memory(GiB)": 34.88, "step": 22805, "train_speed(iter/s)": 0.412718 }, { "acc": 0.86292591, "epoch": 0.6176048520293504, "grad_norm": 19.48278045654297, "learning_rate": 9.704999881814438e-06, "loss": 0.75360203, "memory(GiB)": 34.88, "step": 22810, "train_speed(iter/s)": 0.412721 }, { "acc": 0.86388493, "epoch": 0.617740232312566, "grad_norm": 8.352150917053223, "learning_rate": 9.70481048311603e-06, "loss": 0.79363098, "memory(GiB)": 34.88, "step": 22815, "train_speed(iter/s)": 0.412726 }, { "acc": 0.86582689, "epoch": 0.6178756125957815, "grad_norm": 12.123485565185547, "learning_rate": 9.704621025486889e-06, "loss": 0.67967978, "memory(GiB)": 34.88, "step": 22820, "train_speed(iter/s)": 0.41273 }, { "acc": 0.86629829, "epoch": 0.6180109928789971, "grad_norm": 8.553709983825684, "learning_rate": 9.704431508929388e-06, "loss": 0.60120144, "memory(GiB)": 34.88, "step": 22825, "train_speed(iter/s)": 0.412735 }, { "acc": 0.86228542, "epoch": 0.6181463731622127, "grad_norm": 6.25021505355835, "learning_rate": 9.704241933445903e-06, "loss": 0.7072834, "memory(GiB)": 34.88, "step": 22830, "train_speed(iter/s)": 0.412739 }, { "acc": 0.86857891, "epoch": 0.6182817534454282, "grad_norm": 4.709056377410889, "learning_rate": 9.704052299038807e-06, "loss": 0.63329964, "memory(GiB)": 34.88, "step": 22835, "train_speed(iter/s)": 0.412743 }, { "acc": 0.86502132, "epoch": 0.6184171337286437, "grad_norm": 20.223419189453125, "learning_rate": 9.703862605710475e-06, "loss": 0.63470306, "memory(GiB)": 34.88, "step": 22840, "train_speed(iter/s)": 0.412745 }, { "acc": 0.86436081, "epoch": 0.6185525140118593, "grad_norm": 14.567329406738281, "learning_rate": 9.703672853463284e-06, "loss": 0.72363739, "memory(GiB)": 34.88, "step": 22845, "train_speed(iter/s)": 0.412749 }, { "acc": 0.84098473, "epoch": 0.6186878942950749, "grad_norm": 9.839183807373047, "learning_rate": 9.703483042299612e-06, "loss": 0.77660131, "memory(GiB)": 34.88, "step": 22850, "train_speed(iter/s)": 0.412753 }, { "acc": 0.88957634, "epoch": 0.6188232745782904, "grad_norm": 6.0026068687438965, "learning_rate": 9.703293172221836e-06, "loss": 0.54491954, "memory(GiB)": 34.88, "step": 22855, "train_speed(iter/s)": 0.412758 }, { "acc": 0.8639822, "epoch": 0.6189586548615059, "grad_norm": 5.220221996307373, "learning_rate": 9.703103243232336e-06, "loss": 0.72218852, "memory(GiB)": 34.88, "step": 22860, "train_speed(iter/s)": 0.412762 }, { "acc": 0.87848854, "epoch": 0.6190940351447215, "grad_norm": 8.485882759094238, "learning_rate": 9.702913255333488e-06, "loss": 0.59728832, "memory(GiB)": 34.88, "step": 22865, "train_speed(iter/s)": 0.412767 }, { "acc": 0.8633935, "epoch": 0.6192294154279371, "grad_norm": 8.154121398925781, "learning_rate": 9.702723208527674e-06, "loss": 0.6680234, "memory(GiB)": 34.88, "step": 22870, "train_speed(iter/s)": 0.412772 }, { "acc": 0.84002132, "epoch": 0.6193647957111527, "grad_norm": 10.766393661499023, "learning_rate": 9.702533102817275e-06, "loss": 0.84700298, "memory(GiB)": 34.88, "step": 22875, "train_speed(iter/s)": 0.412776 }, { "acc": 0.86064377, "epoch": 0.6195001759943681, "grad_norm": 10.364694595336914, "learning_rate": 9.702342938204672e-06, "loss": 0.67853751, "memory(GiB)": 34.88, "step": 22880, "train_speed(iter/s)": 0.41278 }, { "acc": 0.86768398, "epoch": 0.6196355562775837, "grad_norm": 5.865871906280518, "learning_rate": 9.702152714692247e-06, "loss": 0.54622808, "memory(GiB)": 34.88, "step": 22885, "train_speed(iter/s)": 0.412785 }, { "acc": 0.85111313, "epoch": 0.6197709365607993, "grad_norm": 16.57376480102539, "learning_rate": 9.701962432282383e-06, "loss": 0.82949228, "memory(GiB)": 34.88, "step": 22890, "train_speed(iter/s)": 0.41279 }, { "acc": 0.86019192, "epoch": 0.6199063168440149, "grad_norm": 8.324668884277344, "learning_rate": 9.701772090977462e-06, "loss": 0.7948812, "memory(GiB)": 34.88, "step": 22895, "train_speed(iter/s)": 0.412794 }, { "acc": 0.87987843, "epoch": 0.6200416971272303, "grad_norm": 8.857500076293945, "learning_rate": 9.701581690779872e-06, "loss": 0.6119782, "memory(GiB)": 34.88, "step": 22900, "train_speed(iter/s)": 0.412799 }, { "acc": 0.84523916, "epoch": 0.6201770774104459, "grad_norm": 11.851508140563965, "learning_rate": 9.701391231691995e-06, "loss": 0.81053972, "memory(GiB)": 34.88, "step": 22905, "train_speed(iter/s)": 0.412804 }, { "acc": 0.8607995, "epoch": 0.6203124576936615, "grad_norm": 4.878355026245117, "learning_rate": 9.70120071371622e-06, "loss": 0.78651576, "memory(GiB)": 34.88, "step": 22910, "train_speed(iter/s)": 0.412807 }, { "acc": 0.85789795, "epoch": 0.6204478379768771, "grad_norm": 11.703736305236816, "learning_rate": 9.70101013685493e-06, "loss": 0.82242451, "memory(GiB)": 34.88, "step": 22915, "train_speed(iter/s)": 0.412812 }, { "acc": 0.86711168, "epoch": 0.6205832182600926, "grad_norm": 10.054588317871094, "learning_rate": 9.700819501110515e-06, "loss": 0.69829407, "memory(GiB)": 34.88, "step": 22920, "train_speed(iter/s)": 0.412816 }, { "acc": 0.90594997, "epoch": 0.6207185985433081, "grad_norm": 11.932579040527344, "learning_rate": 9.700628806485361e-06, "loss": 0.45752311, "memory(GiB)": 34.88, "step": 22925, "train_speed(iter/s)": 0.41282 }, { "acc": 0.85306721, "epoch": 0.6208539788265237, "grad_norm": 10.760738372802734, "learning_rate": 9.700438052981858e-06, "loss": 0.72034607, "memory(GiB)": 34.88, "step": 22930, "train_speed(iter/s)": 0.412825 }, { "acc": 0.86023397, "epoch": 0.6209893591097393, "grad_norm": 5.312288761138916, "learning_rate": 9.700247240602398e-06, "loss": 0.68323712, "memory(GiB)": 34.88, "step": 22935, "train_speed(iter/s)": 0.412829 }, { "acc": 0.85637436, "epoch": 0.6211247393929548, "grad_norm": 11.787145614624023, "learning_rate": 9.700056369349366e-06, "loss": 0.66287374, "memory(GiB)": 34.88, "step": 22940, "train_speed(iter/s)": 0.412834 }, { "acc": 0.88972321, "epoch": 0.6212601196761703, "grad_norm": 6.97967529296875, "learning_rate": 9.699865439225156e-06, "loss": 0.573946, "memory(GiB)": 34.88, "step": 22945, "train_speed(iter/s)": 0.412838 }, { "acc": 0.86736698, "epoch": 0.6213954999593859, "grad_norm": 6.894720077514648, "learning_rate": 9.69967445023216e-06, "loss": 0.67954941, "memory(GiB)": 34.88, "step": 22950, "train_speed(iter/s)": 0.412843 }, { "acc": 0.86134586, "epoch": 0.6215308802426015, "grad_norm": 22.385435104370117, "learning_rate": 9.69948340237277e-06, "loss": 0.69733415, "memory(GiB)": 34.88, "step": 22955, "train_speed(iter/s)": 0.412847 }, { "acc": 0.86826801, "epoch": 0.621666260525817, "grad_norm": 42.82523727416992, "learning_rate": 9.699292295649381e-06, "loss": 0.69073601, "memory(GiB)": 34.88, "step": 22960, "train_speed(iter/s)": 0.412852 }, { "acc": 0.87473755, "epoch": 0.6218016408090326, "grad_norm": 8.889647483825684, "learning_rate": 9.699101130064385e-06, "loss": 0.6488008, "memory(GiB)": 34.88, "step": 22965, "train_speed(iter/s)": 0.412856 }, { "acc": 0.87199707, "epoch": 0.6219370210922481, "grad_norm": 4.885500431060791, "learning_rate": 9.698909905620177e-06, "loss": 0.67607827, "memory(GiB)": 34.88, "step": 22970, "train_speed(iter/s)": 0.412861 }, { "acc": 0.8734086, "epoch": 0.6220724013754637, "grad_norm": 5.996955871582031, "learning_rate": 9.698718622319152e-06, "loss": 0.63173389, "memory(GiB)": 34.88, "step": 22975, "train_speed(iter/s)": 0.412865 }, { "acc": 0.87057171, "epoch": 0.6222077816586792, "grad_norm": 17.730663299560547, "learning_rate": 9.698527280163707e-06, "loss": 0.58227472, "memory(GiB)": 34.88, "step": 22980, "train_speed(iter/s)": 0.412869 }, { "acc": 0.8637948, "epoch": 0.6223431619418948, "grad_norm": 13.360807418823242, "learning_rate": 9.698335879156239e-06, "loss": 0.698385, "memory(GiB)": 34.88, "step": 22985, "train_speed(iter/s)": 0.412874 }, { "acc": 0.8821928, "epoch": 0.6224785422251103, "grad_norm": 6.765719890594482, "learning_rate": 9.698144419299146e-06, "loss": 0.52025876, "memory(GiB)": 34.88, "step": 22990, "train_speed(iter/s)": 0.412878 }, { "acc": 0.85127831, "epoch": 0.6226139225083259, "grad_norm": 9.25625228881836, "learning_rate": 9.697952900594823e-06, "loss": 0.78582649, "memory(GiB)": 34.88, "step": 22995, "train_speed(iter/s)": 0.412882 }, { "acc": 0.87834492, "epoch": 0.6227493027915414, "grad_norm": 5.638182640075684, "learning_rate": 9.697761323045673e-06, "loss": 0.58306894, "memory(GiB)": 34.88, "step": 23000, "train_speed(iter/s)": 0.412887 }, { "acc": 0.87464581, "epoch": 0.622884683074757, "grad_norm": 9.039575576782227, "learning_rate": 9.697569686654096e-06, "loss": 0.58466911, "memory(GiB)": 34.88, "step": 23005, "train_speed(iter/s)": 0.412891 }, { "acc": 0.86359558, "epoch": 0.6230200633579726, "grad_norm": 7.241645336151123, "learning_rate": 9.697377991422492e-06, "loss": 0.71207361, "memory(GiB)": 34.88, "step": 23010, "train_speed(iter/s)": 0.412896 }, { "acc": 0.87954788, "epoch": 0.6231554436411881, "grad_norm": 12.845244407653809, "learning_rate": 9.697186237353261e-06, "loss": 0.5832922, "memory(GiB)": 34.88, "step": 23015, "train_speed(iter/s)": 0.4129 }, { "acc": 0.86733761, "epoch": 0.6232908239244036, "grad_norm": 12.49050521850586, "learning_rate": 9.696994424448805e-06, "loss": 0.70508471, "memory(GiB)": 34.88, "step": 23020, "train_speed(iter/s)": 0.412905 }, { "acc": 0.86790848, "epoch": 0.6234262042076192, "grad_norm": 10.145925521850586, "learning_rate": 9.69680255271153e-06, "loss": 0.73367133, "memory(GiB)": 34.88, "step": 23025, "train_speed(iter/s)": 0.412909 }, { "acc": 0.86074409, "epoch": 0.6235615844908348, "grad_norm": 7.356086254119873, "learning_rate": 9.696610622143835e-06, "loss": 0.60997267, "memory(GiB)": 34.88, "step": 23030, "train_speed(iter/s)": 0.412914 }, { "acc": 0.86588116, "epoch": 0.6236969647740503, "grad_norm": 5.396669387817383, "learning_rate": 9.696418632748126e-06, "loss": 0.66774321, "memory(GiB)": 34.88, "step": 23035, "train_speed(iter/s)": 0.412919 }, { "acc": 0.86095514, "epoch": 0.6238323450572658, "grad_norm": 6.597871780395508, "learning_rate": 9.696226584526812e-06, "loss": 0.78735509, "memory(GiB)": 34.88, "step": 23040, "train_speed(iter/s)": 0.412922 }, { "acc": 0.84551849, "epoch": 0.6239677253404814, "grad_norm": 11.542974472045898, "learning_rate": 9.696034477482294e-06, "loss": 0.75931339, "memory(GiB)": 34.88, "step": 23045, "train_speed(iter/s)": 0.412927 }, { "acc": 0.86104412, "epoch": 0.624103105623697, "grad_norm": 17.20550537109375, "learning_rate": 9.695842311616978e-06, "loss": 0.65435696, "memory(GiB)": 34.88, "step": 23050, "train_speed(iter/s)": 0.412931 }, { "acc": 0.85080376, "epoch": 0.6242384859069126, "grad_norm": 8.377801895141602, "learning_rate": 9.695650086933274e-06, "loss": 0.7541297, "memory(GiB)": 34.88, "step": 23055, "train_speed(iter/s)": 0.412935 }, { "acc": 0.88120775, "epoch": 0.624373866190128, "grad_norm": 7.05517053604126, "learning_rate": 9.69545780343359e-06, "loss": 0.5625802, "memory(GiB)": 34.88, "step": 23060, "train_speed(iter/s)": 0.41294 }, { "acc": 0.88540878, "epoch": 0.6245092464733436, "grad_norm": 6.8963847160339355, "learning_rate": 9.695265461120333e-06, "loss": 0.59261236, "memory(GiB)": 34.88, "step": 23065, "train_speed(iter/s)": 0.412944 }, { "acc": 0.89714584, "epoch": 0.6246446267565592, "grad_norm": 6.160715579986572, "learning_rate": 9.695073059995915e-06, "loss": 0.47909098, "memory(GiB)": 34.88, "step": 23070, "train_speed(iter/s)": 0.412948 }, { "acc": 0.87451668, "epoch": 0.6247800070397748, "grad_norm": 10.07298755645752, "learning_rate": 9.694880600062744e-06, "loss": 0.63414154, "memory(GiB)": 34.88, "step": 23075, "train_speed(iter/s)": 0.412952 }, { "acc": 0.85252962, "epoch": 0.6249153873229902, "grad_norm": 7.093571662902832, "learning_rate": 9.694688081323231e-06, "loss": 0.66967335, "memory(GiB)": 34.88, "step": 23080, "train_speed(iter/s)": 0.412956 }, { "acc": 0.85952864, "epoch": 0.6250507676062058, "grad_norm": 9.9391450881958, "learning_rate": 9.694495503779789e-06, "loss": 0.76215549, "memory(GiB)": 34.88, "step": 23085, "train_speed(iter/s)": 0.412961 }, { "acc": 0.86947441, "epoch": 0.6251861478894214, "grad_norm": 7.096711158752441, "learning_rate": 9.694302867434829e-06, "loss": 0.6155334, "memory(GiB)": 34.88, "step": 23090, "train_speed(iter/s)": 0.412965 }, { "acc": 0.8591917, "epoch": 0.625321528172637, "grad_norm": 3.6611456871032715, "learning_rate": 9.694110172290765e-06, "loss": 0.64941225, "memory(GiB)": 34.88, "step": 23095, "train_speed(iter/s)": 0.412969 }, { "acc": 0.87762203, "epoch": 0.6254569084558524, "grad_norm": 14.087610244750977, "learning_rate": 9.693917418350012e-06, "loss": 0.65886416, "memory(GiB)": 34.88, "step": 23100, "train_speed(iter/s)": 0.412974 }, { "acc": 0.851894, "epoch": 0.625592288739068, "grad_norm": 8.577851295471191, "learning_rate": 9.69372460561498e-06, "loss": 0.72547469, "memory(GiB)": 34.88, "step": 23105, "train_speed(iter/s)": 0.412979 }, { "acc": 0.86545639, "epoch": 0.6257276690222836, "grad_norm": 12.821992874145508, "learning_rate": 9.693531734088091e-06, "loss": 0.66349545, "memory(GiB)": 34.88, "step": 23110, "train_speed(iter/s)": 0.412983 }, { "acc": 0.85964661, "epoch": 0.6258630493054992, "grad_norm": 7.045016288757324, "learning_rate": 9.693338803771756e-06, "loss": 0.72116613, "memory(GiB)": 34.88, "step": 23115, "train_speed(iter/s)": 0.412988 }, { "acc": 0.85065136, "epoch": 0.6259984295887147, "grad_norm": 11.750983238220215, "learning_rate": 9.693145814668395e-06, "loss": 0.77872372, "memory(GiB)": 34.88, "step": 23120, "train_speed(iter/s)": 0.412992 }, { "acc": 0.87229815, "epoch": 0.6261338098719302, "grad_norm": 8.140478134155273, "learning_rate": 9.692952766780422e-06, "loss": 0.71210008, "memory(GiB)": 34.88, "step": 23125, "train_speed(iter/s)": 0.412997 }, { "acc": 0.84508219, "epoch": 0.6262691901551458, "grad_norm": 6.450045108795166, "learning_rate": 9.69275966011026e-06, "loss": 0.7430315, "memory(GiB)": 34.88, "step": 23130, "train_speed(iter/s)": 0.413001 }, { "acc": 0.86124306, "epoch": 0.6264045704383614, "grad_norm": 8.840142250061035, "learning_rate": 9.692566494660322e-06, "loss": 0.78104415, "memory(GiB)": 34.88, "step": 23135, "train_speed(iter/s)": 0.413005 }, { "acc": 0.83984356, "epoch": 0.6265399507215769, "grad_norm": 8.816411972045898, "learning_rate": 9.692373270433032e-06, "loss": 0.63809166, "memory(GiB)": 34.88, "step": 23140, "train_speed(iter/s)": 0.413009 }, { "acc": 0.84937305, "epoch": 0.6266753310047924, "grad_norm": 10.52013111114502, "learning_rate": 9.692179987430812e-06, "loss": 0.75278502, "memory(GiB)": 34.88, "step": 23145, "train_speed(iter/s)": 0.413013 }, { "acc": 0.84274101, "epoch": 0.626810711288008, "grad_norm": 8.453539848327637, "learning_rate": 9.69198664565608e-06, "loss": 0.73214641, "memory(GiB)": 34.88, "step": 23150, "train_speed(iter/s)": 0.413017 }, { "acc": 0.85155411, "epoch": 0.6269460915712236, "grad_norm": 7.7859954833984375, "learning_rate": 9.691793245111259e-06, "loss": 0.74371195, "memory(GiB)": 34.88, "step": 23155, "train_speed(iter/s)": 0.413021 }, { "acc": 0.87400408, "epoch": 0.6270814718544391, "grad_norm": 17.718612670898438, "learning_rate": 9.691599785798772e-06, "loss": 0.6754159, "memory(GiB)": 34.88, "step": 23160, "train_speed(iter/s)": 0.413025 }, { "acc": 0.84921656, "epoch": 0.6272168521376547, "grad_norm": 13.105653762817383, "learning_rate": 9.691406267721041e-06, "loss": 0.86136341, "memory(GiB)": 34.88, "step": 23165, "train_speed(iter/s)": 0.413029 }, { "acc": 0.85766716, "epoch": 0.6273522324208702, "grad_norm": 15.25224494934082, "learning_rate": 9.691212690880493e-06, "loss": 0.6637764, "memory(GiB)": 34.88, "step": 23170, "train_speed(iter/s)": 0.413033 }, { "acc": 0.87330313, "epoch": 0.6274876127040858, "grad_norm": 17.06570053100586, "learning_rate": 9.69101905527955e-06, "loss": 0.6770215, "memory(GiB)": 34.88, "step": 23175, "train_speed(iter/s)": 0.413036 }, { "acc": 0.83734531, "epoch": 0.6276229929873013, "grad_norm": 16.613401412963867, "learning_rate": 9.690825360920641e-06, "loss": 0.90601072, "memory(GiB)": 34.88, "step": 23180, "train_speed(iter/s)": 0.413041 }, { "acc": 0.85359859, "epoch": 0.6277583732705169, "grad_norm": 8.02109146118164, "learning_rate": 9.690631607806189e-06, "loss": 0.69238291, "memory(GiB)": 34.88, "step": 23185, "train_speed(iter/s)": 0.413045 }, { "acc": 0.85310755, "epoch": 0.6278937535537324, "grad_norm": 16.05470848083496, "learning_rate": 9.690437795938621e-06, "loss": 0.75503111, "memory(GiB)": 34.88, "step": 23190, "train_speed(iter/s)": 0.41305 }, { "acc": 0.89517174, "epoch": 0.628029133836948, "grad_norm": 18.502708435058594, "learning_rate": 9.690243925320369e-06, "loss": 0.56070528, "memory(GiB)": 34.88, "step": 23195, "train_speed(iter/s)": 0.413054 }, { "acc": 0.89183798, "epoch": 0.6281645141201635, "grad_norm": 6.168966293334961, "learning_rate": 9.690049995953858e-06, "loss": 0.62493095, "memory(GiB)": 34.88, "step": 23200, "train_speed(iter/s)": 0.413059 }, { "acc": 0.88376942, "epoch": 0.6282998944033791, "grad_norm": 4.997012138366699, "learning_rate": 9.689856007841517e-06, "loss": 0.49732227, "memory(GiB)": 34.88, "step": 23205, "train_speed(iter/s)": 0.413063 }, { "acc": 0.88186998, "epoch": 0.6284352746865947, "grad_norm": 10.082222938537598, "learning_rate": 9.68966196098578e-06, "loss": 0.63020096, "memory(GiB)": 34.88, "step": 23210, "train_speed(iter/s)": 0.413068 }, { "acc": 0.8517518, "epoch": 0.6285706549698102, "grad_norm": 7.2383222579956055, "learning_rate": 9.689467855389074e-06, "loss": 0.84615936, "memory(GiB)": 34.88, "step": 23215, "train_speed(iter/s)": 0.41307 }, { "acc": 0.87579937, "epoch": 0.6287060352530257, "grad_norm": 7.90035343170166, "learning_rate": 9.689273691053832e-06, "loss": 0.69782443, "memory(GiB)": 34.88, "step": 23220, "train_speed(iter/s)": 0.413074 }, { "acc": 0.86160431, "epoch": 0.6288414155362413, "grad_norm": 10.930404663085938, "learning_rate": 9.689079467982486e-06, "loss": 0.7383811, "memory(GiB)": 34.88, "step": 23225, "train_speed(iter/s)": 0.413078 }, { "acc": 0.86703405, "epoch": 0.6289767958194569, "grad_norm": 7.027517318725586, "learning_rate": 9.688885186177468e-06, "loss": 0.59643893, "memory(GiB)": 34.88, "step": 23230, "train_speed(iter/s)": 0.413083 }, { "acc": 0.87191029, "epoch": 0.6291121761026724, "grad_norm": 13.570137977600098, "learning_rate": 9.688690845641215e-06, "loss": 0.685952, "memory(GiB)": 34.88, "step": 23235, "train_speed(iter/s)": 0.413087 }, { "acc": 0.87605581, "epoch": 0.6292475563858879, "grad_norm": 7.645495891571045, "learning_rate": 9.68849644637616e-06, "loss": 0.58570709, "memory(GiB)": 34.88, "step": 23240, "train_speed(iter/s)": 0.413091 }, { "acc": 0.87144299, "epoch": 0.6293829366691035, "grad_norm": 7.863365650177002, "learning_rate": 9.688301988384734e-06, "loss": 0.55102153, "memory(GiB)": 34.88, "step": 23245, "train_speed(iter/s)": 0.413096 }, { "acc": 0.88686371, "epoch": 0.6295183169523191, "grad_norm": 15.766730308532715, "learning_rate": 9.688107471669377e-06, "loss": 0.57440867, "memory(GiB)": 34.88, "step": 23250, "train_speed(iter/s)": 0.4131 }, { "acc": 0.86590929, "epoch": 0.6296536972355347, "grad_norm": 10.203484535217285, "learning_rate": 9.68791289623253e-06, "loss": 0.72959557, "memory(GiB)": 34.88, "step": 23255, "train_speed(iter/s)": 0.413105 }, { "acc": 0.87882442, "epoch": 0.6297890775187501, "grad_norm": 23.67437744140625, "learning_rate": 9.68771826207662e-06, "loss": 0.68855333, "memory(GiB)": 34.88, "step": 23260, "train_speed(iter/s)": 0.413109 }, { "acc": 0.87703915, "epoch": 0.6299244578019657, "grad_norm": 9.219606399536133, "learning_rate": 9.687523569204094e-06, "loss": 0.54834905, "memory(GiB)": 34.88, "step": 23265, "train_speed(iter/s)": 0.413113 }, { "acc": 0.85866299, "epoch": 0.6300598380851813, "grad_norm": 9.241353988647461, "learning_rate": 9.687328817617388e-06, "loss": 0.72466736, "memory(GiB)": 34.88, "step": 23270, "train_speed(iter/s)": 0.413117 }, { "acc": 0.86714306, "epoch": 0.6301952183683969, "grad_norm": 4.015933990478516, "learning_rate": 9.68713400731894e-06, "loss": 0.74851875, "memory(GiB)": 34.88, "step": 23275, "train_speed(iter/s)": 0.413121 }, { "acc": 0.84368992, "epoch": 0.6303305986516123, "grad_norm": 10.275636672973633, "learning_rate": 9.68693913831119e-06, "loss": 0.69267168, "memory(GiB)": 34.88, "step": 23280, "train_speed(iter/s)": 0.413126 }, { "acc": 0.86320744, "epoch": 0.6304659789348279, "grad_norm": 9.051403045654297, "learning_rate": 9.686744210596586e-06, "loss": 0.61332932, "memory(GiB)": 34.88, "step": 23285, "train_speed(iter/s)": 0.41313 }, { "acc": 0.85368109, "epoch": 0.6306013592180435, "grad_norm": 8.046674728393555, "learning_rate": 9.686549224177561e-06, "loss": 0.6706645, "memory(GiB)": 34.88, "step": 23290, "train_speed(iter/s)": 0.413134 }, { "acc": 0.8437088, "epoch": 0.6307367395012591, "grad_norm": 7.641251087188721, "learning_rate": 9.686354179056562e-06, "loss": 0.81600437, "memory(GiB)": 34.88, "step": 23295, "train_speed(iter/s)": 0.413139 }, { "acc": 0.85660706, "epoch": 0.6308721197844745, "grad_norm": 6.335917949676514, "learning_rate": 9.686159075236033e-06, "loss": 0.71549177, "memory(GiB)": 34.88, "step": 23300, "train_speed(iter/s)": 0.413143 }, { "acc": 0.84256573, "epoch": 0.6310075000676901, "grad_norm": 7.8657097816467285, "learning_rate": 9.685963912718417e-06, "loss": 0.75183773, "memory(GiB)": 34.88, "step": 23305, "train_speed(iter/s)": 0.413146 }, { "acc": 0.87540073, "epoch": 0.6311428803509057, "grad_norm": 6.927670478820801, "learning_rate": 9.685768691506157e-06, "loss": 0.68933978, "memory(GiB)": 34.88, "step": 23310, "train_speed(iter/s)": 0.41315 }, { "acc": 0.86570663, "epoch": 0.6312782606341213, "grad_norm": 19.52297019958496, "learning_rate": 9.6855734116017e-06, "loss": 0.67208343, "memory(GiB)": 34.88, "step": 23315, "train_speed(iter/s)": 0.413154 }, { "acc": 0.87482605, "epoch": 0.6314136409173368, "grad_norm": 8.240983963012695, "learning_rate": 9.685378073007495e-06, "loss": 0.656107, "memory(GiB)": 34.88, "step": 23320, "train_speed(iter/s)": 0.413159 }, { "acc": 0.84640865, "epoch": 0.6315490212005523, "grad_norm": 15.060233116149902, "learning_rate": 9.685182675725984e-06, "loss": 0.76604786, "memory(GiB)": 34.88, "step": 23325, "train_speed(iter/s)": 0.413164 }, { "acc": 0.85815125, "epoch": 0.6316844014837679, "grad_norm": 8.607234001159668, "learning_rate": 9.684987219759617e-06, "loss": 0.63084064, "memory(GiB)": 34.88, "step": 23330, "train_speed(iter/s)": 0.413168 }, { "acc": 0.8893136, "epoch": 0.6318197817669835, "grad_norm": 6.725101947784424, "learning_rate": 9.684791705110843e-06, "loss": 0.56222186, "memory(GiB)": 34.88, "step": 23335, "train_speed(iter/s)": 0.413173 }, { "acc": 0.84075975, "epoch": 0.631955162050199, "grad_norm": 7.568910598754883, "learning_rate": 9.684596131782112e-06, "loss": 0.88084106, "memory(GiB)": 34.88, "step": 23340, "train_speed(iter/s)": 0.413177 }, { "acc": 0.85683813, "epoch": 0.6320905423334146, "grad_norm": 7.005561351776123, "learning_rate": 9.684400499775873e-06, "loss": 0.7565238, "memory(GiB)": 34.88, "step": 23345, "train_speed(iter/s)": 0.413182 }, { "acc": 0.87851505, "epoch": 0.6322259226166301, "grad_norm": 4.7566986083984375, "learning_rate": 9.684204809094572e-06, "loss": 0.59786515, "memory(GiB)": 34.88, "step": 23350, "train_speed(iter/s)": 0.413186 }, { "acc": 0.85727854, "epoch": 0.6323613028998457, "grad_norm": 6.048294544219971, "learning_rate": 9.684009059740668e-06, "loss": 0.68064075, "memory(GiB)": 34.88, "step": 23355, "train_speed(iter/s)": 0.41319 }, { "acc": 0.86752663, "epoch": 0.6324966831830612, "grad_norm": 92.9236831665039, "learning_rate": 9.683813251716612e-06, "loss": 0.69544525, "memory(GiB)": 34.88, "step": 23360, "train_speed(iter/s)": 0.413194 }, { "acc": 0.87702942, "epoch": 0.6326320634662768, "grad_norm": 9.81377124786377, "learning_rate": 9.683617385024852e-06, "loss": 0.59315128, "memory(GiB)": 34.88, "step": 23365, "train_speed(iter/s)": 0.413198 }, { "acc": 0.85649614, "epoch": 0.6327674437494923, "grad_norm": 14.454089164733887, "learning_rate": 9.683421459667845e-06, "loss": 0.69729023, "memory(GiB)": 34.88, "step": 23370, "train_speed(iter/s)": 0.413202 }, { "acc": 0.84704227, "epoch": 0.6329028240327079, "grad_norm": 6.6864013671875, "learning_rate": 9.683225475648046e-06, "loss": 0.73803177, "memory(GiB)": 34.88, "step": 23375, "train_speed(iter/s)": 0.413206 }, { "acc": 0.86845131, "epoch": 0.6330382043159234, "grad_norm": 6.793206214904785, "learning_rate": 9.683029432967907e-06, "loss": 0.60086412, "memory(GiB)": 34.88, "step": 23380, "train_speed(iter/s)": 0.41321 }, { "acc": 0.86330128, "epoch": 0.633173584599139, "grad_norm": 9.529501914978027, "learning_rate": 9.682833331629887e-06, "loss": 0.68452969, "memory(GiB)": 34.88, "step": 23385, "train_speed(iter/s)": 0.413215 }, { "acc": 0.89676714, "epoch": 0.6333089648823546, "grad_norm": 7.840529918670654, "learning_rate": 9.68263717163644e-06, "loss": 0.50041304, "memory(GiB)": 34.88, "step": 23390, "train_speed(iter/s)": 0.413219 }, { "acc": 0.87297535, "epoch": 0.6334443451655701, "grad_norm": 7.849257469177246, "learning_rate": 9.682440952990025e-06, "loss": 0.61332035, "memory(GiB)": 34.88, "step": 23395, "train_speed(iter/s)": 0.413223 }, { "acc": 0.87034969, "epoch": 0.6335797254487856, "grad_norm": 3.4601645469665527, "learning_rate": 9.6822446756931e-06, "loss": 0.62979965, "memory(GiB)": 34.88, "step": 23400, "train_speed(iter/s)": 0.413228 }, { "acc": 0.87120667, "epoch": 0.6337151057320012, "grad_norm": 16.057449340820312, "learning_rate": 9.682048339748123e-06, "loss": 0.71823578, "memory(GiB)": 34.88, "step": 23405, "train_speed(iter/s)": 0.413233 }, { "acc": 0.87359657, "epoch": 0.6338504860152168, "grad_norm": 10.454751014709473, "learning_rate": 9.681851945157554e-06, "loss": 0.61623807, "memory(GiB)": 34.88, "step": 23410, "train_speed(iter/s)": 0.413236 }, { "acc": 0.86441956, "epoch": 0.6339858662984323, "grad_norm": 6.232401371002197, "learning_rate": 9.681655491923853e-06, "loss": 0.72053261, "memory(GiB)": 34.88, "step": 23415, "train_speed(iter/s)": 0.41324 }, { "acc": 0.86015253, "epoch": 0.6341212465816478, "grad_norm": 16.74773406982422, "learning_rate": 9.68145898004948e-06, "loss": 0.760042, "memory(GiB)": 34.88, "step": 23420, "train_speed(iter/s)": 0.413244 }, { "acc": 0.86747589, "epoch": 0.6342566268648634, "grad_norm": 10.002609252929688, "learning_rate": 9.6812624095369e-06, "loss": 0.71314783, "memory(GiB)": 34.88, "step": 23425, "train_speed(iter/s)": 0.413248 }, { "acc": 0.89580984, "epoch": 0.634392007148079, "grad_norm": 2.61824107170105, "learning_rate": 9.681065780388572e-06, "loss": 0.46165752, "memory(GiB)": 34.88, "step": 23430, "train_speed(iter/s)": 0.413252 }, { "acc": 0.88445606, "epoch": 0.6345273874312946, "grad_norm": 7.248935699462891, "learning_rate": 9.68086909260696e-06, "loss": 0.62154512, "memory(GiB)": 34.88, "step": 23435, "train_speed(iter/s)": 0.413257 }, { "acc": 0.86137838, "epoch": 0.63466276771451, "grad_norm": 12.417278289794922, "learning_rate": 9.680672346194528e-06, "loss": 0.70449018, "memory(GiB)": 34.88, "step": 23440, "train_speed(iter/s)": 0.413261 }, { "acc": 0.85427876, "epoch": 0.6347981479977256, "grad_norm": 7.458229064941406, "learning_rate": 9.680475541153742e-06, "loss": 0.72566175, "memory(GiB)": 34.88, "step": 23445, "train_speed(iter/s)": 0.413266 }, { "acc": 0.87284317, "epoch": 0.6349335282809412, "grad_norm": 9.60915470123291, "learning_rate": 9.680278677487064e-06, "loss": 0.59685836, "memory(GiB)": 34.88, "step": 23450, "train_speed(iter/s)": 0.41327 }, { "acc": 0.87660332, "epoch": 0.6350689085641568, "grad_norm": 8.619219779968262, "learning_rate": 9.680081755196965e-06, "loss": 0.6366003, "memory(GiB)": 34.88, "step": 23455, "train_speed(iter/s)": 0.413274 }, { "acc": 0.8715889, "epoch": 0.6352042888473722, "grad_norm": 11.549603462219238, "learning_rate": 9.679884774285909e-06, "loss": 0.70426693, "memory(GiB)": 34.88, "step": 23460, "train_speed(iter/s)": 0.413279 }, { "acc": 0.85686502, "epoch": 0.6353396691305878, "grad_norm": 10.13681411743164, "learning_rate": 9.679687734756363e-06, "loss": 0.73184047, "memory(GiB)": 34.88, "step": 23465, "train_speed(iter/s)": 0.413283 }, { "acc": 0.8522768, "epoch": 0.6354750494138034, "grad_norm": 7.823216438293457, "learning_rate": 9.679490636610795e-06, "loss": 0.7328938, "memory(GiB)": 34.88, "step": 23470, "train_speed(iter/s)": 0.413287 }, { "acc": 0.86304417, "epoch": 0.635610429697019, "grad_norm": 9.251255989074707, "learning_rate": 9.679293479851676e-06, "loss": 0.72103868, "memory(GiB)": 34.88, "step": 23475, "train_speed(iter/s)": 0.413292 }, { "acc": 0.87841911, "epoch": 0.6357458099802344, "grad_norm": 5.703705310821533, "learning_rate": 9.679096264481475e-06, "loss": 0.59928184, "memory(GiB)": 34.88, "step": 23480, "train_speed(iter/s)": 0.413297 }, { "acc": 0.85033512, "epoch": 0.63588119026345, "grad_norm": 7.524986267089844, "learning_rate": 9.67889899050266e-06, "loss": 0.78149714, "memory(GiB)": 34.88, "step": 23485, "train_speed(iter/s)": 0.4133 }, { "acc": 0.85135822, "epoch": 0.6360165705466656, "grad_norm": 12.539688110351562, "learning_rate": 9.678701657917707e-06, "loss": 0.75370026, "memory(GiB)": 34.88, "step": 23490, "train_speed(iter/s)": 0.413304 }, { "acc": 0.87379608, "epoch": 0.6361519508298812, "grad_norm": 5.7927470207214355, "learning_rate": 9.678504266729086e-06, "loss": 0.62676773, "memory(GiB)": 34.88, "step": 23495, "train_speed(iter/s)": 0.413309 }, { "acc": 0.86313162, "epoch": 0.6362873311130967, "grad_norm": 6.673725128173828, "learning_rate": 9.67830681693927e-06, "loss": 0.70759363, "memory(GiB)": 34.88, "step": 23500, "train_speed(iter/s)": 0.413313 }, { "acc": 0.89507828, "epoch": 0.6364227113963122, "grad_norm": 12.903623580932617, "learning_rate": 9.678109308550729e-06, "loss": 0.52418289, "memory(GiB)": 34.88, "step": 23505, "train_speed(iter/s)": 0.413317 }, { "acc": 0.86293392, "epoch": 0.6365580916795278, "grad_norm": 18.60160255432129, "learning_rate": 9.67791174156594e-06, "loss": 0.68621883, "memory(GiB)": 34.88, "step": 23510, "train_speed(iter/s)": 0.413321 }, { "acc": 0.87001266, "epoch": 0.6366934719627434, "grad_norm": 7.139578342437744, "learning_rate": 9.67771411598738e-06, "loss": 0.74987392, "memory(GiB)": 34.88, "step": 23515, "train_speed(iter/s)": 0.413325 }, { "acc": 0.87853222, "epoch": 0.6368288522459589, "grad_norm": 67.63288879394531, "learning_rate": 9.677516431817522e-06, "loss": 0.61858654, "memory(GiB)": 34.88, "step": 23520, "train_speed(iter/s)": 0.413328 }, { "acc": 0.872295, "epoch": 0.6369642325291744, "grad_norm": 6.709988594055176, "learning_rate": 9.677318689058842e-06, "loss": 0.64643812, "memory(GiB)": 34.88, "step": 23525, "train_speed(iter/s)": 0.413332 }, { "acc": 0.86307354, "epoch": 0.63709961281239, "grad_norm": 11.736499786376953, "learning_rate": 9.67712088771382e-06, "loss": 0.71107054, "memory(GiB)": 34.88, "step": 23530, "train_speed(iter/s)": 0.413337 }, { "acc": 0.8774888, "epoch": 0.6372349930956056, "grad_norm": 11.486010551452637, "learning_rate": 9.676923027784929e-06, "loss": 0.64460974, "memory(GiB)": 34.88, "step": 23535, "train_speed(iter/s)": 0.413341 }, { "acc": 0.877384, "epoch": 0.6373703733788211, "grad_norm": 6.548914909362793, "learning_rate": 9.67672510927465e-06, "loss": 0.55987892, "memory(GiB)": 34.88, "step": 23540, "train_speed(iter/s)": 0.413345 }, { "acc": 0.86135168, "epoch": 0.6375057536620367, "grad_norm": 7.835744380950928, "learning_rate": 9.676527132185467e-06, "loss": 0.72260952, "memory(GiB)": 34.88, "step": 23545, "train_speed(iter/s)": 0.41335 }, { "acc": 0.86032648, "epoch": 0.6376411339452522, "grad_norm": 12.553470611572266, "learning_rate": 9.676329096519852e-06, "loss": 0.72480946, "memory(GiB)": 34.88, "step": 23550, "train_speed(iter/s)": 0.413354 }, { "acc": 0.89773941, "epoch": 0.6377765142284678, "grad_norm": 3.8913328647613525, "learning_rate": 9.67613100228029e-06, "loss": 0.43351827, "memory(GiB)": 34.88, "step": 23555, "train_speed(iter/s)": 0.413358 }, { "acc": 0.88674088, "epoch": 0.6379118945116833, "grad_norm": 7.119120121002197, "learning_rate": 9.675932849469264e-06, "loss": 0.48566704, "memory(GiB)": 34.88, "step": 23560, "train_speed(iter/s)": 0.413362 }, { "acc": 0.8758646, "epoch": 0.6380472747948989, "grad_norm": 6.969833850860596, "learning_rate": 9.675734638089251e-06, "loss": 0.70074329, "memory(GiB)": 34.88, "step": 23565, "train_speed(iter/s)": 0.413367 }, { "acc": 0.87436275, "epoch": 0.6381826550781144, "grad_norm": 11.57814884185791, "learning_rate": 9.675536368142741e-06, "loss": 0.66933641, "memory(GiB)": 34.88, "step": 23570, "train_speed(iter/s)": 0.413371 }, { "acc": 0.86461449, "epoch": 0.63831803536133, "grad_norm": 6.51116418838501, "learning_rate": 9.675338039632212e-06, "loss": 0.69758515, "memory(GiB)": 34.88, "step": 23575, "train_speed(iter/s)": 0.413375 }, { "acc": 0.89039154, "epoch": 0.6384534156445455, "grad_norm": 8.086854934692383, "learning_rate": 9.67513965256015e-06, "loss": 0.60480599, "memory(GiB)": 34.88, "step": 23580, "train_speed(iter/s)": 0.41338 }, { "acc": 0.85630617, "epoch": 0.6385887959277611, "grad_norm": 8.606912612915039, "learning_rate": 9.674941206929044e-06, "loss": 0.69254079, "memory(GiB)": 34.88, "step": 23585, "train_speed(iter/s)": 0.413384 }, { "acc": 0.85669956, "epoch": 0.6387241762109767, "grad_norm": 9.408766746520996, "learning_rate": 9.674742702741375e-06, "loss": 0.72887869, "memory(GiB)": 34.88, "step": 23590, "train_speed(iter/s)": 0.413388 }, { "acc": 0.86237144, "epoch": 0.6388595564941922, "grad_norm": 5.917313575744629, "learning_rate": 9.674544139999629e-06, "loss": 0.66527724, "memory(GiB)": 34.88, "step": 23595, "train_speed(iter/s)": 0.413392 }, { "acc": 0.85507956, "epoch": 0.6389949367774077, "grad_norm": 12.410821914672852, "learning_rate": 9.674345518706297e-06, "loss": 0.81368418, "memory(GiB)": 34.88, "step": 23600, "train_speed(iter/s)": 0.413397 }, { "acc": 0.85933266, "epoch": 0.6391303170606233, "grad_norm": 5.9115891456604, "learning_rate": 9.674146838863865e-06, "loss": 0.75986261, "memory(GiB)": 34.88, "step": 23605, "train_speed(iter/s)": 0.413401 }, { "acc": 0.86707935, "epoch": 0.6392656973438389, "grad_norm": 6.582316875457764, "learning_rate": 9.673948100474827e-06, "loss": 0.65102544, "memory(GiB)": 34.88, "step": 23610, "train_speed(iter/s)": 0.413405 }, { "acc": 0.86564484, "epoch": 0.6394010776270544, "grad_norm": 6.697900295257568, "learning_rate": 9.673749303541662e-06, "loss": 0.65612478, "memory(GiB)": 34.88, "step": 23615, "train_speed(iter/s)": 0.413408 }, { "acc": 0.86287737, "epoch": 0.6395364579102699, "grad_norm": 10.161088943481445, "learning_rate": 9.673550448066871e-06, "loss": 0.65066729, "memory(GiB)": 34.88, "step": 23620, "train_speed(iter/s)": 0.413413 }, { "acc": 0.88163261, "epoch": 0.6396718381934855, "grad_norm": 16.17244529724121, "learning_rate": 9.673351534052939e-06, "loss": 0.56193252, "memory(GiB)": 34.88, "step": 23625, "train_speed(iter/s)": 0.413416 }, { "acc": 0.8448451, "epoch": 0.6398072184767011, "grad_norm": 9.806815147399902, "learning_rate": 9.67315256150236e-06, "loss": 0.80636044, "memory(GiB)": 34.88, "step": 23630, "train_speed(iter/s)": 0.41342 }, { "acc": 0.8553009, "epoch": 0.6399425987599167, "grad_norm": 5.919013977050781, "learning_rate": 9.672953530417626e-06, "loss": 0.76924458, "memory(GiB)": 34.88, "step": 23635, "train_speed(iter/s)": 0.413424 }, { "acc": 0.83240499, "epoch": 0.6400779790431321, "grad_norm": 10.480870246887207, "learning_rate": 9.672754440801229e-06, "loss": 0.8813345, "memory(GiB)": 34.88, "step": 23640, "train_speed(iter/s)": 0.413429 }, { "acc": 0.8505127, "epoch": 0.6402133593263477, "grad_norm": 7.15045690536499, "learning_rate": 9.672555292655665e-06, "loss": 0.80785236, "memory(GiB)": 34.88, "step": 23645, "train_speed(iter/s)": 0.413432 }, { "acc": 0.86539955, "epoch": 0.6403487396095633, "grad_norm": 9.399455070495605, "learning_rate": 9.672356085983428e-06, "loss": 0.67710466, "memory(GiB)": 34.88, "step": 23650, "train_speed(iter/s)": 0.413436 }, { "acc": 0.87165232, "epoch": 0.6404841198927789, "grad_norm": 7.164381504058838, "learning_rate": 9.672156820787013e-06, "loss": 0.63394642, "memory(GiB)": 34.88, "step": 23655, "train_speed(iter/s)": 0.41344 }, { "acc": 0.9067956, "epoch": 0.6406195001759943, "grad_norm": 6.20272159576416, "learning_rate": 9.671957497068917e-06, "loss": 0.45356503, "memory(GiB)": 34.88, "step": 23660, "train_speed(iter/s)": 0.413444 }, { "acc": 0.8669014, "epoch": 0.6407548804592099, "grad_norm": 6.28562593460083, "learning_rate": 9.671758114831635e-06, "loss": 0.67338924, "memory(GiB)": 34.88, "step": 23665, "train_speed(iter/s)": 0.413449 }, { "acc": 0.85545254, "epoch": 0.6408902607424255, "grad_norm": 9.386996269226074, "learning_rate": 9.671558674077668e-06, "loss": 0.7177043, "memory(GiB)": 34.88, "step": 23670, "train_speed(iter/s)": 0.413453 }, { "acc": 0.85283432, "epoch": 0.6410256410256411, "grad_norm": 9.802525520324707, "learning_rate": 9.671359174809511e-06, "loss": 0.69426355, "memory(GiB)": 34.88, "step": 23675, "train_speed(iter/s)": 0.413457 }, { "acc": 0.85552368, "epoch": 0.6411610213088565, "grad_norm": 5.056037425994873, "learning_rate": 9.671159617029666e-06, "loss": 0.82175589, "memory(GiB)": 34.88, "step": 23680, "train_speed(iter/s)": 0.413461 }, { "acc": 0.86526308, "epoch": 0.6412964015920721, "grad_norm": 4.330073356628418, "learning_rate": 9.67096000074063e-06, "loss": 0.67006192, "memory(GiB)": 34.88, "step": 23685, "train_speed(iter/s)": 0.413465 }, { "acc": 0.86887417, "epoch": 0.6414317818752877, "grad_norm": 6.885264873504639, "learning_rate": 9.670760325944905e-06, "loss": 0.7280304, "memory(GiB)": 34.88, "step": 23690, "train_speed(iter/s)": 0.41347 }, { "acc": 0.86879692, "epoch": 0.6415671621585033, "grad_norm": 8.575655937194824, "learning_rate": 9.670560592644995e-06, "loss": 0.63011713, "memory(GiB)": 34.88, "step": 23695, "train_speed(iter/s)": 0.413474 }, { "acc": 0.88963661, "epoch": 0.6417025424417188, "grad_norm": 11.894819259643555, "learning_rate": 9.670360800843397e-06, "loss": 0.55582781, "memory(GiB)": 34.88, "step": 23700, "train_speed(iter/s)": 0.413478 }, { "acc": 0.86811895, "epoch": 0.6418379227249343, "grad_norm": 8.299501419067383, "learning_rate": 9.670160950542618e-06, "loss": 0.69397225, "memory(GiB)": 34.88, "step": 23705, "train_speed(iter/s)": 0.413482 }, { "acc": 0.88456554, "epoch": 0.6419733030081499, "grad_norm": 10.37386703491211, "learning_rate": 9.669961041745157e-06, "loss": 0.65681505, "memory(GiB)": 34.88, "step": 23710, "train_speed(iter/s)": 0.413486 }, { "acc": 0.87678165, "epoch": 0.6421086832913655, "grad_norm": 6.823404312133789, "learning_rate": 9.669761074453524e-06, "loss": 0.62698593, "memory(GiB)": 34.88, "step": 23715, "train_speed(iter/s)": 0.413491 }, { "acc": 0.85071611, "epoch": 0.642244063574581, "grad_norm": 13.085235595703125, "learning_rate": 9.669561048670218e-06, "loss": 0.75158954, "memory(GiB)": 34.88, "step": 23720, "train_speed(iter/s)": 0.413494 }, { "acc": 0.85896988, "epoch": 0.6423794438577966, "grad_norm": 9.195805549621582, "learning_rate": 9.66936096439775e-06, "loss": 0.74818163, "memory(GiB)": 34.88, "step": 23725, "train_speed(iter/s)": 0.413498 }, { "acc": 0.86678448, "epoch": 0.6425148241410121, "grad_norm": 9.422666549682617, "learning_rate": 9.669160821638623e-06, "loss": 0.74124794, "memory(GiB)": 34.88, "step": 23730, "train_speed(iter/s)": 0.413502 }, { "acc": 0.88762627, "epoch": 0.6426502044242276, "grad_norm": 5.2537360191345215, "learning_rate": 9.668960620395346e-06, "loss": 0.46641288, "memory(GiB)": 34.88, "step": 23735, "train_speed(iter/s)": 0.413506 }, { "acc": 0.8872612, "epoch": 0.6427855847074432, "grad_norm": 6.2691216468811035, "learning_rate": 9.668760360670426e-06, "loss": 0.59005766, "memory(GiB)": 34.88, "step": 23740, "train_speed(iter/s)": 0.41351 }, { "acc": 0.89158068, "epoch": 0.6429209649906588, "grad_norm": 12.819406509399414, "learning_rate": 9.668560042466371e-06, "loss": 0.52899799, "memory(GiB)": 34.88, "step": 23745, "train_speed(iter/s)": 0.413514 }, { "acc": 0.88232508, "epoch": 0.6430563452738743, "grad_norm": 10.026135444641113, "learning_rate": 9.668359665785691e-06, "loss": 0.60710001, "memory(GiB)": 34.88, "step": 23750, "train_speed(iter/s)": 0.413518 }, { "acc": 0.88002186, "epoch": 0.6431917255570898, "grad_norm": 8.976057052612305, "learning_rate": 9.668159230630898e-06, "loss": 0.58627105, "memory(GiB)": 34.88, "step": 23755, "train_speed(iter/s)": 0.413522 }, { "acc": 0.86441965, "epoch": 0.6433271058403054, "grad_norm": 12.27953052520752, "learning_rate": 9.6679587370045e-06, "loss": 0.6437129, "memory(GiB)": 34.88, "step": 23760, "train_speed(iter/s)": 0.413526 }, { "acc": 0.8721302, "epoch": 0.643462486123521, "grad_norm": 19.48395347595215, "learning_rate": 9.667758184909011e-06, "loss": 0.63049078, "memory(GiB)": 34.88, "step": 23765, "train_speed(iter/s)": 0.413531 }, { "acc": 0.85182209, "epoch": 0.6435978664067366, "grad_norm": 6.012327671051025, "learning_rate": 9.66755757434694e-06, "loss": 0.78350868, "memory(GiB)": 34.88, "step": 23770, "train_speed(iter/s)": 0.413535 }, { "acc": 0.87287216, "epoch": 0.643733246689952, "grad_norm": 16.749900817871094, "learning_rate": 9.667356905320804e-06, "loss": 0.71076341, "memory(GiB)": 34.88, "step": 23775, "train_speed(iter/s)": 0.413539 }, { "acc": 0.87343102, "epoch": 0.6438686269731676, "grad_norm": 12.02688217163086, "learning_rate": 9.667156177833115e-06, "loss": 0.73103313, "memory(GiB)": 34.88, "step": 23780, "train_speed(iter/s)": 0.413543 }, { "acc": 0.86799717, "epoch": 0.6440040072563832, "grad_norm": 7.724246025085449, "learning_rate": 9.666955391886387e-06, "loss": 0.7109086, "memory(GiB)": 34.88, "step": 23785, "train_speed(iter/s)": 0.413547 }, { "acc": 0.84939461, "epoch": 0.6441393875395988, "grad_norm": 13.54655933380127, "learning_rate": 9.666754547483135e-06, "loss": 0.77004967, "memory(GiB)": 34.88, "step": 23790, "train_speed(iter/s)": 0.413551 }, { "acc": 0.87141075, "epoch": 0.6442747678228142, "grad_norm": 14.749463081359863, "learning_rate": 9.666553644625876e-06, "loss": 0.74000087, "memory(GiB)": 34.88, "step": 23795, "train_speed(iter/s)": 0.413555 }, { "acc": 0.88297844, "epoch": 0.6444101481060298, "grad_norm": 6.330349922180176, "learning_rate": 9.666352683317126e-06, "loss": 0.558952, "memory(GiB)": 34.88, "step": 23800, "train_speed(iter/s)": 0.41356 }, { "acc": 0.8542469, "epoch": 0.6445455283892454, "grad_norm": 18.176551818847656, "learning_rate": 9.666151663559404e-06, "loss": 0.75185013, "memory(GiB)": 34.88, "step": 23805, "train_speed(iter/s)": 0.413564 }, { "acc": 0.84632034, "epoch": 0.644680908672461, "grad_norm": 14.280608177185059, "learning_rate": 9.665950585355225e-06, "loss": 0.82632065, "memory(GiB)": 34.88, "step": 23810, "train_speed(iter/s)": 0.413568 }, { "acc": 0.8488245, "epoch": 0.6448162889556764, "grad_norm": 12.1511869430542, "learning_rate": 9.665749448707112e-06, "loss": 0.88951168, "memory(GiB)": 34.88, "step": 23815, "train_speed(iter/s)": 0.413572 }, { "acc": 0.89058762, "epoch": 0.644951669238892, "grad_norm": 8.970173835754395, "learning_rate": 9.66554825361758e-06, "loss": 0.54415398, "memory(GiB)": 34.88, "step": 23820, "train_speed(iter/s)": 0.413577 }, { "acc": 0.88151226, "epoch": 0.6450870495221076, "grad_norm": 7.597692012786865, "learning_rate": 9.665347000089154e-06, "loss": 0.56973953, "memory(GiB)": 34.88, "step": 23825, "train_speed(iter/s)": 0.413581 }, { "acc": 0.87148437, "epoch": 0.6452224298053232, "grad_norm": 9.654756546020508, "learning_rate": 9.665145688124352e-06, "loss": 0.66920109, "memory(GiB)": 34.88, "step": 23830, "train_speed(iter/s)": 0.413585 }, { "acc": 0.87306633, "epoch": 0.6453578100885387, "grad_norm": 16.714601516723633, "learning_rate": 9.664944317725697e-06, "loss": 0.62075243, "memory(GiB)": 34.88, "step": 23835, "train_speed(iter/s)": 0.413589 }, { "acc": 0.87290249, "epoch": 0.6454931903717542, "grad_norm": 3.8908193111419678, "learning_rate": 9.66474288889571e-06, "loss": 0.67551661, "memory(GiB)": 34.88, "step": 23840, "train_speed(iter/s)": 0.413593 }, { "acc": 0.88183222, "epoch": 0.6456285706549698, "grad_norm": 7.0609517097473145, "learning_rate": 9.664541401636917e-06, "loss": 0.55530758, "memory(GiB)": 34.88, "step": 23845, "train_speed(iter/s)": 0.413597 }, { "acc": 0.8818222, "epoch": 0.6457639509381854, "grad_norm": 12.680161476135254, "learning_rate": 9.664339855951842e-06, "loss": 0.60267754, "memory(GiB)": 34.88, "step": 23850, "train_speed(iter/s)": 0.413601 }, { "acc": 0.88108826, "epoch": 0.6458993312214009, "grad_norm": 7.7652201652526855, "learning_rate": 9.664138251843006e-06, "loss": 0.60606203, "memory(GiB)": 34.88, "step": 23855, "train_speed(iter/s)": 0.413606 }, { "acc": 0.87129154, "epoch": 0.6460347115046164, "grad_norm": 13.123090744018555, "learning_rate": 9.663936589312938e-06, "loss": 0.73755503, "memory(GiB)": 34.88, "step": 23860, "train_speed(iter/s)": 0.413611 }, { "acc": 0.85723124, "epoch": 0.646170091787832, "grad_norm": 10.412638664245605, "learning_rate": 9.663734868364163e-06, "loss": 0.67376165, "memory(GiB)": 34.88, "step": 23865, "train_speed(iter/s)": 0.413615 }, { "acc": 0.87741566, "epoch": 0.6463054720710476, "grad_norm": 7.1652631759643555, "learning_rate": 9.663533088999207e-06, "loss": 0.61460028, "memory(GiB)": 34.88, "step": 23870, "train_speed(iter/s)": 0.413619 }, { "acc": 0.88003492, "epoch": 0.6464408523542631, "grad_norm": 17.777149200439453, "learning_rate": 9.6633312512206e-06, "loss": 0.62418976, "memory(GiB)": 34.88, "step": 23875, "train_speed(iter/s)": 0.413622 }, { "acc": 0.87379351, "epoch": 0.6465762326374787, "grad_norm": 7.096278667449951, "learning_rate": 9.66312935503087e-06, "loss": 0.67246881, "memory(GiB)": 34.88, "step": 23880, "train_speed(iter/s)": 0.413626 }, { "acc": 0.8759573, "epoch": 0.6467116129206942, "grad_norm": 8.899229049682617, "learning_rate": 9.662927400432543e-06, "loss": 0.51034112, "memory(GiB)": 34.88, "step": 23885, "train_speed(iter/s)": 0.41363 }, { "acc": 0.84348907, "epoch": 0.6468469932039098, "grad_norm": 14.616583824157715, "learning_rate": 9.66272538742815e-06, "loss": 0.86355677, "memory(GiB)": 34.88, "step": 23890, "train_speed(iter/s)": 0.413635 }, { "acc": 0.85491943, "epoch": 0.6469823734871253, "grad_norm": 16.365615844726562, "learning_rate": 9.662523316020226e-06, "loss": 0.75987344, "memory(GiB)": 34.88, "step": 23895, "train_speed(iter/s)": 0.413639 }, { "acc": 0.86994038, "epoch": 0.6471177537703409, "grad_norm": 8.92470932006836, "learning_rate": 9.6623211862113e-06, "loss": 0.58495994, "memory(GiB)": 34.88, "step": 23900, "train_speed(iter/s)": 0.413643 }, { "acc": 0.88131208, "epoch": 0.6472531340535564, "grad_norm": 7.428187847137451, "learning_rate": 9.6621189980039e-06, "loss": 0.56242557, "memory(GiB)": 34.88, "step": 23905, "train_speed(iter/s)": 0.413648 }, { "acc": 0.85714436, "epoch": 0.647388514336772, "grad_norm": 11.491146087646484, "learning_rate": 9.661916751400561e-06, "loss": 0.71929688, "memory(GiB)": 34.88, "step": 23910, "train_speed(iter/s)": 0.413652 }, { "acc": 0.86217813, "epoch": 0.6475238946199875, "grad_norm": 8.449163436889648, "learning_rate": 9.661714446403822e-06, "loss": 0.70875101, "memory(GiB)": 34.88, "step": 23915, "train_speed(iter/s)": 0.413656 }, { "acc": 0.83960705, "epoch": 0.6476592749032031, "grad_norm": 9.409666061401367, "learning_rate": 9.661512083016208e-06, "loss": 0.84351206, "memory(GiB)": 34.88, "step": 23920, "train_speed(iter/s)": 0.41366 }, { "acc": 0.84935913, "epoch": 0.6477946551864187, "grad_norm": 7.157144069671631, "learning_rate": 9.661309661240263e-06, "loss": 0.83587952, "memory(GiB)": 34.88, "step": 23925, "train_speed(iter/s)": 0.413665 }, { "acc": 0.85579519, "epoch": 0.6479300354696342, "grad_norm": 18.174259185791016, "learning_rate": 9.661107181078516e-06, "loss": 0.65579119, "memory(GiB)": 34.88, "step": 23930, "train_speed(iter/s)": 0.413668 }, { "acc": 0.85908728, "epoch": 0.6480654157528497, "grad_norm": 12.600052833557129, "learning_rate": 9.660904642533505e-06, "loss": 0.74772129, "memory(GiB)": 34.88, "step": 23935, "train_speed(iter/s)": 0.413672 }, { "acc": 0.86412544, "epoch": 0.6482007960360653, "grad_norm": 7.233473777770996, "learning_rate": 9.66070204560777e-06, "loss": 0.65173883, "memory(GiB)": 34.88, "step": 23940, "train_speed(iter/s)": 0.413676 }, { "acc": 0.8920352, "epoch": 0.6483361763192809, "grad_norm": 9.895864486694336, "learning_rate": 9.660499390303848e-06, "loss": 0.53434668, "memory(GiB)": 34.88, "step": 23945, "train_speed(iter/s)": 0.41368 }, { "acc": 0.87642403, "epoch": 0.6484715566024964, "grad_norm": 7.416674613952637, "learning_rate": 9.660296676624276e-06, "loss": 0.66473637, "memory(GiB)": 34.88, "step": 23950, "train_speed(iter/s)": 0.413684 }, { "acc": 0.8820549, "epoch": 0.6486069368857119, "grad_norm": 7.900949001312256, "learning_rate": 9.660093904571592e-06, "loss": 0.56063185, "memory(GiB)": 34.88, "step": 23955, "train_speed(iter/s)": 0.413688 }, { "acc": 0.8827631, "epoch": 0.6487423171689275, "grad_norm": 7.422649383544922, "learning_rate": 9.659891074148342e-06, "loss": 0.57287788, "memory(GiB)": 34.88, "step": 23960, "train_speed(iter/s)": 0.413692 }, { "acc": 0.86455021, "epoch": 0.6488776974521431, "grad_norm": 7.786479473114014, "learning_rate": 9.65968818535706e-06, "loss": 0.67278395, "memory(GiB)": 34.88, "step": 23965, "train_speed(iter/s)": 0.413695 }, { "acc": 0.8793026, "epoch": 0.6490130777353587, "grad_norm": 9.174619674682617, "learning_rate": 9.659485238200292e-06, "loss": 0.64387136, "memory(GiB)": 34.88, "step": 23970, "train_speed(iter/s)": 0.4137 }, { "acc": 0.85280151, "epoch": 0.6491484580185741, "grad_norm": 7.342775344848633, "learning_rate": 9.659282232680579e-06, "loss": 0.71897326, "memory(GiB)": 34.88, "step": 23975, "train_speed(iter/s)": 0.413704 }, { "acc": 0.86050835, "epoch": 0.6492838383017897, "grad_norm": 14.316370964050293, "learning_rate": 9.659079168800464e-06, "loss": 0.6197207, "memory(GiB)": 34.88, "step": 23980, "train_speed(iter/s)": 0.413709 }, { "acc": 0.87954559, "epoch": 0.6494192185850053, "grad_norm": 6.436028957366943, "learning_rate": 9.658876046562491e-06, "loss": 0.59323597, "memory(GiB)": 34.88, "step": 23985, "train_speed(iter/s)": 0.413712 }, { "acc": 0.87921104, "epoch": 0.6495545988682209, "grad_norm": 8.105619430541992, "learning_rate": 9.658672865969204e-06, "loss": 0.66583681, "memory(GiB)": 34.88, "step": 23990, "train_speed(iter/s)": 0.413716 }, { "acc": 0.86375542, "epoch": 0.6496899791514363, "grad_norm": 9.282776832580566, "learning_rate": 9.658469627023149e-06, "loss": 0.70963616, "memory(GiB)": 34.88, "step": 23995, "train_speed(iter/s)": 0.413721 }, { "acc": 0.87277184, "epoch": 0.6498253594346519, "grad_norm": 7.8028950691223145, "learning_rate": 9.65826632972687e-06, "loss": 0.67589941, "memory(GiB)": 34.88, "step": 24000, "train_speed(iter/s)": 0.413725 }, { "acc": 0.88861485, "epoch": 0.6499607397178675, "grad_norm": 4.53658390045166, "learning_rate": 9.658062974082917e-06, "loss": 0.54848523, "memory(GiB)": 34.88, "step": 24005, "train_speed(iter/s)": 0.413729 }, { "acc": 0.86982679, "epoch": 0.6500961200010831, "grad_norm": 6.390591621398926, "learning_rate": 9.657859560093835e-06, "loss": 0.59015932, "memory(GiB)": 34.88, "step": 24010, "train_speed(iter/s)": 0.413733 }, { "acc": 0.87869482, "epoch": 0.6502315002842985, "grad_norm": 13.160883903503418, "learning_rate": 9.657656087762173e-06, "loss": 0.6287292, "memory(GiB)": 34.88, "step": 24015, "train_speed(iter/s)": 0.413737 }, { "acc": 0.8442791, "epoch": 0.6503668805675141, "grad_norm": 14.65692138671875, "learning_rate": 9.65745255709048e-06, "loss": 0.79059825, "memory(GiB)": 34.88, "step": 24020, "train_speed(iter/s)": 0.413741 }, { "acc": 0.86419697, "epoch": 0.6505022608507297, "grad_norm": 8.407515525817871, "learning_rate": 9.657248968081304e-06, "loss": 0.71371799, "memory(GiB)": 34.88, "step": 24025, "train_speed(iter/s)": 0.413745 }, { "acc": 0.86230621, "epoch": 0.6506376411339453, "grad_norm": 9.140836715698242, "learning_rate": 9.657045320737198e-06, "loss": 0.71178875, "memory(GiB)": 34.88, "step": 24030, "train_speed(iter/s)": 0.41375 }, { "acc": 0.87732458, "epoch": 0.6507730214171608, "grad_norm": 12.438558578491211, "learning_rate": 9.65684161506071e-06, "loss": 0.61400676, "memory(GiB)": 34.88, "step": 24035, "train_speed(iter/s)": 0.413754 }, { "acc": 0.8435524, "epoch": 0.6509084017003763, "grad_norm": 10.792561531066895, "learning_rate": 9.656637851054395e-06, "loss": 0.90636673, "memory(GiB)": 34.88, "step": 24040, "train_speed(iter/s)": 0.413758 }, { "acc": 0.87426291, "epoch": 0.6510437819835919, "grad_norm": 7.485747814178467, "learning_rate": 9.656434028720806e-06, "loss": 0.66749859, "memory(GiB)": 34.88, "step": 24045, "train_speed(iter/s)": 0.413762 }, { "acc": 0.86358471, "epoch": 0.6511791622668075, "grad_norm": 6.2396416664123535, "learning_rate": 9.656230148062492e-06, "loss": 0.76408234, "memory(GiB)": 34.88, "step": 24050, "train_speed(iter/s)": 0.413766 }, { "acc": 0.86846962, "epoch": 0.651314542550023, "grad_norm": 9.407710075378418, "learning_rate": 9.656026209082009e-06, "loss": 0.70003414, "memory(GiB)": 34.88, "step": 24055, "train_speed(iter/s)": 0.41377 }, { "acc": 0.85573702, "epoch": 0.6514499228332385, "grad_norm": 6.003354549407959, "learning_rate": 9.655822211781913e-06, "loss": 0.69789572, "memory(GiB)": 34.88, "step": 24060, "train_speed(iter/s)": 0.413774 }, { "acc": 0.89702053, "epoch": 0.6515853031164541, "grad_norm": 5.630220890045166, "learning_rate": 9.655618156164758e-06, "loss": 0.48159552, "memory(GiB)": 34.88, "step": 24065, "train_speed(iter/s)": 0.413778 }, { "acc": 0.87646408, "epoch": 0.6517206833996697, "grad_norm": 14.371870040893555, "learning_rate": 9.655414042233104e-06, "loss": 0.70368576, "memory(GiB)": 34.88, "step": 24070, "train_speed(iter/s)": 0.413782 }, { "acc": 0.88822145, "epoch": 0.6518560636828852, "grad_norm": 4.421794891357422, "learning_rate": 9.6552098699895e-06, "loss": 0.56372156, "memory(GiB)": 34.88, "step": 24075, "train_speed(iter/s)": 0.413786 }, { "acc": 0.87775164, "epoch": 0.6519914439661008, "grad_norm": 6.194084644317627, "learning_rate": 9.65500563943651e-06, "loss": 0.60725489, "memory(GiB)": 34.88, "step": 24080, "train_speed(iter/s)": 0.413789 }, { "acc": 0.86363297, "epoch": 0.6521268242493163, "grad_norm": 5.093056678771973, "learning_rate": 9.654801350576692e-06, "loss": 0.79020329, "memory(GiB)": 34.88, "step": 24085, "train_speed(iter/s)": 0.413793 }, { "acc": 0.87477417, "epoch": 0.6522622045325319, "grad_norm": 10.575164794921875, "learning_rate": 9.654597003412603e-06, "loss": 0.63300848, "memory(GiB)": 34.88, "step": 24090, "train_speed(iter/s)": 0.413798 }, { "acc": 0.88152151, "epoch": 0.6523975848157474, "grad_norm": 5.945868968963623, "learning_rate": 9.654392597946804e-06, "loss": 0.60177574, "memory(GiB)": 34.88, "step": 24095, "train_speed(iter/s)": 0.413802 }, { "acc": 0.8641325, "epoch": 0.652532965098963, "grad_norm": 11.002069473266602, "learning_rate": 9.654188134181856e-06, "loss": 0.68706274, "memory(GiB)": 34.88, "step": 24100, "train_speed(iter/s)": 0.413806 }, { "acc": 0.86098938, "epoch": 0.6526683453821785, "grad_norm": 9.713507652282715, "learning_rate": 9.653983612120319e-06, "loss": 0.67451978, "memory(GiB)": 34.88, "step": 24105, "train_speed(iter/s)": 0.41381 }, { "acc": 0.87696762, "epoch": 0.6528037256653941, "grad_norm": 10.682472229003906, "learning_rate": 9.653779031764755e-06, "loss": 0.61473436, "memory(GiB)": 34.88, "step": 24110, "train_speed(iter/s)": 0.413814 }, { "acc": 0.85969486, "epoch": 0.6529391059486096, "grad_norm": 11.935351371765137, "learning_rate": 9.653574393117729e-06, "loss": 0.82431707, "memory(GiB)": 34.88, "step": 24115, "train_speed(iter/s)": 0.413818 }, { "acc": 0.84369602, "epoch": 0.6530744862318252, "grad_norm": 12.05320930480957, "learning_rate": 9.653369696181802e-06, "loss": 0.81990852, "memory(GiB)": 34.88, "step": 24120, "train_speed(iter/s)": 0.413822 }, { "acc": 0.86959295, "epoch": 0.6532098665150408, "grad_norm": 7.579305171966553, "learning_rate": 9.653164940959539e-06, "loss": 0.67716212, "memory(GiB)": 34.88, "step": 24125, "train_speed(iter/s)": 0.413826 }, { "acc": 0.84210968, "epoch": 0.6533452467982563, "grad_norm": 11.762382507324219, "learning_rate": 9.652960127453506e-06, "loss": 0.85531263, "memory(GiB)": 34.88, "step": 24130, "train_speed(iter/s)": 0.41383 }, { "acc": 0.85502539, "epoch": 0.6534806270814718, "grad_norm": 15.359278678894043, "learning_rate": 9.652755255666265e-06, "loss": 0.75378685, "memory(GiB)": 34.88, "step": 24135, "train_speed(iter/s)": 0.413834 }, { "acc": 0.86763725, "epoch": 0.6536160073646874, "grad_norm": 6.3445143699646, "learning_rate": 9.652550325600388e-06, "loss": 0.65263915, "memory(GiB)": 34.88, "step": 24140, "train_speed(iter/s)": 0.413838 }, { "acc": 0.86608286, "epoch": 0.653751387647903, "grad_norm": 14.823546409606934, "learning_rate": 9.65234533725844e-06, "loss": 0.72653942, "memory(GiB)": 34.88, "step": 24145, "train_speed(iter/s)": 0.413842 }, { "acc": 0.88746662, "epoch": 0.6538867679311186, "grad_norm": 4.619513511657715, "learning_rate": 9.652140290642986e-06, "loss": 0.58902678, "memory(GiB)": 34.88, "step": 24150, "train_speed(iter/s)": 0.413846 }, { "acc": 0.85693455, "epoch": 0.654022148214334, "grad_norm": 9.15198802947998, "learning_rate": 9.651935185756596e-06, "loss": 0.84383974, "memory(GiB)": 34.88, "step": 24155, "train_speed(iter/s)": 0.41385 }, { "acc": 0.87752228, "epoch": 0.6541575284975496, "grad_norm": 10.18315601348877, "learning_rate": 9.651730022601842e-06, "loss": 0.63062263, "memory(GiB)": 34.88, "step": 24160, "train_speed(iter/s)": 0.413854 }, { "acc": 0.86528149, "epoch": 0.6542929087807652, "grad_norm": 8.924764633178711, "learning_rate": 9.651524801181293e-06, "loss": 0.71797948, "memory(GiB)": 34.88, "step": 24165, "train_speed(iter/s)": 0.413859 }, { "acc": 0.86992168, "epoch": 0.6544282890639808, "grad_norm": 26.006258010864258, "learning_rate": 9.65131952149752e-06, "loss": 0.70303955, "memory(GiB)": 34.88, "step": 24170, "train_speed(iter/s)": 0.413863 }, { "acc": 0.87843323, "epoch": 0.6545636693471962, "grad_norm": 7.037264347076416, "learning_rate": 9.651114183553093e-06, "loss": 0.58813405, "memory(GiB)": 34.88, "step": 24175, "train_speed(iter/s)": 0.413865 }, { "acc": 0.85885754, "epoch": 0.6546990496304118, "grad_norm": 5.904088973999023, "learning_rate": 9.650908787350585e-06, "loss": 0.74786744, "memory(GiB)": 34.88, "step": 24180, "train_speed(iter/s)": 0.413869 }, { "acc": 0.85567656, "epoch": 0.6548344299136274, "grad_norm": 10.640947341918945, "learning_rate": 9.650703332892566e-06, "loss": 0.7689898, "memory(GiB)": 34.88, "step": 24185, "train_speed(iter/s)": 0.413873 }, { "acc": 0.85902042, "epoch": 0.654969810196843, "grad_norm": 9.099798202514648, "learning_rate": 9.650497820181615e-06, "loss": 0.74361882, "memory(GiB)": 34.88, "step": 24190, "train_speed(iter/s)": 0.413877 }, { "acc": 0.87959776, "epoch": 0.6551051904800584, "grad_norm": 10.782754898071289, "learning_rate": 9.650292249220305e-06, "loss": 0.58934469, "memory(GiB)": 34.88, "step": 24195, "train_speed(iter/s)": 0.413881 }, { "acc": 0.85829391, "epoch": 0.655240570763274, "grad_norm": 14.321829795837402, "learning_rate": 9.650086620011208e-06, "loss": 0.87403259, "memory(GiB)": 34.88, "step": 24200, "train_speed(iter/s)": 0.413885 }, { "acc": 0.8499979, "epoch": 0.6553759510464896, "grad_norm": 15.742534637451172, "learning_rate": 9.649880932556904e-06, "loss": 0.8657095, "memory(GiB)": 34.88, "step": 24205, "train_speed(iter/s)": 0.41389 }, { "acc": 0.84910927, "epoch": 0.6555113313297052, "grad_norm": 5.602169036865234, "learning_rate": 9.649675186859968e-06, "loss": 0.76597953, "memory(GiB)": 34.88, "step": 24210, "train_speed(iter/s)": 0.413894 }, { "acc": 0.86070652, "epoch": 0.6556467116129207, "grad_norm": 8.25657844543457, "learning_rate": 9.649469382922975e-06, "loss": 0.73806152, "memory(GiB)": 34.88, "step": 24215, "train_speed(iter/s)": 0.413898 }, { "acc": 0.87679968, "epoch": 0.6557820918961362, "grad_norm": 5.263497352600098, "learning_rate": 9.649263520748507e-06, "loss": 0.57709599, "memory(GiB)": 34.88, "step": 24220, "train_speed(iter/s)": 0.413903 }, { "acc": 0.87727737, "epoch": 0.6559174721793518, "grad_norm": 5.666731357574463, "learning_rate": 9.64905760033914e-06, "loss": 0.55246172, "memory(GiB)": 34.88, "step": 24225, "train_speed(iter/s)": 0.413907 }, { "acc": 0.85505009, "epoch": 0.6560528524625674, "grad_norm": 15.634637832641602, "learning_rate": 9.648851621697456e-06, "loss": 0.75370636, "memory(GiB)": 34.88, "step": 24230, "train_speed(iter/s)": 0.41391 }, { "acc": 0.87572031, "epoch": 0.6561882327457829, "grad_norm": 6.822455406188965, "learning_rate": 9.648645584826034e-06, "loss": 0.60624685, "memory(GiB)": 34.88, "step": 24235, "train_speed(iter/s)": 0.413912 }, { "acc": 0.87719984, "epoch": 0.6563236130289984, "grad_norm": 8.372178077697754, "learning_rate": 9.648439489727452e-06, "loss": 0.64700651, "memory(GiB)": 34.88, "step": 24240, "train_speed(iter/s)": 0.413917 }, { "acc": 0.8548048, "epoch": 0.656458993312214, "grad_norm": 8.707771301269531, "learning_rate": 9.648233336404297e-06, "loss": 0.86309805, "memory(GiB)": 34.88, "step": 24245, "train_speed(iter/s)": 0.413921 }, { "acc": 0.88736534, "epoch": 0.6565943735954296, "grad_norm": 4.651147842407227, "learning_rate": 9.64802712485915e-06, "loss": 0.51092176, "memory(GiB)": 34.88, "step": 24250, "train_speed(iter/s)": 0.413925 }, { "acc": 0.87594995, "epoch": 0.6567297538786451, "grad_norm": 12.032506942749023, "learning_rate": 9.647820855094593e-06, "loss": 0.65695577, "memory(GiB)": 34.88, "step": 24255, "train_speed(iter/s)": 0.41393 }, { "acc": 0.87127781, "epoch": 0.6568651341618607, "grad_norm": 5.844195365905762, "learning_rate": 9.647614527113208e-06, "loss": 0.6403285, "memory(GiB)": 34.88, "step": 24260, "train_speed(iter/s)": 0.413934 }, { "acc": 0.87420559, "epoch": 0.6570005144450762, "grad_norm": 8.594748497009277, "learning_rate": 9.647408140917585e-06, "loss": 0.66819363, "memory(GiB)": 34.88, "step": 24265, "train_speed(iter/s)": 0.413937 }, { "acc": 0.86264896, "epoch": 0.6571358947282918, "grad_norm": 7.471070289611816, "learning_rate": 9.647201696510306e-06, "loss": 0.73108816, "memory(GiB)": 34.88, "step": 24270, "train_speed(iter/s)": 0.413941 }, { "acc": 0.88990297, "epoch": 0.6572712750115073, "grad_norm": 13.177690505981445, "learning_rate": 9.646995193893955e-06, "loss": 0.62666349, "memory(GiB)": 34.88, "step": 24275, "train_speed(iter/s)": 0.413945 }, { "acc": 0.86345387, "epoch": 0.6574066552947229, "grad_norm": 6.935574054718018, "learning_rate": 9.646788633071123e-06, "loss": 0.7379117, "memory(GiB)": 34.88, "step": 24280, "train_speed(iter/s)": 0.413949 }, { "acc": 0.87680569, "epoch": 0.6575420355779384, "grad_norm": 7.205397129058838, "learning_rate": 9.646582014044397e-06, "loss": 0.56696033, "memory(GiB)": 34.88, "step": 24285, "train_speed(iter/s)": 0.413954 }, { "acc": 0.85185032, "epoch": 0.657677415861154, "grad_norm": 10.605937004089355, "learning_rate": 9.646375336816361e-06, "loss": 0.76458087, "memory(GiB)": 34.88, "step": 24290, "train_speed(iter/s)": 0.413957 }, { "acc": 0.87120256, "epoch": 0.6578127961443695, "grad_norm": 7.294844150543213, "learning_rate": 9.64616860138961e-06, "loss": 0.66161489, "memory(GiB)": 34.88, "step": 24295, "train_speed(iter/s)": 0.413961 }, { "acc": 0.8529727, "epoch": 0.6579481764275851, "grad_norm": 5.220314979553223, "learning_rate": 9.645961807766731e-06, "loss": 0.81621523, "memory(GiB)": 34.88, "step": 24300, "train_speed(iter/s)": 0.413966 }, { "acc": 0.88250713, "epoch": 0.6580835567108007, "grad_norm": 5.705766201019287, "learning_rate": 9.645754955950314e-06, "loss": 0.5704464, "memory(GiB)": 34.88, "step": 24305, "train_speed(iter/s)": 0.41397 }, { "acc": 0.85691805, "epoch": 0.6582189369940162, "grad_norm": 7.717303276062012, "learning_rate": 9.645548045942951e-06, "loss": 0.70928102, "memory(GiB)": 34.88, "step": 24310, "train_speed(iter/s)": 0.413973 }, { "acc": 0.87176609, "epoch": 0.6583543172772317, "grad_norm": 12.387882232666016, "learning_rate": 9.645341077747233e-06, "loss": 0.72799807, "memory(GiB)": 34.88, "step": 24315, "train_speed(iter/s)": 0.413977 }, { "acc": 0.86214733, "epoch": 0.6584896975604473, "grad_norm": 5.073908805847168, "learning_rate": 9.645134051365755e-06, "loss": 0.74345922, "memory(GiB)": 34.88, "step": 24320, "train_speed(iter/s)": 0.413981 }, { "acc": 0.83520288, "epoch": 0.6586250778436629, "grad_norm": 11.757935523986816, "learning_rate": 9.644926966801106e-06, "loss": 0.9387516, "memory(GiB)": 34.88, "step": 24325, "train_speed(iter/s)": 0.413984 }, { "acc": 0.89536886, "epoch": 0.6587604581268784, "grad_norm": 7.864593982696533, "learning_rate": 9.644719824055886e-06, "loss": 0.54714384, "memory(GiB)": 34.88, "step": 24330, "train_speed(iter/s)": 0.413988 }, { "acc": 0.84894295, "epoch": 0.6588958384100939, "grad_norm": 9.819158554077148, "learning_rate": 9.644512623132686e-06, "loss": 0.74821215, "memory(GiB)": 34.88, "step": 24335, "train_speed(iter/s)": 0.413991 }, { "acc": 0.86115456, "epoch": 0.6590312186933095, "grad_norm": 10.401078224182129, "learning_rate": 9.644305364034102e-06, "loss": 0.73994637, "memory(GiB)": 34.88, "step": 24340, "train_speed(iter/s)": 0.413995 }, { "acc": 0.88660955, "epoch": 0.6591665989765251, "grad_norm": 3.9818267822265625, "learning_rate": 9.64409804676273e-06, "loss": 0.5494524, "memory(GiB)": 34.88, "step": 24345, "train_speed(iter/s)": 0.413999 }, { "acc": 0.88782673, "epoch": 0.6593019792597407, "grad_norm": 4.652040958404541, "learning_rate": 9.643890671321168e-06, "loss": 0.53728442, "memory(GiB)": 34.88, "step": 24350, "train_speed(iter/s)": 0.414004 }, { "acc": 0.89638271, "epoch": 0.6594373595429561, "grad_norm": 11.331351280212402, "learning_rate": 9.643683237712013e-06, "loss": 0.48976851, "memory(GiB)": 34.88, "step": 24355, "train_speed(iter/s)": 0.414008 }, { "acc": 0.84620628, "epoch": 0.6595727398261717, "grad_norm": 4.362467288970947, "learning_rate": 9.643475745937868e-06, "loss": 0.76973867, "memory(GiB)": 34.88, "step": 24360, "train_speed(iter/s)": 0.414012 }, { "acc": 0.86866932, "epoch": 0.6597081201093873, "grad_norm": 6.849191188812256, "learning_rate": 9.643268196001324e-06, "loss": 0.63182764, "memory(GiB)": 34.88, "step": 24365, "train_speed(iter/s)": 0.414016 }, { "acc": 0.90466299, "epoch": 0.6598435003926029, "grad_norm": 3.632035493850708, "learning_rate": 9.643060587904987e-06, "loss": 0.43558989, "memory(GiB)": 34.88, "step": 24370, "train_speed(iter/s)": 0.41402 }, { "acc": 0.86728039, "epoch": 0.6599788806758183, "grad_norm": 12.343083381652832, "learning_rate": 9.642852921651455e-06, "loss": 0.760885, "memory(GiB)": 34.88, "step": 24375, "train_speed(iter/s)": 0.414024 }, { "acc": 0.88536301, "epoch": 0.6601142609590339, "grad_norm": 11.27213191986084, "learning_rate": 9.642645197243332e-06, "loss": 0.5546731, "memory(GiB)": 34.88, "step": 24380, "train_speed(iter/s)": 0.414027 }, { "acc": 0.87191391, "epoch": 0.6602496412422495, "grad_norm": 6.719498634338379, "learning_rate": 9.642437414683218e-06, "loss": 0.53591108, "memory(GiB)": 34.88, "step": 24385, "train_speed(iter/s)": 0.414031 }, { "acc": 0.87115841, "epoch": 0.6603850215254651, "grad_norm": 6.706883907318115, "learning_rate": 9.642229573973716e-06, "loss": 0.5901186, "memory(GiB)": 34.88, "step": 24390, "train_speed(iter/s)": 0.414035 }, { "acc": 0.8912878, "epoch": 0.6605204018086805, "grad_norm": 6.161640644073486, "learning_rate": 9.642021675117432e-06, "loss": 0.48614368, "memory(GiB)": 34.88, "step": 24395, "train_speed(iter/s)": 0.414038 }, { "acc": 0.85141201, "epoch": 0.6606557820918961, "grad_norm": 15.407071113586426, "learning_rate": 9.641813718116966e-06, "loss": 0.83937912, "memory(GiB)": 34.88, "step": 24400, "train_speed(iter/s)": 0.414042 }, { "acc": 0.84189949, "epoch": 0.6607911623751117, "grad_norm": 8.517340660095215, "learning_rate": 9.641605702974926e-06, "loss": 0.8079483, "memory(GiB)": 34.88, "step": 24405, "train_speed(iter/s)": 0.414047 }, { "acc": 0.8788269, "epoch": 0.6609265426583273, "grad_norm": 11.298502922058105, "learning_rate": 9.641397629693918e-06, "loss": 0.56904087, "memory(GiB)": 34.88, "step": 24410, "train_speed(iter/s)": 0.414051 }, { "acc": 0.8618722, "epoch": 0.6610619229415428, "grad_norm": 13.279372215270996, "learning_rate": 9.641189498276549e-06, "loss": 0.70775337, "memory(GiB)": 34.88, "step": 24415, "train_speed(iter/s)": 0.414055 }, { "acc": 0.87702131, "epoch": 0.6611973032247583, "grad_norm": 12.888172149658203, "learning_rate": 9.640981308725423e-06, "loss": 0.65179453, "memory(GiB)": 34.88, "step": 24420, "train_speed(iter/s)": 0.414059 }, { "acc": 0.83971024, "epoch": 0.6613326835079739, "grad_norm": 8.676155090332031, "learning_rate": 9.640773061043151e-06, "loss": 0.89120617, "memory(GiB)": 34.88, "step": 24425, "train_speed(iter/s)": 0.414063 }, { "acc": 0.879949, "epoch": 0.6614680637911895, "grad_norm": 4.075950622558594, "learning_rate": 9.640564755232342e-06, "loss": 0.58751678, "memory(GiB)": 34.88, "step": 24430, "train_speed(iter/s)": 0.414066 }, { "acc": 0.86185408, "epoch": 0.661603444074405, "grad_norm": 6.792430877685547, "learning_rate": 9.640356391295603e-06, "loss": 0.7552763, "memory(GiB)": 34.88, "step": 24435, "train_speed(iter/s)": 0.41407 }, { "acc": 0.89349413, "epoch": 0.6617388243576205, "grad_norm": 5.189916133880615, "learning_rate": 9.640147969235542e-06, "loss": 0.5250062, "memory(GiB)": 34.88, "step": 24440, "train_speed(iter/s)": 0.414074 }, { "acc": 0.85951586, "epoch": 0.6618742046408361, "grad_norm": 7.961892127990723, "learning_rate": 9.63993948905478e-06, "loss": 0.78321509, "memory(GiB)": 34.88, "step": 24445, "train_speed(iter/s)": 0.414078 }, { "acc": 0.84890404, "epoch": 0.6620095849240517, "grad_norm": 20.8148193359375, "learning_rate": 9.639730950755917e-06, "loss": 0.83391409, "memory(GiB)": 34.88, "step": 24450, "train_speed(iter/s)": 0.414082 }, { "acc": 0.88825817, "epoch": 0.6621449652072672, "grad_norm": 24.444955825805664, "learning_rate": 9.639522354341571e-06, "loss": 0.62492599, "memory(GiB)": 34.88, "step": 24455, "train_speed(iter/s)": 0.414086 }, { "acc": 0.88446293, "epoch": 0.6622803454904828, "grad_norm": 5.40543270111084, "learning_rate": 9.639313699814355e-06, "loss": 0.55031962, "memory(GiB)": 34.88, "step": 24460, "train_speed(iter/s)": 0.41409 }, { "acc": 0.86140976, "epoch": 0.6624157257736983, "grad_norm": 10.42660140991211, "learning_rate": 9.639104987176884e-06, "loss": 0.80811148, "memory(GiB)": 34.88, "step": 24465, "train_speed(iter/s)": 0.414094 }, { "acc": 0.87114744, "epoch": 0.6625511060569139, "grad_norm": 5.891839504241943, "learning_rate": 9.638896216431767e-06, "loss": 0.57056141, "memory(GiB)": 34.88, "step": 24470, "train_speed(iter/s)": 0.414098 }, { "acc": 0.87388229, "epoch": 0.6626864863401294, "grad_norm": 6.238763809204102, "learning_rate": 9.638687387581625e-06, "loss": 0.60191803, "memory(GiB)": 34.88, "step": 24475, "train_speed(iter/s)": 0.414102 }, { "acc": 0.85896721, "epoch": 0.662821866623345, "grad_norm": 11.776185989379883, "learning_rate": 9.638478500629073e-06, "loss": 0.72279582, "memory(GiB)": 34.88, "step": 24480, "train_speed(iter/s)": 0.414106 }, { "acc": 0.89101162, "epoch": 0.6629572469065605, "grad_norm": 9.785969734191895, "learning_rate": 9.638269555576724e-06, "loss": 0.56502285, "memory(GiB)": 34.88, "step": 24485, "train_speed(iter/s)": 0.41411 }, { "acc": 0.86965637, "epoch": 0.6630926271897761, "grad_norm": 16.46390724182129, "learning_rate": 9.638060552427201e-06, "loss": 0.69596024, "memory(GiB)": 34.88, "step": 24490, "train_speed(iter/s)": 0.414114 }, { "acc": 0.8720438, "epoch": 0.6632280074729916, "grad_norm": 12.010826110839844, "learning_rate": 9.637851491183117e-06, "loss": 0.73656521, "memory(GiB)": 34.88, "step": 24495, "train_speed(iter/s)": 0.414118 }, { "acc": 0.86493549, "epoch": 0.6633633877562072, "grad_norm": 20.156381607055664, "learning_rate": 9.637642371847093e-06, "loss": 0.71592894, "memory(GiB)": 34.88, "step": 24500, "train_speed(iter/s)": 0.414121 }, { "acc": 0.87403431, "epoch": 0.6634987680394228, "grad_norm": 10.347576141357422, "learning_rate": 9.637433194421751e-06, "loss": 0.5894628, "memory(GiB)": 34.88, "step": 24505, "train_speed(iter/s)": 0.414124 }, { "acc": 0.8822422, "epoch": 0.6636341483226383, "grad_norm": 6.435085296630859, "learning_rate": 9.637223958909709e-06, "loss": 0.65150032, "memory(GiB)": 34.88, "step": 24510, "train_speed(iter/s)": 0.414128 }, { "acc": 0.87686977, "epoch": 0.6637695286058538, "grad_norm": 5.179061412811279, "learning_rate": 9.637014665313588e-06, "loss": 0.62465611, "memory(GiB)": 34.88, "step": 24515, "train_speed(iter/s)": 0.414132 }, { "acc": 0.87728863, "epoch": 0.6639049088890694, "grad_norm": 16.988895416259766, "learning_rate": 9.636805313636008e-06, "loss": 0.73955584, "memory(GiB)": 34.88, "step": 24520, "train_speed(iter/s)": 0.414136 }, { "acc": 0.85980263, "epoch": 0.664040289172285, "grad_norm": 5.614001274108887, "learning_rate": 9.636595903879595e-06, "loss": 0.69472489, "memory(GiB)": 34.88, "step": 24525, "train_speed(iter/s)": 0.41414 }, { "acc": 0.85438843, "epoch": 0.6641756694555006, "grad_norm": 6.916340351104736, "learning_rate": 9.636386436046972e-06, "loss": 0.68224239, "memory(GiB)": 34.88, "step": 24530, "train_speed(iter/s)": 0.414144 }, { "acc": 0.87567539, "epoch": 0.664311049738716, "grad_norm": 6.3195905685424805, "learning_rate": 9.636176910140761e-06, "loss": 0.6176405, "memory(GiB)": 34.88, "step": 24535, "train_speed(iter/s)": 0.414147 }, { "acc": 0.84398251, "epoch": 0.6644464300219316, "grad_norm": 14.993040084838867, "learning_rate": 9.635967326163587e-06, "loss": 0.89546661, "memory(GiB)": 34.88, "step": 24540, "train_speed(iter/s)": 0.414151 }, { "acc": 0.87500658, "epoch": 0.6645818103051472, "grad_norm": 10.112105369567871, "learning_rate": 9.635757684118077e-06, "loss": 0.57585053, "memory(GiB)": 34.88, "step": 24545, "train_speed(iter/s)": 0.414155 }, { "acc": 0.85783691, "epoch": 0.6647171905883628, "grad_norm": 10.840513229370117, "learning_rate": 9.635547984006855e-06, "loss": 0.74388995, "memory(GiB)": 34.88, "step": 24550, "train_speed(iter/s)": 0.414159 }, { "acc": 0.84340076, "epoch": 0.6648525708715782, "grad_norm": 9.167768478393555, "learning_rate": 9.63533822583255e-06, "loss": 0.78731623, "memory(GiB)": 34.88, "step": 24555, "train_speed(iter/s)": 0.414163 }, { "acc": 0.87064381, "epoch": 0.6649879511547938, "grad_norm": 7.517374038696289, "learning_rate": 9.635128409597788e-06, "loss": 0.6853281, "memory(GiB)": 34.88, "step": 24560, "train_speed(iter/s)": 0.414167 }, { "acc": 0.85136795, "epoch": 0.6651233314380094, "grad_norm": 6.414887428283691, "learning_rate": 9.6349185353052e-06, "loss": 0.76745634, "memory(GiB)": 34.88, "step": 24565, "train_speed(iter/s)": 0.41417 }, { "acc": 0.88703156, "epoch": 0.665258711721225, "grad_norm": 8.556095123291016, "learning_rate": 9.634708602957411e-06, "loss": 0.53106632, "memory(GiB)": 34.88, "step": 24570, "train_speed(iter/s)": 0.414175 }, { "acc": 0.88963966, "epoch": 0.6653940920044404, "grad_norm": 8.299762725830078, "learning_rate": 9.634498612557057e-06, "loss": 0.568922, "memory(GiB)": 34.88, "step": 24575, "train_speed(iter/s)": 0.414178 }, { "acc": 0.87298698, "epoch": 0.665529472287656, "grad_norm": 8.166971206665039, "learning_rate": 9.634288564106759e-06, "loss": 0.59759283, "memory(GiB)": 34.88, "step": 24580, "train_speed(iter/s)": 0.414182 }, { "acc": 0.87757368, "epoch": 0.6656648525708716, "grad_norm": 7.504389762878418, "learning_rate": 9.634078457609158e-06, "loss": 0.64011426, "memory(GiB)": 34.88, "step": 24585, "train_speed(iter/s)": 0.414186 }, { "acc": 0.84525414, "epoch": 0.6658002328540872, "grad_norm": 9.92921257019043, "learning_rate": 9.633868293066881e-06, "loss": 0.73716087, "memory(GiB)": 34.88, "step": 24590, "train_speed(iter/s)": 0.414191 }, { "acc": 0.82264986, "epoch": 0.6659356131373027, "grad_norm": 20.982133865356445, "learning_rate": 9.63365807048256e-06, "loss": 0.96425066, "memory(GiB)": 34.88, "step": 24595, "train_speed(iter/s)": 0.414194 }, { "acc": 0.86797924, "epoch": 0.6660709934205182, "grad_norm": 17.358631134033203, "learning_rate": 9.63344778985883e-06, "loss": 0.64703364, "memory(GiB)": 34.88, "step": 24600, "train_speed(iter/s)": 0.414198 }, { "acc": 0.87608109, "epoch": 0.6662063737037338, "grad_norm": 8.732311248779297, "learning_rate": 9.633237451198328e-06, "loss": 0.70003195, "memory(GiB)": 34.88, "step": 24605, "train_speed(iter/s)": 0.414202 }, { "acc": 0.82313709, "epoch": 0.6663417539869494, "grad_norm": 18.948162078857422, "learning_rate": 9.633027054503685e-06, "loss": 0.94245472, "memory(GiB)": 34.88, "step": 24610, "train_speed(iter/s)": 0.414206 }, { "acc": 0.87575483, "epoch": 0.6664771342701649, "grad_norm": 24.200092315673828, "learning_rate": 9.632816599777536e-06, "loss": 0.61009889, "memory(GiB)": 34.88, "step": 24615, "train_speed(iter/s)": 0.414211 }, { "acc": 0.86572447, "epoch": 0.6666125145533804, "grad_norm": 14.057012557983398, "learning_rate": 9.63260608702252e-06, "loss": 0.67585773, "memory(GiB)": 34.88, "step": 24620, "train_speed(iter/s)": 0.414215 }, { "acc": 0.84893665, "epoch": 0.666747894836596, "grad_norm": 7.369086742401123, "learning_rate": 9.632395516241273e-06, "loss": 0.6913517, "memory(GiB)": 34.88, "step": 24625, "train_speed(iter/s)": 0.414218 }, { "acc": 0.84924908, "epoch": 0.6668832751198116, "grad_norm": 9.224614143371582, "learning_rate": 9.632184887436434e-06, "loss": 0.82983198, "memory(GiB)": 34.88, "step": 24630, "train_speed(iter/s)": 0.414223 }, { "acc": 0.85942707, "epoch": 0.6670186554030271, "grad_norm": 7.91172456741333, "learning_rate": 9.63197420061064e-06, "loss": 0.71107497, "memory(GiB)": 34.88, "step": 24635, "train_speed(iter/s)": 0.414226 }, { "acc": 0.89950037, "epoch": 0.6671540356862427, "grad_norm": 7.668515205383301, "learning_rate": 9.631763455766532e-06, "loss": 0.45393381, "memory(GiB)": 34.88, "step": 24640, "train_speed(iter/s)": 0.41423 }, { "acc": 0.86733522, "epoch": 0.6672894159694582, "grad_norm": 8.258549690246582, "learning_rate": 9.631552652906746e-06, "loss": 0.66350636, "memory(GiB)": 34.88, "step": 24645, "train_speed(iter/s)": 0.414233 }, { "acc": 0.89756126, "epoch": 0.6674247962526738, "grad_norm": 5.1753034591674805, "learning_rate": 9.631341792033927e-06, "loss": 0.54063568, "memory(GiB)": 34.88, "step": 24650, "train_speed(iter/s)": 0.414238 }, { "acc": 0.88344135, "epoch": 0.6675601765358893, "grad_norm": 6.325366020202637, "learning_rate": 9.631130873150716e-06, "loss": 0.54728651, "memory(GiB)": 34.88, "step": 24655, "train_speed(iter/s)": 0.414241 }, { "acc": 0.87532806, "epoch": 0.6676955568191049, "grad_norm": 6.316571235656738, "learning_rate": 9.630919896259751e-06, "loss": 0.67355108, "memory(GiB)": 34.88, "step": 24660, "train_speed(iter/s)": 0.414246 }, { "acc": 0.86241035, "epoch": 0.6678309371023204, "grad_norm": 9.405306816101074, "learning_rate": 9.630708861363682e-06, "loss": 0.61466875, "memory(GiB)": 34.88, "step": 24665, "train_speed(iter/s)": 0.414249 }, { "acc": 0.88182192, "epoch": 0.667966317385536, "grad_norm": 6.3402018547058105, "learning_rate": 9.630497768465146e-06, "loss": 0.52758088, "memory(GiB)": 34.88, "step": 24670, "train_speed(iter/s)": 0.414253 }, { "acc": 0.85815105, "epoch": 0.6681016976687515, "grad_norm": 5.908500671386719, "learning_rate": 9.63028661756679e-06, "loss": 0.81186275, "memory(GiB)": 34.88, "step": 24675, "train_speed(iter/s)": 0.414257 }, { "acc": 0.84763508, "epoch": 0.6682370779519671, "grad_norm": 9.054203987121582, "learning_rate": 9.630075408671262e-06, "loss": 0.77955427, "memory(GiB)": 34.88, "step": 24680, "train_speed(iter/s)": 0.41426 }, { "acc": 0.8548687, "epoch": 0.6683724582351827, "grad_norm": 9.830869674682617, "learning_rate": 9.629864141781202e-06, "loss": 0.73770385, "memory(GiB)": 34.88, "step": 24685, "train_speed(iter/s)": 0.414264 }, { "acc": 0.87301311, "epoch": 0.6685078385183982, "grad_norm": 8.87773323059082, "learning_rate": 9.629652816899261e-06, "loss": 0.62031641, "memory(GiB)": 34.88, "step": 24690, "train_speed(iter/s)": 0.414268 }, { "acc": 0.89101715, "epoch": 0.6686432188016137, "grad_norm": 9.024559020996094, "learning_rate": 9.629441434028083e-06, "loss": 0.57008686, "memory(GiB)": 34.88, "step": 24695, "train_speed(iter/s)": 0.414271 }, { "acc": 0.88015299, "epoch": 0.6687785990848293, "grad_norm": 6.143923282623291, "learning_rate": 9.629229993170319e-06, "loss": 0.58435426, "memory(GiB)": 34.88, "step": 24700, "train_speed(iter/s)": 0.414275 }, { "acc": 0.85882444, "epoch": 0.6689139793680449, "grad_norm": 10.201176643371582, "learning_rate": 9.629018494328617e-06, "loss": 0.72243233, "memory(GiB)": 34.88, "step": 24705, "train_speed(iter/s)": 0.414278 }, { "acc": 0.85594349, "epoch": 0.6690493596512604, "grad_norm": 8.501660346984863, "learning_rate": 9.628806937505625e-06, "loss": 0.7125875, "memory(GiB)": 34.88, "step": 24710, "train_speed(iter/s)": 0.414282 }, { "acc": 0.89345894, "epoch": 0.6691847399344759, "grad_norm": 7.558516979217529, "learning_rate": 9.628595322703992e-06, "loss": 0.53036127, "memory(GiB)": 34.88, "step": 24715, "train_speed(iter/s)": 0.414286 }, { "acc": 0.86685753, "epoch": 0.6693201202176915, "grad_norm": 9.585545539855957, "learning_rate": 9.628383649926376e-06, "loss": 0.61750698, "memory(GiB)": 34.88, "step": 24720, "train_speed(iter/s)": 0.41429 }, { "acc": 0.88795452, "epoch": 0.6694555005009071, "grad_norm": 14.458541870117188, "learning_rate": 9.62817191917542e-06, "loss": 0.59463305, "memory(GiB)": 34.88, "step": 24725, "train_speed(iter/s)": 0.414294 }, { "acc": 0.8711462, "epoch": 0.6695908807841227, "grad_norm": 12.264060974121094, "learning_rate": 9.62796013045378e-06, "loss": 0.64174004, "memory(GiB)": 34.88, "step": 24730, "train_speed(iter/s)": 0.414298 }, { "acc": 0.88819046, "epoch": 0.6697262610673381, "grad_norm": 24.756677627563477, "learning_rate": 9.627748283764109e-06, "loss": 0.50781069, "memory(GiB)": 34.88, "step": 24735, "train_speed(iter/s)": 0.414303 }, { "acc": 0.87679749, "epoch": 0.6698616413505537, "grad_norm": 6.815526962280273, "learning_rate": 9.627536379109061e-06, "loss": 0.68305988, "memory(GiB)": 34.88, "step": 24740, "train_speed(iter/s)": 0.414306 }, { "acc": 0.86093712, "epoch": 0.6699970216337693, "grad_norm": 12.516497611999512, "learning_rate": 9.62732441649129e-06, "loss": 0.70311379, "memory(GiB)": 34.88, "step": 24745, "train_speed(iter/s)": 0.41431 }, { "acc": 0.88922691, "epoch": 0.6701324019169849, "grad_norm": 5.683319091796875, "learning_rate": 9.627112395913451e-06, "loss": 0.51513124, "memory(GiB)": 34.88, "step": 24750, "train_speed(iter/s)": 0.414314 }, { "acc": 0.90246658, "epoch": 0.6702677822002003, "grad_norm": 27.311248779296875, "learning_rate": 9.626900317378201e-06, "loss": 0.52784185, "memory(GiB)": 34.88, "step": 24755, "train_speed(iter/s)": 0.414317 }, { "acc": 0.87945652, "epoch": 0.6704031624834159, "grad_norm": 15.387592315673828, "learning_rate": 9.626688180888197e-06, "loss": 0.58580222, "memory(GiB)": 34.88, "step": 24760, "train_speed(iter/s)": 0.414321 }, { "acc": 0.881005, "epoch": 0.6705385427666315, "grad_norm": 10.017828941345215, "learning_rate": 9.626475986446095e-06, "loss": 0.66198311, "memory(GiB)": 34.88, "step": 24765, "train_speed(iter/s)": 0.414325 }, { "acc": 0.87490978, "epoch": 0.670673923049847, "grad_norm": 8.221205711364746, "learning_rate": 9.626263734054553e-06, "loss": 0.6276751, "memory(GiB)": 34.88, "step": 24770, "train_speed(iter/s)": 0.414328 }, { "acc": 0.86605225, "epoch": 0.6708093033330625, "grad_norm": 5.333644390106201, "learning_rate": 9.62605142371623e-06, "loss": 0.68285031, "memory(GiB)": 34.88, "step": 24775, "train_speed(iter/s)": 0.414331 }, { "acc": 0.88551064, "epoch": 0.6709446836162781, "grad_norm": 8.493803024291992, "learning_rate": 9.625839055433787e-06, "loss": 0.63856621, "memory(GiB)": 34.88, "step": 24780, "train_speed(iter/s)": 0.414336 }, { "acc": 0.8816412, "epoch": 0.6710800638994937, "grad_norm": 7.284549236297607, "learning_rate": 9.625626629209886e-06, "loss": 0.53185415, "memory(GiB)": 34.88, "step": 24785, "train_speed(iter/s)": 0.41434 }, { "acc": 0.87924938, "epoch": 0.6712154441827092, "grad_norm": 9.669193267822266, "learning_rate": 9.625414145047183e-06, "loss": 0.59299631, "memory(GiB)": 34.88, "step": 24790, "train_speed(iter/s)": 0.414344 }, { "acc": 0.86691694, "epoch": 0.6713508244659248, "grad_norm": 10.579002380371094, "learning_rate": 9.625201602948342e-06, "loss": 0.64195919, "memory(GiB)": 34.88, "step": 24795, "train_speed(iter/s)": 0.414347 }, { "acc": 0.86757717, "epoch": 0.6714862047491403, "grad_norm": 10.888063430786133, "learning_rate": 9.624989002916026e-06, "loss": 0.68752084, "memory(GiB)": 34.88, "step": 24800, "train_speed(iter/s)": 0.414351 }, { "acc": 0.8881875, "epoch": 0.6716215850323559, "grad_norm": 6.720761299133301, "learning_rate": 9.624776344952901e-06, "loss": 0.51773405, "memory(GiB)": 34.88, "step": 24805, "train_speed(iter/s)": 0.414355 }, { "acc": 0.83113403, "epoch": 0.6717569653155714, "grad_norm": 7.9879560470581055, "learning_rate": 9.624563629061627e-06, "loss": 0.90879402, "memory(GiB)": 34.88, "step": 24810, "train_speed(iter/s)": 0.414359 }, { "acc": 0.86543236, "epoch": 0.671892345598787, "grad_norm": 7.9787445068359375, "learning_rate": 9.624350855244868e-06, "loss": 0.67376108, "memory(GiB)": 34.88, "step": 24815, "train_speed(iter/s)": 0.414363 }, { "acc": 0.85847921, "epoch": 0.6720277258820025, "grad_norm": 7.321529388427734, "learning_rate": 9.624138023505292e-06, "loss": 0.70437064, "memory(GiB)": 34.88, "step": 24820, "train_speed(iter/s)": 0.414367 }, { "acc": 0.88731499, "epoch": 0.6721631061652181, "grad_norm": 5.220256805419922, "learning_rate": 9.623925133845564e-06, "loss": 0.49601154, "memory(GiB)": 34.88, "step": 24825, "train_speed(iter/s)": 0.414371 }, { "acc": 0.85180569, "epoch": 0.6722984864484336, "grad_norm": 11.333866119384766, "learning_rate": 9.623712186268351e-06, "loss": 0.73027167, "memory(GiB)": 34.88, "step": 24830, "train_speed(iter/s)": 0.414375 }, { "acc": 0.85433769, "epoch": 0.6724338667316492, "grad_norm": 10.388751029968262, "learning_rate": 9.62349918077632e-06, "loss": 0.72823873, "memory(GiB)": 34.88, "step": 24835, "train_speed(iter/s)": 0.414379 }, { "acc": 0.85340157, "epoch": 0.6725692470148648, "grad_norm": 9.661177635192871, "learning_rate": 9.623286117372144e-06, "loss": 0.71526709, "memory(GiB)": 34.88, "step": 24840, "train_speed(iter/s)": 0.414382 }, { "acc": 0.86775064, "epoch": 0.6727046272980803, "grad_norm": 7.693999290466309, "learning_rate": 9.623072996058485e-06, "loss": 0.66373692, "memory(GiB)": 34.88, "step": 24845, "train_speed(iter/s)": 0.414386 }, { "acc": 0.86494236, "epoch": 0.6728400075812958, "grad_norm": 7.515117168426514, "learning_rate": 9.622859816838017e-06, "loss": 0.57031474, "memory(GiB)": 34.88, "step": 24850, "train_speed(iter/s)": 0.414389 }, { "acc": 0.8754097, "epoch": 0.6729753878645114, "grad_norm": 8.23519515991211, "learning_rate": 9.622646579713407e-06, "loss": 0.62121925, "memory(GiB)": 34.88, "step": 24855, "train_speed(iter/s)": 0.414393 }, { "acc": 0.86590538, "epoch": 0.673110768147727, "grad_norm": 5.685159206390381, "learning_rate": 9.622433284687333e-06, "loss": 0.62694578, "memory(GiB)": 34.88, "step": 24860, "train_speed(iter/s)": 0.414397 }, { "acc": 0.87518015, "epoch": 0.6732461484309425, "grad_norm": 8.764470100402832, "learning_rate": 9.62221993176246e-06, "loss": 0.63027649, "memory(GiB)": 34.88, "step": 24865, "train_speed(iter/s)": 0.414401 }, { "acc": 0.87453938, "epoch": 0.673381528714158, "grad_norm": 11.587907791137695, "learning_rate": 9.622006520941463e-06, "loss": 0.63637209, "memory(GiB)": 34.88, "step": 24870, "train_speed(iter/s)": 0.414405 }, { "acc": 0.83371944, "epoch": 0.6735169089973736, "grad_norm": 16.631013870239258, "learning_rate": 9.621793052227014e-06, "loss": 0.90986443, "memory(GiB)": 34.88, "step": 24875, "train_speed(iter/s)": 0.414409 }, { "acc": 0.89147701, "epoch": 0.6736522892805892, "grad_norm": 5.289724826812744, "learning_rate": 9.62157952562179e-06, "loss": 0.60101728, "memory(GiB)": 34.88, "step": 24880, "train_speed(iter/s)": 0.414413 }, { "acc": 0.88707008, "epoch": 0.6737876695638048, "grad_norm": 4.951864242553711, "learning_rate": 9.621365941128465e-06, "loss": 0.66155128, "memory(GiB)": 34.88, "step": 24885, "train_speed(iter/s)": 0.414417 }, { "acc": 0.87790356, "epoch": 0.6739230498470202, "grad_norm": 5.945350646972656, "learning_rate": 9.621152298749715e-06, "loss": 0.76328702, "memory(GiB)": 34.88, "step": 24890, "train_speed(iter/s)": 0.414421 }, { "acc": 0.88006935, "epoch": 0.6740584301302358, "grad_norm": 6.207669258117676, "learning_rate": 9.620938598488214e-06, "loss": 0.62913246, "memory(GiB)": 34.88, "step": 24895, "train_speed(iter/s)": 0.414424 }, { "acc": 0.85953207, "epoch": 0.6741938104134514, "grad_norm": 8.430482864379883, "learning_rate": 9.62072484034664e-06, "loss": 0.82233381, "memory(GiB)": 34.88, "step": 24900, "train_speed(iter/s)": 0.414428 }, { "acc": 0.86211891, "epoch": 0.674329190696667, "grad_norm": 7.819765567779541, "learning_rate": 9.620511024327673e-06, "loss": 0.70830965, "memory(GiB)": 34.88, "step": 24905, "train_speed(iter/s)": 0.414432 }, { "acc": 0.86701412, "epoch": 0.6744645709798824, "grad_norm": 7.247269153594971, "learning_rate": 9.62029715043399e-06, "loss": 0.71530085, "memory(GiB)": 34.88, "step": 24910, "train_speed(iter/s)": 0.414436 }, { "acc": 0.85901279, "epoch": 0.674599951263098, "grad_norm": 8.8169527053833, "learning_rate": 9.620083218668267e-06, "loss": 0.77787123, "memory(GiB)": 34.88, "step": 24915, "train_speed(iter/s)": 0.414439 }, { "acc": 0.8575491, "epoch": 0.6747353315463136, "grad_norm": 7.747331619262695, "learning_rate": 9.619869229033189e-06, "loss": 0.73028808, "memory(GiB)": 34.88, "step": 24920, "train_speed(iter/s)": 0.414442 }, { "acc": 0.86146498, "epoch": 0.6748707118295292, "grad_norm": 72.5728759765625, "learning_rate": 9.619655181531433e-06, "loss": 0.63425593, "memory(GiB)": 34.88, "step": 24925, "train_speed(iter/s)": 0.414446 }, { "acc": 0.86657944, "epoch": 0.6750060921127446, "grad_norm": 12.040111541748047, "learning_rate": 9.61944107616568e-06, "loss": 0.6313632, "memory(GiB)": 34.88, "step": 24930, "train_speed(iter/s)": 0.41445 }, { "acc": 0.85939789, "epoch": 0.6751414723959602, "grad_norm": 16.506938934326172, "learning_rate": 9.619226912938618e-06, "loss": 0.62397676, "memory(GiB)": 34.88, "step": 24935, "train_speed(iter/s)": 0.414454 }, { "acc": 0.86109123, "epoch": 0.6752768526791758, "grad_norm": 9.719819068908691, "learning_rate": 9.619012691852923e-06, "loss": 0.78410769, "memory(GiB)": 34.88, "step": 24940, "train_speed(iter/s)": 0.414457 }, { "acc": 0.88976421, "epoch": 0.6754122329623914, "grad_norm": 4.047511100769043, "learning_rate": 9.618798412911282e-06, "loss": 0.52844367, "memory(GiB)": 34.88, "step": 24945, "train_speed(iter/s)": 0.414461 }, { "acc": 0.86185503, "epoch": 0.6755476132456069, "grad_norm": 6.295879364013672, "learning_rate": 9.618584076116379e-06, "loss": 0.75153933, "memory(GiB)": 34.88, "step": 24950, "train_speed(iter/s)": 0.414465 }, { "acc": 0.86769772, "epoch": 0.6756829935288224, "grad_norm": 7.338601589202881, "learning_rate": 9.618369681470898e-06, "loss": 0.69008999, "memory(GiB)": 34.88, "step": 24955, "train_speed(iter/s)": 0.414468 }, { "acc": 0.87044868, "epoch": 0.675818373812038, "grad_norm": 15.71418285369873, "learning_rate": 9.618155228977525e-06, "loss": 0.65316339, "memory(GiB)": 34.88, "step": 24960, "train_speed(iter/s)": 0.414472 }, { "acc": 0.88166637, "epoch": 0.6759537540952536, "grad_norm": 5.417322158813477, "learning_rate": 9.617940718638947e-06, "loss": 0.53902059, "memory(GiB)": 34.88, "step": 24965, "train_speed(iter/s)": 0.414476 }, { "acc": 0.88776073, "epoch": 0.6760891343784691, "grad_norm": 7.590817451477051, "learning_rate": 9.61772615045785e-06, "loss": 0.60198555, "memory(GiB)": 34.88, "step": 24970, "train_speed(iter/s)": 0.41448 }, { "acc": 0.87457495, "epoch": 0.6762245146616847, "grad_norm": 8.060004234313965, "learning_rate": 9.617511524436921e-06, "loss": 0.57704439, "memory(GiB)": 34.88, "step": 24975, "train_speed(iter/s)": 0.414484 }, { "acc": 0.8652565, "epoch": 0.6763598949449002, "grad_norm": 9.754244804382324, "learning_rate": 9.617296840578854e-06, "loss": 0.65931644, "memory(GiB)": 34.88, "step": 24980, "train_speed(iter/s)": 0.414488 }, { "acc": 0.880478, "epoch": 0.6764952752281158, "grad_norm": 8.656293869018555, "learning_rate": 9.617082098886332e-06, "loss": 0.50960846, "memory(GiB)": 34.88, "step": 24985, "train_speed(iter/s)": 0.414492 }, { "acc": 0.88918056, "epoch": 0.6766306555113313, "grad_norm": 8.844599723815918, "learning_rate": 9.616867299362048e-06, "loss": 0.57289944, "memory(GiB)": 34.88, "step": 24990, "train_speed(iter/s)": 0.414496 }, { "acc": 0.87089062, "epoch": 0.6767660357945469, "grad_norm": 14.401737213134766, "learning_rate": 9.616652442008693e-06, "loss": 0.75191374, "memory(GiB)": 34.88, "step": 24995, "train_speed(iter/s)": 0.4145 }, { "acc": 0.85177097, "epoch": 0.6769014160777624, "grad_norm": 18.130264282226562, "learning_rate": 9.616437526828956e-06, "loss": 0.78749094, "memory(GiB)": 34.88, "step": 25000, "train_speed(iter/s)": 0.414504 }, { "acc": 0.85295868, "epoch": 0.677036796360978, "grad_norm": 11.839037895202637, "learning_rate": 9.616222553825533e-06, "loss": 0.70994768, "memory(GiB)": 34.88, "step": 25005, "train_speed(iter/s)": 0.414507 }, { "acc": 0.87800875, "epoch": 0.6771721766441935, "grad_norm": 13.18114948272705, "learning_rate": 9.616007523001115e-06, "loss": 0.56264591, "memory(GiB)": 34.88, "step": 25010, "train_speed(iter/s)": 0.41451 }, { "acc": 0.87320728, "epoch": 0.6773075569274091, "grad_norm": 11.951447486877441, "learning_rate": 9.615792434358397e-06, "loss": 0.64793367, "memory(GiB)": 34.88, "step": 25015, "train_speed(iter/s)": 0.414514 }, { "acc": 0.86568794, "epoch": 0.6774429372106247, "grad_norm": 7.922375202178955, "learning_rate": 9.61557728790007e-06, "loss": 0.70458846, "memory(GiB)": 34.88, "step": 25020, "train_speed(iter/s)": 0.414517 }, { "acc": 0.86006174, "epoch": 0.6775783174938402, "grad_norm": 13.129341125488281, "learning_rate": 9.615362083628834e-06, "loss": 0.76928024, "memory(GiB)": 34.88, "step": 25025, "train_speed(iter/s)": 0.414521 }, { "acc": 0.87692423, "epoch": 0.6777136977770557, "grad_norm": 6.337723255157471, "learning_rate": 9.61514682154738e-06, "loss": 0.62025156, "memory(GiB)": 34.88, "step": 25030, "train_speed(iter/s)": 0.414525 }, { "acc": 0.8737175, "epoch": 0.6778490780602713, "grad_norm": 8.185266494750977, "learning_rate": 9.614931501658407e-06, "loss": 0.62424059, "memory(GiB)": 34.88, "step": 25035, "train_speed(iter/s)": 0.414529 }, { "acc": 0.88897057, "epoch": 0.6779844583434869, "grad_norm": 4.020063877105713, "learning_rate": 9.614716123964612e-06, "loss": 0.56604252, "memory(GiB)": 34.88, "step": 25040, "train_speed(iter/s)": 0.414533 }, { "acc": 0.85434456, "epoch": 0.6781198386267024, "grad_norm": 12.242546081542969, "learning_rate": 9.614500688468695e-06, "loss": 0.69426413, "memory(GiB)": 34.88, "step": 25045, "train_speed(iter/s)": 0.414537 }, { "acc": 0.88062, "epoch": 0.6782552189099179, "grad_norm": 3.933180332183838, "learning_rate": 9.614285195173351e-06, "loss": 0.57489367, "memory(GiB)": 34.88, "step": 25050, "train_speed(iter/s)": 0.414541 }, { "acc": 0.88388557, "epoch": 0.6783905991931335, "grad_norm": 9.613276481628418, "learning_rate": 9.614069644081282e-06, "loss": 0.55273609, "memory(GiB)": 34.88, "step": 25055, "train_speed(iter/s)": 0.414545 }, { "acc": 0.8756053, "epoch": 0.6785259794763491, "grad_norm": 16.921096801757812, "learning_rate": 9.613854035195187e-06, "loss": 0.64809027, "memory(GiB)": 34.88, "step": 25060, "train_speed(iter/s)": 0.414549 }, { "acc": 0.8559145, "epoch": 0.6786613597595647, "grad_norm": 16.040124893188477, "learning_rate": 9.613638368517768e-06, "loss": 0.68039908, "memory(GiB)": 34.88, "step": 25065, "train_speed(iter/s)": 0.414552 }, { "acc": 0.86452961, "epoch": 0.6787967400427801, "grad_norm": 9.413394927978516, "learning_rate": 9.613422644051725e-06, "loss": 0.74126234, "memory(GiB)": 34.88, "step": 25070, "train_speed(iter/s)": 0.414556 }, { "acc": 0.85752163, "epoch": 0.6789321203259957, "grad_norm": 11.158612251281738, "learning_rate": 9.613206861799765e-06, "loss": 0.72587385, "memory(GiB)": 34.88, "step": 25075, "train_speed(iter/s)": 0.41456 }, { "acc": 0.86644726, "epoch": 0.6790675006092113, "grad_norm": 8.786589622497559, "learning_rate": 9.612991021764584e-06, "loss": 0.69213595, "memory(GiB)": 34.88, "step": 25080, "train_speed(iter/s)": 0.414563 }, { "acc": 0.86148481, "epoch": 0.6792028808924269, "grad_norm": 8.71005630493164, "learning_rate": 9.61277512394889e-06, "loss": 0.80462694, "memory(GiB)": 34.88, "step": 25085, "train_speed(iter/s)": 0.414567 }, { "acc": 0.87260456, "epoch": 0.6793382611756423, "grad_norm": 11.372289657592773, "learning_rate": 9.612559168355389e-06, "loss": 0.65616627, "memory(GiB)": 34.88, "step": 25090, "train_speed(iter/s)": 0.41457 }, { "acc": 0.86892977, "epoch": 0.6794736414588579, "grad_norm": 8.148428916931152, "learning_rate": 9.612343154986782e-06, "loss": 0.6500617, "memory(GiB)": 34.88, "step": 25095, "train_speed(iter/s)": 0.414573 }, { "acc": 0.85263996, "epoch": 0.6796090217420735, "grad_norm": 14.77912712097168, "learning_rate": 9.612127083845778e-06, "loss": 0.89746399, "memory(GiB)": 34.88, "step": 25100, "train_speed(iter/s)": 0.414575 }, { "acc": 0.87506781, "epoch": 0.6797444020252891, "grad_norm": 8.466996192932129, "learning_rate": 9.611910954935083e-06, "loss": 0.64970112, "memory(GiB)": 34.88, "step": 25105, "train_speed(iter/s)": 0.414579 }, { "acc": 0.87685652, "epoch": 0.6798797823085045, "grad_norm": 15.955643653869629, "learning_rate": 9.611694768257404e-06, "loss": 0.60370359, "memory(GiB)": 34.88, "step": 25110, "train_speed(iter/s)": 0.414582 }, { "acc": 0.86640272, "epoch": 0.6800151625917201, "grad_norm": 8.00244140625, "learning_rate": 9.61147852381545e-06, "loss": 0.64466476, "memory(GiB)": 34.88, "step": 25115, "train_speed(iter/s)": 0.414586 }, { "acc": 0.86945324, "epoch": 0.6801505428749357, "grad_norm": 15.776087760925293, "learning_rate": 9.61126222161193e-06, "loss": 0.64305048, "memory(GiB)": 34.88, "step": 25120, "train_speed(iter/s)": 0.41459 }, { "acc": 0.8771224, "epoch": 0.6802859231581513, "grad_norm": 6.141834259033203, "learning_rate": 9.611045861649553e-06, "loss": 0.72866526, "memory(GiB)": 34.88, "step": 25125, "train_speed(iter/s)": 0.414593 }, { "acc": 0.88150539, "epoch": 0.6804213034413668, "grad_norm": 4.92049503326416, "learning_rate": 9.610829443931029e-06, "loss": 0.61131082, "memory(GiB)": 34.88, "step": 25130, "train_speed(iter/s)": 0.414597 }, { "acc": 0.86725016, "epoch": 0.6805566837245823, "grad_norm": 13.488064765930176, "learning_rate": 9.610612968459068e-06, "loss": 0.72053223, "memory(GiB)": 34.88, "step": 25135, "train_speed(iter/s)": 0.414601 }, { "acc": 0.8831502, "epoch": 0.6806920640077979, "grad_norm": 5.742204666137695, "learning_rate": 9.610396435236385e-06, "loss": 0.56977882, "memory(GiB)": 34.88, "step": 25140, "train_speed(iter/s)": 0.414604 }, { "acc": 0.86841936, "epoch": 0.6808274442910135, "grad_norm": 7.0713629722595215, "learning_rate": 9.61017984426569e-06, "loss": 0.71248779, "memory(GiB)": 34.88, "step": 25145, "train_speed(iter/s)": 0.414608 }, { "acc": 0.8834053, "epoch": 0.680962824574229, "grad_norm": 11.00752067565918, "learning_rate": 9.609963195549697e-06, "loss": 0.51370468, "memory(GiB)": 34.88, "step": 25150, "train_speed(iter/s)": 0.41461 }, { "acc": 0.86642828, "epoch": 0.6810982048574445, "grad_norm": 15.234317779541016, "learning_rate": 9.60974648909112e-06, "loss": 0.67521443, "memory(GiB)": 34.88, "step": 25155, "train_speed(iter/s)": 0.414614 }, { "acc": 0.850284, "epoch": 0.6812335851406601, "grad_norm": 41.44087219238281, "learning_rate": 9.609529724892673e-06, "loss": 0.76114035, "memory(GiB)": 34.88, "step": 25160, "train_speed(iter/s)": 0.414617 }, { "acc": 0.83152246, "epoch": 0.6813689654238757, "grad_norm": 9.803790092468262, "learning_rate": 9.609312902957074e-06, "loss": 0.93206367, "memory(GiB)": 34.88, "step": 25165, "train_speed(iter/s)": 0.414619 }, { "acc": 0.86128407, "epoch": 0.6815043457070912, "grad_norm": 8.32497501373291, "learning_rate": 9.609096023287033e-06, "loss": 0.70262423, "memory(GiB)": 34.88, "step": 25170, "train_speed(iter/s)": 0.414621 }, { "acc": 0.87675056, "epoch": 0.6816397259903068, "grad_norm": 11.07122802734375, "learning_rate": 9.608879085885275e-06, "loss": 0.595682, "memory(GiB)": 34.88, "step": 25175, "train_speed(iter/s)": 0.414624 }, { "acc": 0.85223532, "epoch": 0.6817751062735223, "grad_norm": 11.799758911132812, "learning_rate": 9.608662090754511e-06, "loss": 0.71685939, "memory(GiB)": 34.88, "step": 25180, "train_speed(iter/s)": 0.414625 }, { "acc": 0.87628632, "epoch": 0.6819104865567379, "grad_norm": 4.499711990356445, "learning_rate": 9.608445037897464e-06, "loss": 0.64841447, "memory(GiB)": 34.88, "step": 25185, "train_speed(iter/s)": 0.414628 }, { "acc": 0.88410225, "epoch": 0.6820458668399534, "grad_norm": 6.021852970123291, "learning_rate": 9.608227927316849e-06, "loss": 0.60610456, "memory(GiB)": 34.88, "step": 25190, "train_speed(iter/s)": 0.414632 }, { "acc": 0.86618805, "epoch": 0.682181247123169, "grad_norm": 9.384607315063477, "learning_rate": 9.608010759015388e-06, "loss": 0.71035271, "memory(GiB)": 34.88, "step": 25195, "train_speed(iter/s)": 0.414635 }, { "acc": 0.86661272, "epoch": 0.6823166274063845, "grad_norm": 8.226588249206543, "learning_rate": 9.607793532995801e-06, "loss": 0.73422647, "memory(GiB)": 34.88, "step": 25200, "train_speed(iter/s)": 0.414638 }, { "acc": 0.86416416, "epoch": 0.6824520076896001, "grad_norm": 13.840510368347168, "learning_rate": 9.607576249260809e-06, "loss": 0.68217263, "memory(GiB)": 34.88, "step": 25205, "train_speed(iter/s)": 0.414641 }, { "acc": 0.86452084, "epoch": 0.6825873879728156, "grad_norm": 6.496129512786865, "learning_rate": 9.607358907813135e-06, "loss": 0.65255661, "memory(GiB)": 34.88, "step": 25210, "train_speed(iter/s)": 0.414645 }, { "acc": 0.86731138, "epoch": 0.6827227682560312, "grad_norm": 8.881689071655273, "learning_rate": 9.6071415086555e-06, "loss": 0.69540691, "memory(GiB)": 34.88, "step": 25215, "train_speed(iter/s)": 0.414649 }, { "acc": 0.87174702, "epoch": 0.6828581485392468, "grad_norm": 15.817368507385254, "learning_rate": 9.606924051790629e-06, "loss": 0.64091702, "memory(GiB)": 34.88, "step": 25220, "train_speed(iter/s)": 0.414653 }, { "acc": 0.87848845, "epoch": 0.6829935288224623, "grad_norm": 8.091449737548828, "learning_rate": 9.606706537221243e-06, "loss": 0.65966949, "memory(GiB)": 34.88, "step": 25225, "train_speed(iter/s)": 0.414656 }, { "acc": 0.86241331, "epoch": 0.6831289091056778, "grad_norm": 8.730466842651367, "learning_rate": 9.60648896495007e-06, "loss": 0.65146742, "memory(GiB)": 34.88, "step": 25230, "train_speed(iter/s)": 0.414658 }, { "acc": 0.87410574, "epoch": 0.6832642893888934, "grad_norm": 9.52750015258789, "learning_rate": 9.606271334979837e-06, "loss": 0.66359129, "memory(GiB)": 34.88, "step": 25235, "train_speed(iter/s)": 0.414661 }, { "acc": 0.87748585, "epoch": 0.683399669672109, "grad_norm": 5.957272052764893, "learning_rate": 9.606053647313264e-06, "loss": 0.67794952, "memory(GiB)": 34.88, "step": 25240, "train_speed(iter/s)": 0.414664 }, { "acc": 0.87577858, "epoch": 0.6835350499553245, "grad_norm": 12.205976486206055, "learning_rate": 9.605835901953084e-06, "loss": 0.71017303, "memory(GiB)": 34.88, "step": 25245, "train_speed(iter/s)": 0.414668 }, { "acc": 0.86929255, "epoch": 0.68367043023854, "grad_norm": 10.476896286010742, "learning_rate": 9.60561809890202e-06, "loss": 0.60526514, "memory(GiB)": 34.88, "step": 25250, "train_speed(iter/s)": 0.414672 }, { "acc": 0.83613625, "epoch": 0.6838058105217556, "grad_norm": 21.026504516601562, "learning_rate": 9.605400238162805e-06, "loss": 0.82718716, "memory(GiB)": 34.88, "step": 25255, "train_speed(iter/s)": 0.414676 }, { "acc": 0.86419296, "epoch": 0.6839411908049712, "grad_norm": 29.498964309692383, "learning_rate": 9.605182319738166e-06, "loss": 0.73033762, "memory(GiB)": 34.88, "step": 25260, "train_speed(iter/s)": 0.414679 }, { "acc": 0.85481462, "epoch": 0.6840765710881868, "grad_norm": 6.039641857147217, "learning_rate": 9.604964343630831e-06, "loss": 0.65985909, "memory(GiB)": 34.88, "step": 25265, "train_speed(iter/s)": 0.414683 }, { "acc": 0.88620682, "epoch": 0.6842119513714022, "grad_norm": 22.596038818359375, "learning_rate": 9.604746309843535e-06, "loss": 0.58591576, "memory(GiB)": 34.88, "step": 25270, "train_speed(iter/s)": 0.414687 }, { "acc": 0.86646242, "epoch": 0.6843473316546178, "grad_norm": 7.5171308517456055, "learning_rate": 9.604528218379003e-06, "loss": 0.60029802, "memory(GiB)": 34.88, "step": 25275, "train_speed(iter/s)": 0.41469 }, { "acc": 0.85524635, "epoch": 0.6844827119378334, "grad_norm": 7.396013259887695, "learning_rate": 9.604310069239974e-06, "loss": 0.70185304, "memory(GiB)": 34.88, "step": 25280, "train_speed(iter/s)": 0.414693 }, { "acc": 0.87488413, "epoch": 0.684618092221049, "grad_norm": 10.066043853759766, "learning_rate": 9.604091862429176e-06, "loss": 0.59925961, "memory(GiB)": 34.88, "step": 25285, "train_speed(iter/s)": 0.414697 }, { "acc": 0.87480593, "epoch": 0.6847534725042644, "grad_norm": 6.552973747253418, "learning_rate": 9.603873597949344e-06, "loss": 0.5722816, "memory(GiB)": 34.88, "step": 25290, "train_speed(iter/s)": 0.4147 }, { "acc": 0.85929375, "epoch": 0.68488885278748, "grad_norm": 4.475232124328613, "learning_rate": 9.60365527580321e-06, "loss": 0.6660758, "memory(GiB)": 34.88, "step": 25295, "train_speed(iter/s)": 0.414704 }, { "acc": 0.8683321, "epoch": 0.6850242330706956, "grad_norm": 11.95503044128418, "learning_rate": 9.603436895993514e-06, "loss": 0.60862808, "memory(GiB)": 34.88, "step": 25300, "train_speed(iter/s)": 0.414708 }, { "acc": 0.8749691, "epoch": 0.6851596133539112, "grad_norm": 9.825030326843262, "learning_rate": 9.603218458522987e-06, "loss": 0.71220732, "memory(GiB)": 34.88, "step": 25305, "train_speed(iter/s)": 0.414712 }, { "acc": 0.87536049, "epoch": 0.6852949936371266, "grad_norm": 8.571866989135742, "learning_rate": 9.60299996339437e-06, "loss": 0.61204743, "memory(GiB)": 34.88, "step": 25310, "train_speed(iter/s)": 0.414716 }, { "acc": 0.8470129, "epoch": 0.6854303739203422, "grad_norm": 9.24087905883789, "learning_rate": 9.602781410610394e-06, "loss": 0.77676086, "memory(GiB)": 34.88, "step": 25315, "train_speed(iter/s)": 0.41472 }, { "acc": 0.87102509, "epoch": 0.6855657542035578, "grad_norm": 9.332676887512207, "learning_rate": 9.602562800173798e-06, "loss": 0.60033507, "memory(GiB)": 34.88, "step": 25320, "train_speed(iter/s)": 0.414723 }, { "acc": 0.87843723, "epoch": 0.6857011344867734, "grad_norm": 9.924758911132812, "learning_rate": 9.602344132087326e-06, "loss": 0.65232277, "memory(GiB)": 34.88, "step": 25325, "train_speed(iter/s)": 0.414727 }, { "acc": 0.85829964, "epoch": 0.6858365147699889, "grad_norm": 10.829075813293457, "learning_rate": 9.602125406353713e-06, "loss": 0.78478565, "memory(GiB)": 34.88, "step": 25330, "train_speed(iter/s)": 0.414731 }, { "acc": 0.88390083, "epoch": 0.6859718950532044, "grad_norm": 6.713845252990723, "learning_rate": 9.601906622975699e-06, "loss": 0.59355106, "memory(GiB)": 34.88, "step": 25335, "train_speed(iter/s)": 0.414735 }, { "acc": 0.88842726, "epoch": 0.68610727533642, "grad_norm": 7.794294357299805, "learning_rate": 9.601687781956027e-06, "loss": 0.53504553, "memory(GiB)": 34.88, "step": 25340, "train_speed(iter/s)": 0.414738 }, { "acc": 0.85887318, "epoch": 0.6862426556196356, "grad_norm": 12.292571067810059, "learning_rate": 9.601468883297433e-06, "loss": 0.78401384, "memory(GiB)": 34.88, "step": 25345, "train_speed(iter/s)": 0.414742 }, { "acc": 0.85220566, "epoch": 0.6863780359028511, "grad_norm": 7.344164848327637, "learning_rate": 9.601249927002666e-06, "loss": 0.76265006, "memory(GiB)": 34.88, "step": 25350, "train_speed(iter/s)": 0.414745 }, { "acc": 0.8652401, "epoch": 0.6865134161860666, "grad_norm": 30.50529670715332, "learning_rate": 9.601030913074465e-06, "loss": 0.67092304, "memory(GiB)": 34.88, "step": 25355, "train_speed(iter/s)": 0.414748 }, { "acc": 0.87853394, "epoch": 0.6866487964692822, "grad_norm": 9.876436233520508, "learning_rate": 9.600811841515573e-06, "loss": 0.61906309, "memory(GiB)": 34.88, "step": 25360, "train_speed(iter/s)": 0.414752 }, { "acc": 0.86212101, "epoch": 0.6867841767524978, "grad_norm": 16.185285568237305, "learning_rate": 9.600592712328738e-06, "loss": 0.64208317, "memory(GiB)": 34.88, "step": 25365, "train_speed(iter/s)": 0.414756 }, { "acc": 0.8697073, "epoch": 0.6869195570357133, "grad_norm": 8.194626808166504, "learning_rate": 9.600373525516701e-06, "loss": 0.69020414, "memory(GiB)": 34.88, "step": 25370, "train_speed(iter/s)": 0.41476 }, { "acc": 0.86747618, "epoch": 0.6870549373189289, "grad_norm": 7.990865230560303, "learning_rate": 9.60015428108221e-06, "loss": 0.68973117, "memory(GiB)": 34.88, "step": 25375, "train_speed(iter/s)": 0.414763 }, { "acc": 0.86105289, "epoch": 0.6871903176021444, "grad_norm": 20.816234588623047, "learning_rate": 9.599934979028008e-06, "loss": 0.70557022, "memory(GiB)": 34.88, "step": 25380, "train_speed(iter/s)": 0.414766 }, { "acc": 0.85674419, "epoch": 0.68732569788536, "grad_norm": 8.651104927062988, "learning_rate": 9.599715619356849e-06, "loss": 0.7500124, "memory(GiB)": 34.88, "step": 25385, "train_speed(iter/s)": 0.41477 }, { "acc": 0.89555569, "epoch": 0.6874610781685755, "grad_norm": 6.324034690856934, "learning_rate": 9.599496202071476e-06, "loss": 0.40905385, "memory(GiB)": 34.88, "step": 25390, "train_speed(iter/s)": 0.414774 }, { "acc": 0.87305222, "epoch": 0.6875964584517911, "grad_norm": 9.750076293945312, "learning_rate": 9.599276727174637e-06, "loss": 0.65654669, "memory(GiB)": 34.88, "step": 25395, "train_speed(iter/s)": 0.414776 }, { "acc": 0.85259323, "epoch": 0.6877318387350067, "grad_norm": 12.85047721862793, "learning_rate": 9.599057194669083e-06, "loss": 0.72594733, "memory(GiB)": 34.88, "step": 25400, "train_speed(iter/s)": 0.41478 }, { "acc": 0.88112764, "epoch": 0.6878672190182222, "grad_norm": 7.353961944580078, "learning_rate": 9.598837604557565e-06, "loss": 0.52703381, "memory(GiB)": 34.88, "step": 25405, "train_speed(iter/s)": 0.414784 }, { "acc": 0.88098698, "epoch": 0.6880025993014377, "grad_norm": 8.21356201171875, "learning_rate": 9.598617956842834e-06, "loss": 0.70993834, "memory(GiB)": 34.88, "step": 25410, "train_speed(iter/s)": 0.414788 }, { "acc": 0.83692665, "epoch": 0.6881379795846533, "grad_norm": 14.185747146606445, "learning_rate": 9.598398251527637e-06, "loss": 0.93140278, "memory(GiB)": 34.88, "step": 25415, "train_speed(iter/s)": 0.414791 }, { "acc": 0.85115147, "epoch": 0.6882733598678689, "grad_norm": 13.592535018920898, "learning_rate": 9.59817848861473e-06, "loss": 0.84278069, "memory(GiB)": 34.88, "step": 25420, "train_speed(iter/s)": 0.414794 }, { "acc": 0.89391651, "epoch": 0.6884087401510844, "grad_norm": 7.0404863357543945, "learning_rate": 9.59795866810687e-06, "loss": 0.4773469, "memory(GiB)": 34.88, "step": 25425, "train_speed(iter/s)": 0.414798 }, { "acc": 0.87713928, "epoch": 0.6885441204342999, "grad_norm": 11.846699714660645, "learning_rate": 9.597738790006802e-06, "loss": 0.62411356, "memory(GiB)": 34.88, "step": 25430, "train_speed(iter/s)": 0.414801 }, { "acc": 0.86869278, "epoch": 0.6886795007175155, "grad_norm": 12.306727409362793, "learning_rate": 9.597518854317287e-06, "loss": 0.66717553, "memory(GiB)": 34.88, "step": 25435, "train_speed(iter/s)": 0.414805 }, { "acc": 0.86212492, "epoch": 0.6888148810007311, "grad_norm": 8.10827922821045, "learning_rate": 9.597298861041075e-06, "loss": 0.71684065, "memory(GiB)": 34.88, "step": 25440, "train_speed(iter/s)": 0.414809 }, { "acc": 0.85666885, "epoch": 0.6889502612839467, "grad_norm": 8.485672950744629, "learning_rate": 9.597078810180925e-06, "loss": 0.78537235, "memory(GiB)": 34.88, "step": 25445, "train_speed(iter/s)": 0.414813 }, { "acc": 0.83682241, "epoch": 0.6890856415671621, "grad_norm": 7.758424282073975, "learning_rate": 9.596858701739595e-06, "loss": 0.81586018, "memory(GiB)": 34.88, "step": 25450, "train_speed(iter/s)": 0.414816 }, { "acc": 0.86244287, "epoch": 0.6892210218503777, "grad_norm": 17.988752365112305, "learning_rate": 9.59663853571984e-06, "loss": 0.75088048, "memory(GiB)": 34.88, "step": 25455, "train_speed(iter/s)": 0.41482 }, { "acc": 0.8498209, "epoch": 0.6893564021335933, "grad_norm": 9.311698913574219, "learning_rate": 9.596418312124417e-06, "loss": 0.82789755, "memory(GiB)": 34.88, "step": 25460, "train_speed(iter/s)": 0.414822 }, { "acc": 0.88761444, "epoch": 0.6894917824168089, "grad_norm": 18.417335510253906, "learning_rate": 9.596198030956087e-06, "loss": 0.56492443, "memory(GiB)": 34.88, "step": 25465, "train_speed(iter/s)": 0.414826 }, { "acc": 0.86505852, "epoch": 0.6896271627000243, "grad_norm": 8.87582778930664, "learning_rate": 9.59597769221761e-06, "loss": 0.65663724, "memory(GiB)": 34.88, "step": 25470, "train_speed(iter/s)": 0.41483 }, { "acc": 0.8632103, "epoch": 0.6897625429832399, "grad_norm": 10.684952735900879, "learning_rate": 9.595757295911744e-06, "loss": 0.67077141, "memory(GiB)": 34.88, "step": 25475, "train_speed(iter/s)": 0.414834 }, { "acc": 0.8721179, "epoch": 0.6898979232664555, "grad_norm": 6.530065059661865, "learning_rate": 9.595536842041248e-06, "loss": 0.6372695, "memory(GiB)": 34.88, "step": 25480, "train_speed(iter/s)": 0.414838 }, { "acc": 0.86024179, "epoch": 0.6900333035496711, "grad_norm": 9.824944496154785, "learning_rate": 9.59531633060889e-06, "loss": 0.77324476, "memory(GiB)": 34.88, "step": 25485, "train_speed(iter/s)": 0.414842 }, { "acc": 0.85131111, "epoch": 0.6901686838328865, "grad_norm": 16.868040084838867, "learning_rate": 9.595095761617429e-06, "loss": 0.82540607, "memory(GiB)": 34.88, "step": 25490, "train_speed(iter/s)": 0.414844 }, { "acc": 0.8450634, "epoch": 0.6903040641161021, "grad_norm": 9.499857902526855, "learning_rate": 9.594875135069624e-06, "loss": 0.91397247, "memory(GiB)": 34.88, "step": 25495, "train_speed(iter/s)": 0.414848 }, { "acc": 0.85564632, "epoch": 0.6904394443993177, "grad_norm": 6.874720573425293, "learning_rate": 9.594654450968245e-06, "loss": 0.78719196, "memory(GiB)": 34.88, "step": 25500, "train_speed(iter/s)": 0.414851 }, { "acc": 0.88441753, "epoch": 0.6905748246825333, "grad_norm": 4.539665222167969, "learning_rate": 9.594433709316054e-06, "loss": 0.61027341, "memory(GiB)": 34.88, "step": 25505, "train_speed(iter/s)": 0.414855 }, { "acc": 0.88231039, "epoch": 0.6907102049657488, "grad_norm": 13.51410961151123, "learning_rate": 9.594212910115816e-06, "loss": 0.6936861, "memory(GiB)": 34.88, "step": 25510, "train_speed(iter/s)": 0.414859 }, { "acc": 0.87383137, "epoch": 0.6908455852489643, "grad_norm": 8.732793807983398, "learning_rate": 9.593992053370296e-06, "loss": 0.64104023, "memory(GiB)": 34.88, "step": 25515, "train_speed(iter/s)": 0.414862 }, { "acc": 0.87254782, "epoch": 0.6909809655321799, "grad_norm": 4.79699182510376, "learning_rate": 9.593771139082263e-06, "loss": 0.69954109, "memory(GiB)": 34.88, "step": 25520, "train_speed(iter/s)": 0.414865 }, { "acc": 0.85661545, "epoch": 0.6911163458153955, "grad_norm": 10.899574279785156, "learning_rate": 9.593550167254483e-06, "loss": 0.75621805, "memory(GiB)": 34.88, "step": 25525, "train_speed(iter/s)": 0.414869 }, { "acc": 0.87554836, "epoch": 0.691251726098611, "grad_norm": 7.899633407592773, "learning_rate": 9.593329137889725e-06, "loss": 0.59741592, "memory(GiB)": 34.88, "step": 25530, "train_speed(iter/s)": 0.414873 }, { "acc": 0.85450211, "epoch": 0.6913871063818265, "grad_norm": 12.852184295654297, "learning_rate": 9.593108050990757e-06, "loss": 0.78588495, "memory(GiB)": 34.88, "step": 25535, "train_speed(iter/s)": 0.414877 }, { "acc": 0.8704319, "epoch": 0.6915224866650421, "grad_norm": 6.621373653411865, "learning_rate": 9.592886906560349e-06, "loss": 0.6303319, "memory(GiB)": 34.88, "step": 25540, "train_speed(iter/s)": 0.414881 }, { "acc": 0.8966856, "epoch": 0.6916578669482577, "grad_norm": 6.082583904266357, "learning_rate": 9.592665704601269e-06, "loss": 0.48752913, "memory(GiB)": 34.88, "step": 25545, "train_speed(iter/s)": 0.414884 }, { "acc": 0.88057461, "epoch": 0.6917932472314732, "grad_norm": 7.044782638549805, "learning_rate": 9.59244444511629e-06, "loss": 0.60985632, "memory(GiB)": 34.88, "step": 25550, "train_speed(iter/s)": 0.414888 }, { "acc": 0.88197174, "epoch": 0.6919286275146888, "grad_norm": 5.467309951782227, "learning_rate": 9.592223128108186e-06, "loss": 0.600244, "memory(GiB)": 34.88, "step": 25555, "train_speed(iter/s)": 0.414892 }, { "acc": 0.87558928, "epoch": 0.6920640077979043, "grad_norm": 18.75627899169922, "learning_rate": 9.592001753579724e-06, "loss": 0.61990652, "memory(GiB)": 34.88, "step": 25560, "train_speed(iter/s)": 0.414896 }, { "acc": 0.85972633, "epoch": 0.6921993880811199, "grad_norm": 6.190335750579834, "learning_rate": 9.591780321533684e-06, "loss": 0.77429786, "memory(GiB)": 34.88, "step": 25565, "train_speed(iter/s)": 0.414899 }, { "acc": 0.85109615, "epoch": 0.6923347683643354, "grad_norm": 16.70707893371582, "learning_rate": 9.591558831972833e-06, "loss": 0.84282618, "memory(GiB)": 34.88, "step": 25570, "train_speed(iter/s)": 0.414903 }, { "acc": 0.8824358, "epoch": 0.692470148647551, "grad_norm": 9.252204895019531, "learning_rate": 9.59133728489995e-06, "loss": 0.54933243, "memory(GiB)": 34.88, "step": 25575, "train_speed(iter/s)": 0.414907 }, { "acc": 0.84321213, "epoch": 0.6926055289307665, "grad_norm": 17.691162109375, "learning_rate": 9.59111568031781e-06, "loss": 0.78434982, "memory(GiB)": 34.88, "step": 25580, "train_speed(iter/s)": 0.414911 }, { "acc": 0.87136278, "epoch": 0.6927409092139821, "grad_norm": 10.798408508300781, "learning_rate": 9.590894018229188e-06, "loss": 0.66929984, "memory(GiB)": 34.88, "step": 25585, "train_speed(iter/s)": 0.414915 }, { "acc": 0.86497526, "epoch": 0.6928762894971976, "grad_norm": 10.72022819519043, "learning_rate": 9.590672298636859e-06, "loss": 0.7187993, "memory(GiB)": 34.88, "step": 25590, "train_speed(iter/s)": 0.414919 }, { "acc": 0.84865427, "epoch": 0.6930116697804132, "grad_norm": 6.568930149078369, "learning_rate": 9.590450521543602e-06, "loss": 0.69381943, "memory(GiB)": 34.88, "step": 25595, "train_speed(iter/s)": 0.414923 }, { "acc": 0.86265392, "epoch": 0.6931470500636288, "grad_norm": 6.824554920196533, "learning_rate": 9.590228686952197e-06, "loss": 0.76032581, "memory(GiB)": 34.88, "step": 25600, "train_speed(iter/s)": 0.414926 }, { "acc": 0.85836792, "epoch": 0.6932824303468443, "grad_norm": 9.445237159729004, "learning_rate": 9.590006794865422e-06, "loss": 0.71667223, "memory(GiB)": 34.88, "step": 25605, "train_speed(iter/s)": 0.414929 }, { "acc": 0.87556877, "epoch": 0.6934178106300598, "grad_norm": 8.611693382263184, "learning_rate": 9.589784845286054e-06, "loss": 0.63433151, "memory(GiB)": 34.88, "step": 25610, "train_speed(iter/s)": 0.414933 }, { "acc": 0.85642023, "epoch": 0.6935531909132754, "grad_norm": 9.403270721435547, "learning_rate": 9.589562838216877e-06, "loss": 0.74289546, "memory(GiB)": 34.88, "step": 25615, "train_speed(iter/s)": 0.414937 }, { "acc": 0.87111111, "epoch": 0.693688571196491, "grad_norm": 12.154858589172363, "learning_rate": 9.58934077366067e-06, "loss": 0.61418772, "memory(GiB)": 34.88, "step": 25620, "train_speed(iter/s)": 0.414941 }, { "acc": 0.86531363, "epoch": 0.6938239514797065, "grad_norm": 5.925605297088623, "learning_rate": 9.589118651620214e-06, "loss": 0.66957107, "memory(GiB)": 34.88, "step": 25625, "train_speed(iter/s)": 0.414944 }, { "acc": 0.86254215, "epoch": 0.693959331762922, "grad_norm": 11.672176361083984, "learning_rate": 9.588896472098295e-06, "loss": 0.74976068, "memory(GiB)": 34.88, "step": 25630, "train_speed(iter/s)": 0.414947 }, { "acc": 0.90042334, "epoch": 0.6940947120461376, "grad_norm": 4.44218111038208, "learning_rate": 9.588674235097692e-06, "loss": 0.52756324, "memory(GiB)": 34.88, "step": 25635, "train_speed(iter/s)": 0.414951 }, { "acc": 0.89182615, "epoch": 0.6942300923293532, "grad_norm": 7.821364402770996, "learning_rate": 9.588451940621191e-06, "loss": 0.61935601, "memory(GiB)": 34.88, "step": 25640, "train_speed(iter/s)": 0.414955 }, { "acc": 0.87285357, "epoch": 0.6943654726125688, "grad_norm": 15.22555160522461, "learning_rate": 9.58822958867158e-06, "loss": 0.72115197, "memory(GiB)": 34.88, "step": 25645, "train_speed(iter/s)": 0.414959 }, { "acc": 0.88651466, "epoch": 0.6945008528957842, "grad_norm": 7.993630886077881, "learning_rate": 9.588007179251638e-06, "loss": 0.55117731, "memory(GiB)": 34.88, "step": 25650, "train_speed(iter/s)": 0.414962 }, { "acc": 0.87434511, "epoch": 0.6946362331789998, "grad_norm": 7.440519332885742, "learning_rate": 9.587784712364154e-06, "loss": 0.58138361, "memory(GiB)": 34.88, "step": 25655, "train_speed(iter/s)": 0.414966 }, { "acc": 0.86288319, "epoch": 0.6947716134622154, "grad_norm": 8.777229309082031, "learning_rate": 9.587562188011918e-06, "loss": 0.71736708, "memory(GiB)": 34.88, "step": 25660, "train_speed(iter/s)": 0.414967 }, { "acc": 0.88160019, "epoch": 0.694906993745431, "grad_norm": 12.021284103393555, "learning_rate": 9.587339606197711e-06, "loss": 0.63374395, "memory(GiB)": 34.88, "step": 25665, "train_speed(iter/s)": 0.414971 }, { "acc": 0.85972309, "epoch": 0.6950423740286464, "grad_norm": 19.027938842773438, "learning_rate": 9.587116966924326e-06, "loss": 0.80923481, "memory(GiB)": 34.88, "step": 25670, "train_speed(iter/s)": 0.414975 }, { "acc": 0.85476208, "epoch": 0.695177754311862, "grad_norm": 16.152650833129883, "learning_rate": 9.586894270194552e-06, "loss": 0.7435679, "memory(GiB)": 34.88, "step": 25675, "train_speed(iter/s)": 0.414979 }, { "acc": 0.87105389, "epoch": 0.6953131345950776, "grad_norm": 7.797977924346924, "learning_rate": 9.586671516011174e-06, "loss": 0.66653767, "memory(GiB)": 34.88, "step": 25680, "train_speed(iter/s)": 0.414982 }, { "acc": 0.86456566, "epoch": 0.6954485148782932, "grad_norm": 7.09028434753418, "learning_rate": 9.58644870437699e-06, "loss": 0.70677409, "memory(GiB)": 34.88, "step": 25685, "train_speed(iter/s)": 0.414986 }, { "acc": 0.8578763, "epoch": 0.6955838951615086, "grad_norm": 11.33210563659668, "learning_rate": 9.586225835294785e-06, "loss": 0.81100111, "memory(GiB)": 34.88, "step": 25690, "train_speed(iter/s)": 0.41499 }, { "acc": 0.87503595, "epoch": 0.6957192754447242, "grad_norm": 5.995726108551025, "learning_rate": 9.586002908767353e-06, "loss": 0.59410524, "memory(GiB)": 34.88, "step": 25695, "train_speed(iter/s)": 0.414994 }, { "acc": 0.84515409, "epoch": 0.6958546557279398, "grad_norm": 11.041603088378906, "learning_rate": 9.585779924797489e-06, "loss": 0.83778486, "memory(GiB)": 34.88, "step": 25700, "train_speed(iter/s)": 0.414997 }, { "acc": 0.86329365, "epoch": 0.6959900360111554, "grad_norm": 8.34824275970459, "learning_rate": 9.58555688338798e-06, "loss": 0.68546863, "memory(GiB)": 34.88, "step": 25705, "train_speed(iter/s)": 0.415 }, { "acc": 0.86847038, "epoch": 0.6961254162943709, "grad_norm": 5.7376179695129395, "learning_rate": 9.585333784541624e-06, "loss": 0.78492613, "memory(GiB)": 34.88, "step": 25710, "train_speed(iter/s)": 0.415004 }, { "acc": 0.8499567, "epoch": 0.6962607965775864, "grad_norm": 11.775886535644531, "learning_rate": 9.585110628261218e-06, "loss": 0.85050335, "memory(GiB)": 34.88, "step": 25715, "train_speed(iter/s)": 0.415007 }, { "acc": 0.8716856, "epoch": 0.696396176860802, "grad_norm": 13.040091514587402, "learning_rate": 9.584887414549556e-06, "loss": 0.65426841, "memory(GiB)": 34.88, "step": 25720, "train_speed(iter/s)": 0.415011 }, { "acc": 0.85864525, "epoch": 0.6965315571440176, "grad_norm": 15.588028907775879, "learning_rate": 9.584664143409432e-06, "loss": 0.73546419, "memory(GiB)": 34.88, "step": 25725, "train_speed(iter/s)": 0.415015 }, { "acc": 0.8621068, "epoch": 0.6966669374272331, "grad_norm": 9.605596542358398, "learning_rate": 9.584440814843644e-06, "loss": 0.80629005, "memory(GiB)": 34.88, "step": 25730, "train_speed(iter/s)": 0.415019 }, { "acc": 0.88291903, "epoch": 0.6968023177104486, "grad_norm": 5.306771278381348, "learning_rate": 9.58421742885499e-06, "loss": 0.53074942, "memory(GiB)": 34.88, "step": 25735, "train_speed(iter/s)": 0.415023 }, { "acc": 0.86058846, "epoch": 0.6969376979936642, "grad_norm": 7.593420505523682, "learning_rate": 9.583993985446267e-06, "loss": 0.71205845, "memory(GiB)": 34.88, "step": 25740, "train_speed(iter/s)": 0.415026 }, { "acc": 0.85626268, "epoch": 0.6970730782768798, "grad_norm": 8.70753288269043, "learning_rate": 9.583770484620278e-06, "loss": 0.65267639, "memory(GiB)": 34.88, "step": 25745, "train_speed(iter/s)": 0.41503 }, { "acc": 0.86659775, "epoch": 0.6972084585600953, "grad_norm": 25.95952606201172, "learning_rate": 9.583546926379819e-06, "loss": 0.69504547, "memory(GiB)": 34.88, "step": 25750, "train_speed(iter/s)": 0.415033 }, { "acc": 0.87166843, "epoch": 0.6973438388433109, "grad_norm": 12.181469917297363, "learning_rate": 9.58332331072769e-06, "loss": 0.64319296, "memory(GiB)": 34.88, "step": 25755, "train_speed(iter/s)": 0.415037 }, { "acc": 0.88400536, "epoch": 0.6974792191265264, "grad_norm": 7.116579055786133, "learning_rate": 9.583099637666695e-06, "loss": 0.59480639, "memory(GiB)": 34.88, "step": 25760, "train_speed(iter/s)": 0.415041 }, { "acc": 0.84773035, "epoch": 0.697614599409742, "grad_norm": 7.9130401611328125, "learning_rate": 9.582875907199636e-06, "loss": 0.75288882, "memory(GiB)": 34.88, "step": 25765, "train_speed(iter/s)": 0.415045 }, { "acc": 0.87416553, "epoch": 0.6977499796929575, "grad_norm": 21.058820724487305, "learning_rate": 9.582652119329313e-06, "loss": 0.54744873, "memory(GiB)": 34.88, "step": 25770, "train_speed(iter/s)": 0.415047 }, { "acc": 0.87087317, "epoch": 0.6978853599761731, "grad_norm": 4.914037227630615, "learning_rate": 9.58242827405853e-06, "loss": 0.61087122, "memory(GiB)": 34.88, "step": 25775, "train_speed(iter/s)": 0.415051 }, { "acc": 0.85750771, "epoch": 0.6980207402593887, "grad_norm": 10.518889427185059, "learning_rate": 9.582204371390092e-06, "loss": 0.72206445, "memory(GiB)": 34.88, "step": 25780, "train_speed(iter/s)": 0.415055 }, { "acc": 0.87783442, "epoch": 0.6981561205426042, "grad_norm": 13.390941619873047, "learning_rate": 9.581980411326805e-06, "loss": 0.62269335, "memory(GiB)": 34.88, "step": 25785, "train_speed(iter/s)": 0.415059 }, { "acc": 0.85262547, "epoch": 0.6982915008258197, "grad_norm": 19.125566482543945, "learning_rate": 9.581756393871472e-06, "loss": 0.86522465, "memory(GiB)": 34.88, "step": 25790, "train_speed(iter/s)": 0.415061 }, { "acc": 0.86642237, "epoch": 0.6984268811090353, "grad_norm": 13.589435577392578, "learning_rate": 9.581532319026903e-06, "loss": 0.68094015, "memory(GiB)": 34.88, "step": 25795, "train_speed(iter/s)": 0.415065 }, { "acc": 0.86791277, "epoch": 0.6985622613922509, "grad_norm": 11.240047454833984, "learning_rate": 9.581308186795901e-06, "loss": 0.70656586, "memory(GiB)": 34.88, "step": 25800, "train_speed(iter/s)": 0.415068 }, { "acc": 0.86561241, "epoch": 0.6986976416754663, "grad_norm": 9.951333045959473, "learning_rate": 9.581083997181274e-06, "loss": 0.64743652, "memory(GiB)": 34.88, "step": 25805, "train_speed(iter/s)": 0.415072 }, { "acc": 0.87031422, "epoch": 0.6988330219586819, "grad_norm": 5.523580074310303, "learning_rate": 9.580859750185832e-06, "loss": 0.65485377, "memory(GiB)": 34.88, "step": 25810, "train_speed(iter/s)": 0.415075 }, { "acc": 0.86740189, "epoch": 0.6989684022418975, "grad_norm": 19.110090255737305, "learning_rate": 9.580635445812385e-06, "loss": 0.65591612, "memory(GiB)": 34.88, "step": 25815, "train_speed(iter/s)": 0.415079 }, { "acc": 0.8486825, "epoch": 0.6991037825251131, "grad_norm": 10.658563613891602, "learning_rate": 9.580411084063742e-06, "loss": 0.75950317, "memory(GiB)": 34.88, "step": 25820, "train_speed(iter/s)": 0.415082 }, { "acc": 0.89045649, "epoch": 0.6992391628083285, "grad_norm": 6.140607833862305, "learning_rate": 9.58018666494271e-06, "loss": 0.48966346, "memory(GiB)": 34.88, "step": 25825, "train_speed(iter/s)": 0.415085 }, { "acc": 0.85835686, "epoch": 0.6993745430915441, "grad_norm": 9.720125198364258, "learning_rate": 9.579962188452106e-06, "loss": 0.66917725, "memory(GiB)": 34.88, "step": 25830, "train_speed(iter/s)": 0.415089 }, { "acc": 0.88101749, "epoch": 0.6995099233747597, "grad_norm": 8.714509963989258, "learning_rate": 9.57973765459474e-06, "loss": 0.52834401, "memory(GiB)": 34.88, "step": 25835, "train_speed(iter/s)": 0.415092 }, { "acc": 0.84412184, "epoch": 0.6996453036579753, "grad_norm": 8.987459182739258, "learning_rate": 9.579513063373424e-06, "loss": 0.95924759, "memory(GiB)": 34.88, "step": 25840, "train_speed(iter/s)": 0.415096 }, { "acc": 0.86482649, "epoch": 0.6997806839411908, "grad_norm": 17.37261199951172, "learning_rate": 9.57928841479097e-06, "loss": 0.7079133, "memory(GiB)": 34.88, "step": 25845, "train_speed(iter/s)": 0.415099 }, { "acc": 0.88316412, "epoch": 0.6999160642244063, "grad_norm": 20.671714782714844, "learning_rate": 9.579063708850194e-06, "loss": 0.59674788, "memory(GiB)": 34.88, "step": 25850, "train_speed(iter/s)": 0.415102 }, { "acc": 0.8719265, "epoch": 0.7000514445076219, "grad_norm": 6.954412937164307, "learning_rate": 9.578838945553912e-06, "loss": 0.67205133, "memory(GiB)": 34.88, "step": 25855, "train_speed(iter/s)": 0.415106 }, { "acc": 0.8780488, "epoch": 0.7001868247908375, "grad_norm": 6.754720211029053, "learning_rate": 9.578614124904937e-06, "loss": 0.67962027, "memory(GiB)": 34.88, "step": 25860, "train_speed(iter/s)": 0.41511 }, { "acc": 0.85979815, "epoch": 0.700322205074053, "grad_norm": 8.058832168579102, "learning_rate": 9.578389246906089e-06, "loss": 0.67704916, "memory(GiB)": 34.88, "step": 25865, "train_speed(iter/s)": 0.415113 }, { "acc": 0.86761141, "epoch": 0.7004575853572685, "grad_norm": 12.214723587036133, "learning_rate": 9.57816431156018e-06, "loss": 0.66902533, "memory(GiB)": 34.88, "step": 25870, "train_speed(iter/s)": 0.415117 }, { "acc": 0.84534826, "epoch": 0.7005929656404841, "grad_norm": 9.498770713806152, "learning_rate": 9.57793931887003e-06, "loss": 0.77968788, "memory(GiB)": 34.88, "step": 25875, "train_speed(iter/s)": 0.41512 }, { "acc": 0.8549345, "epoch": 0.7007283459236997, "grad_norm": 14.725030899047852, "learning_rate": 9.577714268838459e-06, "loss": 0.73426847, "memory(GiB)": 34.88, "step": 25880, "train_speed(iter/s)": 0.415122 }, { "acc": 0.87708302, "epoch": 0.7008637262069152, "grad_norm": 10.93556022644043, "learning_rate": 9.577489161468286e-06, "loss": 0.66031094, "memory(GiB)": 34.88, "step": 25885, "train_speed(iter/s)": 0.415125 }, { "acc": 0.85195141, "epoch": 0.7009991064901308, "grad_norm": 13.920172691345215, "learning_rate": 9.577263996762329e-06, "loss": 0.81191616, "memory(GiB)": 34.88, "step": 25890, "train_speed(iter/s)": 0.415129 }, { "acc": 0.87883816, "epoch": 0.7011344867733463, "grad_norm": 4.7033257484436035, "learning_rate": 9.57703877472341e-06, "loss": 0.59670362, "memory(GiB)": 34.88, "step": 25895, "train_speed(iter/s)": 0.415132 }, { "acc": 0.87039223, "epoch": 0.7012698670565619, "grad_norm": 10.147217750549316, "learning_rate": 9.57681349535435e-06, "loss": 0.68243876, "memory(GiB)": 34.88, "step": 25900, "train_speed(iter/s)": 0.415136 }, { "acc": 0.87702541, "epoch": 0.7014052473397774, "grad_norm": 8.131919860839844, "learning_rate": 9.576588158657971e-06, "loss": 0.61223674, "memory(GiB)": 34.88, "step": 25905, "train_speed(iter/s)": 0.41514 }, { "acc": 0.86972132, "epoch": 0.701540627622993, "grad_norm": 10.121916770935059, "learning_rate": 9.576362764637095e-06, "loss": 0.67092829, "memory(GiB)": 34.88, "step": 25910, "train_speed(iter/s)": 0.415144 }, { "acc": 0.88380203, "epoch": 0.7016760079062085, "grad_norm": 10.228730201721191, "learning_rate": 9.576137313294547e-06, "loss": 0.61409798, "memory(GiB)": 34.88, "step": 25915, "train_speed(iter/s)": 0.415148 }, { "acc": 0.86917095, "epoch": 0.7018113881894241, "grad_norm": 9.665847778320312, "learning_rate": 9.57591180463315e-06, "loss": 0.59080458, "memory(GiB)": 34.88, "step": 25920, "train_speed(iter/s)": 0.415151 }, { "acc": 0.86414061, "epoch": 0.7019467684726396, "grad_norm": 11.063426971435547, "learning_rate": 9.57568623865573e-06, "loss": 0.72228861, "memory(GiB)": 34.88, "step": 25925, "train_speed(iter/s)": 0.415154 }, { "acc": 0.86910858, "epoch": 0.7020821487558552, "grad_norm": 4.996868133544922, "learning_rate": 9.575460615365114e-06, "loss": 0.67693472, "memory(GiB)": 34.88, "step": 25930, "train_speed(iter/s)": 0.415158 }, { "acc": 0.85689974, "epoch": 0.7022175290390708, "grad_norm": 30.81141471862793, "learning_rate": 9.575234934764125e-06, "loss": 0.7201972, "memory(GiB)": 34.88, "step": 25935, "train_speed(iter/s)": 0.415162 }, { "acc": 0.84694939, "epoch": 0.7023529093222863, "grad_norm": 25.181011199951172, "learning_rate": 9.575009196855589e-06, "loss": 0.83693914, "memory(GiB)": 34.88, "step": 25940, "train_speed(iter/s)": 0.415166 }, { "acc": 0.87149229, "epoch": 0.7024882896055018, "grad_norm": 10.336740493774414, "learning_rate": 9.57478340164234e-06, "loss": 0.64304576, "memory(GiB)": 34.88, "step": 25945, "train_speed(iter/s)": 0.415169 }, { "acc": 0.86261158, "epoch": 0.7026236698887174, "grad_norm": 8.772272109985352, "learning_rate": 9.574557549127201e-06, "loss": 0.67195034, "memory(GiB)": 34.88, "step": 25950, "train_speed(iter/s)": 0.415173 }, { "acc": 0.8721509, "epoch": 0.702759050171933, "grad_norm": 10.55153751373291, "learning_rate": 9.574331639313003e-06, "loss": 0.70324821, "memory(GiB)": 34.88, "step": 25955, "train_speed(iter/s)": 0.415176 }, { "acc": 0.87043476, "epoch": 0.7028944304551485, "grad_norm": 11.876542091369629, "learning_rate": 9.574105672202578e-06, "loss": 0.76571031, "memory(GiB)": 34.88, "step": 25960, "train_speed(iter/s)": 0.415179 }, { "acc": 0.87707872, "epoch": 0.703029810738364, "grad_norm": 8.183690071105957, "learning_rate": 9.573879647798752e-06, "loss": 0.72424755, "memory(GiB)": 34.88, "step": 25965, "train_speed(iter/s)": 0.415183 }, { "acc": 0.8345541, "epoch": 0.7031651910215796, "grad_norm": 21.609092712402344, "learning_rate": 9.573653566104361e-06, "loss": 0.82577858, "memory(GiB)": 34.88, "step": 25970, "train_speed(iter/s)": 0.415186 }, { "acc": 0.86831207, "epoch": 0.7033005713047952, "grad_norm": 7.811191082000732, "learning_rate": 9.573427427122236e-06, "loss": 0.63458567, "memory(GiB)": 34.88, "step": 25975, "train_speed(iter/s)": 0.41519 }, { "acc": 0.87532082, "epoch": 0.7034359515880108, "grad_norm": 3.4065115451812744, "learning_rate": 9.573201230855209e-06, "loss": 0.57321053, "memory(GiB)": 34.88, "step": 25980, "train_speed(iter/s)": 0.415193 }, { "acc": 0.88782368, "epoch": 0.7035713318712262, "grad_norm": 6.201066017150879, "learning_rate": 9.572974977306113e-06, "loss": 0.5736382, "memory(GiB)": 34.88, "step": 25985, "train_speed(iter/s)": 0.415197 }, { "acc": 0.85680046, "epoch": 0.7037067121544418, "grad_norm": 19.077613830566406, "learning_rate": 9.572748666477785e-06, "loss": 0.69771376, "memory(GiB)": 34.88, "step": 25990, "train_speed(iter/s)": 0.4152 }, { "acc": 0.8796629, "epoch": 0.7038420924376574, "grad_norm": 10.07316780090332, "learning_rate": 9.572522298373055e-06, "loss": 0.67754593, "memory(GiB)": 34.88, "step": 25995, "train_speed(iter/s)": 0.415204 }, { "acc": 0.86806393, "epoch": 0.703977472720873, "grad_norm": 6.631534099578857, "learning_rate": 9.572295872994764e-06, "loss": 0.72050347, "memory(GiB)": 34.88, "step": 26000, "train_speed(iter/s)": 0.415207 }, { "acc": 0.89565887, "epoch": 0.7041128530040884, "grad_norm": 14.066167831420898, "learning_rate": 9.572069390345743e-06, "loss": 0.49447613, "memory(GiB)": 34.88, "step": 26005, "train_speed(iter/s)": 0.41521 }, { "acc": 0.87571287, "epoch": 0.704248233287304, "grad_norm": 10.996638298034668, "learning_rate": 9.571842850428836e-06, "loss": 0.62681866, "memory(GiB)": 34.88, "step": 26010, "train_speed(iter/s)": 0.415214 }, { "acc": 0.855233, "epoch": 0.7043836135705196, "grad_norm": 5.199553489685059, "learning_rate": 9.571616253246875e-06, "loss": 0.73108301, "memory(GiB)": 34.88, "step": 26015, "train_speed(iter/s)": 0.415217 }, { "acc": 0.86204901, "epoch": 0.7045189938537352, "grad_norm": 6.211212635040283, "learning_rate": 9.571389598802701e-06, "loss": 0.65870085, "memory(GiB)": 34.88, "step": 26020, "train_speed(iter/s)": 0.415221 }, { "acc": 0.83742447, "epoch": 0.7046543741369506, "grad_norm": 28.18963050842285, "learning_rate": 9.571162887099154e-06, "loss": 0.80229235, "memory(GiB)": 34.88, "step": 26025, "train_speed(iter/s)": 0.415224 }, { "acc": 0.84615231, "epoch": 0.7047897544201662, "grad_norm": 23.900609970092773, "learning_rate": 9.570936118139072e-06, "loss": 0.75496759, "memory(GiB)": 34.88, "step": 26030, "train_speed(iter/s)": 0.415228 }, { "acc": 0.8649416, "epoch": 0.7049251347033818, "grad_norm": 6.64745569229126, "learning_rate": 9.570709291925297e-06, "loss": 0.62540483, "memory(GiB)": 34.88, "step": 26035, "train_speed(iter/s)": 0.415231 }, { "acc": 0.87594175, "epoch": 0.7050605149865974, "grad_norm": 8.484308242797852, "learning_rate": 9.570482408460671e-06, "loss": 0.66919813, "memory(GiB)": 34.88, "step": 26040, "train_speed(iter/s)": 0.415235 }, { "acc": 0.84910488, "epoch": 0.7051958952698129, "grad_norm": 11.171528816223145, "learning_rate": 9.570255467748033e-06, "loss": 0.83566456, "memory(GiB)": 34.88, "step": 26045, "train_speed(iter/s)": 0.415238 }, { "acc": 0.8732645, "epoch": 0.7053312755530284, "grad_norm": 4.66101598739624, "learning_rate": 9.57002846979023e-06, "loss": 0.6102458, "memory(GiB)": 34.88, "step": 26050, "train_speed(iter/s)": 0.415241 }, { "acc": 0.85339575, "epoch": 0.705466655836244, "grad_norm": 15.2611665725708, "learning_rate": 9.569801414590104e-06, "loss": 0.73573027, "memory(GiB)": 34.88, "step": 26055, "train_speed(iter/s)": 0.415245 }, { "acc": 0.88480844, "epoch": 0.7056020361194596, "grad_norm": 9.423199653625488, "learning_rate": 9.569574302150499e-06, "loss": 0.58107047, "memory(GiB)": 34.88, "step": 26060, "train_speed(iter/s)": 0.415248 }, { "acc": 0.86685934, "epoch": 0.7057374164026751, "grad_norm": 22.427663803100586, "learning_rate": 9.569347132474257e-06, "loss": 0.67114654, "memory(GiB)": 34.88, "step": 26065, "train_speed(iter/s)": 0.415251 }, { "acc": 0.85485182, "epoch": 0.7058727966858906, "grad_norm": 21.00212860107422, "learning_rate": 9.569119905564232e-06, "loss": 0.79997377, "memory(GiB)": 34.88, "step": 26070, "train_speed(iter/s)": 0.415255 }, { "acc": 0.86305599, "epoch": 0.7060081769691062, "grad_norm": 9.270833969116211, "learning_rate": 9.568892621423263e-06, "loss": 0.76250806, "memory(GiB)": 34.88, "step": 26075, "train_speed(iter/s)": 0.415259 }, { "acc": 0.85133629, "epoch": 0.7061435572523218, "grad_norm": 15.639930725097656, "learning_rate": 9.5686652800542e-06, "loss": 0.81553135, "memory(GiB)": 34.88, "step": 26080, "train_speed(iter/s)": 0.415262 }, { "acc": 0.90916348, "epoch": 0.7062789375355373, "grad_norm": 6.03199577331543, "learning_rate": 9.56843788145989e-06, "loss": 0.46402569, "memory(GiB)": 34.88, "step": 26085, "train_speed(iter/s)": 0.415266 }, { "acc": 0.88124056, "epoch": 0.7064143178187529, "grad_norm": 8.082701683044434, "learning_rate": 9.56821042564318e-06, "loss": 0.58290148, "memory(GiB)": 34.88, "step": 26090, "train_speed(iter/s)": 0.41527 }, { "acc": 0.88930893, "epoch": 0.7065496981019684, "grad_norm": 8.211894035339355, "learning_rate": 9.567982912606924e-06, "loss": 0.5633172, "memory(GiB)": 34.88, "step": 26095, "train_speed(iter/s)": 0.415273 }, { "acc": 0.88609085, "epoch": 0.706685078385184, "grad_norm": 5.852335453033447, "learning_rate": 9.56775534235397e-06, "loss": 0.65674996, "memory(GiB)": 34.88, "step": 26100, "train_speed(iter/s)": 0.415276 }, { "acc": 0.84702015, "epoch": 0.7068204586683995, "grad_norm": 8.977283477783203, "learning_rate": 9.567527714887166e-06, "loss": 0.73030405, "memory(GiB)": 34.88, "step": 26105, "train_speed(iter/s)": 0.41528 }, { "acc": 0.87841873, "epoch": 0.7069558389516151, "grad_norm": 7.457922458648682, "learning_rate": 9.567300030209366e-06, "loss": 0.56399422, "memory(GiB)": 34.88, "step": 26110, "train_speed(iter/s)": 0.415283 }, { "acc": 0.90377922, "epoch": 0.7070912192348306, "grad_norm": 7.622304916381836, "learning_rate": 9.567072288323424e-06, "loss": 0.49074593, "memory(GiB)": 34.88, "step": 26115, "train_speed(iter/s)": 0.415287 }, { "acc": 0.85402889, "epoch": 0.7072265995180462, "grad_norm": 13.675498008728027, "learning_rate": 9.566844489232188e-06, "loss": 0.87021217, "memory(GiB)": 34.88, "step": 26120, "train_speed(iter/s)": 0.41529 }, { "acc": 0.86878052, "epoch": 0.7073619798012617, "grad_norm": 14.187421798706055, "learning_rate": 9.566616632938517e-06, "loss": 0.63225188, "memory(GiB)": 34.88, "step": 26125, "train_speed(iter/s)": 0.415293 }, { "acc": 0.87674828, "epoch": 0.7074973600844773, "grad_norm": 6.196242809295654, "learning_rate": 9.566388719445261e-06, "loss": 0.64314709, "memory(GiB)": 34.88, "step": 26130, "train_speed(iter/s)": 0.415297 }, { "acc": 0.85916719, "epoch": 0.7076327403676929, "grad_norm": 10.719536781311035, "learning_rate": 9.566160748755278e-06, "loss": 0.64677277, "memory(GiB)": 34.88, "step": 26135, "train_speed(iter/s)": 0.415301 }, { "acc": 0.88776035, "epoch": 0.7077681206509084, "grad_norm": 14.911755561828613, "learning_rate": 9.565932720871422e-06, "loss": 0.65484262, "memory(GiB)": 34.88, "step": 26140, "train_speed(iter/s)": 0.415304 }, { "acc": 0.86578674, "epoch": 0.7079035009341239, "grad_norm": 9.083680152893066, "learning_rate": 9.56570463579655e-06, "loss": 0.67218761, "memory(GiB)": 34.88, "step": 26145, "train_speed(iter/s)": 0.415307 }, { "acc": 0.84939747, "epoch": 0.7080388812173395, "grad_norm": 7.397806167602539, "learning_rate": 9.56547649353352e-06, "loss": 0.71369648, "memory(GiB)": 34.88, "step": 26150, "train_speed(iter/s)": 0.41531 }, { "acc": 0.8550127, "epoch": 0.7081742615005551, "grad_norm": 17.217599868774414, "learning_rate": 9.565248294085188e-06, "loss": 0.74771905, "memory(GiB)": 34.88, "step": 26155, "train_speed(iter/s)": 0.415313 }, { "acc": 0.88183556, "epoch": 0.7083096417837706, "grad_norm": 7.1997294425964355, "learning_rate": 9.565020037454415e-06, "loss": 0.5435883, "memory(GiB)": 34.88, "step": 26160, "train_speed(iter/s)": 0.415317 }, { "acc": 0.86879826, "epoch": 0.7084450220669861, "grad_norm": 12.115659713745117, "learning_rate": 9.564791723644058e-06, "loss": 0.62383919, "memory(GiB)": 34.88, "step": 26165, "train_speed(iter/s)": 0.41532 }, { "acc": 0.86096592, "epoch": 0.7085804023502017, "grad_norm": 11.110021591186523, "learning_rate": 9.56456335265698e-06, "loss": 0.72789469, "memory(GiB)": 34.88, "step": 26170, "train_speed(iter/s)": 0.415324 }, { "acc": 0.88124599, "epoch": 0.7087157826334173, "grad_norm": 9.2282133102417, "learning_rate": 9.56433492449604e-06, "loss": 0.58572712, "memory(GiB)": 34.88, "step": 26175, "train_speed(iter/s)": 0.415327 }, { "acc": 0.84914417, "epoch": 0.7088511629166329, "grad_norm": 9.570785522460938, "learning_rate": 9.564106439164098e-06, "loss": 0.72439737, "memory(GiB)": 34.88, "step": 26180, "train_speed(iter/s)": 0.415331 }, { "acc": 0.88339043, "epoch": 0.7089865431998483, "grad_norm": 10.905296325683594, "learning_rate": 9.563877896664018e-06, "loss": 0.60681682, "memory(GiB)": 34.88, "step": 26185, "train_speed(iter/s)": 0.415334 }, { "acc": 0.86902523, "epoch": 0.7091219234830639, "grad_norm": 20.27482032775879, "learning_rate": 9.563649296998666e-06, "loss": 0.73127432, "memory(GiB)": 34.88, "step": 26190, "train_speed(iter/s)": 0.415338 }, { "acc": 0.87261238, "epoch": 0.7092573037662795, "grad_norm": 15.723223686218262, "learning_rate": 9.563420640170898e-06, "loss": 0.66905394, "memory(GiB)": 34.88, "step": 26195, "train_speed(iter/s)": 0.415341 }, { "acc": 0.86415806, "epoch": 0.7093926840494951, "grad_norm": 10.551791191101074, "learning_rate": 9.563191926183586e-06, "loss": 0.66655812, "memory(GiB)": 34.88, "step": 26200, "train_speed(iter/s)": 0.415345 }, { "acc": 0.88049946, "epoch": 0.7095280643327105, "grad_norm": 12.220361709594727, "learning_rate": 9.562963155039591e-06, "loss": 0.60523024, "memory(GiB)": 34.88, "step": 26205, "train_speed(iter/s)": 0.415348 }, { "acc": 0.88769855, "epoch": 0.7096634446159261, "grad_norm": 7.559508800506592, "learning_rate": 9.56273432674178e-06, "loss": 0.55417781, "memory(GiB)": 34.88, "step": 26210, "train_speed(iter/s)": 0.415352 }, { "acc": 0.88353844, "epoch": 0.7097988248991417, "grad_norm": 15.531923294067383, "learning_rate": 9.56250544129302e-06, "loss": 0.6353035, "memory(GiB)": 34.88, "step": 26215, "train_speed(iter/s)": 0.415355 }, { "acc": 0.87070599, "epoch": 0.7099342051823573, "grad_norm": 7.481037616729736, "learning_rate": 9.562276498696175e-06, "loss": 0.61636615, "memory(GiB)": 34.88, "step": 26220, "train_speed(iter/s)": 0.415359 }, { "acc": 0.86374302, "epoch": 0.7100695854655728, "grad_norm": 17.909761428833008, "learning_rate": 9.562047498954118e-06, "loss": 0.69182296, "memory(GiB)": 34.88, "step": 26225, "train_speed(iter/s)": 0.415362 }, { "acc": 0.87546663, "epoch": 0.7102049657487883, "grad_norm": 16.538206100463867, "learning_rate": 9.561818442069715e-06, "loss": 0.68754511, "memory(GiB)": 34.88, "step": 26230, "train_speed(iter/s)": 0.415366 }, { "acc": 0.8688694, "epoch": 0.7103403460320039, "grad_norm": 8.036141395568848, "learning_rate": 9.561589328045833e-06, "loss": 0.61151457, "memory(GiB)": 34.88, "step": 26235, "train_speed(iter/s)": 0.41537 }, { "acc": 0.87164459, "epoch": 0.7104757263152195, "grad_norm": 29.42813491821289, "learning_rate": 9.561360156885347e-06, "loss": 0.60596581, "memory(GiB)": 34.88, "step": 26240, "train_speed(iter/s)": 0.415373 }, { "acc": 0.875177, "epoch": 0.710611106598435, "grad_norm": 7.95281457901001, "learning_rate": 9.561130928591125e-06, "loss": 0.62923565, "memory(GiB)": 34.88, "step": 26245, "train_speed(iter/s)": 0.415377 }, { "acc": 0.87741919, "epoch": 0.7107464868816505, "grad_norm": 6.388444900512695, "learning_rate": 9.560901643166038e-06, "loss": 0.57811761, "memory(GiB)": 34.88, "step": 26250, "train_speed(iter/s)": 0.41538 }, { "acc": 0.8743412, "epoch": 0.7108818671648661, "grad_norm": 8.603511810302734, "learning_rate": 9.560672300612962e-06, "loss": 0.72537756, "memory(GiB)": 34.88, "step": 26255, "train_speed(iter/s)": 0.415383 }, { "acc": 0.86014681, "epoch": 0.7110172474480817, "grad_norm": 9.998247146606445, "learning_rate": 9.560442900934764e-06, "loss": 0.73738499, "memory(GiB)": 34.88, "step": 26260, "train_speed(iter/s)": 0.415387 }, { "acc": 0.89184589, "epoch": 0.7111526277312972, "grad_norm": 112.56107330322266, "learning_rate": 9.560213444134324e-06, "loss": 0.54713306, "memory(GiB)": 34.88, "step": 26265, "train_speed(iter/s)": 0.415391 }, { "acc": 0.8810915, "epoch": 0.7112880080145128, "grad_norm": 8.532100677490234, "learning_rate": 9.55998393021451e-06, "loss": 0.55951257, "memory(GiB)": 34.88, "step": 26270, "train_speed(iter/s)": 0.415395 }, { "acc": 0.85552511, "epoch": 0.7114233882977283, "grad_norm": 9.440166473388672, "learning_rate": 9.559754359178201e-06, "loss": 0.74779348, "memory(GiB)": 34.88, "step": 26275, "train_speed(iter/s)": 0.415398 }, { "acc": 0.85946379, "epoch": 0.7115587685809439, "grad_norm": 10.731457710266113, "learning_rate": 9.559524731028275e-06, "loss": 0.70058146, "memory(GiB)": 34.88, "step": 26280, "train_speed(iter/s)": 0.415401 }, { "acc": 0.8639389, "epoch": 0.7116941488641594, "grad_norm": 9.30236530303955, "learning_rate": 9.559295045767604e-06, "loss": 0.66971703, "memory(GiB)": 34.88, "step": 26285, "train_speed(iter/s)": 0.415405 }, { "acc": 0.86392422, "epoch": 0.711829529147375, "grad_norm": 20.296527862548828, "learning_rate": 9.559065303399067e-06, "loss": 0.75403156, "memory(GiB)": 34.88, "step": 26290, "train_speed(iter/s)": 0.415408 }, { "acc": 0.8817646, "epoch": 0.7119649094305905, "grad_norm": 8.112751960754395, "learning_rate": 9.558835503925541e-06, "loss": 0.62078638, "memory(GiB)": 34.88, "step": 26295, "train_speed(iter/s)": 0.415412 }, { "acc": 0.90314102, "epoch": 0.7121002897138061, "grad_norm": 7.96848726272583, "learning_rate": 9.558605647349906e-06, "loss": 0.49837065, "memory(GiB)": 34.88, "step": 26300, "train_speed(iter/s)": 0.415415 }, { "acc": 0.86534157, "epoch": 0.7122356699970216, "grad_norm": 6.767037391662598, "learning_rate": 9.558375733675044e-06, "loss": 0.67786341, "memory(GiB)": 34.88, "step": 26305, "train_speed(iter/s)": 0.415419 }, { "acc": 0.87019005, "epoch": 0.7123710502802372, "grad_norm": 15.193979263305664, "learning_rate": 9.558145762903828e-06, "loss": 0.65583344, "memory(GiB)": 34.88, "step": 26310, "train_speed(iter/s)": 0.415422 }, { "acc": 0.87100573, "epoch": 0.7125064305634528, "grad_norm": 8.940074920654297, "learning_rate": 9.557915735039145e-06, "loss": 0.64935312, "memory(GiB)": 34.88, "step": 26315, "train_speed(iter/s)": 0.415425 }, { "acc": 0.8813343, "epoch": 0.7126418108466683, "grad_norm": 8.827899932861328, "learning_rate": 9.557685650083876e-06, "loss": 0.65611334, "memory(GiB)": 34.88, "step": 26320, "train_speed(iter/s)": 0.415428 }, { "acc": 0.8888134, "epoch": 0.7127771911298838, "grad_norm": 11.01457405090332, "learning_rate": 9.5574555080409e-06, "loss": 0.58047457, "memory(GiB)": 34.88, "step": 26325, "train_speed(iter/s)": 0.415431 }, { "acc": 0.86117516, "epoch": 0.7129125714130994, "grad_norm": 8.923213958740234, "learning_rate": 9.557225308913103e-06, "loss": 0.85385323, "memory(GiB)": 34.88, "step": 26330, "train_speed(iter/s)": 0.415434 }, { "acc": 0.85953264, "epoch": 0.713047951696315, "grad_norm": 10.149103164672852, "learning_rate": 9.556995052703366e-06, "loss": 0.71258712, "memory(GiB)": 34.88, "step": 26335, "train_speed(iter/s)": 0.415438 }, { "acc": 0.89245186, "epoch": 0.7131833319795305, "grad_norm": 4.736195087432861, "learning_rate": 9.556764739414577e-06, "loss": 0.56986923, "memory(GiB)": 34.88, "step": 26340, "train_speed(iter/s)": 0.415441 }, { "acc": 0.87591066, "epoch": 0.713318712262746, "grad_norm": 10.977614402770996, "learning_rate": 9.556534369049618e-06, "loss": 0.63377304, "memory(GiB)": 34.88, "step": 26345, "train_speed(iter/s)": 0.415444 }, { "acc": 0.89643097, "epoch": 0.7134540925459616, "grad_norm": 5.388235569000244, "learning_rate": 9.556303941611376e-06, "loss": 0.51969953, "memory(GiB)": 34.88, "step": 26350, "train_speed(iter/s)": 0.415447 }, { "acc": 0.85718594, "epoch": 0.7135894728291772, "grad_norm": 6.152830600738525, "learning_rate": 9.556073457102737e-06, "loss": 0.78974943, "memory(GiB)": 34.88, "step": 26355, "train_speed(iter/s)": 0.415451 }, { "acc": 0.85756254, "epoch": 0.7137248531123928, "grad_norm": 7.738406181335449, "learning_rate": 9.55584291552659e-06, "loss": 0.73856153, "memory(GiB)": 34.88, "step": 26360, "train_speed(iter/s)": 0.415454 }, { "acc": 0.86385593, "epoch": 0.7138602333956082, "grad_norm": 9.912071228027344, "learning_rate": 9.55561231688582e-06, "loss": 0.76149201, "memory(GiB)": 34.88, "step": 26365, "train_speed(iter/s)": 0.415458 }, { "acc": 0.83547258, "epoch": 0.7139956136788238, "grad_norm": 7.19632625579834, "learning_rate": 9.55538166118332e-06, "loss": 0.74986873, "memory(GiB)": 34.88, "step": 26370, "train_speed(iter/s)": 0.415461 }, { "acc": 0.88459053, "epoch": 0.7141309939620394, "grad_norm": 9.785988807678223, "learning_rate": 9.555150948421974e-06, "loss": 0.56736565, "memory(GiB)": 34.88, "step": 26375, "train_speed(iter/s)": 0.415465 }, { "acc": 0.86975517, "epoch": 0.714266374245255, "grad_norm": 9.42003059387207, "learning_rate": 9.554920178604675e-06, "loss": 0.72444944, "memory(GiB)": 34.88, "step": 26380, "train_speed(iter/s)": 0.415469 }, { "acc": 0.85296059, "epoch": 0.7144017545284704, "grad_norm": 7.259725093841553, "learning_rate": 9.554689351734318e-06, "loss": 0.82554207, "memory(GiB)": 34.88, "step": 26385, "train_speed(iter/s)": 0.415472 }, { "acc": 0.88757524, "epoch": 0.714537134811686, "grad_norm": 9.534279823303223, "learning_rate": 9.554458467813786e-06, "loss": 0.57039242, "memory(GiB)": 34.88, "step": 26390, "train_speed(iter/s)": 0.415476 }, { "acc": 0.86063089, "epoch": 0.7146725150949016, "grad_norm": 11.686905860900879, "learning_rate": 9.554227526845977e-06, "loss": 0.76880388, "memory(GiB)": 34.88, "step": 26395, "train_speed(iter/s)": 0.41548 }, { "acc": 0.87337265, "epoch": 0.7148078953781172, "grad_norm": 5.849584579467773, "learning_rate": 9.553996528833783e-06, "loss": 0.60686846, "memory(GiB)": 34.88, "step": 26400, "train_speed(iter/s)": 0.415483 }, { "acc": 0.85854988, "epoch": 0.7149432756613326, "grad_norm": 10.136445999145508, "learning_rate": 9.553765473780097e-06, "loss": 0.73290367, "memory(GiB)": 34.88, "step": 26405, "train_speed(iter/s)": 0.415486 }, { "acc": 0.86839209, "epoch": 0.7150786559445482, "grad_norm": 7.66372537612915, "learning_rate": 9.553534361687816e-06, "loss": 0.63026433, "memory(GiB)": 34.88, "step": 26410, "train_speed(iter/s)": 0.415489 }, { "acc": 0.84750347, "epoch": 0.7152140362277638, "grad_norm": 16.20387077331543, "learning_rate": 9.553303192559829e-06, "loss": 0.86707001, "memory(GiB)": 34.88, "step": 26415, "train_speed(iter/s)": 0.415493 }, { "acc": 0.88954163, "epoch": 0.7153494165109794, "grad_norm": 3.8354873657226562, "learning_rate": 9.553071966399038e-06, "loss": 0.54412599, "memory(GiB)": 34.88, "step": 26420, "train_speed(iter/s)": 0.415497 }, { "acc": 0.86516657, "epoch": 0.7154847967941949, "grad_norm": 10.740198135375977, "learning_rate": 9.552840683208336e-06, "loss": 0.72489805, "memory(GiB)": 34.88, "step": 26425, "train_speed(iter/s)": 0.4155 }, { "acc": 0.85737686, "epoch": 0.7156201770774104, "grad_norm": 23.410308837890625, "learning_rate": 9.552609342990624e-06, "loss": 0.76879253, "memory(GiB)": 34.88, "step": 26430, "train_speed(iter/s)": 0.415503 }, { "acc": 0.875774, "epoch": 0.715755557360626, "grad_norm": 6.554618835449219, "learning_rate": 9.552377945748797e-06, "loss": 0.59242582, "memory(GiB)": 34.88, "step": 26435, "train_speed(iter/s)": 0.415506 }, { "acc": 0.85938253, "epoch": 0.7158909376438416, "grad_norm": 10.727533340454102, "learning_rate": 9.552146491485752e-06, "loss": 0.78740835, "memory(GiB)": 34.88, "step": 26440, "train_speed(iter/s)": 0.41551 }, { "acc": 0.86563644, "epoch": 0.7160263179270571, "grad_norm": 12.989449501037598, "learning_rate": 9.551914980204393e-06, "loss": 0.74365907, "memory(GiB)": 34.88, "step": 26445, "train_speed(iter/s)": 0.415513 }, { "acc": 0.86145496, "epoch": 0.7161616982102726, "grad_norm": 10.090632438659668, "learning_rate": 9.551683411907617e-06, "loss": 0.68101587, "memory(GiB)": 34.88, "step": 26450, "train_speed(iter/s)": 0.415516 }, { "acc": 0.85164547, "epoch": 0.7162970784934882, "grad_norm": 8.782160758972168, "learning_rate": 9.551451786598326e-06, "loss": 0.71588635, "memory(GiB)": 34.88, "step": 26455, "train_speed(iter/s)": 0.415519 }, { "acc": 0.85520334, "epoch": 0.7164324587767038, "grad_norm": 10.523551940917969, "learning_rate": 9.551220104279421e-06, "loss": 0.80695686, "memory(GiB)": 34.88, "step": 26460, "train_speed(iter/s)": 0.415522 }, { "acc": 0.84653797, "epoch": 0.7165678390599193, "grad_norm": 9.085859298706055, "learning_rate": 9.550988364953804e-06, "loss": 0.77081766, "memory(GiB)": 34.88, "step": 26465, "train_speed(iter/s)": 0.415525 }, { "acc": 0.87826157, "epoch": 0.7167032193431349, "grad_norm": 6.9531636238098145, "learning_rate": 9.550756568624381e-06, "loss": 0.58407979, "memory(GiB)": 34.88, "step": 26470, "train_speed(iter/s)": 0.415529 }, { "acc": 0.86343498, "epoch": 0.7168385996263504, "grad_norm": 14.305742263793945, "learning_rate": 9.550524715294052e-06, "loss": 0.75461092, "memory(GiB)": 34.88, "step": 26475, "train_speed(iter/s)": 0.415532 }, { "acc": 0.86236057, "epoch": 0.716973979909566, "grad_norm": 19.948627471923828, "learning_rate": 9.55029280496572e-06, "loss": 0.73014517, "memory(GiB)": 34.88, "step": 26480, "train_speed(iter/s)": 0.415536 }, { "acc": 0.87074108, "epoch": 0.7171093601927815, "grad_norm": 3.437851667404175, "learning_rate": 9.550060837642297e-06, "loss": 0.68611259, "memory(GiB)": 34.88, "step": 26485, "train_speed(iter/s)": 0.415539 }, { "acc": 0.86435184, "epoch": 0.7172447404759971, "grad_norm": 5.41709041595459, "learning_rate": 9.549828813326682e-06, "loss": 0.65571351, "memory(GiB)": 34.88, "step": 26490, "train_speed(iter/s)": 0.415541 }, { "acc": 0.86449566, "epoch": 0.7173801207592126, "grad_norm": 13.602827072143555, "learning_rate": 9.549596732021784e-06, "loss": 0.63028269, "memory(GiB)": 34.88, "step": 26495, "train_speed(iter/s)": 0.415544 }, { "acc": 0.87444763, "epoch": 0.7175155010424282, "grad_norm": 7.992375373840332, "learning_rate": 9.549364593730513e-06, "loss": 0.72904539, "memory(GiB)": 34.88, "step": 26500, "train_speed(iter/s)": 0.415547 }, { "acc": 0.87689848, "epoch": 0.7176508813256437, "grad_norm": 10.360726356506348, "learning_rate": 9.549132398455773e-06, "loss": 0.62827415, "memory(GiB)": 34.88, "step": 26505, "train_speed(iter/s)": 0.415551 }, { "acc": 0.88154278, "epoch": 0.7177862616088593, "grad_norm": 10.346905708312988, "learning_rate": 9.548900146200473e-06, "loss": 0.66167374, "memory(GiB)": 34.88, "step": 26510, "train_speed(iter/s)": 0.415554 }, { "acc": 0.86323833, "epoch": 0.7179216418920749, "grad_norm": 8.19463062286377, "learning_rate": 9.548667836967523e-06, "loss": 0.7532999, "memory(GiB)": 34.88, "step": 26515, "train_speed(iter/s)": 0.415558 }, { "acc": 0.87879868, "epoch": 0.7180570221752904, "grad_norm": 9.086414337158203, "learning_rate": 9.548435470759837e-06, "loss": 0.61559415, "memory(GiB)": 34.88, "step": 26520, "train_speed(iter/s)": 0.41556 }, { "acc": 0.87602081, "epoch": 0.7181924024585059, "grad_norm": 13.946104049682617, "learning_rate": 9.548203047580318e-06, "loss": 0.6488503, "memory(GiB)": 34.88, "step": 26525, "train_speed(iter/s)": 0.415564 }, { "acc": 0.87958374, "epoch": 0.7183277827417215, "grad_norm": 12.01218032836914, "learning_rate": 9.547970567431886e-06, "loss": 0.58678732, "memory(GiB)": 34.88, "step": 26530, "train_speed(iter/s)": 0.415567 }, { "acc": 0.83783188, "epoch": 0.7184631630249371, "grad_norm": 10.58093547821045, "learning_rate": 9.54773803031745e-06, "loss": 0.82519913, "memory(GiB)": 34.88, "step": 26535, "train_speed(iter/s)": 0.415571 }, { "acc": 0.87424917, "epoch": 0.7185985433081526, "grad_norm": 10.29200553894043, "learning_rate": 9.54750543623992e-06, "loss": 0.66015196, "memory(GiB)": 34.88, "step": 26540, "train_speed(iter/s)": 0.415574 }, { "acc": 0.89146671, "epoch": 0.7187339235913681, "grad_norm": 15.267006874084473, "learning_rate": 9.547272785202211e-06, "loss": 0.58312883, "memory(GiB)": 34.88, "step": 26545, "train_speed(iter/s)": 0.415578 }, { "acc": 0.87222767, "epoch": 0.7188693038745837, "grad_norm": 18.46726417541504, "learning_rate": 9.54704007720724e-06, "loss": 0.56693449, "memory(GiB)": 34.88, "step": 26550, "train_speed(iter/s)": 0.415581 }, { "acc": 0.88889503, "epoch": 0.7190046841577993, "grad_norm": 6.984385013580322, "learning_rate": 9.546807312257922e-06, "loss": 0.56165485, "memory(GiB)": 34.88, "step": 26555, "train_speed(iter/s)": 0.415585 }, { "acc": 0.87475042, "epoch": 0.7191400644410149, "grad_norm": 12.628193855285645, "learning_rate": 9.54657449035717e-06, "loss": 0.67017879, "memory(GiB)": 34.88, "step": 26560, "train_speed(iter/s)": 0.415588 }, { "acc": 0.8734354, "epoch": 0.7192754447242303, "grad_norm": 7.305230140686035, "learning_rate": 9.546341611507902e-06, "loss": 0.67788024, "memory(GiB)": 34.88, "step": 26565, "train_speed(iter/s)": 0.415592 }, { "acc": 0.83570805, "epoch": 0.7194108250074459, "grad_norm": 8.24914836883545, "learning_rate": 9.546108675713036e-06, "loss": 0.93017817, "memory(GiB)": 34.88, "step": 26570, "train_speed(iter/s)": 0.415595 }, { "acc": 0.88057632, "epoch": 0.7195462052906615, "grad_norm": 4.823047161102295, "learning_rate": 9.545875682975489e-06, "loss": 0.62785597, "memory(GiB)": 34.88, "step": 26575, "train_speed(iter/s)": 0.415599 }, { "acc": 0.85920868, "epoch": 0.7196815855738771, "grad_norm": 13.06846809387207, "learning_rate": 9.545642633298178e-06, "loss": 0.77962575, "memory(GiB)": 34.88, "step": 26580, "train_speed(iter/s)": 0.415602 }, { "acc": 0.87652264, "epoch": 0.7198169658570925, "grad_norm": 12.528088569641113, "learning_rate": 9.545409526684026e-06, "loss": 0.63264861, "memory(GiB)": 34.88, "step": 26585, "train_speed(iter/s)": 0.415605 }, { "acc": 0.86648388, "epoch": 0.7199523461403081, "grad_norm": 6.199300289154053, "learning_rate": 9.545176363135951e-06, "loss": 0.71528907, "memory(GiB)": 34.88, "step": 26590, "train_speed(iter/s)": 0.415608 }, { "acc": 0.87001657, "epoch": 0.7200877264235237, "grad_norm": 12.847658157348633, "learning_rate": 9.544943142656876e-06, "loss": 0.75756569, "memory(GiB)": 34.88, "step": 26595, "train_speed(iter/s)": 0.415611 }, { "acc": 0.88115711, "epoch": 0.7202231067067393, "grad_norm": 9.668926239013672, "learning_rate": 9.544709865249719e-06, "loss": 0.54142032, "memory(GiB)": 34.88, "step": 26600, "train_speed(iter/s)": 0.415615 }, { "acc": 0.86050301, "epoch": 0.7203584869899547, "grad_norm": 20.067970275878906, "learning_rate": 9.544476530917404e-06, "loss": 0.69592705, "memory(GiB)": 34.88, "step": 26605, "train_speed(iter/s)": 0.415618 }, { "acc": 0.85624952, "epoch": 0.7204938672731703, "grad_norm": 7.648916244506836, "learning_rate": 9.544243139662855e-06, "loss": 0.75251684, "memory(GiB)": 34.88, "step": 26610, "train_speed(iter/s)": 0.415622 }, { "acc": 0.88857183, "epoch": 0.7206292475563859, "grad_norm": 7.715373992919922, "learning_rate": 9.544009691488994e-06, "loss": 0.60766459, "memory(GiB)": 34.88, "step": 26615, "train_speed(iter/s)": 0.415625 }, { "acc": 0.86811962, "epoch": 0.7207646278396015, "grad_norm": 9.73232364654541, "learning_rate": 9.543776186398746e-06, "loss": 0.63440251, "memory(GiB)": 34.88, "step": 26620, "train_speed(iter/s)": 0.415629 }, { "acc": 0.86337833, "epoch": 0.720900008122817, "grad_norm": 11.856888771057129, "learning_rate": 9.543542624395037e-06, "loss": 0.72513638, "memory(GiB)": 34.88, "step": 26625, "train_speed(iter/s)": 0.415632 }, { "acc": 0.88537054, "epoch": 0.7210353884060325, "grad_norm": 4.3098835945129395, "learning_rate": 9.543309005480793e-06, "loss": 0.55581341, "memory(GiB)": 34.88, "step": 26630, "train_speed(iter/s)": 0.415635 }, { "acc": 0.86233444, "epoch": 0.7211707686892481, "grad_norm": 10.245089530944824, "learning_rate": 9.543075329658938e-06, "loss": 0.68884115, "memory(GiB)": 34.88, "step": 26635, "train_speed(iter/s)": 0.415639 }, { "acc": 0.88787031, "epoch": 0.7213061489724637, "grad_norm": 13.326086044311523, "learning_rate": 9.5428415969324e-06, "loss": 0.56312594, "memory(GiB)": 34.88, "step": 26640, "train_speed(iter/s)": 0.415642 }, { "acc": 0.87130871, "epoch": 0.7214415292556792, "grad_norm": 10.362439155578613, "learning_rate": 9.54260780730411e-06, "loss": 0.70855136, "memory(GiB)": 34.88, "step": 26645, "train_speed(iter/s)": 0.415646 }, { "acc": 0.87398415, "epoch": 0.7215769095388948, "grad_norm": 9.780828475952148, "learning_rate": 9.542373960776993e-06, "loss": 0.62883739, "memory(GiB)": 34.88, "step": 26650, "train_speed(iter/s)": 0.415649 }, { "acc": 0.87119484, "epoch": 0.7217122898221103, "grad_norm": 12.283754348754883, "learning_rate": 9.54214005735398e-06, "loss": 0.65011425, "memory(GiB)": 34.88, "step": 26655, "train_speed(iter/s)": 0.415653 }, { "acc": 0.8596571, "epoch": 0.7218476701053259, "grad_norm": 14.246658325195312, "learning_rate": 9.541906097038001e-06, "loss": 0.80241852, "memory(GiB)": 34.88, "step": 26660, "train_speed(iter/s)": 0.415656 }, { "acc": 0.891008, "epoch": 0.7219830503885414, "grad_norm": 6.7326884269714355, "learning_rate": 9.541672079831988e-06, "loss": 0.5382205, "memory(GiB)": 34.88, "step": 26665, "train_speed(iter/s)": 0.41566 }, { "acc": 0.86031876, "epoch": 0.722118430671757, "grad_norm": 7.309505462646484, "learning_rate": 9.54143800573887e-06, "loss": 0.78467484, "memory(GiB)": 34.88, "step": 26670, "train_speed(iter/s)": 0.415662 }, { "acc": 0.88157339, "epoch": 0.7222538109549725, "grad_norm": 9.662833213806152, "learning_rate": 9.541203874761581e-06, "loss": 0.6636107, "memory(GiB)": 34.88, "step": 26675, "train_speed(iter/s)": 0.415665 }, { "acc": 0.89355974, "epoch": 0.7223891912381881, "grad_norm": 12.341447830200195, "learning_rate": 9.540969686903053e-06, "loss": 0.56013088, "memory(GiB)": 34.88, "step": 26680, "train_speed(iter/s)": 0.415669 }, { "acc": 0.85669975, "epoch": 0.7225245715214036, "grad_norm": 9.10291576385498, "learning_rate": 9.540735442166222e-06, "loss": 0.68411827, "memory(GiB)": 34.88, "step": 26685, "train_speed(iter/s)": 0.415672 }, { "acc": 0.88539619, "epoch": 0.7226599518046192, "grad_norm": 11.366621017456055, "learning_rate": 9.540501140554019e-06, "loss": 0.56367426, "memory(GiB)": 34.88, "step": 26690, "train_speed(iter/s)": 0.415675 }, { "acc": 0.8869194, "epoch": 0.7227953320878348, "grad_norm": 14.218141555786133, "learning_rate": 9.54026678206938e-06, "loss": 0.61271353, "memory(GiB)": 34.88, "step": 26695, "train_speed(iter/s)": 0.415678 }, { "acc": 0.87721434, "epoch": 0.7229307123710503, "grad_norm": 10.150455474853516, "learning_rate": 9.540032366715243e-06, "loss": 0.60664396, "memory(GiB)": 34.88, "step": 26700, "train_speed(iter/s)": 0.415682 }, { "acc": 0.86436768, "epoch": 0.7230660926542658, "grad_norm": 6.974517345428467, "learning_rate": 9.539797894494543e-06, "loss": 0.72173243, "memory(GiB)": 34.88, "step": 26705, "train_speed(iter/s)": 0.415685 }, { "acc": 0.88050385, "epoch": 0.7232014729374814, "grad_norm": 8.277486801147461, "learning_rate": 9.539563365410216e-06, "loss": 0.63297153, "memory(GiB)": 34.88, "step": 26710, "train_speed(iter/s)": 0.415688 }, { "acc": 0.88181324, "epoch": 0.723336853220697, "grad_norm": 11.263350486755371, "learning_rate": 9.5393287794652e-06, "loss": 0.58369761, "memory(GiB)": 34.88, "step": 26715, "train_speed(iter/s)": 0.415692 }, { "acc": 0.88252888, "epoch": 0.7234722335039125, "grad_norm": 9.559792518615723, "learning_rate": 9.539094136662437e-06, "loss": 0.60967216, "memory(GiB)": 34.88, "step": 26720, "train_speed(iter/s)": 0.415695 }, { "acc": 0.85684185, "epoch": 0.723607613787128, "grad_norm": 8.618300437927246, "learning_rate": 9.538859437004864e-06, "loss": 0.75372972, "memory(GiB)": 34.88, "step": 26725, "train_speed(iter/s)": 0.415698 }, { "acc": 0.87213287, "epoch": 0.7237429940703436, "grad_norm": 7.209364891052246, "learning_rate": 9.53862468049542e-06, "loss": 0.73580546, "memory(GiB)": 34.88, "step": 26730, "train_speed(iter/s)": 0.415702 }, { "acc": 0.85508509, "epoch": 0.7238783743535592, "grad_norm": 13.775870323181152, "learning_rate": 9.538389867137047e-06, "loss": 0.71436024, "memory(GiB)": 34.88, "step": 26735, "train_speed(iter/s)": 0.415705 }, { "acc": 0.88636885, "epoch": 0.7240137546367748, "grad_norm": 4.313376426696777, "learning_rate": 9.538154996932687e-06, "loss": 0.53528824, "memory(GiB)": 34.88, "step": 26740, "train_speed(iter/s)": 0.415708 }, { "acc": 0.854426, "epoch": 0.7241491349199902, "grad_norm": 7.4061713218688965, "learning_rate": 9.537920069885282e-06, "loss": 0.71055679, "memory(GiB)": 34.88, "step": 26745, "train_speed(iter/s)": 0.415711 }, { "acc": 0.8812541, "epoch": 0.7242845152032058, "grad_norm": 8.472341537475586, "learning_rate": 9.537685085997774e-06, "loss": 0.6589314, "memory(GiB)": 34.88, "step": 26750, "train_speed(iter/s)": 0.415713 }, { "acc": 0.87231503, "epoch": 0.7244198954864214, "grad_norm": 7.4003801345825195, "learning_rate": 9.537450045273108e-06, "loss": 0.63298821, "memory(GiB)": 34.88, "step": 26755, "train_speed(iter/s)": 0.415716 }, { "acc": 0.85928802, "epoch": 0.724555275769637, "grad_norm": 7.52108097076416, "learning_rate": 9.537214947714227e-06, "loss": 0.70551286, "memory(GiB)": 34.88, "step": 26760, "train_speed(iter/s)": 0.415719 }, { "acc": 0.87823362, "epoch": 0.7246906560528524, "grad_norm": 6.7547125816345215, "learning_rate": 9.536979793324077e-06, "loss": 0.67329445, "memory(GiB)": 34.88, "step": 26765, "train_speed(iter/s)": 0.415722 }, { "acc": 0.8548852, "epoch": 0.724826036336068, "grad_norm": 6.6376261711120605, "learning_rate": 9.536744582105602e-06, "loss": 0.71996794, "memory(GiB)": 34.88, "step": 26770, "train_speed(iter/s)": 0.415725 }, { "acc": 0.88065662, "epoch": 0.7249614166192836, "grad_norm": 11.460293769836426, "learning_rate": 9.536509314061752e-06, "loss": 0.51433392, "memory(GiB)": 34.88, "step": 26775, "train_speed(iter/s)": 0.415729 }, { "acc": 0.88582668, "epoch": 0.7250967969024992, "grad_norm": 46.81578063964844, "learning_rate": 9.536273989195472e-06, "loss": 0.68525681, "memory(GiB)": 34.88, "step": 26780, "train_speed(iter/s)": 0.415733 }, { "acc": 0.87952919, "epoch": 0.7252321771857146, "grad_norm": 6.7628493309021, "learning_rate": 9.53603860750971e-06, "loss": 0.63701544, "memory(GiB)": 34.88, "step": 26785, "train_speed(iter/s)": 0.415736 }, { "acc": 0.86940289, "epoch": 0.7253675574689302, "grad_norm": 9.454545974731445, "learning_rate": 9.535803169007413e-06, "loss": 0.67351713, "memory(GiB)": 34.88, "step": 26790, "train_speed(iter/s)": 0.415739 }, { "acc": 0.87140312, "epoch": 0.7255029377521458, "grad_norm": 33.76580047607422, "learning_rate": 9.535567673691533e-06, "loss": 0.65966411, "memory(GiB)": 34.88, "step": 26795, "train_speed(iter/s)": 0.415743 }, { "acc": 0.90013189, "epoch": 0.7256383180353614, "grad_norm": 8.90019416809082, "learning_rate": 9.535332121565018e-06, "loss": 0.53637938, "memory(GiB)": 34.88, "step": 26800, "train_speed(iter/s)": 0.415746 }, { "acc": 0.86084623, "epoch": 0.7257736983185769, "grad_norm": 14.6757230758667, "learning_rate": 9.53509651263082e-06, "loss": 0.66066742, "memory(GiB)": 34.88, "step": 26805, "train_speed(iter/s)": 0.415749 }, { "acc": 0.85514736, "epoch": 0.7259090786017924, "grad_norm": 13.377662658691406, "learning_rate": 9.53486084689189e-06, "loss": 0.86199093, "memory(GiB)": 34.88, "step": 26810, "train_speed(iter/s)": 0.415753 }, { "acc": 0.88280373, "epoch": 0.726044458885008, "grad_norm": 5.587002277374268, "learning_rate": 9.534625124351183e-06, "loss": 0.54433479, "memory(GiB)": 34.88, "step": 26815, "train_speed(iter/s)": 0.415756 }, { "acc": 0.86252441, "epoch": 0.7261798391682236, "grad_norm": 7.77592658996582, "learning_rate": 9.534389345011647e-06, "loss": 0.6407877, "memory(GiB)": 34.88, "step": 26820, "train_speed(iter/s)": 0.41576 }, { "acc": 0.88749504, "epoch": 0.7263152194514391, "grad_norm": 6.805294036865234, "learning_rate": 9.53415350887624e-06, "loss": 0.56003761, "memory(GiB)": 34.88, "step": 26825, "train_speed(iter/s)": 0.415763 }, { "acc": 0.86633377, "epoch": 0.7264505997346546, "grad_norm": 9.089296340942383, "learning_rate": 9.533917615947912e-06, "loss": 0.70087123, "memory(GiB)": 34.88, "step": 26830, "train_speed(iter/s)": 0.415766 }, { "acc": 0.87819061, "epoch": 0.7265859800178702, "grad_norm": 44.79900360107422, "learning_rate": 9.533681666229619e-06, "loss": 0.76207781, "memory(GiB)": 34.88, "step": 26835, "train_speed(iter/s)": 0.41577 }, { "acc": 0.83086338, "epoch": 0.7267213603010857, "grad_norm": 7.035587787628174, "learning_rate": 9.53344565972432e-06, "loss": 0.86134472, "memory(GiB)": 34.88, "step": 26840, "train_speed(iter/s)": 0.415772 }, { "acc": 0.8863821, "epoch": 0.7268567405843013, "grad_norm": 18.078025817871094, "learning_rate": 9.533209596434968e-06, "loss": 0.57759829, "memory(GiB)": 34.88, "step": 26845, "train_speed(iter/s)": 0.415775 }, { "acc": 0.87513809, "epoch": 0.7269921208675169, "grad_norm": 11.65742015838623, "learning_rate": 9.532973476364524e-06, "loss": 0.69964314, "memory(GiB)": 34.88, "step": 26850, "train_speed(iter/s)": 0.415778 }, { "acc": 0.86925201, "epoch": 0.7271275011507324, "grad_norm": 8.825989723205566, "learning_rate": 9.532737299515943e-06, "loss": 0.68826299, "memory(GiB)": 34.88, "step": 26855, "train_speed(iter/s)": 0.415782 }, { "acc": 0.85413723, "epoch": 0.7272628814339479, "grad_norm": 17.877826690673828, "learning_rate": 9.532501065892182e-06, "loss": 0.77378578, "memory(GiB)": 34.88, "step": 26860, "train_speed(iter/s)": 0.415785 }, { "acc": 0.84624605, "epoch": 0.7273982617171635, "grad_norm": 11.329801559448242, "learning_rate": 9.532264775496202e-06, "loss": 0.79066157, "memory(GiB)": 34.88, "step": 26865, "train_speed(iter/s)": 0.415788 }, { "acc": 0.89197741, "epoch": 0.7275336420003791, "grad_norm": 4.982966423034668, "learning_rate": 9.532028428330966e-06, "loss": 0.52460241, "memory(GiB)": 34.88, "step": 26870, "train_speed(iter/s)": 0.415791 }, { "acc": 0.91396885, "epoch": 0.7276690222835946, "grad_norm": 6.2354278564453125, "learning_rate": 9.53179202439943e-06, "loss": 0.4187407, "memory(GiB)": 34.88, "step": 26875, "train_speed(iter/s)": 0.415794 }, { "acc": 0.88324738, "epoch": 0.7278044025668101, "grad_norm": 11.335138320922852, "learning_rate": 9.531555563704559e-06, "loss": 0.63201056, "memory(GiB)": 34.88, "step": 26880, "train_speed(iter/s)": 0.415798 }, { "acc": 0.87496071, "epoch": 0.7279397828500257, "grad_norm": 7.395201206207275, "learning_rate": 9.531319046249312e-06, "loss": 0.64030905, "memory(GiB)": 34.88, "step": 26885, "train_speed(iter/s)": 0.415801 }, { "acc": 0.89215069, "epoch": 0.7280751631332413, "grad_norm": 7.034400463104248, "learning_rate": 9.531082472036654e-06, "loss": 0.52865915, "memory(GiB)": 34.88, "step": 26890, "train_speed(iter/s)": 0.415804 }, { "acc": 0.86827021, "epoch": 0.7282105434164569, "grad_norm": 12.582058906555176, "learning_rate": 9.530845841069547e-06, "loss": 0.70529308, "memory(GiB)": 34.88, "step": 26895, "train_speed(iter/s)": 0.415808 }, { "acc": 0.86812105, "epoch": 0.7283459236996723, "grad_norm": 10.051079750061035, "learning_rate": 9.530609153350957e-06, "loss": 0.63530464, "memory(GiB)": 34.88, "step": 26900, "train_speed(iter/s)": 0.415811 }, { "acc": 0.87849712, "epoch": 0.7284813039828879, "grad_norm": 7.405880451202393, "learning_rate": 9.530372408883849e-06, "loss": 0.56837807, "memory(GiB)": 34.88, "step": 26905, "train_speed(iter/s)": 0.415814 }, { "acc": 0.86983271, "epoch": 0.7286166842661035, "grad_norm": 10.392515182495117, "learning_rate": 9.530135607671187e-06, "loss": 0.67894754, "memory(GiB)": 34.88, "step": 26910, "train_speed(iter/s)": 0.415817 }, { "acc": 0.86110725, "epoch": 0.7287520645493191, "grad_norm": 6.7124457359313965, "learning_rate": 9.529898749715939e-06, "loss": 0.74438658, "memory(GiB)": 34.88, "step": 26915, "train_speed(iter/s)": 0.41582 }, { "acc": 0.84529991, "epoch": 0.7288874448325345, "grad_norm": 9.468565940856934, "learning_rate": 9.529661835021071e-06, "loss": 0.84340134, "memory(GiB)": 34.88, "step": 26920, "train_speed(iter/s)": 0.415823 }, { "acc": 0.88809071, "epoch": 0.7290228251157501, "grad_norm": 6.484241962432861, "learning_rate": 9.529424863589551e-06, "loss": 0.51228514, "memory(GiB)": 34.88, "step": 26925, "train_speed(iter/s)": 0.415827 }, { "acc": 0.88106737, "epoch": 0.7291582053989657, "grad_norm": 26.4502010345459, "learning_rate": 9.529187835424347e-06, "loss": 0.59146881, "memory(GiB)": 34.88, "step": 26930, "train_speed(iter/s)": 0.41583 }, { "acc": 0.85386963, "epoch": 0.7292935856821813, "grad_norm": 6.181041240692139, "learning_rate": 9.52895075052843e-06, "loss": 0.8527441, "memory(GiB)": 34.88, "step": 26935, "train_speed(iter/s)": 0.415834 }, { "acc": 0.86404724, "epoch": 0.7294289659653967, "grad_norm": 9.247445106506348, "learning_rate": 9.528713608904769e-06, "loss": 0.73541961, "memory(GiB)": 34.88, "step": 26940, "train_speed(iter/s)": 0.415837 }, { "acc": 0.85702553, "epoch": 0.7295643462486123, "grad_norm": 22.849037170410156, "learning_rate": 9.528476410556336e-06, "loss": 0.71322851, "memory(GiB)": 34.88, "step": 26945, "train_speed(iter/s)": 0.415841 }, { "acc": 0.8731616, "epoch": 0.7296997265318279, "grad_norm": 13.30410385131836, "learning_rate": 9.528239155486099e-06, "loss": 0.72011881, "memory(GiB)": 34.88, "step": 26950, "train_speed(iter/s)": 0.415844 }, { "acc": 0.87738152, "epoch": 0.7298351068150435, "grad_norm": 6.704305171966553, "learning_rate": 9.528001843697032e-06, "loss": 0.56353755, "memory(GiB)": 34.88, "step": 26955, "train_speed(iter/s)": 0.415848 }, { "acc": 0.86013117, "epoch": 0.729970487098259, "grad_norm": 5.339773654937744, "learning_rate": 9.52776447519211e-06, "loss": 0.72732797, "memory(GiB)": 34.88, "step": 26960, "train_speed(iter/s)": 0.415851 }, { "acc": 0.86701794, "epoch": 0.7301058673814745, "grad_norm": 11.822914123535156, "learning_rate": 9.527527049974302e-06, "loss": 0.72874494, "memory(GiB)": 34.88, "step": 26965, "train_speed(iter/s)": 0.415854 }, { "acc": 0.83329773, "epoch": 0.7302412476646901, "grad_norm": 10.132108688354492, "learning_rate": 9.527289568046586e-06, "loss": 0.87685852, "memory(GiB)": 34.88, "step": 26970, "train_speed(iter/s)": 0.415857 }, { "acc": 0.86167488, "epoch": 0.7303766279479057, "grad_norm": 14.061503410339355, "learning_rate": 9.527052029411934e-06, "loss": 0.72991123, "memory(GiB)": 34.88, "step": 26975, "train_speed(iter/s)": 0.41586 }, { "acc": 0.85478191, "epoch": 0.7305120082311212, "grad_norm": 7.651655673980713, "learning_rate": 9.526814434073325e-06, "loss": 0.76823149, "memory(GiB)": 34.88, "step": 26980, "train_speed(iter/s)": 0.415863 }, { "acc": 0.88042774, "epoch": 0.7306473885143367, "grad_norm": 8.0142183303833, "learning_rate": 9.526576782033732e-06, "loss": 0.56136227, "memory(GiB)": 34.88, "step": 26985, "train_speed(iter/s)": 0.415867 }, { "acc": 0.85063095, "epoch": 0.7307827687975523, "grad_norm": 9.698199272155762, "learning_rate": 9.526339073296135e-06, "loss": 0.90636024, "memory(GiB)": 34.88, "step": 26990, "train_speed(iter/s)": 0.41587 }, { "acc": 0.84288998, "epoch": 0.7309181490807679, "grad_norm": 5.974401950836182, "learning_rate": 9.52610130786351e-06, "loss": 0.83606758, "memory(GiB)": 34.88, "step": 26995, "train_speed(iter/s)": 0.415873 }, { "acc": 0.88400068, "epoch": 0.7310535293639834, "grad_norm": 12.528116226196289, "learning_rate": 9.525863485738835e-06, "loss": 0.56247225, "memory(GiB)": 34.88, "step": 27000, "train_speed(iter/s)": 0.415877 }, { "acc": 0.88302689, "epoch": 0.731188909647199, "grad_norm": 8.691643714904785, "learning_rate": 9.525625606925091e-06, "loss": 0.61402016, "memory(GiB)": 34.88, "step": 27005, "train_speed(iter/s)": 0.41588 }, { "acc": 0.85404654, "epoch": 0.7313242899304145, "grad_norm": 9.001471519470215, "learning_rate": 9.525387671425256e-06, "loss": 0.79931755, "memory(GiB)": 34.88, "step": 27010, "train_speed(iter/s)": 0.415883 }, { "acc": 0.88737221, "epoch": 0.7314596702136301, "grad_norm": 7.102478981018066, "learning_rate": 9.52514967924231e-06, "loss": 0.62356009, "memory(GiB)": 34.88, "step": 27015, "train_speed(iter/s)": 0.415886 }, { "acc": 0.86909332, "epoch": 0.7315950504968456, "grad_norm": 6.2942585945129395, "learning_rate": 9.524911630379239e-06, "loss": 0.65642414, "memory(GiB)": 34.88, "step": 27020, "train_speed(iter/s)": 0.41589 }, { "acc": 0.87185364, "epoch": 0.7317304307800612, "grad_norm": 7.213929176330566, "learning_rate": 9.52467352483902e-06, "loss": 0.6415606, "memory(GiB)": 34.88, "step": 27025, "train_speed(iter/s)": 0.415892 }, { "acc": 0.87590618, "epoch": 0.7318658110632767, "grad_norm": 7.901270389556885, "learning_rate": 9.52443536262464e-06, "loss": 0.63861408, "memory(GiB)": 34.88, "step": 27030, "train_speed(iter/s)": 0.415895 }, { "acc": 0.88177986, "epoch": 0.7320011913464923, "grad_norm": 9.454832077026367, "learning_rate": 9.524197143739077e-06, "loss": 0.52772579, "memory(GiB)": 34.88, "step": 27035, "train_speed(iter/s)": 0.415898 }, { "acc": 0.87748566, "epoch": 0.7321365716297078, "grad_norm": 13.99381160736084, "learning_rate": 9.52395886818532e-06, "loss": 0.64332385, "memory(GiB)": 34.88, "step": 27040, "train_speed(iter/s)": 0.415902 }, { "acc": 0.84571419, "epoch": 0.7322719519129234, "grad_norm": 34.16525650024414, "learning_rate": 9.523720535966352e-06, "loss": 0.80875034, "memory(GiB)": 34.88, "step": 27045, "train_speed(iter/s)": 0.415905 }, { "acc": 0.87485237, "epoch": 0.732407332196139, "grad_norm": 5.175162315368652, "learning_rate": 9.523482147085157e-06, "loss": 0.7395741, "memory(GiB)": 34.88, "step": 27050, "train_speed(iter/s)": 0.415909 }, { "acc": 0.87207079, "epoch": 0.7325427124793545, "grad_norm": 13.6865873336792, "learning_rate": 9.523243701544725e-06, "loss": 0.64973221, "memory(GiB)": 34.88, "step": 27055, "train_speed(iter/s)": 0.415912 }, { "acc": 0.88904161, "epoch": 0.73267809276257, "grad_norm": 5.83131742477417, "learning_rate": 9.52300519934804e-06, "loss": 0.53550601, "memory(GiB)": 34.88, "step": 27060, "train_speed(iter/s)": 0.415915 }, { "acc": 0.8395895, "epoch": 0.7328134730457856, "grad_norm": 9.912847518920898, "learning_rate": 9.522766640498092e-06, "loss": 0.89218502, "memory(GiB)": 34.88, "step": 27065, "train_speed(iter/s)": 0.415918 }, { "acc": 0.88845539, "epoch": 0.7329488533290012, "grad_norm": 11.408242225646973, "learning_rate": 9.522528024997867e-06, "loss": 0.55238132, "memory(GiB)": 34.88, "step": 27070, "train_speed(iter/s)": 0.415922 }, { "acc": 0.8826025, "epoch": 0.7330842336122168, "grad_norm": 12.976783752441406, "learning_rate": 9.522289352850355e-06, "loss": 0.60886974, "memory(GiB)": 34.88, "step": 27075, "train_speed(iter/s)": 0.415925 }, { "acc": 0.84774637, "epoch": 0.7332196138954322, "grad_norm": 8.315001487731934, "learning_rate": 9.522050624058547e-06, "loss": 0.79251509, "memory(GiB)": 34.88, "step": 27080, "train_speed(iter/s)": 0.415928 }, { "acc": 0.85759544, "epoch": 0.7333549941786478, "grad_norm": 18.85111427307129, "learning_rate": 9.52181183862543e-06, "loss": 0.83025494, "memory(GiB)": 34.88, "step": 27085, "train_speed(iter/s)": 0.415931 }, { "acc": 0.87163258, "epoch": 0.7334903744618634, "grad_norm": 5.672327041625977, "learning_rate": 9.521572996554e-06, "loss": 0.58835878, "memory(GiB)": 34.88, "step": 27090, "train_speed(iter/s)": 0.415934 }, { "acc": 0.88067541, "epoch": 0.733625754745079, "grad_norm": 7.477010726928711, "learning_rate": 9.521334097847248e-06, "loss": 0.56281123, "memory(GiB)": 34.88, "step": 27095, "train_speed(iter/s)": 0.415937 }, { "acc": 0.87557383, "epoch": 0.7337611350282944, "grad_norm": 5.583116054534912, "learning_rate": 9.521095142508163e-06, "loss": 0.65732546, "memory(GiB)": 34.88, "step": 27100, "train_speed(iter/s)": 0.415941 }, { "acc": 0.86101589, "epoch": 0.73389651531151, "grad_norm": 7.825833797454834, "learning_rate": 9.520856130539743e-06, "loss": 0.64912634, "memory(GiB)": 34.88, "step": 27105, "train_speed(iter/s)": 0.415944 }, { "acc": 0.88337307, "epoch": 0.7340318955947256, "grad_norm": 6.332833290100098, "learning_rate": 9.520617061944978e-06, "loss": 0.59079967, "memory(GiB)": 34.88, "step": 27110, "train_speed(iter/s)": 0.415947 }, { "acc": 0.89797859, "epoch": 0.7341672758779412, "grad_norm": 11.469096183776855, "learning_rate": 9.520377936726865e-06, "loss": 0.50745764, "memory(GiB)": 34.88, "step": 27115, "train_speed(iter/s)": 0.415951 }, { "acc": 0.85435534, "epoch": 0.7343026561611566, "grad_norm": 8.803529739379883, "learning_rate": 9.520138754888402e-06, "loss": 0.78410001, "memory(GiB)": 34.88, "step": 27120, "train_speed(iter/s)": 0.415954 }, { "acc": 0.84720945, "epoch": 0.7344380364443722, "grad_norm": 21.206340789794922, "learning_rate": 9.519899516432579e-06, "loss": 0.92402172, "memory(GiB)": 34.88, "step": 27125, "train_speed(iter/s)": 0.415958 }, { "acc": 0.86982956, "epoch": 0.7345734167275878, "grad_norm": 7.481003761291504, "learning_rate": 9.5196602213624e-06, "loss": 0.60997429, "memory(GiB)": 34.88, "step": 27130, "train_speed(iter/s)": 0.415961 }, { "acc": 0.87378941, "epoch": 0.7347087970108034, "grad_norm": 18.009429931640625, "learning_rate": 9.519420869680855e-06, "loss": 0.70013146, "memory(GiB)": 34.88, "step": 27135, "train_speed(iter/s)": 0.415964 }, { "acc": 0.87330894, "epoch": 0.7348441772940189, "grad_norm": 14.40597152709961, "learning_rate": 9.519181461390949e-06, "loss": 0.75626516, "memory(GiB)": 34.88, "step": 27140, "train_speed(iter/s)": 0.415968 }, { "acc": 0.84042168, "epoch": 0.7349795575772344, "grad_norm": 10.836441993713379, "learning_rate": 9.518941996495679e-06, "loss": 0.85340195, "memory(GiB)": 34.88, "step": 27145, "train_speed(iter/s)": 0.415971 }, { "acc": 0.88498049, "epoch": 0.73511493786045, "grad_norm": 11.559592247009277, "learning_rate": 9.518702474998043e-06, "loss": 0.60610342, "memory(GiB)": 34.88, "step": 27150, "train_speed(iter/s)": 0.415974 }, { "acc": 0.88199387, "epoch": 0.7352503181436656, "grad_norm": 8.255788803100586, "learning_rate": 9.518462896901043e-06, "loss": 0.60011821, "memory(GiB)": 34.88, "step": 27155, "train_speed(iter/s)": 0.415977 }, { "acc": 0.86249409, "epoch": 0.7353856984268811, "grad_norm": 9.333257675170898, "learning_rate": 9.518223262207681e-06, "loss": 0.67187338, "memory(GiB)": 34.88, "step": 27160, "train_speed(iter/s)": 0.415981 }, { "acc": 0.85451889, "epoch": 0.7355210787100966, "grad_norm": 9.667875289916992, "learning_rate": 9.517983570920957e-06, "loss": 0.75795431, "memory(GiB)": 34.88, "step": 27165, "train_speed(iter/s)": 0.415984 }, { "acc": 0.86856213, "epoch": 0.7356564589933122, "grad_norm": 2.8668909072875977, "learning_rate": 9.517743823043873e-06, "loss": 0.64884624, "memory(GiB)": 34.88, "step": 27170, "train_speed(iter/s)": 0.415987 }, { "acc": 0.88626003, "epoch": 0.7357918392765278, "grad_norm": 7.578988075256348, "learning_rate": 9.517504018579435e-06, "loss": 0.51857748, "memory(GiB)": 34.88, "step": 27175, "train_speed(iter/s)": 0.41599 }, { "acc": 0.87308121, "epoch": 0.7359272195597433, "grad_norm": 5.760814189910889, "learning_rate": 9.517264157530646e-06, "loss": 0.62041821, "memory(GiB)": 34.88, "step": 27180, "train_speed(iter/s)": 0.415993 }, { "acc": 0.85674362, "epoch": 0.7360625998429589, "grad_norm": 21.932661056518555, "learning_rate": 9.51702423990051e-06, "loss": 0.73749332, "memory(GiB)": 34.88, "step": 27185, "train_speed(iter/s)": 0.415996 }, { "acc": 0.90468216, "epoch": 0.7361979801261744, "grad_norm": 17.888103485107422, "learning_rate": 9.516784265692033e-06, "loss": 0.4719677, "memory(GiB)": 34.88, "step": 27190, "train_speed(iter/s)": 0.416 }, { "acc": 0.84541473, "epoch": 0.73633336040939, "grad_norm": 19.957141876220703, "learning_rate": 9.516544234908221e-06, "loss": 0.86942596, "memory(GiB)": 34.88, "step": 27195, "train_speed(iter/s)": 0.416003 }, { "acc": 0.85159941, "epoch": 0.7364687406926055, "grad_norm": 9.670356750488281, "learning_rate": 9.516304147552082e-06, "loss": 0.81394978, "memory(GiB)": 34.88, "step": 27200, "train_speed(iter/s)": 0.416006 }, { "acc": 0.85179415, "epoch": 0.7366041209758211, "grad_norm": 11.583863258361816, "learning_rate": 9.516064003626622e-06, "loss": 0.75656466, "memory(GiB)": 34.88, "step": 27205, "train_speed(iter/s)": 0.416009 }, { "acc": 0.84190617, "epoch": 0.7367395012590366, "grad_norm": 10.60391902923584, "learning_rate": 9.515823803134849e-06, "loss": 0.80721092, "memory(GiB)": 34.88, "step": 27210, "train_speed(iter/s)": 0.416012 }, { "acc": 0.85904694, "epoch": 0.7368748815422522, "grad_norm": 6.796594142913818, "learning_rate": 9.515583546079774e-06, "loss": 0.73718443, "memory(GiB)": 34.88, "step": 27215, "train_speed(iter/s)": 0.416015 }, { "acc": 0.8631319, "epoch": 0.7370102618254677, "grad_norm": 4.397488594055176, "learning_rate": 9.515343232464404e-06, "loss": 0.69044733, "memory(GiB)": 34.88, "step": 27220, "train_speed(iter/s)": 0.416019 }, { "acc": 0.85869713, "epoch": 0.7371456421086833, "grad_norm": 12.310370445251465, "learning_rate": 9.515102862291751e-06, "loss": 0.64950533, "memory(GiB)": 34.88, "step": 27225, "train_speed(iter/s)": 0.416022 }, { "acc": 0.85766392, "epoch": 0.7372810223918989, "grad_norm": 9.602890968322754, "learning_rate": 9.514862435564828e-06, "loss": 0.79573898, "memory(GiB)": 34.88, "step": 27230, "train_speed(iter/s)": 0.416026 }, { "acc": 0.86259108, "epoch": 0.7374164026751144, "grad_norm": 13.213896751403809, "learning_rate": 9.514621952286643e-06, "loss": 0.76494246, "memory(GiB)": 34.88, "step": 27235, "train_speed(iter/s)": 0.416029 }, { "acc": 0.89312153, "epoch": 0.7375517829583299, "grad_norm": 8.080577850341797, "learning_rate": 9.514381412460211e-06, "loss": 0.47715554, "memory(GiB)": 34.88, "step": 27240, "train_speed(iter/s)": 0.416033 }, { "acc": 0.90059338, "epoch": 0.7376871632415455, "grad_norm": 6.825869083404541, "learning_rate": 9.514140816088545e-06, "loss": 0.47009974, "memory(GiB)": 34.88, "step": 27245, "train_speed(iter/s)": 0.416036 }, { "acc": 0.85672846, "epoch": 0.7378225435247611, "grad_norm": 10.960538864135742, "learning_rate": 9.513900163174659e-06, "loss": 0.81853609, "memory(GiB)": 34.88, "step": 27250, "train_speed(iter/s)": 0.41604 }, { "acc": 0.89131794, "epoch": 0.7379579238079766, "grad_norm": 7.726433277130127, "learning_rate": 9.513659453721565e-06, "loss": 0.55578423, "memory(GiB)": 34.88, "step": 27255, "train_speed(iter/s)": 0.416043 }, { "acc": 0.8947319, "epoch": 0.7380933040911921, "grad_norm": 5.84358024597168, "learning_rate": 9.51341868773228e-06, "loss": 0.51888218, "memory(GiB)": 34.88, "step": 27260, "train_speed(iter/s)": 0.416046 }, { "acc": 0.85835762, "epoch": 0.7382286843744077, "grad_norm": 10.214873313903809, "learning_rate": 9.513177865209823e-06, "loss": 0.77764902, "memory(GiB)": 34.88, "step": 27265, "train_speed(iter/s)": 0.416049 }, { "acc": 0.84586887, "epoch": 0.7383640646576233, "grad_norm": 7.9771270751953125, "learning_rate": 9.512936986157208e-06, "loss": 0.87109718, "memory(GiB)": 34.88, "step": 27270, "train_speed(iter/s)": 0.416052 }, { "acc": 0.85578032, "epoch": 0.7384994449408389, "grad_norm": 8.76827621459961, "learning_rate": 9.51269605057745e-06, "loss": 0.77481713, "memory(GiB)": 34.88, "step": 27275, "train_speed(iter/s)": 0.416054 }, { "acc": 0.87208719, "epoch": 0.7386348252240543, "grad_norm": 5.932474136352539, "learning_rate": 9.512455058473574e-06, "loss": 0.62764282, "memory(GiB)": 34.88, "step": 27280, "train_speed(iter/s)": 0.416058 }, { "acc": 0.87142248, "epoch": 0.7387702055072699, "grad_norm": 7.919497013092041, "learning_rate": 9.512214009848595e-06, "loss": 0.63468218, "memory(GiB)": 34.88, "step": 27285, "train_speed(iter/s)": 0.416061 }, { "acc": 0.8803133, "epoch": 0.7389055857904855, "grad_norm": 78.4383773803711, "learning_rate": 9.511972904705528e-06, "loss": 0.62352076, "memory(GiB)": 34.88, "step": 27290, "train_speed(iter/s)": 0.416065 }, { "acc": 0.87517872, "epoch": 0.7390409660737011, "grad_norm": 11.528545379638672, "learning_rate": 9.5117317430474e-06, "loss": 0.65374284, "memory(GiB)": 34.88, "step": 27295, "train_speed(iter/s)": 0.416068 }, { "acc": 0.86289597, "epoch": 0.7391763463569165, "grad_norm": 32.618072509765625, "learning_rate": 9.511490524877232e-06, "loss": 0.79034929, "memory(GiB)": 34.88, "step": 27300, "train_speed(iter/s)": 0.416071 }, { "acc": 0.86496983, "epoch": 0.7393117266401321, "grad_norm": 17.64775276184082, "learning_rate": 9.51124925019804e-06, "loss": 0.7254261, "memory(GiB)": 34.88, "step": 27305, "train_speed(iter/s)": 0.416074 }, { "acc": 0.85896988, "epoch": 0.7394471069233477, "grad_norm": 9.910796165466309, "learning_rate": 9.511007919012853e-06, "loss": 0.7250484, "memory(GiB)": 34.88, "step": 27310, "train_speed(iter/s)": 0.416077 }, { "acc": 0.84580746, "epoch": 0.7395824872065633, "grad_norm": 5.686899662017822, "learning_rate": 9.51076653132469e-06, "loss": 0.76607518, "memory(GiB)": 34.88, "step": 27315, "train_speed(iter/s)": 0.41608 }, { "acc": 0.87526445, "epoch": 0.7397178674897787, "grad_norm": 10.368802070617676, "learning_rate": 9.510525087136576e-06, "loss": 0.6434216, "memory(GiB)": 34.88, "step": 27320, "train_speed(iter/s)": 0.416083 }, { "acc": 0.86978912, "epoch": 0.7398532477729943, "grad_norm": 7.721790313720703, "learning_rate": 9.510283586451536e-06, "loss": 0.60716267, "memory(GiB)": 34.88, "step": 27325, "train_speed(iter/s)": 0.416086 }, { "acc": 0.85186033, "epoch": 0.7399886280562099, "grad_norm": 6.838202476501465, "learning_rate": 9.510042029272595e-06, "loss": 0.68618937, "memory(GiB)": 34.88, "step": 27330, "train_speed(iter/s)": 0.41609 }, { "acc": 0.8660778, "epoch": 0.7401240083394255, "grad_norm": 13.513965606689453, "learning_rate": 9.509800415602779e-06, "loss": 0.65177603, "memory(GiB)": 34.88, "step": 27335, "train_speed(iter/s)": 0.416093 }, { "acc": 0.86194401, "epoch": 0.740259388622641, "grad_norm": 10.64463996887207, "learning_rate": 9.509558745445113e-06, "loss": 0.74349279, "memory(GiB)": 34.88, "step": 27340, "train_speed(iter/s)": 0.416096 }, { "acc": 0.85854826, "epoch": 0.7403947689058565, "grad_norm": 7.471065044403076, "learning_rate": 9.509317018802628e-06, "loss": 0.77065673, "memory(GiB)": 34.88, "step": 27345, "train_speed(iter/s)": 0.416099 }, { "acc": 0.86610975, "epoch": 0.7405301491890721, "grad_norm": 6.070776462554932, "learning_rate": 9.509075235678349e-06, "loss": 0.70962839, "memory(GiB)": 34.88, "step": 27350, "train_speed(iter/s)": 0.416102 }, { "acc": 0.84796104, "epoch": 0.7406655294722877, "grad_norm": 7.638345241546631, "learning_rate": 9.508833396075306e-06, "loss": 0.79735627, "memory(GiB)": 34.88, "step": 27355, "train_speed(iter/s)": 0.416104 }, { "acc": 0.88788414, "epoch": 0.7408009097555032, "grad_norm": 7.410654544830322, "learning_rate": 9.508591499996528e-06, "loss": 0.56324806, "memory(GiB)": 34.88, "step": 27360, "train_speed(iter/s)": 0.416106 }, { "acc": 0.85418129, "epoch": 0.7409362900387187, "grad_norm": 9.615087509155273, "learning_rate": 9.508349547445045e-06, "loss": 0.79909129, "memory(GiB)": 34.88, "step": 27365, "train_speed(iter/s)": 0.41611 }, { "acc": 0.83750944, "epoch": 0.7410716703219343, "grad_norm": 18.303667068481445, "learning_rate": 9.50810753842389e-06, "loss": 0.92380352, "memory(GiB)": 34.88, "step": 27370, "train_speed(iter/s)": 0.416112 }, { "acc": 0.86955757, "epoch": 0.7412070506051499, "grad_norm": 8.995882987976074, "learning_rate": 9.507865472936093e-06, "loss": 0.69961662, "memory(GiB)": 34.88, "step": 27375, "train_speed(iter/s)": 0.416115 }, { "acc": 0.88568516, "epoch": 0.7413424308883654, "grad_norm": 10.853100776672363, "learning_rate": 9.507623350984685e-06, "loss": 0.62278004, "memory(GiB)": 34.88, "step": 27380, "train_speed(iter/s)": 0.416119 }, { "acc": 0.87537785, "epoch": 0.741477811171581, "grad_norm": 7.419662952423096, "learning_rate": 9.5073811725727e-06, "loss": 0.62318163, "memory(GiB)": 34.88, "step": 27385, "train_speed(iter/s)": 0.416122 }, { "acc": 0.87727718, "epoch": 0.7416131914547965, "grad_norm": 5.83371639251709, "learning_rate": 9.507138937703176e-06, "loss": 0.64414282, "memory(GiB)": 34.88, "step": 27390, "train_speed(iter/s)": 0.416123 }, { "acc": 0.86732531, "epoch": 0.7417485717380121, "grad_norm": 7.7427191734313965, "learning_rate": 9.50689664637914e-06, "loss": 0.64451342, "memory(GiB)": 34.88, "step": 27395, "train_speed(iter/s)": 0.416127 }, { "acc": 0.85284481, "epoch": 0.7418839520212276, "grad_norm": 3.5777883529663086, "learning_rate": 9.506654298603633e-06, "loss": 0.81267109, "memory(GiB)": 34.88, "step": 27400, "train_speed(iter/s)": 0.41613 }, { "acc": 0.86306944, "epoch": 0.7420193323044432, "grad_norm": 5.906708240509033, "learning_rate": 9.506411894379688e-06, "loss": 0.7537446, "memory(GiB)": 34.88, "step": 27405, "train_speed(iter/s)": 0.416133 }, { "acc": 0.87489901, "epoch": 0.7421547125876587, "grad_norm": 9.901906967163086, "learning_rate": 9.506169433710343e-06, "loss": 0.6231513, "memory(GiB)": 34.88, "step": 27410, "train_speed(iter/s)": 0.416137 }, { "acc": 0.86799889, "epoch": 0.7422900928708743, "grad_norm": 6.171712398529053, "learning_rate": 9.505926916598636e-06, "loss": 0.64826345, "memory(GiB)": 34.88, "step": 27415, "train_speed(iter/s)": 0.41614 }, { "acc": 0.88679438, "epoch": 0.7424254731540898, "grad_norm": 5.651561737060547, "learning_rate": 9.5056843430476e-06, "loss": 0.5830616, "memory(GiB)": 34.88, "step": 27420, "train_speed(iter/s)": 0.416143 }, { "acc": 0.84793053, "epoch": 0.7425608534373054, "grad_norm": 13.760124206542969, "learning_rate": 9.505441713060281e-06, "loss": 0.8805933, "memory(GiB)": 34.88, "step": 27425, "train_speed(iter/s)": 0.416146 }, { "acc": 0.88006763, "epoch": 0.742696233720521, "grad_norm": 6.254233360290527, "learning_rate": 9.505199026639714e-06, "loss": 0.59055915, "memory(GiB)": 34.88, "step": 27430, "train_speed(iter/s)": 0.416149 }, { "acc": 0.85992413, "epoch": 0.7428316140037365, "grad_norm": 6.586903095245361, "learning_rate": 9.504956283788939e-06, "loss": 0.69026284, "memory(GiB)": 34.88, "step": 27435, "train_speed(iter/s)": 0.41615 }, { "acc": 0.85860844, "epoch": 0.742966994286952, "grad_norm": 11.707062721252441, "learning_rate": 9.504713484510997e-06, "loss": 0.80980844, "memory(GiB)": 34.88, "step": 27440, "train_speed(iter/s)": 0.416154 }, { "acc": 0.88241415, "epoch": 0.7431023745701676, "grad_norm": 5.8433308601379395, "learning_rate": 9.504470628808932e-06, "loss": 0.62586842, "memory(GiB)": 34.88, "step": 27445, "train_speed(iter/s)": 0.416157 }, { "acc": 0.83374882, "epoch": 0.7432377548533832, "grad_norm": 7.974151134490967, "learning_rate": 9.504227716685785e-06, "loss": 0.98913784, "memory(GiB)": 34.88, "step": 27450, "train_speed(iter/s)": 0.41616 }, { "acc": 0.88256245, "epoch": 0.7433731351365988, "grad_norm": 6.621821880340576, "learning_rate": 9.503984748144597e-06, "loss": 0.6237093, "memory(GiB)": 34.88, "step": 27455, "train_speed(iter/s)": 0.416163 }, { "acc": 0.86032801, "epoch": 0.7435085154198142, "grad_norm": 12.203096389770508, "learning_rate": 9.503741723188415e-06, "loss": 0.650278, "memory(GiB)": 34.88, "step": 27460, "train_speed(iter/s)": 0.416167 }, { "acc": 0.89092445, "epoch": 0.7436438957030298, "grad_norm": 15.984268188476562, "learning_rate": 9.50349864182028e-06, "loss": 0.55302515, "memory(GiB)": 34.88, "step": 27465, "train_speed(iter/s)": 0.41617 }, { "acc": 0.89308338, "epoch": 0.7437792759862454, "grad_norm": 8.668733596801758, "learning_rate": 9.50325550404324e-06, "loss": 0.50449758, "memory(GiB)": 34.88, "step": 27470, "train_speed(iter/s)": 0.416172 }, { "acc": 0.87093496, "epoch": 0.743914656269461, "grad_norm": 11.235420227050781, "learning_rate": 9.503012309860337e-06, "loss": 0.67109056, "memory(GiB)": 34.88, "step": 27475, "train_speed(iter/s)": 0.416176 }, { "acc": 0.89974461, "epoch": 0.7440500365526764, "grad_norm": 6.698401927947998, "learning_rate": 9.502769059274622e-06, "loss": 0.48361402, "memory(GiB)": 34.88, "step": 27480, "train_speed(iter/s)": 0.416179 }, { "acc": 0.8685936, "epoch": 0.744185416835892, "grad_norm": 7.7040228843688965, "learning_rate": 9.50252575228914e-06, "loss": 0.67919641, "memory(GiB)": 34.88, "step": 27485, "train_speed(iter/s)": 0.416182 }, { "acc": 0.85384846, "epoch": 0.7443207971191076, "grad_norm": 7.3774614334106445, "learning_rate": 9.502282388906939e-06, "loss": 0.68203673, "memory(GiB)": 34.88, "step": 27490, "train_speed(iter/s)": 0.416184 }, { "acc": 0.89477482, "epoch": 0.7444561774023232, "grad_norm": 4.290188789367676, "learning_rate": 9.502038969131067e-06, "loss": 0.52912393, "memory(GiB)": 34.88, "step": 27495, "train_speed(iter/s)": 0.416186 }, { "acc": 0.85671091, "epoch": 0.7445915576855386, "grad_norm": 12.254733085632324, "learning_rate": 9.501795492964574e-06, "loss": 0.67730441, "memory(GiB)": 34.88, "step": 27500, "train_speed(iter/s)": 0.416189 }, { "acc": 0.88504429, "epoch": 0.7447269379687542, "grad_norm": 8.866389274597168, "learning_rate": 9.50155196041051e-06, "loss": 0.48315749, "memory(GiB)": 34.88, "step": 27505, "train_speed(iter/s)": 0.416192 }, { "acc": 0.8775095, "epoch": 0.7448623182519698, "grad_norm": 7.662598133087158, "learning_rate": 9.501308371471925e-06, "loss": 0.5965405, "memory(GiB)": 34.88, "step": 27510, "train_speed(iter/s)": 0.416195 }, { "acc": 0.85826035, "epoch": 0.7449976985351854, "grad_norm": 12.536595344543457, "learning_rate": 9.501064726151874e-06, "loss": 0.73775539, "memory(GiB)": 34.88, "step": 27515, "train_speed(iter/s)": 0.416198 }, { "acc": 0.86956558, "epoch": 0.7451330788184009, "grad_norm": 6.715415000915527, "learning_rate": 9.5008210244534e-06, "loss": 0.72339449, "memory(GiB)": 34.88, "step": 27520, "train_speed(iter/s)": 0.416201 }, { "acc": 0.87544432, "epoch": 0.7452684591016164, "grad_norm": 11.660975456237793, "learning_rate": 9.500577266379569e-06, "loss": 0.68418517, "memory(GiB)": 34.88, "step": 27525, "train_speed(iter/s)": 0.416205 }, { "acc": 0.88259687, "epoch": 0.745403839384832, "grad_norm": 7.513753414154053, "learning_rate": 9.500333451933424e-06, "loss": 0.6140193, "memory(GiB)": 34.88, "step": 27530, "train_speed(iter/s)": 0.416208 }, { "acc": 0.87030659, "epoch": 0.7455392196680476, "grad_norm": 11.524057388305664, "learning_rate": 9.500089581118023e-06, "loss": 0.66572666, "memory(GiB)": 34.88, "step": 27535, "train_speed(iter/s)": 0.416211 }, { "acc": 0.85820332, "epoch": 0.7456745999512631, "grad_norm": 8.191885948181152, "learning_rate": 9.499845653936422e-06, "loss": 0.70365663, "memory(GiB)": 34.88, "step": 27540, "train_speed(iter/s)": 0.416214 }, { "acc": 0.86206303, "epoch": 0.7458099802344786, "grad_norm": 5.5872802734375, "learning_rate": 9.499601670391672e-06, "loss": 0.83240757, "memory(GiB)": 34.88, "step": 27545, "train_speed(iter/s)": 0.416217 }, { "acc": 0.87533722, "epoch": 0.7459453605176942, "grad_norm": 11.844865798950195, "learning_rate": 9.499357630486836e-06, "loss": 0.63352823, "memory(GiB)": 34.88, "step": 27550, "train_speed(iter/s)": 0.416218 }, { "acc": 0.88101845, "epoch": 0.7460807408009098, "grad_norm": 5.748259544372559, "learning_rate": 9.499113534224968e-06, "loss": 0.56370392, "memory(GiB)": 34.88, "step": 27555, "train_speed(iter/s)": 0.416221 }, { "acc": 0.87295446, "epoch": 0.7462161210841253, "grad_norm": 7.930516242980957, "learning_rate": 9.498869381609126e-06, "loss": 0.62039433, "memory(GiB)": 34.88, "step": 27560, "train_speed(iter/s)": 0.416224 }, { "acc": 0.86592264, "epoch": 0.7463515013673409, "grad_norm": 12.17360782623291, "learning_rate": 9.498625172642368e-06, "loss": 0.68548651, "memory(GiB)": 34.88, "step": 27565, "train_speed(iter/s)": 0.416226 }, { "acc": 0.88455086, "epoch": 0.7464868816505564, "grad_norm": 10.894829750061035, "learning_rate": 9.498380907327752e-06, "loss": 0.62764463, "memory(GiB)": 34.88, "step": 27570, "train_speed(iter/s)": 0.416228 }, { "acc": 0.8693984, "epoch": 0.746622261933772, "grad_norm": 9.246175765991211, "learning_rate": 9.49813658566834e-06, "loss": 0.62410092, "memory(GiB)": 34.88, "step": 27575, "train_speed(iter/s)": 0.41623 }, { "acc": 0.88377609, "epoch": 0.7467576422169875, "grad_norm": 6.473415851593018, "learning_rate": 9.497892207667192e-06, "loss": 0.51920986, "memory(GiB)": 34.88, "step": 27580, "train_speed(iter/s)": 0.416232 }, { "acc": 0.90503044, "epoch": 0.7468930225002031, "grad_norm": 4.563370704650879, "learning_rate": 9.49764777332737e-06, "loss": 0.40597434, "memory(GiB)": 34.88, "step": 27585, "train_speed(iter/s)": 0.416235 }, { "acc": 0.86462297, "epoch": 0.7470284027834186, "grad_norm": 6.934689044952393, "learning_rate": 9.497403282651935e-06, "loss": 0.69743156, "memory(GiB)": 34.88, "step": 27590, "train_speed(iter/s)": 0.416238 }, { "acc": 0.87396631, "epoch": 0.7471637830666342, "grad_norm": 7.7602410316467285, "learning_rate": 9.497158735643949e-06, "loss": 0.70393648, "memory(GiB)": 34.88, "step": 27595, "train_speed(iter/s)": 0.416241 }, { "acc": 0.87204723, "epoch": 0.7472991633498497, "grad_norm": 12.517468452453613, "learning_rate": 9.496914132306476e-06, "loss": 0.67292519, "memory(GiB)": 34.88, "step": 27600, "train_speed(iter/s)": 0.416244 }, { "acc": 0.89243555, "epoch": 0.7474345436330653, "grad_norm": 12.873734474182129, "learning_rate": 9.496669472642582e-06, "loss": 0.49358053, "memory(GiB)": 34.88, "step": 27605, "train_speed(iter/s)": 0.416247 }, { "acc": 0.85839748, "epoch": 0.7475699239162809, "grad_norm": 9.651507377624512, "learning_rate": 9.496424756655331e-06, "loss": 0.69124413, "memory(GiB)": 34.88, "step": 27610, "train_speed(iter/s)": 0.41625 }, { "acc": 0.86303043, "epoch": 0.7477053041994964, "grad_norm": 14.622220039367676, "learning_rate": 9.496179984347786e-06, "loss": 0.64017, "memory(GiB)": 34.88, "step": 27615, "train_speed(iter/s)": 0.416253 }, { "acc": 0.85862751, "epoch": 0.7478406844827119, "grad_norm": 10.025251388549805, "learning_rate": 9.495935155723017e-06, "loss": 0.66204181, "memory(GiB)": 34.88, "step": 27620, "train_speed(iter/s)": 0.416256 }, { "acc": 0.88667831, "epoch": 0.7479760647659275, "grad_norm": 4.644735813140869, "learning_rate": 9.495690270784087e-06, "loss": 0.48676243, "memory(GiB)": 34.88, "step": 27625, "train_speed(iter/s)": 0.416258 }, { "acc": 0.87444649, "epoch": 0.7481114450491431, "grad_norm": 6.301916122436523, "learning_rate": 9.495445329534067e-06, "loss": 0.6447341, "memory(GiB)": 34.88, "step": 27630, "train_speed(iter/s)": 0.41626 }, { "acc": 0.87632618, "epoch": 0.7482468253323586, "grad_norm": 6.827747344970703, "learning_rate": 9.495200331976024e-06, "loss": 0.66720123, "memory(GiB)": 34.88, "step": 27635, "train_speed(iter/s)": 0.416263 }, { "acc": 0.88420696, "epoch": 0.7483822056155741, "grad_norm": 7.273409366607666, "learning_rate": 9.494955278113028e-06, "loss": 0.53609266, "memory(GiB)": 34.88, "step": 27640, "train_speed(iter/s)": 0.416267 }, { "acc": 0.86453743, "epoch": 0.7485175858987897, "grad_norm": 8.5049467086792, "learning_rate": 9.494710167948148e-06, "loss": 0.76455841, "memory(GiB)": 34.88, "step": 27645, "train_speed(iter/s)": 0.41627 }, { "acc": 0.85604219, "epoch": 0.7486529661820053, "grad_norm": 7.230156898498535, "learning_rate": 9.494465001484455e-06, "loss": 0.7580677, "memory(GiB)": 34.88, "step": 27650, "train_speed(iter/s)": 0.416273 }, { "acc": 0.86740971, "epoch": 0.7487883464652209, "grad_norm": 9.11076545715332, "learning_rate": 9.494219778725017e-06, "loss": 0.63746071, "memory(GiB)": 34.88, "step": 27655, "train_speed(iter/s)": 0.416276 }, { "acc": 0.89140482, "epoch": 0.7489237267484363, "grad_norm": 6.543437957763672, "learning_rate": 9.493974499672913e-06, "loss": 0.51122179, "memory(GiB)": 34.88, "step": 27660, "train_speed(iter/s)": 0.416279 }, { "acc": 0.86461372, "epoch": 0.7490591070316519, "grad_norm": 11.226584434509277, "learning_rate": 9.493729164331208e-06, "loss": 0.71981931, "memory(GiB)": 34.88, "step": 27665, "train_speed(iter/s)": 0.416282 }, { "acc": 0.88348293, "epoch": 0.7491944873148675, "grad_norm": 7.815238952636719, "learning_rate": 9.493483772702979e-06, "loss": 0.5868906, "memory(GiB)": 34.88, "step": 27670, "train_speed(iter/s)": 0.416285 }, { "acc": 0.88782444, "epoch": 0.7493298675980831, "grad_norm": 6.986152648925781, "learning_rate": 9.4932383247913e-06, "loss": 0.54277973, "memory(GiB)": 34.88, "step": 27675, "train_speed(iter/s)": 0.416288 }, { "acc": 0.89339104, "epoch": 0.7494652478812985, "grad_norm": 5.680871963500977, "learning_rate": 9.492992820599245e-06, "loss": 0.50174065, "memory(GiB)": 34.88, "step": 27680, "train_speed(iter/s)": 0.416292 }, { "acc": 0.88038015, "epoch": 0.7496006281645141, "grad_norm": 7.508768081665039, "learning_rate": 9.49274726012989e-06, "loss": 0.6237278, "memory(GiB)": 34.88, "step": 27685, "train_speed(iter/s)": 0.416295 }, { "acc": 0.85949755, "epoch": 0.7497360084477297, "grad_norm": 8.03934097290039, "learning_rate": 9.49250164338631e-06, "loss": 0.73971901, "memory(GiB)": 34.88, "step": 27690, "train_speed(iter/s)": 0.416299 }, { "acc": 0.87556381, "epoch": 0.7498713887309453, "grad_norm": 6.162456512451172, "learning_rate": 9.492255970371584e-06, "loss": 0.66687222, "memory(GiB)": 34.88, "step": 27695, "train_speed(iter/s)": 0.416301 }, { "acc": 0.85627871, "epoch": 0.7500067690141607, "grad_norm": 6.537195682525635, "learning_rate": 9.492010241088786e-06, "loss": 0.68099689, "memory(GiB)": 34.88, "step": 27700, "train_speed(iter/s)": 0.416304 }, { "acc": 0.85778131, "epoch": 0.7501421492973763, "grad_norm": 6.264949321746826, "learning_rate": 9.491764455540998e-06, "loss": 0.77321215, "memory(GiB)": 34.88, "step": 27705, "train_speed(iter/s)": 0.416308 }, { "acc": 0.85060635, "epoch": 0.7502775295805919, "grad_norm": 12.287237167358398, "learning_rate": 9.491518613731298e-06, "loss": 0.76006761, "memory(GiB)": 34.88, "step": 27710, "train_speed(iter/s)": 0.416311 }, { "acc": 0.8858242, "epoch": 0.7504129098638075, "grad_norm": 34.83439636230469, "learning_rate": 9.491272715662765e-06, "loss": 0.64810324, "memory(GiB)": 34.88, "step": 27715, "train_speed(iter/s)": 0.416314 }, { "acc": 0.87842598, "epoch": 0.750548290147023, "grad_norm": 8.586634635925293, "learning_rate": 9.491026761338477e-06, "loss": 0.64178977, "memory(GiB)": 34.88, "step": 27720, "train_speed(iter/s)": 0.416318 }, { "acc": 0.88763351, "epoch": 0.7506836704302385, "grad_norm": 8.29832649230957, "learning_rate": 9.49078075076152e-06, "loss": 0.51343145, "memory(GiB)": 34.88, "step": 27725, "train_speed(iter/s)": 0.416321 }, { "acc": 0.88438168, "epoch": 0.7508190507134541, "grad_norm": 4.859045028686523, "learning_rate": 9.49053468393497e-06, "loss": 0.52274904, "memory(GiB)": 34.88, "step": 27730, "train_speed(iter/s)": 0.416324 }, { "acc": 0.88595934, "epoch": 0.7509544309966697, "grad_norm": 7.28706693649292, "learning_rate": 9.490288560861917e-06, "loss": 0.54552526, "memory(GiB)": 34.88, "step": 27735, "train_speed(iter/s)": 0.416326 }, { "acc": 0.87754545, "epoch": 0.7510898112798852, "grad_norm": 12.136798858642578, "learning_rate": 9.490042381545438e-06, "loss": 0.63920474, "memory(GiB)": 34.88, "step": 27740, "train_speed(iter/s)": 0.41633 }, { "acc": 0.85840263, "epoch": 0.7512251915631007, "grad_norm": 12.325572967529297, "learning_rate": 9.48979614598862e-06, "loss": 0.76871715, "memory(GiB)": 34.88, "step": 27745, "train_speed(iter/s)": 0.416333 }, { "acc": 0.90463781, "epoch": 0.7513605718463163, "grad_norm": 3.938810348510742, "learning_rate": 9.489549854194543e-06, "loss": 0.452003, "memory(GiB)": 34.88, "step": 27750, "train_speed(iter/s)": 0.416336 }, { "acc": 0.86543198, "epoch": 0.7514959521295319, "grad_norm": 10.647743225097656, "learning_rate": 9.489303506166299e-06, "loss": 0.65946569, "memory(GiB)": 34.88, "step": 27755, "train_speed(iter/s)": 0.416339 }, { "acc": 0.86821365, "epoch": 0.7516313324127474, "grad_norm": 9.062579154968262, "learning_rate": 9.489057101906969e-06, "loss": 0.67043757, "memory(GiB)": 34.88, "step": 27760, "train_speed(iter/s)": 0.416342 }, { "acc": 0.87291708, "epoch": 0.751766712695963, "grad_norm": 8.386490821838379, "learning_rate": 9.48881064141964e-06, "loss": 0.62258224, "memory(GiB)": 34.88, "step": 27765, "train_speed(iter/s)": 0.416346 }, { "acc": 0.86942215, "epoch": 0.7519020929791785, "grad_norm": 6.783514976501465, "learning_rate": 9.488564124707403e-06, "loss": 0.62050276, "memory(GiB)": 34.88, "step": 27770, "train_speed(iter/s)": 0.416349 }, { "acc": 0.85927219, "epoch": 0.7520374732623941, "grad_norm": 4.479864120483398, "learning_rate": 9.488317551773342e-06, "loss": 0.68405323, "memory(GiB)": 34.88, "step": 27775, "train_speed(iter/s)": 0.416352 }, { "acc": 0.87026882, "epoch": 0.7521728535456096, "grad_norm": 9.549882888793945, "learning_rate": 9.488070922620549e-06, "loss": 0.64214468, "memory(GiB)": 34.88, "step": 27780, "train_speed(iter/s)": 0.416355 }, { "acc": 0.85452805, "epoch": 0.7523082338288252, "grad_norm": 7.570284843444824, "learning_rate": 9.487824237252112e-06, "loss": 0.70949402, "memory(GiB)": 34.88, "step": 27785, "train_speed(iter/s)": 0.416357 }, { "acc": 0.84730129, "epoch": 0.7524436141120407, "grad_norm": 11.916695594787598, "learning_rate": 9.48757749567112e-06, "loss": 0.7781002, "memory(GiB)": 34.88, "step": 27790, "train_speed(iter/s)": 0.41636 }, { "acc": 0.85621138, "epoch": 0.7525789943952563, "grad_norm": 10.57055950164795, "learning_rate": 9.487330697880667e-06, "loss": 0.8503089, "memory(GiB)": 34.88, "step": 27795, "train_speed(iter/s)": 0.416363 }, { "acc": 0.89579887, "epoch": 0.7527143746784718, "grad_norm": 4.160983562469482, "learning_rate": 9.487083843883843e-06, "loss": 0.47659273, "memory(GiB)": 34.88, "step": 27800, "train_speed(iter/s)": 0.416366 }, { "acc": 0.85820322, "epoch": 0.7528497549616874, "grad_norm": 9.369807243347168, "learning_rate": 9.486836933683739e-06, "loss": 0.74140024, "memory(GiB)": 34.88, "step": 27805, "train_speed(iter/s)": 0.416368 }, { "acc": 0.87135782, "epoch": 0.752985135244903, "grad_norm": 7.6456403732299805, "learning_rate": 9.48658996728345e-06, "loss": 0.61192493, "memory(GiB)": 34.88, "step": 27810, "train_speed(iter/s)": 0.416371 }, { "acc": 0.87360601, "epoch": 0.7531205155281185, "grad_norm": 9.246397972106934, "learning_rate": 9.486342944686069e-06, "loss": 0.62564802, "memory(GiB)": 34.88, "step": 27815, "train_speed(iter/s)": 0.416374 }, { "acc": 0.85294018, "epoch": 0.753255895811334, "grad_norm": 9.953845024108887, "learning_rate": 9.48609586589469e-06, "loss": 0.75959997, "memory(GiB)": 34.88, "step": 27820, "train_speed(iter/s)": 0.416378 }, { "acc": 0.83270721, "epoch": 0.7533912760945496, "grad_norm": 8.118409156799316, "learning_rate": 9.485848730912409e-06, "loss": 0.90271673, "memory(GiB)": 34.88, "step": 27825, "train_speed(iter/s)": 0.416381 }, { "acc": 0.86946449, "epoch": 0.7535266563777652, "grad_norm": 12.457210540771484, "learning_rate": 9.485601539742322e-06, "loss": 0.66659484, "memory(GiB)": 34.88, "step": 27830, "train_speed(iter/s)": 0.416384 }, { "acc": 0.85564194, "epoch": 0.7536620366609807, "grad_norm": 8.777719497680664, "learning_rate": 9.485354292387523e-06, "loss": 0.79642754, "memory(GiB)": 34.88, "step": 27835, "train_speed(iter/s)": 0.416387 }, { "acc": 0.86034241, "epoch": 0.7537974169441962, "grad_norm": 6.019215106964111, "learning_rate": 9.485106988851113e-06, "loss": 0.74916334, "memory(GiB)": 34.88, "step": 27840, "train_speed(iter/s)": 0.41639 }, { "acc": 0.88387318, "epoch": 0.7539327972274118, "grad_norm": 4.436723709106445, "learning_rate": 9.484859629136186e-06, "loss": 0.55489454, "memory(GiB)": 34.88, "step": 27845, "train_speed(iter/s)": 0.416394 }, { "acc": 0.86672688, "epoch": 0.7540681775106274, "grad_norm": 7.916656017303467, "learning_rate": 9.484612213245845e-06, "loss": 0.73066249, "memory(GiB)": 34.88, "step": 27850, "train_speed(iter/s)": 0.416396 }, { "acc": 0.86499882, "epoch": 0.7542035577938428, "grad_norm": 10.41666030883789, "learning_rate": 9.484364741183187e-06, "loss": 0.67760863, "memory(GiB)": 34.88, "step": 27855, "train_speed(iter/s)": 0.416399 }, { "acc": 0.86425409, "epoch": 0.7543389380770584, "grad_norm": 7.01752233505249, "learning_rate": 9.484117212951313e-06, "loss": 0.70786586, "memory(GiB)": 34.88, "step": 27860, "train_speed(iter/s)": 0.416402 }, { "acc": 0.87696581, "epoch": 0.754474318360274, "grad_norm": 38.27001953125, "learning_rate": 9.483869628553322e-06, "loss": 0.58907051, "memory(GiB)": 34.88, "step": 27865, "train_speed(iter/s)": 0.416405 }, { "acc": 0.85481911, "epoch": 0.7546096986434896, "grad_norm": 10.020824432373047, "learning_rate": 9.483621987992317e-06, "loss": 0.76518345, "memory(GiB)": 34.88, "step": 27870, "train_speed(iter/s)": 0.416408 }, { "acc": 0.89566584, "epoch": 0.7547450789267051, "grad_norm": 5.623417377471924, "learning_rate": 9.483374291271398e-06, "loss": 0.42905807, "memory(GiB)": 34.88, "step": 27875, "train_speed(iter/s)": 0.416412 }, { "acc": 0.85996304, "epoch": 0.7548804592099206, "grad_norm": 15.23815631866455, "learning_rate": 9.483126538393674e-06, "loss": 0.82427502, "memory(GiB)": 34.88, "step": 27880, "train_speed(iter/s)": 0.416415 }, { "acc": 0.88605232, "epoch": 0.7550158394931362, "grad_norm": 6.821988105773926, "learning_rate": 9.482878729362241e-06, "loss": 0.57078876, "memory(GiB)": 34.88, "step": 27885, "train_speed(iter/s)": 0.416418 }, { "acc": 0.86347027, "epoch": 0.7551512197763518, "grad_norm": 7.096516132354736, "learning_rate": 9.482630864180208e-06, "loss": 0.63094263, "memory(GiB)": 34.88, "step": 27890, "train_speed(iter/s)": 0.416421 }, { "acc": 0.87474794, "epoch": 0.7552866000595673, "grad_norm": 9.245609283447266, "learning_rate": 9.482382942850676e-06, "loss": 0.6423214, "memory(GiB)": 34.88, "step": 27895, "train_speed(iter/s)": 0.416424 }, { "acc": 0.88781357, "epoch": 0.7554219803427829, "grad_norm": 6.916909217834473, "learning_rate": 9.482134965376755e-06, "loss": 0.59104738, "memory(GiB)": 34.88, "step": 27900, "train_speed(iter/s)": 0.416427 }, { "acc": 0.90069246, "epoch": 0.7555573606259984, "grad_norm": 7.791880130767822, "learning_rate": 9.48188693176155e-06, "loss": 0.5319768, "memory(GiB)": 34.88, "step": 27905, "train_speed(iter/s)": 0.41643 }, { "acc": 0.85536652, "epoch": 0.755692740909214, "grad_norm": 8.181818008422852, "learning_rate": 9.481638842008168e-06, "loss": 0.80395031, "memory(GiB)": 34.88, "step": 27910, "train_speed(iter/s)": 0.416433 }, { "acc": 0.86666241, "epoch": 0.7558281211924295, "grad_norm": 4.217158317565918, "learning_rate": 9.481390696119716e-06, "loss": 0.66266942, "memory(GiB)": 34.88, "step": 27915, "train_speed(iter/s)": 0.416436 }, { "acc": 0.86489716, "epoch": 0.7559635014756451, "grad_norm": 8.268394470214844, "learning_rate": 9.481142494099304e-06, "loss": 0.63796363, "memory(GiB)": 34.88, "step": 27920, "train_speed(iter/s)": 0.416439 }, { "acc": 0.86489105, "epoch": 0.7560988817588606, "grad_norm": 5.952899932861328, "learning_rate": 9.48089423595004e-06, "loss": 0.66922889, "memory(GiB)": 34.88, "step": 27925, "train_speed(iter/s)": 0.416443 }, { "acc": 0.88893805, "epoch": 0.7562342620420762, "grad_norm": 6.0761494636535645, "learning_rate": 9.480645921675032e-06, "loss": 0.56429396, "memory(GiB)": 34.88, "step": 27930, "train_speed(iter/s)": 0.416446 }, { "acc": 0.86860523, "epoch": 0.7563696423252917, "grad_norm": 16.69491195678711, "learning_rate": 9.480397551277396e-06, "loss": 0.77003598, "memory(GiB)": 34.88, "step": 27935, "train_speed(iter/s)": 0.416449 }, { "acc": 0.87400961, "epoch": 0.7565050226085073, "grad_norm": 11.523625373840332, "learning_rate": 9.480149124760237e-06, "loss": 0.66199284, "memory(GiB)": 34.88, "step": 27940, "train_speed(iter/s)": 0.416452 }, { "acc": 0.89242268, "epoch": 0.7566404028917229, "grad_norm": 6.538291931152344, "learning_rate": 9.479900642126672e-06, "loss": 0.58358741, "memory(GiB)": 34.88, "step": 27945, "train_speed(iter/s)": 0.416455 }, { "acc": 0.87347727, "epoch": 0.7567757831749384, "grad_norm": 8.616748809814453, "learning_rate": 9.479652103379813e-06, "loss": 0.70391569, "memory(GiB)": 34.88, "step": 27950, "train_speed(iter/s)": 0.416458 }, { "acc": 0.84601946, "epoch": 0.7569111634581539, "grad_norm": 12.168230056762695, "learning_rate": 9.479403508522772e-06, "loss": 0.80429649, "memory(GiB)": 34.88, "step": 27955, "train_speed(iter/s)": 0.416461 }, { "acc": 0.86682758, "epoch": 0.7570465437413695, "grad_norm": 12.624157905578613, "learning_rate": 9.479154857558661e-06, "loss": 0.61074905, "memory(GiB)": 34.88, "step": 27960, "train_speed(iter/s)": 0.416464 }, { "acc": 0.84606113, "epoch": 0.7571819240245851, "grad_norm": 15.259807586669922, "learning_rate": 9.4789061504906e-06, "loss": 0.76374636, "memory(GiB)": 34.88, "step": 27965, "train_speed(iter/s)": 0.416467 }, { "acc": 0.87016888, "epoch": 0.7573173043078006, "grad_norm": 12.306364059448242, "learning_rate": 9.4786573873217e-06, "loss": 0.62588749, "memory(GiB)": 34.88, "step": 27970, "train_speed(iter/s)": 0.416471 }, { "acc": 0.88059082, "epoch": 0.7574526845910161, "grad_norm": 9.026592254638672, "learning_rate": 9.478408568055081e-06, "loss": 0.66426578, "memory(GiB)": 34.88, "step": 27975, "train_speed(iter/s)": 0.416474 }, { "acc": 0.86659298, "epoch": 0.7575880648742317, "grad_norm": 9.720659255981445, "learning_rate": 9.478159692693857e-06, "loss": 0.7671648, "memory(GiB)": 34.88, "step": 27980, "train_speed(iter/s)": 0.416476 }, { "acc": 0.86873589, "epoch": 0.7577234451574473, "grad_norm": 5.084198474884033, "learning_rate": 9.477910761241148e-06, "loss": 0.66593866, "memory(GiB)": 34.88, "step": 27985, "train_speed(iter/s)": 0.416479 }, { "acc": 0.85560818, "epoch": 0.7578588254406629, "grad_norm": 11.526522636413574, "learning_rate": 9.477661773700071e-06, "loss": 0.87232857, "memory(GiB)": 34.88, "step": 27990, "train_speed(iter/s)": 0.416482 }, { "acc": 0.86030884, "epoch": 0.7579942057238783, "grad_norm": 7.622810363769531, "learning_rate": 9.477412730073745e-06, "loss": 0.6813592, "memory(GiB)": 34.88, "step": 27995, "train_speed(iter/s)": 0.416485 }, { "acc": 0.86453867, "epoch": 0.7581295860070939, "grad_norm": 6.6084184646606445, "learning_rate": 9.477163630365287e-06, "loss": 0.71153927, "memory(GiB)": 34.88, "step": 28000, "train_speed(iter/s)": 0.416488 }, { "acc": 0.86197157, "epoch": 0.7582649662903095, "grad_norm": 6.116239070892334, "learning_rate": 9.476914474577823e-06, "loss": 0.70872717, "memory(GiB)": 34.88, "step": 28005, "train_speed(iter/s)": 0.41649 }, { "acc": 0.86010771, "epoch": 0.7584003465735251, "grad_norm": 4.990753173828125, "learning_rate": 9.476665262714471e-06, "loss": 0.68721237, "memory(GiB)": 34.88, "step": 28010, "train_speed(iter/s)": 0.416493 }, { "acc": 0.87872677, "epoch": 0.7585357268567405, "grad_norm": 5.123696327209473, "learning_rate": 9.476415994778355e-06, "loss": 0.59533701, "memory(GiB)": 34.88, "step": 28015, "train_speed(iter/s)": 0.416495 }, { "acc": 0.86153793, "epoch": 0.7586711071399561, "grad_norm": 7.235398769378662, "learning_rate": 9.476166670772594e-06, "loss": 0.77399335, "memory(GiB)": 34.88, "step": 28020, "train_speed(iter/s)": 0.416498 }, { "acc": 0.86533928, "epoch": 0.7588064874231717, "grad_norm": 13.79883098602295, "learning_rate": 9.475917290700315e-06, "loss": 0.70920744, "memory(GiB)": 34.88, "step": 28025, "train_speed(iter/s)": 0.4165 }, { "acc": 0.87799969, "epoch": 0.7589418677063873, "grad_norm": 12.15279769897461, "learning_rate": 9.475667854564638e-06, "loss": 0.69603825, "memory(GiB)": 34.88, "step": 28030, "train_speed(iter/s)": 0.416503 }, { "acc": 0.89151535, "epoch": 0.7590772479896027, "grad_norm": 5.819723606109619, "learning_rate": 9.475418362368693e-06, "loss": 0.46097317, "memory(GiB)": 34.88, "step": 28035, "train_speed(iter/s)": 0.416506 }, { "acc": 0.86960239, "epoch": 0.7592126282728183, "grad_norm": 11.218332290649414, "learning_rate": 9.4751688141156e-06, "loss": 0.58773022, "memory(GiB)": 34.88, "step": 28040, "train_speed(iter/s)": 0.41651 }, { "acc": 0.87262869, "epoch": 0.7593480085560339, "grad_norm": 6.545990943908691, "learning_rate": 9.474919209808487e-06, "loss": 0.59984341, "memory(GiB)": 34.88, "step": 28045, "train_speed(iter/s)": 0.416513 }, { "acc": 0.84453745, "epoch": 0.7594833888392495, "grad_norm": 17.89188575744629, "learning_rate": 9.474669549450482e-06, "loss": 0.94060564, "memory(GiB)": 34.88, "step": 28050, "train_speed(iter/s)": 0.416516 }, { "acc": 0.87089653, "epoch": 0.759618769122465, "grad_norm": 9.176902770996094, "learning_rate": 9.474419833044712e-06, "loss": 0.66024675, "memory(GiB)": 34.88, "step": 28055, "train_speed(iter/s)": 0.416519 }, { "acc": 0.89376574, "epoch": 0.7597541494056805, "grad_norm": 12.446867942810059, "learning_rate": 9.474170060594306e-06, "loss": 0.58608446, "memory(GiB)": 34.88, "step": 28060, "train_speed(iter/s)": 0.416522 }, { "acc": 0.87980556, "epoch": 0.7598895296888961, "grad_norm": 9.692035675048828, "learning_rate": 9.47392023210239e-06, "loss": 0.57603836, "memory(GiB)": 34.88, "step": 28065, "train_speed(iter/s)": 0.416525 }, { "acc": 0.85771446, "epoch": 0.7600249099721117, "grad_norm": 12.01508617401123, "learning_rate": 9.473670347572096e-06, "loss": 0.7995441, "memory(GiB)": 34.88, "step": 28070, "train_speed(iter/s)": 0.416528 }, { "acc": 0.88367977, "epoch": 0.7601602902553272, "grad_norm": 6.347208499908447, "learning_rate": 9.473420407006552e-06, "loss": 0.61226063, "memory(GiB)": 34.88, "step": 28075, "train_speed(iter/s)": 0.416531 }, { "acc": 0.86851816, "epoch": 0.7602956705385427, "grad_norm": 6.779612064361572, "learning_rate": 9.473170410408894e-06, "loss": 0.62844172, "memory(GiB)": 34.88, "step": 28080, "train_speed(iter/s)": 0.416534 }, { "acc": 0.86738729, "epoch": 0.7604310508217583, "grad_norm": 5.811729431152344, "learning_rate": 9.472920357782246e-06, "loss": 0.70403624, "memory(GiB)": 34.88, "step": 28085, "train_speed(iter/s)": 0.416537 }, { "acc": 0.86725292, "epoch": 0.7605664311049739, "grad_norm": 8.370076179504395, "learning_rate": 9.47267024912975e-06, "loss": 0.67521257, "memory(GiB)": 34.88, "step": 28090, "train_speed(iter/s)": 0.41654 }, { "acc": 0.88122902, "epoch": 0.7607018113881894, "grad_norm": 8.119782447814941, "learning_rate": 9.47242008445453e-06, "loss": 0.65487714, "memory(GiB)": 34.88, "step": 28095, "train_speed(iter/s)": 0.416543 }, { "acc": 0.88250036, "epoch": 0.760837191671405, "grad_norm": 6.924844741821289, "learning_rate": 9.472169863759724e-06, "loss": 0.60214663, "memory(GiB)": 34.88, "step": 28100, "train_speed(iter/s)": 0.416546 }, { "acc": 0.866467, "epoch": 0.7609725719546205, "grad_norm": 7.48463249206543, "learning_rate": 9.471919587048466e-06, "loss": 0.65883999, "memory(GiB)": 34.88, "step": 28105, "train_speed(iter/s)": 0.416548 }, { "acc": 0.8771718, "epoch": 0.7611079522378361, "grad_norm": 7.922788143157959, "learning_rate": 9.471669254323893e-06, "loss": 0.69533691, "memory(GiB)": 34.88, "step": 28110, "train_speed(iter/s)": 0.416551 }, { "acc": 0.85714264, "epoch": 0.7612433325210516, "grad_norm": 9.942657470703125, "learning_rate": 9.471418865589138e-06, "loss": 0.77885475, "memory(GiB)": 34.88, "step": 28115, "train_speed(iter/s)": 0.416555 }, { "acc": 0.85286598, "epoch": 0.7613787128042672, "grad_norm": 18.316322326660156, "learning_rate": 9.471168420847338e-06, "loss": 0.69829569, "memory(GiB)": 34.88, "step": 28120, "train_speed(iter/s)": 0.416558 }, { "acc": 0.86291428, "epoch": 0.7615140930874827, "grad_norm": 8.936366081237793, "learning_rate": 9.470917920101634e-06, "loss": 0.67350392, "memory(GiB)": 34.88, "step": 28125, "train_speed(iter/s)": 0.416561 }, { "acc": 0.89788265, "epoch": 0.7616494733706983, "grad_norm": 5.4070658683776855, "learning_rate": 9.47066736335516e-06, "loss": 0.47724028, "memory(GiB)": 34.88, "step": 28130, "train_speed(iter/s)": 0.416564 }, { "acc": 0.86912117, "epoch": 0.7617848536539138, "grad_norm": 6.897956371307373, "learning_rate": 9.470416750611054e-06, "loss": 0.6805275, "memory(GiB)": 34.88, "step": 28135, "train_speed(iter/s)": 0.416567 }, { "acc": 0.89335785, "epoch": 0.7619202339371294, "grad_norm": 5.699477672576904, "learning_rate": 9.47016608187246e-06, "loss": 0.49874516, "memory(GiB)": 34.88, "step": 28140, "train_speed(iter/s)": 0.41657 }, { "acc": 0.86741705, "epoch": 0.762055614220345, "grad_norm": 8.195358276367188, "learning_rate": 9.469915357142513e-06, "loss": 0.65175982, "memory(GiB)": 34.88, "step": 28145, "train_speed(iter/s)": 0.416573 }, { "acc": 0.86137266, "epoch": 0.7621909945035605, "grad_norm": 7.090251445770264, "learning_rate": 9.469664576424357e-06, "loss": 0.76558757, "memory(GiB)": 34.88, "step": 28150, "train_speed(iter/s)": 0.416576 }, { "acc": 0.89683218, "epoch": 0.762326374786776, "grad_norm": 12.946577072143555, "learning_rate": 9.469413739721134e-06, "loss": 0.51653547, "memory(GiB)": 34.88, "step": 28155, "train_speed(iter/s)": 0.416579 }, { "acc": 0.88984013, "epoch": 0.7624617550699916, "grad_norm": 19.059337615966797, "learning_rate": 9.469162847035985e-06, "loss": 0.61074429, "memory(GiB)": 34.88, "step": 28160, "train_speed(iter/s)": 0.416582 }, { "acc": 0.88453388, "epoch": 0.7625971353532072, "grad_norm": 4.85204553604126, "learning_rate": 9.468911898372051e-06, "loss": 0.58275518, "memory(GiB)": 34.88, "step": 28165, "train_speed(iter/s)": 0.416585 }, { "acc": 0.88159447, "epoch": 0.7627325156364227, "grad_norm": 7.381138324737549, "learning_rate": 9.46866089373248e-06, "loss": 0.65979381, "memory(GiB)": 34.88, "step": 28170, "train_speed(iter/s)": 0.416588 }, { "acc": 0.86094837, "epoch": 0.7628678959196382, "grad_norm": 15.030881881713867, "learning_rate": 9.468409833120413e-06, "loss": 0.66206484, "memory(GiB)": 34.88, "step": 28175, "train_speed(iter/s)": 0.416591 }, { "acc": 0.88843164, "epoch": 0.7630032762028538, "grad_norm": 5.22805118560791, "learning_rate": 9.468158716538996e-06, "loss": 0.50274377, "memory(GiB)": 34.88, "step": 28180, "train_speed(iter/s)": 0.416594 }, { "acc": 0.87116489, "epoch": 0.7631386564860694, "grad_norm": 17.10462760925293, "learning_rate": 9.467907543991373e-06, "loss": 0.70816584, "memory(GiB)": 34.88, "step": 28185, "train_speed(iter/s)": 0.416597 }, { "acc": 0.87064924, "epoch": 0.763274036769285, "grad_norm": 16.170562744140625, "learning_rate": 9.467656315480693e-06, "loss": 0.64392962, "memory(GiB)": 34.88, "step": 28190, "train_speed(iter/s)": 0.416601 }, { "acc": 0.88331308, "epoch": 0.7634094170525004, "grad_norm": 5.714409351348877, "learning_rate": 9.467405031010102e-06, "loss": 0.56386375, "memory(GiB)": 34.88, "step": 28195, "train_speed(iter/s)": 0.416604 }, { "acc": 0.87341194, "epoch": 0.763544797335716, "grad_norm": 4.42449426651001, "learning_rate": 9.46715369058275e-06, "loss": 0.68001251, "memory(GiB)": 34.88, "step": 28200, "train_speed(iter/s)": 0.416607 }, { "acc": 0.88762808, "epoch": 0.7636801776189316, "grad_norm": 6.8495965003967285, "learning_rate": 9.466902294201781e-06, "loss": 0.57148361, "memory(GiB)": 34.88, "step": 28205, "train_speed(iter/s)": 0.41661 }, { "acc": 0.88035736, "epoch": 0.7638155579021472, "grad_norm": 8.204136848449707, "learning_rate": 9.466650841870348e-06, "loss": 0.62319078, "memory(GiB)": 34.88, "step": 28210, "train_speed(iter/s)": 0.416613 }, { "acc": 0.88680534, "epoch": 0.7639509381853626, "grad_norm": 8.330326080322266, "learning_rate": 9.466399333591599e-06, "loss": 0.55984621, "memory(GiB)": 34.88, "step": 28215, "train_speed(iter/s)": 0.416616 }, { "acc": 0.88972464, "epoch": 0.7640863184685782, "grad_norm": 3.92150616645813, "learning_rate": 9.466147769368687e-06, "loss": 0.473314, "memory(GiB)": 34.88, "step": 28220, "train_speed(iter/s)": 0.416618 }, { "acc": 0.86617279, "epoch": 0.7642216987517938, "grad_norm": 5.684650421142578, "learning_rate": 9.46589614920476e-06, "loss": 0.72406192, "memory(GiB)": 34.88, "step": 28225, "train_speed(iter/s)": 0.416621 }, { "acc": 0.86639681, "epoch": 0.7643570790350094, "grad_norm": 5.523629665374756, "learning_rate": 9.465644473102973e-06, "loss": 0.74007983, "memory(GiB)": 34.88, "step": 28230, "train_speed(iter/s)": 0.416624 }, { "acc": 0.88967819, "epoch": 0.7644924593182248, "grad_norm": 4.111112594604492, "learning_rate": 9.465392741066477e-06, "loss": 0.57238092, "memory(GiB)": 34.88, "step": 28235, "train_speed(iter/s)": 0.416627 }, { "acc": 0.8826519, "epoch": 0.7646278396014404, "grad_norm": 5.128859519958496, "learning_rate": 9.465140953098426e-06, "loss": 0.59248247, "memory(GiB)": 34.88, "step": 28240, "train_speed(iter/s)": 0.416629 }, { "acc": 0.88146973, "epoch": 0.764763219884656, "grad_norm": 15.754712104797363, "learning_rate": 9.464889109201974e-06, "loss": 0.68485632, "memory(GiB)": 34.88, "step": 28245, "train_speed(iter/s)": 0.416633 }, { "acc": 0.87509785, "epoch": 0.7648986001678716, "grad_norm": 6.862382888793945, "learning_rate": 9.464637209380275e-06, "loss": 0.5559298, "memory(GiB)": 34.88, "step": 28250, "train_speed(iter/s)": 0.416636 }, { "acc": 0.86445866, "epoch": 0.7650339804510871, "grad_norm": 10.48615550994873, "learning_rate": 9.464385253636487e-06, "loss": 0.72782578, "memory(GiB)": 34.88, "step": 28255, "train_speed(iter/s)": 0.416639 }, { "acc": 0.89698296, "epoch": 0.7651693607343026, "grad_norm": 9.848588943481445, "learning_rate": 9.464133241973764e-06, "loss": 0.49419618, "memory(GiB)": 34.88, "step": 28260, "train_speed(iter/s)": 0.416642 }, { "acc": 0.86926432, "epoch": 0.7653047410175182, "grad_norm": 7.819535255432129, "learning_rate": 9.463881174395264e-06, "loss": 0.65559812, "memory(GiB)": 34.88, "step": 28265, "train_speed(iter/s)": 0.416645 }, { "acc": 0.88322754, "epoch": 0.7654401213007338, "grad_norm": 10.917506217956543, "learning_rate": 9.463629050904142e-06, "loss": 0.54664192, "memory(GiB)": 34.88, "step": 28270, "train_speed(iter/s)": 0.416648 }, { "acc": 0.85614061, "epoch": 0.7655755015839493, "grad_norm": 8.151900291442871, "learning_rate": 9.463376871503562e-06, "loss": 0.70754189, "memory(GiB)": 34.88, "step": 28275, "train_speed(iter/s)": 0.416651 }, { "acc": 0.87388134, "epoch": 0.7657108818671648, "grad_norm": 7.926680088043213, "learning_rate": 9.463124636196677e-06, "loss": 0.64561629, "memory(GiB)": 34.88, "step": 28280, "train_speed(iter/s)": 0.416653 }, { "acc": 0.87017422, "epoch": 0.7658462621503804, "grad_norm": 6.9503092765808105, "learning_rate": 9.46287234498665e-06, "loss": 0.70562134, "memory(GiB)": 34.88, "step": 28285, "train_speed(iter/s)": 0.416657 }, { "acc": 0.88026752, "epoch": 0.765981642433596, "grad_norm": 6.641761779785156, "learning_rate": 9.462619997876642e-06, "loss": 0.49955759, "memory(GiB)": 34.88, "step": 28290, "train_speed(iter/s)": 0.41666 }, { "acc": 0.87883205, "epoch": 0.7661170227168115, "grad_norm": 18.163721084594727, "learning_rate": 9.462367594869813e-06, "loss": 0.64867954, "memory(GiB)": 34.88, "step": 28295, "train_speed(iter/s)": 0.416662 }, { "acc": 0.88287029, "epoch": 0.7662524030000271, "grad_norm": 3.237318754196167, "learning_rate": 9.462115135969325e-06, "loss": 0.50804472, "memory(GiB)": 34.88, "step": 28300, "train_speed(iter/s)": 0.416665 }, { "acc": 0.84606171, "epoch": 0.7663877832832426, "grad_norm": 6.593801975250244, "learning_rate": 9.461862621178338e-06, "loss": 0.86518173, "memory(GiB)": 34.88, "step": 28305, "train_speed(iter/s)": 0.416668 }, { "acc": 0.89699898, "epoch": 0.7665231635664582, "grad_norm": 5.444445610046387, "learning_rate": 9.461610050500018e-06, "loss": 0.4830699, "memory(GiB)": 34.88, "step": 28310, "train_speed(iter/s)": 0.416671 }, { "acc": 0.88998642, "epoch": 0.7666585438496737, "grad_norm": 6.547705173492432, "learning_rate": 9.46135742393753e-06, "loss": 0.58435116, "memory(GiB)": 34.88, "step": 28315, "train_speed(iter/s)": 0.416674 }, { "acc": 0.87900963, "epoch": 0.7667939241328893, "grad_norm": 9.649377822875977, "learning_rate": 9.461104741494037e-06, "loss": 0.66867414, "memory(GiB)": 34.88, "step": 28320, "train_speed(iter/s)": 0.416677 }, { "acc": 0.86890297, "epoch": 0.7669293044161049, "grad_norm": 8.805204391479492, "learning_rate": 9.460852003172704e-06, "loss": 0.69193459, "memory(GiB)": 34.88, "step": 28325, "train_speed(iter/s)": 0.41668 }, { "acc": 0.88395195, "epoch": 0.7670646846993204, "grad_norm": 8.50635051727295, "learning_rate": 9.460599208976699e-06, "loss": 0.65787668, "memory(GiB)": 34.88, "step": 28330, "train_speed(iter/s)": 0.416683 }, { "acc": 0.89466, "epoch": 0.7672000649825359, "grad_norm": 5.3768157958984375, "learning_rate": 9.460346358909185e-06, "loss": 0.55044217, "memory(GiB)": 34.88, "step": 28335, "train_speed(iter/s)": 0.416686 }, { "acc": 0.87056694, "epoch": 0.7673354452657515, "grad_norm": 6.344717502593994, "learning_rate": 9.460093452973334e-06, "loss": 0.61622219, "memory(GiB)": 34.88, "step": 28340, "train_speed(iter/s)": 0.416689 }, { "acc": 0.8629034, "epoch": 0.7674708255489671, "grad_norm": 7.610445022583008, "learning_rate": 9.45984049117231e-06, "loss": 0.78831596, "memory(GiB)": 34.88, "step": 28345, "train_speed(iter/s)": 0.416692 }, { "acc": 0.84432268, "epoch": 0.7676062058321826, "grad_norm": 9.837234497070312, "learning_rate": 9.459587473509286e-06, "loss": 0.80311871, "memory(GiB)": 34.88, "step": 28350, "train_speed(iter/s)": 0.416695 }, { "acc": 0.85200424, "epoch": 0.7677415861153981, "grad_norm": 22.75551414489746, "learning_rate": 9.459334399987429e-06, "loss": 0.78074994, "memory(GiB)": 34.88, "step": 28355, "train_speed(iter/s)": 0.416697 }, { "acc": 0.88069572, "epoch": 0.7678769663986137, "grad_norm": 9.122273445129395, "learning_rate": 9.459081270609907e-06, "loss": 0.62893763, "memory(GiB)": 34.88, "step": 28360, "train_speed(iter/s)": 0.4167 }, { "acc": 0.86135445, "epoch": 0.7680123466818293, "grad_norm": 27.455429077148438, "learning_rate": 9.458828085379896e-06, "loss": 0.82301683, "memory(GiB)": 34.88, "step": 28365, "train_speed(iter/s)": 0.416703 }, { "acc": 0.85639982, "epoch": 0.7681477269650449, "grad_norm": 8.541692733764648, "learning_rate": 9.458574844300565e-06, "loss": 0.73834438, "memory(GiB)": 34.88, "step": 28370, "train_speed(iter/s)": 0.416706 }, { "acc": 0.87911453, "epoch": 0.7682831072482603, "grad_norm": 8.592448234558105, "learning_rate": 9.458321547375084e-06, "loss": 0.60319099, "memory(GiB)": 34.88, "step": 28375, "train_speed(iter/s)": 0.416709 }, { "acc": 0.86880846, "epoch": 0.7684184875314759, "grad_norm": 24.475400924682617, "learning_rate": 9.45806819460663e-06, "loss": 0.65273247, "memory(GiB)": 34.88, "step": 28380, "train_speed(iter/s)": 0.416713 }, { "acc": 0.88004951, "epoch": 0.7685538678146915, "grad_norm": 5.330996990203857, "learning_rate": 9.457814785998377e-06, "loss": 0.66545453, "memory(GiB)": 34.88, "step": 28385, "train_speed(iter/s)": 0.416715 }, { "acc": 0.87562733, "epoch": 0.7686892480979071, "grad_norm": 6.747753143310547, "learning_rate": 9.457561321553494e-06, "loss": 0.60320888, "memory(GiB)": 34.88, "step": 28390, "train_speed(iter/s)": 0.416718 }, { "acc": 0.86250105, "epoch": 0.7688246283811225, "grad_norm": 10.066130638122559, "learning_rate": 9.457307801275163e-06, "loss": 0.74733944, "memory(GiB)": 34.88, "step": 28395, "train_speed(iter/s)": 0.416721 }, { "acc": 0.88011417, "epoch": 0.7689600086643381, "grad_norm": 10.813228607177734, "learning_rate": 9.457054225166555e-06, "loss": 0.52762942, "memory(GiB)": 34.88, "step": 28400, "train_speed(iter/s)": 0.416724 }, { "acc": 0.85773926, "epoch": 0.7690953889475537, "grad_norm": 11.432101249694824, "learning_rate": 9.45680059323085e-06, "loss": 0.75496216, "memory(GiB)": 34.88, "step": 28405, "train_speed(iter/s)": 0.416727 }, { "acc": 0.88507633, "epoch": 0.7692307692307693, "grad_norm": 5.828333854675293, "learning_rate": 9.456546905471222e-06, "loss": 0.56831169, "memory(GiB)": 34.88, "step": 28410, "train_speed(iter/s)": 0.41673 }, { "acc": 0.86988783, "epoch": 0.7693661495139847, "grad_norm": 14.046713829040527, "learning_rate": 9.45629316189085e-06, "loss": 0.74879556, "memory(GiB)": 34.88, "step": 28415, "train_speed(iter/s)": 0.416734 }, { "acc": 0.87482185, "epoch": 0.7695015297972003, "grad_norm": 5.9438300132751465, "learning_rate": 9.456039362492914e-06, "loss": 0.65217838, "memory(GiB)": 34.88, "step": 28420, "train_speed(iter/s)": 0.416737 }, { "acc": 0.89486885, "epoch": 0.7696369100804159, "grad_norm": 7.8244194984436035, "learning_rate": 9.455785507280593e-06, "loss": 0.59385128, "memory(GiB)": 34.88, "step": 28425, "train_speed(iter/s)": 0.41674 }, { "acc": 0.85955372, "epoch": 0.7697722903636315, "grad_norm": 9.856919288635254, "learning_rate": 9.455531596257065e-06, "loss": 0.71105242, "memory(GiB)": 34.88, "step": 28430, "train_speed(iter/s)": 0.416743 }, { "acc": 0.84700184, "epoch": 0.769907670646847, "grad_norm": 10.281911849975586, "learning_rate": 9.455277629425512e-06, "loss": 0.77780933, "memory(GiB)": 34.88, "step": 28435, "train_speed(iter/s)": 0.416746 }, { "acc": 0.87906256, "epoch": 0.7700430509300625, "grad_norm": 19.544113159179688, "learning_rate": 9.455023606789117e-06, "loss": 0.60681682, "memory(GiB)": 34.88, "step": 28440, "train_speed(iter/s)": 0.416749 }, { "acc": 0.89063091, "epoch": 0.7701784312132781, "grad_norm": 8.022722244262695, "learning_rate": 9.45476952835106e-06, "loss": 0.55964189, "memory(GiB)": 34.88, "step": 28445, "train_speed(iter/s)": 0.416752 }, { "acc": 0.88519077, "epoch": 0.7703138114964937, "grad_norm": 13.866045951843262, "learning_rate": 9.454515394114525e-06, "loss": 0.61389532, "memory(GiB)": 34.88, "step": 28450, "train_speed(iter/s)": 0.416755 }, { "acc": 0.85551825, "epoch": 0.7704491917797092, "grad_norm": 10.711538314819336, "learning_rate": 9.454261204082694e-06, "loss": 0.8273468, "memory(GiB)": 34.88, "step": 28455, "train_speed(iter/s)": 0.416757 }, { "acc": 0.86544285, "epoch": 0.7705845720629247, "grad_norm": 6.9767231941223145, "learning_rate": 9.454006958258754e-06, "loss": 0.74595909, "memory(GiB)": 34.88, "step": 28460, "train_speed(iter/s)": 0.416761 }, { "acc": 0.8825552, "epoch": 0.7707199523461403, "grad_norm": 7.494141101837158, "learning_rate": 9.453752656645885e-06, "loss": 0.58632841, "memory(GiB)": 34.88, "step": 28465, "train_speed(iter/s)": 0.416764 }, { "acc": 0.87304926, "epoch": 0.7708553326293559, "grad_norm": 12.855415344238281, "learning_rate": 9.453498299247279e-06, "loss": 0.65165997, "memory(GiB)": 34.88, "step": 28470, "train_speed(iter/s)": 0.416767 }, { "acc": 0.85039387, "epoch": 0.7709907129125714, "grad_norm": 11.067581176757812, "learning_rate": 9.453243886066118e-06, "loss": 0.80164051, "memory(GiB)": 34.88, "step": 28475, "train_speed(iter/s)": 0.416769 }, { "acc": 0.87040167, "epoch": 0.771126093195787, "grad_norm": 11.312422752380371, "learning_rate": 9.45298941710559e-06, "loss": 0.58086443, "memory(GiB)": 34.88, "step": 28480, "train_speed(iter/s)": 0.416771 }, { "acc": 0.85710173, "epoch": 0.7712614734790025, "grad_norm": 11.371442794799805, "learning_rate": 9.452734892368883e-06, "loss": 0.73352423, "memory(GiB)": 34.88, "step": 28485, "train_speed(iter/s)": 0.416775 }, { "acc": 0.86108761, "epoch": 0.7713968537622181, "grad_norm": 9.807629585266113, "learning_rate": 9.452480311859186e-06, "loss": 0.73055677, "memory(GiB)": 34.88, "step": 28490, "train_speed(iter/s)": 0.416778 }, { "acc": 0.86680193, "epoch": 0.7715322340454336, "grad_norm": 13.210271835327148, "learning_rate": 9.452225675579686e-06, "loss": 0.71785097, "memory(GiB)": 34.88, "step": 28495, "train_speed(iter/s)": 0.41678 }, { "acc": 0.85885487, "epoch": 0.7716676143286492, "grad_norm": 19.196306228637695, "learning_rate": 9.451970983533575e-06, "loss": 0.78140063, "memory(GiB)": 34.88, "step": 28500, "train_speed(iter/s)": 0.416783 }, { "acc": 0.89557915, "epoch": 0.7718029946118647, "grad_norm": 8.320330619812012, "learning_rate": 9.451716235724045e-06, "loss": 0.48079195, "memory(GiB)": 34.88, "step": 28505, "train_speed(iter/s)": 0.416786 }, { "acc": 0.88319969, "epoch": 0.7719383748950803, "grad_norm": 5.274319648742676, "learning_rate": 9.451461432154283e-06, "loss": 0.51811743, "memory(GiB)": 34.88, "step": 28510, "train_speed(iter/s)": 0.416789 }, { "acc": 0.87386608, "epoch": 0.7720737551782958, "grad_norm": 18.64872932434082, "learning_rate": 9.45120657282748e-06, "loss": 0.70126047, "memory(GiB)": 34.88, "step": 28515, "train_speed(iter/s)": 0.416792 }, { "acc": 0.86305943, "epoch": 0.7722091354615114, "grad_norm": 8.193936347961426, "learning_rate": 9.450951657746836e-06, "loss": 0.72257476, "memory(GiB)": 34.88, "step": 28520, "train_speed(iter/s)": 0.416795 }, { "acc": 0.89380159, "epoch": 0.772344515744727, "grad_norm": 4.715251922607422, "learning_rate": 9.45069668691554e-06, "loss": 0.44587345, "memory(GiB)": 34.88, "step": 28525, "train_speed(iter/s)": 0.416798 }, { "acc": 0.86778965, "epoch": 0.7724798960279425, "grad_norm": 6.148830890655518, "learning_rate": 9.450441660336783e-06, "loss": 0.67474532, "memory(GiB)": 34.88, "step": 28530, "train_speed(iter/s)": 0.416801 }, { "acc": 0.87387714, "epoch": 0.772615276311158, "grad_norm": 12.382896423339844, "learning_rate": 9.450186578013765e-06, "loss": 0.67139235, "memory(GiB)": 34.88, "step": 28535, "train_speed(iter/s)": 0.416804 }, { "acc": 0.86489744, "epoch": 0.7727506565943736, "grad_norm": 22.39110565185547, "learning_rate": 9.449931439949677e-06, "loss": 0.61310687, "memory(GiB)": 34.88, "step": 28540, "train_speed(iter/s)": 0.416806 }, { "acc": 0.8554491, "epoch": 0.7728860368775892, "grad_norm": 4.664644241333008, "learning_rate": 9.449676246147718e-06, "loss": 0.63016272, "memory(GiB)": 34.88, "step": 28545, "train_speed(iter/s)": 0.416808 }, { "acc": 0.86731281, "epoch": 0.7730214171608047, "grad_norm": 8.8263521194458, "learning_rate": 9.449420996611084e-06, "loss": 0.71065378, "memory(GiB)": 34.88, "step": 28550, "train_speed(iter/s)": 0.416811 }, { "acc": 0.85721598, "epoch": 0.7731567974440202, "grad_norm": 5.241532325744629, "learning_rate": 9.449165691342972e-06, "loss": 0.75731425, "memory(GiB)": 34.88, "step": 28555, "train_speed(iter/s)": 0.416814 }, { "acc": 0.90209026, "epoch": 0.7732921777272358, "grad_norm": 7.610533237457275, "learning_rate": 9.448910330346582e-06, "loss": 0.47886906, "memory(GiB)": 34.88, "step": 28560, "train_speed(iter/s)": 0.416817 }, { "acc": 0.88834, "epoch": 0.7734275580104514, "grad_norm": 7.734260559082031, "learning_rate": 9.448654913625111e-06, "loss": 0.58269005, "memory(GiB)": 34.88, "step": 28565, "train_speed(iter/s)": 0.416819 }, { "acc": 0.88628654, "epoch": 0.773562938293667, "grad_norm": 8.395855903625488, "learning_rate": 9.448399441181757e-06, "loss": 0.61332951, "memory(GiB)": 34.88, "step": 28570, "train_speed(iter/s)": 0.416823 }, { "acc": 0.87680321, "epoch": 0.7736983185768824, "grad_norm": 10.163323402404785, "learning_rate": 9.448143913019725e-06, "loss": 0.61043577, "memory(GiB)": 34.88, "step": 28575, "train_speed(iter/s)": 0.416826 }, { "acc": 0.86796885, "epoch": 0.773833698860098, "grad_norm": 8.979310035705566, "learning_rate": 9.447888329142213e-06, "loss": 0.68977489, "memory(GiB)": 34.88, "step": 28580, "train_speed(iter/s)": 0.416829 }, { "acc": 0.87542477, "epoch": 0.7739690791433136, "grad_norm": 5.192804336547852, "learning_rate": 9.447632689552422e-06, "loss": 0.62624741, "memory(GiB)": 34.88, "step": 28585, "train_speed(iter/s)": 0.416832 }, { "acc": 0.86301289, "epoch": 0.7741044594265292, "grad_norm": 13.078265190124512, "learning_rate": 9.447376994253555e-06, "loss": 0.78077841, "memory(GiB)": 34.88, "step": 28590, "train_speed(iter/s)": 0.416835 }, { "acc": 0.87987309, "epoch": 0.7742398397097446, "grad_norm": 10.191322326660156, "learning_rate": 9.447121243248817e-06, "loss": 0.60131073, "memory(GiB)": 34.88, "step": 28595, "train_speed(iter/s)": 0.416838 }, { "acc": 0.88601933, "epoch": 0.7743752199929602, "grad_norm": 6.857712745666504, "learning_rate": 9.44686543654141e-06, "loss": 0.50622087, "memory(GiB)": 34.88, "step": 28600, "train_speed(iter/s)": 0.416841 }, { "acc": 0.88213377, "epoch": 0.7745106002761758, "grad_norm": 19.121522903442383, "learning_rate": 9.446609574134539e-06, "loss": 0.53994761, "memory(GiB)": 34.88, "step": 28605, "train_speed(iter/s)": 0.416844 }, { "acc": 0.87455835, "epoch": 0.7746459805593914, "grad_norm": 5.427584171295166, "learning_rate": 9.446353656031408e-06, "loss": 0.62551813, "memory(GiB)": 34.88, "step": 28610, "train_speed(iter/s)": 0.416847 }, { "acc": 0.85785494, "epoch": 0.7747813608426068, "grad_norm": 8.393656730651855, "learning_rate": 9.446097682235223e-06, "loss": 0.68391762, "memory(GiB)": 34.88, "step": 28615, "train_speed(iter/s)": 0.416849 }, { "acc": 0.86377773, "epoch": 0.7749167411258224, "grad_norm": 8.877341270446777, "learning_rate": 9.445841652749194e-06, "loss": 0.70658441, "memory(GiB)": 34.88, "step": 28620, "train_speed(iter/s)": 0.416852 }, { "acc": 0.84193964, "epoch": 0.775052121409038, "grad_norm": 16.391592025756836, "learning_rate": 9.445585567576526e-06, "loss": 0.91338196, "memory(GiB)": 34.88, "step": 28625, "train_speed(iter/s)": 0.416854 }, { "acc": 0.90041943, "epoch": 0.7751875016922536, "grad_norm": 21.917007446289062, "learning_rate": 9.445329426720423e-06, "loss": 0.5605742, "memory(GiB)": 34.88, "step": 28630, "train_speed(iter/s)": 0.416857 }, { "acc": 0.85785522, "epoch": 0.7753228819754691, "grad_norm": 8.150739669799805, "learning_rate": 9.4450732301841e-06, "loss": 0.7547184, "memory(GiB)": 34.88, "step": 28635, "train_speed(iter/s)": 0.41686 }, { "acc": 0.88223171, "epoch": 0.7754582622586846, "grad_norm": 5.446441173553467, "learning_rate": 9.444816977970765e-06, "loss": 0.55310793, "memory(GiB)": 34.88, "step": 28640, "train_speed(iter/s)": 0.416863 }, { "acc": 0.87376747, "epoch": 0.7755936425419002, "grad_norm": 13.803007125854492, "learning_rate": 9.444560670083624e-06, "loss": 0.58110132, "memory(GiB)": 34.88, "step": 28645, "train_speed(iter/s)": 0.416866 }, { "acc": 0.88494415, "epoch": 0.7757290228251158, "grad_norm": 7.481236457824707, "learning_rate": 9.444304306525892e-06, "loss": 0.60496616, "memory(GiB)": 34.88, "step": 28650, "train_speed(iter/s)": 0.416868 }, { "acc": 0.8528635, "epoch": 0.7758644031083313, "grad_norm": 11.217987060546875, "learning_rate": 9.444047887300778e-06, "loss": 0.81034765, "memory(GiB)": 34.88, "step": 28655, "train_speed(iter/s)": 0.416871 }, { "acc": 0.85948181, "epoch": 0.7759997833915468, "grad_norm": 10.205163955688477, "learning_rate": 9.443791412411497e-06, "loss": 0.75970812, "memory(GiB)": 34.88, "step": 28660, "train_speed(iter/s)": 0.416874 }, { "acc": 0.86021576, "epoch": 0.7761351636747624, "grad_norm": 24.694538116455078, "learning_rate": 9.443534881861256e-06, "loss": 0.79272323, "memory(GiB)": 34.88, "step": 28665, "train_speed(iter/s)": 0.416877 }, { "acc": 0.87104816, "epoch": 0.776270543957978, "grad_norm": 11.249883651733398, "learning_rate": 9.443278295653277e-06, "loss": 0.7065764, "memory(GiB)": 34.88, "step": 28670, "train_speed(iter/s)": 0.41688 }, { "acc": 0.86524124, "epoch": 0.7764059242411935, "grad_norm": 7.892600059509277, "learning_rate": 9.443021653790768e-06, "loss": 0.82340937, "memory(GiB)": 34.88, "step": 28675, "train_speed(iter/s)": 0.416883 }, { "acc": 0.85803709, "epoch": 0.7765413045244091, "grad_norm": 16.14909553527832, "learning_rate": 9.442764956276947e-06, "loss": 0.73400469, "memory(GiB)": 34.88, "step": 28680, "train_speed(iter/s)": 0.416886 }, { "acc": 0.88655272, "epoch": 0.7766766848076246, "grad_norm": 5.602789402008057, "learning_rate": 9.442508203115026e-06, "loss": 0.59632807, "memory(GiB)": 34.88, "step": 28685, "train_speed(iter/s)": 0.416889 }, { "acc": 0.89679489, "epoch": 0.7768120650908402, "grad_norm": 6.554233551025391, "learning_rate": 9.442251394308227e-06, "loss": 0.45508776, "memory(GiB)": 34.88, "step": 28690, "train_speed(iter/s)": 0.416892 }, { "acc": 0.85191374, "epoch": 0.7769474453740557, "grad_norm": 18.152114868164062, "learning_rate": 9.44199452985976e-06, "loss": 0.75426006, "memory(GiB)": 34.88, "step": 28695, "train_speed(iter/s)": 0.416894 }, { "acc": 0.87785454, "epoch": 0.7770828256572713, "grad_norm": 7.083265781402588, "learning_rate": 9.44173760977285e-06, "loss": 0.63552785, "memory(GiB)": 34.88, "step": 28700, "train_speed(iter/s)": 0.416897 }, { "acc": 0.86468973, "epoch": 0.7772182059404869, "grad_norm": 4.622361660003662, "learning_rate": 9.44148063405071e-06, "loss": 0.70262928, "memory(GiB)": 34.88, "step": 28705, "train_speed(iter/s)": 0.416899 }, { "acc": 0.87042484, "epoch": 0.7773535862237024, "grad_norm": 7.3083906173706055, "learning_rate": 9.44122360269656e-06, "loss": 0.68125963, "memory(GiB)": 34.88, "step": 28710, "train_speed(iter/s)": 0.416902 }, { "acc": 0.87643604, "epoch": 0.7774889665069179, "grad_norm": 9.944021224975586, "learning_rate": 9.440966515713623e-06, "loss": 0.68947554, "memory(GiB)": 34.88, "step": 28715, "train_speed(iter/s)": 0.416904 }, { "acc": 0.86965923, "epoch": 0.7776243467901335, "grad_norm": 10.499743461608887, "learning_rate": 9.440709373105115e-06, "loss": 0.67965679, "memory(GiB)": 34.88, "step": 28720, "train_speed(iter/s)": 0.416908 }, { "acc": 0.85582409, "epoch": 0.7777597270733491, "grad_norm": 5.889216423034668, "learning_rate": 9.440452174874263e-06, "loss": 0.71808014, "memory(GiB)": 34.88, "step": 28725, "train_speed(iter/s)": 0.416911 }, { "acc": 0.87587433, "epoch": 0.7778951073565646, "grad_norm": 2.9107375144958496, "learning_rate": 9.440194921024284e-06, "loss": 0.59487, "memory(GiB)": 34.88, "step": 28730, "train_speed(iter/s)": 0.416913 }, { "acc": 0.83838024, "epoch": 0.7780304876397801, "grad_norm": 9.794647216796875, "learning_rate": 9.4399376115584e-06, "loss": 0.84659319, "memory(GiB)": 34.88, "step": 28735, "train_speed(iter/s)": 0.416916 }, { "acc": 0.89415503, "epoch": 0.7781658679229957, "grad_norm": 6.116236686706543, "learning_rate": 9.43968024647984e-06, "loss": 0.50315523, "memory(GiB)": 34.88, "step": 28740, "train_speed(iter/s)": 0.416919 }, { "acc": 0.86639824, "epoch": 0.7783012482062113, "grad_norm": 11.18465805053711, "learning_rate": 9.439422825791822e-06, "loss": 0.75332284, "memory(GiB)": 34.88, "step": 28745, "train_speed(iter/s)": 0.416922 }, { "acc": 0.86311264, "epoch": 0.7784366284894269, "grad_norm": 4.977967739105225, "learning_rate": 9.439165349497573e-06, "loss": 0.81897964, "memory(GiB)": 34.88, "step": 28750, "train_speed(iter/s)": 0.416925 }, { "acc": 0.87793045, "epoch": 0.7785720087726423, "grad_norm": 5.071927547454834, "learning_rate": 9.43890781760032e-06, "loss": 0.61193118, "memory(GiB)": 34.88, "step": 28755, "train_speed(iter/s)": 0.416928 }, { "acc": 0.8538023, "epoch": 0.7787073890558579, "grad_norm": 5.0820722579956055, "learning_rate": 9.438650230103288e-06, "loss": 0.7017746, "memory(GiB)": 34.88, "step": 28760, "train_speed(iter/s)": 0.41693 }, { "acc": 0.86393738, "epoch": 0.7788427693390735, "grad_norm": 8.718672752380371, "learning_rate": 9.438392587009702e-06, "loss": 0.68925781, "memory(GiB)": 34.88, "step": 28765, "train_speed(iter/s)": 0.416933 }, { "acc": 0.89034386, "epoch": 0.7789781496222891, "grad_norm": 9.628499031066895, "learning_rate": 9.438134888322791e-06, "loss": 0.58081436, "memory(GiB)": 34.88, "step": 28770, "train_speed(iter/s)": 0.416936 }, { "acc": 0.88391838, "epoch": 0.7791135299055045, "grad_norm": 8.816335678100586, "learning_rate": 9.437877134045783e-06, "loss": 0.54070039, "memory(GiB)": 34.88, "step": 28775, "train_speed(iter/s)": 0.416938 }, { "acc": 0.87814198, "epoch": 0.7792489101887201, "grad_norm": 17.008188247680664, "learning_rate": 9.437619324181907e-06, "loss": 0.5737689, "memory(GiB)": 34.88, "step": 28780, "train_speed(iter/s)": 0.416941 }, { "acc": 0.87279902, "epoch": 0.7793842904719357, "grad_norm": 20.341278076171875, "learning_rate": 9.437361458734393e-06, "loss": 0.62811198, "memory(GiB)": 34.88, "step": 28785, "train_speed(iter/s)": 0.416944 }, { "acc": 0.85695286, "epoch": 0.7795196707551513, "grad_norm": 8.34086799621582, "learning_rate": 9.43710353770647e-06, "loss": 0.77048635, "memory(GiB)": 34.88, "step": 28790, "train_speed(iter/s)": 0.416946 }, { "acc": 0.87911253, "epoch": 0.7796550510383667, "grad_norm": 10.080582618713379, "learning_rate": 9.436845561101372e-06, "loss": 0.57748122, "memory(GiB)": 34.88, "step": 28795, "train_speed(iter/s)": 0.41695 }, { "acc": 0.88139915, "epoch": 0.7797904313215823, "grad_norm": 15.772272109985352, "learning_rate": 9.436587528922327e-06, "loss": 0.70610323, "memory(GiB)": 34.88, "step": 28800, "train_speed(iter/s)": 0.416953 }, { "acc": 0.86896505, "epoch": 0.7799258116047979, "grad_norm": 9.881059646606445, "learning_rate": 9.43632944117257e-06, "loss": 0.68350534, "memory(GiB)": 34.88, "step": 28805, "train_speed(iter/s)": 0.416956 }, { "acc": 0.84701672, "epoch": 0.7800611918880135, "grad_norm": 25.35323715209961, "learning_rate": 9.436071297855331e-06, "loss": 0.79391537, "memory(GiB)": 34.88, "step": 28810, "train_speed(iter/s)": 0.416959 }, { "acc": 0.87564106, "epoch": 0.780196572171229, "grad_norm": 4.954032897949219, "learning_rate": 9.435813098973846e-06, "loss": 0.65528936, "memory(GiB)": 34.88, "step": 28815, "train_speed(iter/s)": 0.416962 }, { "acc": 0.87560539, "epoch": 0.7803319524544445, "grad_norm": 10.746912002563477, "learning_rate": 9.435554844531348e-06, "loss": 0.64680634, "memory(GiB)": 34.88, "step": 28820, "train_speed(iter/s)": 0.416965 }, { "acc": 0.86969547, "epoch": 0.7804673327376601, "grad_norm": 13.968381881713867, "learning_rate": 9.435296534531077e-06, "loss": 0.74678478, "memory(GiB)": 34.88, "step": 28825, "train_speed(iter/s)": 0.416968 }, { "acc": 0.88099327, "epoch": 0.7806027130208757, "grad_norm": 10.854658126831055, "learning_rate": 9.43503816897626e-06, "loss": 0.60731993, "memory(GiB)": 34.88, "step": 28830, "train_speed(iter/s)": 0.416971 }, { "acc": 0.88223686, "epoch": 0.7807380933040912, "grad_norm": 7.5644307136535645, "learning_rate": 9.434779747870142e-06, "loss": 0.55696373, "memory(GiB)": 34.88, "step": 28835, "train_speed(iter/s)": 0.416973 }, { "acc": 0.87805786, "epoch": 0.7808734735873067, "grad_norm": 10.427799224853516, "learning_rate": 9.434521271215957e-06, "loss": 0.66230626, "memory(GiB)": 34.88, "step": 28840, "train_speed(iter/s)": 0.416977 }, { "acc": 0.88006964, "epoch": 0.7810088538705223, "grad_norm": 17.990087509155273, "learning_rate": 9.43426273901694e-06, "loss": 0.65454464, "memory(GiB)": 34.88, "step": 28845, "train_speed(iter/s)": 0.416979 }, { "acc": 0.8630167, "epoch": 0.7811442341537379, "grad_norm": 6.639340400695801, "learning_rate": 9.434004151276333e-06, "loss": 0.62688065, "memory(GiB)": 34.88, "step": 28850, "train_speed(iter/s)": 0.416982 }, { "acc": 0.89160614, "epoch": 0.7812796144369534, "grad_norm": 6.345198631286621, "learning_rate": 9.433745507997377e-06, "loss": 0.47117968, "memory(GiB)": 34.88, "step": 28855, "train_speed(iter/s)": 0.416985 }, { "acc": 0.87594852, "epoch": 0.781414994720169, "grad_norm": 8.561182975769043, "learning_rate": 9.43348680918331e-06, "loss": 0.60813637, "memory(GiB)": 34.88, "step": 28860, "train_speed(iter/s)": 0.416988 }, { "acc": 0.83161306, "epoch": 0.7815503750033845, "grad_norm": 11.785304069519043, "learning_rate": 9.43322805483737e-06, "loss": 0.85269804, "memory(GiB)": 34.88, "step": 28865, "train_speed(iter/s)": 0.41699 }, { "acc": 0.85820789, "epoch": 0.7816857552866001, "grad_norm": 10.116095542907715, "learning_rate": 9.432969244962802e-06, "loss": 0.76101198, "memory(GiB)": 34.88, "step": 28870, "train_speed(iter/s)": 0.416994 }, { "acc": 0.86145802, "epoch": 0.7818211355698156, "grad_norm": 7.123450756072998, "learning_rate": 9.432710379562846e-06, "loss": 0.74336238, "memory(GiB)": 34.88, "step": 28875, "train_speed(iter/s)": 0.416997 }, { "acc": 0.88808136, "epoch": 0.7819565158530312, "grad_norm": 8.34375286102295, "learning_rate": 9.432451458640747e-06, "loss": 0.56393356, "memory(GiB)": 34.88, "step": 28880, "train_speed(iter/s)": 0.416999 }, { "acc": 0.89015942, "epoch": 0.7820918961362467, "grad_norm": 8.87049674987793, "learning_rate": 9.432192482199745e-06, "loss": 0.60429096, "memory(GiB)": 34.88, "step": 28885, "train_speed(iter/s)": 0.417002 }, { "acc": 0.87902002, "epoch": 0.7822272764194622, "grad_norm": 7.899331092834473, "learning_rate": 9.431933450243088e-06, "loss": 0.57747774, "memory(GiB)": 34.88, "step": 28890, "train_speed(iter/s)": 0.417005 }, { "acc": 0.86334019, "epoch": 0.7823626567026778, "grad_norm": 9.025288581848145, "learning_rate": 9.43167436277402e-06, "loss": 0.7353363, "memory(GiB)": 34.88, "step": 28895, "train_speed(iter/s)": 0.417008 }, { "acc": 0.86428471, "epoch": 0.7824980369858934, "grad_norm": 6.75211763381958, "learning_rate": 9.431415219795785e-06, "loss": 0.72649164, "memory(GiB)": 34.88, "step": 28900, "train_speed(iter/s)": 0.417011 }, { "acc": 0.86409016, "epoch": 0.782633417269109, "grad_norm": 15.257259368896484, "learning_rate": 9.43115602131163e-06, "loss": 0.65769134, "memory(GiB)": 34.88, "step": 28905, "train_speed(iter/s)": 0.417014 }, { "acc": 0.88820057, "epoch": 0.7827687975523244, "grad_norm": 8.947125434875488, "learning_rate": 9.430896767324803e-06, "loss": 0.49608159, "memory(GiB)": 34.88, "step": 28910, "train_speed(iter/s)": 0.417017 }, { "acc": 0.87926064, "epoch": 0.78290417783554, "grad_norm": 7.485633850097656, "learning_rate": 9.430637457838549e-06, "loss": 0.58311806, "memory(GiB)": 34.88, "step": 28915, "train_speed(iter/s)": 0.41702 }, { "acc": 0.88722925, "epoch": 0.7830395581187556, "grad_norm": 6.658151626586914, "learning_rate": 9.43037809285612e-06, "loss": 0.59633927, "memory(GiB)": 34.88, "step": 28920, "train_speed(iter/s)": 0.417023 }, { "acc": 0.88895826, "epoch": 0.7831749384019712, "grad_norm": 7.483544826507568, "learning_rate": 9.430118672380761e-06, "loss": 0.56263208, "memory(GiB)": 34.88, "step": 28925, "train_speed(iter/s)": 0.417025 }, { "acc": 0.86582394, "epoch": 0.7833103186851866, "grad_norm": 10.455859184265137, "learning_rate": 9.429859196415728e-06, "loss": 0.74688816, "memory(GiB)": 34.88, "step": 28930, "train_speed(iter/s)": 0.417028 }, { "acc": 0.86993275, "epoch": 0.7834456989684022, "grad_norm": 10.030191421508789, "learning_rate": 9.429599664964265e-06, "loss": 0.66727366, "memory(GiB)": 34.88, "step": 28935, "train_speed(iter/s)": 0.417031 }, { "acc": 0.88062153, "epoch": 0.7835810792516178, "grad_norm": 6.4548468589782715, "learning_rate": 9.429340078029623e-06, "loss": 0.55448518, "memory(GiB)": 34.88, "step": 28940, "train_speed(iter/s)": 0.417034 }, { "acc": 0.84504347, "epoch": 0.7837164595348334, "grad_norm": 13.763654708862305, "learning_rate": 9.429080435615058e-06, "loss": 0.75797262, "memory(GiB)": 34.88, "step": 28945, "train_speed(iter/s)": 0.417037 }, { "acc": 0.88303127, "epoch": 0.7838518398180488, "grad_norm": 32.03778839111328, "learning_rate": 9.428820737723823e-06, "loss": 0.60900354, "memory(GiB)": 34.88, "step": 28950, "train_speed(iter/s)": 0.41704 }, { "acc": 0.87499809, "epoch": 0.7839872201012644, "grad_norm": 9.619741439819336, "learning_rate": 9.428560984359167e-06, "loss": 0.68420367, "memory(GiB)": 34.88, "step": 28955, "train_speed(iter/s)": 0.417044 }, { "acc": 0.87222309, "epoch": 0.78412260038448, "grad_norm": 9.917402267456055, "learning_rate": 9.428301175524348e-06, "loss": 0.60711298, "memory(GiB)": 34.88, "step": 28960, "train_speed(iter/s)": 0.417047 }, { "acc": 0.87488537, "epoch": 0.7842579806676956, "grad_norm": 2.2958528995513916, "learning_rate": 9.428041311222616e-06, "loss": 0.67726183, "memory(GiB)": 34.88, "step": 28965, "train_speed(iter/s)": 0.417048 }, { "acc": 0.88295097, "epoch": 0.7843933609509111, "grad_norm": 5.887876510620117, "learning_rate": 9.427781391457231e-06, "loss": 0.57849555, "memory(GiB)": 34.88, "step": 28970, "train_speed(iter/s)": 0.417051 }, { "acc": 0.88978291, "epoch": 0.7845287412341266, "grad_norm": 6.4981369972229, "learning_rate": 9.427521416231446e-06, "loss": 0.59223757, "memory(GiB)": 34.88, "step": 28975, "train_speed(iter/s)": 0.417054 }, { "acc": 0.87349396, "epoch": 0.7846641215173422, "grad_norm": 9.1201810836792, "learning_rate": 9.42726138554852e-06, "loss": 0.64003806, "memory(GiB)": 34.88, "step": 28980, "train_speed(iter/s)": 0.417056 }, { "acc": 0.88482647, "epoch": 0.7847995018005578, "grad_norm": 19.096416473388672, "learning_rate": 9.427001299411707e-06, "loss": 0.54248457, "memory(GiB)": 34.88, "step": 28985, "train_speed(iter/s)": 0.417059 }, { "acc": 0.89452105, "epoch": 0.7849348820837733, "grad_norm": 12.770702362060547, "learning_rate": 9.426741157824269e-06, "loss": 0.55219145, "memory(GiB)": 34.88, "step": 28990, "train_speed(iter/s)": 0.417062 }, { "acc": 0.8728363, "epoch": 0.7850702623669888, "grad_norm": 22.783363342285156, "learning_rate": 9.42648096078946e-06, "loss": 0.64297843, "memory(GiB)": 34.88, "step": 28995, "train_speed(iter/s)": 0.417066 }, { "acc": 0.87584648, "epoch": 0.7852056426502044, "grad_norm": 11.170867919921875, "learning_rate": 9.426220708310546e-06, "loss": 0.69098325, "memory(GiB)": 34.88, "step": 29000, "train_speed(iter/s)": 0.417068 }, { "acc": 0.86141872, "epoch": 0.78534102293342, "grad_norm": 16.509248733520508, "learning_rate": 9.425960400390783e-06, "loss": 0.79405184, "memory(GiB)": 34.88, "step": 29005, "train_speed(iter/s)": 0.417071 }, { "acc": 0.87891397, "epoch": 0.7854764032166355, "grad_norm": 6.531442165374756, "learning_rate": 9.425700037033431e-06, "loss": 0.66268082, "memory(GiB)": 34.88, "step": 29010, "train_speed(iter/s)": 0.417073 }, { "acc": 0.86204987, "epoch": 0.7856117834998511, "grad_norm": 8.10974407196045, "learning_rate": 9.425439618241753e-06, "loss": 0.69090929, "memory(GiB)": 34.88, "step": 29015, "train_speed(iter/s)": 0.417076 }, { "acc": 0.88459053, "epoch": 0.7857471637830666, "grad_norm": 7.017132759094238, "learning_rate": 9.425179144019015e-06, "loss": 0.5397037, "memory(GiB)": 34.88, "step": 29020, "train_speed(iter/s)": 0.417079 }, { "acc": 0.85702372, "epoch": 0.7858825440662822, "grad_norm": 11.044381141662598, "learning_rate": 9.424918614368471e-06, "loss": 0.68674917, "memory(GiB)": 34.88, "step": 29025, "train_speed(iter/s)": 0.417081 }, { "acc": 0.88507652, "epoch": 0.7860179243494977, "grad_norm": 11.130006790161133, "learning_rate": 9.424658029293393e-06, "loss": 0.57955179, "memory(GiB)": 34.88, "step": 29030, "train_speed(iter/s)": 0.417084 }, { "acc": 0.88752747, "epoch": 0.7861533046327133, "grad_norm": 6.542020797729492, "learning_rate": 9.424397388797041e-06, "loss": 0.58301039, "memory(GiB)": 34.88, "step": 29035, "train_speed(iter/s)": 0.417087 }, { "acc": 0.8720768, "epoch": 0.7862886849159288, "grad_norm": 7.465973377227783, "learning_rate": 9.424136692882682e-06, "loss": 0.68646326, "memory(GiB)": 34.88, "step": 29040, "train_speed(iter/s)": 0.41709 }, { "acc": 0.84297028, "epoch": 0.7864240651991444, "grad_norm": 12.81242847442627, "learning_rate": 9.42387594155358e-06, "loss": 0.82942286, "memory(GiB)": 34.88, "step": 29045, "train_speed(iter/s)": 0.417093 }, { "acc": 0.87356768, "epoch": 0.7865594454823599, "grad_norm": 5.036779403686523, "learning_rate": 9.423615134813003e-06, "loss": 0.68969579, "memory(GiB)": 34.88, "step": 29050, "train_speed(iter/s)": 0.417095 }, { "acc": 0.88094759, "epoch": 0.7866948257655755, "grad_norm": 8.715302467346191, "learning_rate": 9.423354272664217e-06, "loss": 0.56209583, "memory(GiB)": 34.88, "step": 29055, "train_speed(iter/s)": 0.417098 }, { "acc": 0.87933273, "epoch": 0.7868302060487911, "grad_norm": 7.830261707305908, "learning_rate": 9.423093355110493e-06, "loss": 0.55773001, "memory(GiB)": 34.88, "step": 29060, "train_speed(iter/s)": 0.417101 }, { "acc": 0.87305012, "epoch": 0.7869655863320066, "grad_norm": 6.004371166229248, "learning_rate": 9.422832382155096e-06, "loss": 0.62466049, "memory(GiB)": 34.88, "step": 29065, "train_speed(iter/s)": 0.417104 }, { "acc": 0.86392975, "epoch": 0.7871009666152221, "grad_norm": 8.183770179748535, "learning_rate": 9.422571353801292e-06, "loss": 0.68536444, "memory(GiB)": 34.88, "step": 29070, "train_speed(iter/s)": 0.417107 }, { "acc": 0.87439384, "epoch": 0.7872363468984377, "grad_norm": 13.861506462097168, "learning_rate": 9.422310270052359e-06, "loss": 0.6224947, "memory(GiB)": 34.88, "step": 29075, "train_speed(iter/s)": 0.41711 }, { "acc": 0.89585323, "epoch": 0.7873717271816533, "grad_norm": 10.159991264343262, "learning_rate": 9.422049130911562e-06, "loss": 0.5499299, "memory(GiB)": 34.88, "step": 29080, "train_speed(iter/s)": 0.417112 }, { "acc": 0.85337296, "epoch": 0.7875071074648688, "grad_norm": 9.623200416564941, "learning_rate": 9.421787936382173e-06, "loss": 0.74291334, "memory(GiB)": 34.88, "step": 29085, "train_speed(iter/s)": 0.417115 }, { "acc": 0.88125305, "epoch": 0.7876424877480843, "grad_norm": 8.847297668457031, "learning_rate": 9.421526686467466e-06, "loss": 0.65652909, "memory(GiB)": 34.88, "step": 29090, "train_speed(iter/s)": 0.417118 }, { "acc": 0.85782213, "epoch": 0.7877778680312999, "grad_norm": 6.427328586578369, "learning_rate": 9.421265381170713e-06, "loss": 0.66734071, "memory(GiB)": 34.88, "step": 29095, "train_speed(iter/s)": 0.41712 }, { "acc": 0.86570396, "epoch": 0.7879132483145155, "grad_norm": 5.8756818771362305, "learning_rate": 9.421004020495186e-06, "loss": 0.6848278, "memory(GiB)": 34.88, "step": 29100, "train_speed(iter/s)": 0.417122 }, { "acc": 0.87921848, "epoch": 0.7880486285977311, "grad_norm": 11.289650917053223, "learning_rate": 9.420742604444159e-06, "loss": 0.64992852, "memory(GiB)": 34.88, "step": 29105, "train_speed(iter/s)": 0.417126 }, { "acc": 0.87952557, "epoch": 0.7881840088809465, "grad_norm": 10.417184829711914, "learning_rate": 9.42048113302091e-06, "loss": 0.67427006, "memory(GiB)": 34.88, "step": 29110, "train_speed(iter/s)": 0.417129 }, { "acc": 0.88008375, "epoch": 0.7883193891641621, "grad_norm": 10.655633926391602, "learning_rate": 9.420219606228711e-06, "loss": 0.65885353, "memory(GiB)": 34.88, "step": 29115, "train_speed(iter/s)": 0.417132 }, { "acc": 0.89174671, "epoch": 0.7884547694473777, "grad_norm": 7.403168678283691, "learning_rate": 9.419958024070838e-06, "loss": 0.53714952, "memory(GiB)": 34.88, "step": 29120, "train_speed(iter/s)": 0.417134 }, { "acc": 0.89835129, "epoch": 0.7885901497305933, "grad_norm": 6.459493637084961, "learning_rate": 9.419696386550571e-06, "loss": 0.48932376, "memory(GiB)": 34.88, "step": 29125, "train_speed(iter/s)": 0.417137 }, { "acc": 0.91078663, "epoch": 0.7887255300138087, "grad_norm": 6.670159816741943, "learning_rate": 9.419434693671184e-06, "loss": 0.43109593, "memory(GiB)": 34.88, "step": 29130, "train_speed(iter/s)": 0.41714 }, { "acc": 0.84602299, "epoch": 0.7888609102970243, "grad_norm": 7.501917362213135, "learning_rate": 9.419172945435958e-06, "loss": 0.77476492, "memory(GiB)": 34.88, "step": 29135, "train_speed(iter/s)": 0.417143 }, { "acc": 0.86467094, "epoch": 0.7889962905802399, "grad_norm": 15.139792442321777, "learning_rate": 9.41891114184817e-06, "loss": 0.71356831, "memory(GiB)": 34.88, "step": 29140, "train_speed(iter/s)": 0.417146 }, { "acc": 0.88416748, "epoch": 0.7891316708634555, "grad_norm": 17.50786781311035, "learning_rate": 9.418649282911099e-06, "loss": 0.58480716, "memory(GiB)": 34.88, "step": 29145, "train_speed(iter/s)": 0.417149 }, { "acc": 0.86946821, "epoch": 0.789267051146671, "grad_norm": 9.030757904052734, "learning_rate": 9.418387368628028e-06, "loss": 0.78143706, "memory(GiB)": 34.88, "step": 29150, "train_speed(iter/s)": 0.417151 }, { "acc": 0.86977911, "epoch": 0.7894024314298865, "grad_norm": 16.501737594604492, "learning_rate": 9.418125399002236e-06, "loss": 0.69615779, "memory(GiB)": 34.88, "step": 29155, "train_speed(iter/s)": 0.417154 }, { "acc": 0.90731297, "epoch": 0.7895378117131021, "grad_norm": 14.145899772644043, "learning_rate": 9.417863374037006e-06, "loss": 0.49505515, "memory(GiB)": 34.88, "step": 29160, "train_speed(iter/s)": 0.417157 }, { "acc": 0.84933357, "epoch": 0.7896731919963177, "grad_norm": 15.449835777282715, "learning_rate": 9.417601293735619e-06, "loss": 0.71357565, "memory(GiB)": 34.88, "step": 29165, "train_speed(iter/s)": 0.417159 }, { "acc": 0.86202736, "epoch": 0.7898085722795332, "grad_norm": 13.476065635681152, "learning_rate": 9.41733915810136e-06, "loss": 0.69966516, "memory(GiB)": 34.88, "step": 29170, "train_speed(iter/s)": 0.417161 }, { "acc": 0.87686558, "epoch": 0.7899439525627487, "grad_norm": 7.422229766845703, "learning_rate": 9.41707696713751e-06, "loss": 0.57996011, "memory(GiB)": 34.88, "step": 29175, "train_speed(iter/s)": 0.417165 }, { "acc": 0.85328426, "epoch": 0.7900793328459643, "grad_norm": 12.941145896911621, "learning_rate": 9.416814720847357e-06, "loss": 0.73190575, "memory(GiB)": 34.88, "step": 29180, "train_speed(iter/s)": 0.417166 }, { "acc": 0.88647575, "epoch": 0.7902147131291799, "grad_norm": 9.96083927154541, "learning_rate": 9.416552419234181e-06, "loss": 0.59152536, "memory(GiB)": 34.88, "step": 29185, "train_speed(iter/s)": 0.417169 }, { "acc": 0.88194857, "epoch": 0.7903500934123954, "grad_norm": 9.360910415649414, "learning_rate": 9.416290062301272e-06, "loss": 0.58053093, "memory(GiB)": 34.88, "step": 29190, "train_speed(iter/s)": 0.417172 }, { "acc": 0.88884134, "epoch": 0.790485473695611, "grad_norm": 5.133007049560547, "learning_rate": 9.416027650051917e-06, "loss": 0.60058804, "memory(GiB)": 34.88, "step": 29195, "train_speed(iter/s)": 0.417175 }, { "acc": 0.8800065, "epoch": 0.7906208539788265, "grad_norm": 6.741491317749023, "learning_rate": 9.4157651824894e-06, "loss": 0.61222305, "memory(GiB)": 34.88, "step": 29200, "train_speed(iter/s)": 0.417178 }, { "acc": 0.8570097, "epoch": 0.7907562342620421, "grad_norm": 10.437769889831543, "learning_rate": 9.415502659617011e-06, "loss": 0.68869286, "memory(GiB)": 34.88, "step": 29205, "train_speed(iter/s)": 0.41718 }, { "acc": 0.88335609, "epoch": 0.7908916145452576, "grad_norm": 10.1034574508667, "learning_rate": 9.415240081438039e-06, "loss": 0.62368093, "memory(GiB)": 34.88, "step": 29210, "train_speed(iter/s)": 0.417184 }, { "acc": 0.88495722, "epoch": 0.7910269948284732, "grad_norm": 2.6610300540924072, "learning_rate": 9.41497744795577e-06, "loss": 0.63178797, "memory(GiB)": 34.88, "step": 29215, "train_speed(iter/s)": 0.417186 }, { "acc": 0.88066397, "epoch": 0.7911623751116887, "grad_norm": 22.820140838623047, "learning_rate": 9.414714759173499e-06, "loss": 0.59226055, "memory(GiB)": 34.88, "step": 29220, "train_speed(iter/s)": 0.417189 }, { "acc": 0.87849379, "epoch": 0.7912977553949043, "grad_norm": 15.690184593200684, "learning_rate": 9.414452015094515e-06, "loss": 0.6790637, "memory(GiB)": 34.88, "step": 29225, "train_speed(iter/s)": 0.417193 }, { "acc": 0.85599852, "epoch": 0.7914331356781198, "grad_norm": 13.2592134475708, "learning_rate": 9.414189215722107e-06, "loss": 0.7510355, "memory(GiB)": 34.88, "step": 29230, "train_speed(iter/s)": 0.417195 }, { "acc": 0.86261101, "epoch": 0.7915685159613354, "grad_norm": 8.665544509887695, "learning_rate": 9.413926361059568e-06, "loss": 0.68907175, "memory(GiB)": 34.88, "step": 29235, "train_speed(iter/s)": 0.417198 }, { "acc": 0.8767766, "epoch": 0.791703896244551, "grad_norm": 10.441941261291504, "learning_rate": 9.413663451110193e-06, "loss": 0.67225409, "memory(GiB)": 34.88, "step": 29240, "train_speed(iter/s)": 0.417201 }, { "acc": 0.87240467, "epoch": 0.7918392765277665, "grad_norm": 17.536962509155273, "learning_rate": 9.413400485877272e-06, "loss": 0.68179736, "memory(GiB)": 34.88, "step": 29245, "train_speed(iter/s)": 0.417204 }, { "acc": 0.84904385, "epoch": 0.791974656810982, "grad_norm": 8.876773834228516, "learning_rate": 9.413137465364105e-06, "loss": 0.8120945, "memory(GiB)": 34.88, "step": 29250, "train_speed(iter/s)": 0.417207 }, { "acc": 0.86623831, "epoch": 0.7921100370941976, "grad_norm": 8.671602249145508, "learning_rate": 9.41287438957398e-06, "loss": 0.66411262, "memory(GiB)": 34.88, "step": 29255, "train_speed(iter/s)": 0.41721 }, { "acc": 0.90218239, "epoch": 0.7922454173774132, "grad_norm": 5.090657711029053, "learning_rate": 9.412611258510195e-06, "loss": 0.44148202, "memory(GiB)": 34.88, "step": 29260, "train_speed(iter/s)": 0.417213 }, { "acc": 0.87038603, "epoch": 0.7923807976606287, "grad_norm": 10.211528778076172, "learning_rate": 9.412348072176048e-06, "loss": 0.61351705, "memory(GiB)": 34.88, "step": 29265, "train_speed(iter/s)": 0.417215 }, { "acc": 0.86766272, "epoch": 0.7925161779438442, "grad_norm": 6.869706153869629, "learning_rate": 9.412084830574835e-06, "loss": 0.64013925, "memory(GiB)": 34.88, "step": 29270, "train_speed(iter/s)": 0.417218 }, { "acc": 0.87339382, "epoch": 0.7926515582270598, "grad_norm": 6.708427906036377, "learning_rate": 9.411821533709852e-06, "loss": 0.69644184, "memory(GiB)": 34.88, "step": 29275, "train_speed(iter/s)": 0.417221 }, { "acc": 0.85279446, "epoch": 0.7927869385102754, "grad_norm": 8.286134719848633, "learning_rate": 9.411558181584399e-06, "loss": 0.77133183, "memory(GiB)": 34.88, "step": 29280, "train_speed(iter/s)": 0.417224 }, { "acc": 0.86405592, "epoch": 0.792922318793491, "grad_norm": 44.74105453491211, "learning_rate": 9.411294774201774e-06, "loss": 0.66850438, "memory(GiB)": 34.88, "step": 29285, "train_speed(iter/s)": 0.417227 }, { "acc": 0.85670052, "epoch": 0.7930576990767064, "grad_norm": 12.876065254211426, "learning_rate": 9.411031311565278e-06, "loss": 0.75206194, "memory(GiB)": 34.88, "step": 29290, "train_speed(iter/s)": 0.417228 }, { "acc": 0.88628473, "epoch": 0.793193079359922, "grad_norm": 7.483942985534668, "learning_rate": 9.41076779367821e-06, "loss": 0.55364475, "memory(GiB)": 34.88, "step": 29295, "train_speed(iter/s)": 0.417231 }, { "acc": 0.8715168, "epoch": 0.7933284596431376, "grad_norm": 9.433233261108398, "learning_rate": 9.410504220543873e-06, "loss": 0.64037547, "memory(GiB)": 34.88, "step": 29300, "train_speed(iter/s)": 0.417235 }, { "acc": 0.89753189, "epoch": 0.7934638399263532, "grad_norm": 6.908406734466553, "learning_rate": 9.410240592165566e-06, "loss": 0.54618549, "memory(GiB)": 34.88, "step": 29305, "train_speed(iter/s)": 0.417238 }, { "acc": 0.85869665, "epoch": 0.7935992202095686, "grad_norm": 15.610448837280273, "learning_rate": 9.409976908546593e-06, "loss": 0.7233315, "memory(GiB)": 34.88, "step": 29310, "train_speed(iter/s)": 0.41724 }, { "acc": 0.87546387, "epoch": 0.7937346004927842, "grad_norm": 6.375506401062012, "learning_rate": 9.409713169690257e-06, "loss": 0.63127985, "memory(GiB)": 34.88, "step": 29315, "train_speed(iter/s)": 0.417243 }, { "acc": 0.86455517, "epoch": 0.7938699807759998, "grad_norm": 6.645719528198242, "learning_rate": 9.409449375599865e-06, "loss": 0.73270588, "memory(GiB)": 34.88, "step": 29320, "train_speed(iter/s)": 0.417246 }, { "acc": 0.86696262, "epoch": 0.7940053610592154, "grad_norm": 16.185733795166016, "learning_rate": 9.409185526278714e-06, "loss": 0.75591927, "memory(GiB)": 34.88, "step": 29325, "train_speed(iter/s)": 0.417249 }, { "acc": 0.8730814, "epoch": 0.7941407413424308, "grad_norm": 8.352801322937012, "learning_rate": 9.408921621730118e-06, "loss": 0.65957727, "memory(GiB)": 34.88, "step": 29330, "train_speed(iter/s)": 0.417252 }, { "acc": 0.88175211, "epoch": 0.7942761216256464, "grad_norm": 36.49774169921875, "learning_rate": 9.408657661957376e-06, "loss": 0.50072975, "memory(GiB)": 34.88, "step": 29335, "train_speed(iter/s)": 0.417255 }, { "acc": 0.86625128, "epoch": 0.794411501908862, "grad_norm": 18.09741973876953, "learning_rate": 9.408393646963798e-06, "loss": 0.68304029, "memory(GiB)": 34.88, "step": 29340, "train_speed(iter/s)": 0.417257 }, { "acc": 0.85278301, "epoch": 0.7945468821920776, "grad_norm": 11.200887680053711, "learning_rate": 9.40812957675269e-06, "loss": 0.76919498, "memory(GiB)": 34.88, "step": 29345, "train_speed(iter/s)": 0.417259 }, { "acc": 0.86558342, "epoch": 0.794682262475293, "grad_norm": 7.471277713775635, "learning_rate": 9.40786545132736e-06, "loss": 0.6426199, "memory(GiB)": 34.88, "step": 29350, "train_speed(iter/s)": 0.417263 }, { "acc": 0.87065582, "epoch": 0.7948176427585086, "grad_norm": 8.063745498657227, "learning_rate": 9.407601270691117e-06, "loss": 0.64388838, "memory(GiB)": 34.88, "step": 29355, "train_speed(iter/s)": 0.417266 }, { "acc": 0.86533222, "epoch": 0.7949530230417242, "grad_norm": 11.291234970092773, "learning_rate": 9.407337034847274e-06, "loss": 0.77237906, "memory(GiB)": 34.88, "step": 29360, "train_speed(iter/s)": 0.417269 }, { "acc": 0.88570719, "epoch": 0.7950884033249398, "grad_norm": 12.557601928710938, "learning_rate": 9.407072743799137e-06, "loss": 0.53666258, "memory(GiB)": 34.88, "step": 29365, "train_speed(iter/s)": 0.417271 }, { "acc": 0.86563921, "epoch": 0.7952237836081553, "grad_norm": 13.302142143249512, "learning_rate": 9.406808397550014e-06, "loss": 0.71361103, "memory(GiB)": 34.88, "step": 29370, "train_speed(iter/s)": 0.417274 }, { "acc": 0.85816708, "epoch": 0.7953591638913708, "grad_norm": 21.33910369873047, "learning_rate": 9.406543996103222e-06, "loss": 0.69405584, "memory(GiB)": 34.88, "step": 29375, "train_speed(iter/s)": 0.417277 }, { "acc": 0.86234074, "epoch": 0.7954945441745864, "grad_norm": 7.3912248611450195, "learning_rate": 9.406279539462071e-06, "loss": 0.70634851, "memory(GiB)": 34.88, "step": 29380, "train_speed(iter/s)": 0.41728 }, { "acc": 0.85264921, "epoch": 0.795629924457802, "grad_norm": 15.833710670471191, "learning_rate": 9.406015027629876e-06, "loss": 0.79877696, "memory(GiB)": 34.88, "step": 29385, "train_speed(iter/s)": 0.417283 }, { "acc": 0.85579376, "epoch": 0.7957653047410175, "grad_norm": 15.081368446350098, "learning_rate": 9.405750460609946e-06, "loss": 0.76736231, "memory(GiB)": 34.88, "step": 29390, "train_speed(iter/s)": 0.417285 }, { "acc": 0.86120872, "epoch": 0.7959006850242331, "grad_norm": 8.562430381774902, "learning_rate": 9.405485838405599e-06, "loss": 0.78383579, "memory(GiB)": 34.88, "step": 29395, "train_speed(iter/s)": 0.417288 }, { "acc": 0.84127274, "epoch": 0.7960360653074486, "grad_norm": 10.921366691589355, "learning_rate": 9.405221161020147e-06, "loss": 0.92089787, "memory(GiB)": 34.88, "step": 29400, "train_speed(iter/s)": 0.417291 }, { "acc": 0.85201473, "epoch": 0.7961714455906642, "grad_norm": 14.956668853759766, "learning_rate": 9.404956428456907e-06, "loss": 0.74507694, "memory(GiB)": 34.88, "step": 29405, "train_speed(iter/s)": 0.417293 }, { "acc": 0.88659344, "epoch": 0.7963068258738797, "grad_norm": 9.270655632019043, "learning_rate": 9.404691640719196e-06, "loss": 0.52877192, "memory(GiB)": 34.88, "step": 29410, "train_speed(iter/s)": 0.417296 }, { "acc": 0.9000803, "epoch": 0.7964422061570953, "grad_norm": 8.261153221130371, "learning_rate": 9.404426797810332e-06, "loss": 0.44607625, "memory(GiB)": 34.88, "step": 29415, "train_speed(iter/s)": 0.417299 }, { "acc": 0.86749458, "epoch": 0.7965775864403108, "grad_norm": 17.093542098999023, "learning_rate": 9.40416189973363e-06, "loss": 0.75560312, "memory(GiB)": 34.88, "step": 29420, "train_speed(iter/s)": 0.417302 }, { "acc": 0.87833233, "epoch": 0.7967129667235264, "grad_norm": 9.269384384155273, "learning_rate": 9.40389694649241e-06, "loss": 0.61381402, "memory(GiB)": 34.88, "step": 29425, "train_speed(iter/s)": 0.417304 }, { "acc": 0.84645882, "epoch": 0.7968483470067419, "grad_norm": 14.57458782196045, "learning_rate": 9.403631938089991e-06, "loss": 0.77455006, "memory(GiB)": 34.88, "step": 29430, "train_speed(iter/s)": 0.417308 }, { "acc": 0.88563099, "epoch": 0.7969837272899575, "grad_norm": 6.515408992767334, "learning_rate": 9.40336687452969e-06, "loss": 0.65025773, "memory(GiB)": 34.88, "step": 29435, "train_speed(iter/s)": 0.417311 }, { "acc": 0.87503719, "epoch": 0.7971191075731731, "grad_norm": 11.005475997924805, "learning_rate": 9.403101755814833e-06, "loss": 0.6493474, "memory(GiB)": 34.88, "step": 29440, "train_speed(iter/s)": 0.417313 }, { "acc": 0.86762228, "epoch": 0.7972544878563886, "grad_norm": 8.745978355407715, "learning_rate": 9.402836581948736e-06, "loss": 0.67052631, "memory(GiB)": 34.88, "step": 29445, "train_speed(iter/s)": 0.417315 }, { "acc": 0.87518482, "epoch": 0.7973898681396041, "grad_norm": 23.967544555664062, "learning_rate": 9.402571352934724e-06, "loss": 0.61707563, "memory(GiB)": 34.88, "step": 29450, "train_speed(iter/s)": 0.417318 }, { "acc": 0.85330296, "epoch": 0.7975252484228197, "grad_norm": 7.361678600311279, "learning_rate": 9.402306068776117e-06, "loss": 0.73528233, "memory(GiB)": 34.88, "step": 29455, "train_speed(iter/s)": 0.417321 }, { "acc": 0.87249603, "epoch": 0.7976606287060353, "grad_norm": 6.013700008392334, "learning_rate": 9.402040729476238e-06, "loss": 0.64321251, "memory(GiB)": 34.88, "step": 29460, "train_speed(iter/s)": 0.417323 }, { "acc": 0.8499506, "epoch": 0.7977960089892508, "grad_norm": 10.269868850708008, "learning_rate": 9.401775335038414e-06, "loss": 0.75051985, "memory(GiB)": 34.88, "step": 29465, "train_speed(iter/s)": 0.417326 }, { "acc": 0.87509117, "epoch": 0.7979313892724663, "grad_norm": 10.7418851852417, "learning_rate": 9.401509885465969e-06, "loss": 0.62734137, "memory(GiB)": 34.88, "step": 29470, "train_speed(iter/s)": 0.417328 }, { "acc": 0.86007223, "epoch": 0.7980667695556819, "grad_norm": 6.804587364196777, "learning_rate": 9.401244380762226e-06, "loss": 0.71345201, "memory(GiB)": 34.88, "step": 29475, "train_speed(iter/s)": 0.417331 }, { "acc": 0.87728882, "epoch": 0.7982021498388975, "grad_norm": 19.022216796875, "learning_rate": 9.400978820930512e-06, "loss": 0.63557053, "memory(GiB)": 34.88, "step": 29480, "train_speed(iter/s)": 0.417334 }, { "acc": 0.87938881, "epoch": 0.7983375301221131, "grad_norm": 7.108494758605957, "learning_rate": 9.400713205974156e-06, "loss": 0.57851911, "memory(GiB)": 34.88, "step": 29485, "train_speed(iter/s)": 0.417336 }, { "acc": 0.85186253, "epoch": 0.7984729104053285, "grad_norm": 12.909896850585938, "learning_rate": 9.40044753589648e-06, "loss": 0.82788334, "memory(GiB)": 34.88, "step": 29490, "train_speed(iter/s)": 0.417339 }, { "acc": 0.86957512, "epoch": 0.7986082906885441, "grad_norm": 5.3436598777771, "learning_rate": 9.400181810700817e-06, "loss": 0.58777475, "memory(GiB)": 34.88, "step": 29495, "train_speed(iter/s)": 0.417342 }, { "acc": 0.87859001, "epoch": 0.7987436709717597, "grad_norm": 7.233877182006836, "learning_rate": 9.399916030390494e-06, "loss": 0.62458072, "memory(GiB)": 34.88, "step": 29500, "train_speed(iter/s)": 0.417345 }, { "acc": 0.86226273, "epoch": 0.7988790512549753, "grad_norm": 11.835688591003418, "learning_rate": 9.399650194968838e-06, "loss": 0.76634521, "memory(GiB)": 34.88, "step": 29505, "train_speed(iter/s)": 0.417348 }, { "acc": 0.86560049, "epoch": 0.7990144315381907, "grad_norm": 7.566596508026123, "learning_rate": 9.399384304439186e-06, "loss": 0.72602949, "memory(GiB)": 34.88, "step": 29510, "train_speed(iter/s)": 0.417351 }, { "acc": 0.89786701, "epoch": 0.7991498118214063, "grad_norm": 11.003804206848145, "learning_rate": 9.399118358804861e-06, "loss": 0.48576999, "memory(GiB)": 34.88, "step": 29515, "train_speed(iter/s)": 0.417354 }, { "acc": 0.87319479, "epoch": 0.7992851921046219, "grad_norm": 20.1813907623291, "learning_rate": 9.398852358069198e-06, "loss": 0.76398344, "memory(GiB)": 34.88, "step": 29520, "train_speed(iter/s)": 0.417357 }, { "acc": 0.85523643, "epoch": 0.7994205723878375, "grad_norm": 11.696281433105469, "learning_rate": 9.39858630223553e-06, "loss": 0.75462751, "memory(GiB)": 34.88, "step": 29525, "train_speed(iter/s)": 0.417359 }, { "acc": 0.89430237, "epoch": 0.799555952671053, "grad_norm": 5.153225898742676, "learning_rate": 9.398320191307188e-06, "loss": 0.52150345, "memory(GiB)": 34.88, "step": 29530, "train_speed(iter/s)": 0.417362 }, { "acc": 0.86083822, "epoch": 0.7996913329542685, "grad_norm": 8.735594749450684, "learning_rate": 9.398054025287506e-06, "loss": 0.69670229, "memory(GiB)": 34.88, "step": 29535, "train_speed(iter/s)": 0.417365 }, { "acc": 0.85607615, "epoch": 0.7998267132374841, "grad_norm": 9.191580772399902, "learning_rate": 9.39778780417982e-06, "loss": 0.74543476, "memory(GiB)": 34.88, "step": 29540, "train_speed(iter/s)": 0.417367 }, { "acc": 0.85572624, "epoch": 0.7999620935206997, "grad_norm": 7.196527004241943, "learning_rate": 9.397521527987463e-06, "loss": 0.69300318, "memory(GiB)": 34.88, "step": 29545, "train_speed(iter/s)": 0.41737 }, { "acc": 0.86757336, "epoch": 0.8000974738039152, "grad_norm": 10.253507614135742, "learning_rate": 9.397255196713772e-06, "loss": 0.63879414, "memory(GiB)": 34.88, "step": 29550, "train_speed(iter/s)": 0.417373 }, { "acc": 0.90356865, "epoch": 0.8002328540871307, "grad_norm": 8.666812896728516, "learning_rate": 9.396988810362083e-06, "loss": 0.51989346, "memory(GiB)": 34.88, "step": 29555, "train_speed(iter/s)": 0.417376 }, { "acc": 0.88296633, "epoch": 0.8003682343703463, "grad_norm": 10.953656196594238, "learning_rate": 9.39672236893573e-06, "loss": 0.59429693, "memory(GiB)": 34.88, "step": 29560, "train_speed(iter/s)": 0.417379 }, { "acc": 0.87676163, "epoch": 0.8005036146535619, "grad_norm": 8.765512466430664, "learning_rate": 9.396455872438055e-06, "loss": 0.61448898, "memory(GiB)": 34.88, "step": 29565, "train_speed(iter/s)": 0.417382 }, { "acc": 0.85588293, "epoch": 0.8006389949367774, "grad_norm": 6.768349647521973, "learning_rate": 9.396189320872394e-06, "loss": 0.69745436, "memory(GiB)": 34.88, "step": 29570, "train_speed(iter/s)": 0.417385 }, { "acc": 0.86048069, "epoch": 0.800774375219993, "grad_norm": 7.955780506134033, "learning_rate": 9.395922714242086e-06, "loss": 0.86714125, "memory(GiB)": 34.88, "step": 29575, "train_speed(iter/s)": 0.417388 }, { "acc": 0.85368671, "epoch": 0.8009097555032085, "grad_norm": 9.281381607055664, "learning_rate": 9.395656052550473e-06, "loss": 0.80095625, "memory(GiB)": 34.88, "step": 29580, "train_speed(iter/s)": 0.41739 }, { "acc": 0.86040707, "epoch": 0.8010451357864241, "grad_norm": 10.418069839477539, "learning_rate": 9.395389335800894e-06, "loss": 0.79507122, "memory(GiB)": 34.88, "step": 29585, "train_speed(iter/s)": 0.417393 }, { "acc": 0.85829029, "epoch": 0.8011805160696396, "grad_norm": 13.805505752563477, "learning_rate": 9.39512256399669e-06, "loss": 0.66532631, "memory(GiB)": 34.88, "step": 29590, "train_speed(iter/s)": 0.417396 }, { "acc": 0.86301794, "epoch": 0.8013158963528552, "grad_norm": 10.46607494354248, "learning_rate": 9.394855737141202e-06, "loss": 0.74470234, "memory(GiB)": 34.88, "step": 29595, "train_speed(iter/s)": 0.417399 }, { "acc": 0.85461884, "epoch": 0.8014512766360707, "grad_norm": 45.51088333129883, "learning_rate": 9.394588855237775e-06, "loss": 0.78354793, "memory(GiB)": 34.88, "step": 29600, "train_speed(iter/s)": 0.417401 }, { "acc": 0.86895647, "epoch": 0.8015866569192863, "grad_norm": 10.550114631652832, "learning_rate": 9.39432191828975e-06, "loss": 0.67188501, "memory(GiB)": 34.88, "step": 29605, "train_speed(iter/s)": 0.417404 }, { "acc": 0.88138962, "epoch": 0.8017220372025018, "grad_norm": 9.177189826965332, "learning_rate": 9.39405492630047e-06, "loss": 0.64630566, "memory(GiB)": 34.88, "step": 29610, "train_speed(iter/s)": 0.417406 }, { "acc": 0.85268307, "epoch": 0.8018574174857174, "grad_norm": 8.986492156982422, "learning_rate": 9.393787879273284e-06, "loss": 0.8331068, "memory(GiB)": 34.88, "step": 29615, "train_speed(iter/s)": 0.417409 }, { "acc": 0.88582039, "epoch": 0.801992797768933, "grad_norm": 10.673365592956543, "learning_rate": 9.393520777211534e-06, "loss": 0.54711347, "memory(GiB)": 34.88, "step": 29620, "train_speed(iter/s)": 0.417412 }, { "acc": 0.88262844, "epoch": 0.8021281780521485, "grad_norm": 9.075024604797363, "learning_rate": 9.393253620118565e-06, "loss": 0.59557047, "memory(GiB)": 34.88, "step": 29625, "train_speed(iter/s)": 0.417415 }, { "acc": 0.88578062, "epoch": 0.802263558335364, "grad_norm": 5.798618316650391, "learning_rate": 9.392986407997727e-06, "loss": 0.59585109, "memory(GiB)": 34.88, "step": 29630, "train_speed(iter/s)": 0.417417 }, { "acc": 0.86689053, "epoch": 0.8023989386185796, "grad_norm": 5.472515106201172, "learning_rate": 9.392719140852363e-06, "loss": 0.68997917, "memory(GiB)": 34.88, "step": 29635, "train_speed(iter/s)": 0.41742 }, { "acc": 0.87512398, "epoch": 0.8025343189017952, "grad_norm": 9.835816383361816, "learning_rate": 9.392451818685825e-06, "loss": 0.57500148, "memory(GiB)": 34.88, "step": 29640, "train_speed(iter/s)": 0.417423 }, { "acc": 0.88892193, "epoch": 0.8026696991850107, "grad_norm": 5.129908561706543, "learning_rate": 9.39218444150146e-06, "loss": 0.53722348, "memory(GiB)": 34.88, "step": 29645, "train_speed(iter/s)": 0.417426 }, { "acc": 0.86282291, "epoch": 0.8028050794682262, "grad_norm": 8.064213752746582, "learning_rate": 9.39191700930262e-06, "loss": 0.75399694, "memory(GiB)": 34.88, "step": 29650, "train_speed(iter/s)": 0.417429 }, { "acc": 0.85303812, "epoch": 0.8029404597514418, "grad_norm": 8.11078929901123, "learning_rate": 9.39164952209265e-06, "loss": 0.85698586, "memory(GiB)": 34.88, "step": 29655, "train_speed(iter/s)": 0.417431 }, { "acc": 0.86662941, "epoch": 0.8030758400346574, "grad_norm": 8.43738842010498, "learning_rate": 9.391381979874904e-06, "loss": 0.67424402, "memory(GiB)": 34.88, "step": 29660, "train_speed(iter/s)": 0.417434 }, { "acc": 0.85438118, "epoch": 0.803211220317873, "grad_norm": 24.98348045349121, "learning_rate": 9.391114382652733e-06, "loss": 0.73952236, "memory(GiB)": 34.88, "step": 29665, "train_speed(iter/s)": 0.417437 }, { "acc": 0.88334837, "epoch": 0.8033466006010884, "grad_norm": 8.04535961151123, "learning_rate": 9.390846730429491e-06, "loss": 0.56920924, "memory(GiB)": 34.88, "step": 29670, "train_speed(iter/s)": 0.41744 }, { "acc": 0.87157469, "epoch": 0.803481980884304, "grad_norm": 5.722136497497559, "learning_rate": 9.390579023208529e-06, "loss": 0.6198719, "memory(GiB)": 34.88, "step": 29675, "train_speed(iter/s)": 0.417443 }, { "acc": 0.86440687, "epoch": 0.8036173611675196, "grad_norm": 9.881314277648926, "learning_rate": 9.390311260993198e-06, "loss": 0.7265748, "memory(GiB)": 34.88, "step": 29680, "train_speed(iter/s)": 0.417445 }, { "acc": 0.86506443, "epoch": 0.8037527414507352, "grad_norm": 9.034982681274414, "learning_rate": 9.390043443786857e-06, "loss": 0.72076559, "memory(GiB)": 34.88, "step": 29685, "train_speed(iter/s)": 0.417448 }, { "acc": 0.89670773, "epoch": 0.8038881217339506, "grad_norm": 8.661016464233398, "learning_rate": 9.389775571592859e-06, "loss": 0.53489375, "memory(GiB)": 34.88, "step": 29690, "train_speed(iter/s)": 0.41745 }, { "acc": 0.87904148, "epoch": 0.8040235020171662, "grad_norm": 6.166337013244629, "learning_rate": 9.38950764441456e-06, "loss": 0.62814078, "memory(GiB)": 34.88, "step": 29695, "train_speed(iter/s)": 0.417453 }, { "acc": 0.89576626, "epoch": 0.8041588823003818, "grad_norm": 6.851956367492676, "learning_rate": 9.389239662255315e-06, "loss": 0.50362291, "memory(GiB)": 34.88, "step": 29700, "train_speed(iter/s)": 0.417456 }, { "acc": 0.85757446, "epoch": 0.8042942625835974, "grad_norm": 11.284345626831055, "learning_rate": 9.388971625118482e-06, "loss": 0.72609329, "memory(GiB)": 34.88, "step": 29705, "train_speed(iter/s)": 0.417459 }, { "acc": 0.86696157, "epoch": 0.8044296428668128, "grad_norm": 10.377022743225098, "learning_rate": 9.388703533007419e-06, "loss": 0.70322666, "memory(GiB)": 34.88, "step": 29710, "train_speed(iter/s)": 0.417462 }, { "acc": 0.84458885, "epoch": 0.8045650231500284, "grad_norm": 6.318027019500732, "learning_rate": 9.388435385925482e-06, "loss": 0.82369041, "memory(GiB)": 34.88, "step": 29715, "train_speed(iter/s)": 0.417464 }, { "acc": 0.86762199, "epoch": 0.804700403433244, "grad_norm": 13.602075576782227, "learning_rate": 9.388167183876034e-06, "loss": 0.72372799, "memory(GiB)": 34.88, "step": 29720, "train_speed(iter/s)": 0.417467 }, { "acc": 0.85364704, "epoch": 0.8048357837164596, "grad_norm": 11.499574661254883, "learning_rate": 9.387898926862433e-06, "loss": 0.83812695, "memory(GiB)": 34.88, "step": 29725, "train_speed(iter/s)": 0.41747 }, { "acc": 0.88544579, "epoch": 0.804971163999675, "grad_norm": 6.551732063293457, "learning_rate": 9.38763061488804e-06, "loss": 0.53657093, "memory(GiB)": 34.88, "step": 29730, "train_speed(iter/s)": 0.417472 }, { "acc": 0.88604584, "epoch": 0.8051065442828906, "grad_norm": 7.645398139953613, "learning_rate": 9.387362247956214e-06, "loss": 0.56150446, "memory(GiB)": 34.88, "step": 29735, "train_speed(iter/s)": 0.417475 }, { "acc": 0.8801878, "epoch": 0.8052419245661062, "grad_norm": 7.532374382019043, "learning_rate": 9.387093826070318e-06, "loss": 0.54257226, "memory(GiB)": 34.88, "step": 29740, "train_speed(iter/s)": 0.417477 }, { "acc": 0.84223022, "epoch": 0.8053773048493218, "grad_norm": 9.377569198608398, "learning_rate": 9.386825349233715e-06, "loss": 0.88231983, "memory(GiB)": 34.88, "step": 29745, "train_speed(iter/s)": 0.41748 }, { "acc": 0.87874489, "epoch": 0.8055126851325373, "grad_norm": 9.23708438873291, "learning_rate": 9.386556817449766e-06, "loss": 0.60197144, "memory(GiB)": 34.88, "step": 29750, "train_speed(iter/s)": 0.417483 }, { "acc": 0.87124128, "epoch": 0.8056480654157528, "grad_norm": 5.401788711547852, "learning_rate": 9.38628823072184e-06, "loss": 0.61028757, "memory(GiB)": 34.88, "step": 29755, "train_speed(iter/s)": 0.417486 }, { "acc": 0.88150892, "epoch": 0.8057834456989684, "grad_norm": 5.735050678253174, "learning_rate": 9.386019589053296e-06, "loss": 0.62975678, "memory(GiB)": 34.88, "step": 29760, "train_speed(iter/s)": 0.417488 }, { "acc": 0.88213863, "epoch": 0.805918825982184, "grad_norm": 12.213950157165527, "learning_rate": 9.385750892447502e-06, "loss": 0.67443895, "memory(GiB)": 34.88, "step": 29765, "train_speed(iter/s)": 0.417491 }, { "acc": 0.87844563, "epoch": 0.8060542062653995, "grad_norm": 5.061877250671387, "learning_rate": 9.385482140907826e-06, "loss": 0.59373789, "memory(GiB)": 34.88, "step": 29770, "train_speed(iter/s)": 0.417494 }, { "acc": 0.86612473, "epoch": 0.8061895865486151, "grad_norm": 9.358560562133789, "learning_rate": 9.385213334437629e-06, "loss": 0.7367579, "memory(GiB)": 34.88, "step": 29775, "train_speed(iter/s)": 0.417496 }, { "acc": 0.86578932, "epoch": 0.8063249668318306, "grad_norm": 7.327521324157715, "learning_rate": 9.384944473040284e-06, "loss": 0.70106802, "memory(GiB)": 34.88, "step": 29780, "train_speed(iter/s)": 0.417499 }, { "acc": 0.88697958, "epoch": 0.8064603471150462, "grad_norm": 4.925135135650635, "learning_rate": 9.384675556719154e-06, "loss": 0.6002182, "memory(GiB)": 34.88, "step": 29785, "train_speed(iter/s)": 0.417501 }, { "acc": 0.86006823, "epoch": 0.8065957273982617, "grad_norm": 13.868095397949219, "learning_rate": 9.384406585477612e-06, "loss": 0.68543949, "memory(GiB)": 34.88, "step": 29790, "train_speed(iter/s)": 0.417504 }, { "acc": 0.87150955, "epoch": 0.8067311076814773, "grad_norm": 12.273006439208984, "learning_rate": 9.384137559319025e-06, "loss": 0.73852997, "memory(GiB)": 34.88, "step": 29795, "train_speed(iter/s)": 0.417506 }, { "acc": 0.87328625, "epoch": 0.8068664879646928, "grad_norm": 5.764831066131592, "learning_rate": 9.383868478246765e-06, "loss": 0.56164441, "memory(GiB)": 34.88, "step": 29800, "train_speed(iter/s)": 0.417509 }, { "acc": 0.89674978, "epoch": 0.8070018682479084, "grad_norm": 9.60239315032959, "learning_rate": 9.3835993422642e-06, "loss": 0.58295698, "memory(GiB)": 34.88, "step": 29805, "train_speed(iter/s)": 0.417512 }, { "acc": 0.88653631, "epoch": 0.8071372485311239, "grad_norm": 6.0869221687316895, "learning_rate": 9.383330151374702e-06, "loss": 0.57747145, "memory(GiB)": 34.88, "step": 29810, "train_speed(iter/s)": 0.417515 }, { "acc": 0.86543007, "epoch": 0.8072726288143395, "grad_norm": 13.811779975891113, "learning_rate": 9.383060905581647e-06, "loss": 0.66312895, "memory(GiB)": 34.88, "step": 29815, "train_speed(iter/s)": 0.417518 }, { "acc": 0.87562389, "epoch": 0.8074080090975551, "grad_norm": 7.518667697906494, "learning_rate": 9.382791604888404e-06, "loss": 0.6179904, "memory(GiB)": 34.88, "step": 29820, "train_speed(iter/s)": 0.41752 }, { "acc": 0.88546314, "epoch": 0.8075433893807706, "grad_norm": 7.0965189933776855, "learning_rate": 9.382522249298346e-06, "loss": 0.58707094, "memory(GiB)": 34.88, "step": 29825, "train_speed(iter/s)": 0.417523 }, { "acc": 0.8749464, "epoch": 0.8076787696639861, "grad_norm": 10.046664237976074, "learning_rate": 9.382252838814851e-06, "loss": 0.69645262, "memory(GiB)": 34.88, "step": 29830, "train_speed(iter/s)": 0.417526 }, { "acc": 0.87268429, "epoch": 0.8078141499472017, "grad_norm": 8.041099548339844, "learning_rate": 9.38198337344129e-06, "loss": 0.68391256, "memory(GiB)": 34.88, "step": 29835, "train_speed(iter/s)": 0.417529 }, { "acc": 0.89029837, "epoch": 0.8079495302304173, "grad_norm": 8.954981803894043, "learning_rate": 9.38171385318104e-06, "loss": 0.53914013, "memory(GiB)": 34.88, "step": 29840, "train_speed(iter/s)": 0.417531 }, { "acc": 0.88065815, "epoch": 0.8080849105136328, "grad_norm": 13.431334495544434, "learning_rate": 9.381444278037477e-06, "loss": 0.61250801, "memory(GiB)": 34.88, "step": 29845, "train_speed(iter/s)": 0.417534 }, { "acc": 0.87105856, "epoch": 0.8082202907968483, "grad_norm": 8.56704044342041, "learning_rate": 9.381174648013979e-06, "loss": 0.66293087, "memory(GiB)": 34.88, "step": 29850, "train_speed(iter/s)": 0.417537 }, { "acc": 0.85926437, "epoch": 0.8083556710800639, "grad_norm": 9.1445951461792, "learning_rate": 9.380904963113925e-06, "loss": 0.72415667, "memory(GiB)": 34.88, "step": 29855, "train_speed(iter/s)": 0.41754 }, { "acc": 0.89269505, "epoch": 0.8084910513632795, "grad_norm": 5.9656476974487305, "learning_rate": 9.380635223340688e-06, "loss": 0.49872389, "memory(GiB)": 34.88, "step": 29860, "train_speed(iter/s)": 0.417541 }, { "acc": 0.84170628, "epoch": 0.8086264316464951, "grad_norm": 11.587608337402344, "learning_rate": 9.380365428697652e-06, "loss": 0.89009628, "memory(GiB)": 34.88, "step": 29865, "train_speed(iter/s)": 0.417543 }, { "acc": 0.8687664, "epoch": 0.8087618119297105, "grad_norm": 13.333294868469238, "learning_rate": 9.380095579188197e-06, "loss": 0.61180873, "memory(GiB)": 34.88, "step": 29870, "train_speed(iter/s)": 0.417545 }, { "acc": 0.86794109, "epoch": 0.8088971922129261, "grad_norm": 16.37456703186035, "learning_rate": 9.379825674815697e-06, "loss": 0.66325974, "memory(GiB)": 34.88, "step": 29875, "train_speed(iter/s)": 0.417548 }, { "acc": 0.8374445, "epoch": 0.8090325724961417, "grad_norm": 10.801817893981934, "learning_rate": 9.379555715583542e-06, "loss": 0.85574875, "memory(GiB)": 34.88, "step": 29880, "train_speed(iter/s)": 0.417549 }, { "acc": 0.88381996, "epoch": 0.8091679527793573, "grad_norm": 9.619477272033691, "learning_rate": 9.379285701495107e-06, "loss": 0.65007105, "memory(GiB)": 34.88, "step": 29885, "train_speed(iter/s)": 0.41755 }, { "acc": 0.88513298, "epoch": 0.8093033330625727, "grad_norm": 8.413260459899902, "learning_rate": 9.379015632553777e-06, "loss": 0.68338976, "memory(GiB)": 34.88, "step": 29890, "train_speed(iter/s)": 0.417552 }, { "acc": 0.87793083, "epoch": 0.8094387133457883, "grad_norm": 9.154091835021973, "learning_rate": 9.378745508762934e-06, "loss": 0.56335926, "memory(GiB)": 34.88, "step": 29895, "train_speed(iter/s)": 0.417554 }, { "acc": 0.87534294, "epoch": 0.8095740936290039, "grad_norm": 12.842080116271973, "learning_rate": 9.378475330125965e-06, "loss": 0.61695938, "memory(GiB)": 34.88, "step": 29900, "train_speed(iter/s)": 0.417557 }, { "acc": 0.87394485, "epoch": 0.8097094739122195, "grad_norm": 9.935323715209961, "learning_rate": 9.37820509664625e-06, "loss": 0.67800808, "memory(GiB)": 34.88, "step": 29905, "train_speed(iter/s)": 0.417559 }, { "acc": 0.88121376, "epoch": 0.809844854195435, "grad_norm": 6.883742332458496, "learning_rate": 9.377934808327179e-06, "loss": 0.69883041, "memory(GiB)": 34.88, "step": 29910, "train_speed(iter/s)": 0.417561 }, { "acc": 0.89325542, "epoch": 0.8099802344786505, "grad_norm": 21.375049591064453, "learning_rate": 9.377664465172134e-06, "loss": 0.56865807, "memory(GiB)": 34.88, "step": 29915, "train_speed(iter/s)": 0.417563 }, { "acc": 0.8838028, "epoch": 0.8101156147618661, "grad_norm": 9.93110179901123, "learning_rate": 9.377394067184503e-06, "loss": 0.59346118, "memory(GiB)": 34.88, "step": 29920, "train_speed(iter/s)": 0.417565 }, { "acc": 0.87491741, "epoch": 0.8102509950450816, "grad_norm": 14.035712242126465, "learning_rate": 9.377123614367672e-06, "loss": 0.68933115, "memory(GiB)": 34.88, "step": 29925, "train_speed(iter/s)": 0.417568 }, { "acc": 0.85613327, "epoch": 0.8103863753282972, "grad_norm": 9.746929168701172, "learning_rate": 9.376853106725033e-06, "loss": 0.74446568, "memory(GiB)": 34.88, "step": 29930, "train_speed(iter/s)": 0.41757 }, { "acc": 0.88729992, "epoch": 0.8105217556115127, "grad_norm": 16.099090576171875, "learning_rate": 9.376582544259968e-06, "loss": 0.50257721, "memory(GiB)": 34.88, "step": 29935, "train_speed(iter/s)": 0.417572 }, { "acc": 0.89833717, "epoch": 0.8106571358947283, "grad_norm": 4.252865791320801, "learning_rate": 9.376311926975873e-06, "loss": 0.46834326, "memory(GiB)": 34.88, "step": 29940, "train_speed(iter/s)": 0.417574 }, { "acc": 0.87783804, "epoch": 0.8107925161779438, "grad_norm": 10.823081970214844, "learning_rate": 9.376041254876134e-06, "loss": 0.69090867, "memory(GiB)": 34.88, "step": 29945, "train_speed(iter/s)": 0.417576 }, { "acc": 0.85131092, "epoch": 0.8109278964611594, "grad_norm": 8.733173370361328, "learning_rate": 9.375770527964146e-06, "loss": 0.77950544, "memory(GiB)": 34.88, "step": 29950, "train_speed(iter/s)": 0.417579 }, { "acc": 0.87720461, "epoch": 0.811063276744375, "grad_norm": 6.938898086547852, "learning_rate": 9.375499746243293e-06, "loss": 0.61746674, "memory(GiB)": 34.88, "step": 29955, "train_speed(iter/s)": 0.417581 }, { "acc": 0.87754145, "epoch": 0.8111986570275905, "grad_norm": 8.307865142822266, "learning_rate": 9.375228909716973e-06, "loss": 0.57868385, "memory(GiB)": 34.88, "step": 29960, "train_speed(iter/s)": 0.417584 }, { "acc": 0.87817802, "epoch": 0.811334037310806, "grad_norm": 9.376814842224121, "learning_rate": 9.374958018388578e-06, "loss": 0.64695716, "memory(GiB)": 34.88, "step": 29965, "train_speed(iter/s)": 0.417586 }, { "acc": 0.88130465, "epoch": 0.8114694175940216, "grad_norm": 8.198312759399414, "learning_rate": 9.374687072261498e-06, "loss": 0.72870817, "memory(GiB)": 34.88, "step": 29970, "train_speed(iter/s)": 0.417587 }, { "acc": 0.85759182, "epoch": 0.8116047978772372, "grad_norm": 15.31782341003418, "learning_rate": 9.374416071339132e-06, "loss": 0.74441004, "memory(GiB)": 34.88, "step": 29975, "train_speed(iter/s)": 0.41759 }, { "acc": 0.84583893, "epoch": 0.8117401781604527, "grad_norm": 6.646302223205566, "learning_rate": 9.374145015624873e-06, "loss": 0.82197952, "memory(GiB)": 34.88, "step": 29980, "train_speed(iter/s)": 0.417592 }, { "acc": 0.87669449, "epoch": 0.8118755584436682, "grad_norm": 5.910333633422852, "learning_rate": 9.373873905122114e-06, "loss": 0.64874859, "memory(GiB)": 34.88, "step": 29985, "train_speed(iter/s)": 0.417594 }, { "acc": 0.86377983, "epoch": 0.8120109387268838, "grad_norm": 8.020276069641113, "learning_rate": 9.373602739834255e-06, "loss": 0.71844611, "memory(GiB)": 34.88, "step": 29990, "train_speed(iter/s)": 0.417597 }, { "acc": 0.89079638, "epoch": 0.8121463190100994, "grad_norm": 9.707955360412598, "learning_rate": 9.373331519764689e-06, "loss": 0.52000995, "memory(GiB)": 34.88, "step": 29995, "train_speed(iter/s)": 0.4176 }, { "acc": 0.88597565, "epoch": 0.812281699293315, "grad_norm": 14.068233489990234, "learning_rate": 9.373060244916816e-06, "loss": 0.63226571, "memory(GiB)": 34.88, "step": 30000, "train_speed(iter/s)": 0.417603 }, { "epoch": 0.812281699293315, "eval_acc": 0.585132416338292, "eval_loss": 1.0915454626083374, "eval_runtime": 1298.4794, "eval_samples_per_second": 66.466, "eval_steps_per_second": 2.078, "step": 30000 }, { "acc": 0.86972256, "epoch": 0.8124170795765304, "grad_norm": 10.855502128601074, "learning_rate": 9.372788915294035e-06, "loss": 0.70589938, "memory(GiB)": 34.88, "step": 30005, "train_speed(iter/s)": 0.410068 }, { "acc": 0.86765795, "epoch": 0.812552459859746, "grad_norm": 7.821189880371094, "learning_rate": 9.372517530899742e-06, "loss": 0.74096603, "memory(GiB)": 34.88, "step": 30010, "train_speed(iter/s)": 0.410071 }, { "acc": 0.86349258, "epoch": 0.8126878401429616, "grad_norm": 8.44017505645752, "learning_rate": 9.372246091737342e-06, "loss": 0.74223189, "memory(GiB)": 34.88, "step": 30015, "train_speed(iter/s)": 0.410075 }, { "acc": 0.88711472, "epoch": 0.8128232204261772, "grad_norm": 11.787969589233398, "learning_rate": 9.371974597810228e-06, "loss": 0.54965458, "memory(GiB)": 34.88, "step": 30020, "train_speed(iter/s)": 0.410079 }, { "acc": 0.85517979, "epoch": 0.8129586007093926, "grad_norm": 10.09820556640625, "learning_rate": 9.371703049121806e-06, "loss": 0.85040932, "memory(GiB)": 34.88, "step": 30025, "train_speed(iter/s)": 0.410082 }, { "acc": 0.87229185, "epoch": 0.8130939809926082, "grad_norm": 10.112238883972168, "learning_rate": 9.371431445675477e-06, "loss": 0.74582725, "memory(GiB)": 34.88, "step": 30030, "train_speed(iter/s)": 0.410086 }, { "acc": 0.84971628, "epoch": 0.8132293612758238, "grad_norm": 5.0700836181640625, "learning_rate": 9.371159787474643e-06, "loss": 0.7629457, "memory(GiB)": 34.88, "step": 30035, "train_speed(iter/s)": 0.41009 }, { "acc": 0.86733408, "epoch": 0.8133647415590394, "grad_norm": 5.310295581817627, "learning_rate": 9.370888074522703e-06, "loss": 0.58394775, "memory(GiB)": 34.88, "step": 30040, "train_speed(iter/s)": 0.410094 }, { "acc": 0.87723179, "epoch": 0.8135001218422548, "grad_norm": 7.67090368270874, "learning_rate": 9.370616306823069e-06, "loss": 0.64825845, "memory(GiB)": 34.88, "step": 30045, "train_speed(iter/s)": 0.410098 }, { "acc": 0.87366295, "epoch": 0.8136355021254704, "grad_norm": 5.9241251945495605, "learning_rate": 9.37034448437914e-06, "loss": 0.64702587, "memory(GiB)": 34.88, "step": 30050, "train_speed(iter/s)": 0.410102 }, { "acc": 0.88442011, "epoch": 0.813770882408686, "grad_norm": 7.823878765106201, "learning_rate": 9.370072607194322e-06, "loss": 0.62477589, "memory(GiB)": 34.88, "step": 30055, "train_speed(iter/s)": 0.410106 }, { "acc": 0.87657642, "epoch": 0.8139062626919016, "grad_norm": 11.573030471801758, "learning_rate": 9.369800675272021e-06, "loss": 0.70730047, "memory(GiB)": 34.88, "step": 30060, "train_speed(iter/s)": 0.410109 }, { "acc": 0.88648739, "epoch": 0.814041642975117, "grad_norm": 6.001942157745361, "learning_rate": 9.369528688615642e-06, "loss": 0.63048978, "memory(GiB)": 34.88, "step": 30065, "train_speed(iter/s)": 0.410113 }, { "acc": 0.87031794, "epoch": 0.8141770232583326, "grad_norm": 6.028524398803711, "learning_rate": 9.369256647228595e-06, "loss": 0.68092484, "memory(GiB)": 34.88, "step": 30070, "train_speed(iter/s)": 0.410117 }, { "acc": 0.89205418, "epoch": 0.8143124035415482, "grad_norm": 19.2346248626709, "learning_rate": 9.368984551114287e-06, "loss": 0.53241634, "memory(GiB)": 34.88, "step": 30075, "train_speed(iter/s)": 0.410121 }, { "acc": 0.86421299, "epoch": 0.8144477838247638, "grad_norm": 6.253881454467773, "learning_rate": 9.368712400276124e-06, "loss": 0.78816833, "memory(GiB)": 34.88, "step": 30080, "train_speed(iter/s)": 0.410125 }, { "acc": 0.87628918, "epoch": 0.8145831641079793, "grad_norm": 7.625064849853516, "learning_rate": 9.368440194717519e-06, "loss": 0.57205653, "memory(GiB)": 34.88, "step": 30085, "train_speed(iter/s)": 0.410129 }, { "acc": 0.84460011, "epoch": 0.8147185443911948, "grad_norm": 8.084280014038086, "learning_rate": 9.36816793444188e-06, "loss": 0.69943099, "memory(GiB)": 34.88, "step": 30090, "train_speed(iter/s)": 0.410133 }, { "acc": 0.84779997, "epoch": 0.8148539246744104, "grad_norm": 7.421690940856934, "learning_rate": 9.367895619452615e-06, "loss": 0.79291248, "memory(GiB)": 34.88, "step": 30095, "train_speed(iter/s)": 0.410136 }, { "acc": 0.86494617, "epoch": 0.814989304957626, "grad_norm": 9.361661911010742, "learning_rate": 9.36762324975314e-06, "loss": 0.68733187, "memory(GiB)": 34.88, "step": 30100, "train_speed(iter/s)": 0.41014 }, { "acc": 0.89898558, "epoch": 0.8151246852408415, "grad_norm": 10.18502426147461, "learning_rate": 9.367350825346865e-06, "loss": 0.52894049, "memory(GiB)": 34.88, "step": 30105, "train_speed(iter/s)": 0.410144 }, { "acc": 0.86473913, "epoch": 0.815260065524057, "grad_norm": 8.94737434387207, "learning_rate": 9.367078346237201e-06, "loss": 0.6520905, "memory(GiB)": 34.88, "step": 30110, "train_speed(iter/s)": 0.410148 }, { "acc": 0.88691998, "epoch": 0.8153954458072726, "grad_norm": 12.581239700317383, "learning_rate": 9.366805812427565e-06, "loss": 0.59728193, "memory(GiB)": 34.88, "step": 30115, "train_speed(iter/s)": 0.410152 }, { "acc": 0.8564599, "epoch": 0.8155308260904882, "grad_norm": 8.73482608795166, "learning_rate": 9.366533223921369e-06, "loss": 0.69913568, "memory(GiB)": 34.88, "step": 30120, "train_speed(iter/s)": 0.410156 }, { "acc": 0.89095612, "epoch": 0.8156662063737037, "grad_norm": 7.125681400299072, "learning_rate": 9.366260580722027e-06, "loss": 0.57204175, "memory(GiB)": 34.88, "step": 30125, "train_speed(iter/s)": 0.41016 }, { "acc": 0.87303257, "epoch": 0.8158015866569193, "grad_norm": 16.967607498168945, "learning_rate": 9.365987882832955e-06, "loss": 0.64211473, "memory(GiB)": 34.88, "step": 30130, "train_speed(iter/s)": 0.410163 }, { "acc": 0.90053368, "epoch": 0.8159369669401348, "grad_norm": 7.169380187988281, "learning_rate": 9.365715130257569e-06, "loss": 0.47769036, "memory(GiB)": 34.88, "step": 30135, "train_speed(iter/s)": 0.410168 }, { "acc": 0.87906475, "epoch": 0.8160723472233504, "grad_norm": 11.48847484588623, "learning_rate": 9.365442322999286e-06, "loss": 0.63594818, "memory(GiB)": 34.88, "step": 30140, "train_speed(iter/s)": 0.410171 }, { "acc": 0.87289543, "epoch": 0.8162077275065659, "grad_norm": 10.634796142578125, "learning_rate": 9.365169461061522e-06, "loss": 0.64747829, "memory(GiB)": 34.88, "step": 30145, "train_speed(iter/s)": 0.410176 }, { "acc": 0.85657921, "epoch": 0.8163431077897815, "grad_norm": 15.468088150024414, "learning_rate": 9.364896544447698e-06, "loss": 0.77817001, "memory(GiB)": 34.88, "step": 30150, "train_speed(iter/s)": 0.410179 }, { "acc": 0.87011395, "epoch": 0.816478488072997, "grad_norm": 5.719908237457275, "learning_rate": 9.36462357316123e-06, "loss": 0.59263968, "memory(GiB)": 34.88, "step": 30155, "train_speed(iter/s)": 0.410183 }, { "acc": 0.87188587, "epoch": 0.8166138683562126, "grad_norm": 10.350414276123047, "learning_rate": 9.36435054720554e-06, "loss": 0.67337775, "memory(GiB)": 34.88, "step": 30160, "train_speed(iter/s)": 0.410186 }, { "acc": 0.87103662, "epoch": 0.8167492486394281, "grad_norm": 6.477848052978516, "learning_rate": 9.364077466584046e-06, "loss": 0.68885226, "memory(GiB)": 34.88, "step": 30165, "train_speed(iter/s)": 0.41019 }, { "acc": 0.89013119, "epoch": 0.8168846289226437, "grad_norm": 11.087382316589355, "learning_rate": 9.36380433130017e-06, "loss": 0.563518, "memory(GiB)": 34.88, "step": 30170, "train_speed(iter/s)": 0.410194 }, { "acc": 0.85312843, "epoch": 0.8170200092058593, "grad_norm": 9.543856620788574, "learning_rate": 9.363531141357333e-06, "loss": 0.76569004, "memory(GiB)": 34.88, "step": 30175, "train_speed(iter/s)": 0.410198 }, { "acc": 0.86191425, "epoch": 0.8171553894890748, "grad_norm": 12.497124671936035, "learning_rate": 9.36325789675896e-06, "loss": 0.68380079, "memory(GiB)": 34.88, "step": 30180, "train_speed(iter/s)": 0.410202 }, { "acc": 0.88405762, "epoch": 0.8172907697722903, "grad_norm": 7.894209384918213, "learning_rate": 9.362984597508469e-06, "loss": 0.57457767, "memory(GiB)": 34.88, "step": 30185, "train_speed(iter/s)": 0.410206 }, { "acc": 0.86315737, "epoch": 0.8174261500555059, "grad_norm": 17.229795455932617, "learning_rate": 9.362711243609287e-06, "loss": 0.76132274, "memory(GiB)": 34.88, "step": 30190, "train_speed(iter/s)": 0.410209 }, { "acc": 0.87190933, "epoch": 0.8175615303387215, "grad_norm": 23.808015823364258, "learning_rate": 9.362437835064837e-06, "loss": 0.66085787, "memory(GiB)": 34.88, "step": 30195, "train_speed(iter/s)": 0.410213 }, { "acc": 0.89063616, "epoch": 0.8176969106219371, "grad_norm": 5.619999885559082, "learning_rate": 9.362164371878546e-06, "loss": 0.5319499, "memory(GiB)": 34.88, "step": 30200, "train_speed(iter/s)": 0.410217 }, { "acc": 0.88048429, "epoch": 0.8178322909051525, "grad_norm": 6.91588020324707, "learning_rate": 9.361890854053836e-06, "loss": 0.64949331, "memory(GiB)": 34.88, "step": 30205, "train_speed(iter/s)": 0.410221 }, { "acc": 0.86160545, "epoch": 0.8179676711883681, "grad_norm": 8.21065902709961, "learning_rate": 9.361617281594138e-06, "loss": 0.68910217, "memory(GiB)": 34.88, "step": 30210, "train_speed(iter/s)": 0.410224 }, { "acc": 0.86186733, "epoch": 0.8181030514715837, "grad_norm": 7.26423454284668, "learning_rate": 9.361343654502874e-06, "loss": 0.65552797, "memory(GiB)": 34.88, "step": 30215, "train_speed(iter/s)": 0.410228 }, { "acc": 0.8656559, "epoch": 0.8182384317547993, "grad_norm": 18.779619216918945, "learning_rate": 9.361069972783475e-06, "loss": 0.66784153, "memory(GiB)": 34.88, "step": 30220, "train_speed(iter/s)": 0.410231 }, { "acc": 0.89171762, "epoch": 0.8183738120380147, "grad_norm": 3.790614366531372, "learning_rate": 9.360796236439368e-06, "loss": 0.63949485, "memory(GiB)": 34.88, "step": 30225, "train_speed(iter/s)": 0.410235 }, { "acc": 0.90499382, "epoch": 0.8185091923212303, "grad_norm": 7.861325263977051, "learning_rate": 9.360522445473985e-06, "loss": 0.45968151, "memory(GiB)": 34.88, "step": 30230, "train_speed(iter/s)": 0.410239 }, { "acc": 0.85645714, "epoch": 0.8186445726044459, "grad_norm": 8.001321792602539, "learning_rate": 9.36024859989075e-06, "loss": 0.68098373, "memory(GiB)": 34.88, "step": 30235, "train_speed(iter/s)": 0.410243 }, { "acc": 0.8817584, "epoch": 0.8187799528876615, "grad_norm": 4.9008708000183105, "learning_rate": 9.359974699693098e-06, "loss": 0.61102066, "memory(GiB)": 34.88, "step": 30240, "train_speed(iter/s)": 0.410247 }, { "acc": 0.85863152, "epoch": 0.818915333170877, "grad_norm": 6.4772257804870605, "learning_rate": 9.35970074488446e-06, "loss": 0.79256554, "memory(GiB)": 34.88, "step": 30245, "train_speed(iter/s)": 0.410251 }, { "acc": 0.88029709, "epoch": 0.8190507134540925, "grad_norm": 7.68562650680542, "learning_rate": 9.359426735468268e-06, "loss": 0.5783268, "memory(GiB)": 34.88, "step": 30250, "train_speed(iter/s)": 0.410255 }, { "acc": 0.90278196, "epoch": 0.8191860937373081, "grad_norm": 7.729526042938232, "learning_rate": 9.359152671447951e-06, "loss": 0.45895433, "memory(GiB)": 34.88, "step": 30255, "train_speed(iter/s)": 0.410258 }, { "acc": 0.85198908, "epoch": 0.8193214740205237, "grad_norm": 8.212006568908691, "learning_rate": 9.358878552826945e-06, "loss": 0.78992691, "memory(GiB)": 34.88, "step": 30260, "train_speed(iter/s)": 0.410262 }, { "acc": 0.8858449, "epoch": 0.8194568543037392, "grad_norm": 14.705448150634766, "learning_rate": 9.358604379608684e-06, "loss": 0.54647818, "memory(GiB)": 34.88, "step": 30265, "train_speed(iter/s)": 0.410266 }, { "acc": 0.865802, "epoch": 0.8195922345869547, "grad_norm": 9.183642387390137, "learning_rate": 9.3583301517966e-06, "loss": 0.6476903, "memory(GiB)": 34.88, "step": 30270, "train_speed(iter/s)": 0.410269 }, { "acc": 0.87816734, "epoch": 0.8197276148701703, "grad_norm": 4.915541648864746, "learning_rate": 9.358055869394136e-06, "loss": 0.54356308, "memory(GiB)": 34.88, "step": 30275, "train_speed(iter/s)": 0.410273 }, { "acc": 0.86551332, "epoch": 0.8198629951533859, "grad_norm": 10.899072647094727, "learning_rate": 9.357781532404716e-06, "loss": 0.704811, "memory(GiB)": 34.88, "step": 30280, "train_speed(iter/s)": 0.410276 }, { "acc": 0.87479725, "epoch": 0.8199983754366014, "grad_norm": 7.4406514167785645, "learning_rate": 9.357507140831786e-06, "loss": 0.75201569, "memory(GiB)": 34.88, "step": 30285, "train_speed(iter/s)": 0.410279 }, { "acc": 0.8866394, "epoch": 0.820133755719817, "grad_norm": 8.454023361206055, "learning_rate": 9.357232694678778e-06, "loss": 0.54723101, "memory(GiB)": 34.88, "step": 30290, "train_speed(iter/s)": 0.410283 }, { "acc": 0.84298325, "epoch": 0.8202691360030325, "grad_norm": 5.590025424957275, "learning_rate": 9.356958193949134e-06, "loss": 0.87881641, "memory(GiB)": 34.88, "step": 30295, "train_speed(iter/s)": 0.410286 }, { "acc": 0.87191086, "epoch": 0.8204045162862481, "grad_norm": 9.453943252563477, "learning_rate": 9.35668363864629e-06, "loss": 0.66661921, "memory(GiB)": 34.88, "step": 30300, "train_speed(iter/s)": 0.41029 }, { "acc": 0.86234684, "epoch": 0.8205398965694636, "grad_norm": 7.470710754394531, "learning_rate": 9.356409028773686e-06, "loss": 0.66751752, "memory(GiB)": 34.88, "step": 30305, "train_speed(iter/s)": 0.410294 }, { "acc": 0.85427475, "epoch": 0.8206752768526792, "grad_norm": 9.139941215515137, "learning_rate": 9.35613436433476e-06, "loss": 0.76583285, "memory(GiB)": 34.88, "step": 30310, "train_speed(iter/s)": 0.410298 }, { "acc": 0.83937454, "epoch": 0.8208106571358947, "grad_norm": 10.674149513244629, "learning_rate": 9.355859645332958e-06, "loss": 0.94166813, "memory(GiB)": 34.88, "step": 30315, "train_speed(iter/s)": 0.410302 }, { "acc": 0.88283548, "epoch": 0.8209460374191103, "grad_norm": 6.228754043579102, "learning_rate": 9.355584871771715e-06, "loss": 0.61561069, "memory(GiB)": 34.88, "step": 30320, "train_speed(iter/s)": 0.410306 }, { "acc": 0.87632313, "epoch": 0.8210814177023258, "grad_norm": 5.691547870635986, "learning_rate": 9.355310043654478e-06, "loss": 0.64709215, "memory(GiB)": 34.88, "step": 30325, "train_speed(iter/s)": 0.41031 }, { "acc": 0.88919125, "epoch": 0.8212167979855414, "grad_norm": 5.370417594909668, "learning_rate": 9.35503516098469e-06, "loss": 0.60665884, "memory(GiB)": 34.88, "step": 30330, "train_speed(iter/s)": 0.410314 }, { "acc": 0.82314901, "epoch": 0.821352178268757, "grad_norm": 10.260148048400879, "learning_rate": 9.354760223765789e-06, "loss": 0.95590401, "memory(GiB)": 34.88, "step": 30335, "train_speed(iter/s)": 0.410317 }, { "acc": 0.84689407, "epoch": 0.8214875585519725, "grad_norm": 8.534214973449707, "learning_rate": 9.354485232001225e-06, "loss": 0.78552213, "memory(GiB)": 34.88, "step": 30340, "train_speed(iter/s)": 0.410321 }, { "acc": 0.88680334, "epoch": 0.821622938835188, "grad_norm": 6.4998674392700195, "learning_rate": 9.35421018569444e-06, "loss": 0.52509842, "memory(GiB)": 34.88, "step": 30345, "train_speed(iter/s)": 0.410325 }, { "acc": 0.8837698, "epoch": 0.8217583191184036, "grad_norm": 8.098268508911133, "learning_rate": 9.35393508484888e-06, "loss": 0.55361228, "memory(GiB)": 34.88, "step": 30350, "train_speed(iter/s)": 0.410329 }, { "acc": 0.87770576, "epoch": 0.8218936994016192, "grad_norm": 4.60148286819458, "learning_rate": 9.35365992946799e-06, "loss": 0.55418463, "memory(GiB)": 34.88, "step": 30355, "train_speed(iter/s)": 0.410332 }, { "acc": 0.87331352, "epoch": 0.8220290796848347, "grad_norm": 17.70667266845703, "learning_rate": 9.35338471955522e-06, "loss": 0.6651309, "memory(GiB)": 34.88, "step": 30360, "train_speed(iter/s)": 0.410335 }, { "acc": 0.87579498, "epoch": 0.8221644599680502, "grad_norm": 31.539228439331055, "learning_rate": 9.353109455114013e-06, "loss": 0.63344865, "memory(GiB)": 34.88, "step": 30365, "train_speed(iter/s)": 0.410339 }, { "acc": 0.87975683, "epoch": 0.8222998402512658, "grad_norm": 4.22813606262207, "learning_rate": 9.35283413614782e-06, "loss": 0.56855593, "memory(GiB)": 34.88, "step": 30370, "train_speed(iter/s)": 0.410343 }, { "acc": 0.88377876, "epoch": 0.8224352205344814, "grad_norm": 5.99257755279541, "learning_rate": 9.35255876266009e-06, "loss": 0.59775748, "memory(GiB)": 34.88, "step": 30375, "train_speed(iter/s)": 0.410347 }, { "acc": 0.88745842, "epoch": 0.822570600817697, "grad_norm": 7.087557315826416, "learning_rate": 9.352283334654273e-06, "loss": 0.57859583, "memory(GiB)": 34.88, "step": 30380, "train_speed(iter/s)": 0.41035 }, { "acc": 0.85924244, "epoch": 0.8227059811009124, "grad_norm": 25.516212463378906, "learning_rate": 9.352007852133816e-06, "loss": 0.74329243, "memory(GiB)": 34.88, "step": 30385, "train_speed(iter/s)": 0.410354 }, { "acc": 0.87029152, "epoch": 0.822841361384128, "grad_norm": 9.901715278625488, "learning_rate": 9.351732315102177e-06, "loss": 0.63867273, "memory(GiB)": 34.88, "step": 30390, "train_speed(iter/s)": 0.410358 }, { "acc": 0.88635893, "epoch": 0.8229767416673436, "grad_norm": 6.840579509735107, "learning_rate": 9.351456723562801e-06, "loss": 0.57205801, "memory(GiB)": 34.88, "step": 30395, "train_speed(iter/s)": 0.410361 }, { "acc": 0.86961117, "epoch": 0.8231121219505592, "grad_norm": 11.040739059448242, "learning_rate": 9.35118107751914e-06, "loss": 0.65335727, "memory(GiB)": 34.88, "step": 30400, "train_speed(iter/s)": 0.410365 }, { "acc": 0.88859138, "epoch": 0.8232475022337746, "grad_norm": 8.251982688903809, "learning_rate": 9.350905376974652e-06, "loss": 0.54491777, "memory(GiB)": 34.88, "step": 30405, "train_speed(iter/s)": 0.410369 }, { "acc": 0.86838207, "epoch": 0.8233828825169902, "grad_norm": 10.13121509552002, "learning_rate": 9.350629621932787e-06, "loss": 0.65241089, "memory(GiB)": 34.88, "step": 30410, "train_speed(iter/s)": 0.410373 }, { "acc": 0.87916145, "epoch": 0.8235182628002058, "grad_norm": 7.98991060256958, "learning_rate": 9.350353812397002e-06, "loss": 0.66547136, "memory(GiB)": 34.88, "step": 30415, "train_speed(iter/s)": 0.410377 }, { "acc": 0.90184488, "epoch": 0.8236536430834214, "grad_norm": 7.722672939300537, "learning_rate": 9.35007794837075e-06, "loss": 0.43042426, "memory(GiB)": 34.88, "step": 30420, "train_speed(iter/s)": 0.41038 }, { "acc": 0.88028612, "epoch": 0.8237890233666368, "grad_norm": 5.6889424324035645, "learning_rate": 9.349802029857485e-06, "loss": 0.64085617, "memory(GiB)": 34.88, "step": 30425, "train_speed(iter/s)": 0.410384 }, { "acc": 0.87113934, "epoch": 0.8239244036498524, "grad_norm": 5.468328952789307, "learning_rate": 9.349526056860668e-06, "loss": 0.63947496, "memory(GiB)": 34.88, "step": 30430, "train_speed(iter/s)": 0.410388 }, { "acc": 0.86091719, "epoch": 0.824059783933068, "grad_norm": 7.4861650466918945, "learning_rate": 9.349250029383755e-06, "loss": 0.78253202, "memory(GiB)": 34.88, "step": 30435, "train_speed(iter/s)": 0.410391 }, { "acc": 0.87272587, "epoch": 0.8241951642162836, "grad_norm": 5.55825662612915, "learning_rate": 9.348973947430201e-06, "loss": 0.67107325, "memory(GiB)": 34.88, "step": 30440, "train_speed(iter/s)": 0.410394 }, { "acc": 0.86064253, "epoch": 0.824330544499499, "grad_norm": 60.63978576660156, "learning_rate": 9.348697811003465e-06, "loss": 0.70221224, "memory(GiB)": 34.88, "step": 30445, "train_speed(iter/s)": 0.410398 }, { "acc": 0.89213448, "epoch": 0.8244659247827146, "grad_norm": 7.935004711151123, "learning_rate": 9.34842162010701e-06, "loss": 0.4903985, "memory(GiB)": 34.88, "step": 30450, "train_speed(iter/s)": 0.4104 }, { "acc": 0.86300926, "epoch": 0.8246013050659302, "grad_norm": 6.492377281188965, "learning_rate": 9.34814537474429e-06, "loss": 0.76125603, "memory(GiB)": 34.88, "step": 30455, "train_speed(iter/s)": 0.410404 }, { "acc": 0.8811223, "epoch": 0.8247366853491458, "grad_norm": 8.90649700164795, "learning_rate": 9.34786907491877e-06, "loss": 0.55103974, "memory(GiB)": 34.88, "step": 30460, "train_speed(iter/s)": 0.410408 }, { "acc": 0.89499798, "epoch": 0.8248720656323613, "grad_norm": 14.675076484680176, "learning_rate": 9.34759272063391e-06, "loss": 0.4909153, "memory(GiB)": 34.88, "step": 30465, "train_speed(iter/s)": 0.410411 }, { "acc": 0.87715702, "epoch": 0.8250074459155768, "grad_norm": 15.247480392456055, "learning_rate": 9.34731631189317e-06, "loss": 0.68002696, "memory(GiB)": 34.88, "step": 30470, "train_speed(iter/s)": 0.410415 }, { "acc": 0.8697156, "epoch": 0.8251428261987924, "grad_norm": 16.753768920898438, "learning_rate": 9.347039848700017e-06, "loss": 0.72292671, "memory(GiB)": 34.88, "step": 30475, "train_speed(iter/s)": 0.410419 }, { "acc": 0.85713415, "epoch": 0.825278206482008, "grad_norm": 4.949061393737793, "learning_rate": 9.34676333105791e-06, "loss": 0.66130581, "memory(GiB)": 34.88, "step": 30480, "train_speed(iter/s)": 0.410422 }, { "acc": 0.87787781, "epoch": 0.8254135867652235, "grad_norm": 8.774039268493652, "learning_rate": 9.346486758970314e-06, "loss": 0.59613352, "memory(GiB)": 34.88, "step": 30485, "train_speed(iter/s)": 0.410426 }, { "acc": 0.87683067, "epoch": 0.825548967048439, "grad_norm": 15.626546859741211, "learning_rate": 9.346210132440695e-06, "loss": 0.60513258, "memory(GiB)": 34.88, "step": 30490, "train_speed(iter/s)": 0.41043 }, { "acc": 0.86692286, "epoch": 0.8256843473316546, "grad_norm": 7.014272212982178, "learning_rate": 9.345933451472517e-06, "loss": 0.71105037, "memory(GiB)": 34.88, "step": 30495, "train_speed(iter/s)": 0.410433 }, { "acc": 0.86010942, "epoch": 0.8258197276148702, "grad_norm": 8.884100914001465, "learning_rate": 9.345656716069245e-06, "loss": 0.73494883, "memory(GiB)": 34.88, "step": 30500, "train_speed(iter/s)": 0.410437 }, { "acc": 0.86867447, "epoch": 0.8259551078980857, "grad_norm": 11.222723960876465, "learning_rate": 9.34537992623435e-06, "loss": 0.68375006, "memory(GiB)": 34.88, "step": 30505, "train_speed(iter/s)": 0.410441 }, { "acc": 0.88644123, "epoch": 0.8260904881813013, "grad_norm": 7.300101280212402, "learning_rate": 9.345103081971293e-06, "loss": 0.63628101, "memory(GiB)": 34.88, "step": 30510, "train_speed(iter/s)": 0.410445 }, { "acc": 0.86437998, "epoch": 0.8262258684645168, "grad_norm": 13.69954776763916, "learning_rate": 9.344826183283546e-06, "loss": 0.67037725, "memory(GiB)": 34.88, "step": 30515, "train_speed(iter/s)": 0.410448 }, { "acc": 0.87936649, "epoch": 0.8263612487477324, "grad_norm": 19.297988891601562, "learning_rate": 9.344549230174577e-06, "loss": 0.66176782, "memory(GiB)": 34.88, "step": 30520, "train_speed(iter/s)": 0.410452 }, { "acc": 0.88776112, "epoch": 0.8264966290309479, "grad_norm": 13.072673797607422, "learning_rate": 9.344272222647855e-06, "loss": 0.56930046, "memory(GiB)": 34.88, "step": 30525, "train_speed(iter/s)": 0.410455 }, { "acc": 0.86283493, "epoch": 0.8266320093141635, "grad_norm": 5.124778747558594, "learning_rate": 9.343995160706852e-06, "loss": 0.58945637, "memory(GiB)": 34.88, "step": 30530, "train_speed(iter/s)": 0.410459 }, { "acc": 0.88499956, "epoch": 0.826767389597379, "grad_norm": 13.237805366516113, "learning_rate": 9.343718044355035e-06, "loss": 0.61655002, "memory(GiB)": 34.88, "step": 30535, "train_speed(iter/s)": 0.410463 }, { "acc": 0.86291695, "epoch": 0.8269027698805946, "grad_norm": 8.021471977233887, "learning_rate": 9.34344087359588e-06, "loss": 0.70488148, "memory(GiB)": 34.88, "step": 30540, "train_speed(iter/s)": 0.410467 }, { "acc": 0.8767416, "epoch": 0.8270381501638101, "grad_norm": 6.061356544494629, "learning_rate": 9.343163648432855e-06, "loss": 0.62045593, "memory(GiB)": 34.88, "step": 30545, "train_speed(iter/s)": 0.41047 }, { "acc": 0.89040375, "epoch": 0.8271735304470257, "grad_norm": 6.03242826461792, "learning_rate": 9.342886368869437e-06, "loss": 0.51740704, "memory(GiB)": 34.88, "step": 30550, "train_speed(iter/s)": 0.410474 }, { "acc": 0.86514626, "epoch": 0.8273089107302413, "grad_norm": 6.0648956298828125, "learning_rate": 9.342609034909093e-06, "loss": 0.64965591, "memory(GiB)": 34.88, "step": 30555, "train_speed(iter/s)": 0.410477 }, { "acc": 0.8756587, "epoch": 0.8274442910134568, "grad_norm": 6.837334156036377, "learning_rate": 9.342331646555302e-06, "loss": 0.65744705, "memory(GiB)": 34.88, "step": 30560, "train_speed(iter/s)": 0.41048 }, { "acc": 0.86309233, "epoch": 0.8275796712966723, "grad_norm": 14.29580020904541, "learning_rate": 9.34205420381154e-06, "loss": 0.77015157, "memory(GiB)": 34.88, "step": 30565, "train_speed(iter/s)": 0.410484 }, { "acc": 0.8550148, "epoch": 0.8277150515798879, "grad_norm": 8.964865684509277, "learning_rate": 9.34177670668128e-06, "loss": 0.77816739, "memory(GiB)": 34.88, "step": 30570, "train_speed(iter/s)": 0.410488 }, { "acc": 0.86980314, "epoch": 0.8278504318631035, "grad_norm": 17.344942092895508, "learning_rate": 9.341499155167999e-06, "loss": 0.67380581, "memory(GiB)": 34.88, "step": 30575, "train_speed(iter/s)": 0.410491 }, { "acc": 0.86670532, "epoch": 0.8279858121463191, "grad_norm": 7.515597820281982, "learning_rate": 9.341221549275173e-06, "loss": 0.66650949, "memory(GiB)": 34.88, "step": 30580, "train_speed(iter/s)": 0.410495 }, { "acc": 0.89137049, "epoch": 0.8281211924295345, "grad_norm": 5.140060901641846, "learning_rate": 9.340943889006279e-06, "loss": 0.53538928, "memory(GiB)": 34.88, "step": 30585, "train_speed(iter/s)": 0.410499 }, { "acc": 0.8530447, "epoch": 0.8282565727127501, "grad_norm": 7.716109752655029, "learning_rate": 9.340666174364797e-06, "loss": 0.87998724, "memory(GiB)": 34.88, "step": 30590, "train_speed(iter/s)": 0.410502 }, { "acc": 0.86825142, "epoch": 0.8283919529959657, "grad_norm": 6.726121425628662, "learning_rate": 9.340388405354205e-06, "loss": 0.67457571, "memory(GiB)": 34.88, "step": 30595, "train_speed(iter/s)": 0.410506 }, { "acc": 0.89666252, "epoch": 0.8285273332791813, "grad_norm": 5.5897603034973145, "learning_rate": 9.340110581977984e-06, "loss": 0.49206338, "memory(GiB)": 34.88, "step": 30600, "train_speed(iter/s)": 0.410509 }, { "acc": 0.88500328, "epoch": 0.8286627135623967, "grad_norm": 6.679197788238525, "learning_rate": 9.339832704239612e-06, "loss": 0.55831318, "memory(GiB)": 34.88, "step": 30605, "train_speed(iter/s)": 0.410513 }, { "acc": 0.88328495, "epoch": 0.8287980938456123, "grad_norm": 14.004685401916504, "learning_rate": 9.339554772142572e-06, "loss": 0.62022142, "memory(GiB)": 34.88, "step": 30610, "train_speed(iter/s)": 0.410517 }, { "acc": 0.86586962, "epoch": 0.8289334741288279, "grad_norm": 5.54750919342041, "learning_rate": 9.339276785690345e-06, "loss": 0.56050167, "memory(GiB)": 34.88, "step": 30615, "train_speed(iter/s)": 0.41052 }, { "acc": 0.87687283, "epoch": 0.8290688544120435, "grad_norm": 9.298830032348633, "learning_rate": 9.338998744886411e-06, "loss": 0.65918655, "memory(GiB)": 34.88, "step": 30620, "train_speed(iter/s)": 0.410523 }, { "acc": 0.85120506, "epoch": 0.829204234695259, "grad_norm": 10.33125114440918, "learning_rate": 9.338720649734256e-06, "loss": 0.84317503, "memory(GiB)": 34.88, "step": 30625, "train_speed(iter/s)": 0.410527 }, { "acc": 0.88231955, "epoch": 0.8293396149784745, "grad_norm": 8.349944114685059, "learning_rate": 9.338442500237364e-06, "loss": 0.68279285, "memory(GiB)": 34.88, "step": 30630, "train_speed(iter/s)": 0.41053 }, { "acc": 0.87253551, "epoch": 0.8294749952616901, "grad_norm": 5.608755588531494, "learning_rate": 9.338164296399218e-06, "loss": 0.64828167, "memory(GiB)": 34.88, "step": 30635, "train_speed(iter/s)": 0.410533 }, { "acc": 0.89247532, "epoch": 0.8296103755449057, "grad_norm": 14.38936996459961, "learning_rate": 9.337886038223303e-06, "loss": 0.54349079, "memory(GiB)": 34.88, "step": 30640, "train_speed(iter/s)": 0.410537 }, { "acc": 0.86263447, "epoch": 0.8297457558281212, "grad_norm": 8.42068862915039, "learning_rate": 9.337607725713104e-06, "loss": 0.77983036, "memory(GiB)": 34.88, "step": 30645, "train_speed(iter/s)": 0.410541 }, { "acc": 0.89950552, "epoch": 0.8298811361113367, "grad_norm": 6.747247219085693, "learning_rate": 9.337329358872109e-06, "loss": 0.51666517, "memory(GiB)": 34.88, "step": 30650, "train_speed(iter/s)": 0.410544 }, { "acc": 0.87846537, "epoch": 0.8300165163945523, "grad_norm": 8.475239753723145, "learning_rate": 9.337050937703805e-06, "loss": 0.54186606, "memory(GiB)": 34.88, "step": 30655, "train_speed(iter/s)": 0.410547 }, { "acc": 0.86192551, "epoch": 0.8301518966777679, "grad_norm": 6.738470554351807, "learning_rate": 9.336772462211678e-06, "loss": 0.61955786, "memory(GiB)": 34.88, "step": 30660, "train_speed(iter/s)": 0.410551 }, { "acc": 0.88016071, "epoch": 0.8302872769609834, "grad_norm": 5.532372951507568, "learning_rate": 9.336493932399217e-06, "loss": 0.61743078, "memory(GiB)": 34.88, "step": 30665, "train_speed(iter/s)": 0.410555 }, { "acc": 0.85650721, "epoch": 0.830422657244199, "grad_norm": 6.126803874969482, "learning_rate": 9.336215348269912e-06, "loss": 0.75422945, "memory(GiB)": 34.88, "step": 30670, "train_speed(iter/s)": 0.410558 }, { "acc": 0.87659435, "epoch": 0.8305580375274145, "grad_norm": 8.456671714782715, "learning_rate": 9.335936709827254e-06, "loss": 0.7169898, "memory(GiB)": 34.88, "step": 30675, "train_speed(iter/s)": 0.410562 }, { "acc": 0.88921843, "epoch": 0.8306934178106301, "grad_norm": 24.46160316467285, "learning_rate": 9.335658017074732e-06, "loss": 0.55536575, "memory(GiB)": 34.88, "step": 30680, "train_speed(iter/s)": 0.410565 }, { "acc": 0.85028782, "epoch": 0.8308287980938456, "grad_norm": 9.992847442626953, "learning_rate": 9.335379270015836e-06, "loss": 0.76391211, "memory(GiB)": 34.88, "step": 30685, "train_speed(iter/s)": 0.410568 }, { "acc": 0.87757607, "epoch": 0.8309641783770612, "grad_norm": 11.73025894165039, "learning_rate": 9.335100468654061e-06, "loss": 0.70471077, "memory(GiB)": 34.88, "step": 30690, "train_speed(iter/s)": 0.410572 }, { "acc": 0.86546402, "epoch": 0.8310995586602767, "grad_norm": 12.518393516540527, "learning_rate": 9.334821612992897e-06, "loss": 0.71440029, "memory(GiB)": 34.88, "step": 30695, "train_speed(iter/s)": 0.410575 }, { "acc": 0.87621393, "epoch": 0.8312349389434923, "grad_norm": 8.510880470275879, "learning_rate": 9.334542703035837e-06, "loss": 0.69692707, "memory(GiB)": 34.88, "step": 30700, "train_speed(iter/s)": 0.410579 }, { "acc": 0.87053022, "epoch": 0.8313703192267078, "grad_norm": 9.632801055908203, "learning_rate": 9.334263738786375e-06, "loss": 0.77432241, "memory(GiB)": 34.88, "step": 30705, "train_speed(iter/s)": 0.410582 }, { "acc": 0.86882305, "epoch": 0.8315056995099234, "grad_norm": 21.92390251159668, "learning_rate": 9.333984720248009e-06, "loss": 0.71289797, "memory(GiB)": 34.88, "step": 30710, "train_speed(iter/s)": 0.410586 }, { "acc": 0.88414917, "epoch": 0.831641079793139, "grad_norm": 11.837440490722656, "learning_rate": 9.33370564742423e-06, "loss": 0.55501556, "memory(GiB)": 34.88, "step": 30715, "train_speed(iter/s)": 0.41059 }, { "acc": 0.90289793, "epoch": 0.8317764600763545, "grad_norm": 8.002978324890137, "learning_rate": 9.333426520318535e-06, "loss": 0.40628748, "memory(GiB)": 34.88, "step": 30720, "train_speed(iter/s)": 0.410593 }, { "acc": 0.86735325, "epoch": 0.83191184035957, "grad_norm": 7.270495891571045, "learning_rate": 9.333147338934422e-06, "loss": 0.61747427, "memory(GiB)": 34.88, "step": 30725, "train_speed(iter/s)": 0.410597 }, { "acc": 0.8847765, "epoch": 0.8320472206427856, "grad_norm": 7.127169132232666, "learning_rate": 9.332868103275387e-06, "loss": 0.61599712, "memory(GiB)": 34.88, "step": 30730, "train_speed(iter/s)": 0.410601 }, { "acc": 0.85428267, "epoch": 0.8321826009260012, "grad_norm": 20.46596908569336, "learning_rate": 9.332588813344929e-06, "loss": 0.77663331, "memory(GiB)": 34.88, "step": 30735, "train_speed(iter/s)": 0.410605 }, { "acc": 0.84828701, "epoch": 0.8323179812092167, "grad_norm": 6.100200176239014, "learning_rate": 9.332309469146545e-06, "loss": 0.81533623, "memory(GiB)": 34.88, "step": 30740, "train_speed(iter/s)": 0.410609 }, { "acc": 0.88612919, "epoch": 0.8324533614924322, "grad_norm": 6.106148719787598, "learning_rate": 9.332030070683736e-06, "loss": 0.61123848, "memory(GiB)": 34.88, "step": 30745, "train_speed(iter/s)": 0.410612 }, { "acc": 0.86903982, "epoch": 0.8325887417756478, "grad_norm": 6.870795726776123, "learning_rate": 9.331750617960003e-06, "loss": 0.64804287, "memory(GiB)": 34.88, "step": 30750, "train_speed(iter/s)": 0.410615 }, { "acc": 0.86648703, "epoch": 0.8327241220588634, "grad_norm": 6.22795295715332, "learning_rate": 9.331471110978844e-06, "loss": 0.67217765, "memory(GiB)": 34.88, "step": 30755, "train_speed(iter/s)": 0.410619 }, { "acc": 0.89960976, "epoch": 0.832859502342079, "grad_norm": 8.63311767578125, "learning_rate": 9.331191549743761e-06, "loss": 0.51746383, "memory(GiB)": 34.88, "step": 30760, "train_speed(iter/s)": 0.410623 }, { "acc": 0.87097054, "epoch": 0.8329948826252944, "grad_norm": 17.81694221496582, "learning_rate": 9.330911934258256e-06, "loss": 0.64447603, "memory(GiB)": 34.88, "step": 30765, "train_speed(iter/s)": 0.410626 }, { "acc": 0.89070292, "epoch": 0.83313026290851, "grad_norm": 6.280052185058594, "learning_rate": 9.330632264525834e-06, "loss": 0.53763056, "memory(GiB)": 34.88, "step": 30770, "train_speed(iter/s)": 0.41063 }, { "acc": 0.87373781, "epoch": 0.8332656431917256, "grad_norm": 7.978907585144043, "learning_rate": 9.330352540549994e-06, "loss": 0.65812669, "memory(GiB)": 34.88, "step": 30775, "train_speed(iter/s)": 0.410634 }, { "acc": 0.87957096, "epoch": 0.8334010234749412, "grad_norm": 11.757543563842773, "learning_rate": 9.330072762334246e-06, "loss": 0.55463781, "memory(GiB)": 34.88, "step": 30780, "train_speed(iter/s)": 0.410638 }, { "acc": 0.88042669, "epoch": 0.8335364037581566, "grad_norm": 37.82883071899414, "learning_rate": 9.32979292988209e-06, "loss": 0.65287294, "memory(GiB)": 34.88, "step": 30785, "train_speed(iter/s)": 0.410641 }, { "acc": 0.85283108, "epoch": 0.8336717840413722, "grad_norm": 14.808549880981445, "learning_rate": 9.329513043197034e-06, "loss": 0.76441197, "memory(GiB)": 34.88, "step": 30790, "train_speed(iter/s)": 0.410645 }, { "acc": 0.86680489, "epoch": 0.8338071643245878, "grad_norm": 6.136593818664551, "learning_rate": 9.329233102282582e-06, "loss": 0.58465843, "memory(GiB)": 34.88, "step": 30795, "train_speed(iter/s)": 0.410648 }, { "acc": 0.89932404, "epoch": 0.8339425446078034, "grad_norm": 6.699092388153076, "learning_rate": 9.328953107142242e-06, "loss": 0.51400852, "memory(GiB)": 34.88, "step": 30800, "train_speed(iter/s)": 0.410652 }, { "acc": 0.8638092, "epoch": 0.8340779248910188, "grad_norm": 14.080262184143066, "learning_rate": 9.328673057779521e-06, "loss": 0.76105299, "memory(GiB)": 34.88, "step": 30805, "train_speed(iter/s)": 0.410656 }, { "acc": 0.8732069, "epoch": 0.8342133051742344, "grad_norm": 14.510390281677246, "learning_rate": 9.32839295419793e-06, "loss": 0.61233959, "memory(GiB)": 34.88, "step": 30810, "train_speed(iter/s)": 0.410659 }, { "acc": 0.88485394, "epoch": 0.83434868545745, "grad_norm": 18.57030487060547, "learning_rate": 9.328112796400973e-06, "loss": 0.63915758, "memory(GiB)": 34.88, "step": 30815, "train_speed(iter/s)": 0.410662 }, { "acc": 0.87671843, "epoch": 0.8344840657406656, "grad_norm": 8.574296951293945, "learning_rate": 9.327832584392162e-06, "loss": 0.65741882, "memory(GiB)": 34.88, "step": 30820, "train_speed(iter/s)": 0.410666 }, { "acc": 0.86685762, "epoch": 0.834619446023881, "grad_norm": 18.370813369750977, "learning_rate": 9.327552318175008e-06, "loss": 0.75881476, "memory(GiB)": 34.88, "step": 30825, "train_speed(iter/s)": 0.41067 }, { "acc": 0.88116293, "epoch": 0.8347548263070966, "grad_norm": 8.811980247497559, "learning_rate": 9.32727199775302e-06, "loss": 0.57104092, "memory(GiB)": 34.88, "step": 30830, "train_speed(iter/s)": 0.410674 }, { "acc": 0.88229733, "epoch": 0.8348902065903122, "grad_norm": 8.15994930267334, "learning_rate": 9.326991623129713e-06, "loss": 0.58555055, "memory(GiB)": 34.88, "step": 30835, "train_speed(iter/s)": 0.410677 }, { "acc": 0.88777924, "epoch": 0.8350255868735278, "grad_norm": 22.57980728149414, "learning_rate": 9.326711194308594e-06, "loss": 0.55411267, "memory(GiB)": 34.88, "step": 30840, "train_speed(iter/s)": 0.410681 }, { "acc": 0.88100128, "epoch": 0.8351609671567433, "grad_norm": 12.356095314025879, "learning_rate": 9.326430711293181e-06, "loss": 0.66785541, "memory(GiB)": 34.88, "step": 30845, "train_speed(iter/s)": 0.410685 }, { "acc": 0.88999863, "epoch": 0.8352963474399588, "grad_norm": 10.433542251586914, "learning_rate": 9.326150174086984e-06, "loss": 0.49567838, "memory(GiB)": 34.88, "step": 30850, "train_speed(iter/s)": 0.410688 }, { "acc": 0.88632889, "epoch": 0.8354317277231744, "grad_norm": 9.169264793395996, "learning_rate": 9.32586958269352e-06, "loss": 0.57450891, "memory(GiB)": 34.88, "step": 30855, "train_speed(iter/s)": 0.410692 }, { "acc": 0.91140614, "epoch": 0.83556710800639, "grad_norm": 7.192392349243164, "learning_rate": 9.3255889371163e-06, "loss": 0.48975306, "memory(GiB)": 34.88, "step": 30860, "train_speed(iter/s)": 0.410696 }, { "acc": 0.86722946, "epoch": 0.8357024882896055, "grad_norm": 8.128971099853516, "learning_rate": 9.325308237358846e-06, "loss": 0.69035416, "memory(GiB)": 34.88, "step": 30865, "train_speed(iter/s)": 0.4107 }, { "acc": 0.8860199, "epoch": 0.835837868572821, "grad_norm": 8.328761100769043, "learning_rate": 9.32502748342467e-06, "loss": 0.58111506, "memory(GiB)": 34.88, "step": 30870, "train_speed(iter/s)": 0.410703 }, { "acc": 0.88874855, "epoch": 0.8359732488560366, "grad_norm": 8.16495418548584, "learning_rate": 9.324746675317287e-06, "loss": 0.52471619, "memory(GiB)": 34.88, "step": 30875, "train_speed(iter/s)": 0.410707 }, { "acc": 0.87276182, "epoch": 0.8361086291392522, "grad_norm": 8.062010765075684, "learning_rate": 9.324465813040219e-06, "loss": 0.7085844, "memory(GiB)": 34.88, "step": 30880, "train_speed(iter/s)": 0.410711 }, { "acc": 0.88236666, "epoch": 0.8362440094224677, "grad_norm": 20.528825759887695, "learning_rate": 9.324184896596983e-06, "loss": 0.60127001, "memory(GiB)": 34.88, "step": 30885, "train_speed(iter/s)": 0.410714 }, { "acc": 0.87313566, "epoch": 0.8363793897056833, "grad_norm": 10.19130802154541, "learning_rate": 9.323903925991098e-06, "loss": 0.59126344, "memory(GiB)": 34.88, "step": 30890, "train_speed(iter/s)": 0.410718 }, { "acc": 0.8770113, "epoch": 0.8365147699888988, "grad_norm": 10.823724746704102, "learning_rate": 9.32362290122608e-06, "loss": 0.65236025, "memory(GiB)": 34.88, "step": 30895, "train_speed(iter/s)": 0.410721 }, { "acc": 0.87246447, "epoch": 0.8366501502721144, "grad_norm": 6.997318267822266, "learning_rate": 9.323341822305455e-06, "loss": 0.71388092, "memory(GiB)": 34.88, "step": 30900, "train_speed(iter/s)": 0.410724 }, { "acc": 0.88116169, "epoch": 0.8367855305553299, "grad_norm": 6.186151027679443, "learning_rate": 9.323060689232743e-06, "loss": 0.59287519, "memory(GiB)": 34.88, "step": 30905, "train_speed(iter/s)": 0.410728 }, { "acc": 0.85459414, "epoch": 0.8369209108385455, "grad_norm": 9.379432678222656, "learning_rate": 9.322779502011463e-06, "loss": 0.76608963, "memory(GiB)": 34.88, "step": 30910, "train_speed(iter/s)": 0.410731 }, { "acc": 0.87220373, "epoch": 0.837056291121761, "grad_norm": 7.42057991027832, "learning_rate": 9.322498260645139e-06, "loss": 0.74621749, "memory(GiB)": 34.88, "step": 30915, "train_speed(iter/s)": 0.410735 }, { "acc": 0.84663887, "epoch": 0.8371916714049766, "grad_norm": 8.76191234588623, "learning_rate": 9.322216965137295e-06, "loss": 0.83289242, "memory(GiB)": 34.88, "step": 30920, "train_speed(iter/s)": 0.410738 }, { "acc": 0.86432076, "epoch": 0.8373270516881921, "grad_norm": 14.558947563171387, "learning_rate": 9.321935615491453e-06, "loss": 0.76348271, "memory(GiB)": 34.88, "step": 30925, "train_speed(iter/s)": 0.410741 }, { "acc": 0.86850863, "epoch": 0.8374624319714077, "grad_norm": 5.8659539222717285, "learning_rate": 9.321654211711138e-06, "loss": 0.70035782, "memory(GiB)": 34.88, "step": 30930, "train_speed(iter/s)": 0.410745 }, { "acc": 0.85220308, "epoch": 0.8375978122546233, "grad_norm": 16.5139102935791, "learning_rate": 9.321372753799877e-06, "loss": 0.79926023, "memory(GiB)": 34.88, "step": 30935, "train_speed(iter/s)": 0.410749 }, { "acc": 0.8634058, "epoch": 0.8377331925378388, "grad_norm": 13.061086654663086, "learning_rate": 9.321091241761192e-06, "loss": 0.72823601, "memory(GiB)": 34.88, "step": 30940, "train_speed(iter/s)": 0.410752 }, { "acc": 0.86844349, "epoch": 0.8378685728210543, "grad_norm": 6.107165336608887, "learning_rate": 9.320809675598614e-06, "loss": 0.67162566, "memory(GiB)": 34.88, "step": 30945, "train_speed(iter/s)": 0.410756 }, { "acc": 0.8798336, "epoch": 0.8380039531042699, "grad_norm": 11.199488639831543, "learning_rate": 9.320528055315668e-06, "loss": 0.6568542, "memory(GiB)": 34.88, "step": 30950, "train_speed(iter/s)": 0.41076 }, { "acc": 0.8879797, "epoch": 0.8381393333874855, "grad_norm": 7.467216491699219, "learning_rate": 9.32024638091588e-06, "loss": 0.52697778, "memory(GiB)": 34.88, "step": 30955, "train_speed(iter/s)": 0.410764 }, { "acc": 0.84779787, "epoch": 0.8382747136707009, "grad_norm": 14.957131385803223, "learning_rate": 9.319964652402781e-06, "loss": 0.77240362, "memory(GiB)": 34.88, "step": 30960, "train_speed(iter/s)": 0.410767 }, { "acc": 0.88798676, "epoch": 0.8384100939539165, "grad_norm": 10.22116470336914, "learning_rate": 9.3196828697799e-06, "loss": 0.54295349, "memory(GiB)": 34.88, "step": 30965, "train_speed(iter/s)": 0.41077 }, { "acc": 0.88222847, "epoch": 0.8385454742371321, "grad_norm": 8.215934753417969, "learning_rate": 9.319401033050765e-06, "loss": 0.58051357, "memory(GiB)": 34.88, "step": 30970, "train_speed(iter/s)": 0.410774 }, { "acc": 0.8741251, "epoch": 0.8386808545203477, "grad_norm": 10.96789264678955, "learning_rate": 9.31911914221891e-06, "loss": 0.63702273, "memory(GiB)": 34.88, "step": 30975, "train_speed(iter/s)": 0.410777 }, { "acc": 0.8738183, "epoch": 0.8388162348035632, "grad_norm": 6.3982014656066895, "learning_rate": 9.318837197287862e-06, "loss": 0.64223976, "memory(GiB)": 34.88, "step": 30980, "train_speed(iter/s)": 0.410781 }, { "acc": 0.90170097, "epoch": 0.8389516150867787, "grad_norm": 4.092942714691162, "learning_rate": 9.318555198261158e-06, "loss": 0.49603138, "memory(GiB)": 34.88, "step": 30985, "train_speed(iter/s)": 0.410784 }, { "acc": 0.85820808, "epoch": 0.8390869953699943, "grad_norm": 12.764171600341797, "learning_rate": 9.318273145142324e-06, "loss": 0.70602074, "memory(GiB)": 34.88, "step": 30990, "train_speed(iter/s)": 0.410787 }, { "acc": 0.868116, "epoch": 0.8392223756532099, "grad_norm": 13.38634967803955, "learning_rate": 9.317991037934897e-06, "loss": 0.72878675, "memory(GiB)": 34.88, "step": 30995, "train_speed(iter/s)": 0.410791 }, { "acc": 0.88060188, "epoch": 0.8393577559364254, "grad_norm": 10.681985855102539, "learning_rate": 9.317708876642415e-06, "loss": 0.58868465, "memory(GiB)": 34.88, "step": 31000, "train_speed(iter/s)": 0.410794 }, { "acc": 0.85893879, "epoch": 0.839493136219641, "grad_norm": 6.519906520843506, "learning_rate": 9.317426661268404e-06, "loss": 0.71697755, "memory(GiB)": 34.88, "step": 31005, "train_speed(iter/s)": 0.410798 }, { "acc": 0.87740879, "epoch": 0.8396285165028565, "grad_norm": 12.903746604919434, "learning_rate": 9.31714439181641e-06, "loss": 0.54385505, "memory(GiB)": 34.88, "step": 31010, "train_speed(iter/s)": 0.410801 }, { "acc": 0.87879286, "epoch": 0.8397638967860721, "grad_norm": 9.913917541503906, "learning_rate": 9.316862068289956e-06, "loss": 0.67114043, "memory(GiB)": 34.88, "step": 31015, "train_speed(iter/s)": 0.410805 }, { "acc": 0.88108845, "epoch": 0.8398992770692876, "grad_norm": 16.14031982421875, "learning_rate": 9.316579690692589e-06, "loss": 0.63479915, "memory(GiB)": 34.88, "step": 31020, "train_speed(iter/s)": 0.410809 }, { "acc": 0.88386364, "epoch": 0.8400346573525032, "grad_norm": 10.176058769226074, "learning_rate": 9.316297259027842e-06, "loss": 0.55036883, "memory(GiB)": 34.88, "step": 31025, "train_speed(iter/s)": 0.410812 }, { "acc": 0.86802912, "epoch": 0.8401700376357187, "grad_norm": 4.2457685470581055, "learning_rate": 9.316014773299253e-06, "loss": 0.67687154, "memory(GiB)": 34.88, "step": 31030, "train_speed(iter/s)": 0.410815 }, { "acc": 0.8769043, "epoch": 0.8403054179189343, "grad_norm": 6.50380277633667, "learning_rate": 9.315732233510364e-06, "loss": 0.63074441, "memory(GiB)": 34.88, "step": 31035, "train_speed(iter/s)": 0.410819 }, { "acc": 0.87958803, "epoch": 0.8404407982021498, "grad_norm": 12.433646202087402, "learning_rate": 9.315449639664709e-06, "loss": 0.63469973, "memory(GiB)": 34.88, "step": 31040, "train_speed(iter/s)": 0.410823 }, { "acc": 0.88539238, "epoch": 0.8405761784853654, "grad_norm": 14.526509284973145, "learning_rate": 9.315166991765832e-06, "loss": 0.59366188, "memory(GiB)": 34.88, "step": 31045, "train_speed(iter/s)": 0.410826 }, { "acc": 0.88881702, "epoch": 0.840711558768581, "grad_norm": 6.3697428703308105, "learning_rate": 9.314884289817274e-06, "loss": 0.59378338, "memory(GiB)": 34.88, "step": 31050, "train_speed(iter/s)": 0.41083 }, { "acc": 0.86567326, "epoch": 0.8408469390517965, "grad_norm": 9.062309265136719, "learning_rate": 9.314601533822574e-06, "loss": 0.76495223, "memory(GiB)": 34.88, "step": 31055, "train_speed(iter/s)": 0.410833 }, { "acc": 0.86838093, "epoch": 0.840982319335012, "grad_norm": 4.520322799682617, "learning_rate": 9.314318723785275e-06, "loss": 0.67100439, "memory(GiB)": 34.88, "step": 31060, "train_speed(iter/s)": 0.410836 }, { "acc": 0.87903423, "epoch": 0.8411176996182276, "grad_norm": 6.945228576660156, "learning_rate": 9.314035859708919e-06, "loss": 0.52023325, "memory(GiB)": 34.88, "step": 31065, "train_speed(iter/s)": 0.41084 }, { "acc": 0.86063786, "epoch": 0.8412530799014432, "grad_norm": 9.010824203491211, "learning_rate": 9.31375294159705e-06, "loss": 0.71510267, "memory(GiB)": 34.88, "step": 31070, "train_speed(iter/s)": 0.410843 }, { "acc": 0.87868996, "epoch": 0.8413884601846587, "grad_norm": 5.945690631866455, "learning_rate": 9.313469969453213e-06, "loss": 0.63189321, "memory(GiB)": 34.88, "step": 31075, "train_speed(iter/s)": 0.410847 }, { "acc": 0.8917675, "epoch": 0.8415238404678742, "grad_norm": 15.213325500488281, "learning_rate": 9.313186943280953e-06, "loss": 0.51937857, "memory(GiB)": 34.88, "step": 31080, "train_speed(iter/s)": 0.410851 }, { "acc": 0.88851814, "epoch": 0.8416592207510898, "grad_norm": 17.381591796875, "learning_rate": 9.312903863083813e-06, "loss": 0.58634534, "memory(GiB)": 34.88, "step": 31085, "train_speed(iter/s)": 0.410854 }, { "acc": 0.88478127, "epoch": 0.8417946010343054, "grad_norm": 8.976924896240234, "learning_rate": 9.312620728865342e-06, "loss": 0.59381928, "memory(GiB)": 34.88, "step": 31090, "train_speed(iter/s)": 0.410858 }, { "acc": 0.87105503, "epoch": 0.841929981317521, "grad_norm": 5.912143707275391, "learning_rate": 9.312337540629087e-06, "loss": 0.68249211, "memory(GiB)": 34.88, "step": 31095, "train_speed(iter/s)": 0.410862 }, { "acc": 0.8587657, "epoch": 0.8420653616007364, "grad_norm": 11.999267578125, "learning_rate": 9.312054298378592e-06, "loss": 0.85664196, "memory(GiB)": 34.88, "step": 31100, "train_speed(iter/s)": 0.410866 }, { "acc": 0.87162342, "epoch": 0.842200741883952, "grad_norm": 14.22995376586914, "learning_rate": 9.31177100211741e-06, "loss": 0.70619802, "memory(GiB)": 34.88, "step": 31105, "train_speed(iter/s)": 0.410869 }, { "acc": 0.85262842, "epoch": 0.8423361221671676, "grad_norm": 10.784896850585938, "learning_rate": 9.311487651849084e-06, "loss": 0.80281057, "memory(GiB)": 34.88, "step": 31110, "train_speed(iter/s)": 0.410873 }, { "acc": 0.89288979, "epoch": 0.8424715024503832, "grad_norm": 8.667669296264648, "learning_rate": 9.311204247577165e-06, "loss": 0.54907513, "memory(GiB)": 34.88, "step": 31115, "train_speed(iter/s)": 0.410877 }, { "acc": 0.86063271, "epoch": 0.8426068827335986, "grad_norm": 9.204651832580566, "learning_rate": 9.31092078930521e-06, "loss": 0.74483852, "memory(GiB)": 34.88, "step": 31120, "train_speed(iter/s)": 0.41088 }, { "acc": 0.87073755, "epoch": 0.8427422630168142, "grad_norm": 9.681648254394531, "learning_rate": 9.31063727703676e-06, "loss": 0.66280022, "memory(GiB)": 34.88, "step": 31125, "train_speed(iter/s)": 0.410883 }, { "acc": 0.88465347, "epoch": 0.8428776433000298, "grad_norm": 5.527500629425049, "learning_rate": 9.310353710775373e-06, "loss": 0.54539547, "memory(GiB)": 34.88, "step": 31130, "train_speed(iter/s)": 0.410886 }, { "acc": 0.88696651, "epoch": 0.8430130235832454, "grad_norm": 5.7219085693359375, "learning_rate": 9.3100700905246e-06, "loss": 0.53642764, "memory(GiB)": 34.88, "step": 31135, "train_speed(iter/s)": 0.410889 }, { "acc": 0.87288799, "epoch": 0.8431484038664608, "grad_norm": 12.5492582321167, "learning_rate": 9.309786416287993e-06, "loss": 0.58155251, "memory(GiB)": 34.88, "step": 31140, "train_speed(iter/s)": 0.410892 }, { "acc": 0.87108421, "epoch": 0.8432837841496764, "grad_norm": 9.29755973815918, "learning_rate": 9.309502688069105e-06, "loss": 0.64641557, "memory(GiB)": 34.88, "step": 31145, "train_speed(iter/s)": 0.410896 }, { "acc": 0.87134905, "epoch": 0.843419164432892, "grad_norm": 11.50195598602295, "learning_rate": 9.309218905871493e-06, "loss": 0.64961896, "memory(GiB)": 34.88, "step": 31150, "train_speed(iter/s)": 0.410899 }, { "acc": 0.86707706, "epoch": 0.8435545447161076, "grad_norm": 6.68118143081665, "learning_rate": 9.308935069698708e-06, "loss": 0.73806682, "memory(GiB)": 34.88, "step": 31155, "train_speed(iter/s)": 0.410902 }, { "acc": 0.88239021, "epoch": 0.843689924999323, "grad_norm": 6.563726902008057, "learning_rate": 9.308651179554309e-06, "loss": 0.6197721, "memory(GiB)": 34.88, "step": 31160, "train_speed(iter/s)": 0.410906 }, { "acc": 0.87941456, "epoch": 0.8438253052825386, "grad_norm": 16.329692840576172, "learning_rate": 9.308367235441851e-06, "loss": 0.64340105, "memory(GiB)": 34.88, "step": 31165, "train_speed(iter/s)": 0.41091 }, { "acc": 0.86513176, "epoch": 0.8439606855657542, "grad_norm": 8.292678833007812, "learning_rate": 9.30808323736489e-06, "loss": 0.71779437, "memory(GiB)": 34.88, "step": 31170, "train_speed(iter/s)": 0.410913 }, { "acc": 0.88371553, "epoch": 0.8440960658489698, "grad_norm": 5.00565242767334, "learning_rate": 9.307799185326985e-06, "loss": 0.55258784, "memory(GiB)": 34.88, "step": 31175, "train_speed(iter/s)": 0.410917 }, { "acc": 0.86537876, "epoch": 0.8442314461321853, "grad_norm": 13.422431945800781, "learning_rate": 9.307515079331694e-06, "loss": 0.73897634, "memory(GiB)": 34.88, "step": 31180, "train_speed(iter/s)": 0.41092 }, { "acc": 0.87623758, "epoch": 0.8443668264154008, "grad_norm": 15.385167121887207, "learning_rate": 9.307230919382576e-06, "loss": 0.70519376, "memory(GiB)": 34.88, "step": 31185, "train_speed(iter/s)": 0.410924 }, { "acc": 0.87461605, "epoch": 0.8445022066986164, "grad_norm": 24.003175735473633, "learning_rate": 9.306946705483191e-06, "loss": 0.69184213, "memory(GiB)": 34.88, "step": 31190, "train_speed(iter/s)": 0.410928 }, { "acc": 0.88067188, "epoch": 0.844637586981832, "grad_norm": 8.599313735961914, "learning_rate": 9.306662437637097e-06, "loss": 0.58390598, "memory(GiB)": 34.88, "step": 31195, "train_speed(iter/s)": 0.410931 }, { "acc": 0.88272362, "epoch": 0.8447729672650475, "grad_norm": 10.046957015991211, "learning_rate": 9.306378115847859e-06, "loss": 0.68383942, "memory(GiB)": 34.88, "step": 31200, "train_speed(iter/s)": 0.410935 }, { "acc": 0.89645367, "epoch": 0.844908347548263, "grad_norm": 11.6033296585083, "learning_rate": 9.306093740119036e-06, "loss": 0.47068682, "memory(GiB)": 34.88, "step": 31205, "train_speed(iter/s)": 0.410938 }, { "acc": 0.84634399, "epoch": 0.8450437278314786, "grad_norm": 5.7318034172058105, "learning_rate": 9.305809310454188e-06, "loss": 0.79652805, "memory(GiB)": 34.88, "step": 31210, "train_speed(iter/s)": 0.410942 }, { "acc": 0.898388, "epoch": 0.8451791081146942, "grad_norm": 6.6981916427612305, "learning_rate": 9.305524826856887e-06, "loss": 0.58829346, "memory(GiB)": 34.88, "step": 31215, "train_speed(iter/s)": 0.410946 }, { "acc": 0.87584972, "epoch": 0.8453144883979097, "grad_norm": 8.025644302368164, "learning_rate": 9.305240289330686e-06, "loss": 0.53760657, "memory(GiB)": 34.88, "step": 31220, "train_speed(iter/s)": 0.410949 }, { "acc": 0.8690958, "epoch": 0.8454498686811253, "grad_norm": 6.9950337409973145, "learning_rate": 9.304955697879155e-06, "loss": 0.67669563, "memory(GiB)": 34.88, "step": 31225, "train_speed(iter/s)": 0.410953 }, { "acc": 0.87850151, "epoch": 0.8455852489643408, "grad_norm": 7.597714424133301, "learning_rate": 9.304671052505858e-06, "loss": 0.64125929, "memory(GiB)": 34.88, "step": 31230, "train_speed(iter/s)": 0.410956 }, { "acc": 0.88537483, "epoch": 0.8457206292475564, "grad_norm": 8.667725563049316, "learning_rate": 9.30438635321436e-06, "loss": 0.68974762, "memory(GiB)": 34.88, "step": 31235, "train_speed(iter/s)": 0.41096 }, { "acc": 0.8796875, "epoch": 0.8458560095307719, "grad_norm": 8.521662712097168, "learning_rate": 9.30410160000823e-06, "loss": 0.60356402, "memory(GiB)": 34.88, "step": 31240, "train_speed(iter/s)": 0.410963 }, { "acc": 0.85927992, "epoch": 0.8459913898139875, "grad_norm": 10.713591575622559, "learning_rate": 9.303816792891034e-06, "loss": 0.77246895, "memory(GiB)": 34.88, "step": 31245, "train_speed(iter/s)": 0.410966 }, { "acc": 0.87258701, "epoch": 0.846126770097203, "grad_norm": 7.878384590148926, "learning_rate": 9.30353193186634e-06, "loss": 0.62711115, "memory(GiB)": 34.88, "step": 31250, "train_speed(iter/s)": 0.41097 }, { "acc": 0.87052841, "epoch": 0.8462621503804186, "grad_norm": 16.75082778930664, "learning_rate": 9.303247016937712e-06, "loss": 0.55162301, "memory(GiB)": 34.88, "step": 31255, "train_speed(iter/s)": 0.410974 }, { "acc": 0.88136206, "epoch": 0.8463975306636341, "grad_norm": 15.128561019897461, "learning_rate": 9.302962048108725e-06, "loss": 0.63546295, "memory(GiB)": 34.88, "step": 31260, "train_speed(iter/s)": 0.410977 }, { "acc": 0.87616339, "epoch": 0.8465329109468497, "grad_norm": 16.07102394104004, "learning_rate": 9.302677025382947e-06, "loss": 0.60938368, "memory(GiB)": 34.88, "step": 31265, "train_speed(iter/s)": 0.410981 }, { "acc": 0.8930809, "epoch": 0.8466682912300653, "grad_norm": 9.444083213806152, "learning_rate": 9.302391948763945e-06, "loss": 0.50843067, "memory(GiB)": 34.88, "step": 31270, "train_speed(iter/s)": 0.410984 }, { "acc": 0.88214474, "epoch": 0.8468036715132808, "grad_norm": 12.455187797546387, "learning_rate": 9.302106818255298e-06, "loss": 0.60652003, "memory(GiB)": 34.88, "step": 31275, "train_speed(iter/s)": 0.410988 }, { "acc": 0.87205334, "epoch": 0.8469390517964963, "grad_norm": 5.248563289642334, "learning_rate": 9.301821633860569e-06, "loss": 0.65843668, "memory(GiB)": 34.88, "step": 31280, "train_speed(iter/s)": 0.410991 }, { "acc": 0.88437519, "epoch": 0.8470744320797119, "grad_norm": 6.705349922180176, "learning_rate": 9.301536395583335e-06, "loss": 0.56179991, "memory(GiB)": 34.88, "step": 31285, "train_speed(iter/s)": 0.410995 }, { "acc": 0.87458172, "epoch": 0.8472098123629275, "grad_norm": 11.131035804748535, "learning_rate": 9.301251103427171e-06, "loss": 0.69791079, "memory(GiB)": 34.88, "step": 31290, "train_speed(iter/s)": 0.410998 }, { "acc": 0.86055174, "epoch": 0.847345192646143, "grad_norm": 5.966316223144531, "learning_rate": 9.300965757395645e-06, "loss": 0.66544399, "memory(GiB)": 34.88, "step": 31295, "train_speed(iter/s)": 0.411002 }, { "acc": 0.85411282, "epoch": 0.8474805729293585, "grad_norm": 10.056069374084473, "learning_rate": 9.300680357492338e-06, "loss": 0.70195398, "memory(GiB)": 34.88, "step": 31300, "train_speed(iter/s)": 0.411006 }, { "acc": 0.89196777, "epoch": 0.8476159532125741, "grad_norm": 9.818523406982422, "learning_rate": 9.30039490372082e-06, "loss": 0.47892008, "memory(GiB)": 34.88, "step": 31305, "train_speed(iter/s)": 0.411009 }, { "acc": 0.87914476, "epoch": 0.8477513334957897, "grad_norm": 13.629525184631348, "learning_rate": 9.300109396084668e-06, "loss": 0.58502774, "memory(GiB)": 34.88, "step": 31310, "train_speed(iter/s)": 0.411013 }, { "acc": 0.89543285, "epoch": 0.8478867137790053, "grad_norm": 6.028637409210205, "learning_rate": 9.299823834587463e-06, "loss": 0.4754467, "memory(GiB)": 34.88, "step": 31315, "train_speed(iter/s)": 0.411016 }, { "acc": 0.85706139, "epoch": 0.8480220940622207, "grad_norm": 8.76657485961914, "learning_rate": 9.29953821923278e-06, "loss": 0.71559153, "memory(GiB)": 34.88, "step": 31320, "train_speed(iter/s)": 0.41102 }, { "acc": 0.86978989, "epoch": 0.8481574743454363, "grad_norm": 10.398370742797852, "learning_rate": 9.29925255002419e-06, "loss": 0.64183741, "memory(GiB)": 34.88, "step": 31325, "train_speed(iter/s)": 0.411023 }, { "acc": 0.85272636, "epoch": 0.8482928546286519, "grad_norm": 8.710890769958496, "learning_rate": 9.29896682696528e-06, "loss": 0.73990831, "memory(GiB)": 34.88, "step": 31330, "train_speed(iter/s)": 0.411027 }, { "acc": 0.8857811, "epoch": 0.8484282349118675, "grad_norm": 6.22636604309082, "learning_rate": 9.298681050059628e-06, "loss": 0.56130342, "memory(GiB)": 34.88, "step": 31335, "train_speed(iter/s)": 0.411031 }, { "acc": 0.87844858, "epoch": 0.8485636151950829, "grad_norm": 16.522716522216797, "learning_rate": 9.298395219310811e-06, "loss": 0.66322265, "memory(GiB)": 34.88, "step": 31340, "train_speed(iter/s)": 0.411034 }, { "acc": 0.86782913, "epoch": 0.8486989954782985, "grad_norm": 9.483316421508789, "learning_rate": 9.29810933472241e-06, "loss": 0.73120842, "memory(GiB)": 34.88, "step": 31345, "train_speed(iter/s)": 0.411038 }, { "acc": 0.89614067, "epoch": 0.8488343757615141, "grad_norm": 6.1054582595825195, "learning_rate": 9.29782339629801e-06, "loss": 0.52254448, "memory(GiB)": 34.88, "step": 31350, "train_speed(iter/s)": 0.411041 }, { "acc": 0.88317223, "epoch": 0.8489697560447297, "grad_norm": 6.163998126983643, "learning_rate": 9.297537404041189e-06, "loss": 0.57228909, "memory(GiB)": 34.88, "step": 31355, "train_speed(iter/s)": 0.411044 }, { "acc": 0.84663858, "epoch": 0.8491051363279452, "grad_norm": 11.81348991394043, "learning_rate": 9.297251357955531e-06, "loss": 0.82052412, "memory(GiB)": 34.88, "step": 31360, "train_speed(iter/s)": 0.411047 }, { "acc": 0.8603178, "epoch": 0.8492405166111607, "grad_norm": 11.79477596282959, "learning_rate": 9.296965258044618e-06, "loss": 0.77605944, "memory(GiB)": 34.88, "step": 31365, "train_speed(iter/s)": 0.41105 }, { "acc": 0.89068546, "epoch": 0.8493758968943763, "grad_norm": 9.513374328613281, "learning_rate": 9.296679104312036e-06, "loss": 0.52656927, "memory(GiB)": 34.88, "step": 31370, "train_speed(iter/s)": 0.411053 }, { "acc": 0.86076641, "epoch": 0.8495112771775919, "grad_norm": 7.336248397827148, "learning_rate": 9.29639289676137e-06, "loss": 0.67377462, "memory(GiB)": 34.88, "step": 31375, "train_speed(iter/s)": 0.411056 }, { "acc": 0.87571268, "epoch": 0.8496466574608074, "grad_norm": 34.226444244384766, "learning_rate": 9.296106635396202e-06, "loss": 0.63036051, "memory(GiB)": 34.88, "step": 31380, "train_speed(iter/s)": 0.41106 }, { "acc": 0.89238157, "epoch": 0.8497820377440229, "grad_norm": 6.765810489654541, "learning_rate": 9.295820320220121e-06, "loss": 0.52189493, "memory(GiB)": 34.88, "step": 31385, "train_speed(iter/s)": 0.411064 }, { "acc": 0.89423571, "epoch": 0.8499174180272385, "grad_norm": 7.066256523132324, "learning_rate": 9.295533951236714e-06, "loss": 0.53729334, "memory(GiB)": 34.88, "step": 31390, "train_speed(iter/s)": 0.411068 }, { "acc": 0.87847633, "epoch": 0.8500527983104541, "grad_norm": 10.376053810119629, "learning_rate": 9.295247528449565e-06, "loss": 0.62101693, "memory(GiB)": 34.88, "step": 31395, "train_speed(iter/s)": 0.411071 }, { "acc": 0.86905146, "epoch": 0.8501881785936696, "grad_norm": 6.601024150848389, "learning_rate": 9.294961051862264e-06, "loss": 0.7238708, "memory(GiB)": 34.88, "step": 31400, "train_speed(iter/s)": 0.411075 }, { "acc": 0.87399254, "epoch": 0.8503235588768852, "grad_norm": 6.584787368774414, "learning_rate": 9.2946745214784e-06, "loss": 0.58546348, "memory(GiB)": 34.88, "step": 31405, "train_speed(iter/s)": 0.411078 }, { "acc": 0.86527691, "epoch": 0.8504589391601007, "grad_norm": 9.073066711425781, "learning_rate": 9.294387937301562e-06, "loss": 0.67863903, "memory(GiB)": 34.88, "step": 31410, "train_speed(iter/s)": 0.411082 }, { "acc": 0.86679258, "epoch": 0.8505943194433163, "grad_norm": 8.230023384094238, "learning_rate": 9.29410129933534e-06, "loss": 0.76474004, "memory(GiB)": 34.88, "step": 31415, "train_speed(iter/s)": 0.411085 }, { "acc": 0.85012569, "epoch": 0.8507296997265318, "grad_norm": 13.54732894897461, "learning_rate": 9.293814607583327e-06, "loss": 0.75352993, "memory(GiB)": 34.88, "step": 31420, "train_speed(iter/s)": 0.411089 }, { "acc": 0.88379507, "epoch": 0.8508650800097474, "grad_norm": 5.471715927124023, "learning_rate": 9.29352786204911e-06, "loss": 0.63001699, "memory(GiB)": 34.88, "step": 31425, "train_speed(iter/s)": 0.411092 }, { "acc": 0.85858431, "epoch": 0.851000460292963, "grad_norm": 8.635991096496582, "learning_rate": 9.293241062736283e-06, "loss": 0.75691361, "memory(GiB)": 34.88, "step": 31430, "train_speed(iter/s)": 0.411096 }, { "acc": 0.85671501, "epoch": 0.8511358405761785, "grad_norm": 8.928603172302246, "learning_rate": 9.292954209648438e-06, "loss": 0.79550591, "memory(GiB)": 34.88, "step": 31435, "train_speed(iter/s)": 0.411099 }, { "acc": 0.88394241, "epoch": 0.851271220859394, "grad_norm": 39.066123962402344, "learning_rate": 9.292667302789172e-06, "loss": 0.60334029, "memory(GiB)": 34.88, "step": 31440, "train_speed(iter/s)": 0.411103 }, { "acc": 0.8720871, "epoch": 0.8514066011426096, "grad_norm": 6.602163314819336, "learning_rate": 9.292380342162077e-06, "loss": 0.74005418, "memory(GiB)": 34.88, "step": 31445, "train_speed(iter/s)": 0.411106 }, { "acc": 0.87195005, "epoch": 0.8515419814258252, "grad_norm": 12.673577308654785, "learning_rate": 9.292093327770745e-06, "loss": 0.55518045, "memory(GiB)": 34.88, "step": 31450, "train_speed(iter/s)": 0.41111 }, { "acc": 0.86816654, "epoch": 0.8516773617090407, "grad_norm": 6.871840000152588, "learning_rate": 9.291806259618776e-06, "loss": 0.70448642, "memory(GiB)": 34.88, "step": 31455, "train_speed(iter/s)": 0.411114 }, { "acc": 0.88311825, "epoch": 0.8518127419922562, "grad_norm": 10.700484275817871, "learning_rate": 9.291519137709762e-06, "loss": 0.67710238, "memory(GiB)": 34.88, "step": 31460, "train_speed(iter/s)": 0.411117 }, { "acc": 0.86497612, "epoch": 0.8519481222754718, "grad_norm": 9.249543190002441, "learning_rate": 9.2912319620473e-06, "loss": 0.678228, "memory(GiB)": 34.88, "step": 31465, "train_speed(iter/s)": 0.411121 }, { "acc": 0.86807337, "epoch": 0.8520835025586874, "grad_norm": 6.162210464477539, "learning_rate": 9.290944732634991e-06, "loss": 0.75526524, "memory(GiB)": 34.88, "step": 31470, "train_speed(iter/s)": 0.411124 }, { "acc": 0.8810379, "epoch": 0.852218882841903, "grad_norm": 6.337108135223389, "learning_rate": 9.29065744947643e-06, "loss": 0.68371301, "memory(GiB)": 34.88, "step": 31475, "train_speed(iter/s)": 0.411128 }, { "acc": 0.86633949, "epoch": 0.8523542631251184, "grad_norm": 32.72860336303711, "learning_rate": 9.290370112575218e-06, "loss": 0.68724251, "memory(GiB)": 34.88, "step": 31480, "train_speed(iter/s)": 0.411132 }, { "acc": 0.88463116, "epoch": 0.852489643408334, "grad_norm": 9.2335205078125, "learning_rate": 9.290082721934951e-06, "loss": 0.63303246, "memory(GiB)": 34.88, "step": 31485, "train_speed(iter/s)": 0.411135 }, { "acc": 0.87322969, "epoch": 0.8526250236915496, "grad_norm": 8.833863258361816, "learning_rate": 9.289795277559234e-06, "loss": 0.71390238, "memory(GiB)": 34.88, "step": 31490, "train_speed(iter/s)": 0.411138 }, { "acc": 0.87091904, "epoch": 0.8527604039747652, "grad_norm": 9.594304084777832, "learning_rate": 9.289507779451663e-06, "loss": 0.68126116, "memory(GiB)": 34.88, "step": 31495, "train_speed(iter/s)": 0.411142 }, { "acc": 0.91399899, "epoch": 0.8528957842579806, "grad_norm": 9.215169906616211, "learning_rate": 9.289220227615843e-06, "loss": 0.4733912, "memory(GiB)": 34.88, "step": 31500, "train_speed(iter/s)": 0.411145 }, { "acc": 0.87486992, "epoch": 0.8530311645411962, "grad_norm": 11.836713790893555, "learning_rate": 9.288932622055375e-06, "loss": 0.66170006, "memory(GiB)": 34.88, "step": 31505, "train_speed(iter/s)": 0.411149 }, { "acc": 0.87632637, "epoch": 0.8531665448244118, "grad_norm": 5.330778121948242, "learning_rate": 9.288644962773862e-06, "loss": 0.65290699, "memory(GiB)": 34.88, "step": 31510, "train_speed(iter/s)": 0.411152 }, { "acc": 0.86123886, "epoch": 0.8533019251076274, "grad_norm": 8.81719970703125, "learning_rate": 9.288357249774907e-06, "loss": 0.71382236, "memory(GiB)": 34.88, "step": 31515, "train_speed(iter/s)": 0.411156 }, { "acc": 0.89456053, "epoch": 0.8534373053908428, "grad_norm": 12.344792366027832, "learning_rate": 9.288069483062113e-06, "loss": 0.5334384, "memory(GiB)": 34.88, "step": 31520, "train_speed(iter/s)": 0.41116 }, { "acc": 0.8630497, "epoch": 0.8535726856740584, "grad_norm": 9.964333534240723, "learning_rate": 9.287781662639086e-06, "loss": 0.82587357, "memory(GiB)": 34.88, "step": 31525, "train_speed(iter/s)": 0.411162 }, { "acc": 0.88273163, "epoch": 0.853708065957274, "grad_norm": 6.6037516593933105, "learning_rate": 9.287493788509433e-06, "loss": 0.61866693, "memory(GiB)": 34.88, "step": 31530, "train_speed(iter/s)": 0.411166 }, { "acc": 0.88499851, "epoch": 0.8538434462404896, "grad_norm": 5.9541802406311035, "learning_rate": 9.287205860676758e-06, "loss": 0.60609417, "memory(GiB)": 34.88, "step": 31535, "train_speed(iter/s)": 0.41117 }, { "acc": 0.88187513, "epoch": 0.853978826523705, "grad_norm": 8.009407997131348, "learning_rate": 9.286917879144668e-06, "loss": 0.53338609, "memory(GiB)": 34.88, "step": 31540, "train_speed(iter/s)": 0.411173 }, { "acc": 0.89753904, "epoch": 0.8541142068069206, "grad_norm": 8.326183319091797, "learning_rate": 9.286629843916774e-06, "loss": 0.61468458, "memory(GiB)": 34.88, "step": 31545, "train_speed(iter/s)": 0.411176 }, { "acc": 0.89361839, "epoch": 0.8542495870901362, "grad_norm": 6.056859016418457, "learning_rate": 9.28634175499668e-06, "loss": 0.51287789, "memory(GiB)": 34.88, "step": 31550, "train_speed(iter/s)": 0.41118 }, { "acc": 0.8711339, "epoch": 0.8543849673733518, "grad_norm": 6.688970565795898, "learning_rate": 9.286053612387996e-06, "loss": 0.55452461, "memory(GiB)": 34.88, "step": 31555, "train_speed(iter/s)": 0.411183 }, { "acc": 0.86588612, "epoch": 0.8545203476565673, "grad_norm": 6.910975456237793, "learning_rate": 9.285765416094332e-06, "loss": 0.72806253, "memory(GiB)": 34.88, "step": 31560, "train_speed(iter/s)": 0.411187 }, { "acc": 0.8828289, "epoch": 0.8546557279397828, "grad_norm": 3.292442560195923, "learning_rate": 9.285477166119298e-06, "loss": 0.59348755, "memory(GiB)": 34.88, "step": 31565, "train_speed(iter/s)": 0.41119 }, { "acc": 0.8772191, "epoch": 0.8547911082229984, "grad_norm": 6.444752216339111, "learning_rate": 9.285188862466505e-06, "loss": 0.58319392, "memory(GiB)": 34.88, "step": 31570, "train_speed(iter/s)": 0.411194 }, { "acc": 0.87246304, "epoch": 0.854926488506214, "grad_norm": 5.197853088378906, "learning_rate": 9.284900505139567e-06, "loss": 0.66898327, "memory(GiB)": 34.88, "step": 31575, "train_speed(iter/s)": 0.411197 }, { "acc": 0.85347662, "epoch": 0.8550618687894295, "grad_norm": 7.008600234985352, "learning_rate": 9.28461209414209e-06, "loss": 0.84487486, "memory(GiB)": 34.88, "step": 31580, "train_speed(iter/s)": 0.411201 }, { "acc": 0.86162033, "epoch": 0.855197249072645, "grad_norm": 7.8535308837890625, "learning_rate": 9.284323629477691e-06, "loss": 0.7798944, "memory(GiB)": 34.88, "step": 31585, "train_speed(iter/s)": 0.411204 }, { "acc": 0.8808362, "epoch": 0.8553326293558606, "grad_norm": 5.157219886779785, "learning_rate": 9.284035111149988e-06, "loss": 0.63998337, "memory(GiB)": 34.88, "step": 31590, "train_speed(iter/s)": 0.411208 }, { "acc": 0.86211014, "epoch": 0.8554680096390762, "grad_norm": 12.91242790222168, "learning_rate": 9.283746539162586e-06, "loss": 0.75011778, "memory(GiB)": 34.88, "step": 31595, "train_speed(iter/s)": 0.411211 }, { "acc": 0.85002575, "epoch": 0.8556033899222917, "grad_norm": 10.205294609069824, "learning_rate": 9.283457913519105e-06, "loss": 0.90961285, "memory(GiB)": 34.88, "step": 31600, "train_speed(iter/s)": 0.411214 }, { "acc": 0.87849827, "epoch": 0.8557387702055073, "grad_norm": 6.825438499450684, "learning_rate": 9.283169234223161e-06, "loss": 0.62133765, "memory(GiB)": 34.88, "step": 31605, "train_speed(iter/s)": 0.411218 }, { "acc": 0.89086494, "epoch": 0.8558741504887228, "grad_norm": 7.404874801635742, "learning_rate": 9.282880501278371e-06, "loss": 0.50904112, "memory(GiB)": 34.88, "step": 31610, "train_speed(iter/s)": 0.411222 }, { "acc": 0.89696198, "epoch": 0.8560095307719384, "grad_norm": 5.848932266235352, "learning_rate": 9.282591714688349e-06, "loss": 0.49078903, "memory(GiB)": 34.88, "step": 31615, "train_speed(iter/s)": 0.411225 }, { "acc": 0.88238249, "epoch": 0.8561449110551539, "grad_norm": 9.388354301452637, "learning_rate": 9.282302874456713e-06, "loss": 0.62679081, "memory(GiB)": 34.88, "step": 31620, "train_speed(iter/s)": 0.411229 }, { "acc": 0.85059681, "epoch": 0.8562802913383695, "grad_norm": 7.41532039642334, "learning_rate": 9.282013980587084e-06, "loss": 0.82971745, "memory(GiB)": 34.88, "step": 31625, "train_speed(iter/s)": 0.411232 }, { "acc": 0.88582907, "epoch": 0.856415671621585, "grad_norm": 14.434385299682617, "learning_rate": 9.281725033083077e-06, "loss": 0.64817724, "memory(GiB)": 34.88, "step": 31630, "train_speed(iter/s)": 0.411235 }, { "acc": 0.91423073, "epoch": 0.8565510519048006, "grad_norm": 5.083515167236328, "learning_rate": 9.281436031948315e-06, "loss": 0.4451551, "memory(GiB)": 34.88, "step": 31635, "train_speed(iter/s)": 0.411239 }, { "acc": 0.88561459, "epoch": 0.8566864321880161, "grad_norm": 7.608272075653076, "learning_rate": 9.28114697718642e-06, "loss": 0.58987112, "memory(GiB)": 34.88, "step": 31640, "train_speed(iter/s)": 0.411242 }, { "acc": 0.85641861, "epoch": 0.8568218124712317, "grad_norm": 35.89198303222656, "learning_rate": 9.280857868801005e-06, "loss": 0.81014481, "memory(GiB)": 34.88, "step": 31645, "train_speed(iter/s)": 0.411246 }, { "acc": 0.86154461, "epoch": 0.8569571927544473, "grad_norm": 10.54110336303711, "learning_rate": 9.2805687067957e-06, "loss": 0.69935617, "memory(GiB)": 34.88, "step": 31650, "train_speed(iter/s)": 0.41125 }, { "acc": 0.89681854, "epoch": 0.8570925730376628, "grad_norm": 13.306772232055664, "learning_rate": 9.280279491174124e-06, "loss": 0.52338123, "memory(GiB)": 34.88, "step": 31655, "train_speed(iter/s)": 0.411252 }, { "acc": 0.87757645, "epoch": 0.8572279533208783, "grad_norm": 6.249135494232178, "learning_rate": 9.2799902219399e-06, "loss": 0.6536305, "memory(GiB)": 34.88, "step": 31660, "train_speed(iter/s)": 0.411256 }, { "acc": 0.86991529, "epoch": 0.8573633336040939, "grad_norm": 6.565624713897705, "learning_rate": 9.279700899096653e-06, "loss": 0.62908592, "memory(GiB)": 34.88, "step": 31665, "train_speed(iter/s)": 0.411259 }, { "acc": 0.8772234, "epoch": 0.8574987138873095, "grad_norm": 13.420462608337402, "learning_rate": 9.279411522648004e-06, "loss": 0.55514035, "memory(GiB)": 34.88, "step": 31670, "train_speed(iter/s)": 0.411263 }, { "acc": 0.86649866, "epoch": 0.857634094170525, "grad_norm": 5.822627067565918, "learning_rate": 9.279122092597578e-06, "loss": 0.63070192, "memory(GiB)": 34.88, "step": 31675, "train_speed(iter/s)": 0.411266 }, { "acc": 0.88555851, "epoch": 0.8577694744537405, "grad_norm": 6.738651275634766, "learning_rate": 9.278832608949007e-06, "loss": 0.58726463, "memory(GiB)": 34.88, "step": 31680, "train_speed(iter/s)": 0.41127 }, { "acc": 0.87677975, "epoch": 0.8579048547369561, "grad_norm": 6.032751560211182, "learning_rate": 9.278543071705912e-06, "loss": 0.59016471, "memory(GiB)": 34.88, "step": 31685, "train_speed(iter/s)": 0.411273 }, { "acc": 0.87291965, "epoch": 0.8580402350201717, "grad_norm": 7.271452903747559, "learning_rate": 9.278253480871921e-06, "loss": 0.62299967, "memory(GiB)": 34.88, "step": 31690, "train_speed(iter/s)": 0.411276 }, { "acc": 0.87967739, "epoch": 0.8581756153033873, "grad_norm": 11.205204010009766, "learning_rate": 9.27796383645066e-06, "loss": 0.628088, "memory(GiB)": 34.88, "step": 31695, "train_speed(iter/s)": 0.41128 }, { "acc": 0.88808556, "epoch": 0.8583109955866027, "grad_norm": 13.309453964233398, "learning_rate": 9.277674138445763e-06, "loss": 0.57716084, "memory(GiB)": 34.88, "step": 31700, "train_speed(iter/s)": 0.411283 }, { "acc": 0.88971519, "epoch": 0.8584463758698183, "grad_norm": 7.6138386726379395, "learning_rate": 9.277384386860852e-06, "loss": 0.49694066, "memory(GiB)": 34.88, "step": 31705, "train_speed(iter/s)": 0.411287 }, { "acc": 0.87656155, "epoch": 0.8585817561530339, "grad_norm": 8.853544235229492, "learning_rate": 9.277094581699561e-06, "loss": 0.60125694, "memory(GiB)": 34.88, "step": 31710, "train_speed(iter/s)": 0.411291 }, { "acc": 0.86792469, "epoch": 0.8587171364362495, "grad_norm": 8.027435302734375, "learning_rate": 9.27680472296552e-06, "loss": 0.58536754, "memory(GiB)": 34.88, "step": 31715, "train_speed(iter/s)": 0.411294 }, { "acc": 0.88597937, "epoch": 0.8588525167194649, "grad_norm": 5.803452968597412, "learning_rate": 9.276514810662359e-06, "loss": 0.59123464, "memory(GiB)": 34.88, "step": 31720, "train_speed(iter/s)": 0.411297 }, { "acc": 0.89596701, "epoch": 0.8589878970026805, "grad_norm": 10.637591361999512, "learning_rate": 9.27622484479371e-06, "loss": 0.49857845, "memory(GiB)": 34.88, "step": 31725, "train_speed(iter/s)": 0.4113 }, { "acc": 0.88197126, "epoch": 0.8591232772858961, "grad_norm": 10.137338638305664, "learning_rate": 9.275934825363205e-06, "loss": 0.56282911, "memory(GiB)": 34.88, "step": 31730, "train_speed(iter/s)": 0.411304 }, { "acc": 0.86957531, "epoch": 0.8592586575691117, "grad_norm": 6.871265411376953, "learning_rate": 9.27564475237448e-06, "loss": 0.67541304, "memory(GiB)": 34.88, "step": 31735, "train_speed(iter/s)": 0.411307 }, { "acc": 0.88330822, "epoch": 0.8593940378523272, "grad_norm": 14.07055950164795, "learning_rate": 9.275354625831164e-06, "loss": 0.58133478, "memory(GiB)": 34.88, "step": 31740, "train_speed(iter/s)": 0.41131 }, { "acc": 0.88561029, "epoch": 0.8595294181355427, "grad_norm": 8.311554908752441, "learning_rate": 9.275064445736896e-06, "loss": 0.61460533, "memory(GiB)": 34.88, "step": 31745, "train_speed(iter/s)": 0.411314 }, { "acc": 0.86944981, "epoch": 0.8596647984187583, "grad_norm": 7.8086466789245605, "learning_rate": 9.274774212095305e-06, "loss": 0.60237265, "memory(GiB)": 34.88, "step": 31750, "train_speed(iter/s)": 0.411317 }, { "acc": 0.86727371, "epoch": 0.8598001787019739, "grad_norm": 15.160578727722168, "learning_rate": 9.274483924910033e-06, "loss": 0.70275555, "memory(GiB)": 34.88, "step": 31755, "train_speed(iter/s)": 0.411321 }, { "acc": 0.88125086, "epoch": 0.8599355589851894, "grad_norm": 9.150310516357422, "learning_rate": 9.274193584184715e-06, "loss": 0.63047543, "memory(GiB)": 34.88, "step": 31760, "train_speed(iter/s)": 0.411324 }, { "acc": 0.88188515, "epoch": 0.8600709392684049, "grad_norm": 12.797197341918945, "learning_rate": 9.273903189922985e-06, "loss": 0.59555621, "memory(GiB)": 34.88, "step": 31765, "train_speed(iter/s)": 0.411326 }, { "acc": 0.88712015, "epoch": 0.8602063195516205, "grad_norm": 7.332345962524414, "learning_rate": 9.273612742128484e-06, "loss": 0.51655283, "memory(GiB)": 34.88, "step": 31770, "train_speed(iter/s)": 0.411329 }, { "acc": 0.88640671, "epoch": 0.8603416998348361, "grad_norm": 5.661413192749023, "learning_rate": 9.273322240804848e-06, "loss": 0.50710135, "memory(GiB)": 34.88, "step": 31775, "train_speed(iter/s)": 0.411333 }, { "acc": 0.87153034, "epoch": 0.8604770801180516, "grad_norm": 6.568345546722412, "learning_rate": 9.273031685955718e-06, "loss": 0.62780914, "memory(GiB)": 34.88, "step": 31780, "train_speed(iter/s)": 0.411336 }, { "acc": 0.88520241, "epoch": 0.8606124604012672, "grad_norm": 9.23678207397461, "learning_rate": 9.272741077584734e-06, "loss": 0.54870148, "memory(GiB)": 34.88, "step": 31785, "train_speed(iter/s)": 0.411339 }, { "acc": 0.87821503, "epoch": 0.8607478406844827, "grad_norm": 13.328996658325195, "learning_rate": 9.272450415695535e-06, "loss": 0.66853476, "memory(GiB)": 34.88, "step": 31790, "train_speed(iter/s)": 0.411341 }, { "acc": 0.87762804, "epoch": 0.8608832209676983, "grad_norm": 5.579757213592529, "learning_rate": 9.272159700291762e-06, "loss": 0.56419668, "memory(GiB)": 34.88, "step": 31795, "train_speed(iter/s)": 0.411344 }, { "acc": 0.87762957, "epoch": 0.8610186012509138, "grad_norm": 8.80088996887207, "learning_rate": 9.271868931377059e-06, "loss": 0.6381216, "memory(GiB)": 34.88, "step": 31800, "train_speed(iter/s)": 0.411347 }, { "acc": 0.87309952, "epoch": 0.8611539815341294, "grad_norm": 4.7237772941589355, "learning_rate": 9.271578108955065e-06, "loss": 0.62975755, "memory(GiB)": 34.88, "step": 31805, "train_speed(iter/s)": 0.41135 }, { "acc": 0.87231894, "epoch": 0.861289361817345, "grad_norm": 8.556231498718262, "learning_rate": 9.271287233029426e-06, "loss": 0.74023142, "memory(GiB)": 34.88, "step": 31810, "train_speed(iter/s)": 0.411354 }, { "acc": 0.86601257, "epoch": 0.8614247421005605, "grad_norm": 15.133047103881836, "learning_rate": 9.270996303603785e-06, "loss": 0.73102722, "memory(GiB)": 34.88, "step": 31815, "train_speed(iter/s)": 0.411357 }, { "acc": 0.88308735, "epoch": 0.861560122383776, "grad_norm": 6.928770542144775, "learning_rate": 9.270705320681786e-06, "loss": 0.58930507, "memory(GiB)": 34.88, "step": 31820, "train_speed(iter/s)": 0.411361 }, { "acc": 0.85143833, "epoch": 0.8616955026669916, "grad_norm": 4.992280006408691, "learning_rate": 9.270414284267075e-06, "loss": 0.84402523, "memory(GiB)": 34.88, "step": 31825, "train_speed(iter/s)": 0.411364 }, { "acc": 0.88258448, "epoch": 0.8618308829502072, "grad_norm": 6.894776821136475, "learning_rate": 9.270123194363298e-06, "loss": 0.51739464, "memory(GiB)": 34.88, "step": 31830, "train_speed(iter/s)": 0.411367 }, { "acc": 0.85867863, "epoch": 0.8619662632334227, "grad_norm": 9.039022445678711, "learning_rate": 9.269832050974099e-06, "loss": 0.7645874, "memory(GiB)": 34.88, "step": 31835, "train_speed(iter/s)": 0.41137 }, { "acc": 0.87835236, "epoch": 0.8621016435166382, "grad_norm": 9.254463195800781, "learning_rate": 9.269540854103126e-06, "loss": 0.60227542, "memory(GiB)": 34.88, "step": 31840, "train_speed(iter/s)": 0.411372 }, { "acc": 0.88771105, "epoch": 0.8622370237998538, "grad_norm": 14.35069465637207, "learning_rate": 9.26924960375403e-06, "loss": 0.58184443, "memory(GiB)": 34.88, "step": 31845, "train_speed(iter/s)": 0.411374 }, { "acc": 0.89714394, "epoch": 0.8623724040830694, "grad_norm": 6.074344158172607, "learning_rate": 9.268958299930456e-06, "loss": 0.49212961, "memory(GiB)": 34.88, "step": 31850, "train_speed(iter/s)": 0.411377 }, { "acc": 0.88611469, "epoch": 0.862507784366285, "grad_norm": 13.28720760345459, "learning_rate": 9.268666942636054e-06, "loss": 0.60317822, "memory(GiB)": 34.88, "step": 31855, "train_speed(iter/s)": 0.411378 }, { "acc": 0.87452431, "epoch": 0.8626431646495004, "grad_norm": 5.034938335418701, "learning_rate": 9.268375531874475e-06, "loss": 0.70188584, "memory(GiB)": 34.88, "step": 31860, "train_speed(iter/s)": 0.411381 }, { "acc": 0.87989521, "epoch": 0.862778544932716, "grad_norm": 6.491044044494629, "learning_rate": 9.268084067649367e-06, "loss": 0.58998976, "memory(GiB)": 34.88, "step": 31865, "train_speed(iter/s)": 0.411383 }, { "acc": 0.87069826, "epoch": 0.8629139252159316, "grad_norm": 8.374263763427734, "learning_rate": 9.267792549964386e-06, "loss": 0.65337958, "memory(GiB)": 34.88, "step": 31870, "train_speed(iter/s)": 0.411387 }, { "acc": 0.87517662, "epoch": 0.8630493054991472, "grad_norm": 14.08728313446045, "learning_rate": 9.267500978823177e-06, "loss": 0.65242996, "memory(GiB)": 34.88, "step": 31875, "train_speed(iter/s)": 0.411387 }, { "acc": 0.87324543, "epoch": 0.8631846857823626, "grad_norm": 8.253503799438477, "learning_rate": 9.267209354229397e-06, "loss": 0.60849099, "memory(GiB)": 34.88, "step": 31880, "train_speed(iter/s)": 0.411391 }, { "acc": 0.88449726, "epoch": 0.8633200660655782, "grad_norm": 12.603310585021973, "learning_rate": 9.266917676186699e-06, "loss": 0.56114101, "memory(GiB)": 34.88, "step": 31885, "train_speed(iter/s)": 0.411394 }, { "acc": 0.87276955, "epoch": 0.8634554463487938, "grad_norm": 13.074905395507812, "learning_rate": 9.266625944698735e-06, "loss": 0.71959639, "memory(GiB)": 34.88, "step": 31890, "train_speed(iter/s)": 0.411397 }, { "acc": 0.88472881, "epoch": 0.8635908266320094, "grad_norm": 11.046806335449219, "learning_rate": 9.266334159769163e-06, "loss": 0.61105762, "memory(GiB)": 34.88, "step": 31895, "train_speed(iter/s)": 0.411399 }, { "acc": 0.85501251, "epoch": 0.8637262069152248, "grad_norm": 7.637296199798584, "learning_rate": 9.266042321401635e-06, "loss": 0.70032997, "memory(GiB)": 34.88, "step": 31900, "train_speed(iter/s)": 0.411402 }, { "acc": 0.86972017, "epoch": 0.8638615871984404, "grad_norm": 8.74476432800293, "learning_rate": 9.265750429599805e-06, "loss": 0.63076043, "memory(GiB)": 34.88, "step": 31905, "train_speed(iter/s)": 0.411406 }, { "acc": 0.88569813, "epoch": 0.863996967481656, "grad_norm": 7.190009593963623, "learning_rate": 9.265458484367334e-06, "loss": 0.59423208, "memory(GiB)": 34.88, "step": 31910, "train_speed(iter/s)": 0.411408 }, { "acc": 0.88338938, "epoch": 0.8641323477648716, "grad_norm": 12.578607559204102, "learning_rate": 9.26516648570788e-06, "loss": 0.58033495, "memory(GiB)": 34.88, "step": 31915, "train_speed(iter/s)": 0.411412 }, { "acc": 0.88548412, "epoch": 0.864267728048087, "grad_norm": 7.3745245933532715, "learning_rate": 9.264874433625096e-06, "loss": 0.59778948, "memory(GiB)": 34.88, "step": 31920, "train_speed(iter/s)": 0.411415 }, { "acc": 0.87785568, "epoch": 0.8644031083313026, "grad_norm": 8.723220825195312, "learning_rate": 9.26458232812264e-06, "loss": 0.59042249, "memory(GiB)": 34.88, "step": 31925, "train_speed(iter/s)": 0.411419 }, { "acc": 0.89478054, "epoch": 0.8645384886145182, "grad_norm": 9.628013610839844, "learning_rate": 9.264290169204177e-06, "loss": 0.54738722, "memory(GiB)": 34.88, "step": 31930, "train_speed(iter/s)": 0.411422 }, { "acc": 0.88319817, "epoch": 0.8646738688977338, "grad_norm": 5.477733135223389, "learning_rate": 9.263997956873364e-06, "loss": 0.62307873, "memory(GiB)": 34.88, "step": 31935, "train_speed(iter/s)": 0.411426 }, { "acc": 0.8715085, "epoch": 0.8648092491809493, "grad_norm": 3.6281280517578125, "learning_rate": 9.263705691133861e-06, "loss": 0.65046911, "memory(GiB)": 34.88, "step": 31940, "train_speed(iter/s)": 0.41143 }, { "acc": 0.8693922, "epoch": 0.8649446294641648, "grad_norm": 7.301745891571045, "learning_rate": 9.263413371989329e-06, "loss": 0.69926195, "memory(GiB)": 34.88, "step": 31945, "train_speed(iter/s)": 0.411433 }, { "acc": 0.90150127, "epoch": 0.8650800097473804, "grad_norm": 6.021596431732178, "learning_rate": 9.263120999443428e-06, "loss": 0.54704838, "memory(GiB)": 34.88, "step": 31950, "train_speed(iter/s)": 0.411436 }, { "acc": 0.87083397, "epoch": 0.865215390030596, "grad_norm": 10.406808853149414, "learning_rate": 9.262828573499827e-06, "loss": 0.75196948, "memory(GiB)": 34.88, "step": 31955, "train_speed(iter/s)": 0.411439 }, { "acc": 0.8846405, "epoch": 0.8653507703138115, "grad_norm": 12.086804389953613, "learning_rate": 9.262536094162183e-06, "loss": 0.62441287, "memory(GiB)": 34.88, "step": 31960, "train_speed(iter/s)": 0.411443 }, { "acc": 0.89411125, "epoch": 0.865486150597027, "grad_norm": 11.161593437194824, "learning_rate": 9.262243561434163e-06, "loss": 0.54742651, "memory(GiB)": 34.88, "step": 31965, "train_speed(iter/s)": 0.411446 }, { "acc": 0.88072109, "epoch": 0.8656215308802426, "grad_norm": 13.498162269592285, "learning_rate": 9.26195097531943e-06, "loss": 0.74476933, "memory(GiB)": 34.88, "step": 31970, "train_speed(iter/s)": 0.411449 }, { "acc": 0.87228069, "epoch": 0.8657569111634582, "grad_norm": 8.946473121643066, "learning_rate": 9.26165833582165e-06, "loss": 0.71382761, "memory(GiB)": 34.88, "step": 31975, "train_speed(iter/s)": 0.411452 }, { "acc": 0.87168703, "epoch": 0.8658922914466737, "grad_norm": 17.488866806030273, "learning_rate": 9.261365642944489e-06, "loss": 0.64703212, "memory(GiB)": 34.88, "step": 31980, "train_speed(iter/s)": 0.411455 }, { "acc": 0.87012825, "epoch": 0.8660276717298893, "grad_norm": 10.147177696228027, "learning_rate": 9.261072896691614e-06, "loss": 0.72998214, "memory(GiB)": 34.88, "step": 31985, "train_speed(iter/s)": 0.411458 }, { "acc": 0.87213516, "epoch": 0.8661630520131048, "grad_norm": 8.671988487243652, "learning_rate": 9.260780097066689e-06, "loss": 0.6279254, "memory(GiB)": 34.88, "step": 31990, "train_speed(iter/s)": 0.411462 }, { "acc": 0.90391722, "epoch": 0.8662984322963203, "grad_norm": 10.844451904296875, "learning_rate": 9.260487244073386e-06, "loss": 0.55251513, "memory(GiB)": 34.88, "step": 31995, "train_speed(iter/s)": 0.411466 }, { "acc": 0.88443279, "epoch": 0.8664338125795359, "grad_norm": 9.016788482666016, "learning_rate": 9.260194337715372e-06, "loss": 0.53902869, "memory(GiB)": 34.88, "step": 32000, "train_speed(iter/s)": 0.411469 }, { "acc": 0.86832438, "epoch": 0.8665691928627515, "grad_norm": 7.3597002029418945, "learning_rate": 9.259901377996317e-06, "loss": 0.69745665, "memory(GiB)": 34.88, "step": 32005, "train_speed(iter/s)": 0.411473 }, { "acc": 0.89916506, "epoch": 0.866704573145967, "grad_norm": 12.202168464660645, "learning_rate": 9.259608364919887e-06, "loss": 0.54180908, "memory(GiB)": 34.88, "step": 32010, "train_speed(iter/s)": 0.411476 }, { "acc": 0.87266989, "epoch": 0.8668399534291825, "grad_norm": 7.991631507873535, "learning_rate": 9.259315298489758e-06, "loss": 0.61416221, "memory(GiB)": 34.88, "step": 32015, "train_speed(iter/s)": 0.41148 }, { "acc": 0.87547522, "epoch": 0.8669753337123981, "grad_norm": 6.0798659324646, "learning_rate": 9.259022178709597e-06, "loss": 0.69395785, "memory(GiB)": 34.88, "step": 32020, "train_speed(iter/s)": 0.411484 }, { "acc": 0.86412802, "epoch": 0.8671107139956137, "grad_norm": 10.139276504516602, "learning_rate": 9.25872900558308e-06, "loss": 0.75641685, "memory(GiB)": 34.88, "step": 32025, "train_speed(iter/s)": 0.411487 }, { "acc": 0.87816563, "epoch": 0.8672460942788293, "grad_norm": 7.9328508377075195, "learning_rate": 9.258435779113876e-06, "loss": 0.65742097, "memory(GiB)": 34.88, "step": 32030, "train_speed(iter/s)": 0.411491 }, { "acc": 0.87527561, "epoch": 0.8673814745620447, "grad_norm": 8.415908813476562, "learning_rate": 9.25814249930566e-06, "loss": 0.69268732, "memory(GiB)": 34.88, "step": 32035, "train_speed(iter/s)": 0.411494 }, { "acc": 0.88054752, "epoch": 0.8675168548452603, "grad_norm": 15.231616020202637, "learning_rate": 9.257849166162105e-06, "loss": 0.58399172, "memory(GiB)": 34.88, "step": 32040, "train_speed(iter/s)": 0.411497 }, { "acc": 0.87828074, "epoch": 0.8676522351284759, "grad_norm": 11.128929138183594, "learning_rate": 9.257555779686887e-06, "loss": 0.68732185, "memory(GiB)": 34.88, "step": 32045, "train_speed(iter/s)": 0.4115 }, { "acc": 0.87606726, "epoch": 0.8677876154116915, "grad_norm": 6.442936897277832, "learning_rate": 9.257262339883679e-06, "loss": 0.75689898, "memory(GiB)": 34.88, "step": 32050, "train_speed(iter/s)": 0.411504 }, { "acc": 0.8557003, "epoch": 0.8679229956949069, "grad_norm": 8.702433586120605, "learning_rate": 9.256968846756158e-06, "loss": 0.80279465, "memory(GiB)": 34.88, "step": 32055, "train_speed(iter/s)": 0.411507 }, { "acc": 0.85753374, "epoch": 0.8680583759781225, "grad_norm": 8.453191757202148, "learning_rate": 9.256675300308002e-06, "loss": 0.7408906, "memory(GiB)": 34.88, "step": 32060, "train_speed(iter/s)": 0.41151 }, { "acc": 0.8811533, "epoch": 0.8681937562613381, "grad_norm": 8.278950691223145, "learning_rate": 9.256381700542886e-06, "loss": 0.6121675, "memory(GiB)": 34.88, "step": 32065, "train_speed(iter/s)": 0.411514 }, { "acc": 0.86731129, "epoch": 0.8683291365445537, "grad_norm": 9.497559547424316, "learning_rate": 9.25608804746449e-06, "loss": 0.72803397, "memory(GiB)": 34.88, "step": 32070, "train_speed(iter/s)": 0.411517 }, { "acc": 0.89097214, "epoch": 0.8684645168277692, "grad_norm": 13.172164916992188, "learning_rate": 9.25579434107649e-06, "loss": 0.47500172, "memory(GiB)": 34.88, "step": 32075, "train_speed(iter/s)": 0.411521 }, { "acc": 0.85100288, "epoch": 0.8685998971109847, "grad_norm": 10.494145393371582, "learning_rate": 9.25550058138257e-06, "loss": 0.93497372, "memory(GiB)": 34.88, "step": 32080, "train_speed(iter/s)": 0.411524 }, { "acc": 0.85258541, "epoch": 0.8687352773942003, "grad_norm": 6.630132675170898, "learning_rate": 9.255206768386403e-06, "loss": 0.76077018, "memory(GiB)": 34.88, "step": 32085, "train_speed(iter/s)": 0.411527 }, { "acc": 0.89451818, "epoch": 0.8688706576774159, "grad_norm": 7.903289318084717, "learning_rate": 9.254912902091674e-06, "loss": 0.54290266, "memory(GiB)": 34.88, "step": 32090, "train_speed(iter/s)": 0.41153 }, { "acc": 0.86983528, "epoch": 0.8690060379606314, "grad_norm": 8.314330101013184, "learning_rate": 9.254618982502064e-06, "loss": 0.57704973, "memory(GiB)": 34.88, "step": 32095, "train_speed(iter/s)": 0.411534 }, { "acc": 0.8925396, "epoch": 0.8691414182438469, "grad_norm": 7.103655815124512, "learning_rate": 9.254325009621255e-06, "loss": 0.48938608, "memory(GiB)": 34.88, "step": 32100, "train_speed(iter/s)": 0.411537 }, { "acc": 0.88057098, "epoch": 0.8692767985270625, "grad_norm": 11.511052131652832, "learning_rate": 9.254030983452928e-06, "loss": 0.58587132, "memory(GiB)": 34.88, "step": 32105, "train_speed(iter/s)": 0.41154 }, { "acc": 0.88987217, "epoch": 0.8694121788102781, "grad_norm": 8.217878341674805, "learning_rate": 9.253736904000767e-06, "loss": 0.5884068, "memory(GiB)": 34.88, "step": 32110, "train_speed(iter/s)": 0.411543 }, { "acc": 0.87343826, "epoch": 0.8695475590934936, "grad_norm": 18.405038833618164, "learning_rate": 9.253442771268459e-06, "loss": 0.6318006, "memory(GiB)": 34.88, "step": 32115, "train_speed(iter/s)": 0.411546 }, { "acc": 0.88158073, "epoch": 0.8696829393767092, "grad_norm": 5.70773458480835, "learning_rate": 9.253148585259682e-06, "loss": 0.51064148, "memory(GiB)": 34.88, "step": 32120, "train_speed(iter/s)": 0.411549 }, { "acc": 0.89279594, "epoch": 0.8698183196599247, "grad_norm": 5.734732151031494, "learning_rate": 9.252854345978127e-06, "loss": 0.48631678, "memory(GiB)": 34.88, "step": 32125, "train_speed(iter/s)": 0.411553 }, { "acc": 0.87051678, "epoch": 0.8699536999431403, "grad_norm": 15.772348403930664, "learning_rate": 9.252560053427478e-06, "loss": 0.68246775, "memory(GiB)": 34.88, "step": 32130, "train_speed(iter/s)": 0.411556 }, { "acc": 0.88165255, "epoch": 0.8700890802263558, "grad_norm": 19.531770706176758, "learning_rate": 9.252265707611421e-06, "loss": 0.52330065, "memory(GiB)": 34.88, "step": 32135, "train_speed(iter/s)": 0.411559 }, { "acc": 0.88720856, "epoch": 0.8702244605095714, "grad_norm": 9.303349494934082, "learning_rate": 9.251971308533643e-06, "loss": 0.58749294, "memory(GiB)": 34.88, "step": 32140, "train_speed(iter/s)": 0.411562 }, { "acc": 0.88100548, "epoch": 0.8703598407927869, "grad_norm": 10.210766792297363, "learning_rate": 9.251676856197834e-06, "loss": 0.64594378, "memory(GiB)": 34.88, "step": 32145, "train_speed(iter/s)": 0.411566 }, { "acc": 0.85823145, "epoch": 0.8704952210760025, "grad_norm": 10.707843780517578, "learning_rate": 9.25138235060768e-06, "loss": 0.71327157, "memory(GiB)": 34.88, "step": 32150, "train_speed(iter/s)": 0.411569 }, { "acc": 0.89732628, "epoch": 0.870630601359218, "grad_norm": 7.16450834274292, "learning_rate": 9.251087791766875e-06, "loss": 0.51849761, "memory(GiB)": 34.88, "step": 32155, "train_speed(iter/s)": 0.411572 }, { "acc": 0.89781513, "epoch": 0.8707659816424336, "grad_norm": 9.304376602172852, "learning_rate": 9.2507931796791e-06, "loss": 0.52226911, "memory(GiB)": 34.88, "step": 32160, "train_speed(iter/s)": 0.411576 }, { "acc": 0.87429676, "epoch": 0.8709013619256492, "grad_norm": 9.991010665893555, "learning_rate": 9.250498514348057e-06, "loss": 0.58619614, "memory(GiB)": 34.88, "step": 32165, "train_speed(iter/s)": 0.411579 }, { "acc": 0.89040337, "epoch": 0.8710367422088647, "grad_norm": 18.290388107299805, "learning_rate": 9.250203795777428e-06, "loss": 0.58138037, "memory(GiB)": 34.88, "step": 32170, "train_speed(iter/s)": 0.411582 }, { "acc": 0.90551701, "epoch": 0.8711721224920802, "grad_norm": 5.983582019805908, "learning_rate": 9.249909023970908e-06, "loss": 0.43694487, "memory(GiB)": 34.88, "step": 32175, "train_speed(iter/s)": 0.411585 }, { "acc": 0.87113285, "epoch": 0.8713075027752958, "grad_norm": 7.22683048248291, "learning_rate": 9.249614198932191e-06, "loss": 0.7364285, "memory(GiB)": 34.88, "step": 32180, "train_speed(iter/s)": 0.411589 }, { "acc": 0.86228771, "epoch": 0.8714428830585114, "grad_norm": 12.001616477966309, "learning_rate": 9.24931932066497e-06, "loss": 0.80930767, "memory(GiB)": 34.88, "step": 32185, "train_speed(iter/s)": 0.411591 }, { "acc": 0.86273422, "epoch": 0.8715782633417269, "grad_norm": 19.791099548339844, "learning_rate": 9.249024389172936e-06, "loss": 0.72977123, "memory(GiB)": 34.88, "step": 32190, "train_speed(iter/s)": 0.411595 }, { "acc": 0.86144152, "epoch": 0.8717136436249424, "grad_norm": 6.1060662269592285, "learning_rate": 9.248729404459787e-06, "loss": 0.6687304, "memory(GiB)": 34.88, "step": 32195, "train_speed(iter/s)": 0.411598 }, { "acc": 0.8486269, "epoch": 0.871849023908158, "grad_norm": 7.109910011291504, "learning_rate": 9.248434366529216e-06, "loss": 0.79076266, "memory(GiB)": 34.88, "step": 32200, "train_speed(iter/s)": 0.411602 }, { "acc": 0.87000237, "epoch": 0.8719844041913736, "grad_norm": 7.096767425537109, "learning_rate": 9.248139275384921e-06, "loss": 0.6566968, "memory(GiB)": 34.88, "step": 32205, "train_speed(iter/s)": 0.411605 }, { "acc": 0.87368927, "epoch": 0.8721197844745892, "grad_norm": 6.016000270843506, "learning_rate": 9.247844131030598e-06, "loss": 0.72807102, "memory(GiB)": 34.88, "step": 32210, "train_speed(iter/s)": 0.411609 }, { "acc": 0.86716957, "epoch": 0.8722551647578046, "grad_norm": 8.663692474365234, "learning_rate": 9.247548933469943e-06, "loss": 0.68396358, "memory(GiB)": 34.88, "step": 32215, "train_speed(iter/s)": 0.411612 }, { "acc": 0.85980396, "epoch": 0.8723905450410202, "grad_norm": 13.520525932312012, "learning_rate": 9.247253682706653e-06, "loss": 0.74392834, "memory(GiB)": 34.88, "step": 32220, "train_speed(iter/s)": 0.411616 }, { "acc": 0.86613235, "epoch": 0.8725259253242358, "grad_norm": 6.836771488189697, "learning_rate": 9.246958378744431e-06, "loss": 0.66023865, "memory(GiB)": 34.88, "step": 32225, "train_speed(iter/s)": 0.411619 }, { "acc": 0.87734261, "epoch": 0.8726613056074514, "grad_norm": 5.33915376663208, "learning_rate": 9.246663021586973e-06, "loss": 0.64854784, "memory(GiB)": 34.88, "step": 32230, "train_speed(iter/s)": 0.411623 }, { "acc": 0.86486769, "epoch": 0.8727966858906668, "grad_norm": 6.416806697845459, "learning_rate": 9.246367611237977e-06, "loss": 0.65629454, "memory(GiB)": 34.88, "step": 32235, "train_speed(iter/s)": 0.411626 }, { "acc": 0.86941891, "epoch": 0.8729320661738824, "grad_norm": 5.901249408721924, "learning_rate": 9.246072147701148e-06, "loss": 0.72215548, "memory(GiB)": 34.88, "step": 32240, "train_speed(iter/s)": 0.41163 }, { "acc": 0.85197849, "epoch": 0.873067446457098, "grad_norm": 8.392465591430664, "learning_rate": 9.245776630980185e-06, "loss": 0.79242706, "memory(GiB)": 34.88, "step": 32245, "train_speed(iter/s)": 0.411633 }, { "acc": 0.88542957, "epoch": 0.8732028267403136, "grad_norm": 9.592187881469727, "learning_rate": 9.24548106107879e-06, "loss": 0.57246075, "memory(GiB)": 34.88, "step": 32250, "train_speed(iter/s)": 0.411636 }, { "acc": 0.85957613, "epoch": 0.873338207023529, "grad_norm": 8.054122924804688, "learning_rate": 9.245185438000667e-06, "loss": 0.70558853, "memory(GiB)": 34.88, "step": 32255, "train_speed(iter/s)": 0.411639 }, { "acc": 0.88377018, "epoch": 0.8734735873067446, "grad_norm": 7.317694664001465, "learning_rate": 9.244889761749516e-06, "loss": 0.6326602, "memory(GiB)": 34.88, "step": 32260, "train_speed(iter/s)": 0.411643 }, { "acc": 0.88238964, "epoch": 0.8736089675899602, "grad_norm": 25.384506225585938, "learning_rate": 9.244594032329047e-06, "loss": 0.59373021, "memory(GiB)": 34.88, "step": 32265, "train_speed(iter/s)": 0.411646 }, { "acc": 0.87480907, "epoch": 0.8737443478731758, "grad_norm": 6.341161251068115, "learning_rate": 9.244298249742957e-06, "loss": 0.67022877, "memory(GiB)": 34.88, "step": 32270, "train_speed(iter/s)": 0.411649 }, { "acc": 0.87309532, "epoch": 0.8738797281563913, "grad_norm": 6.655760288238525, "learning_rate": 9.244002413994955e-06, "loss": 0.7115984, "memory(GiB)": 34.88, "step": 32275, "train_speed(iter/s)": 0.411653 }, { "acc": 0.87873611, "epoch": 0.8740151084396068, "grad_norm": 6.777320861816406, "learning_rate": 9.243706525088749e-06, "loss": 0.61658497, "memory(GiB)": 34.88, "step": 32280, "train_speed(iter/s)": 0.411656 }, { "acc": 0.87490644, "epoch": 0.8741504887228224, "grad_norm": 6.262671947479248, "learning_rate": 9.24341058302804e-06, "loss": 0.71502304, "memory(GiB)": 34.88, "step": 32285, "train_speed(iter/s)": 0.411659 }, { "acc": 0.88508062, "epoch": 0.874285869006038, "grad_norm": 13.07449722290039, "learning_rate": 9.243114587816542e-06, "loss": 0.70566521, "memory(GiB)": 34.88, "step": 32290, "train_speed(iter/s)": 0.411663 }, { "acc": 0.88512793, "epoch": 0.8744212492892535, "grad_norm": 14.503515243530273, "learning_rate": 9.242818539457957e-06, "loss": 0.63369689, "memory(GiB)": 34.88, "step": 32295, "train_speed(iter/s)": 0.411666 }, { "acc": 0.90128956, "epoch": 0.874556629572469, "grad_norm": 8.627986907958984, "learning_rate": 9.242522437955999e-06, "loss": 0.41288366, "memory(GiB)": 34.88, "step": 32300, "train_speed(iter/s)": 0.41167 }, { "acc": 0.88667526, "epoch": 0.8746920098556846, "grad_norm": 10.003837585449219, "learning_rate": 9.242226283314371e-06, "loss": 0.57746897, "memory(GiB)": 34.88, "step": 32305, "train_speed(iter/s)": 0.411673 }, { "acc": 0.88545723, "epoch": 0.8748273901389002, "grad_norm": 13.166805267333984, "learning_rate": 9.24193007553679e-06, "loss": 0.65305047, "memory(GiB)": 34.88, "step": 32310, "train_speed(iter/s)": 0.411677 }, { "acc": 0.8864995, "epoch": 0.8749627704221157, "grad_norm": 6.159267425537109, "learning_rate": 9.241633814626961e-06, "loss": 0.57719154, "memory(GiB)": 34.88, "step": 32315, "train_speed(iter/s)": 0.41168 }, { "acc": 0.90121183, "epoch": 0.8750981507053313, "grad_norm": 10.787089347839355, "learning_rate": 9.241337500588598e-06, "loss": 0.41832905, "memory(GiB)": 34.88, "step": 32320, "train_speed(iter/s)": 0.411683 }, { "acc": 0.89655495, "epoch": 0.8752335309885468, "grad_norm": 7.8072638511657715, "learning_rate": 9.24104113342541e-06, "loss": 0.48846793, "memory(GiB)": 34.88, "step": 32325, "train_speed(iter/s)": 0.411686 }, { "acc": 0.90360317, "epoch": 0.8753689112717624, "grad_norm": 17.824142456054688, "learning_rate": 9.240744713141112e-06, "loss": 0.52272806, "memory(GiB)": 34.88, "step": 32330, "train_speed(iter/s)": 0.411689 }, { "acc": 0.86705379, "epoch": 0.8755042915549779, "grad_norm": 8.090418815612793, "learning_rate": 9.24044823973942e-06, "loss": 0.65296535, "memory(GiB)": 34.88, "step": 32335, "train_speed(iter/s)": 0.411693 }, { "acc": 0.88219891, "epoch": 0.8756396718381935, "grad_norm": 7.990113735198975, "learning_rate": 9.24015171322404e-06, "loss": 0.65101433, "memory(GiB)": 34.88, "step": 32340, "train_speed(iter/s)": 0.411697 }, { "acc": 0.87430296, "epoch": 0.875775052121409, "grad_norm": 6.995288848876953, "learning_rate": 9.239855133598695e-06, "loss": 0.68526154, "memory(GiB)": 34.88, "step": 32345, "train_speed(iter/s)": 0.4117 }, { "acc": 0.88739443, "epoch": 0.8759104324046246, "grad_norm": 6.990627765655518, "learning_rate": 9.239558500867096e-06, "loss": 0.58274665, "memory(GiB)": 34.88, "step": 32350, "train_speed(iter/s)": 0.411703 }, { "acc": 0.89736624, "epoch": 0.8760458126878401, "grad_norm": 7.5079474449157715, "learning_rate": 9.239261815032959e-06, "loss": 0.5158628, "memory(GiB)": 34.88, "step": 32355, "train_speed(iter/s)": 0.411707 }, { "acc": 0.87717886, "epoch": 0.8761811929710557, "grad_norm": 7.157692909240723, "learning_rate": 9.238965076100001e-06, "loss": 0.61781912, "memory(GiB)": 34.88, "step": 32360, "train_speed(iter/s)": 0.41171 }, { "acc": 0.87337322, "epoch": 0.8763165732542713, "grad_norm": 11.253514289855957, "learning_rate": 9.23866828407194e-06, "loss": 0.64505901, "memory(GiB)": 34.88, "step": 32365, "train_speed(iter/s)": 0.411714 }, { "acc": 0.89112272, "epoch": 0.8764519535374868, "grad_norm": 17.010353088378906, "learning_rate": 9.238371438952492e-06, "loss": 0.5824245, "memory(GiB)": 34.88, "step": 32370, "train_speed(iter/s)": 0.411717 }, { "acc": 0.8731617, "epoch": 0.8765873338207023, "grad_norm": 4.919241428375244, "learning_rate": 9.238074540745377e-06, "loss": 0.64666481, "memory(GiB)": 34.88, "step": 32375, "train_speed(iter/s)": 0.411719 }, { "acc": 0.86640549, "epoch": 0.8767227141039179, "grad_norm": 16.628644943237305, "learning_rate": 9.237777589454316e-06, "loss": 0.70769758, "memory(GiB)": 34.88, "step": 32380, "train_speed(iter/s)": 0.411723 }, { "acc": 0.90827827, "epoch": 0.8768580943871335, "grad_norm": 6.635509967803955, "learning_rate": 9.237480585083026e-06, "loss": 0.41599379, "memory(GiB)": 34.88, "step": 32385, "train_speed(iter/s)": 0.411726 }, { "acc": 0.85500488, "epoch": 0.876993474670349, "grad_norm": 8.833048820495605, "learning_rate": 9.237183527635228e-06, "loss": 0.76137967, "memory(GiB)": 34.88, "step": 32390, "train_speed(iter/s)": 0.41173 }, { "acc": 0.88068943, "epoch": 0.8771288549535645, "grad_norm": 6.461484432220459, "learning_rate": 9.236886417114644e-06, "loss": 0.55756698, "memory(GiB)": 34.88, "step": 32395, "train_speed(iter/s)": 0.411733 }, { "acc": 0.87335424, "epoch": 0.8772642352367801, "grad_norm": 12.273098945617676, "learning_rate": 9.236589253524995e-06, "loss": 0.62241607, "memory(GiB)": 34.88, "step": 32400, "train_speed(iter/s)": 0.411736 }, { "acc": 0.84917355, "epoch": 0.8773996155199957, "grad_norm": 8.16423225402832, "learning_rate": 9.236292036870006e-06, "loss": 0.84311886, "memory(GiB)": 34.88, "step": 32405, "train_speed(iter/s)": 0.41174 }, { "acc": 0.88262634, "epoch": 0.8775349958032113, "grad_norm": 13.758426666259766, "learning_rate": 9.235994767153396e-06, "loss": 0.55895815, "memory(GiB)": 34.88, "step": 32410, "train_speed(iter/s)": 0.411742 }, { "acc": 0.87583199, "epoch": 0.8776703760864267, "grad_norm": 8.303007125854492, "learning_rate": 9.235697444378894e-06, "loss": 0.57313666, "memory(GiB)": 34.88, "step": 32415, "train_speed(iter/s)": 0.411746 }, { "acc": 0.89262657, "epoch": 0.8778057563696423, "grad_norm": 5.439934730529785, "learning_rate": 9.23540006855022e-06, "loss": 0.57085171, "memory(GiB)": 34.88, "step": 32420, "train_speed(iter/s)": 0.411749 }, { "acc": 0.89203415, "epoch": 0.8779411366528579, "grad_norm": 10.051634788513184, "learning_rate": 9.235102639671102e-06, "loss": 0.54979868, "memory(GiB)": 34.88, "step": 32425, "train_speed(iter/s)": 0.411752 }, { "acc": 0.885077, "epoch": 0.8780765169360735, "grad_norm": 10.232666015625, "learning_rate": 9.234805157745265e-06, "loss": 0.54211526, "memory(GiB)": 34.88, "step": 32430, "train_speed(iter/s)": 0.411755 }, { "acc": 0.88523159, "epoch": 0.8782118972192889, "grad_norm": 4.868200778961182, "learning_rate": 9.234507622776435e-06, "loss": 0.61243882, "memory(GiB)": 34.88, "step": 32435, "train_speed(iter/s)": 0.411758 }, { "acc": 0.87453804, "epoch": 0.8783472775025045, "grad_norm": 3.8498830795288086, "learning_rate": 9.234210034768339e-06, "loss": 0.60342932, "memory(GiB)": 34.88, "step": 32440, "train_speed(iter/s)": 0.411761 }, { "acc": 0.89001217, "epoch": 0.8784826577857201, "grad_norm": 7.436629772186279, "learning_rate": 9.233912393724706e-06, "loss": 0.52877932, "memory(GiB)": 34.88, "step": 32445, "train_speed(iter/s)": 0.411764 }, { "acc": 0.87417116, "epoch": 0.8786180380689357, "grad_norm": 5.6086649894714355, "learning_rate": 9.233614699649264e-06, "loss": 0.63006959, "memory(GiB)": 34.88, "step": 32450, "train_speed(iter/s)": 0.411768 }, { "acc": 0.86755247, "epoch": 0.8787534183521511, "grad_norm": 8.28408432006836, "learning_rate": 9.233316952545743e-06, "loss": 0.5974813, "memory(GiB)": 34.88, "step": 32455, "train_speed(iter/s)": 0.411771 }, { "acc": 0.86925287, "epoch": 0.8788887986353667, "grad_norm": 10.886298179626465, "learning_rate": 9.233019152417873e-06, "loss": 0.69651814, "memory(GiB)": 34.88, "step": 32460, "train_speed(iter/s)": 0.411775 }, { "acc": 0.88327427, "epoch": 0.8790241789185823, "grad_norm": 6.637970924377441, "learning_rate": 9.232721299269381e-06, "loss": 0.6169878, "memory(GiB)": 34.88, "step": 32465, "train_speed(iter/s)": 0.411778 }, { "acc": 0.88230534, "epoch": 0.8791595592017979, "grad_norm": 10.821473121643066, "learning_rate": 9.232423393104002e-06, "loss": 0.65648336, "memory(GiB)": 34.88, "step": 32470, "train_speed(iter/s)": 0.411782 }, { "acc": 0.86211796, "epoch": 0.8792949394850134, "grad_norm": 7.647271633148193, "learning_rate": 9.23212543392547e-06, "loss": 0.68625355, "memory(GiB)": 34.88, "step": 32475, "train_speed(iter/s)": 0.411785 }, { "acc": 0.89576263, "epoch": 0.8794303197682289, "grad_norm": 3.4053843021392822, "learning_rate": 9.231827421737512e-06, "loss": 0.4642333, "memory(GiB)": 34.88, "step": 32480, "train_speed(iter/s)": 0.411789 }, { "acc": 0.87932587, "epoch": 0.8795657000514445, "grad_norm": 7.657237529754639, "learning_rate": 9.231529356543861e-06, "loss": 0.65823069, "memory(GiB)": 34.88, "step": 32485, "train_speed(iter/s)": 0.411792 }, { "acc": 0.87234049, "epoch": 0.8797010803346601, "grad_norm": 6.664095878601074, "learning_rate": 9.231231238348257e-06, "loss": 0.69824018, "memory(GiB)": 34.88, "step": 32490, "train_speed(iter/s)": 0.411796 }, { "acc": 0.87937727, "epoch": 0.8798364606178756, "grad_norm": 4.11866569519043, "learning_rate": 9.230933067154429e-06, "loss": 0.71980071, "memory(GiB)": 34.88, "step": 32495, "train_speed(iter/s)": 0.411799 }, { "acc": 0.87789965, "epoch": 0.8799718409010912, "grad_norm": 8.866806983947754, "learning_rate": 9.230634842966114e-06, "loss": 0.65920839, "memory(GiB)": 34.88, "step": 32500, "train_speed(iter/s)": 0.411802 }, { "acc": 0.8725791, "epoch": 0.8801072211843067, "grad_norm": 5.484110355377197, "learning_rate": 9.230336565787049e-06, "loss": 0.61793413, "memory(GiB)": 34.88, "step": 32505, "train_speed(iter/s)": 0.411805 }, { "acc": 0.86903038, "epoch": 0.8802426014675223, "grad_norm": 7.549653053283691, "learning_rate": 9.230038235620968e-06, "loss": 0.70587072, "memory(GiB)": 34.88, "step": 32510, "train_speed(iter/s)": 0.411808 }, { "acc": 0.88523436, "epoch": 0.8803779817507378, "grad_norm": 15.19605541229248, "learning_rate": 9.229739852471611e-06, "loss": 0.61427183, "memory(GiB)": 34.88, "step": 32515, "train_speed(iter/s)": 0.411812 }, { "acc": 0.85697289, "epoch": 0.8805133620339534, "grad_norm": 13.966901779174805, "learning_rate": 9.229441416342713e-06, "loss": 0.73889704, "memory(GiB)": 34.88, "step": 32520, "train_speed(iter/s)": 0.411815 }, { "acc": 0.89234285, "epoch": 0.8806487423171689, "grad_norm": 7.682220458984375, "learning_rate": 9.229142927238017e-06, "loss": 0.58695531, "memory(GiB)": 34.88, "step": 32525, "train_speed(iter/s)": 0.411818 }, { "acc": 0.86319294, "epoch": 0.8807841226003845, "grad_norm": 14.7141695022583, "learning_rate": 9.228844385161256e-06, "loss": 0.74785595, "memory(GiB)": 34.88, "step": 32530, "train_speed(iter/s)": 0.411821 }, { "acc": 0.88795528, "epoch": 0.8809195028836, "grad_norm": 5.470006465911865, "learning_rate": 9.228545790116172e-06, "loss": 0.55866666, "memory(GiB)": 34.88, "step": 32535, "train_speed(iter/s)": 0.411824 }, { "acc": 0.83073292, "epoch": 0.8810548831668156, "grad_norm": 10.519258499145508, "learning_rate": 9.228247142106509e-06, "loss": 0.9052618, "memory(GiB)": 34.88, "step": 32540, "train_speed(iter/s)": 0.411827 }, { "acc": 0.87797298, "epoch": 0.8811902634500312, "grad_norm": 9.948583602905273, "learning_rate": 9.227948441136004e-06, "loss": 0.60465841, "memory(GiB)": 34.88, "step": 32545, "train_speed(iter/s)": 0.41183 }, { "acc": 0.88053455, "epoch": 0.8813256437332467, "grad_norm": 7.1177825927734375, "learning_rate": 9.227649687208401e-06, "loss": 0.60029345, "memory(GiB)": 34.88, "step": 32550, "train_speed(iter/s)": 0.411833 }, { "acc": 0.86745968, "epoch": 0.8814610240164622, "grad_norm": 5.439673900604248, "learning_rate": 9.22735088032744e-06, "loss": 0.72486095, "memory(GiB)": 34.88, "step": 32555, "train_speed(iter/s)": 0.411836 }, { "acc": 0.86014233, "epoch": 0.8815964042996778, "grad_norm": 10.095026016235352, "learning_rate": 9.22705202049687e-06, "loss": 0.84755611, "memory(GiB)": 34.88, "step": 32560, "train_speed(iter/s)": 0.411839 }, { "acc": 0.90336733, "epoch": 0.8817317845828934, "grad_norm": 3.897603988647461, "learning_rate": 9.226753107720428e-06, "loss": 0.43092709, "memory(GiB)": 34.88, "step": 32565, "train_speed(iter/s)": 0.411842 }, { "acc": 0.88008318, "epoch": 0.8818671648661089, "grad_norm": 5.564957141876221, "learning_rate": 9.226454142001861e-06, "loss": 0.65168753, "memory(GiB)": 34.88, "step": 32570, "train_speed(iter/s)": 0.411845 }, { "acc": 0.8802866, "epoch": 0.8820025451493244, "grad_norm": 5.797055721282959, "learning_rate": 9.226155123344916e-06, "loss": 0.5337121, "memory(GiB)": 34.88, "step": 32575, "train_speed(iter/s)": 0.411848 }, { "acc": 0.86322432, "epoch": 0.88213792543254, "grad_norm": 10.255858421325684, "learning_rate": 9.225856051753336e-06, "loss": 0.68036385, "memory(GiB)": 34.88, "step": 32580, "train_speed(iter/s)": 0.411852 }, { "acc": 0.87438202, "epoch": 0.8822733057157556, "grad_norm": 12.190579414367676, "learning_rate": 9.22555692723087e-06, "loss": 0.62716498, "memory(GiB)": 34.88, "step": 32585, "train_speed(iter/s)": 0.411855 }, { "acc": 0.85520115, "epoch": 0.8824086859989712, "grad_norm": 6.910272121429443, "learning_rate": 9.225257749781263e-06, "loss": 0.75032597, "memory(GiB)": 34.88, "step": 32590, "train_speed(iter/s)": 0.411859 }, { "acc": 0.89307632, "epoch": 0.8825440662821866, "grad_norm": 5.297361850738525, "learning_rate": 9.224958519408261e-06, "loss": 0.57068882, "memory(GiB)": 34.88, "step": 32595, "train_speed(iter/s)": 0.411862 }, { "acc": 0.85866823, "epoch": 0.8826794465654022, "grad_norm": 6.583578109741211, "learning_rate": 9.224659236115617e-06, "loss": 0.76207867, "memory(GiB)": 34.88, "step": 32600, "train_speed(iter/s)": 0.411865 }, { "acc": 0.88584995, "epoch": 0.8828148268486178, "grad_norm": 6.896803379058838, "learning_rate": 9.224359899907079e-06, "loss": 0.55963602, "memory(GiB)": 34.88, "step": 32605, "train_speed(iter/s)": 0.411868 }, { "acc": 0.89116268, "epoch": 0.8829502071318334, "grad_norm": 5.273982524871826, "learning_rate": 9.224060510786396e-06, "loss": 0.56713619, "memory(GiB)": 34.88, "step": 32610, "train_speed(iter/s)": 0.411872 }, { "acc": 0.88776302, "epoch": 0.8830855874150488, "grad_norm": 29.573627471923828, "learning_rate": 9.223761068757318e-06, "loss": 0.64656296, "memory(GiB)": 34.88, "step": 32615, "train_speed(iter/s)": 0.411875 }, { "acc": 0.89576149, "epoch": 0.8832209676982644, "grad_norm": 6.889462471008301, "learning_rate": 9.223461573823596e-06, "loss": 0.5474288, "memory(GiB)": 34.88, "step": 32620, "train_speed(iter/s)": 0.411879 }, { "acc": 0.88944998, "epoch": 0.88335634798148, "grad_norm": 3.638906717300415, "learning_rate": 9.223162025988983e-06, "loss": 0.53977518, "memory(GiB)": 34.88, "step": 32625, "train_speed(iter/s)": 0.411882 }, { "acc": 0.8691885, "epoch": 0.8834917282646956, "grad_norm": 10.850378036499023, "learning_rate": 9.222862425257228e-06, "loss": 0.60939465, "memory(GiB)": 34.88, "step": 32630, "train_speed(iter/s)": 0.411886 }, { "acc": 0.87487812, "epoch": 0.883627108547911, "grad_norm": 15.886923789978027, "learning_rate": 9.222562771632088e-06, "loss": 0.66070127, "memory(GiB)": 34.88, "step": 32635, "train_speed(iter/s)": 0.411889 }, { "acc": 0.86474419, "epoch": 0.8837624888311266, "grad_norm": 6.861863136291504, "learning_rate": 9.222263065117317e-06, "loss": 0.67715101, "memory(GiB)": 34.88, "step": 32640, "train_speed(iter/s)": 0.411893 }, { "acc": 0.88027039, "epoch": 0.8838978691143422, "grad_norm": 8.35218620300293, "learning_rate": 9.221963305716667e-06, "loss": 0.54900551, "memory(GiB)": 34.88, "step": 32645, "train_speed(iter/s)": 0.411896 }, { "acc": 0.87022238, "epoch": 0.8840332493975578, "grad_norm": 9.403714179992676, "learning_rate": 9.221663493433894e-06, "loss": 0.67390766, "memory(GiB)": 34.88, "step": 32650, "train_speed(iter/s)": 0.411899 }, { "acc": 0.87570448, "epoch": 0.8841686296807733, "grad_norm": 11.852380752563477, "learning_rate": 9.221363628272753e-06, "loss": 0.67144299, "memory(GiB)": 34.88, "step": 32655, "train_speed(iter/s)": 0.411902 }, { "acc": 0.88807116, "epoch": 0.8843040099639888, "grad_norm": 6.851027965545654, "learning_rate": 9.221063710237002e-06, "loss": 0.63098845, "memory(GiB)": 34.88, "step": 32660, "train_speed(iter/s)": 0.411906 }, { "acc": 0.89924307, "epoch": 0.8844393902472044, "grad_norm": 4.001091003417969, "learning_rate": 9.220763739330396e-06, "loss": 0.54972453, "memory(GiB)": 34.88, "step": 32665, "train_speed(iter/s)": 0.411909 }, { "acc": 0.87389479, "epoch": 0.88457477053042, "grad_norm": 19.78761100769043, "learning_rate": 9.220463715556694e-06, "loss": 0.63806138, "memory(GiB)": 34.88, "step": 32670, "train_speed(iter/s)": 0.411912 }, { "acc": 0.89225483, "epoch": 0.8847101508136355, "grad_norm": 50.21758270263672, "learning_rate": 9.220163638919655e-06, "loss": 0.56225853, "memory(GiB)": 34.88, "step": 32675, "train_speed(iter/s)": 0.411915 }, { "acc": 0.87200117, "epoch": 0.884845531096851, "grad_norm": 13.001462936401367, "learning_rate": 9.219863509423036e-06, "loss": 0.64751024, "memory(GiB)": 34.88, "step": 32680, "train_speed(iter/s)": 0.411919 }, { "acc": 0.87419081, "epoch": 0.8849809113800666, "grad_norm": 20.268325805664062, "learning_rate": 9.2195633270706e-06, "loss": 0.64959068, "memory(GiB)": 34.88, "step": 32685, "train_speed(iter/s)": 0.411922 }, { "acc": 0.87145243, "epoch": 0.8851162916632822, "grad_norm": 12.6893949508667, "learning_rate": 9.219263091866106e-06, "loss": 0.6439126, "memory(GiB)": 34.88, "step": 32690, "train_speed(iter/s)": 0.411925 }, { "acc": 0.85361271, "epoch": 0.8852516719464977, "grad_norm": 8.14350700378418, "learning_rate": 9.21896280381331e-06, "loss": 0.78469567, "memory(GiB)": 34.88, "step": 32695, "train_speed(iter/s)": 0.411928 }, { "acc": 0.8588378, "epoch": 0.8853870522297133, "grad_norm": 11.875162124633789, "learning_rate": 9.218662462915982e-06, "loss": 0.78183022, "memory(GiB)": 34.88, "step": 32700, "train_speed(iter/s)": 0.411931 }, { "acc": 0.89916248, "epoch": 0.8855224325129288, "grad_norm": 9.558063507080078, "learning_rate": 9.21836206917788e-06, "loss": 0.45265856, "memory(GiB)": 34.88, "step": 32705, "train_speed(iter/s)": 0.411935 }, { "acc": 0.87606773, "epoch": 0.8856578127961444, "grad_norm": 8.865981101989746, "learning_rate": 9.218061622602768e-06, "loss": 0.66212921, "memory(GiB)": 34.88, "step": 32710, "train_speed(iter/s)": 0.411938 }, { "acc": 0.85598402, "epoch": 0.8857931930793599, "grad_norm": 9.250475883483887, "learning_rate": 9.217761123194407e-06, "loss": 0.8031251, "memory(GiB)": 34.88, "step": 32715, "train_speed(iter/s)": 0.411941 }, { "acc": 0.86853933, "epoch": 0.8859285733625755, "grad_norm": 5.839211940765381, "learning_rate": 9.217460570956565e-06, "loss": 0.66990185, "memory(GiB)": 34.88, "step": 32720, "train_speed(iter/s)": 0.411944 }, { "acc": 0.87835398, "epoch": 0.886063953645791, "grad_norm": 5.0468878746032715, "learning_rate": 9.217159965893005e-06, "loss": 0.59470301, "memory(GiB)": 34.88, "step": 32725, "train_speed(iter/s)": 0.411947 }, { "acc": 0.90134926, "epoch": 0.8861993339290066, "grad_norm": 10.579058647155762, "learning_rate": 9.216859308007493e-06, "loss": 0.4706625, "memory(GiB)": 34.88, "step": 32730, "train_speed(iter/s)": 0.41195 }, { "acc": 0.89275913, "epoch": 0.8863347142122221, "grad_norm": 11.317720413208008, "learning_rate": 9.216558597303798e-06, "loss": 0.51611948, "memory(GiB)": 34.88, "step": 32735, "train_speed(iter/s)": 0.411953 }, { "acc": 0.9095417, "epoch": 0.8864700944954377, "grad_norm": 6.11451530456543, "learning_rate": 9.21625783378568e-06, "loss": 0.4547967, "memory(GiB)": 34.88, "step": 32740, "train_speed(iter/s)": 0.411956 }, { "acc": 0.86728821, "epoch": 0.8866054747786533, "grad_norm": 7.963024616241455, "learning_rate": 9.215957017456914e-06, "loss": 0.81785412, "memory(GiB)": 34.88, "step": 32745, "train_speed(iter/s)": 0.411959 }, { "acc": 0.87095013, "epoch": 0.8867408550618688, "grad_norm": 26.131776809692383, "learning_rate": 9.215656148321266e-06, "loss": 0.70973148, "memory(GiB)": 34.88, "step": 32750, "train_speed(iter/s)": 0.411962 }, { "acc": 0.85440121, "epoch": 0.8868762353450843, "grad_norm": 10.552324295043945, "learning_rate": 9.215355226382503e-06, "loss": 0.77628961, "memory(GiB)": 34.88, "step": 32755, "train_speed(iter/s)": 0.411965 }, { "acc": 0.8752039, "epoch": 0.8870116156282999, "grad_norm": 9.865804672241211, "learning_rate": 9.215054251644397e-06, "loss": 0.59485683, "memory(GiB)": 34.88, "step": 32760, "train_speed(iter/s)": 0.411969 }, { "acc": 0.87808981, "epoch": 0.8871469959115155, "grad_norm": 10.951172828674316, "learning_rate": 9.214753224110717e-06, "loss": 0.55384846, "memory(GiB)": 34.88, "step": 32765, "train_speed(iter/s)": 0.411972 }, { "acc": 0.87361393, "epoch": 0.887282376194731, "grad_norm": 8.572307586669922, "learning_rate": 9.214452143785234e-06, "loss": 0.65099697, "memory(GiB)": 34.88, "step": 32770, "train_speed(iter/s)": 0.411975 }, { "acc": 0.88245096, "epoch": 0.8874177564779465, "grad_norm": 6.209863662719727, "learning_rate": 9.21415101067172e-06, "loss": 0.53307428, "memory(GiB)": 34.88, "step": 32775, "train_speed(iter/s)": 0.411979 }, { "acc": 0.86951389, "epoch": 0.8875531367611621, "grad_norm": 14.517379760742188, "learning_rate": 9.21384982477395e-06, "loss": 0.70956926, "memory(GiB)": 34.88, "step": 32780, "train_speed(iter/s)": 0.411982 }, { "acc": 0.87508469, "epoch": 0.8876885170443777, "grad_norm": 9.623201370239258, "learning_rate": 9.21354858609569e-06, "loss": 0.6534791, "memory(GiB)": 34.88, "step": 32785, "train_speed(iter/s)": 0.411985 }, { "acc": 0.87805624, "epoch": 0.8878238973275933, "grad_norm": 5.571049213409424, "learning_rate": 9.213247294640721e-06, "loss": 0.57258654, "memory(GiB)": 34.88, "step": 32790, "train_speed(iter/s)": 0.411988 }, { "acc": 0.84518442, "epoch": 0.8879592776108087, "grad_norm": 11.304466247558594, "learning_rate": 9.212945950412813e-06, "loss": 0.73142686, "memory(GiB)": 34.88, "step": 32795, "train_speed(iter/s)": 0.411991 }, { "acc": 0.88057518, "epoch": 0.8880946578940243, "grad_norm": 8.986528396606445, "learning_rate": 9.212644553415741e-06, "loss": 0.58904748, "memory(GiB)": 34.88, "step": 32800, "train_speed(iter/s)": 0.411994 }, { "acc": 0.88549967, "epoch": 0.8882300381772399, "grad_norm": 7.566671371459961, "learning_rate": 9.212343103653283e-06, "loss": 0.58743649, "memory(GiB)": 34.88, "step": 32805, "train_speed(iter/s)": 0.411998 }, { "acc": 0.86988621, "epoch": 0.8883654184604555, "grad_norm": 19.698383331298828, "learning_rate": 9.212041601129215e-06, "loss": 0.6851717, "memory(GiB)": 34.88, "step": 32810, "train_speed(iter/s)": 0.412001 }, { "acc": 0.90898638, "epoch": 0.8885007987436709, "grad_norm": 14.096442222595215, "learning_rate": 9.211740045847311e-06, "loss": 0.4885808, "memory(GiB)": 34.88, "step": 32815, "train_speed(iter/s)": 0.412003 }, { "acc": 0.8872366, "epoch": 0.8886361790268865, "grad_norm": 7.59909725189209, "learning_rate": 9.211438437811349e-06, "loss": 0.6022831, "memory(GiB)": 34.88, "step": 32820, "train_speed(iter/s)": 0.412007 }, { "acc": 0.88450451, "epoch": 0.8887715593101021, "grad_norm": 8.3065767288208, "learning_rate": 9.21113677702511e-06, "loss": 0.5890553, "memory(GiB)": 34.88, "step": 32825, "train_speed(iter/s)": 0.41201 }, { "acc": 0.8842062, "epoch": 0.8889069395933177, "grad_norm": 10.879313468933105, "learning_rate": 9.210835063492372e-06, "loss": 0.60458927, "memory(GiB)": 34.88, "step": 32830, "train_speed(iter/s)": 0.412013 }, { "acc": 0.89000835, "epoch": 0.8890423198765331, "grad_norm": 3.6164512634277344, "learning_rate": 9.210533297216915e-06, "loss": 0.57121902, "memory(GiB)": 34.88, "step": 32835, "train_speed(iter/s)": 0.412016 }, { "acc": 0.88930626, "epoch": 0.8891777001597487, "grad_norm": 6.791632652282715, "learning_rate": 9.210231478202517e-06, "loss": 0.55754623, "memory(GiB)": 34.88, "step": 32840, "train_speed(iter/s)": 0.412019 }, { "acc": 0.89699574, "epoch": 0.8893130804429643, "grad_norm": 11.876246452331543, "learning_rate": 9.20992960645296e-06, "loss": 0.51347294, "memory(GiB)": 34.88, "step": 32845, "train_speed(iter/s)": 0.412022 }, { "acc": 0.88615055, "epoch": 0.8894484607261799, "grad_norm": 6.388457775115967, "learning_rate": 9.209627681972026e-06, "loss": 0.58787489, "memory(GiB)": 34.88, "step": 32850, "train_speed(iter/s)": 0.412026 }, { "acc": 0.88804064, "epoch": 0.8895838410093954, "grad_norm": 5.07595157623291, "learning_rate": 9.209325704763496e-06, "loss": 0.56999645, "memory(GiB)": 34.88, "step": 32855, "train_speed(iter/s)": 0.412029 }, { "acc": 0.88932076, "epoch": 0.8897192212926109, "grad_norm": 6.525789737701416, "learning_rate": 9.209023674831155e-06, "loss": 0.52835398, "memory(GiB)": 34.88, "step": 32860, "train_speed(iter/s)": 0.412032 }, { "acc": 0.87840538, "epoch": 0.8898546015758265, "grad_norm": 15.642423629760742, "learning_rate": 9.208721592178783e-06, "loss": 0.6546236, "memory(GiB)": 34.88, "step": 32865, "train_speed(iter/s)": 0.412035 }, { "acc": 0.87259378, "epoch": 0.8899899818590421, "grad_norm": 14.482162475585938, "learning_rate": 9.20841945681017e-06, "loss": 0.74828691, "memory(GiB)": 34.88, "step": 32870, "train_speed(iter/s)": 0.412039 }, { "acc": 0.8952774, "epoch": 0.8901253621422576, "grad_norm": 10.782443046569824, "learning_rate": 9.208117268729092e-06, "loss": 0.53163853, "memory(GiB)": 34.88, "step": 32875, "train_speed(iter/s)": 0.412042 }, { "acc": 0.89232674, "epoch": 0.8902607424254732, "grad_norm": 5.645571708679199, "learning_rate": 9.207815027939346e-06, "loss": 0.56736908, "memory(GiB)": 34.88, "step": 32880, "train_speed(iter/s)": 0.412045 }, { "acc": 0.89153938, "epoch": 0.8903961227086887, "grad_norm": 6.759779453277588, "learning_rate": 9.207512734444709e-06, "loss": 0.56599798, "memory(GiB)": 34.88, "step": 32885, "train_speed(iter/s)": 0.412048 }, { "acc": 0.85530653, "epoch": 0.8905315029919043, "grad_norm": 20.667604446411133, "learning_rate": 9.207210388248969e-06, "loss": 0.71456871, "memory(GiB)": 34.88, "step": 32890, "train_speed(iter/s)": 0.412051 }, { "acc": 0.86544857, "epoch": 0.8906668832751198, "grad_norm": 8.037873268127441, "learning_rate": 9.206907989355918e-06, "loss": 0.68728085, "memory(GiB)": 34.88, "step": 32895, "train_speed(iter/s)": 0.412054 }, { "acc": 0.87949944, "epoch": 0.8908022635583354, "grad_norm": 10.56718921661377, "learning_rate": 9.206605537769339e-06, "loss": 0.60057883, "memory(GiB)": 34.88, "step": 32900, "train_speed(iter/s)": 0.412057 }, { "acc": 0.8854125, "epoch": 0.8909376438415509, "grad_norm": 8.716681480407715, "learning_rate": 9.206303033493025e-06, "loss": 0.5573184, "memory(GiB)": 34.88, "step": 32905, "train_speed(iter/s)": 0.412061 }, { "acc": 0.86330748, "epoch": 0.8910730241247665, "grad_norm": 12.580655097961426, "learning_rate": 9.206000476530763e-06, "loss": 0.63425698, "memory(GiB)": 34.88, "step": 32910, "train_speed(iter/s)": 0.412064 }, { "acc": 0.87341194, "epoch": 0.891208404407982, "grad_norm": 4.472208499908447, "learning_rate": 9.205697866886343e-06, "loss": 0.67868595, "memory(GiB)": 34.88, "step": 32915, "train_speed(iter/s)": 0.412067 }, { "acc": 0.8906868, "epoch": 0.8913437846911976, "grad_norm": 4.597469806671143, "learning_rate": 9.205395204563559e-06, "loss": 0.57250175, "memory(GiB)": 34.88, "step": 32920, "train_speed(iter/s)": 0.41207 }, { "acc": 0.85912247, "epoch": 0.8914791649744132, "grad_norm": 22.796276092529297, "learning_rate": 9.205092489566198e-06, "loss": 0.82096252, "memory(GiB)": 34.88, "step": 32925, "train_speed(iter/s)": 0.412074 }, { "acc": 0.87023849, "epoch": 0.8916145452576287, "grad_norm": 14.560158729553223, "learning_rate": 9.204789721898055e-06, "loss": 0.69185119, "memory(GiB)": 34.88, "step": 32930, "train_speed(iter/s)": 0.412076 }, { "acc": 0.86433334, "epoch": 0.8917499255408442, "grad_norm": 6.596859931945801, "learning_rate": 9.20448690156292e-06, "loss": 0.69309397, "memory(GiB)": 34.88, "step": 32935, "train_speed(iter/s)": 0.41208 }, { "acc": 0.88448391, "epoch": 0.8918853058240598, "grad_norm": 13.131379127502441, "learning_rate": 9.204184028564589e-06, "loss": 0.56105633, "memory(GiB)": 34.88, "step": 32940, "train_speed(iter/s)": 0.412083 }, { "acc": 0.89035559, "epoch": 0.8920206861072754, "grad_norm": 10.391149520874023, "learning_rate": 9.203881102906856e-06, "loss": 0.4882236, "memory(GiB)": 34.88, "step": 32945, "train_speed(iter/s)": 0.412086 }, { "acc": 0.89410124, "epoch": 0.8921560663904909, "grad_norm": 47.57415008544922, "learning_rate": 9.203578124593514e-06, "loss": 0.60373564, "memory(GiB)": 34.88, "step": 32950, "train_speed(iter/s)": 0.412089 }, { "acc": 0.8840045, "epoch": 0.8922914466737064, "grad_norm": 7.242854118347168, "learning_rate": 9.203275093628363e-06, "loss": 0.66006918, "memory(GiB)": 34.88, "step": 32955, "train_speed(iter/s)": 0.412093 }, { "acc": 0.86489, "epoch": 0.892426826956922, "grad_norm": 6.950317859649658, "learning_rate": 9.202972010015192e-06, "loss": 0.68634357, "memory(GiB)": 34.88, "step": 32960, "train_speed(iter/s)": 0.412096 }, { "acc": 0.88848743, "epoch": 0.8925622072401376, "grad_norm": 10.851608276367188, "learning_rate": 9.2026688737578e-06, "loss": 0.56719089, "memory(GiB)": 34.88, "step": 32965, "train_speed(iter/s)": 0.412099 }, { "acc": 0.88143158, "epoch": 0.8926975875233532, "grad_norm": 7.806402206420898, "learning_rate": 9.202365684859988e-06, "loss": 0.58668232, "memory(GiB)": 34.88, "step": 32970, "train_speed(iter/s)": 0.412102 }, { "acc": 0.87693796, "epoch": 0.8928329678065686, "grad_norm": 10.80266284942627, "learning_rate": 9.202062443325551e-06, "loss": 0.55989432, "memory(GiB)": 34.88, "step": 32975, "train_speed(iter/s)": 0.412105 }, { "acc": 0.87596321, "epoch": 0.8929683480897842, "grad_norm": 8.09460735321045, "learning_rate": 9.201759149158289e-06, "loss": 0.64227796, "memory(GiB)": 34.88, "step": 32980, "train_speed(iter/s)": 0.412108 }, { "acc": 0.88050613, "epoch": 0.8931037283729998, "grad_norm": 9.86085033416748, "learning_rate": 9.201455802362e-06, "loss": 0.56884756, "memory(GiB)": 34.88, "step": 32985, "train_speed(iter/s)": 0.412112 }, { "acc": 0.87414513, "epoch": 0.8932391086562154, "grad_norm": 11.984343528747559, "learning_rate": 9.201152402940486e-06, "loss": 0.64448819, "memory(GiB)": 34.88, "step": 32990, "train_speed(iter/s)": 0.412115 }, { "acc": 0.85083103, "epoch": 0.8933744889394308, "grad_norm": 10.136957168579102, "learning_rate": 9.200848950897545e-06, "loss": 0.83959217, "memory(GiB)": 34.88, "step": 32995, "train_speed(iter/s)": 0.412118 }, { "acc": 0.87136965, "epoch": 0.8935098692226464, "grad_norm": 15.726198196411133, "learning_rate": 9.20054544623698e-06, "loss": 0.60637646, "memory(GiB)": 34.88, "step": 33000, "train_speed(iter/s)": 0.412122 }, { "acc": 0.88102531, "epoch": 0.893645249505862, "grad_norm": 8.193482398986816, "learning_rate": 9.200241888962592e-06, "loss": 0.56382732, "memory(GiB)": 34.88, "step": 33005, "train_speed(iter/s)": 0.412125 }, { "acc": 0.88766994, "epoch": 0.8937806297890776, "grad_norm": 7.211289882659912, "learning_rate": 9.199938279078185e-06, "loss": 0.50836401, "memory(GiB)": 34.88, "step": 33010, "train_speed(iter/s)": 0.412128 }, { "acc": 0.88568163, "epoch": 0.893916010072293, "grad_norm": 7.739567756652832, "learning_rate": 9.199634616587562e-06, "loss": 0.60386028, "memory(GiB)": 34.88, "step": 33015, "train_speed(iter/s)": 0.412131 }, { "acc": 0.88033009, "epoch": 0.8940513903555086, "grad_norm": 7.832873821258545, "learning_rate": 9.199330901494527e-06, "loss": 0.67270794, "memory(GiB)": 34.88, "step": 33020, "train_speed(iter/s)": 0.412134 }, { "acc": 0.88054876, "epoch": 0.8941867706387242, "grad_norm": 8.153916358947754, "learning_rate": 9.199027133802884e-06, "loss": 0.70785432, "memory(GiB)": 34.88, "step": 33025, "train_speed(iter/s)": 0.412137 }, { "acc": 0.88145103, "epoch": 0.8943221509219397, "grad_norm": 3.298572540283203, "learning_rate": 9.198723313516438e-06, "loss": 0.59217587, "memory(GiB)": 34.88, "step": 33030, "train_speed(iter/s)": 0.41214 }, { "acc": 0.87548246, "epoch": 0.8944575312051553, "grad_norm": 16.63486671447754, "learning_rate": 9.198419440638996e-06, "loss": 0.5543664, "memory(GiB)": 34.88, "step": 33035, "train_speed(iter/s)": 0.412143 }, { "acc": 0.89151611, "epoch": 0.8945929114883708, "grad_norm": 12.162714958190918, "learning_rate": 9.198115515174363e-06, "loss": 0.44975204, "memory(GiB)": 34.88, "step": 33040, "train_speed(iter/s)": 0.412147 }, { "acc": 0.86410084, "epoch": 0.8947282917715864, "grad_norm": 6.748116970062256, "learning_rate": 9.197811537126349e-06, "loss": 0.73580523, "memory(GiB)": 34.88, "step": 33045, "train_speed(iter/s)": 0.41215 }, { "acc": 0.87393293, "epoch": 0.8948636720548019, "grad_norm": 5.708230972290039, "learning_rate": 9.197507506498759e-06, "loss": 0.68738022, "memory(GiB)": 34.88, "step": 33050, "train_speed(iter/s)": 0.412153 }, { "acc": 0.88085804, "epoch": 0.8949990523380175, "grad_norm": 9.835095405578613, "learning_rate": 9.197203423295404e-06, "loss": 0.60668144, "memory(GiB)": 34.88, "step": 33055, "train_speed(iter/s)": 0.412157 }, { "acc": 0.87028456, "epoch": 0.895134432621233, "grad_norm": 11.877068519592285, "learning_rate": 9.196899287520093e-06, "loss": 0.74233432, "memory(GiB)": 34.88, "step": 33060, "train_speed(iter/s)": 0.41216 }, { "acc": 0.90411158, "epoch": 0.8952698129044486, "grad_norm": 5.190904140472412, "learning_rate": 9.196595099176632e-06, "loss": 0.45094404, "memory(GiB)": 34.88, "step": 33065, "train_speed(iter/s)": 0.412163 }, { "acc": 0.89478149, "epoch": 0.8954051931876641, "grad_norm": 5.871922969818115, "learning_rate": 9.196290858268836e-06, "loss": 0.5491641, "memory(GiB)": 34.88, "step": 33070, "train_speed(iter/s)": 0.412166 }, { "acc": 0.85281677, "epoch": 0.8955405734708797, "grad_norm": 13.018378257751465, "learning_rate": 9.195986564800515e-06, "loss": 0.83678551, "memory(GiB)": 34.88, "step": 33075, "train_speed(iter/s)": 0.412169 }, { "acc": 0.87178421, "epoch": 0.8956759537540953, "grad_norm": 8.73986530303955, "learning_rate": 9.19568221877548e-06, "loss": 0.59914522, "memory(GiB)": 34.88, "step": 33080, "train_speed(iter/s)": 0.412172 }, { "acc": 0.89136372, "epoch": 0.8958113340373108, "grad_norm": 4.6945271492004395, "learning_rate": 9.195377820197546e-06, "loss": 0.53540277, "memory(GiB)": 34.88, "step": 33085, "train_speed(iter/s)": 0.412175 }, { "acc": 0.87006702, "epoch": 0.8959467143205263, "grad_norm": 10.347139358520508, "learning_rate": 9.195073369070521e-06, "loss": 0.71508541, "memory(GiB)": 34.88, "step": 33090, "train_speed(iter/s)": 0.412179 }, { "acc": 0.87584534, "epoch": 0.8960820946037419, "grad_norm": 12.02073860168457, "learning_rate": 9.194768865398225e-06, "loss": 0.61572771, "memory(GiB)": 34.88, "step": 33095, "train_speed(iter/s)": 0.412182 }, { "acc": 0.88875771, "epoch": 0.8962174748869575, "grad_norm": 9.484484672546387, "learning_rate": 9.19446430918447e-06, "loss": 0.55227418, "memory(GiB)": 34.88, "step": 33100, "train_speed(iter/s)": 0.412185 }, { "acc": 0.89483814, "epoch": 0.896352855170173, "grad_norm": 4.568131446838379, "learning_rate": 9.194159700433071e-06, "loss": 0.46305981, "memory(GiB)": 34.88, "step": 33105, "train_speed(iter/s)": 0.412188 }, { "acc": 0.87711067, "epoch": 0.8964882354533885, "grad_norm": 9.079221725463867, "learning_rate": 9.19385503914784e-06, "loss": 0.63480177, "memory(GiB)": 34.88, "step": 33110, "train_speed(iter/s)": 0.412191 }, { "acc": 0.88201294, "epoch": 0.8966236157366041, "grad_norm": 9.322118759155273, "learning_rate": 9.193550325332602e-06, "loss": 0.58742599, "memory(GiB)": 34.88, "step": 33115, "train_speed(iter/s)": 0.412195 }, { "acc": 0.88170986, "epoch": 0.8967589960198197, "grad_norm": 11.325472831726074, "learning_rate": 9.193245558991167e-06, "loss": 0.59964824, "memory(GiB)": 34.88, "step": 33120, "train_speed(iter/s)": 0.412197 }, { "acc": 0.87669106, "epoch": 0.8968943763030353, "grad_norm": 8.316192626953125, "learning_rate": 9.192940740127354e-06, "loss": 0.60140462, "memory(GiB)": 34.88, "step": 33125, "train_speed(iter/s)": 0.412201 }, { "acc": 0.88256187, "epoch": 0.8970297565862507, "grad_norm": 14.834455490112305, "learning_rate": 9.192635868744985e-06, "loss": 0.55519018, "memory(GiB)": 34.88, "step": 33130, "train_speed(iter/s)": 0.412204 }, { "acc": 0.89128838, "epoch": 0.8971651368694663, "grad_norm": 10.215262413024902, "learning_rate": 9.192330944847875e-06, "loss": 0.55423746, "memory(GiB)": 34.88, "step": 33135, "train_speed(iter/s)": 0.412207 }, { "acc": 0.8852335, "epoch": 0.8973005171526819, "grad_norm": 8.126677513122559, "learning_rate": 9.192025968439848e-06, "loss": 0.61019115, "memory(GiB)": 34.88, "step": 33140, "train_speed(iter/s)": 0.41221 }, { "acc": 0.86310291, "epoch": 0.8974358974358975, "grad_norm": 27.4287109375, "learning_rate": 9.19172093952472e-06, "loss": 0.77965984, "memory(GiB)": 34.88, "step": 33145, "train_speed(iter/s)": 0.412212 }, { "acc": 0.88584929, "epoch": 0.8975712777191129, "grad_norm": 5.476166248321533, "learning_rate": 9.191415858106313e-06, "loss": 0.6818449, "memory(GiB)": 34.88, "step": 33150, "train_speed(iter/s)": 0.412215 }, { "acc": 0.89140902, "epoch": 0.8977066580023285, "grad_norm": 11.629975318908691, "learning_rate": 9.191110724188452e-06, "loss": 0.55611982, "memory(GiB)": 34.88, "step": 33155, "train_speed(iter/s)": 0.412218 }, { "acc": 0.85442009, "epoch": 0.8978420382855441, "grad_norm": 10.831496238708496, "learning_rate": 9.190805537774956e-06, "loss": 0.89673834, "memory(GiB)": 34.88, "step": 33160, "train_speed(iter/s)": 0.412221 }, { "acc": 0.885569, "epoch": 0.8979774185687597, "grad_norm": 14.345075607299805, "learning_rate": 9.190500298869649e-06, "loss": 0.66006184, "memory(GiB)": 34.88, "step": 33165, "train_speed(iter/s)": 0.412224 }, { "acc": 0.8801199, "epoch": 0.8981127988519751, "grad_norm": 6.545248985290527, "learning_rate": 9.190195007476355e-06, "loss": 0.57375131, "memory(GiB)": 34.88, "step": 33170, "train_speed(iter/s)": 0.412227 }, { "acc": 0.86413813, "epoch": 0.8982481791351907, "grad_norm": 11.41982364654541, "learning_rate": 9.189889663598899e-06, "loss": 0.67944446, "memory(GiB)": 34.88, "step": 33175, "train_speed(iter/s)": 0.41223 }, { "acc": 0.90601044, "epoch": 0.8983835594184063, "grad_norm": 14.563220024108887, "learning_rate": 9.189584267241104e-06, "loss": 0.48326998, "memory(GiB)": 34.88, "step": 33180, "train_speed(iter/s)": 0.412233 }, { "acc": 0.89005365, "epoch": 0.8985189397016219, "grad_norm": 16.019554138183594, "learning_rate": 9.189278818406798e-06, "loss": 0.51940727, "memory(GiB)": 34.88, "step": 33185, "train_speed(iter/s)": 0.412236 }, { "acc": 0.87507973, "epoch": 0.8986543199848374, "grad_norm": 6.721287727355957, "learning_rate": 9.188973317099807e-06, "loss": 0.57297564, "memory(GiB)": 34.88, "step": 33190, "train_speed(iter/s)": 0.41224 }, { "acc": 0.85657978, "epoch": 0.8987897002680529, "grad_norm": 6.353188514709473, "learning_rate": 9.188667763323956e-06, "loss": 0.80110226, "memory(GiB)": 34.88, "step": 33195, "train_speed(iter/s)": 0.412243 }, { "acc": 0.86850605, "epoch": 0.8989250805512685, "grad_norm": 6.347718715667725, "learning_rate": 9.188362157083074e-06, "loss": 0.6393209, "memory(GiB)": 34.88, "step": 33200, "train_speed(iter/s)": 0.412246 }, { "acc": 0.89034338, "epoch": 0.8990604608344841, "grad_norm": 17.502826690673828, "learning_rate": 9.18805649838099e-06, "loss": 0.58263116, "memory(GiB)": 34.88, "step": 33205, "train_speed(iter/s)": 0.412248 }, { "acc": 0.87369404, "epoch": 0.8991958411176996, "grad_norm": 9.557303428649902, "learning_rate": 9.187750787221532e-06, "loss": 0.70468731, "memory(GiB)": 34.88, "step": 33210, "train_speed(iter/s)": 0.412252 }, { "acc": 0.89819326, "epoch": 0.8993312214009151, "grad_norm": 14.855823516845703, "learning_rate": 9.18744502360853e-06, "loss": 0.44331951, "memory(GiB)": 34.88, "step": 33215, "train_speed(iter/s)": 0.412255 }, { "acc": 0.8751338, "epoch": 0.8994666016841307, "grad_norm": 9.421771049499512, "learning_rate": 9.187139207545815e-06, "loss": 0.63743277, "memory(GiB)": 34.88, "step": 33220, "train_speed(iter/s)": 0.412258 }, { "acc": 0.89929447, "epoch": 0.8996019819673463, "grad_norm": 4.308186054229736, "learning_rate": 9.186833339037218e-06, "loss": 0.5131269, "memory(GiB)": 34.88, "step": 33225, "train_speed(iter/s)": 0.412262 }, { "acc": 0.88042803, "epoch": 0.8997373622505618, "grad_norm": 7.610211372375488, "learning_rate": 9.186527418086569e-06, "loss": 0.6232152, "memory(GiB)": 34.88, "step": 33230, "train_speed(iter/s)": 0.412265 }, { "acc": 0.87247105, "epoch": 0.8998727425337774, "grad_norm": 41.32841110229492, "learning_rate": 9.186221444697699e-06, "loss": 0.7255589, "memory(GiB)": 34.88, "step": 33235, "train_speed(iter/s)": 0.412268 }, { "acc": 0.87969475, "epoch": 0.9000081228169929, "grad_norm": 8.772448539733887, "learning_rate": 9.185915418874444e-06, "loss": 0.57667656, "memory(GiB)": 34.88, "step": 33240, "train_speed(iter/s)": 0.412271 }, { "acc": 0.85188713, "epoch": 0.9001435031002085, "grad_norm": 14.385360717773438, "learning_rate": 9.185609340620637e-06, "loss": 0.77578096, "memory(GiB)": 34.88, "step": 33245, "train_speed(iter/s)": 0.412274 }, { "acc": 0.85475655, "epoch": 0.900278883383424, "grad_norm": 6.885438442230225, "learning_rate": 9.185303209940113e-06, "loss": 0.7783165, "memory(GiB)": 34.88, "step": 33250, "train_speed(iter/s)": 0.412277 }, { "acc": 0.89794693, "epoch": 0.9004142636666396, "grad_norm": 24.085487365722656, "learning_rate": 9.184997026836704e-06, "loss": 0.49639435, "memory(GiB)": 34.88, "step": 33255, "train_speed(iter/s)": 0.41228 }, { "acc": 0.86997719, "epoch": 0.9005496439498551, "grad_norm": 8.23715591430664, "learning_rate": 9.184690791314247e-06, "loss": 0.77057285, "memory(GiB)": 34.88, "step": 33260, "train_speed(iter/s)": 0.412283 }, { "acc": 0.88973894, "epoch": 0.9006850242330707, "grad_norm": 8.349475860595703, "learning_rate": 9.184384503376579e-06, "loss": 0.55689793, "memory(GiB)": 34.88, "step": 33265, "train_speed(iter/s)": 0.412287 }, { "acc": 0.89440565, "epoch": 0.9008204045162862, "grad_norm": 4.569744110107422, "learning_rate": 9.184078163027537e-06, "loss": 0.50612068, "memory(GiB)": 34.88, "step": 33270, "train_speed(iter/s)": 0.41229 }, { "acc": 0.88896484, "epoch": 0.9009557847995018, "grad_norm": 5.149348258972168, "learning_rate": 9.183771770270958e-06, "loss": 0.60271616, "memory(GiB)": 34.88, "step": 33275, "train_speed(iter/s)": 0.412293 }, { "acc": 0.88627796, "epoch": 0.9010911650827174, "grad_norm": 124.972900390625, "learning_rate": 9.183465325110678e-06, "loss": 0.57181854, "memory(GiB)": 34.88, "step": 33280, "train_speed(iter/s)": 0.412296 }, { "acc": 0.90026112, "epoch": 0.9012265453659329, "grad_norm": 4.865016937255859, "learning_rate": 9.18315882755054e-06, "loss": 0.48822503, "memory(GiB)": 34.88, "step": 33285, "train_speed(iter/s)": 0.4123 }, { "acc": 0.88566875, "epoch": 0.9013619256491484, "grad_norm": 9.530119895935059, "learning_rate": 9.18285227759438e-06, "loss": 0.5609045, "memory(GiB)": 34.88, "step": 33290, "train_speed(iter/s)": 0.412303 }, { "acc": 0.9014327, "epoch": 0.901497305932364, "grad_norm": 8.918930053710938, "learning_rate": 9.182545675246038e-06, "loss": 0.53131514, "memory(GiB)": 34.88, "step": 33295, "train_speed(iter/s)": 0.412306 }, { "acc": 0.87149906, "epoch": 0.9016326862155796, "grad_norm": 6.936838150024414, "learning_rate": 9.18223902050936e-06, "loss": 0.66638107, "memory(GiB)": 34.88, "step": 33300, "train_speed(iter/s)": 0.412309 }, { "acc": 0.87700195, "epoch": 0.9017680664987952, "grad_norm": 8.7618989944458, "learning_rate": 9.18193231338818e-06, "loss": 0.74771519, "memory(GiB)": 34.88, "step": 33305, "train_speed(iter/s)": 0.412312 }, { "acc": 0.86955595, "epoch": 0.9019034467820106, "grad_norm": 8.185037612915039, "learning_rate": 9.181625553886346e-06, "loss": 0.64966335, "memory(GiB)": 34.88, "step": 33310, "train_speed(iter/s)": 0.412315 }, { "acc": 0.86381025, "epoch": 0.9020388270652262, "grad_norm": 32.491336822509766, "learning_rate": 9.181318742007698e-06, "loss": 0.71963525, "memory(GiB)": 34.88, "step": 33315, "train_speed(iter/s)": 0.412318 }, { "acc": 0.89289742, "epoch": 0.9021742073484418, "grad_norm": 12.634282112121582, "learning_rate": 9.18101187775608e-06, "loss": 0.58724566, "memory(GiB)": 34.88, "step": 33320, "train_speed(iter/s)": 0.412322 }, { "acc": 0.87547436, "epoch": 0.9023095876316574, "grad_norm": 10.872456550598145, "learning_rate": 9.180704961135335e-06, "loss": 0.65204039, "memory(GiB)": 34.88, "step": 33325, "train_speed(iter/s)": 0.412325 }, { "acc": 0.87747555, "epoch": 0.9024449679148728, "grad_norm": 6.748617649078369, "learning_rate": 9.18039799214931e-06, "loss": 0.57416506, "memory(GiB)": 34.88, "step": 33330, "train_speed(iter/s)": 0.412328 }, { "acc": 0.8687479, "epoch": 0.9025803481980884, "grad_norm": 13.8569917678833, "learning_rate": 9.180090970801848e-06, "loss": 0.70937033, "memory(GiB)": 34.88, "step": 33335, "train_speed(iter/s)": 0.412331 }, { "acc": 0.85965939, "epoch": 0.902715728481304, "grad_norm": 21.479442596435547, "learning_rate": 9.179783897096799e-06, "loss": 0.85773392, "memory(GiB)": 34.88, "step": 33340, "train_speed(iter/s)": 0.412335 }, { "acc": 0.85363159, "epoch": 0.9028511087645196, "grad_norm": 12.197830200195312, "learning_rate": 9.179476771038003e-06, "loss": 0.78564978, "memory(GiB)": 34.88, "step": 33345, "train_speed(iter/s)": 0.412338 }, { "acc": 0.84902287, "epoch": 0.902986489047735, "grad_norm": 11.51815128326416, "learning_rate": 9.179169592629314e-06, "loss": 0.90383902, "memory(GiB)": 34.88, "step": 33350, "train_speed(iter/s)": 0.412341 }, { "acc": 0.86658649, "epoch": 0.9031218693309506, "grad_norm": 9.69533634185791, "learning_rate": 9.178862361874576e-06, "loss": 0.7028018, "memory(GiB)": 34.88, "step": 33355, "train_speed(iter/s)": 0.412344 }, { "acc": 0.85839329, "epoch": 0.9032572496141662, "grad_norm": 9.111360549926758, "learning_rate": 9.17855507877764e-06, "loss": 0.71953168, "memory(GiB)": 34.88, "step": 33360, "train_speed(iter/s)": 0.412347 }, { "acc": 0.88636284, "epoch": 0.9033926298973818, "grad_norm": 12.697392463684082, "learning_rate": 9.178247743342355e-06, "loss": 0.56833982, "memory(GiB)": 34.88, "step": 33365, "train_speed(iter/s)": 0.41235 }, { "acc": 0.8686655, "epoch": 0.9035280101805973, "grad_norm": 8.65910530090332, "learning_rate": 9.177940355572568e-06, "loss": 0.71383028, "memory(GiB)": 34.88, "step": 33370, "train_speed(iter/s)": 0.412354 }, { "acc": 0.87338982, "epoch": 0.9036633904638128, "grad_norm": 5.5463032722473145, "learning_rate": 9.177632915472133e-06, "loss": 0.70756564, "memory(GiB)": 34.88, "step": 33375, "train_speed(iter/s)": 0.412357 }, { "acc": 0.87980442, "epoch": 0.9037987707470284, "grad_norm": 8.987732887268066, "learning_rate": 9.1773254230449e-06, "loss": 0.59700575, "memory(GiB)": 34.88, "step": 33380, "train_speed(iter/s)": 0.41236 }, { "acc": 0.89730911, "epoch": 0.903934151030244, "grad_norm": 7.301475524902344, "learning_rate": 9.177017878294721e-06, "loss": 0.51257281, "memory(GiB)": 34.88, "step": 33385, "train_speed(iter/s)": 0.412363 }, { "acc": 0.86997261, "epoch": 0.9040695313134595, "grad_norm": 8.24500846862793, "learning_rate": 9.176710281225449e-06, "loss": 0.63195763, "memory(GiB)": 34.88, "step": 33390, "train_speed(iter/s)": 0.412366 }, { "acc": 0.88076687, "epoch": 0.904204911596675, "grad_norm": 12.801121711730957, "learning_rate": 9.176402631840937e-06, "loss": 0.66049418, "memory(GiB)": 34.88, "step": 33395, "train_speed(iter/s)": 0.412369 }, { "acc": 0.8790926, "epoch": 0.9043402918798906, "grad_norm": 11.215088844299316, "learning_rate": 9.176094930145037e-06, "loss": 0.59239907, "memory(GiB)": 34.88, "step": 33400, "train_speed(iter/s)": 0.412372 }, { "acc": 0.87732496, "epoch": 0.9044756721631062, "grad_norm": 7.863978862762451, "learning_rate": 9.175787176141608e-06, "loss": 0.62598038, "memory(GiB)": 34.88, "step": 33405, "train_speed(iter/s)": 0.412375 }, { "acc": 0.86846752, "epoch": 0.9046110524463217, "grad_norm": 7.446357250213623, "learning_rate": 9.175479369834502e-06, "loss": 0.64196329, "memory(GiB)": 34.88, "step": 33410, "train_speed(iter/s)": 0.412378 }, { "acc": 0.87287025, "epoch": 0.9047464327295373, "grad_norm": 8.177849769592285, "learning_rate": 9.175171511227574e-06, "loss": 0.67779622, "memory(GiB)": 34.88, "step": 33415, "train_speed(iter/s)": 0.41238 }, { "acc": 0.88673162, "epoch": 0.9048818130127528, "grad_norm": 12.886899948120117, "learning_rate": 9.174863600324686e-06, "loss": 0.59836726, "memory(GiB)": 34.88, "step": 33420, "train_speed(iter/s)": 0.412383 }, { "acc": 0.86280823, "epoch": 0.9050171932959684, "grad_norm": 24.083742141723633, "learning_rate": 9.174555637129688e-06, "loss": 0.70272613, "memory(GiB)": 34.88, "step": 33425, "train_speed(iter/s)": 0.412387 }, { "acc": 0.88805389, "epoch": 0.9051525735791839, "grad_norm": 12.737641334533691, "learning_rate": 9.174247621646443e-06, "loss": 0.55456553, "memory(GiB)": 34.88, "step": 33430, "train_speed(iter/s)": 0.41239 }, { "acc": 0.90021181, "epoch": 0.9052879538623995, "grad_norm": 5.404524326324463, "learning_rate": 9.173939553878805e-06, "loss": 0.48290744, "memory(GiB)": 34.88, "step": 33435, "train_speed(iter/s)": 0.412393 }, { "acc": 0.89631472, "epoch": 0.905423334145615, "grad_norm": 7.692117214202881, "learning_rate": 9.17363143383064e-06, "loss": 0.60402131, "memory(GiB)": 34.88, "step": 33440, "train_speed(iter/s)": 0.412396 }, { "acc": 0.91095858, "epoch": 0.9055587144288306, "grad_norm": 6.0993428230285645, "learning_rate": 9.1733232615058e-06, "loss": 0.43868852, "memory(GiB)": 34.88, "step": 33445, "train_speed(iter/s)": 0.412399 }, { "acc": 0.88138313, "epoch": 0.9056940947120461, "grad_norm": 6.883594036102295, "learning_rate": 9.17301503690815e-06, "loss": 0.62735834, "memory(GiB)": 34.88, "step": 33450, "train_speed(iter/s)": 0.412402 }, { "acc": 0.89487991, "epoch": 0.9058294749952617, "grad_norm": 5.231435298919678, "learning_rate": 9.17270676004155e-06, "loss": 0.53427458, "memory(GiB)": 34.88, "step": 33455, "train_speed(iter/s)": 0.412405 }, { "acc": 0.87216997, "epoch": 0.9059648552784773, "grad_norm": 8.158900260925293, "learning_rate": 9.172398430909862e-06, "loss": 0.65907907, "memory(GiB)": 34.88, "step": 33460, "train_speed(iter/s)": 0.412408 }, { "acc": 0.86313457, "epoch": 0.9061002355616928, "grad_norm": 12.703699111938477, "learning_rate": 9.172090049516949e-06, "loss": 0.69892626, "memory(GiB)": 34.88, "step": 33465, "train_speed(iter/s)": 0.412411 }, { "acc": 0.85868053, "epoch": 0.9062356158449083, "grad_norm": 9.625364303588867, "learning_rate": 9.171781615866672e-06, "loss": 0.86638842, "memory(GiB)": 34.88, "step": 33470, "train_speed(iter/s)": 0.412414 }, { "acc": 0.87375317, "epoch": 0.9063709961281239, "grad_norm": 8.42521858215332, "learning_rate": 9.171473129962897e-06, "loss": 0.60404034, "memory(GiB)": 34.88, "step": 33475, "train_speed(iter/s)": 0.412417 }, { "acc": 0.87223759, "epoch": 0.9065063764113395, "grad_norm": 4.743757247924805, "learning_rate": 9.171164591809489e-06, "loss": 0.62070727, "memory(GiB)": 34.88, "step": 33480, "train_speed(iter/s)": 0.41242 }, { "acc": 0.89074297, "epoch": 0.906641756694555, "grad_norm": 12.115799903869629, "learning_rate": 9.170856001410312e-06, "loss": 0.51063824, "memory(GiB)": 34.88, "step": 33485, "train_speed(iter/s)": 0.412423 }, { "acc": 0.88813896, "epoch": 0.9067771369777705, "grad_norm": 5.255401611328125, "learning_rate": 9.17054735876923e-06, "loss": 0.5543849, "memory(GiB)": 34.88, "step": 33490, "train_speed(iter/s)": 0.412426 }, { "acc": 0.84923029, "epoch": 0.9069125172609861, "grad_norm": 11.055423736572266, "learning_rate": 9.170238663890109e-06, "loss": 0.84397097, "memory(GiB)": 34.88, "step": 33495, "train_speed(iter/s)": 0.412429 }, { "acc": 0.89712133, "epoch": 0.9070478975442017, "grad_norm": 9.256296157836914, "learning_rate": 9.169929916776821e-06, "loss": 0.46082048, "memory(GiB)": 34.88, "step": 33500, "train_speed(iter/s)": 0.412432 }, { "acc": 0.89436512, "epoch": 0.9071832778274173, "grad_norm": 4.877458095550537, "learning_rate": 9.169621117433227e-06, "loss": 0.50462551, "memory(GiB)": 34.88, "step": 33505, "train_speed(iter/s)": 0.412435 }, { "acc": 0.87851133, "epoch": 0.9073186581106327, "grad_norm": 17.215797424316406, "learning_rate": 9.169312265863201e-06, "loss": 0.64605923, "memory(GiB)": 34.88, "step": 33510, "train_speed(iter/s)": 0.412438 }, { "acc": 0.8792408, "epoch": 0.9074540383938483, "grad_norm": 8.807138442993164, "learning_rate": 9.16900336207061e-06, "loss": 0.61660757, "memory(GiB)": 34.88, "step": 33515, "train_speed(iter/s)": 0.412441 }, { "acc": 0.87441082, "epoch": 0.9075894186770639, "grad_norm": 4.9399333000183105, "learning_rate": 9.168694406059322e-06, "loss": 0.67045832, "memory(GiB)": 34.88, "step": 33520, "train_speed(iter/s)": 0.412444 }, { "acc": 0.88078547, "epoch": 0.9077247989602795, "grad_norm": 5.363398551940918, "learning_rate": 9.168385397833209e-06, "loss": 0.51738453, "memory(GiB)": 34.88, "step": 33525, "train_speed(iter/s)": 0.412447 }, { "acc": 0.88191395, "epoch": 0.9078601792434949, "grad_norm": 6.218733787536621, "learning_rate": 9.16807633739614e-06, "loss": 0.57021236, "memory(GiB)": 34.88, "step": 33530, "train_speed(iter/s)": 0.412449 }, { "acc": 0.89073181, "epoch": 0.9079955595267105, "grad_norm": 12.857823371887207, "learning_rate": 9.167767224751992e-06, "loss": 0.59934158, "memory(GiB)": 34.88, "step": 33535, "train_speed(iter/s)": 0.412453 }, { "acc": 0.89190712, "epoch": 0.9081309398099261, "grad_norm": 8.630538940429688, "learning_rate": 9.16745805990463e-06, "loss": 0.59224253, "memory(GiB)": 34.88, "step": 33540, "train_speed(iter/s)": 0.412456 }, { "acc": 0.87498531, "epoch": 0.9082663200931417, "grad_norm": 7.412973403930664, "learning_rate": 9.16714884285793e-06, "loss": 0.64330668, "memory(GiB)": 34.88, "step": 33545, "train_speed(iter/s)": 0.412459 }, { "acc": 0.87080612, "epoch": 0.9084017003763571, "grad_norm": 19.155258178710938, "learning_rate": 9.166839573615769e-06, "loss": 0.69619255, "memory(GiB)": 34.88, "step": 33550, "train_speed(iter/s)": 0.412462 }, { "acc": 0.89451027, "epoch": 0.9085370806595727, "grad_norm": 9.40389347076416, "learning_rate": 9.166530252182015e-06, "loss": 0.54943018, "memory(GiB)": 34.88, "step": 33555, "train_speed(iter/s)": 0.412466 }, { "acc": 0.90057621, "epoch": 0.9086724609427883, "grad_norm": 5.248963356018066, "learning_rate": 9.166220878560547e-06, "loss": 0.45017791, "memory(GiB)": 34.88, "step": 33560, "train_speed(iter/s)": 0.412468 }, { "acc": 0.87959232, "epoch": 0.9088078412260039, "grad_norm": 8.72407054901123, "learning_rate": 9.165911452755238e-06, "loss": 0.71047692, "memory(GiB)": 34.88, "step": 33565, "train_speed(iter/s)": 0.412471 }, { "acc": 0.84642258, "epoch": 0.9089432215092194, "grad_norm": 14.956830024719238, "learning_rate": 9.165601974769968e-06, "loss": 0.74914818, "memory(GiB)": 34.88, "step": 33570, "train_speed(iter/s)": 0.412474 }, { "acc": 0.88606291, "epoch": 0.9090786017924349, "grad_norm": 13.52766227722168, "learning_rate": 9.16529244460861e-06, "loss": 0.56858935, "memory(GiB)": 34.88, "step": 33575, "train_speed(iter/s)": 0.412478 }, { "acc": 0.86875515, "epoch": 0.9092139820756505, "grad_norm": 16.546634674072266, "learning_rate": 9.164982862275043e-06, "loss": 0.71124964, "memory(GiB)": 34.88, "step": 33580, "train_speed(iter/s)": 0.412481 }, { "acc": 0.866891, "epoch": 0.9093493623588661, "grad_norm": 10.08458423614502, "learning_rate": 9.164673227773145e-06, "loss": 0.79616842, "memory(GiB)": 34.88, "step": 33585, "train_speed(iter/s)": 0.412483 }, { "acc": 0.86391582, "epoch": 0.9094847426420816, "grad_norm": 5.646021366119385, "learning_rate": 9.164363541106794e-06, "loss": 0.76201177, "memory(GiB)": 34.88, "step": 33590, "train_speed(iter/s)": 0.412487 }, { "acc": 0.88038044, "epoch": 0.9096201229252971, "grad_norm": 8.759039878845215, "learning_rate": 9.16405380227987e-06, "loss": 0.60534363, "memory(GiB)": 34.88, "step": 33595, "train_speed(iter/s)": 0.41249 }, { "acc": 0.86677761, "epoch": 0.9097555032085127, "grad_norm": 10.4325532913208, "learning_rate": 9.163744011296255e-06, "loss": 0.72855186, "memory(GiB)": 34.88, "step": 33600, "train_speed(iter/s)": 0.412493 }, { "acc": 0.88499222, "epoch": 0.9098908834917283, "grad_norm": 12.917136192321777, "learning_rate": 9.163434168159829e-06, "loss": 0.68147955, "memory(GiB)": 34.88, "step": 33605, "train_speed(iter/s)": 0.412496 }, { "acc": 0.88663664, "epoch": 0.9100262637749438, "grad_norm": 8.94970989227295, "learning_rate": 9.16312427287447e-06, "loss": 0.62022543, "memory(GiB)": 34.88, "step": 33610, "train_speed(iter/s)": 0.412499 }, { "acc": 0.85500431, "epoch": 0.9101616440581594, "grad_norm": 7.577213287353516, "learning_rate": 9.162814325444063e-06, "loss": 0.69451089, "memory(GiB)": 34.88, "step": 33615, "train_speed(iter/s)": 0.412502 }, { "acc": 0.88601685, "epoch": 0.9102970243413749, "grad_norm": 7.652554035186768, "learning_rate": 9.162504325872489e-06, "loss": 0.56379914, "memory(GiB)": 34.88, "step": 33620, "train_speed(iter/s)": 0.412505 }, { "acc": 0.87627687, "epoch": 0.9104324046245905, "grad_norm": 13.875779151916504, "learning_rate": 9.162194274163634e-06, "loss": 0.59729309, "memory(GiB)": 34.88, "step": 33625, "train_speed(iter/s)": 0.412508 }, { "acc": 0.89416237, "epoch": 0.910567784907806, "grad_norm": 5.438466548919678, "learning_rate": 9.16188417032138e-06, "loss": 0.5017415, "memory(GiB)": 34.88, "step": 33630, "train_speed(iter/s)": 0.412511 }, { "acc": 0.90107698, "epoch": 0.9107031651910216, "grad_norm": 14.52087688446045, "learning_rate": 9.161574014349612e-06, "loss": 0.46173139, "memory(GiB)": 34.88, "step": 33635, "train_speed(iter/s)": 0.412514 }, { "acc": 0.8824604, "epoch": 0.9108385454742371, "grad_norm": 7.983242511749268, "learning_rate": 9.161263806252216e-06, "loss": 0.6448329, "memory(GiB)": 34.88, "step": 33640, "train_speed(iter/s)": 0.412517 }, { "acc": 0.84704523, "epoch": 0.9109739257574527, "grad_norm": 16.436254501342773, "learning_rate": 9.160953546033077e-06, "loss": 0.84584341, "memory(GiB)": 34.88, "step": 33645, "train_speed(iter/s)": 0.41252 }, { "acc": 0.88664513, "epoch": 0.9111093060406682, "grad_norm": 8.977581977844238, "learning_rate": 9.160643233696081e-06, "loss": 0.56591616, "memory(GiB)": 34.88, "step": 33650, "train_speed(iter/s)": 0.412524 }, { "acc": 0.86901808, "epoch": 0.9112446863238838, "grad_norm": 10.505586624145508, "learning_rate": 9.160332869245118e-06, "loss": 0.64014478, "memory(GiB)": 34.88, "step": 33655, "train_speed(iter/s)": 0.412526 }, { "acc": 0.87504082, "epoch": 0.9113800666070994, "grad_norm": 13.546917915344238, "learning_rate": 9.160022452684074e-06, "loss": 0.6904377, "memory(GiB)": 34.88, "step": 33660, "train_speed(iter/s)": 0.41253 }, { "acc": 0.88200798, "epoch": 0.9115154468903149, "grad_norm": 13.225846290588379, "learning_rate": 9.159711984016836e-06, "loss": 0.62200432, "memory(GiB)": 34.88, "step": 33665, "train_speed(iter/s)": 0.412533 }, { "acc": 0.87802267, "epoch": 0.9116508271735304, "grad_norm": 4.603908061981201, "learning_rate": 9.159401463247298e-06, "loss": 0.54207115, "memory(GiB)": 34.88, "step": 33670, "train_speed(iter/s)": 0.412536 }, { "acc": 0.88456411, "epoch": 0.911786207456746, "grad_norm": 13.100544929504395, "learning_rate": 9.159090890379344e-06, "loss": 0.65653348, "memory(GiB)": 34.88, "step": 33675, "train_speed(iter/s)": 0.412539 }, { "acc": 0.87025337, "epoch": 0.9119215877399616, "grad_norm": 32.33182907104492, "learning_rate": 9.15878026541687e-06, "loss": 0.64621754, "memory(GiB)": 34.88, "step": 33680, "train_speed(iter/s)": 0.412542 }, { "acc": 0.88188725, "epoch": 0.9120569680231772, "grad_norm": 4.813167572021484, "learning_rate": 9.158469588363765e-06, "loss": 0.64563379, "memory(GiB)": 34.88, "step": 33685, "train_speed(iter/s)": 0.412546 }, { "acc": 0.86647339, "epoch": 0.9121923483063926, "grad_norm": 10.658889770507812, "learning_rate": 9.15815885922392e-06, "loss": 0.61804094, "memory(GiB)": 34.88, "step": 33690, "train_speed(iter/s)": 0.412549 }, { "acc": 0.86799364, "epoch": 0.9123277285896082, "grad_norm": 29.55710220336914, "learning_rate": 9.157848078001226e-06, "loss": 0.73159409, "memory(GiB)": 34.88, "step": 33695, "train_speed(iter/s)": 0.412552 }, { "acc": 0.8764431, "epoch": 0.9124631088728238, "grad_norm": 8.819405555725098, "learning_rate": 9.15753724469958e-06, "loss": 0.61033831, "memory(GiB)": 34.88, "step": 33700, "train_speed(iter/s)": 0.412555 }, { "acc": 0.88421574, "epoch": 0.9125984891560394, "grad_norm": 15.74268627166748, "learning_rate": 9.157226359322873e-06, "loss": 0.6417408, "memory(GiB)": 34.88, "step": 33705, "train_speed(iter/s)": 0.412558 }, { "acc": 0.89879055, "epoch": 0.9127338694392548, "grad_norm": 11.731853485107422, "learning_rate": 9.156915421875001e-06, "loss": 0.5343379, "memory(GiB)": 34.88, "step": 33710, "train_speed(iter/s)": 0.412561 }, { "acc": 0.87545614, "epoch": 0.9128692497224704, "grad_norm": 8.457483291625977, "learning_rate": 9.156604432359857e-06, "loss": 0.61681309, "memory(GiB)": 34.88, "step": 33715, "train_speed(iter/s)": 0.412563 }, { "acc": 0.88291121, "epoch": 0.913004630005686, "grad_norm": 7.811880111694336, "learning_rate": 9.15629339078134e-06, "loss": 0.55550003, "memory(GiB)": 34.88, "step": 33720, "train_speed(iter/s)": 0.412567 }, { "acc": 0.88561716, "epoch": 0.9131400102889016, "grad_norm": 10.10025691986084, "learning_rate": 9.155982297143345e-06, "loss": 0.56700029, "memory(GiB)": 34.88, "step": 33725, "train_speed(iter/s)": 0.41257 }, { "acc": 0.89027634, "epoch": 0.913275390572117, "grad_norm": 6.156303882598877, "learning_rate": 9.155671151449767e-06, "loss": 0.56960735, "memory(GiB)": 34.88, "step": 33730, "train_speed(iter/s)": 0.412573 }, { "acc": 0.87542973, "epoch": 0.9134107708553326, "grad_norm": 5.237026691436768, "learning_rate": 9.155359953704507e-06, "loss": 0.70419741, "memory(GiB)": 34.88, "step": 33735, "train_speed(iter/s)": 0.412576 }, { "acc": 0.88253593, "epoch": 0.9135461511385482, "grad_norm": 7.924574375152588, "learning_rate": 9.15504870391146e-06, "loss": 0.60740643, "memory(GiB)": 34.88, "step": 33740, "train_speed(iter/s)": 0.412579 }, { "acc": 0.82891064, "epoch": 0.9136815314217638, "grad_norm": 7.826365947723389, "learning_rate": 9.154737402074528e-06, "loss": 0.93889284, "memory(GiB)": 34.88, "step": 33745, "train_speed(iter/s)": 0.412582 }, { "acc": 0.90005913, "epoch": 0.9138169117049793, "grad_norm": 5.238698959350586, "learning_rate": 9.15442604819761e-06, "loss": 0.47521343, "memory(GiB)": 34.88, "step": 33750, "train_speed(iter/s)": 0.412585 }, { "acc": 0.90924559, "epoch": 0.9139522919881948, "grad_norm": 5.3652520179748535, "learning_rate": 9.154114642284605e-06, "loss": 0.3797281, "memory(GiB)": 34.88, "step": 33755, "train_speed(iter/s)": 0.412589 }, { "acc": 0.8825902, "epoch": 0.9140876722714104, "grad_norm": 9.830351829528809, "learning_rate": 9.153803184339415e-06, "loss": 0.60827284, "memory(GiB)": 34.88, "step": 33760, "train_speed(iter/s)": 0.412592 }, { "acc": 0.86401892, "epoch": 0.914223052554626, "grad_norm": 6.6385817527771, "learning_rate": 9.153491674365942e-06, "loss": 0.7276906, "memory(GiB)": 34.88, "step": 33765, "train_speed(iter/s)": 0.412595 }, { "acc": 0.88178272, "epoch": 0.9143584328378415, "grad_norm": 12.340346336364746, "learning_rate": 9.153180112368086e-06, "loss": 0.66232681, "memory(GiB)": 34.88, "step": 33770, "train_speed(iter/s)": 0.412598 }, { "acc": 0.86888866, "epoch": 0.914493813121057, "grad_norm": 12.320650100708008, "learning_rate": 9.152868498349753e-06, "loss": 0.728935, "memory(GiB)": 34.88, "step": 33775, "train_speed(iter/s)": 0.412601 }, { "acc": 0.88043432, "epoch": 0.9146291934042726, "grad_norm": 9.09425163269043, "learning_rate": 9.152556832314844e-06, "loss": 0.60196161, "memory(GiB)": 34.88, "step": 33780, "train_speed(iter/s)": 0.412604 }, { "acc": 0.87892647, "epoch": 0.9147645736874882, "grad_norm": 7.2730393409729, "learning_rate": 9.152245114267267e-06, "loss": 0.7422327, "memory(GiB)": 34.88, "step": 33785, "train_speed(iter/s)": 0.412607 }, { "acc": 0.88858337, "epoch": 0.9148999539707037, "grad_norm": 8.307662010192871, "learning_rate": 9.151933344210922e-06, "loss": 0.59353924, "memory(GiB)": 34.88, "step": 33790, "train_speed(iter/s)": 0.412611 }, { "acc": 0.87241573, "epoch": 0.9150353342539193, "grad_norm": 9.797338485717773, "learning_rate": 9.151621522149718e-06, "loss": 0.65724134, "memory(GiB)": 34.88, "step": 33795, "train_speed(iter/s)": 0.412614 }, { "acc": 0.8706274, "epoch": 0.9151707145371348, "grad_norm": 9.579995155334473, "learning_rate": 9.151309648087559e-06, "loss": 0.6531899, "memory(GiB)": 34.88, "step": 33800, "train_speed(iter/s)": 0.412617 }, { "acc": 0.86519699, "epoch": 0.9153060948203504, "grad_norm": 10.14766788482666, "learning_rate": 9.150997722028351e-06, "loss": 0.7919858, "memory(GiB)": 34.88, "step": 33805, "train_speed(iter/s)": 0.41262 }, { "acc": 0.87508469, "epoch": 0.9154414751035659, "grad_norm": 9.116291046142578, "learning_rate": 9.150685743976006e-06, "loss": 0.62089057, "memory(GiB)": 34.88, "step": 33810, "train_speed(iter/s)": 0.412623 }, { "acc": 0.89017658, "epoch": 0.9155768553867815, "grad_norm": 4.800625324249268, "learning_rate": 9.15037371393443e-06, "loss": 0.47681961, "memory(GiB)": 34.88, "step": 33815, "train_speed(iter/s)": 0.412626 }, { "acc": 0.88037863, "epoch": 0.915712235669997, "grad_norm": 7.091176986694336, "learning_rate": 9.15006163190753e-06, "loss": 0.60160351, "memory(GiB)": 34.88, "step": 33820, "train_speed(iter/s)": 0.412629 }, { "acc": 0.8638361, "epoch": 0.9158476159532126, "grad_norm": 8.284196853637695, "learning_rate": 9.149749497899216e-06, "loss": 0.75771813, "memory(GiB)": 34.88, "step": 33825, "train_speed(iter/s)": 0.412632 }, { "acc": 0.86744404, "epoch": 0.9159829962364281, "grad_norm": 5.816294193267822, "learning_rate": 9.1494373119134e-06, "loss": 0.69853048, "memory(GiB)": 34.88, "step": 33830, "train_speed(iter/s)": 0.412635 }, { "acc": 0.88259487, "epoch": 0.9161183765196437, "grad_norm": 15.453303337097168, "learning_rate": 9.149125073953991e-06, "loss": 0.62964845, "memory(GiB)": 34.88, "step": 33835, "train_speed(iter/s)": 0.412638 }, { "acc": 0.86463242, "epoch": 0.9162537568028593, "grad_norm": 10.574410438537598, "learning_rate": 9.148812784024901e-06, "loss": 0.80186901, "memory(GiB)": 34.88, "step": 33840, "train_speed(iter/s)": 0.412641 }, { "acc": 0.89785032, "epoch": 0.9163891370860748, "grad_norm": 4.661802768707275, "learning_rate": 9.148500442130042e-06, "loss": 0.61126494, "memory(GiB)": 34.88, "step": 33845, "train_speed(iter/s)": 0.412644 }, { "acc": 0.87263336, "epoch": 0.9165245173692903, "grad_norm": 4.623569965362549, "learning_rate": 9.148188048273326e-06, "loss": 0.68438697, "memory(GiB)": 34.88, "step": 33850, "train_speed(iter/s)": 0.412647 }, { "acc": 0.854669, "epoch": 0.9166598976525059, "grad_norm": 13.613208770751953, "learning_rate": 9.14787560245867e-06, "loss": 0.84387751, "memory(GiB)": 34.88, "step": 33855, "train_speed(iter/s)": 0.41265 }, { "acc": 0.89063406, "epoch": 0.9167952779357215, "grad_norm": 19.512855529785156, "learning_rate": 9.14756310468998e-06, "loss": 0.54182844, "memory(GiB)": 34.88, "step": 33860, "train_speed(iter/s)": 0.412653 }, { "acc": 0.86562843, "epoch": 0.916930658218937, "grad_norm": 5.741403579711914, "learning_rate": 9.14725055497118e-06, "loss": 0.77683291, "memory(GiB)": 34.88, "step": 33865, "train_speed(iter/s)": 0.412656 }, { "acc": 0.8529459, "epoch": 0.9170660385021525, "grad_norm": 3.6673707962036133, "learning_rate": 9.14693795330618e-06, "loss": 0.75881348, "memory(GiB)": 34.88, "step": 33870, "train_speed(iter/s)": 0.412659 }, { "acc": 0.90039463, "epoch": 0.9172014187853681, "grad_norm": 11.411948204040527, "learning_rate": 9.146625299698895e-06, "loss": 0.48210797, "memory(GiB)": 34.88, "step": 33875, "train_speed(iter/s)": 0.412662 }, { "acc": 0.87903214, "epoch": 0.9173367990685837, "grad_norm": 12.316125869750977, "learning_rate": 9.146312594153245e-06, "loss": 0.67575502, "memory(GiB)": 34.88, "step": 33880, "train_speed(iter/s)": 0.412665 }, { "acc": 0.89612484, "epoch": 0.9174721793517993, "grad_norm": 7.186234474182129, "learning_rate": 9.145999836673146e-06, "loss": 0.53559766, "memory(GiB)": 34.88, "step": 33885, "train_speed(iter/s)": 0.412669 }, { "acc": 0.88821554, "epoch": 0.9176075596350147, "grad_norm": 7.302913188934326, "learning_rate": 9.145687027262513e-06, "loss": 0.67135115, "memory(GiB)": 34.88, "step": 33890, "train_speed(iter/s)": 0.412672 }, { "acc": 0.88046875, "epoch": 0.9177429399182303, "grad_norm": 11.607179641723633, "learning_rate": 9.14537416592527e-06, "loss": 0.58837252, "memory(GiB)": 34.88, "step": 33895, "train_speed(iter/s)": 0.412675 }, { "acc": 0.88786526, "epoch": 0.9178783202014459, "grad_norm": 7.935451507568359, "learning_rate": 9.145061252665332e-06, "loss": 0.62270002, "memory(GiB)": 34.88, "step": 33900, "train_speed(iter/s)": 0.412678 }, { "acc": 0.87749996, "epoch": 0.9180137004846615, "grad_norm": 5.255790710449219, "learning_rate": 9.144748287486622e-06, "loss": 0.66332865, "memory(GiB)": 34.88, "step": 33905, "train_speed(iter/s)": 0.412681 }, { "acc": 0.88398628, "epoch": 0.9181490807678769, "grad_norm": 28.909475326538086, "learning_rate": 9.144435270393057e-06, "loss": 0.53147907, "memory(GiB)": 34.88, "step": 33910, "train_speed(iter/s)": 0.412684 }, { "acc": 0.88906975, "epoch": 0.9182844610510925, "grad_norm": 11.287409782409668, "learning_rate": 9.144122201388561e-06, "loss": 0.59047852, "memory(GiB)": 34.88, "step": 33915, "train_speed(iter/s)": 0.412687 }, { "acc": 0.86891718, "epoch": 0.9184198413343081, "grad_norm": 14.115828514099121, "learning_rate": 9.143809080477054e-06, "loss": 0.679597, "memory(GiB)": 34.88, "step": 33920, "train_speed(iter/s)": 0.41269 }, { "acc": 0.87399111, "epoch": 0.9185552216175237, "grad_norm": 8.424249649047852, "learning_rate": 9.143495907662459e-06, "loss": 0.73405662, "memory(GiB)": 34.88, "step": 33925, "train_speed(iter/s)": 0.412693 }, { "acc": 0.89568014, "epoch": 0.9186906019007391, "grad_norm": 4.955456256866455, "learning_rate": 9.143182682948701e-06, "loss": 0.5327693, "memory(GiB)": 34.88, "step": 33930, "train_speed(iter/s)": 0.412696 }, { "acc": 0.90650558, "epoch": 0.9188259821839547, "grad_norm": 7.254703998565674, "learning_rate": 9.142869406339701e-06, "loss": 0.47338762, "memory(GiB)": 34.88, "step": 33935, "train_speed(iter/s)": 0.4127 }, { "acc": 0.8856842, "epoch": 0.9189613624671703, "grad_norm": 5.466615676879883, "learning_rate": 9.142556077839384e-06, "loss": 0.61349587, "memory(GiB)": 34.88, "step": 33940, "train_speed(iter/s)": 0.412703 }, { "acc": 0.88330917, "epoch": 0.9190967427503859, "grad_norm": 8.195894241333008, "learning_rate": 9.142242697451676e-06, "loss": 0.5823451, "memory(GiB)": 34.88, "step": 33945, "train_speed(iter/s)": 0.412706 }, { "acc": 0.91146603, "epoch": 0.9192321230336014, "grad_norm": 4.926849365234375, "learning_rate": 9.141929265180502e-06, "loss": 0.41397839, "memory(GiB)": 34.88, "step": 33950, "train_speed(iter/s)": 0.412709 }, { "acc": 0.87339039, "epoch": 0.9193675033168169, "grad_norm": 6.786310195922852, "learning_rate": 9.141615781029789e-06, "loss": 0.66131287, "memory(GiB)": 34.88, "step": 33955, "train_speed(iter/s)": 0.412712 }, { "acc": 0.88985481, "epoch": 0.9195028836000325, "grad_norm": 4.6463751792907715, "learning_rate": 9.141302245003462e-06, "loss": 0.60956779, "memory(GiB)": 34.88, "step": 33960, "train_speed(iter/s)": 0.412715 }, { "acc": 0.89295349, "epoch": 0.9196382638832481, "grad_norm": 21.46983528137207, "learning_rate": 9.140988657105451e-06, "loss": 0.57858171, "memory(GiB)": 34.88, "step": 33965, "train_speed(iter/s)": 0.412718 }, { "acc": 0.88819351, "epoch": 0.9197736441664636, "grad_norm": 4.854879379272461, "learning_rate": 9.140675017339685e-06, "loss": 0.52564855, "memory(GiB)": 34.88, "step": 33970, "train_speed(iter/s)": 0.412722 }, { "acc": 0.87141228, "epoch": 0.9199090244496791, "grad_norm": 9.616854667663574, "learning_rate": 9.14036132571009e-06, "loss": 0.66268654, "memory(GiB)": 34.88, "step": 33975, "train_speed(iter/s)": 0.412724 }, { "acc": 0.88393955, "epoch": 0.9200444047328947, "grad_norm": 8.072797775268555, "learning_rate": 9.140047582220598e-06, "loss": 0.64068036, "memory(GiB)": 34.88, "step": 33980, "train_speed(iter/s)": 0.412728 }, { "acc": 0.87185345, "epoch": 0.9201797850161103, "grad_norm": 16.230159759521484, "learning_rate": 9.139733786875136e-06, "loss": 0.6612957, "memory(GiB)": 34.88, "step": 33985, "train_speed(iter/s)": 0.412731 }, { "acc": 0.86072941, "epoch": 0.9203151652993258, "grad_norm": 12.883716583251953, "learning_rate": 9.13941993967764e-06, "loss": 0.7621417, "memory(GiB)": 34.88, "step": 33990, "train_speed(iter/s)": 0.412733 }, { "acc": 0.86768913, "epoch": 0.9204505455825414, "grad_norm": 8.690542221069336, "learning_rate": 9.139106040632038e-06, "loss": 0.67210159, "memory(GiB)": 34.88, "step": 33995, "train_speed(iter/s)": 0.412736 }, { "acc": 0.87312241, "epoch": 0.9205859258657569, "grad_norm": 9.16653823852539, "learning_rate": 9.138792089742263e-06, "loss": 0.60854931, "memory(GiB)": 34.88, "step": 34000, "train_speed(iter/s)": 0.412739 }, { "acc": 0.87179222, "epoch": 0.9207213061489725, "grad_norm": 11.119647026062012, "learning_rate": 9.138478087012248e-06, "loss": 0.69417171, "memory(GiB)": 34.88, "step": 34005, "train_speed(iter/s)": 0.412743 }, { "acc": 0.86885624, "epoch": 0.920856686432188, "grad_norm": 10.097347259521484, "learning_rate": 9.138164032445926e-06, "loss": 0.67835913, "memory(GiB)": 34.88, "step": 34010, "train_speed(iter/s)": 0.412746 }, { "acc": 0.85892467, "epoch": 0.9209920667154036, "grad_norm": 9.816555976867676, "learning_rate": 9.137849926047231e-06, "loss": 0.75078969, "memory(GiB)": 34.88, "step": 34015, "train_speed(iter/s)": 0.412748 }, { "acc": 0.89349518, "epoch": 0.9211274469986191, "grad_norm": 5.4561028480529785, "learning_rate": 9.137535767820099e-06, "loss": 0.51105127, "memory(GiB)": 34.88, "step": 34020, "train_speed(iter/s)": 0.412751 }, { "acc": 0.88480501, "epoch": 0.9212628272818347, "grad_norm": 13.479381561279297, "learning_rate": 9.137221557768464e-06, "loss": 0.60544405, "memory(GiB)": 34.88, "step": 34025, "train_speed(iter/s)": 0.412754 }, { "acc": 0.88770857, "epoch": 0.9213982075650502, "grad_norm": 14.394775390625, "learning_rate": 9.136907295896264e-06, "loss": 0.57957091, "memory(GiB)": 34.88, "step": 34030, "train_speed(iter/s)": 0.412757 }, { "acc": 0.88569126, "epoch": 0.9215335878482658, "grad_norm": 16.271015167236328, "learning_rate": 9.136592982207433e-06, "loss": 0.55359631, "memory(GiB)": 34.88, "step": 34035, "train_speed(iter/s)": 0.412761 }, { "acc": 0.90536051, "epoch": 0.9216689681314814, "grad_norm": 5.311692237854004, "learning_rate": 9.136278616705911e-06, "loss": 0.47869511, "memory(GiB)": 34.88, "step": 34040, "train_speed(iter/s)": 0.412764 }, { "acc": 0.8604269, "epoch": 0.9218043484146968, "grad_norm": 7.951273441314697, "learning_rate": 9.135964199395635e-06, "loss": 0.65522079, "memory(GiB)": 34.88, "step": 34045, "train_speed(iter/s)": 0.412767 }, { "acc": 0.8836915, "epoch": 0.9219397286979124, "grad_norm": 11.26620864868164, "learning_rate": 9.135649730280545e-06, "loss": 0.61483684, "memory(GiB)": 34.88, "step": 34050, "train_speed(iter/s)": 0.41277 }, { "acc": 0.88721132, "epoch": 0.922075108981128, "grad_norm": 4.458832263946533, "learning_rate": 9.135335209364577e-06, "loss": 0.5298995, "memory(GiB)": 34.88, "step": 34055, "train_speed(iter/s)": 0.412773 }, { "acc": 0.87441702, "epoch": 0.9222104892643436, "grad_norm": 6.241204261779785, "learning_rate": 9.135020636651675e-06, "loss": 0.66014194, "memory(GiB)": 34.88, "step": 34060, "train_speed(iter/s)": 0.412776 }, { "acc": 0.87808685, "epoch": 0.922345869547559, "grad_norm": 30.899600982666016, "learning_rate": 9.134706012145775e-06, "loss": 0.57902994, "memory(GiB)": 34.88, "step": 34065, "train_speed(iter/s)": 0.412779 }, { "acc": 0.87426872, "epoch": 0.9224812498307746, "grad_norm": 10.99280071258545, "learning_rate": 9.134391335850822e-06, "loss": 0.63773651, "memory(GiB)": 34.88, "step": 34070, "train_speed(iter/s)": 0.412782 }, { "acc": 0.87608109, "epoch": 0.9226166301139902, "grad_norm": 10.845149040222168, "learning_rate": 9.134076607770758e-06, "loss": 0.59375162, "memory(GiB)": 34.88, "step": 34075, "train_speed(iter/s)": 0.412785 }, { "acc": 0.89039555, "epoch": 0.9227520103972058, "grad_norm": 6.413959980010986, "learning_rate": 9.133761827909526e-06, "loss": 0.51237693, "memory(GiB)": 34.88, "step": 34080, "train_speed(iter/s)": 0.412788 }, { "acc": 0.87225399, "epoch": 0.9228873906804212, "grad_norm": 10.413588523864746, "learning_rate": 9.133446996271064e-06, "loss": 0.66348143, "memory(GiB)": 34.88, "step": 34085, "train_speed(iter/s)": 0.412791 }, { "acc": 0.92071276, "epoch": 0.9230227709636368, "grad_norm": 6.488676071166992, "learning_rate": 9.133132112859324e-06, "loss": 0.38605344, "memory(GiB)": 34.88, "step": 34090, "train_speed(iter/s)": 0.412794 }, { "acc": 0.89327469, "epoch": 0.9231581512468524, "grad_norm": 3.8313708305358887, "learning_rate": 9.132817177678242e-06, "loss": 0.50959687, "memory(GiB)": 34.88, "step": 34095, "train_speed(iter/s)": 0.412797 }, { "acc": 0.86663017, "epoch": 0.923293531530068, "grad_norm": 7.60696268081665, "learning_rate": 9.132502190731769e-06, "loss": 0.76486011, "memory(GiB)": 34.88, "step": 34100, "train_speed(iter/s)": 0.4128 }, { "acc": 0.86932583, "epoch": 0.9234289118132835, "grad_norm": 11.634488105773926, "learning_rate": 9.132187152023849e-06, "loss": 0.68289981, "memory(GiB)": 34.88, "step": 34105, "train_speed(iter/s)": 0.412802 }, { "acc": 0.86914558, "epoch": 0.923564292096499, "grad_norm": 15.040432929992676, "learning_rate": 9.13187206155843e-06, "loss": 0.73207855, "memory(GiB)": 34.88, "step": 34110, "train_speed(iter/s)": 0.412805 }, { "acc": 0.88520346, "epoch": 0.9236996723797146, "grad_norm": 5.135944843292236, "learning_rate": 9.131556919339454e-06, "loss": 0.48882675, "memory(GiB)": 34.88, "step": 34115, "train_speed(iter/s)": 0.412809 }, { "acc": 0.85566692, "epoch": 0.9238350526629302, "grad_norm": 9.286678314208984, "learning_rate": 9.131241725370875e-06, "loss": 0.71934052, "memory(GiB)": 34.88, "step": 34120, "train_speed(iter/s)": 0.412812 }, { "acc": 0.87882929, "epoch": 0.9239704329461457, "grad_norm": 7.7112226486206055, "learning_rate": 9.13092647965664e-06, "loss": 0.61901989, "memory(GiB)": 34.88, "step": 34125, "train_speed(iter/s)": 0.412815 }, { "acc": 0.88622875, "epoch": 0.9241058132293613, "grad_norm": 11.116341590881348, "learning_rate": 9.130611182200696e-06, "loss": 0.5439178, "memory(GiB)": 34.88, "step": 34130, "train_speed(iter/s)": 0.412818 }, { "acc": 0.87691393, "epoch": 0.9242411935125768, "grad_norm": 6.919276714324951, "learning_rate": 9.130295833006994e-06, "loss": 0.58950753, "memory(GiB)": 34.88, "step": 34135, "train_speed(iter/s)": 0.412821 }, { "acc": 0.88603792, "epoch": 0.9243765737957924, "grad_norm": 12.660820007324219, "learning_rate": 9.129980432079485e-06, "loss": 0.5334322, "memory(GiB)": 34.88, "step": 34140, "train_speed(iter/s)": 0.412824 }, { "acc": 0.86814442, "epoch": 0.9245119540790079, "grad_norm": 9.53296184539795, "learning_rate": 9.129664979422118e-06, "loss": 0.68301978, "memory(GiB)": 34.88, "step": 34145, "train_speed(iter/s)": 0.412827 }, { "acc": 0.86069679, "epoch": 0.9246473343622235, "grad_norm": 8.209555625915527, "learning_rate": 9.129349475038848e-06, "loss": 0.69894395, "memory(GiB)": 34.88, "step": 34150, "train_speed(iter/s)": 0.41283 }, { "acc": 0.86150131, "epoch": 0.924782714645439, "grad_norm": 16.979150772094727, "learning_rate": 9.129033918933622e-06, "loss": 0.75435348, "memory(GiB)": 34.88, "step": 34155, "train_speed(iter/s)": 0.412833 }, { "acc": 0.88144112, "epoch": 0.9249180949286546, "grad_norm": 14.015585899353027, "learning_rate": 9.128718311110398e-06, "loss": 0.5288497, "memory(GiB)": 34.88, "step": 34160, "train_speed(iter/s)": 0.412836 }, { "acc": 0.8956337, "epoch": 0.9250534752118701, "grad_norm": 4.620670318603516, "learning_rate": 9.128402651573128e-06, "loss": 0.57193909, "memory(GiB)": 34.88, "step": 34165, "train_speed(iter/s)": 0.412838 }, { "acc": 0.88589134, "epoch": 0.9251888554950857, "grad_norm": 4.035422325134277, "learning_rate": 9.128086940325762e-06, "loss": 0.59538093, "memory(GiB)": 34.88, "step": 34170, "train_speed(iter/s)": 0.412841 }, { "acc": 0.87142763, "epoch": 0.9253242357783013, "grad_norm": 5.6661696434021, "learning_rate": 9.127771177372265e-06, "loss": 0.60776396, "memory(GiB)": 34.88, "step": 34175, "train_speed(iter/s)": 0.412842 }, { "acc": 0.87576866, "epoch": 0.9254596160615168, "grad_norm": 6.8220038414001465, "learning_rate": 9.127455362716581e-06, "loss": 0.59454961, "memory(GiB)": 34.88, "step": 34180, "train_speed(iter/s)": 0.412844 }, { "acc": 0.89265442, "epoch": 0.9255949963447323, "grad_norm": 6.60036039352417, "learning_rate": 9.127139496362675e-06, "loss": 0.6017961, "memory(GiB)": 34.88, "step": 34185, "train_speed(iter/s)": 0.412846 }, { "acc": 0.88198509, "epoch": 0.9257303766279479, "grad_norm": 13.085295677185059, "learning_rate": 9.1268235783145e-06, "loss": 0.59539361, "memory(GiB)": 34.88, "step": 34190, "train_speed(iter/s)": 0.412849 }, { "acc": 0.86642389, "epoch": 0.9258657569111635, "grad_norm": 12.110196113586426, "learning_rate": 9.126507608576014e-06, "loss": 0.73580379, "memory(GiB)": 34.88, "step": 34195, "train_speed(iter/s)": 0.412852 }, { "acc": 0.88508282, "epoch": 0.926001137194379, "grad_norm": 6.6215715408325195, "learning_rate": 9.126191587151175e-06, "loss": 0.62675538, "memory(GiB)": 34.88, "step": 34200, "train_speed(iter/s)": 0.412854 }, { "acc": 0.86821899, "epoch": 0.9261365174775945, "grad_norm": 7.934536457061768, "learning_rate": 9.125875514043942e-06, "loss": 0.60051427, "memory(GiB)": 34.88, "step": 34205, "train_speed(iter/s)": 0.412857 }, { "acc": 0.89945707, "epoch": 0.9262718977608101, "grad_norm": 7.715292930603027, "learning_rate": 9.125559389258273e-06, "loss": 0.55671005, "memory(GiB)": 34.88, "step": 34210, "train_speed(iter/s)": 0.412859 }, { "acc": 0.89489107, "epoch": 0.9264072780440257, "grad_norm": 9.99482250213623, "learning_rate": 9.12524321279813e-06, "loss": 0.67520208, "memory(GiB)": 34.88, "step": 34215, "train_speed(iter/s)": 0.412862 }, { "acc": 0.87195339, "epoch": 0.9265426583272413, "grad_norm": 9.99259090423584, "learning_rate": 9.124926984667474e-06, "loss": 0.68119154, "memory(GiB)": 34.88, "step": 34220, "train_speed(iter/s)": 0.412865 }, { "acc": 0.87379112, "epoch": 0.9266780386104567, "grad_norm": 15.178847312927246, "learning_rate": 9.124610704870264e-06, "loss": 0.68459859, "memory(GiB)": 34.88, "step": 34225, "train_speed(iter/s)": 0.412867 }, { "acc": 0.86388035, "epoch": 0.9268134188936723, "grad_norm": 5.650840759277344, "learning_rate": 9.124294373410467e-06, "loss": 0.65003996, "memory(GiB)": 34.88, "step": 34230, "train_speed(iter/s)": 0.412869 }, { "acc": 0.87229519, "epoch": 0.9269487991768879, "grad_norm": 9.062005043029785, "learning_rate": 9.12397799029204e-06, "loss": 0.71750183, "memory(GiB)": 34.88, "step": 34235, "train_speed(iter/s)": 0.412872 }, { "acc": 0.87412605, "epoch": 0.9270841794601035, "grad_norm": 7.9619011878967285, "learning_rate": 9.123661555518947e-06, "loss": 0.63001671, "memory(GiB)": 34.88, "step": 34240, "train_speed(iter/s)": 0.412874 }, { "acc": 0.8744607, "epoch": 0.9272195597433189, "grad_norm": 5.760134696960449, "learning_rate": 9.123345069095153e-06, "loss": 0.61370077, "memory(GiB)": 34.88, "step": 34245, "train_speed(iter/s)": 0.412877 }, { "acc": 0.89009476, "epoch": 0.9273549400265345, "grad_norm": 16.7094783782959, "learning_rate": 9.123028531024628e-06, "loss": 0.58933291, "memory(GiB)": 34.88, "step": 34250, "train_speed(iter/s)": 0.412878 }, { "acc": 0.88547707, "epoch": 0.9274903203097501, "grad_norm": 9.867939949035645, "learning_rate": 9.12271194131133e-06, "loss": 0.68917618, "memory(GiB)": 34.88, "step": 34255, "train_speed(iter/s)": 0.41288 }, { "acc": 0.86653633, "epoch": 0.9276257005929657, "grad_norm": 55.54945755004883, "learning_rate": 9.122395299959226e-06, "loss": 0.74730034, "memory(GiB)": 34.88, "step": 34260, "train_speed(iter/s)": 0.412883 }, { "acc": 0.88370247, "epoch": 0.9277610808761811, "grad_norm": 7.727210521697998, "learning_rate": 9.122078606972284e-06, "loss": 0.6018867, "memory(GiB)": 34.88, "step": 34265, "train_speed(iter/s)": 0.412886 }, { "acc": 0.86855087, "epoch": 0.9278964611593967, "grad_norm": 12.89676570892334, "learning_rate": 9.121761862354472e-06, "loss": 0.70490942, "memory(GiB)": 34.88, "step": 34270, "train_speed(iter/s)": 0.412889 }, { "acc": 0.89435482, "epoch": 0.9280318414426123, "grad_norm": 7.061161518096924, "learning_rate": 9.121445066109756e-06, "loss": 0.51002378, "memory(GiB)": 34.88, "step": 34275, "train_speed(iter/s)": 0.41289 }, { "acc": 0.8768013, "epoch": 0.9281672217258279, "grad_norm": 22.97966957092285, "learning_rate": 9.121128218242106e-06, "loss": 0.69939675, "memory(GiB)": 34.88, "step": 34280, "train_speed(iter/s)": 0.412894 }, { "acc": 0.88462572, "epoch": 0.9283026020090434, "grad_norm": 8.67979907989502, "learning_rate": 9.120811318755489e-06, "loss": 0.58873959, "memory(GiB)": 34.88, "step": 34285, "train_speed(iter/s)": 0.412895 }, { "acc": 0.85840092, "epoch": 0.9284379822922589, "grad_norm": 11.367023468017578, "learning_rate": 9.120494367653879e-06, "loss": 0.68021412, "memory(GiB)": 34.88, "step": 34290, "train_speed(iter/s)": 0.412898 }, { "acc": 0.86701279, "epoch": 0.9285733625754745, "grad_norm": 8.819453239440918, "learning_rate": 9.120177364941243e-06, "loss": 0.64100332, "memory(GiB)": 34.88, "step": 34295, "train_speed(iter/s)": 0.412901 }, { "acc": 0.87888641, "epoch": 0.9287087428586901, "grad_norm": 6.268348217010498, "learning_rate": 9.119860310621552e-06, "loss": 0.63314486, "memory(GiB)": 34.88, "step": 34300, "train_speed(iter/s)": 0.412903 }, { "acc": 0.85869408, "epoch": 0.9288441231419056, "grad_norm": 10.161589622497559, "learning_rate": 9.11954320469878e-06, "loss": 0.72054286, "memory(GiB)": 34.88, "step": 34305, "train_speed(iter/s)": 0.412906 }, { "acc": 0.86549864, "epoch": 0.9289795034251211, "grad_norm": 9.874358177185059, "learning_rate": 9.119226047176898e-06, "loss": 0.72001686, "memory(GiB)": 34.88, "step": 34310, "train_speed(iter/s)": 0.412909 }, { "acc": 0.88069401, "epoch": 0.9291148837083367, "grad_norm": 3.9943113327026367, "learning_rate": 9.118908838059875e-06, "loss": 0.54438434, "memory(GiB)": 34.88, "step": 34315, "train_speed(iter/s)": 0.412912 }, { "acc": 0.87807961, "epoch": 0.9292502639915523, "grad_norm": 9.460576057434082, "learning_rate": 9.118591577351694e-06, "loss": 0.64186401, "memory(GiB)": 34.88, "step": 34320, "train_speed(iter/s)": 0.412916 }, { "acc": 0.88932533, "epoch": 0.9293856442747678, "grad_norm": 7.201549530029297, "learning_rate": 9.11827426505632e-06, "loss": 0.57295456, "memory(GiB)": 34.88, "step": 34325, "train_speed(iter/s)": 0.412919 }, { "acc": 0.87853642, "epoch": 0.9295210245579834, "grad_norm": 5.283936023712158, "learning_rate": 9.117956901177734e-06, "loss": 0.68293123, "memory(GiB)": 34.88, "step": 34330, "train_speed(iter/s)": 0.412922 }, { "acc": 0.86201153, "epoch": 0.9296564048411989, "grad_norm": 107.62356567382812, "learning_rate": 9.117639485719909e-06, "loss": 0.83592281, "memory(GiB)": 34.88, "step": 34335, "train_speed(iter/s)": 0.412924 }, { "acc": 0.88881836, "epoch": 0.9297917851244145, "grad_norm": 9.239496231079102, "learning_rate": 9.117322018686823e-06, "loss": 0.57652407, "memory(GiB)": 34.88, "step": 34340, "train_speed(iter/s)": 0.412927 }, { "acc": 0.87587986, "epoch": 0.92992716540763, "grad_norm": 11.360527992248535, "learning_rate": 9.11700450008245e-06, "loss": 0.62941771, "memory(GiB)": 34.88, "step": 34345, "train_speed(iter/s)": 0.41293 }, { "acc": 0.85567646, "epoch": 0.9300625456908456, "grad_norm": 8.827672958374023, "learning_rate": 9.116686929910771e-06, "loss": 0.72437582, "memory(GiB)": 34.88, "step": 34350, "train_speed(iter/s)": 0.412932 }, { "acc": 0.88181181, "epoch": 0.9301979259740611, "grad_norm": 18.789623260498047, "learning_rate": 9.116369308175758e-06, "loss": 0.65141759, "memory(GiB)": 34.88, "step": 34355, "train_speed(iter/s)": 0.412935 }, { "acc": 0.87517242, "epoch": 0.9303333062572767, "grad_norm": 5.865650177001953, "learning_rate": 9.116051634881399e-06, "loss": 0.65233073, "memory(GiB)": 34.88, "step": 34360, "train_speed(iter/s)": 0.412937 }, { "acc": 0.88919888, "epoch": 0.9304686865404922, "grad_norm": 9.090014457702637, "learning_rate": 9.115733910031667e-06, "loss": 0.57415504, "memory(GiB)": 34.88, "step": 34365, "train_speed(iter/s)": 0.412939 }, { "acc": 0.87819538, "epoch": 0.9306040668237078, "grad_norm": 17.292951583862305, "learning_rate": 9.115416133630545e-06, "loss": 0.70807667, "memory(GiB)": 34.88, "step": 34370, "train_speed(iter/s)": 0.412942 }, { "acc": 0.87508593, "epoch": 0.9307394471069234, "grad_norm": 9.72012996673584, "learning_rate": 9.11509830568201e-06, "loss": 0.63546772, "memory(GiB)": 34.88, "step": 34375, "train_speed(iter/s)": 0.412945 }, { "acc": 0.86812782, "epoch": 0.9308748273901389, "grad_norm": 9.218280792236328, "learning_rate": 9.114780426190048e-06, "loss": 0.72871299, "memory(GiB)": 34.88, "step": 34380, "train_speed(iter/s)": 0.412948 }, { "acc": 0.88568916, "epoch": 0.9310102076733544, "grad_norm": 8.920004844665527, "learning_rate": 9.114462495158639e-06, "loss": 0.59663181, "memory(GiB)": 34.88, "step": 34385, "train_speed(iter/s)": 0.412951 }, { "acc": 0.87268562, "epoch": 0.93114558795657, "grad_norm": 10.24758243560791, "learning_rate": 9.114144512591766e-06, "loss": 0.57228827, "memory(GiB)": 34.88, "step": 34390, "train_speed(iter/s)": 0.412954 }, { "acc": 0.85935783, "epoch": 0.9312809682397856, "grad_norm": 10.273629188537598, "learning_rate": 9.113826478493412e-06, "loss": 0.75263424, "memory(GiB)": 34.88, "step": 34395, "train_speed(iter/s)": 0.412957 }, { "acc": 0.85616245, "epoch": 0.9314163485230011, "grad_norm": 8.278094291687012, "learning_rate": 9.11350839286756e-06, "loss": 0.83836899, "memory(GiB)": 34.88, "step": 34400, "train_speed(iter/s)": 0.41296 }, { "acc": 0.8633481, "epoch": 0.9315517288062166, "grad_norm": 6.711642742156982, "learning_rate": 9.113190255718197e-06, "loss": 0.66749053, "memory(GiB)": 34.88, "step": 34405, "train_speed(iter/s)": 0.412963 }, { "acc": 0.89001646, "epoch": 0.9316871090894322, "grad_norm": 9.548492431640625, "learning_rate": 9.112872067049308e-06, "loss": 0.51056595, "memory(GiB)": 34.88, "step": 34410, "train_speed(iter/s)": 0.412966 }, { "acc": 0.87029095, "epoch": 0.9318224893726478, "grad_norm": 11.554795265197754, "learning_rate": 9.112553826864875e-06, "loss": 0.6549593, "memory(GiB)": 34.88, "step": 34415, "train_speed(iter/s)": 0.412969 }, { "acc": 0.85854759, "epoch": 0.9319578696558634, "grad_norm": 8.864206314086914, "learning_rate": 9.11223553516889e-06, "loss": 0.77990618, "memory(GiB)": 34.88, "step": 34420, "train_speed(iter/s)": 0.412972 }, { "acc": 0.88858557, "epoch": 0.9320932499390788, "grad_norm": 7.856374740600586, "learning_rate": 9.111917191965338e-06, "loss": 0.558992, "memory(GiB)": 34.88, "step": 34425, "train_speed(iter/s)": 0.412975 }, { "acc": 0.88463144, "epoch": 0.9322286302222944, "grad_norm": 3.2569265365600586, "learning_rate": 9.111598797258207e-06, "loss": 0.60193653, "memory(GiB)": 34.88, "step": 34430, "train_speed(iter/s)": 0.412978 }, { "acc": 0.89194565, "epoch": 0.93236401050551, "grad_norm": 4.138202667236328, "learning_rate": 9.111280351051484e-06, "loss": 0.62729549, "memory(GiB)": 34.88, "step": 34435, "train_speed(iter/s)": 0.412981 }, { "acc": 0.8782402, "epoch": 0.9324993907887256, "grad_norm": 12.823586463928223, "learning_rate": 9.11096185334916e-06, "loss": 0.63854299, "memory(GiB)": 34.88, "step": 34440, "train_speed(iter/s)": 0.412984 }, { "acc": 0.89488811, "epoch": 0.932634771071941, "grad_norm": 7.066412925720215, "learning_rate": 9.110643304155225e-06, "loss": 0.57602386, "memory(GiB)": 34.88, "step": 34445, "train_speed(iter/s)": 0.412987 }, { "acc": 0.87765484, "epoch": 0.9327701513551566, "grad_norm": 9.053759574890137, "learning_rate": 9.110324703473667e-06, "loss": 0.65607767, "memory(GiB)": 34.88, "step": 34450, "train_speed(iter/s)": 0.41299 }, { "acc": 0.90874586, "epoch": 0.9329055316383722, "grad_norm": 5.542405128479004, "learning_rate": 9.11000605130848e-06, "loss": 0.45236511, "memory(GiB)": 34.88, "step": 34455, "train_speed(iter/s)": 0.412993 }, { "acc": 0.86181297, "epoch": 0.9330409119215878, "grad_norm": 7.422665596008301, "learning_rate": 9.109687347663653e-06, "loss": 0.73060398, "memory(GiB)": 34.88, "step": 34460, "train_speed(iter/s)": 0.412996 }, { "acc": 0.90875444, "epoch": 0.9331762922048032, "grad_norm": 5.426233768463135, "learning_rate": 9.109368592543183e-06, "loss": 0.43602638, "memory(GiB)": 34.88, "step": 34465, "train_speed(iter/s)": 0.412999 }, { "acc": 0.86017036, "epoch": 0.9333116724880188, "grad_norm": 28.094520568847656, "learning_rate": 9.109049785951058e-06, "loss": 0.61476383, "memory(GiB)": 34.88, "step": 34470, "train_speed(iter/s)": 0.413002 }, { "acc": 0.88423672, "epoch": 0.9334470527712344, "grad_norm": 5.886263370513916, "learning_rate": 9.108730927891273e-06, "loss": 0.50606036, "memory(GiB)": 34.88, "step": 34475, "train_speed(iter/s)": 0.413005 }, { "acc": 0.91400433, "epoch": 0.93358243305445, "grad_norm": 7.474267482757568, "learning_rate": 9.108412018367823e-06, "loss": 0.3698854, "memory(GiB)": 34.88, "step": 34480, "train_speed(iter/s)": 0.413008 }, { "acc": 0.88660641, "epoch": 0.9337178133376655, "grad_norm": 7.573317527770996, "learning_rate": 9.108093057384704e-06, "loss": 0.56576719, "memory(GiB)": 34.88, "step": 34485, "train_speed(iter/s)": 0.413011 }, { "acc": 0.86561537, "epoch": 0.933853193620881, "grad_norm": 10.702917098999023, "learning_rate": 9.107774044945911e-06, "loss": 0.72763691, "memory(GiB)": 34.88, "step": 34490, "train_speed(iter/s)": 0.413014 }, { "acc": 0.90678539, "epoch": 0.9339885739040966, "grad_norm": 3.8745224475860596, "learning_rate": 9.107454981055438e-06, "loss": 0.47374067, "memory(GiB)": 34.88, "step": 34495, "train_speed(iter/s)": 0.413017 }, { "acc": 0.89046059, "epoch": 0.9341239541873122, "grad_norm": 9.062739372253418, "learning_rate": 9.107135865717285e-06, "loss": 0.6059597, "memory(GiB)": 34.88, "step": 34500, "train_speed(iter/s)": 0.413019 }, { "acc": 0.87520561, "epoch": 0.9342593344705277, "grad_norm": 11.714734077453613, "learning_rate": 9.10681669893545e-06, "loss": 0.69199314, "memory(GiB)": 34.88, "step": 34505, "train_speed(iter/s)": 0.413022 }, { "acc": 0.86732864, "epoch": 0.9343947147537432, "grad_norm": 7.970599174499512, "learning_rate": 9.106497480713925e-06, "loss": 0.76371284, "memory(GiB)": 34.88, "step": 34510, "train_speed(iter/s)": 0.413025 }, { "acc": 0.88194723, "epoch": 0.9345300950369588, "grad_norm": 10.365737915039062, "learning_rate": 9.106178211056716e-06, "loss": 0.58519783, "memory(GiB)": 34.88, "step": 34515, "train_speed(iter/s)": 0.413028 }, { "acc": 0.86292973, "epoch": 0.9346654753201744, "grad_norm": 4.741851329803467, "learning_rate": 9.10585888996782e-06, "loss": 0.79102659, "memory(GiB)": 34.88, "step": 34520, "train_speed(iter/s)": 0.413031 }, { "acc": 0.86430206, "epoch": 0.9348008556033899, "grad_norm": 5.349764823913574, "learning_rate": 9.105539517451238e-06, "loss": 0.68876681, "memory(GiB)": 34.88, "step": 34525, "train_speed(iter/s)": 0.413034 }, { "acc": 0.87266903, "epoch": 0.9349362358866055, "grad_norm": 5.854146957397461, "learning_rate": 9.105220093510967e-06, "loss": 0.66979046, "memory(GiB)": 34.88, "step": 34530, "train_speed(iter/s)": 0.413037 }, { "acc": 0.88336849, "epoch": 0.935071616169821, "grad_norm": 8.345752716064453, "learning_rate": 9.104900618151013e-06, "loss": 0.63745027, "memory(GiB)": 34.88, "step": 34535, "train_speed(iter/s)": 0.413041 }, { "acc": 0.90059547, "epoch": 0.9352069964530366, "grad_norm": 6.1020989418029785, "learning_rate": 9.104581091375376e-06, "loss": 0.43940549, "memory(GiB)": 34.88, "step": 34540, "train_speed(iter/s)": 0.413043 }, { "acc": 0.87980242, "epoch": 0.9353423767362521, "grad_norm": 6.972809791564941, "learning_rate": 9.104261513188056e-06, "loss": 0.60547953, "memory(GiB)": 34.88, "step": 34545, "train_speed(iter/s)": 0.413047 }, { "acc": 0.85037241, "epoch": 0.9354777570194677, "grad_norm": 13.293439865112305, "learning_rate": 9.103941883593061e-06, "loss": 0.83771172, "memory(GiB)": 34.88, "step": 34550, "train_speed(iter/s)": 0.413049 }, { "acc": 0.88200731, "epoch": 0.9356131373026833, "grad_norm": 8.792068481445312, "learning_rate": 9.103622202594393e-06, "loss": 0.62310677, "memory(GiB)": 34.88, "step": 34555, "train_speed(iter/s)": 0.413052 }, { "acc": 0.87726936, "epoch": 0.9357485175858988, "grad_norm": 4.724158763885498, "learning_rate": 9.103302470196058e-06, "loss": 0.61834474, "memory(GiB)": 34.88, "step": 34560, "train_speed(iter/s)": 0.413055 }, { "acc": 0.9050581, "epoch": 0.9358838978691143, "grad_norm": 5.926728248596191, "learning_rate": 9.10298268640206e-06, "loss": 0.46452093, "memory(GiB)": 34.88, "step": 34565, "train_speed(iter/s)": 0.413058 }, { "acc": 0.85326042, "epoch": 0.9360192781523299, "grad_norm": 8.888312339782715, "learning_rate": 9.102662851216405e-06, "loss": 0.80143938, "memory(GiB)": 34.88, "step": 34570, "train_speed(iter/s)": 0.41306 }, { "acc": 0.89393616, "epoch": 0.9361546584355455, "grad_norm": 6.4547600746154785, "learning_rate": 9.102342964643097e-06, "loss": 0.52755117, "memory(GiB)": 34.88, "step": 34575, "train_speed(iter/s)": 0.413063 }, { "acc": 0.88960829, "epoch": 0.936290038718761, "grad_norm": 4.569934844970703, "learning_rate": 9.102023026686147e-06, "loss": 0.47472172, "memory(GiB)": 34.88, "step": 34580, "train_speed(iter/s)": 0.413066 }, { "acc": 0.89786129, "epoch": 0.9364254190019765, "grad_norm": 13.372764587402344, "learning_rate": 9.101703037349563e-06, "loss": 0.60362911, "memory(GiB)": 34.88, "step": 34585, "train_speed(iter/s)": 0.413069 }, { "acc": 0.86979599, "epoch": 0.9365607992851921, "grad_norm": 8.506978988647461, "learning_rate": 9.101382996637351e-06, "loss": 0.74035711, "memory(GiB)": 34.88, "step": 34590, "train_speed(iter/s)": 0.413072 }, { "acc": 0.87797298, "epoch": 0.9366961795684077, "grad_norm": 20.32227897644043, "learning_rate": 9.101062904553523e-06, "loss": 0.65014009, "memory(GiB)": 34.88, "step": 34595, "train_speed(iter/s)": 0.413074 }, { "acc": 0.8553443, "epoch": 0.9368315598516233, "grad_norm": 17.518648147583008, "learning_rate": 9.100742761102086e-06, "loss": 0.79944973, "memory(GiB)": 34.88, "step": 34600, "train_speed(iter/s)": 0.413077 }, { "acc": 0.89362612, "epoch": 0.9369669401348387, "grad_norm": 5.625335693359375, "learning_rate": 9.100422566287049e-06, "loss": 0.47481604, "memory(GiB)": 34.88, "step": 34605, "train_speed(iter/s)": 0.41308 }, { "acc": 0.89074383, "epoch": 0.9371023204180543, "grad_norm": 10.171554565429688, "learning_rate": 9.100102320112428e-06, "loss": 0.59620152, "memory(GiB)": 34.88, "step": 34610, "train_speed(iter/s)": 0.413083 }, { "acc": 0.87384434, "epoch": 0.9372377007012699, "grad_norm": 18.29502296447754, "learning_rate": 9.099782022582232e-06, "loss": 0.59836626, "memory(GiB)": 34.88, "step": 34615, "train_speed(iter/s)": 0.413086 }, { "acc": 0.92174644, "epoch": 0.9373730809844855, "grad_norm": 3.92897367477417, "learning_rate": 9.099461673700473e-06, "loss": 0.39959478, "memory(GiB)": 34.88, "step": 34620, "train_speed(iter/s)": 0.413089 }, { "acc": 0.86808205, "epoch": 0.9375084612677009, "grad_norm": 18.27273178100586, "learning_rate": 9.099141273471166e-06, "loss": 0.69854097, "memory(GiB)": 34.88, "step": 34625, "train_speed(iter/s)": 0.413092 }, { "acc": 0.86185675, "epoch": 0.9376438415509165, "grad_norm": 8.033726692199707, "learning_rate": 9.098820821898322e-06, "loss": 0.69896684, "memory(GiB)": 34.88, "step": 34630, "train_speed(iter/s)": 0.413095 }, { "acc": 0.88852329, "epoch": 0.9377792218341321, "grad_norm": 5.1482110023498535, "learning_rate": 9.098500318985955e-06, "loss": 0.59118676, "memory(GiB)": 34.88, "step": 34635, "train_speed(iter/s)": 0.413097 }, { "acc": 0.87753935, "epoch": 0.9379146021173477, "grad_norm": 14.638855934143066, "learning_rate": 9.098179764738085e-06, "loss": 0.69934835, "memory(GiB)": 34.88, "step": 34640, "train_speed(iter/s)": 0.4131 }, { "acc": 0.87972965, "epoch": 0.9380499824005631, "grad_norm": 39.3756217956543, "learning_rate": 9.097859159158722e-06, "loss": 0.62893586, "memory(GiB)": 34.88, "step": 34645, "train_speed(iter/s)": 0.413103 }, { "acc": 0.87761145, "epoch": 0.9381853626837787, "grad_norm": 4.559942245483398, "learning_rate": 9.097538502251885e-06, "loss": 0.6803771, "memory(GiB)": 34.88, "step": 34650, "train_speed(iter/s)": 0.413106 }, { "acc": 0.86406593, "epoch": 0.9383207429669943, "grad_norm": 6.11464262008667, "learning_rate": 9.097217794021588e-06, "loss": 0.68165789, "memory(GiB)": 34.88, "step": 34655, "train_speed(iter/s)": 0.413109 }, { "acc": 0.87447529, "epoch": 0.9384561232502099, "grad_norm": 5.911505699157715, "learning_rate": 9.096897034471852e-06, "loss": 0.71309519, "memory(GiB)": 34.88, "step": 34660, "train_speed(iter/s)": 0.413112 }, { "acc": 0.90510912, "epoch": 0.9385915035334254, "grad_norm": 6.474599361419678, "learning_rate": 9.096576223606694e-06, "loss": 0.54241333, "memory(GiB)": 34.88, "step": 34665, "train_speed(iter/s)": 0.413115 }, { "acc": 0.85737095, "epoch": 0.9387268838166409, "grad_norm": 18.20142364501953, "learning_rate": 9.096255361430132e-06, "loss": 0.92049789, "memory(GiB)": 34.88, "step": 34670, "train_speed(iter/s)": 0.413118 }, { "acc": 0.89919958, "epoch": 0.9388622640998565, "grad_norm": 6.0304646492004395, "learning_rate": 9.095934447946188e-06, "loss": 0.46120515, "memory(GiB)": 34.88, "step": 34675, "train_speed(iter/s)": 0.413122 }, { "acc": 0.89824886, "epoch": 0.9389976443830721, "grad_norm": 5.590111255645752, "learning_rate": 9.095613483158878e-06, "loss": 0.4997839, "memory(GiB)": 34.88, "step": 34680, "train_speed(iter/s)": 0.413124 }, { "acc": 0.88072557, "epoch": 0.9391330246662876, "grad_norm": 13.163175582885742, "learning_rate": 9.095292467072225e-06, "loss": 0.5730793, "memory(GiB)": 34.88, "step": 34685, "train_speed(iter/s)": 0.413127 }, { "acc": 0.88083267, "epoch": 0.9392684049495031, "grad_norm": 7.950333118438721, "learning_rate": 9.094971399690249e-06, "loss": 0.6720973, "memory(GiB)": 34.88, "step": 34690, "train_speed(iter/s)": 0.41313 }, { "acc": 0.88322449, "epoch": 0.9394037852327187, "grad_norm": 16.501758575439453, "learning_rate": 9.094650281016976e-06, "loss": 0.65683804, "memory(GiB)": 34.88, "step": 34695, "train_speed(iter/s)": 0.413133 }, { "acc": 0.86595058, "epoch": 0.9395391655159343, "grad_norm": 8.616238594055176, "learning_rate": 9.094329111056422e-06, "loss": 0.67898974, "memory(GiB)": 34.88, "step": 34700, "train_speed(iter/s)": 0.413136 }, { "acc": 0.87911434, "epoch": 0.9396745457991498, "grad_norm": 6.6668500900268555, "learning_rate": 9.094007889812618e-06, "loss": 0.59813204, "memory(GiB)": 34.88, "step": 34705, "train_speed(iter/s)": 0.413139 }, { "acc": 0.87706318, "epoch": 0.9398099260823654, "grad_norm": 8.672980308532715, "learning_rate": 9.093686617289583e-06, "loss": 0.66078863, "memory(GiB)": 34.88, "step": 34710, "train_speed(iter/s)": 0.413142 }, { "acc": 0.87431555, "epoch": 0.9399453063655809, "grad_norm": 9.198725700378418, "learning_rate": 9.09336529349134e-06, "loss": 0.61685929, "memory(GiB)": 34.88, "step": 34715, "train_speed(iter/s)": 0.413145 }, { "acc": 0.88508339, "epoch": 0.9400806866487965, "grad_norm": 7.539215087890625, "learning_rate": 9.09304391842192e-06, "loss": 0.54560432, "memory(GiB)": 34.88, "step": 34720, "train_speed(iter/s)": 0.413148 }, { "acc": 0.8961504, "epoch": 0.940216066932012, "grad_norm": 9.377634048461914, "learning_rate": 9.092722492085341e-06, "loss": 0.54116373, "memory(GiB)": 34.88, "step": 34725, "train_speed(iter/s)": 0.41315 }, { "acc": 0.88294277, "epoch": 0.9403514472152276, "grad_norm": 8.80216121673584, "learning_rate": 9.09240101448564e-06, "loss": 0.66466284, "memory(GiB)": 34.88, "step": 34730, "train_speed(iter/s)": 0.413153 }, { "acc": 0.90341854, "epoch": 0.9404868274984431, "grad_norm": 10.19378662109375, "learning_rate": 9.092079485626833e-06, "loss": 0.50508742, "memory(GiB)": 34.88, "step": 34735, "train_speed(iter/s)": 0.413156 }, { "acc": 0.86695614, "epoch": 0.9406222077816587, "grad_norm": 7.167037487030029, "learning_rate": 9.091757905512956e-06, "loss": 0.62758555, "memory(GiB)": 34.88, "step": 34740, "train_speed(iter/s)": 0.413159 }, { "acc": 0.86963329, "epoch": 0.9407575880648742, "grad_norm": 9.364119529724121, "learning_rate": 9.091436274148033e-06, "loss": 0.70185909, "memory(GiB)": 34.88, "step": 34745, "train_speed(iter/s)": 0.413161 }, { "acc": 0.90254154, "epoch": 0.9408929683480898, "grad_norm": 7.955445289611816, "learning_rate": 9.091114591536094e-06, "loss": 0.48792353, "memory(GiB)": 34.88, "step": 34750, "train_speed(iter/s)": 0.413164 }, { "acc": 0.89017735, "epoch": 0.9410283486313054, "grad_norm": 5.555813789367676, "learning_rate": 9.090792857681172e-06, "loss": 0.6120203, "memory(GiB)": 34.88, "step": 34755, "train_speed(iter/s)": 0.413167 }, { "acc": 0.87342377, "epoch": 0.9411637289145209, "grad_norm": 5.447152137756348, "learning_rate": 9.090471072587293e-06, "loss": 0.64360676, "memory(GiB)": 34.88, "step": 34760, "train_speed(iter/s)": 0.41317 }, { "acc": 0.86252537, "epoch": 0.9412991091977364, "grad_norm": 8.545753479003906, "learning_rate": 9.090149236258489e-06, "loss": 0.70789795, "memory(GiB)": 34.88, "step": 34765, "train_speed(iter/s)": 0.413173 }, { "acc": 0.86920576, "epoch": 0.941434489480952, "grad_norm": 6.886734962463379, "learning_rate": 9.089827348698791e-06, "loss": 0.62401915, "memory(GiB)": 34.88, "step": 34770, "train_speed(iter/s)": 0.413176 }, { "acc": 0.892132, "epoch": 0.9415698697641676, "grad_norm": 9.614628791809082, "learning_rate": 9.089505409912235e-06, "loss": 0.57284269, "memory(GiB)": 34.88, "step": 34775, "train_speed(iter/s)": 0.413179 }, { "acc": 0.87457857, "epoch": 0.9417052500473831, "grad_norm": 8.717554092407227, "learning_rate": 9.089183419902851e-06, "loss": 0.60732851, "memory(GiB)": 34.88, "step": 34780, "train_speed(iter/s)": 0.413181 }, { "acc": 0.88758793, "epoch": 0.9418406303305986, "grad_norm": 5.24932861328125, "learning_rate": 9.088861378674672e-06, "loss": 0.54766326, "memory(GiB)": 34.88, "step": 34785, "train_speed(iter/s)": 0.413184 }, { "acc": 0.89104576, "epoch": 0.9419760106138142, "grad_norm": 5.078676700592041, "learning_rate": 9.088539286231733e-06, "loss": 0.58054233, "memory(GiB)": 34.88, "step": 34790, "train_speed(iter/s)": 0.413187 }, { "acc": 0.86796474, "epoch": 0.9421113908970298, "grad_norm": 14.257281303405762, "learning_rate": 9.08821714257807e-06, "loss": 0.66657176, "memory(GiB)": 34.88, "step": 34795, "train_speed(iter/s)": 0.41319 }, { "acc": 0.88928795, "epoch": 0.9422467711802454, "grad_norm": 8.056431770324707, "learning_rate": 9.087894947717716e-06, "loss": 0.61740637, "memory(GiB)": 34.88, "step": 34800, "train_speed(iter/s)": 0.413193 }, { "acc": 0.88941832, "epoch": 0.9423821514634608, "grad_norm": 7.778558731079102, "learning_rate": 9.08757270165471e-06, "loss": 0.59527206, "memory(GiB)": 34.88, "step": 34805, "train_speed(iter/s)": 0.413196 }, { "acc": 0.87193241, "epoch": 0.9425175317466764, "grad_norm": 6.862718105316162, "learning_rate": 9.087250404393087e-06, "loss": 0.68395319, "memory(GiB)": 34.88, "step": 34810, "train_speed(iter/s)": 0.413199 }, { "acc": 0.89891157, "epoch": 0.942652912029892, "grad_norm": 6.399076461791992, "learning_rate": 9.086928055936883e-06, "loss": 0.42931495, "memory(GiB)": 34.88, "step": 34815, "train_speed(iter/s)": 0.413201 }, { "acc": 0.86862774, "epoch": 0.9427882923131076, "grad_norm": 10.155933380126953, "learning_rate": 9.086605656290139e-06, "loss": 0.71694269, "memory(GiB)": 34.88, "step": 34820, "train_speed(iter/s)": 0.413204 }, { "acc": 0.87428894, "epoch": 0.942923672596323, "grad_norm": 9.83697509765625, "learning_rate": 9.086283205456894e-06, "loss": 0.66983438, "memory(GiB)": 34.88, "step": 34825, "train_speed(iter/s)": 0.413207 }, { "acc": 0.88337469, "epoch": 0.9430590528795386, "grad_norm": 10.922720909118652, "learning_rate": 9.085960703441184e-06, "loss": 0.61497068, "memory(GiB)": 34.88, "step": 34830, "train_speed(iter/s)": 0.413209 }, { "acc": 0.8692338, "epoch": 0.9431944331627542, "grad_norm": 5.186404705047607, "learning_rate": 9.085638150247052e-06, "loss": 0.65025144, "memory(GiB)": 34.88, "step": 34835, "train_speed(iter/s)": 0.413212 }, { "acc": 0.89702873, "epoch": 0.9433298134459698, "grad_norm": 13.559622764587402, "learning_rate": 9.085315545878535e-06, "loss": 0.44671984, "memory(GiB)": 34.88, "step": 34840, "train_speed(iter/s)": 0.413216 }, { "acc": 0.84956589, "epoch": 0.9434651937291852, "grad_norm": 10.152617454528809, "learning_rate": 9.084992890339679e-06, "loss": 0.78179607, "memory(GiB)": 34.88, "step": 34845, "train_speed(iter/s)": 0.413218 }, { "acc": 0.89033718, "epoch": 0.9436005740124008, "grad_norm": 8.756869316101074, "learning_rate": 9.084670183634523e-06, "loss": 0.53896379, "memory(GiB)": 34.88, "step": 34850, "train_speed(iter/s)": 0.413221 }, { "acc": 0.89059753, "epoch": 0.9437359542956164, "grad_norm": 11.139673233032227, "learning_rate": 9.084347425767109e-06, "loss": 0.50835333, "memory(GiB)": 34.88, "step": 34855, "train_speed(iter/s)": 0.413224 }, { "acc": 0.88544846, "epoch": 0.943871334578832, "grad_norm": 13.46644115447998, "learning_rate": 9.084024616741483e-06, "loss": 0.59341521, "memory(GiB)": 34.88, "step": 34860, "train_speed(iter/s)": 0.413227 }, { "acc": 0.89001255, "epoch": 0.9440067148620475, "grad_norm": 7.7664337158203125, "learning_rate": 9.083701756561685e-06, "loss": 0.54775143, "memory(GiB)": 34.88, "step": 34865, "train_speed(iter/s)": 0.41323 }, { "acc": 0.88151608, "epoch": 0.944142095145263, "grad_norm": 7.632857799530029, "learning_rate": 9.083378845231762e-06, "loss": 0.6439353, "memory(GiB)": 34.88, "step": 34870, "train_speed(iter/s)": 0.413232 }, { "acc": 0.897752, "epoch": 0.9442774754284786, "grad_norm": 14.031707763671875, "learning_rate": 9.083055882755762e-06, "loss": 0.48959846, "memory(GiB)": 34.88, "step": 34875, "train_speed(iter/s)": 0.413235 }, { "acc": 0.87185879, "epoch": 0.9444128557116942, "grad_norm": 17.78635597229004, "learning_rate": 9.082732869137723e-06, "loss": 0.72871199, "memory(GiB)": 34.88, "step": 34880, "train_speed(iter/s)": 0.413238 }, { "acc": 0.88167896, "epoch": 0.9445482359949097, "grad_norm": 8.671908378601074, "learning_rate": 9.082409804381699e-06, "loss": 0.59282274, "memory(GiB)": 34.88, "step": 34885, "train_speed(iter/s)": 0.413241 }, { "acc": 0.89041386, "epoch": 0.9446836162781252, "grad_norm": 9.884058952331543, "learning_rate": 9.082086688491731e-06, "loss": 0.63922887, "memory(GiB)": 34.88, "step": 34890, "train_speed(iter/s)": 0.413244 }, { "acc": 0.85170097, "epoch": 0.9448189965613408, "grad_norm": 17.866336822509766, "learning_rate": 9.08176352147187e-06, "loss": 0.8354085, "memory(GiB)": 34.88, "step": 34895, "train_speed(iter/s)": 0.413247 }, { "acc": 0.88072748, "epoch": 0.9449543768445564, "grad_norm": 12.50339126586914, "learning_rate": 9.081440303326166e-06, "loss": 0.6452858, "memory(GiB)": 34.88, "step": 34900, "train_speed(iter/s)": 0.413249 }, { "acc": 0.88344011, "epoch": 0.9450897571277719, "grad_norm": 10.696887969970703, "learning_rate": 9.081117034058665e-06, "loss": 0.60432725, "memory(GiB)": 34.88, "step": 34905, "train_speed(iter/s)": 0.413253 }, { "acc": 0.87036333, "epoch": 0.9452251374109875, "grad_norm": 15.373416900634766, "learning_rate": 9.080793713673414e-06, "loss": 0.62347202, "memory(GiB)": 34.88, "step": 34910, "train_speed(iter/s)": 0.413256 }, { "acc": 0.86767941, "epoch": 0.945360517694203, "grad_norm": 11.373709678649902, "learning_rate": 9.08047034217447e-06, "loss": 0.684408, "memory(GiB)": 34.88, "step": 34915, "train_speed(iter/s)": 0.413258 }, { "acc": 0.88043728, "epoch": 0.9454958979774186, "grad_norm": 12.90076732635498, "learning_rate": 9.080146919565878e-06, "loss": 0.61628141, "memory(GiB)": 34.88, "step": 34920, "train_speed(iter/s)": 0.413261 }, { "acc": 0.89689178, "epoch": 0.9456312782606341, "grad_norm": 15.089191436767578, "learning_rate": 9.079823445851693e-06, "loss": 0.56862426, "memory(GiB)": 34.88, "step": 34925, "train_speed(iter/s)": 0.413264 }, { "acc": 0.87519283, "epoch": 0.9457666585438497, "grad_norm": 8.744699478149414, "learning_rate": 9.079499921035965e-06, "loss": 0.63525419, "memory(GiB)": 34.88, "step": 34930, "train_speed(iter/s)": 0.413267 }, { "acc": 0.87040644, "epoch": 0.9459020388270653, "grad_norm": 12.108589172363281, "learning_rate": 9.07917634512275e-06, "loss": 0.67925858, "memory(GiB)": 34.88, "step": 34935, "train_speed(iter/s)": 0.41327 }, { "acc": 0.86920185, "epoch": 0.9460374191102808, "grad_norm": 10.313053131103516, "learning_rate": 9.078852718116097e-06, "loss": 0.70516596, "memory(GiB)": 34.88, "step": 34940, "train_speed(iter/s)": 0.413273 }, { "acc": 0.88580122, "epoch": 0.9461727993934963, "grad_norm": 4.710737705230713, "learning_rate": 9.078529040020064e-06, "loss": 0.48971801, "memory(GiB)": 34.88, "step": 34945, "train_speed(iter/s)": 0.413276 }, { "acc": 0.89288321, "epoch": 0.9463081796767119, "grad_norm": 4.221964359283447, "learning_rate": 9.078205310838702e-06, "loss": 0.53155379, "memory(GiB)": 34.88, "step": 34950, "train_speed(iter/s)": 0.413279 }, { "acc": 0.89458494, "epoch": 0.9464435599599275, "grad_norm": 5.101822376251221, "learning_rate": 9.077881530576071e-06, "loss": 0.49947591, "memory(GiB)": 34.88, "step": 34955, "train_speed(iter/s)": 0.413282 }, { "acc": 0.8850914, "epoch": 0.946578940243143, "grad_norm": 10.228121757507324, "learning_rate": 9.077557699236221e-06, "loss": 0.57947726, "memory(GiB)": 34.88, "step": 34960, "train_speed(iter/s)": 0.413285 }, { "acc": 0.88963137, "epoch": 0.9467143205263585, "grad_norm": 6.000397205352783, "learning_rate": 9.077233816823214e-06, "loss": 0.57745328, "memory(GiB)": 34.88, "step": 34965, "train_speed(iter/s)": 0.413288 }, { "acc": 0.87585764, "epoch": 0.9468497008095741, "grad_norm": 14.595561981201172, "learning_rate": 9.076909883341105e-06, "loss": 0.67567768, "memory(GiB)": 34.88, "step": 34970, "train_speed(iter/s)": 0.413291 }, { "acc": 0.87289419, "epoch": 0.9469850810927897, "grad_norm": 7.75124979019165, "learning_rate": 9.076585898793953e-06, "loss": 0.59262495, "memory(GiB)": 34.88, "step": 34975, "train_speed(iter/s)": 0.413294 }, { "acc": 0.8790432, "epoch": 0.9471204613760053, "grad_norm": 7.578344345092773, "learning_rate": 9.076261863185813e-06, "loss": 0.60600147, "memory(GiB)": 34.88, "step": 34980, "train_speed(iter/s)": 0.413297 }, { "acc": 0.88533335, "epoch": 0.9472558416592207, "grad_norm": 5.202454090118408, "learning_rate": 9.07593777652075e-06, "loss": 0.5586504, "memory(GiB)": 34.88, "step": 34985, "train_speed(iter/s)": 0.413299 }, { "acc": 0.89674969, "epoch": 0.9473912219424363, "grad_norm": 9.571564674377441, "learning_rate": 9.075613638802818e-06, "loss": 0.52953329, "memory(GiB)": 34.88, "step": 34990, "train_speed(iter/s)": 0.413302 }, { "acc": 0.87573032, "epoch": 0.9475266022256519, "grad_norm": 6.891242504119873, "learning_rate": 9.07528945003608e-06, "loss": 0.64884248, "memory(GiB)": 34.88, "step": 34995, "train_speed(iter/s)": 0.413305 }, { "acc": 0.88209124, "epoch": 0.9476619825088675, "grad_norm": 23.314821243286133, "learning_rate": 9.074965210224599e-06, "loss": 0.62126546, "memory(GiB)": 34.88, "step": 35000, "train_speed(iter/s)": 0.413308 }, { "acc": 0.88326454, "epoch": 0.9477973627920829, "grad_norm": 16.79094123840332, "learning_rate": 9.074640919372435e-06, "loss": 0.59867525, "memory(GiB)": 34.88, "step": 35005, "train_speed(iter/s)": 0.413311 }, { "acc": 0.87425728, "epoch": 0.9479327430752985, "grad_norm": 7.581063270568848, "learning_rate": 9.074316577483649e-06, "loss": 0.60756364, "memory(GiB)": 34.88, "step": 35010, "train_speed(iter/s)": 0.413314 }, { "acc": 0.8615099, "epoch": 0.9480681233585141, "grad_norm": 24.015445709228516, "learning_rate": 9.073992184562306e-06, "loss": 0.79487095, "memory(GiB)": 34.88, "step": 35015, "train_speed(iter/s)": 0.413317 }, { "acc": 0.88700895, "epoch": 0.9482035036417297, "grad_norm": 12.09804630279541, "learning_rate": 9.073667740612469e-06, "loss": 0.62041888, "memory(GiB)": 34.88, "step": 35020, "train_speed(iter/s)": 0.41332 }, { "acc": 0.87158546, "epoch": 0.9483388839249451, "grad_norm": 12.098949432373047, "learning_rate": 9.073343245638202e-06, "loss": 0.65721416, "memory(GiB)": 34.88, "step": 35025, "train_speed(iter/s)": 0.413322 }, { "acc": 0.88031044, "epoch": 0.9484742642081607, "grad_norm": 38.84135437011719, "learning_rate": 9.07301869964357e-06, "loss": 0.62557316, "memory(GiB)": 34.88, "step": 35030, "train_speed(iter/s)": 0.413325 }, { "acc": 0.90173359, "epoch": 0.9486096444913763, "grad_norm": 4.238833904266357, "learning_rate": 9.072694102632638e-06, "loss": 0.49694514, "memory(GiB)": 34.88, "step": 35035, "train_speed(iter/s)": 0.413328 }, { "acc": 0.89583187, "epoch": 0.9487450247745919, "grad_norm": 5.443210124969482, "learning_rate": 9.072369454609474e-06, "loss": 0.44697108, "memory(GiB)": 34.88, "step": 35040, "train_speed(iter/s)": 0.413331 }, { "acc": 0.87492123, "epoch": 0.9488804050578074, "grad_norm": 9.456358909606934, "learning_rate": 9.072044755578144e-06, "loss": 0.57642283, "memory(GiB)": 34.88, "step": 35045, "train_speed(iter/s)": 0.413333 }, { "acc": 0.88697929, "epoch": 0.9490157853410229, "grad_norm": 8.244243621826172, "learning_rate": 9.071720005542716e-06, "loss": 0.55651884, "memory(GiB)": 34.88, "step": 35050, "train_speed(iter/s)": 0.413336 }, { "acc": 0.87276831, "epoch": 0.9491511656242385, "grad_norm": 8.820645332336426, "learning_rate": 9.071395204507256e-06, "loss": 0.69096537, "memory(GiB)": 34.88, "step": 35055, "train_speed(iter/s)": 0.413338 }, { "acc": 0.88642426, "epoch": 0.9492865459074541, "grad_norm": 5.6298933029174805, "learning_rate": 9.071070352475835e-06, "loss": 0.51584721, "memory(GiB)": 34.88, "step": 35060, "train_speed(iter/s)": 0.413342 }, { "acc": 0.90807018, "epoch": 0.9494219261906696, "grad_norm": 8.313061714172363, "learning_rate": 9.070745449452523e-06, "loss": 0.48505192, "memory(GiB)": 34.88, "step": 35065, "train_speed(iter/s)": 0.413344 }, { "acc": 0.87805195, "epoch": 0.9495573064738851, "grad_norm": 5.885952949523926, "learning_rate": 9.070420495441387e-06, "loss": 0.55506496, "memory(GiB)": 34.88, "step": 35070, "train_speed(iter/s)": 0.413347 }, { "acc": 0.88286285, "epoch": 0.9496926867571007, "grad_norm": 7.956295013427734, "learning_rate": 9.0700954904465e-06, "loss": 0.63083344, "memory(GiB)": 34.88, "step": 35075, "train_speed(iter/s)": 0.41335 }, { "acc": 0.86788197, "epoch": 0.9498280670403162, "grad_norm": 9.439796447753906, "learning_rate": 9.069770434471932e-06, "loss": 0.70326109, "memory(GiB)": 34.88, "step": 35080, "train_speed(iter/s)": 0.413352 }, { "acc": 0.85184746, "epoch": 0.9499634473235318, "grad_norm": 9.790549278259277, "learning_rate": 9.069445327521757e-06, "loss": 0.77128363, "memory(GiB)": 34.88, "step": 35085, "train_speed(iter/s)": 0.413355 }, { "acc": 0.89475298, "epoch": 0.9500988276067474, "grad_norm": 9.882755279541016, "learning_rate": 9.069120169600046e-06, "loss": 0.51941605, "memory(GiB)": 34.88, "step": 35090, "train_speed(iter/s)": 0.413358 }, { "acc": 0.86384172, "epoch": 0.9502342078899629, "grad_norm": 11.841442108154297, "learning_rate": 9.068794960710871e-06, "loss": 0.75680008, "memory(GiB)": 34.88, "step": 35095, "train_speed(iter/s)": 0.41336 }, { "acc": 0.84903421, "epoch": 0.9503695881731784, "grad_norm": 9.208165168762207, "learning_rate": 9.068469700858308e-06, "loss": 0.82688856, "memory(GiB)": 34.88, "step": 35100, "train_speed(iter/s)": 0.413363 }, { "acc": 0.90251446, "epoch": 0.950504968456394, "grad_norm": 10.023279190063477, "learning_rate": 9.068144390046433e-06, "loss": 0.46532645, "memory(GiB)": 34.88, "step": 35105, "train_speed(iter/s)": 0.413366 }, { "acc": 0.88356829, "epoch": 0.9506403487396096, "grad_norm": 16.309894561767578, "learning_rate": 9.067819028279318e-06, "loss": 0.65469675, "memory(GiB)": 34.88, "step": 35110, "train_speed(iter/s)": 0.413369 }, { "acc": 0.86851511, "epoch": 0.9507757290228251, "grad_norm": 9.133896827697754, "learning_rate": 9.067493615561038e-06, "loss": 0.73279853, "memory(GiB)": 34.88, "step": 35115, "train_speed(iter/s)": 0.413372 }, { "acc": 0.85197124, "epoch": 0.9509111093060406, "grad_norm": 9.218729972839355, "learning_rate": 9.067168151895675e-06, "loss": 0.85639343, "memory(GiB)": 34.88, "step": 35120, "train_speed(iter/s)": 0.413375 }, { "acc": 0.86113491, "epoch": 0.9510464895892562, "grad_norm": 10.064576148986816, "learning_rate": 9.0668426372873e-06, "loss": 0.70663981, "memory(GiB)": 34.88, "step": 35125, "train_speed(iter/s)": 0.413378 }, { "acc": 0.90100718, "epoch": 0.9511818698724718, "grad_norm": 5.1147284507751465, "learning_rate": 9.066517071739993e-06, "loss": 0.5091805, "memory(GiB)": 34.88, "step": 35130, "train_speed(iter/s)": 0.413381 }, { "acc": 0.87571392, "epoch": 0.9513172501556874, "grad_norm": 6.688961029052734, "learning_rate": 9.066191455257834e-06, "loss": 0.70581703, "memory(GiB)": 34.88, "step": 35135, "train_speed(iter/s)": 0.413383 }, { "acc": 0.86725578, "epoch": 0.9514526304389028, "grad_norm": 10.37279224395752, "learning_rate": 9.065865787844899e-06, "loss": 0.59114132, "memory(GiB)": 34.88, "step": 35140, "train_speed(iter/s)": 0.413386 }, { "acc": 0.86992397, "epoch": 0.9515880107221184, "grad_norm": 5.142200946807861, "learning_rate": 9.065540069505268e-06, "loss": 0.66983042, "memory(GiB)": 34.88, "step": 35145, "train_speed(iter/s)": 0.413389 }, { "acc": 0.90002365, "epoch": 0.951723391005334, "grad_norm": 5.585039138793945, "learning_rate": 9.065214300243025e-06, "loss": 0.42569051, "memory(GiB)": 34.88, "step": 35150, "train_speed(iter/s)": 0.413392 }, { "acc": 0.86461611, "epoch": 0.9518587712885496, "grad_norm": 14.412829399108887, "learning_rate": 9.064888480062248e-06, "loss": 0.71535993, "memory(GiB)": 34.88, "step": 35155, "train_speed(iter/s)": 0.413395 }, { "acc": 0.89162321, "epoch": 0.951994151571765, "grad_norm": 5.645907402038574, "learning_rate": 9.064562608967016e-06, "loss": 0.5659801, "memory(GiB)": 34.88, "step": 35160, "train_speed(iter/s)": 0.413398 }, { "acc": 0.8858593, "epoch": 0.9521295318549806, "grad_norm": 8.617277145385742, "learning_rate": 9.064236686961417e-06, "loss": 0.60107327, "memory(GiB)": 34.88, "step": 35165, "train_speed(iter/s)": 0.413401 }, { "acc": 0.9032835, "epoch": 0.9522649121381962, "grad_norm": 5.951972961425781, "learning_rate": 9.063910714049529e-06, "loss": 0.59779139, "memory(GiB)": 34.88, "step": 35170, "train_speed(iter/s)": 0.413404 }, { "acc": 0.88258419, "epoch": 0.9524002924214118, "grad_norm": 7.141279697418213, "learning_rate": 9.063584690235438e-06, "loss": 0.58526449, "memory(GiB)": 34.88, "step": 35175, "train_speed(iter/s)": 0.413407 }, { "acc": 0.87327232, "epoch": 0.9525356727046272, "grad_norm": 7.687140464782715, "learning_rate": 9.063258615523227e-06, "loss": 0.68718328, "memory(GiB)": 34.88, "step": 35180, "train_speed(iter/s)": 0.41341 }, { "acc": 0.88302431, "epoch": 0.9526710529878428, "grad_norm": 13.330163955688477, "learning_rate": 9.06293248991698e-06, "loss": 0.6193522, "memory(GiB)": 34.88, "step": 35185, "train_speed(iter/s)": 0.413412 }, { "acc": 0.87386866, "epoch": 0.9528064332710584, "grad_norm": 10.831811904907227, "learning_rate": 9.062606313420785e-06, "loss": 0.65783625, "memory(GiB)": 34.88, "step": 35190, "train_speed(iter/s)": 0.413414 }, { "acc": 0.88510532, "epoch": 0.952941813554274, "grad_norm": 9.6329927444458, "learning_rate": 9.062280086038727e-06, "loss": 0.61920681, "memory(GiB)": 34.88, "step": 35195, "train_speed(iter/s)": 0.413417 }, { "acc": 0.88647614, "epoch": 0.9530771938374895, "grad_norm": 7.116124629974365, "learning_rate": 9.06195380777489e-06, "loss": 0.5829174, "memory(GiB)": 34.88, "step": 35200, "train_speed(iter/s)": 0.413421 }, { "acc": 0.89145088, "epoch": 0.953212574120705, "grad_norm": 8.518142700195312, "learning_rate": 9.061627478633365e-06, "loss": 0.54824181, "memory(GiB)": 34.88, "step": 35205, "train_speed(iter/s)": 0.413423 }, { "acc": 0.89410629, "epoch": 0.9533479544039206, "grad_norm": 5.820627689361572, "learning_rate": 9.061301098618237e-06, "loss": 0.57029867, "memory(GiB)": 34.88, "step": 35210, "train_speed(iter/s)": 0.413426 }, { "acc": 0.86871014, "epoch": 0.9534833346871362, "grad_norm": 12.440116882324219, "learning_rate": 9.060974667733596e-06, "loss": 0.75969639, "memory(GiB)": 34.88, "step": 35215, "train_speed(iter/s)": 0.413429 }, { "acc": 0.87748413, "epoch": 0.9536187149703517, "grad_norm": 17.755783081054688, "learning_rate": 9.060648185983533e-06, "loss": 0.60724163, "memory(GiB)": 34.88, "step": 35220, "train_speed(iter/s)": 0.413432 }, { "acc": 0.90433102, "epoch": 0.9537540952535672, "grad_norm": 7.938991546630859, "learning_rate": 9.060321653372133e-06, "loss": 0.47748885, "memory(GiB)": 34.88, "step": 35225, "train_speed(iter/s)": 0.413435 }, { "acc": 0.89506426, "epoch": 0.9538894755367828, "grad_norm": 6.8331098556518555, "learning_rate": 9.05999506990349e-06, "loss": 0.40553222, "memory(GiB)": 34.88, "step": 35230, "train_speed(iter/s)": 0.413437 }, { "acc": 0.88191452, "epoch": 0.9540248558199984, "grad_norm": 18.186893463134766, "learning_rate": 9.059668435581694e-06, "loss": 0.68193021, "memory(GiB)": 34.88, "step": 35235, "train_speed(iter/s)": 0.41344 }, { "acc": 0.88366852, "epoch": 0.9541602361032139, "grad_norm": 6.637051582336426, "learning_rate": 9.059341750410839e-06, "loss": 0.50271678, "memory(GiB)": 34.88, "step": 35240, "train_speed(iter/s)": 0.413443 }, { "acc": 0.86075459, "epoch": 0.9542956163864295, "grad_norm": 9.79943561553955, "learning_rate": 9.059015014395013e-06, "loss": 0.82179203, "memory(GiB)": 34.88, "step": 35245, "train_speed(iter/s)": 0.413446 }, { "acc": 0.86644268, "epoch": 0.954430996669645, "grad_norm": 11.459097862243652, "learning_rate": 9.058688227538314e-06, "loss": 0.75808506, "memory(GiB)": 34.88, "step": 35250, "train_speed(iter/s)": 0.413449 }, { "acc": 0.86862812, "epoch": 0.9545663769528606, "grad_norm": 9.035116195678711, "learning_rate": 9.058361389844831e-06, "loss": 0.75285807, "memory(GiB)": 34.88, "step": 35255, "train_speed(iter/s)": 0.413452 }, { "acc": 0.88461332, "epoch": 0.9547017572360761, "grad_norm": 5.097288131713867, "learning_rate": 9.05803450131866e-06, "loss": 0.60327044, "memory(GiB)": 34.88, "step": 35260, "train_speed(iter/s)": 0.413455 }, { "acc": 0.88604307, "epoch": 0.9548371375192917, "grad_norm": 15.017203330993652, "learning_rate": 9.0577075619639e-06, "loss": 0.59565392, "memory(GiB)": 34.88, "step": 35265, "train_speed(iter/s)": 0.413458 }, { "acc": 0.86376162, "epoch": 0.9549725178025072, "grad_norm": 14.97394847869873, "learning_rate": 9.057380571784639e-06, "loss": 0.7745719, "memory(GiB)": 34.88, "step": 35270, "train_speed(iter/s)": 0.41346 }, { "acc": 0.85413055, "epoch": 0.9551078980857228, "grad_norm": 7.374904632568359, "learning_rate": 9.057053530784978e-06, "loss": 0.73939075, "memory(GiB)": 34.88, "step": 35275, "train_speed(iter/s)": 0.413462 }, { "acc": 0.8933116, "epoch": 0.9552432783689383, "grad_norm": 5.593530178070068, "learning_rate": 9.056726438969015e-06, "loss": 0.51483841, "memory(GiB)": 34.88, "step": 35280, "train_speed(iter/s)": 0.413464 }, { "acc": 0.89846506, "epoch": 0.9553786586521539, "grad_norm": 7.941723823547363, "learning_rate": 9.056399296340844e-06, "loss": 0.53972125, "memory(GiB)": 34.88, "step": 35285, "train_speed(iter/s)": 0.413468 }, { "acc": 0.85746794, "epoch": 0.9555140389353695, "grad_norm": 12.08784008026123, "learning_rate": 9.056072102904564e-06, "loss": 0.84070644, "memory(GiB)": 34.88, "step": 35290, "train_speed(iter/s)": 0.41347 }, { "acc": 0.87314892, "epoch": 0.955649419218585, "grad_norm": 7.732342720031738, "learning_rate": 9.055744858664274e-06, "loss": 0.63062472, "memory(GiB)": 34.88, "step": 35295, "train_speed(iter/s)": 0.413473 }, { "acc": 0.88911009, "epoch": 0.9557847995018005, "grad_norm": 11.106853485107422, "learning_rate": 9.055417563624074e-06, "loss": 0.6128263, "memory(GiB)": 34.88, "step": 35300, "train_speed(iter/s)": 0.413476 }, { "acc": 0.89934196, "epoch": 0.9559201797850161, "grad_norm": 9.617090225219727, "learning_rate": 9.055090217788065e-06, "loss": 0.52406497, "memory(GiB)": 34.88, "step": 35305, "train_speed(iter/s)": 0.413479 }, { "acc": 0.87929754, "epoch": 0.9560555600682317, "grad_norm": 10.11766242980957, "learning_rate": 9.054762821160344e-06, "loss": 0.67814407, "memory(GiB)": 34.88, "step": 35310, "train_speed(iter/s)": 0.413482 }, { "acc": 0.90397339, "epoch": 0.9561909403514472, "grad_norm": 16.402568817138672, "learning_rate": 9.054435373745016e-06, "loss": 0.48077841, "memory(GiB)": 34.88, "step": 35315, "train_speed(iter/s)": 0.413485 }, { "acc": 0.87914906, "epoch": 0.9563263206346627, "grad_norm": 9.00587272644043, "learning_rate": 9.05410787554618e-06, "loss": 0.66622667, "memory(GiB)": 34.88, "step": 35320, "train_speed(iter/s)": 0.413488 }, { "acc": 0.85058918, "epoch": 0.9564617009178783, "grad_norm": 9.341521263122559, "learning_rate": 9.053780326567942e-06, "loss": 0.87544632, "memory(GiB)": 34.88, "step": 35325, "train_speed(iter/s)": 0.413491 }, { "acc": 0.86914959, "epoch": 0.9565970812010939, "grad_norm": 18.553451538085938, "learning_rate": 9.053452726814402e-06, "loss": 0.74305072, "memory(GiB)": 34.88, "step": 35330, "train_speed(iter/s)": 0.413493 }, { "acc": 0.87202835, "epoch": 0.9567324614843095, "grad_norm": 11.307787895202637, "learning_rate": 9.053125076289665e-06, "loss": 0.70365438, "memory(GiB)": 34.88, "step": 35335, "train_speed(iter/s)": 0.413496 }, { "acc": 0.88577309, "epoch": 0.9568678417675249, "grad_norm": 10.073694229125977, "learning_rate": 9.052797374997836e-06, "loss": 0.53001986, "memory(GiB)": 34.88, "step": 35340, "train_speed(iter/s)": 0.413499 }, { "acc": 0.88395176, "epoch": 0.9570032220507405, "grad_norm": 9.827098846435547, "learning_rate": 9.052469622943019e-06, "loss": 0.59151192, "memory(GiB)": 34.88, "step": 35345, "train_speed(iter/s)": 0.413502 }, { "acc": 0.88372498, "epoch": 0.9571386023339561, "grad_norm": 9.90931510925293, "learning_rate": 9.052141820129322e-06, "loss": 0.63466043, "memory(GiB)": 34.88, "step": 35350, "train_speed(iter/s)": 0.413505 }, { "acc": 0.88344593, "epoch": 0.9572739826171717, "grad_norm": 5.354681968688965, "learning_rate": 9.051813966560849e-06, "loss": 0.6319962, "memory(GiB)": 34.88, "step": 35355, "train_speed(iter/s)": 0.413508 }, { "acc": 0.90122766, "epoch": 0.9574093629003871, "grad_norm": 6.1868157386779785, "learning_rate": 9.051486062241707e-06, "loss": 0.50028615, "memory(GiB)": 34.88, "step": 35360, "train_speed(iter/s)": 0.413511 }, { "acc": 0.87519951, "epoch": 0.9575447431836027, "grad_norm": 8.968547821044922, "learning_rate": 9.051158107176005e-06, "loss": 0.69391842, "memory(GiB)": 34.88, "step": 35365, "train_speed(iter/s)": 0.413514 }, { "acc": 0.87285147, "epoch": 0.9576801234668183, "grad_norm": 10.065877914428711, "learning_rate": 9.05083010136785e-06, "loss": 0.74237471, "memory(GiB)": 34.88, "step": 35370, "train_speed(iter/s)": 0.413517 }, { "acc": 0.87708549, "epoch": 0.9578155037500339, "grad_norm": 10.135324478149414, "learning_rate": 9.050502044821353e-06, "loss": 0.65010781, "memory(GiB)": 34.88, "step": 35375, "train_speed(iter/s)": 0.413519 }, { "acc": 0.87356796, "epoch": 0.9579508840332494, "grad_norm": 5.318429946899414, "learning_rate": 9.05017393754062e-06, "loss": 0.65418334, "memory(GiB)": 34.88, "step": 35380, "train_speed(iter/s)": 0.413522 }, { "acc": 0.8759964, "epoch": 0.9580862643164649, "grad_norm": 3.322540760040283, "learning_rate": 9.049845779529765e-06, "loss": 0.69256926, "memory(GiB)": 34.88, "step": 35385, "train_speed(iter/s)": 0.413525 }, { "acc": 0.88690262, "epoch": 0.9582216445996805, "grad_norm": 8.773134231567383, "learning_rate": 9.049517570792898e-06, "loss": 0.57994566, "memory(GiB)": 34.88, "step": 35390, "train_speed(iter/s)": 0.413527 }, { "acc": 0.87947912, "epoch": 0.9583570248828961, "grad_norm": 8.43359661102295, "learning_rate": 9.049189311334127e-06, "loss": 0.64769864, "memory(GiB)": 34.88, "step": 35395, "train_speed(iter/s)": 0.41353 }, { "acc": 0.89262199, "epoch": 0.9584924051661116, "grad_norm": 6.468480110168457, "learning_rate": 9.048861001157568e-06, "loss": 0.59451246, "memory(GiB)": 34.88, "step": 35400, "train_speed(iter/s)": 0.413533 }, { "acc": 0.86821041, "epoch": 0.9586277854493271, "grad_norm": 8.580269813537598, "learning_rate": 9.048532640267332e-06, "loss": 0.67802877, "memory(GiB)": 34.88, "step": 35405, "train_speed(iter/s)": 0.413536 }, { "acc": 0.89122257, "epoch": 0.9587631657325427, "grad_norm": 6.498648166656494, "learning_rate": 9.048204228667534e-06, "loss": 0.55094681, "memory(GiB)": 34.88, "step": 35410, "train_speed(iter/s)": 0.413538 }, { "acc": 0.88915558, "epoch": 0.9588985460157583, "grad_norm": 5.089902400970459, "learning_rate": 9.047875766362287e-06, "loss": 0.51629877, "memory(GiB)": 34.88, "step": 35415, "train_speed(iter/s)": 0.413541 }, { "acc": 0.87877245, "epoch": 0.9590339262989738, "grad_norm": 8.46435832977295, "learning_rate": 9.047547253355703e-06, "loss": 0.60879474, "memory(GiB)": 34.88, "step": 35420, "train_speed(iter/s)": 0.413544 }, { "acc": 0.85962524, "epoch": 0.9591693065821894, "grad_norm": 26.560848236083984, "learning_rate": 9.047218689651903e-06, "loss": 0.7234169, "memory(GiB)": 34.88, "step": 35425, "train_speed(iter/s)": 0.413547 }, { "acc": 0.88701773, "epoch": 0.9593046868654049, "grad_norm": 7.454511642456055, "learning_rate": 9.046890075254997e-06, "loss": 0.54602995, "memory(GiB)": 34.88, "step": 35430, "train_speed(iter/s)": 0.41355 }, { "acc": 0.87415705, "epoch": 0.9594400671486205, "grad_norm": 7.817765712738037, "learning_rate": 9.046561410169105e-06, "loss": 0.69107347, "memory(GiB)": 34.88, "step": 35435, "train_speed(iter/s)": 0.413553 }, { "acc": 0.87452507, "epoch": 0.959575447431836, "grad_norm": 20.7584171295166, "learning_rate": 9.046232694398344e-06, "loss": 0.79377279, "memory(GiB)": 34.88, "step": 35440, "train_speed(iter/s)": 0.413555 }, { "acc": 0.86466208, "epoch": 0.9597108277150516, "grad_norm": 6.630019664764404, "learning_rate": 9.045903927946829e-06, "loss": 0.70498385, "memory(GiB)": 34.88, "step": 35445, "train_speed(iter/s)": 0.413558 }, { "acc": 0.89673729, "epoch": 0.9598462079982671, "grad_norm": 6.87745475769043, "learning_rate": 9.045575110818683e-06, "loss": 0.54566889, "memory(GiB)": 34.88, "step": 35450, "train_speed(iter/s)": 0.413561 }, { "acc": 0.88655128, "epoch": 0.9599815882814827, "grad_norm": 8.277989387512207, "learning_rate": 9.045246243018022e-06, "loss": 0.62963014, "memory(GiB)": 34.88, "step": 35455, "train_speed(iter/s)": 0.413564 }, { "acc": 0.88157063, "epoch": 0.9601169685646982, "grad_norm": 17.046571731567383, "learning_rate": 9.044917324548966e-06, "loss": 0.58781004, "memory(GiB)": 34.88, "step": 35460, "train_speed(iter/s)": 0.413567 }, { "acc": 0.87778473, "epoch": 0.9602523488479138, "grad_norm": 11.918500900268555, "learning_rate": 9.044588355415635e-06, "loss": 0.59645877, "memory(GiB)": 34.88, "step": 35465, "train_speed(iter/s)": 0.413569 }, { "acc": 0.8769578, "epoch": 0.9603877291311294, "grad_norm": 6.364657402038574, "learning_rate": 9.044259335622154e-06, "loss": 0.66019511, "memory(GiB)": 34.88, "step": 35470, "train_speed(iter/s)": 0.413572 }, { "acc": 0.88201885, "epoch": 0.9605231094143449, "grad_norm": 5.9434895515441895, "learning_rate": 9.043930265172638e-06, "loss": 0.64886122, "memory(GiB)": 34.88, "step": 35475, "train_speed(iter/s)": 0.413575 }, { "acc": 0.87592449, "epoch": 0.9606584896975604, "grad_norm": 52.1552734375, "learning_rate": 9.043601144071214e-06, "loss": 0.63839965, "memory(GiB)": 34.88, "step": 35480, "train_speed(iter/s)": 0.413578 }, { "acc": 0.88635149, "epoch": 0.960793869980776, "grad_norm": 10.630511283874512, "learning_rate": 9.043271972322003e-06, "loss": 0.59556503, "memory(GiB)": 34.88, "step": 35485, "train_speed(iter/s)": 0.413581 }, { "acc": 0.89660301, "epoch": 0.9609292502639916, "grad_norm": 7.814677715301514, "learning_rate": 9.042942749929128e-06, "loss": 0.5220253, "memory(GiB)": 34.88, "step": 35490, "train_speed(iter/s)": 0.413584 }, { "acc": 0.87922058, "epoch": 0.9610646305472071, "grad_norm": 9.221651077270508, "learning_rate": 9.042613476896716e-06, "loss": 0.60650969, "memory(GiB)": 34.88, "step": 35495, "train_speed(iter/s)": 0.413587 }, { "acc": 0.88893661, "epoch": 0.9612000108304226, "grad_norm": 11.943270683288574, "learning_rate": 9.04228415322889e-06, "loss": 0.61028967, "memory(GiB)": 34.88, "step": 35500, "train_speed(iter/s)": 0.41359 }, { "acc": 0.88276329, "epoch": 0.9613353911136382, "grad_norm": 14.4579496383667, "learning_rate": 9.041954778929774e-06, "loss": 0.69772024, "memory(GiB)": 34.88, "step": 35505, "train_speed(iter/s)": 0.413593 }, { "acc": 0.87738457, "epoch": 0.9614707713968538, "grad_norm": 8.727473258972168, "learning_rate": 9.041625354003497e-06, "loss": 0.63251681, "memory(GiB)": 34.88, "step": 35510, "train_speed(iter/s)": 0.413596 }, { "acc": 0.8823349, "epoch": 0.9616061516800694, "grad_norm": 7.135795593261719, "learning_rate": 9.041295878454184e-06, "loss": 0.61914816, "memory(GiB)": 34.88, "step": 35515, "train_speed(iter/s)": 0.413598 }, { "acc": 0.90031433, "epoch": 0.9617415319632848, "grad_norm": 11.080004692077637, "learning_rate": 9.040966352285962e-06, "loss": 0.47261558, "memory(GiB)": 34.88, "step": 35520, "train_speed(iter/s)": 0.413601 }, { "acc": 0.8854435, "epoch": 0.9618769122465004, "grad_norm": 10.619255065917969, "learning_rate": 9.040636775502956e-06, "loss": 0.60370235, "memory(GiB)": 34.88, "step": 35525, "train_speed(iter/s)": 0.413604 }, { "acc": 0.88925819, "epoch": 0.962012292529716, "grad_norm": 7.034298419952393, "learning_rate": 9.040307148109303e-06, "loss": 0.54568624, "memory(GiB)": 34.88, "step": 35530, "train_speed(iter/s)": 0.413607 }, { "acc": 0.88326979, "epoch": 0.9621476728129316, "grad_norm": 7.99718713760376, "learning_rate": 9.039977470109125e-06, "loss": 0.53318424, "memory(GiB)": 34.88, "step": 35535, "train_speed(iter/s)": 0.41361 }, { "acc": 0.87457752, "epoch": 0.962283053096147, "grad_norm": 6.423867702484131, "learning_rate": 9.039647741506555e-06, "loss": 0.72145882, "memory(GiB)": 34.88, "step": 35540, "train_speed(iter/s)": 0.413613 }, { "acc": 0.88862371, "epoch": 0.9624184333793626, "grad_norm": 5.612811088562012, "learning_rate": 9.039317962305722e-06, "loss": 0.54558291, "memory(GiB)": 34.88, "step": 35545, "train_speed(iter/s)": 0.413616 }, { "acc": 0.86076612, "epoch": 0.9625538136625782, "grad_norm": 7.002873420715332, "learning_rate": 9.038988132510759e-06, "loss": 0.70452433, "memory(GiB)": 34.88, "step": 35550, "train_speed(iter/s)": 0.413618 }, { "acc": 0.88178015, "epoch": 0.9626891939457938, "grad_norm": 6.331031799316406, "learning_rate": 9.038658252125795e-06, "loss": 0.61551251, "memory(GiB)": 34.88, "step": 35555, "train_speed(iter/s)": 0.413621 }, { "acc": 0.87394886, "epoch": 0.9628245742290092, "grad_norm": 20.086822509765625, "learning_rate": 9.038328321154964e-06, "loss": 0.58039193, "memory(GiB)": 34.88, "step": 35560, "train_speed(iter/s)": 0.413623 }, { "acc": 0.88179798, "epoch": 0.9629599545122248, "grad_norm": 6.676421165466309, "learning_rate": 9.0379983396024e-06, "loss": 0.68914118, "memory(GiB)": 34.88, "step": 35565, "train_speed(iter/s)": 0.413626 }, { "acc": 0.87296906, "epoch": 0.9630953347954404, "grad_norm": 5.63956880569458, "learning_rate": 9.037668307472235e-06, "loss": 0.58925867, "memory(GiB)": 34.88, "step": 35570, "train_speed(iter/s)": 0.413629 }, { "acc": 0.8763463, "epoch": 0.963230715078656, "grad_norm": 15.36120891571045, "learning_rate": 9.037338224768603e-06, "loss": 0.63702831, "memory(GiB)": 34.88, "step": 35575, "train_speed(iter/s)": 0.413631 }, { "acc": 0.89300356, "epoch": 0.9633660953618715, "grad_norm": 4.782345294952393, "learning_rate": 9.037008091495642e-06, "loss": 0.54441714, "memory(GiB)": 34.88, "step": 35580, "train_speed(iter/s)": 0.413634 }, { "acc": 0.88479538, "epoch": 0.963501475645087, "grad_norm": 9.501346588134766, "learning_rate": 9.036677907657484e-06, "loss": 0.76542072, "memory(GiB)": 34.88, "step": 35585, "train_speed(iter/s)": 0.413636 }, { "acc": 0.84329872, "epoch": 0.9636368559283026, "grad_norm": 6.583562850952148, "learning_rate": 9.036347673258267e-06, "loss": 0.73047085, "memory(GiB)": 34.88, "step": 35590, "train_speed(iter/s)": 0.413639 }, { "acc": 0.86710243, "epoch": 0.9637722362115182, "grad_norm": 33.699039459228516, "learning_rate": 9.036017388302127e-06, "loss": 0.77846441, "memory(GiB)": 34.88, "step": 35595, "train_speed(iter/s)": 0.413642 }, { "acc": 0.88935318, "epoch": 0.9639076164947337, "grad_norm": 5.580628871917725, "learning_rate": 9.035687052793202e-06, "loss": 0.56457891, "memory(GiB)": 34.88, "step": 35600, "train_speed(iter/s)": 0.413645 }, { "acc": 0.87382793, "epoch": 0.9640429967779492, "grad_norm": 16.490100860595703, "learning_rate": 9.035356666735631e-06, "loss": 0.63834271, "memory(GiB)": 34.88, "step": 35605, "train_speed(iter/s)": 0.413647 }, { "acc": 0.86691914, "epoch": 0.9641783770611648, "grad_norm": 8.318679809570312, "learning_rate": 9.03502623013355e-06, "loss": 0.7744206, "memory(GiB)": 34.88, "step": 35610, "train_speed(iter/s)": 0.41365 }, { "acc": 0.88801928, "epoch": 0.9643137573443804, "grad_norm": 4.236145973205566, "learning_rate": 9.034695742991102e-06, "loss": 0.49893932, "memory(GiB)": 34.88, "step": 35615, "train_speed(iter/s)": 0.413653 }, { "acc": 0.86953554, "epoch": 0.9644491376275959, "grad_norm": 8.494354248046875, "learning_rate": 9.034365205312425e-06, "loss": 0.73510165, "memory(GiB)": 34.88, "step": 35620, "train_speed(iter/s)": 0.413655 }, { "acc": 0.8626339, "epoch": 0.9645845179108115, "grad_norm": 11.765525817871094, "learning_rate": 9.034034617101659e-06, "loss": 0.74691195, "memory(GiB)": 34.88, "step": 35625, "train_speed(iter/s)": 0.413658 }, { "acc": 0.87529364, "epoch": 0.964719898194027, "grad_norm": 11.998711585998535, "learning_rate": 9.033703978362947e-06, "loss": 0.61456099, "memory(GiB)": 34.88, "step": 35630, "train_speed(iter/s)": 0.41366 }, { "acc": 0.8762702, "epoch": 0.9648552784772426, "grad_norm": 12.144652366638184, "learning_rate": 9.033373289100427e-06, "loss": 0.56274662, "memory(GiB)": 34.88, "step": 35635, "train_speed(iter/s)": 0.413663 }, { "acc": 0.89893847, "epoch": 0.9649906587604581, "grad_norm": 12.187591552734375, "learning_rate": 9.033042549318249e-06, "loss": 0.56085033, "memory(GiB)": 34.88, "step": 35640, "train_speed(iter/s)": 0.413666 }, { "acc": 0.88491869, "epoch": 0.9651260390436737, "grad_norm": 13.896379470825195, "learning_rate": 9.032711759020548e-06, "loss": 0.65930691, "memory(GiB)": 34.88, "step": 35645, "train_speed(iter/s)": 0.413668 }, { "acc": 0.88248291, "epoch": 0.9652614193268892, "grad_norm": 16.744564056396484, "learning_rate": 9.032380918211473e-06, "loss": 0.61637673, "memory(GiB)": 34.88, "step": 35650, "train_speed(iter/s)": 0.413672 }, { "acc": 0.907726, "epoch": 0.9653967996101048, "grad_norm": 6.941861629486084, "learning_rate": 9.032050026895166e-06, "loss": 0.45621214, "memory(GiB)": 34.88, "step": 35655, "train_speed(iter/s)": 0.413675 }, { "acc": 0.87417068, "epoch": 0.9655321798933203, "grad_norm": 7.300917625427246, "learning_rate": 9.031719085075775e-06, "loss": 0.69864893, "memory(GiB)": 34.88, "step": 35660, "train_speed(iter/s)": 0.413677 }, { "acc": 0.89597282, "epoch": 0.9656675601765359, "grad_norm": 7.392394542694092, "learning_rate": 9.03138809275744e-06, "loss": 0.45742888, "memory(GiB)": 34.88, "step": 35665, "train_speed(iter/s)": 0.41368 }, { "acc": 0.883634, "epoch": 0.9658029404597515, "grad_norm": 6.887449741363525, "learning_rate": 9.031057049944313e-06, "loss": 0.61637497, "memory(GiB)": 34.88, "step": 35670, "train_speed(iter/s)": 0.413683 }, { "acc": 0.88704548, "epoch": 0.965938320742967, "grad_norm": 12.521805763244629, "learning_rate": 9.030725956640539e-06, "loss": 0.59276314, "memory(GiB)": 34.88, "step": 35675, "train_speed(iter/s)": 0.413686 }, { "acc": 0.87738323, "epoch": 0.9660737010261825, "grad_norm": 8.822442054748535, "learning_rate": 9.030394812850266e-06, "loss": 0.67927418, "memory(GiB)": 34.88, "step": 35680, "train_speed(iter/s)": 0.413689 }, { "acc": 0.88848286, "epoch": 0.9662090813093981, "grad_norm": 7.846463680267334, "learning_rate": 9.030063618577642e-06, "loss": 0.57829313, "memory(GiB)": 34.88, "step": 35685, "train_speed(iter/s)": 0.413692 }, { "acc": 0.88481064, "epoch": 0.9663444615926137, "grad_norm": 7.798151969909668, "learning_rate": 9.029732373826816e-06, "loss": 0.62746859, "memory(GiB)": 34.88, "step": 35690, "train_speed(iter/s)": 0.413694 }, { "acc": 0.89297066, "epoch": 0.9664798418758292, "grad_norm": 7.535812854766846, "learning_rate": 9.029401078601935e-06, "loss": 0.52798462, "memory(GiB)": 34.88, "step": 35695, "train_speed(iter/s)": 0.413697 }, { "acc": 0.89714451, "epoch": 0.9666152221590447, "grad_norm": 7.368092060089111, "learning_rate": 9.029069732907152e-06, "loss": 0.51092615, "memory(GiB)": 34.88, "step": 35700, "train_speed(iter/s)": 0.413699 }, { "acc": 0.90419254, "epoch": 0.9667506024422603, "grad_norm": 4.6170654296875, "learning_rate": 9.028738336746616e-06, "loss": 0.44265471, "memory(GiB)": 34.88, "step": 35705, "train_speed(iter/s)": 0.413703 }, { "acc": 0.8865633, "epoch": 0.9668859827254759, "grad_norm": 5.849545955657959, "learning_rate": 9.028406890124481e-06, "loss": 0.56675978, "memory(GiB)": 34.88, "step": 35710, "train_speed(iter/s)": 0.413705 }, { "acc": 0.88875208, "epoch": 0.9670213630086915, "grad_norm": 8.662610054016113, "learning_rate": 9.028075393044898e-06, "loss": 0.63365002, "memory(GiB)": 34.88, "step": 35715, "train_speed(iter/s)": 0.413708 }, { "acc": 0.88327503, "epoch": 0.9671567432919069, "grad_norm": 10.302311897277832, "learning_rate": 9.027743845512018e-06, "loss": 0.6255466, "memory(GiB)": 34.88, "step": 35720, "train_speed(iter/s)": 0.413711 }, { "acc": 0.86513472, "epoch": 0.9672921235751225, "grad_norm": 6.487624645233154, "learning_rate": 9.027412247529994e-06, "loss": 0.62015958, "memory(GiB)": 34.88, "step": 35725, "train_speed(iter/s)": 0.413714 }, { "acc": 0.87917976, "epoch": 0.9674275038583381, "grad_norm": 7.360928058624268, "learning_rate": 9.027080599102982e-06, "loss": 0.60038195, "memory(GiB)": 34.88, "step": 35730, "train_speed(iter/s)": 0.413716 }, { "acc": 0.8753643, "epoch": 0.9675628841415537, "grad_norm": 8.817303657531738, "learning_rate": 9.026748900235137e-06, "loss": 0.55467644, "memory(GiB)": 34.88, "step": 35735, "train_speed(iter/s)": 0.413719 }, { "acc": 0.87902193, "epoch": 0.9676982644247691, "grad_norm": 7.3424553871154785, "learning_rate": 9.026417150930612e-06, "loss": 0.64980989, "memory(GiB)": 34.88, "step": 35740, "train_speed(iter/s)": 0.413721 }, { "acc": 0.87813358, "epoch": 0.9678336447079847, "grad_norm": 9.771677017211914, "learning_rate": 9.026085351193564e-06, "loss": 0.60926714, "memory(GiB)": 34.88, "step": 35745, "train_speed(iter/s)": 0.413724 }, { "acc": 0.89704514, "epoch": 0.9679690249912003, "grad_norm": 8.990026473999023, "learning_rate": 9.02575350102815e-06, "loss": 0.57539463, "memory(GiB)": 34.88, "step": 35750, "train_speed(iter/s)": 0.413727 }, { "acc": 0.85508432, "epoch": 0.9681044052744159, "grad_norm": 10.034257888793945, "learning_rate": 9.025421600438525e-06, "loss": 0.69935989, "memory(GiB)": 34.88, "step": 35755, "train_speed(iter/s)": 0.41373 }, { "acc": 0.88344803, "epoch": 0.9682397855576313, "grad_norm": 9.193358421325684, "learning_rate": 9.025089649428847e-06, "loss": 0.5663126, "memory(GiB)": 34.88, "step": 35760, "train_speed(iter/s)": 0.413733 }, { "acc": 0.88949919, "epoch": 0.9683751658408469, "grad_norm": 14.62154483795166, "learning_rate": 9.024757648003279e-06, "loss": 0.57960081, "memory(GiB)": 34.88, "step": 35765, "train_speed(iter/s)": 0.413735 }, { "acc": 0.88159771, "epoch": 0.9685105461240625, "grad_norm": 6.798493385314941, "learning_rate": 9.024425596165972e-06, "loss": 0.60015783, "memory(GiB)": 34.88, "step": 35770, "train_speed(iter/s)": 0.413738 }, { "acc": 0.86969604, "epoch": 0.9686459264072781, "grad_norm": 15.564590454101562, "learning_rate": 9.024093493921093e-06, "loss": 0.74647727, "memory(GiB)": 34.88, "step": 35775, "train_speed(iter/s)": 0.413741 }, { "acc": 0.89414196, "epoch": 0.9687813066904936, "grad_norm": 5.182580947875977, "learning_rate": 9.023761341272798e-06, "loss": 0.60424509, "memory(GiB)": 34.88, "step": 35780, "train_speed(iter/s)": 0.413744 }, { "acc": 0.88399649, "epoch": 0.9689166869737091, "grad_norm": 7.780737400054932, "learning_rate": 9.02342913822525e-06, "loss": 0.62972965, "memory(GiB)": 34.88, "step": 35785, "train_speed(iter/s)": 0.413746 }, { "acc": 0.90035458, "epoch": 0.9690520672569247, "grad_norm": 4.338315963745117, "learning_rate": 9.023096884782608e-06, "loss": 0.50570669, "memory(GiB)": 34.88, "step": 35790, "train_speed(iter/s)": 0.413749 }, { "acc": 0.86085825, "epoch": 0.9691874475401403, "grad_norm": 15.817301750183105, "learning_rate": 9.022764580949035e-06, "loss": 0.82682171, "memory(GiB)": 34.88, "step": 35795, "train_speed(iter/s)": 0.413752 }, { "acc": 0.8583498, "epoch": 0.9693228278233558, "grad_norm": 10.894893646240234, "learning_rate": 9.022432226728697e-06, "loss": 0.78125648, "memory(GiB)": 34.88, "step": 35800, "train_speed(iter/s)": 0.413755 }, { "acc": 0.88833199, "epoch": 0.9694582081065714, "grad_norm": 6.132876396179199, "learning_rate": 9.022099822125754e-06, "loss": 0.61708832, "memory(GiB)": 34.88, "step": 35805, "train_speed(iter/s)": 0.413758 }, { "acc": 0.86181011, "epoch": 0.9695935883897869, "grad_norm": 6.548873424530029, "learning_rate": 9.021767367144371e-06, "loss": 0.73764863, "memory(GiB)": 34.88, "step": 35810, "train_speed(iter/s)": 0.41376 }, { "acc": 0.87379246, "epoch": 0.9697289686730025, "grad_norm": 9.244726181030273, "learning_rate": 9.021434861788711e-06, "loss": 0.67875752, "memory(GiB)": 34.88, "step": 35815, "train_speed(iter/s)": 0.413763 }, { "acc": 0.86639566, "epoch": 0.969864348956218, "grad_norm": 5.433780670166016, "learning_rate": 9.02110230606294e-06, "loss": 0.68686228, "memory(GiB)": 34.88, "step": 35820, "train_speed(iter/s)": 0.413766 }, { "acc": 0.88770485, "epoch": 0.9699997292394336, "grad_norm": 9.047618865966797, "learning_rate": 9.020769699971226e-06, "loss": 0.62458229, "memory(GiB)": 34.88, "step": 35825, "train_speed(iter/s)": 0.413768 }, { "acc": 0.90933008, "epoch": 0.9701351095226491, "grad_norm": 9.319609642028809, "learning_rate": 9.020437043517734e-06, "loss": 0.49447775, "memory(GiB)": 34.88, "step": 35830, "train_speed(iter/s)": 0.413771 }, { "acc": 0.87907276, "epoch": 0.9702704898058647, "grad_norm": 9.448859214782715, "learning_rate": 9.020104336706633e-06, "loss": 0.59420223, "memory(GiB)": 34.88, "step": 35835, "train_speed(iter/s)": 0.413774 }, { "acc": 0.88715448, "epoch": 0.9704058700890802, "grad_norm": 4.399376392364502, "learning_rate": 9.019771579542087e-06, "loss": 0.58355427, "memory(GiB)": 34.88, "step": 35840, "train_speed(iter/s)": 0.413777 }, { "acc": 0.87559252, "epoch": 0.9705412503722958, "grad_norm": 7.185961723327637, "learning_rate": 9.019438772028269e-06, "loss": 0.59982948, "memory(GiB)": 34.88, "step": 35845, "train_speed(iter/s)": 0.413779 }, { "acc": 0.88287525, "epoch": 0.9706766306555114, "grad_norm": 11.229613304138184, "learning_rate": 9.019105914169345e-06, "loss": 0.55888319, "memory(GiB)": 34.88, "step": 35850, "train_speed(iter/s)": 0.413782 }, { "acc": 0.88299952, "epoch": 0.9708120109387269, "grad_norm": 15.92629337310791, "learning_rate": 9.018773005969484e-06, "loss": 0.63480072, "memory(GiB)": 34.88, "step": 35855, "train_speed(iter/s)": 0.413785 }, { "acc": 0.89723864, "epoch": 0.9709473912219424, "grad_norm": 6.182407379150391, "learning_rate": 9.018440047432857e-06, "loss": 0.51889739, "memory(GiB)": 34.88, "step": 35860, "train_speed(iter/s)": 0.413788 }, { "acc": 0.90151844, "epoch": 0.971082771505158, "grad_norm": 5.572798728942871, "learning_rate": 9.018107038563636e-06, "loss": 0.52978129, "memory(GiB)": 34.88, "step": 35865, "train_speed(iter/s)": 0.41379 }, { "acc": 0.89580679, "epoch": 0.9712181517883736, "grad_norm": 7.195511817932129, "learning_rate": 9.017773979365995e-06, "loss": 0.59459381, "memory(GiB)": 34.88, "step": 35870, "train_speed(iter/s)": 0.413794 }, { "acc": 0.87262268, "epoch": 0.9713535320715891, "grad_norm": 7.055515289306641, "learning_rate": 9.0174408698441e-06, "loss": 0.64109039, "memory(GiB)": 34.88, "step": 35875, "train_speed(iter/s)": 0.413796 }, { "acc": 0.89350681, "epoch": 0.9714889123548046, "grad_norm": 8.359333038330078, "learning_rate": 9.01710771000213e-06, "loss": 0.46625586, "memory(GiB)": 34.88, "step": 35880, "train_speed(iter/s)": 0.413799 }, { "acc": 0.86802883, "epoch": 0.9716242926380202, "grad_norm": 10.294144630432129, "learning_rate": 9.016774499844254e-06, "loss": 0.70364594, "memory(GiB)": 34.88, "step": 35885, "train_speed(iter/s)": 0.413802 }, { "acc": 0.87387371, "epoch": 0.9717596729212358, "grad_norm": 5.07326078414917, "learning_rate": 9.016441239374651e-06, "loss": 0.64486041, "memory(GiB)": 34.88, "step": 35890, "train_speed(iter/s)": 0.413804 }, { "acc": 0.89281893, "epoch": 0.9718950532044514, "grad_norm": 9.310596466064453, "learning_rate": 9.016107928597488e-06, "loss": 0.51137176, "memory(GiB)": 34.88, "step": 35895, "train_speed(iter/s)": 0.413807 }, { "acc": 0.8772974, "epoch": 0.9720304334876668, "grad_norm": 7.35970401763916, "learning_rate": 9.015774567516947e-06, "loss": 0.68947821, "memory(GiB)": 34.88, "step": 35900, "train_speed(iter/s)": 0.41381 }, { "acc": 0.87123222, "epoch": 0.9721658137708824, "grad_norm": 9.113580703735352, "learning_rate": 9.015441156137202e-06, "loss": 0.68665314, "memory(GiB)": 34.88, "step": 35905, "train_speed(iter/s)": 0.413813 }, { "acc": 0.89316635, "epoch": 0.972301194054098, "grad_norm": 8.754266738891602, "learning_rate": 9.015107694462432e-06, "loss": 0.53798771, "memory(GiB)": 34.88, "step": 35910, "train_speed(iter/s)": 0.413816 }, { "acc": 0.88328238, "epoch": 0.9724365743373136, "grad_norm": 6.081780433654785, "learning_rate": 9.014774182496809e-06, "loss": 0.58153391, "memory(GiB)": 34.88, "step": 35915, "train_speed(iter/s)": 0.413818 }, { "acc": 0.87471085, "epoch": 0.972571954620529, "grad_norm": 6.5727643966674805, "learning_rate": 9.014440620244516e-06, "loss": 0.67683902, "memory(GiB)": 34.88, "step": 35920, "train_speed(iter/s)": 0.413821 }, { "acc": 0.89288225, "epoch": 0.9727073349037446, "grad_norm": 10.834273338317871, "learning_rate": 9.014107007709728e-06, "loss": 0.54208646, "memory(GiB)": 34.88, "step": 35925, "train_speed(iter/s)": 0.413823 }, { "acc": 0.85786772, "epoch": 0.9728427151869602, "grad_norm": 10.496321678161621, "learning_rate": 9.013773344896625e-06, "loss": 0.74276447, "memory(GiB)": 34.88, "step": 35930, "train_speed(iter/s)": 0.413825 }, { "acc": 0.85554695, "epoch": 0.9729780954701758, "grad_norm": 27.766429901123047, "learning_rate": 9.01343963180939e-06, "loss": 0.75203028, "memory(GiB)": 34.88, "step": 35935, "train_speed(iter/s)": 0.413828 }, { "acc": 0.86600761, "epoch": 0.9731134757533912, "grad_norm": 22.257240295410156, "learning_rate": 9.013105868452199e-06, "loss": 0.75521936, "memory(GiB)": 34.88, "step": 35940, "train_speed(iter/s)": 0.413831 }, { "acc": 0.86790104, "epoch": 0.9732488560366068, "grad_norm": 11.542081832885742, "learning_rate": 9.012772054829232e-06, "loss": 0.69217257, "memory(GiB)": 34.88, "step": 35945, "train_speed(iter/s)": 0.413833 }, { "acc": 0.87477684, "epoch": 0.9733842363198224, "grad_norm": 18.830020904541016, "learning_rate": 9.012438190944675e-06, "loss": 0.60932584, "memory(GiB)": 34.88, "step": 35950, "train_speed(iter/s)": 0.413836 }, { "acc": 0.87870026, "epoch": 0.973519616603038, "grad_norm": 4.592326641082764, "learning_rate": 9.01210427680271e-06, "loss": 0.54760714, "memory(GiB)": 34.88, "step": 35955, "train_speed(iter/s)": 0.413838 }, { "acc": 0.8847517, "epoch": 0.9736549968862535, "grad_norm": 10.446840286254883, "learning_rate": 9.01177031240752e-06, "loss": 0.58050718, "memory(GiB)": 34.88, "step": 35960, "train_speed(iter/s)": 0.413841 }, { "acc": 0.87007961, "epoch": 0.973790377169469, "grad_norm": 11.208595275878906, "learning_rate": 9.011436297763284e-06, "loss": 0.71648607, "memory(GiB)": 34.88, "step": 35965, "train_speed(iter/s)": 0.413844 }, { "acc": 0.88665848, "epoch": 0.9739257574526846, "grad_norm": 8.336403846740723, "learning_rate": 9.011102232874192e-06, "loss": 0.49878855, "memory(GiB)": 34.88, "step": 35970, "train_speed(iter/s)": 0.413847 }, { "acc": 0.89717407, "epoch": 0.9740611377359002, "grad_norm": 25.57662010192871, "learning_rate": 9.010768117744425e-06, "loss": 0.50965376, "memory(GiB)": 34.88, "step": 35975, "train_speed(iter/s)": 0.41385 }, { "acc": 0.88596201, "epoch": 0.9741965180191157, "grad_norm": 9.830710411071777, "learning_rate": 9.010433952378169e-06, "loss": 0.56398296, "memory(GiB)": 34.88, "step": 35980, "train_speed(iter/s)": 0.413853 }, { "acc": 0.86648769, "epoch": 0.9743318983023312, "grad_norm": 14.25625991821289, "learning_rate": 9.010099736779611e-06, "loss": 0.67923708, "memory(GiB)": 34.88, "step": 35985, "train_speed(iter/s)": 0.413855 }, { "acc": 0.90418873, "epoch": 0.9744672785855468, "grad_norm": 6.440884113311768, "learning_rate": 9.009765470952938e-06, "loss": 0.44653702, "memory(GiB)": 34.88, "step": 35990, "train_speed(iter/s)": 0.413857 }, { "acc": 0.8822176, "epoch": 0.9746026588687624, "grad_norm": 21.38611602783203, "learning_rate": 9.009431154902338e-06, "loss": 0.72024965, "memory(GiB)": 34.88, "step": 35995, "train_speed(iter/s)": 0.41386 }, { "acc": 0.88888721, "epoch": 0.9747380391519779, "grad_norm": 7.87675142288208, "learning_rate": 9.009096788631996e-06, "loss": 0.5680654, "memory(GiB)": 34.88, "step": 36000, "train_speed(iter/s)": 0.413863 }, { "acc": 0.891996, "epoch": 0.9748734194351935, "grad_norm": 4.784090518951416, "learning_rate": 9.008762372146103e-06, "loss": 0.49843445, "memory(GiB)": 34.88, "step": 36005, "train_speed(iter/s)": 0.413865 }, { "acc": 0.88387222, "epoch": 0.975008799718409, "grad_norm": 4.874525547027588, "learning_rate": 9.008427905448846e-06, "loss": 0.60821285, "memory(GiB)": 34.88, "step": 36010, "train_speed(iter/s)": 0.413868 }, { "acc": 0.88835716, "epoch": 0.9751441800016246, "grad_norm": 10.792243957519531, "learning_rate": 9.008093388544418e-06, "loss": 0.54675808, "memory(GiB)": 34.88, "step": 36015, "train_speed(iter/s)": 0.413871 }, { "acc": 0.85375509, "epoch": 0.9752795602848401, "grad_norm": 8.286308288574219, "learning_rate": 9.007758821437008e-06, "loss": 0.76749916, "memory(GiB)": 34.88, "step": 36020, "train_speed(iter/s)": 0.413873 }, { "acc": 0.86232309, "epoch": 0.9754149405680557, "grad_norm": 14.272178649902344, "learning_rate": 9.007424204130805e-06, "loss": 0.87135534, "memory(GiB)": 34.88, "step": 36025, "train_speed(iter/s)": 0.413875 }, { "acc": 0.87337008, "epoch": 0.9755503208512712, "grad_norm": 14.291016578674316, "learning_rate": 9.007089536630008e-06, "loss": 0.74457874, "memory(GiB)": 34.88, "step": 36030, "train_speed(iter/s)": 0.413878 }, { "acc": 0.8704092, "epoch": 0.9756857011344868, "grad_norm": 4.3456573486328125, "learning_rate": 9.006754818938798e-06, "loss": 0.71213932, "memory(GiB)": 34.88, "step": 36035, "train_speed(iter/s)": 0.413881 }, { "acc": 0.9012064, "epoch": 0.9758210814177023, "grad_norm": 6.078421592712402, "learning_rate": 9.006420051061378e-06, "loss": 0.46497507, "memory(GiB)": 34.88, "step": 36040, "train_speed(iter/s)": 0.413884 }, { "acc": 0.89255352, "epoch": 0.9759564617009179, "grad_norm": 6.915072441101074, "learning_rate": 9.006085233001934e-06, "loss": 0.56371017, "memory(GiB)": 34.88, "step": 36045, "train_speed(iter/s)": 0.413887 }, { "acc": 0.84899731, "epoch": 0.9760918419841335, "grad_norm": 11.000041007995605, "learning_rate": 9.005750364764668e-06, "loss": 0.83620329, "memory(GiB)": 34.88, "step": 36050, "train_speed(iter/s)": 0.413889 }, { "acc": 0.85831432, "epoch": 0.976227222267349, "grad_norm": 10.393198013305664, "learning_rate": 9.005415446353769e-06, "loss": 0.67581806, "memory(GiB)": 34.88, "step": 36055, "train_speed(iter/s)": 0.413892 }, { "acc": 0.87711334, "epoch": 0.9763626025505645, "grad_norm": 9.136651992797852, "learning_rate": 9.005080477773437e-06, "loss": 0.61398063, "memory(GiB)": 34.88, "step": 36060, "train_speed(iter/s)": 0.413895 }, { "acc": 0.87401714, "epoch": 0.9764979828337801, "grad_norm": 9.394640922546387, "learning_rate": 9.00474545902786e-06, "loss": 0.65597978, "memory(GiB)": 34.88, "step": 36065, "train_speed(iter/s)": 0.413897 }, { "acc": 0.88713617, "epoch": 0.9766333631169957, "grad_norm": 9.033208847045898, "learning_rate": 9.004410390121245e-06, "loss": 0.59105234, "memory(GiB)": 34.88, "step": 36070, "train_speed(iter/s)": 0.4139 }, { "acc": 0.88106747, "epoch": 0.9767687434002112, "grad_norm": 14.77502727508545, "learning_rate": 9.004075271057783e-06, "loss": 0.63468761, "memory(GiB)": 34.88, "step": 36075, "train_speed(iter/s)": 0.413903 }, { "acc": 0.88578835, "epoch": 0.9769041236834267, "grad_norm": 8.554184913635254, "learning_rate": 9.003740101841674e-06, "loss": 0.64040709, "memory(GiB)": 34.88, "step": 36080, "train_speed(iter/s)": 0.413906 }, { "acc": 0.87380285, "epoch": 0.9770395039666423, "grad_norm": 6.378628730773926, "learning_rate": 9.003404882477117e-06, "loss": 0.63212719, "memory(GiB)": 34.88, "step": 36085, "train_speed(iter/s)": 0.413909 }, { "acc": 0.88922768, "epoch": 0.9771748842498579, "grad_norm": 11.932404518127441, "learning_rate": 9.00306961296831e-06, "loss": 0.52819471, "memory(GiB)": 34.88, "step": 36090, "train_speed(iter/s)": 0.413912 }, { "acc": 0.8840868, "epoch": 0.9773102645330735, "grad_norm": 9.808570861816406, "learning_rate": 9.002734293319452e-06, "loss": 0.57160788, "memory(GiB)": 34.88, "step": 36095, "train_speed(iter/s)": 0.413914 }, { "acc": 0.88222733, "epoch": 0.9774456448162889, "grad_norm": 4.405440330505371, "learning_rate": 9.002398923534747e-06, "loss": 0.68067193, "memory(GiB)": 34.88, "step": 36100, "train_speed(iter/s)": 0.413916 }, { "acc": 0.88376417, "epoch": 0.9775810250995045, "grad_norm": 7.809818267822266, "learning_rate": 9.002063503618394e-06, "loss": 0.65171657, "memory(GiB)": 34.88, "step": 36105, "train_speed(iter/s)": 0.413919 }, { "acc": 0.903405, "epoch": 0.9777164053827201, "grad_norm": 11.198527336120605, "learning_rate": 9.001728033574594e-06, "loss": 0.43360176, "memory(GiB)": 34.88, "step": 36110, "train_speed(iter/s)": 0.413922 }, { "acc": 0.88377724, "epoch": 0.9778517856659356, "grad_norm": 6.092072010040283, "learning_rate": 9.001392513407552e-06, "loss": 0.60528836, "memory(GiB)": 34.88, "step": 36115, "train_speed(iter/s)": 0.413924 }, { "acc": 0.90251656, "epoch": 0.9779871659491511, "grad_norm": 7.497637748718262, "learning_rate": 9.001056943121468e-06, "loss": 0.5083622, "memory(GiB)": 34.88, "step": 36120, "train_speed(iter/s)": 0.413927 }, { "acc": 0.89188118, "epoch": 0.9781225462323667, "grad_norm": 6.083991050720215, "learning_rate": 9.00072132272055e-06, "loss": 0.53626075, "memory(GiB)": 34.88, "step": 36125, "train_speed(iter/s)": 0.41393 }, { "acc": 0.87083321, "epoch": 0.9782579265155823, "grad_norm": 6.785192966461182, "learning_rate": 9.000385652208996e-06, "loss": 0.71804104, "memory(GiB)": 34.88, "step": 36130, "train_speed(iter/s)": 0.413933 }, { "acc": 0.8995532, "epoch": 0.9783933067987978, "grad_norm": 8.547418594360352, "learning_rate": 9.000049931591017e-06, "loss": 0.49534874, "memory(GiB)": 34.88, "step": 36135, "train_speed(iter/s)": 0.413936 }, { "acc": 0.91131754, "epoch": 0.9785286870820133, "grad_norm": 7.996634006500244, "learning_rate": 8.999714160870814e-06, "loss": 0.42193279, "memory(GiB)": 34.88, "step": 36140, "train_speed(iter/s)": 0.413938 }, { "acc": 0.86665497, "epoch": 0.9786640673652289, "grad_norm": 12.680231094360352, "learning_rate": 8.999378340052599e-06, "loss": 0.74683952, "memory(GiB)": 34.88, "step": 36145, "train_speed(iter/s)": 0.413941 }, { "acc": 0.85327787, "epoch": 0.9787994476484445, "grad_norm": 8.749249458312988, "learning_rate": 8.999042469140571e-06, "loss": 0.82036839, "memory(GiB)": 34.88, "step": 36150, "train_speed(iter/s)": 0.413944 }, { "acc": 0.9110219, "epoch": 0.97893482793166, "grad_norm": 6.614302635192871, "learning_rate": 8.998706548138943e-06, "loss": 0.52253528, "memory(GiB)": 34.88, "step": 36155, "train_speed(iter/s)": 0.413947 }, { "acc": 0.88522272, "epoch": 0.9790702082148756, "grad_norm": 8.377596855163574, "learning_rate": 8.998370577051922e-06, "loss": 0.61088829, "memory(GiB)": 34.88, "step": 36160, "train_speed(iter/s)": 0.413949 }, { "acc": 0.87854872, "epoch": 0.9792055884980911, "grad_norm": 7.784817695617676, "learning_rate": 8.998034555883714e-06, "loss": 0.59183102, "memory(GiB)": 34.88, "step": 36165, "train_speed(iter/s)": 0.413952 }, { "acc": 0.88035975, "epoch": 0.9793409687813067, "grad_norm": 6.1544294357299805, "learning_rate": 8.997698484638534e-06, "loss": 0.6089695, "memory(GiB)": 34.88, "step": 36170, "train_speed(iter/s)": 0.413955 }, { "acc": 0.87549782, "epoch": 0.9794763490645222, "grad_norm": 15.828048706054688, "learning_rate": 8.997362363320585e-06, "loss": 0.69379563, "memory(GiB)": 34.88, "step": 36175, "train_speed(iter/s)": 0.413958 }, { "acc": 0.89237051, "epoch": 0.9796117293477378, "grad_norm": 6.340631484985352, "learning_rate": 8.997026191934083e-06, "loss": 0.54071894, "memory(GiB)": 34.88, "step": 36180, "train_speed(iter/s)": 0.413961 }, { "acc": 0.88551474, "epoch": 0.9797471096309534, "grad_norm": 7.569888114929199, "learning_rate": 8.996689970483237e-06, "loss": 0.57244954, "memory(GiB)": 34.88, "step": 36185, "train_speed(iter/s)": 0.413964 }, { "acc": 0.86910267, "epoch": 0.9798824899141689, "grad_norm": 5.708816051483154, "learning_rate": 8.99635369897226e-06, "loss": 0.67866211, "memory(GiB)": 34.88, "step": 36190, "train_speed(iter/s)": 0.413967 }, { "acc": 0.87353458, "epoch": 0.9800178701973844, "grad_norm": 8.981793403625488, "learning_rate": 8.996017377405362e-06, "loss": 0.65074806, "memory(GiB)": 34.88, "step": 36195, "train_speed(iter/s)": 0.413969 }, { "acc": 0.87802887, "epoch": 0.9801532504806, "grad_norm": 4.915964603424072, "learning_rate": 8.995681005786759e-06, "loss": 0.65003877, "memory(GiB)": 34.88, "step": 36200, "train_speed(iter/s)": 0.413972 }, { "acc": 0.87620401, "epoch": 0.9802886307638156, "grad_norm": 8.293463706970215, "learning_rate": 8.995344584120662e-06, "loss": 0.67249808, "memory(GiB)": 34.88, "step": 36205, "train_speed(iter/s)": 0.413975 }, { "acc": 0.86457577, "epoch": 0.9804240110470311, "grad_norm": 14.76872730255127, "learning_rate": 8.995008112411287e-06, "loss": 0.78574777, "memory(GiB)": 34.88, "step": 36210, "train_speed(iter/s)": 0.413977 }, { "acc": 0.85884132, "epoch": 0.9805593913302466, "grad_norm": 6.579467296600342, "learning_rate": 8.994671590662848e-06, "loss": 0.65761476, "memory(GiB)": 34.88, "step": 36215, "train_speed(iter/s)": 0.41398 }, { "acc": 0.87168026, "epoch": 0.9806947716134622, "grad_norm": 10.890395164489746, "learning_rate": 8.994335018879563e-06, "loss": 0.79424028, "memory(GiB)": 34.88, "step": 36220, "train_speed(iter/s)": 0.413983 }, { "acc": 0.8884244, "epoch": 0.9808301518966778, "grad_norm": 8.162660598754883, "learning_rate": 8.993998397065646e-06, "loss": 0.57347097, "memory(GiB)": 34.88, "step": 36225, "train_speed(iter/s)": 0.413986 }, { "acc": 0.86857796, "epoch": 0.9809655321798934, "grad_norm": 12.810787200927734, "learning_rate": 8.993661725225315e-06, "loss": 0.6894372, "memory(GiB)": 34.88, "step": 36230, "train_speed(iter/s)": 0.413988 }, { "acc": 0.87283211, "epoch": 0.9811009124631088, "grad_norm": 7.5893988609313965, "learning_rate": 8.993325003362784e-06, "loss": 0.56488848, "memory(GiB)": 34.88, "step": 36235, "train_speed(iter/s)": 0.413991 }, { "acc": 0.86096544, "epoch": 0.9812362927463244, "grad_norm": 11.882905006408691, "learning_rate": 8.992988231482279e-06, "loss": 0.70571976, "memory(GiB)": 34.88, "step": 36240, "train_speed(iter/s)": 0.413994 }, { "acc": 0.86687622, "epoch": 0.98137167302954, "grad_norm": 23.586774826049805, "learning_rate": 8.992651409588008e-06, "loss": 0.75220156, "memory(GiB)": 34.88, "step": 36245, "train_speed(iter/s)": 0.413997 }, { "acc": 0.89431667, "epoch": 0.9815070533127556, "grad_norm": 9.899137496948242, "learning_rate": 8.992314537684201e-06, "loss": 0.50851893, "memory(GiB)": 34.88, "step": 36250, "train_speed(iter/s)": 0.413999 }, { "acc": 0.86900368, "epoch": 0.981642433595971, "grad_norm": 18.762155532836914, "learning_rate": 8.99197761577507e-06, "loss": 0.67262998, "memory(GiB)": 34.88, "step": 36255, "train_speed(iter/s)": 0.414002 }, { "acc": 0.86494617, "epoch": 0.9817778138791866, "grad_norm": 13.535562515258789, "learning_rate": 8.991640643864839e-06, "loss": 0.70930586, "memory(GiB)": 34.88, "step": 36260, "train_speed(iter/s)": 0.414005 }, { "acc": 0.89842701, "epoch": 0.9819131941624022, "grad_norm": 6.124141216278076, "learning_rate": 8.99130362195773e-06, "loss": 0.50461617, "memory(GiB)": 34.88, "step": 36265, "train_speed(iter/s)": 0.414008 }, { "acc": 0.89689369, "epoch": 0.9820485744456178, "grad_norm": 4.658792972564697, "learning_rate": 8.990966550057964e-06, "loss": 0.50864644, "memory(GiB)": 34.88, "step": 36270, "train_speed(iter/s)": 0.41401 }, { "acc": 0.88199015, "epoch": 0.9821839547288332, "grad_norm": 6.459408760070801, "learning_rate": 8.990629428169762e-06, "loss": 0.64033318, "memory(GiB)": 34.88, "step": 36275, "train_speed(iter/s)": 0.414013 }, { "acc": 0.86034203, "epoch": 0.9823193350120488, "grad_norm": 21.160049438476562, "learning_rate": 8.990292256297348e-06, "loss": 0.77894578, "memory(GiB)": 34.88, "step": 36280, "train_speed(iter/s)": 0.414016 }, { "acc": 0.90069046, "epoch": 0.9824547152952644, "grad_norm": 14.43486213684082, "learning_rate": 8.989955034444946e-06, "loss": 0.51676369, "memory(GiB)": 34.88, "step": 36285, "train_speed(iter/s)": 0.414019 }, { "acc": 0.89921465, "epoch": 0.98259009557848, "grad_norm": 8.063901901245117, "learning_rate": 8.98961776261678e-06, "loss": 0.59029684, "memory(GiB)": 34.88, "step": 36290, "train_speed(iter/s)": 0.414022 }, { "acc": 0.89281368, "epoch": 0.9827254758616955, "grad_norm": 15.049970626831055, "learning_rate": 8.989280440817078e-06, "loss": 0.53636799, "memory(GiB)": 34.88, "step": 36295, "train_speed(iter/s)": 0.414024 }, { "acc": 0.87900419, "epoch": 0.982860856144911, "grad_norm": 6.200411796569824, "learning_rate": 8.988943069050062e-06, "loss": 0.61995625, "memory(GiB)": 34.88, "step": 36300, "train_speed(iter/s)": 0.414027 }, { "acc": 0.87927017, "epoch": 0.9829962364281266, "grad_norm": 9.661389350891113, "learning_rate": 8.988605647319959e-06, "loss": 0.66503091, "memory(GiB)": 34.88, "step": 36305, "train_speed(iter/s)": 0.41403 }, { "acc": 0.86101465, "epoch": 0.9831316167113422, "grad_norm": 25.397422790527344, "learning_rate": 8.988268175630995e-06, "loss": 0.82499228, "memory(GiB)": 34.88, "step": 36310, "train_speed(iter/s)": 0.414032 }, { "acc": 0.88237658, "epoch": 0.9832669969945577, "grad_norm": 7.806065559387207, "learning_rate": 8.987930653987399e-06, "loss": 0.68710566, "memory(GiB)": 34.88, "step": 36315, "train_speed(iter/s)": 0.414035 }, { "acc": 0.88622417, "epoch": 0.9834023772777732, "grad_norm": 11.581782341003418, "learning_rate": 8.9875930823934e-06, "loss": 0.65643492, "memory(GiB)": 34.88, "step": 36320, "train_speed(iter/s)": 0.414038 }, { "acc": 0.89156437, "epoch": 0.9835377575609888, "grad_norm": 7.448645114898682, "learning_rate": 8.987255460853225e-06, "loss": 0.57520905, "memory(GiB)": 34.88, "step": 36325, "train_speed(iter/s)": 0.414041 }, { "acc": 0.86111784, "epoch": 0.9836731378442044, "grad_norm": 10.592337608337402, "learning_rate": 8.986917789371103e-06, "loss": 0.75440168, "memory(GiB)": 34.88, "step": 36330, "train_speed(iter/s)": 0.414043 }, { "acc": 0.88011599, "epoch": 0.9838085181274199, "grad_norm": 6.335819244384766, "learning_rate": 8.986580067951267e-06, "loss": 0.72032304, "memory(GiB)": 34.88, "step": 36335, "train_speed(iter/s)": 0.414046 }, { "acc": 0.86884632, "epoch": 0.9839438984106355, "grad_norm": 22.363365173339844, "learning_rate": 8.986242296597943e-06, "loss": 0.7121367, "memory(GiB)": 34.88, "step": 36340, "train_speed(iter/s)": 0.414049 }, { "acc": 0.89091053, "epoch": 0.984079278693851, "grad_norm": 8.080714225769043, "learning_rate": 8.985904475315367e-06, "loss": 0.52940373, "memory(GiB)": 34.88, "step": 36345, "train_speed(iter/s)": 0.414051 }, { "acc": 0.87404337, "epoch": 0.9842146589770666, "grad_norm": 9.712889671325684, "learning_rate": 8.985566604107767e-06, "loss": 0.64526978, "memory(GiB)": 34.88, "step": 36350, "train_speed(iter/s)": 0.414054 }, { "acc": 0.88332615, "epoch": 0.9843500392602821, "grad_norm": 12.017403602600098, "learning_rate": 8.985228682979377e-06, "loss": 0.65077219, "memory(GiB)": 34.88, "step": 36355, "train_speed(iter/s)": 0.414056 }, { "acc": 0.89045849, "epoch": 0.9844854195434977, "grad_norm": 9.430904388427734, "learning_rate": 8.984890711934432e-06, "loss": 0.60653753, "memory(GiB)": 34.88, "step": 36360, "train_speed(iter/s)": 0.414059 }, { "acc": 0.88892488, "epoch": 0.9846207998267132, "grad_norm": 14.954158782958984, "learning_rate": 8.984552690977163e-06, "loss": 0.60468097, "memory(GiB)": 34.88, "step": 36365, "train_speed(iter/s)": 0.414062 }, { "acc": 0.87998505, "epoch": 0.9847561801099288, "grad_norm": 7.837226390838623, "learning_rate": 8.984214620111804e-06, "loss": 0.54593849, "memory(GiB)": 34.88, "step": 36370, "train_speed(iter/s)": 0.414065 }, { "acc": 0.89477615, "epoch": 0.9848915603931443, "grad_norm": 4.315186023712158, "learning_rate": 8.983876499342594e-06, "loss": 0.51056809, "memory(GiB)": 34.88, "step": 36375, "train_speed(iter/s)": 0.414068 }, { "acc": 0.88296108, "epoch": 0.9850269406763599, "grad_norm": 70.5353012084961, "learning_rate": 8.983538328673766e-06, "loss": 0.55715022, "memory(GiB)": 34.88, "step": 36380, "train_speed(iter/s)": 0.41407 }, { "acc": 0.88269424, "epoch": 0.9851623209595755, "grad_norm": 7.426548004150391, "learning_rate": 8.983200108109553e-06, "loss": 0.62309823, "memory(GiB)": 34.88, "step": 36385, "train_speed(iter/s)": 0.414073 }, { "acc": 0.85826931, "epoch": 0.985297701242791, "grad_norm": 20.260034561157227, "learning_rate": 8.982861837654197e-06, "loss": 0.78781323, "memory(GiB)": 34.88, "step": 36390, "train_speed(iter/s)": 0.414076 }, { "acc": 0.88979015, "epoch": 0.9854330815260065, "grad_norm": 9.677817344665527, "learning_rate": 8.982523517311934e-06, "loss": 0.577526, "memory(GiB)": 34.88, "step": 36395, "train_speed(iter/s)": 0.414079 }, { "acc": 0.87182159, "epoch": 0.9855684618092221, "grad_norm": 8.36695384979248, "learning_rate": 8.982185147087001e-06, "loss": 0.69136763, "memory(GiB)": 34.88, "step": 36400, "train_speed(iter/s)": 0.414081 }, { "acc": 0.8924572, "epoch": 0.9857038420924377, "grad_norm": 12.048600196838379, "learning_rate": 8.98184672698364e-06, "loss": 0.57343102, "memory(GiB)": 34.88, "step": 36405, "train_speed(iter/s)": 0.414084 }, { "acc": 0.8844368, "epoch": 0.9858392223756532, "grad_norm": 8.651131629943848, "learning_rate": 8.981508257006087e-06, "loss": 0.62928762, "memory(GiB)": 34.88, "step": 36410, "train_speed(iter/s)": 0.414087 }, { "acc": 0.85937958, "epoch": 0.9859746026588687, "grad_norm": 10.61115837097168, "learning_rate": 8.981169737158581e-06, "loss": 0.75980172, "memory(GiB)": 34.88, "step": 36415, "train_speed(iter/s)": 0.414089 }, { "acc": 0.89722309, "epoch": 0.9861099829420843, "grad_norm": 14.184349060058594, "learning_rate": 8.980831167445365e-06, "loss": 0.60466709, "memory(GiB)": 34.88, "step": 36420, "train_speed(iter/s)": 0.414092 }, { "acc": 0.9190465, "epoch": 0.9862453632252999, "grad_norm": 14.101569175720215, "learning_rate": 8.98049254787068e-06, "loss": 0.49915156, "memory(GiB)": 34.88, "step": 36425, "train_speed(iter/s)": 0.414095 }, { "acc": 0.88627968, "epoch": 0.9863807435085155, "grad_norm": 11.818673133850098, "learning_rate": 8.980153878438768e-06, "loss": 0.5427711, "memory(GiB)": 34.88, "step": 36430, "train_speed(iter/s)": 0.414097 }, { "acc": 0.88033524, "epoch": 0.9865161237917309, "grad_norm": 7.488239288330078, "learning_rate": 8.979815159153872e-06, "loss": 0.65084448, "memory(GiB)": 34.88, "step": 36435, "train_speed(iter/s)": 0.4141 }, { "acc": 0.8745079, "epoch": 0.9866515040749465, "grad_norm": 5.303984642028809, "learning_rate": 8.979476390020233e-06, "loss": 0.66083922, "memory(GiB)": 34.88, "step": 36440, "train_speed(iter/s)": 0.414103 }, { "acc": 0.89062138, "epoch": 0.9867868843581621, "grad_norm": 6.770103931427002, "learning_rate": 8.979137571042098e-06, "loss": 0.53684416, "memory(GiB)": 34.88, "step": 36445, "train_speed(iter/s)": 0.414105 }, { "acc": 0.87007637, "epoch": 0.9869222646413777, "grad_norm": 9.213302612304688, "learning_rate": 8.978798702223707e-06, "loss": 0.67228613, "memory(GiB)": 34.88, "step": 36450, "train_speed(iter/s)": 0.414108 }, { "acc": 0.88508015, "epoch": 0.9870576449245931, "grad_norm": 8.750516891479492, "learning_rate": 8.978459783569309e-06, "loss": 0.58640103, "memory(GiB)": 34.88, "step": 36455, "train_speed(iter/s)": 0.414111 }, { "acc": 0.87201157, "epoch": 0.9871930252078087, "grad_norm": 7.8936567306518555, "learning_rate": 8.978120815083147e-06, "loss": 0.6976655, "memory(GiB)": 34.88, "step": 36460, "train_speed(iter/s)": 0.414113 }, { "acc": 0.87863197, "epoch": 0.9873284054910243, "grad_norm": 6.637689590454102, "learning_rate": 8.977781796769469e-06, "loss": 0.73590345, "memory(GiB)": 34.88, "step": 36465, "train_speed(iter/s)": 0.414116 }, { "acc": 0.88512096, "epoch": 0.9874637857742399, "grad_norm": 10.982146263122559, "learning_rate": 8.97744272863252e-06, "loss": 0.49818244, "memory(GiB)": 34.88, "step": 36470, "train_speed(iter/s)": 0.414118 }, { "acc": 0.86773376, "epoch": 0.9875991660574553, "grad_norm": 20.07544708251953, "learning_rate": 8.977103610676549e-06, "loss": 0.71133504, "memory(GiB)": 34.88, "step": 36475, "train_speed(iter/s)": 0.414121 }, { "acc": 0.88107548, "epoch": 0.9877345463406709, "grad_norm": 7.318076133728027, "learning_rate": 8.976764442905805e-06, "loss": 0.63663054, "memory(GiB)": 34.88, "step": 36480, "train_speed(iter/s)": 0.414124 }, { "acc": 0.86980686, "epoch": 0.9878699266238865, "grad_norm": 11.742521286010742, "learning_rate": 8.976425225324534e-06, "loss": 0.693713, "memory(GiB)": 34.88, "step": 36485, "train_speed(iter/s)": 0.414126 }, { "acc": 0.88242359, "epoch": 0.9880053069071021, "grad_norm": 9.431650161743164, "learning_rate": 8.976085957936988e-06, "loss": 0.59220572, "memory(GiB)": 34.88, "step": 36490, "train_speed(iter/s)": 0.414129 }, { "acc": 0.87420435, "epoch": 0.9881406871903176, "grad_norm": 11.299039840698242, "learning_rate": 8.975746640747416e-06, "loss": 0.72288442, "memory(GiB)": 34.88, "step": 36495, "train_speed(iter/s)": 0.414131 }, { "acc": 0.86197701, "epoch": 0.9882760674735331, "grad_norm": 9.656341552734375, "learning_rate": 8.975407273760069e-06, "loss": 0.76358857, "memory(GiB)": 34.88, "step": 36500, "train_speed(iter/s)": 0.414134 }, { "acc": 0.8601162, "epoch": 0.9884114477567487, "grad_norm": 23.157752990722656, "learning_rate": 8.975067856979196e-06, "loss": 0.8179224, "memory(GiB)": 34.88, "step": 36505, "train_speed(iter/s)": 0.414136 }, { "acc": 0.89755268, "epoch": 0.9885468280399643, "grad_norm": 8.620265007019043, "learning_rate": 8.97472839040905e-06, "loss": 0.53031888, "memory(GiB)": 34.88, "step": 36510, "train_speed(iter/s)": 0.414139 }, { "acc": 0.91452503, "epoch": 0.9886822083231798, "grad_norm": 5.063680171966553, "learning_rate": 8.974388874053885e-06, "loss": 0.44737949, "memory(GiB)": 34.88, "step": 36515, "train_speed(iter/s)": 0.414141 }, { "acc": 0.86437941, "epoch": 0.9888175886063953, "grad_norm": 16.819929122924805, "learning_rate": 8.974049307917955e-06, "loss": 0.7540432, "memory(GiB)": 34.88, "step": 36520, "train_speed(iter/s)": 0.414144 }, { "acc": 0.86816101, "epoch": 0.9889529688896109, "grad_norm": 13.95063591003418, "learning_rate": 8.973709692005511e-06, "loss": 0.84086323, "memory(GiB)": 34.88, "step": 36525, "train_speed(iter/s)": 0.414146 }, { "acc": 0.89826393, "epoch": 0.9890883491728265, "grad_norm": 3.5336973667144775, "learning_rate": 8.973370026320808e-06, "loss": 0.50490541, "memory(GiB)": 34.88, "step": 36530, "train_speed(iter/s)": 0.414148 }, { "acc": 0.88688126, "epoch": 0.989223729456042, "grad_norm": 6.3531975746154785, "learning_rate": 8.9730303108681e-06, "loss": 0.6082726, "memory(GiB)": 34.88, "step": 36535, "train_speed(iter/s)": 0.414151 }, { "acc": 0.87887001, "epoch": 0.9893591097392576, "grad_norm": 12.831344604492188, "learning_rate": 8.972690545651644e-06, "loss": 0.58691697, "memory(GiB)": 34.88, "step": 36540, "train_speed(iter/s)": 0.414154 }, { "acc": 0.90900249, "epoch": 0.9894944900224731, "grad_norm": 6.639412879943848, "learning_rate": 8.972350730675698e-06, "loss": 0.39417562, "memory(GiB)": 34.88, "step": 36545, "train_speed(iter/s)": 0.414156 }, { "acc": 0.88002777, "epoch": 0.9896298703056887, "grad_norm": 19.120441436767578, "learning_rate": 8.972010865944515e-06, "loss": 0.58367128, "memory(GiB)": 34.88, "step": 36550, "train_speed(iter/s)": 0.414158 }, { "acc": 0.89849339, "epoch": 0.9897652505889042, "grad_norm": 7.013217926025391, "learning_rate": 8.971670951462356e-06, "loss": 0.53856459, "memory(GiB)": 34.88, "step": 36555, "train_speed(iter/s)": 0.414161 }, { "acc": 0.88607483, "epoch": 0.9899006308721198, "grad_norm": 12.839308738708496, "learning_rate": 8.971330987233477e-06, "loss": 0.55272975, "memory(GiB)": 34.88, "step": 36560, "train_speed(iter/s)": 0.414164 }, { "acc": 0.86661644, "epoch": 0.9900360111553353, "grad_norm": 9.945707321166992, "learning_rate": 8.970990973262138e-06, "loss": 0.62716799, "memory(GiB)": 34.88, "step": 36565, "train_speed(iter/s)": 0.414166 }, { "acc": 0.89457397, "epoch": 0.9901713914385509, "grad_norm": 7.957862377166748, "learning_rate": 8.970650909552595e-06, "loss": 0.51826024, "memory(GiB)": 34.88, "step": 36570, "train_speed(iter/s)": 0.414167 }, { "acc": 0.87828178, "epoch": 0.9903067717217664, "grad_norm": 7.16843318939209, "learning_rate": 8.970310796109111e-06, "loss": 0.60115895, "memory(GiB)": 34.88, "step": 36575, "train_speed(iter/s)": 0.41417 }, { "acc": 0.90863132, "epoch": 0.990442152004982, "grad_norm": 5.9389543533325195, "learning_rate": 8.969970632935948e-06, "loss": 0.48854904, "memory(GiB)": 34.88, "step": 36580, "train_speed(iter/s)": 0.414173 }, { "acc": 0.87677794, "epoch": 0.9905775322881976, "grad_norm": 8.292675018310547, "learning_rate": 8.969630420037365e-06, "loss": 0.53333597, "memory(GiB)": 34.88, "step": 36585, "train_speed(iter/s)": 0.414175 }, { "acc": 0.88083954, "epoch": 0.9907129125714131, "grad_norm": 5.74753475189209, "learning_rate": 8.969290157417622e-06, "loss": 0.63245926, "memory(GiB)": 34.88, "step": 36590, "train_speed(iter/s)": 0.414177 }, { "acc": 0.86550961, "epoch": 0.9908482928546286, "grad_norm": 9.559377670288086, "learning_rate": 8.968949845080986e-06, "loss": 0.70717916, "memory(GiB)": 34.88, "step": 36595, "train_speed(iter/s)": 0.414179 }, { "acc": 0.87895527, "epoch": 0.9909836731378442, "grad_norm": 6.224867820739746, "learning_rate": 8.968609483031718e-06, "loss": 0.68634582, "memory(GiB)": 34.88, "step": 36600, "train_speed(iter/s)": 0.414181 }, { "acc": 0.88661556, "epoch": 0.9911190534210598, "grad_norm": 5.976417064666748, "learning_rate": 8.968269071274079e-06, "loss": 0.66814194, "memory(GiB)": 34.88, "step": 36605, "train_speed(iter/s)": 0.414183 }, { "acc": 0.88803482, "epoch": 0.9912544337042754, "grad_norm": 8.174516677856445, "learning_rate": 8.967928609812337e-06, "loss": 0.58717213, "memory(GiB)": 34.88, "step": 36610, "train_speed(iter/s)": 0.414186 }, { "acc": 0.88524113, "epoch": 0.9913898139874908, "grad_norm": 12.349360466003418, "learning_rate": 8.967588098650755e-06, "loss": 0.5856328, "memory(GiB)": 34.88, "step": 36615, "train_speed(iter/s)": 0.414188 }, { "acc": 0.86820326, "epoch": 0.9915251942707064, "grad_norm": 12.998108863830566, "learning_rate": 8.967247537793602e-06, "loss": 0.66215935, "memory(GiB)": 34.88, "step": 36620, "train_speed(iter/s)": 0.414191 }, { "acc": 0.89145794, "epoch": 0.991660574553922, "grad_norm": 12.840280532836914, "learning_rate": 8.966906927245138e-06, "loss": 0.56587744, "memory(GiB)": 34.88, "step": 36625, "train_speed(iter/s)": 0.414193 }, { "acc": 0.8786417, "epoch": 0.9917959548371376, "grad_norm": 9.095660209655762, "learning_rate": 8.966566267009635e-06, "loss": 0.71795759, "memory(GiB)": 34.88, "step": 36630, "train_speed(iter/s)": 0.414195 }, { "acc": 0.88200712, "epoch": 0.991931335120353, "grad_norm": 9.03976821899414, "learning_rate": 8.966225557091358e-06, "loss": 0.66073079, "memory(GiB)": 34.88, "step": 36635, "train_speed(iter/s)": 0.414197 }, { "acc": 0.88528786, "epoch": 0.9920667154035686, "grad_norm": 9.378009796142578, "learning_rate": 8.965884797494576e-06, "loss": 0.58238716, "memory(GiB)": 34.88, "step": 36640, "train_speed(iter/s)": 0.4142 }, { "acc": 0.88339319, "epoch": 0.9922020956867842, "grad_norm": 8.357064247131348, "learning_rate": 8.965543988223558e-06, "loss": 0.64718142, "memory(GiB)": 34.88, "step": 36645, "train_speed(iter/s)": 0.414201 }, { "acc": 0.87240305, "epoch": 0.9923374759699998, "grad_norm": 8.91048812866211, "learning_rate": 8.965203129282573e-06, "loss": 0.64806609, "memory(GiB)": 34.88, "step": 36650, "train_speed(iter/s)": 0.414204 }, { "acc": 0.88400898, "epoch": 0.9924728562532152, "grad_norm": 8.465168952941895, "learning_rate": 8.96486222067589e-06, "loss": 0.62996473, "memory(GiB)": 34.88, "step": 36655, "train_speed(iter/s)": 0.414206 }, { "acc": 0.87950268, "epoch": 0.9926082365364308, "grad_norm": 8.971938133239746, "learning_rate": 8.964521262407777e-06, "loss": 0.56663475, "memory(GiB)": 34.88, "step": 36660, "train_speed(iter/s)": 0.414209 }, { "acc": 0.87477818, "epoch": 0.9927436168196464, "grad_norm": 10.92701530456543, "learning_rate": 8.964180254482516e-06, "loss": 0.74146724, "memory(GiB)": 34.88, "step": 36665, "train_speed(iter/s)": 0.41421 }, { "acc": 0.88686962, "epoch": 0.992878997102862, "grad_norm": 11.08440113067627, "learning_rate": 8.963839196904366e-06, "loss": 0.65695548, "memory(GiB)": 34.88, "step": 36670, "train_speed(iter/s)": 0.414212 }, { "acc": 0.90986118, "epoch": 0.9930143773860775, "grad_norm": 6.492523193359375, "learning_rate": 8.963498089677605e-06, "loss": 0.51869941, "memory(GiB)": 34.88, "step": 36675, "train_speed(iter/s)": 0.414214 }, { "acc": 0.86746817, "epoch": 0.993149757669293, "grad_norm": 6.895868301391602, "learning_rate": 8.963156932806506e-06, "loss": 0.68263249, "memory(GiB)": 34.88, "step": 36680, "train_speed(iter/s)": 0.414217 }, { "acc": 0.85929947, "epoch": 0.9932851379525086, "grad_norm": 8.033020973205566, "learning_rate": 8.962815726295342e-06, "loss": 0.71517911, "memory(GiB)": 34.88, "step": 36685, "train_speed(iter/s)": 0.41422 }, { "acc": 0.87701435, "epoch": 0.9934205182357242, "grad_norm": 8.840935707092285, "learning_rate": 8.962474470148388e-06, "loss": 0.65401697, "memory(GiB)": 34.88, "step": 36690, "train_speed(iter/s)": 0.414222 }, { "acc": 0.88828993, "epoch": 0.9935558985189397, "grad_norm": 6.0173444747924805, "learning_rate": 8.962133164369919e-06, "loss": 0.54351187, "memory(GiB)": 34.88, "step": 36695, "train_speed(iter/s)": 0.414225 }, { "acc": 0.89003792, "epoch": 0.9936912788021552, "grad_norm": 6.853145599365234, "learning_rate": 8.96179180896421e-06, "loss": 0.59548349, "memory(GiB)": 34.88, "step": 36700, "train_speed(iter/s)": 0.414228 }, { "acc": 0.87574024, "epoch": 0.9938266590853708, "grad_norm": 4.126655101776123, "learning_rate": 8.961450403935537e-06, "loss": 0.59579701, "memory(GiB)": 34.88, "step": 36705, "train_speed(iter/s)": 0.414229 }, { "acc": 0.88684769, "epoch": 0.9939620393685864, "grad_norm": 4.825623512268066, "learning_rate": 8.961108949288176e-06, "loss": 0.64143267, "memory(GiB)": 34.88, "step": 36710, "train_speed(iter/s)": 0.414232 }, { "acc": 0.87999725, "epoch": 0.9940974196518019, "grad_norm": 5.968723297119141, "learning_rate": 8.960767445026405e-06, "loss": 0.62523403, "memory(GiB)": 34.88, "step": 36715, "train_speed(iter/s)": 0.414235 }, { "acc": 0.88587675, "epoch": 0.9942327999350175, "grad_norm": 7.1515045166015625, "learning_rate": 8.960425891154502e-06, "loss": 0.60359831, "memory(GiB)": 34.88, "step": 36720, "train_speed(iter/s)": 0.414238 }, { "acc": 0.89080734, "epoch": 0.994368180218233, "grad_norm": 10.788386344909668, "learning_rate": 8.960084287676748e-06, "loss": 0.63128538, "memory(GiB)": 34.88, "step": 36725, "train_speed(iter/s)": 0.41424 }, { "acc": 0.88759613, "epoch": 0.9945035605014486, "grad_norm": 4.937773704528809, "learning_rate": 8.959742634597421e-06, "loss": 0.54046435, "memory(GiB)": 34.88, "step": 36730, "train_speed(iter/s)": 0.414243 }, { "acc": 0.88244076, "epoch": 0.9946389407846641, "grad_norm": 14.18010139465332, "learning_rate": 8.959400931920798e-06, "loss": 0.64170513, "memory(GiB)": 34.88, "step": 36735, "train_speed(iter/s)": 0.414245 }, { "acc": 0.87042665, "epoch": 0.9947743210678797, "grad_norm": 9.83411979675293, "learning_rate": 8.959059179651163e-06, "loss": 0.69196639, "memory(GiB)": 34.88, "step": 36740, "train_speed(iter/s)": 0.414248 }, { "acc": 0.88476086, "epoch": 0.9949097013510952, "grad_norm": 10.002828598022461, "learning_rate": 8.958717377792794e-06, "loss": 0.64066238, "memory(GiB)": 34.88, "step": 36745, "train_speed(iter/s)": 0.414251 }, { "acc": 0.86824512, "epoch": 0.9950450816343108, "grad_norm": 6.241497993469238, "learning_rate": 8.958375526349975e-06, "loss": 0.72747831, "memory(GiB)": 34.88, "step": 36750, "train_speed(iter/s)": 0.414253 }, { "acc": 0.89123936, "epoch": 0.9951804619175263, "grad_norm": 9.249593734741211, "learning_rate": 8.95803362532699e-06, "loss": 0.54192915, "memory(GiB)": 34.88, "step": 36755, "train_speed(iter/s)": 0.414256 }, { "acc": 0.88747368, "epoch": 0.9953158422007419, "grad_norm": 5.929793357849121, "learning_rate": 8.957691674728117e-06, "loss": 0.55629215, "memory(GiB)": 34.88, "step": 36760, "train_speed(iter/s)": 0.414259 }, { "acc": 0.86072693, "epoch": 0.9954512224839575, "grad_norm": 13.062467575073242, "learning_rate": 8.957349674557643e-06, "loss": 0.77652364, "memory(GiB)": 34.88, "step": 36765, "train_speed(iter/s)": 0.414262 }, { "acc": 0.86679287, "epoch": 0.995586602767173, "grad_norm": 12.435940742492676, "learning_rate": 8.957007624819852e-06, "loss": 0.68860388, "memory(GiB)": 34.88, "step": 36770, "train_speed(iter/s)": 0.414265 }, { "acc": 0.8880682, "epoch": 0.9957219830503885, "grad_norm": 10.350516319274902, "learning_rate": 8.95666552551903e-06, "loss": 0.66794181, "memory(GiB)": 34.88, "step": 36775, "train_speed(iter/s)": 0.414268 }, { "acc": 0.88204689, "epoch": 0.9958573633336041, "grad_norm": 8.180071830749512, "learning_rate": 8.956323376659459e-06, "loss": 0.5423943, "memory(GiB)": 34.88, "step": 36780, "train_speed(iter/s)": 0.41427 }, { "acc": 0.89569664, "epoch": 0.9959927436168197, "grad_norm": 6.258157730102539, "learning_rate": 8.955981178245426e-06, "loss": 0.53312855, "memory(GiB)": 34.88, "step": 36785, "train_speed(iter/s)": 0.414273 }, { "acc": 0.89096136, "epoch": 0.9961281239000352, "grad_norm": 11.705758094787598, "learning_rate": 8.95563893028122e-06, "loss": 0.59071617, "memory(GiB)": 34.88, "step": 36790, "train_speed(iter/s)": 0.414276 }, { "acc": 0.89522076, "epoch": 0.9962635041832507, "grad_norm": 7.0380539894104, "learning_rate": 8.95529663277113e-06, "loss": 0.5527504, "memory(GiB)": 34.88, "step": 36795, "train_speed(iter/s)": 0.414278 }, { "acc": 0.87516499, "epoch": 0.9963988844664663, "grad_norm": 13.153011322021484, "learning_rate": 8.954954285719437e-06, "loss": 0.69893818, "memory(GiB)": 34.88, "step": 36800, "train_speed(iter/s)": 0.414281 }, { "acc": 0.91565332, "epoch": 0.9965342647496819, "grad_norm": 10.00925064086914, "learning_rate": 8.954611889130435e-06, "loss": 0.50942574, "memory(GiB)": 34.88, "step": 36805, "train_speed(iter/s)": 0.414284 }, { "acc": 0.86872749, "epoch": 0.9966696450328975, "grad_norm": 8.829830169677734, "learning_rate": 8.954269443008411e-06, "loss": 0.7186914, "memory(GiB)": 34.88, "step": 36810, "train_speed(iter/s)": 0.414286 }, { "acc": 0.88372536, "epoch": 0.9968050253161129, "grad_norm": 9.062175750732422, "learning_rate": 8.953926947357656e-06, "loss": 0.64960623, "memory(GiB)": 34.88, "step": 36815, "train_speed(iter/s)": 0.414289 }, { "acc": 0.85598431, "epoch": 0.9969404055993285, "grad_norm": 6.237649440765381, "learning_rate": 8.953584402182461e-06, "loss": 0.76100144, "memory(GiB)": 34.88, "step": 36820, "train_speed(iter/s)": 0.414291 }, { "acc": 0.8770771, "epoch": 0.9970757858825441, "grad_norm": 11.526479721069336, "learning_rate": 8.953241807487115e-06, "loss": 0.63538818, "memory(GiB)": 34.88, "step": 36825, "train_speed(iter/s)": 0.414293 }, { "acc": 0.89055567, "epoch": 0.9972111661657597, "grad_norm": 9.564925193786621, "learning_rate": 8.952899163275912e-06, "loss": 0.51145186, "memory(GiB)": 34.88, "step": 36830, "train_speed(iter/s)": 0.414296 }, { "acc": 0.87661676, "epoch": 0.9973465464489751, "grad_norm": 4.166606903076172, "learning_rate": 8.95255646955314e-06, "loss": 0.6197134, "memory(GiB)": 34.88, "step": 36835, "train_speed(iter/s)": 0.414299 }, { "acc": 0.88547478, "epoch": 0.9974819267321907, "grad_norm": 7.955060005187988, "learning_rate": 8.952213726323098e-06, "loss": 0.62883658, "memory(GiB)": 34.88, "step": 36840, "train_speed(iter/s)": 0.414301 }, { "acc": 0.88897924, "epoch": 0.9976173070154063, "grad_norm": 7.543715476989746, "learning_rate": 8.951870933590076e-06, "loss": 0.67584314, "memory(GiB)": 34.88, "step": 36845, "train_speed(iter/s)": 0.414304 }, { "acc": 0.89598007, "epoch": 0.9977526872986219, "grad_norm": 8.362630844116211, "learning_rate": 8.951528091358367e-06, "loss": 0.48283839, "memory(GiB)": 34.88, "step": 36850, "train_speed(iter/s)": 0.414307 }, { "acc": 0.88023605, "epoch": 0.9978880675818373, "grad_norm": 8.120342254638672, "learning_rate": 8.95118519963227e-06, "loss": 0.52477894, "memory(GiB)": 34.88, "step": 36855, "train_speed(iter/s)": 0.414309 }, { "acc": 0.89905424, "epoch": 0.9980234478650529, "grad_norm": 7.316795349121094, "learning_rate": 8.950842258416077e-06, "loss": 0.49723873, "memory(GiB)": 34.88, "step": 36860, "train_speed(iter/s)": 0.414312 }, { "acc": 0.88180151, "epoch": 0.9981588281482685, "grad_norm": 10.119217872619629, "learning_rate": 8.950499267714083e-06, "loss": 0.65571637, "memory(GiB)": 34.88, "step": 36865, "train_speed(iter/s)": 0.414314 }, { "acc": 0.90078287, "epoch": 0.9982942084314841, "grad_norm": 8.339008331298828, "learning_rate": 8.950156227530589e-06, "loss": 0.5162653, "memory(GiB)": 34.88, "step": 36870, "train_speed(iter/s)": 0.414316 }, { "acc": 0.8937973, "epoch": 0.9984295887146996, "grad_norm": 6.430313587188721, "learning_rate": 8.949813137869887e-06, "loss": 0.51292701, "memory(GiB)": 34.88, "step": 36875, "train_speed(iter/s)": 0.414319 }, { "acc": 0.87774839, "epoch": 0.9985649689979151, "grad_norm": 8.372394561767578, "learning_rate": 8.949469998736279e-06, "loss": 0.6293992, "memory(GiB)": 34.88, "step": 36880, "train_speed(iter/s)": 0.414322 }, { "acc": 0.89008274, "epoch": 0.9987003492811307, "grad_norm": 6.3544745445251465, "learning_rate": 8.949126810134063e-06, "loss": 0.4741744, "memory(GiB)": 34.88, "step": 36885, "train_speed(iter/s)": 0.414324 }, { "acc": 0.90669355, "epoch": 0.9988357295643463, "grad_norm": 10.91152286529541, "learning_rate": 8.948783572067535e-06, "loss": 0.45361404, "memory(GiB)": 34.88, "step": 36890, "train_speed(iter/s)": 0.414327 }, { "acc": 0.8885479, "epoch": 0.9989711098475618, "grad_norm": 7.888538360595703, "learning_rate": 8.948440284540996e-06, "loss": 0.53319988, "memory(GiB)": 34.88, "step": 36895, "train_speed(iter/s)": 0.41433 }, { "acc": 0.87490244, "epoch": 0.9991064901307773, "grad_norm": 6.841423034667969, "learning_rate": 8.94809694755875e-06, "loss": 0.64699993, "memory(GiB)": 34.88, "step": 36900, "train_speed(iter/s)": 0.414332 }, { "acc": 0.90542679, "epoch": 0.9992418704139929, "grad_norm": 12.558232307434082, "learning_rate": 8.947753561125096e-06, "loss": 0.44825673, "memory(GiB)": 34.88, "step": 36905, "train_speed(iter/s)": 0.414335 }, { "acc": 0.87586699, "epoch": 0.9993772506972085, "grad_norm": 7.9667134284973145, "learning_rate": 8.947410125244332e-06, "loss": 0.72329798, "memory(GiB)": 34.88, "step": 36910, "train_speed(iter/s)": 0.414337 }, { "acc": 0.85160713, "epoch": 0.999512630980424, "grad_norm": 5.877831935882568, "learning_rate": 8.947066639920764e-06, "loss": 0.73714843, "memory(GiB)": 34.88, "step": 36915, "train_speed(iter/s)": 0.414339 }, { "acc": 0.8735096, "epoch": 0.9996480112636396, "grad_norm": 8.484366416931152, "learning_rate": 8.946723105158695e-06, "loss": 0.57997899, "memory(GiB)": 34.88, "step": 36920, "train_speed(iter/s)": 0.414342 }, { "acc": 0.88041477, "epoch": 0.9997833915468551, "grad_norm": 6.148575782775879, "learning_rate": 8.946379520962424e-06, "loss": 0.57697835, "memory(GiB)": 34.88, "step": 36925, "train_speed(iter/s)": 0.414344 }, { "acc": 0.89611511, "epoch": 0.9999187718300707, "grad_norm": 6.213749408721924, "learning_rate": 8.946035887336263e-06, "loss": 0.44636545, "memory(GiB)": 34.88, "step": 36930, "train_speed(iter/s)": 0.414347 }, { "acc": 0.87105722, "epoch": 1.0000541521132862, "grad_norm": 13.47207260131836, "learning_rate": 8.945692204284509e-06, "loss": 0.71720147, "memory(GiB)": 34.88, "step": 36935, "train_speed(iter/s)": 0.414344 }, { "acc": 0.88786163, "epoch": 1.0001895323965018, "grad_norm": 14.45678997039795, "learning_rate": 8.945348471811472e-06, "loss": 0.68612361, "memory(GiB)": 34.88, "step": 36940, "train_speed(iter/s)": 0.414346 }, { "acc": 0.88341465, "epoch": 1.0003249126797173, "grad_norm": 6.98847770690918, "learning_rate": 8.945004689921455e-06, "loss": 0.63152285, "memory(GiB)": 34.88, "step": 36945, "train_speed(iter/s)": 0.414349 }, { "acc": 0.87727118, "epoch": 1.000460292962933, "grad_norm": 22.416501998901367, "learning_rate": 8.944660858618765e-06, "loss": 0.61547127, "memory(GiB)": 34.88, "step": 36950, "train_speed(iter/s)": 0.414351 }, { "acc": 0.86527348, "epoch": 1.0005956732461485, "grad_norm": 6.094797134399414, "learning_rate": 8.944316977907714e-06, "loss": 0.64479947, "memory(GiB)": 34.88, "step": 36955, "train_speed(iter/s)": 0.414354 }, { "acc": 0.86327515, "epoch": 1.000731053529364, "grad_norm": 10.785540580749512, "learning_rate": 8.943973047792603e-06, "loss": 0.67288146, "memory(GiB)": 34.88, "step": 36960, "train_speed(iter/s)": 0.414356 }, { "acc": 0.88136845, "epoch": 1.0008664338125794, "grad_norm": 11.655862808227539, "learning_rate": 8.943629068277741e-06, "loss": 0.64116583, "memory(GiB)": 34.88, "step": 36965, "train_speed(iter/s)": 0.414359 }, { "acc": 0.89192581, "epoch": 1.001001814095795, "grad_norm": 5.030027866363525, "learning_rate": 8.943285039367442e-06, "loss": 0.4549499, "memory(GiB)": 34.88, "step": 36970, "train_speed(iter/s)": 0.414361 }, { "acc": 0.87774811, "epoch": 1.0011371943790106, "grad_norm": 16.5692138671875, "learning_rate": 8.942940961066013e-06, "loss": 0.7499722, "memory(GiB)": 34.88, "step": 36975, "train_speed(iter/s)": 0.414364 }, { "acc": 0.88767347, "epoch": 1.0012725746622262, "grad_norm": 12.96629524230957, "learning_rate": 8.942596833377762e-06, "loss": 0.5469492, "memory(GiB)": 34.88, "step": 36980, "train_speed(iter/s)": 0.414366 }, { "acc": 0.88824558, "epoch": 1.0014079549454418, "grad_norm": 21.384090423583984, "learning_rate": 8.942252656307004e-06, "loss": 0.76614594, "memory(GiB)": 34.88, "step": 36985, "train_speed(iter/s)": 0.414369 }, { "acc": 0.86851196, "epoch": 1.0015433352286573, "grad_norm": 7.981797218322754, "learning_rate": 8.941908429858047e-06, "loss": 0.72056112, "memory(GiB)": 34.88, "step": 36990, "train_speed(iter/s)": 0.414371 }, { "acc": 0.89621506, "epoch": 1.001678715511873, "grad_norm": 5.920599937438965, "learning_rate": 8.941564154035206e-06, "loss": 0.46238499, "memory(GiB)": 34.88, "step": 36995, "train_speed(iter/s)": 0.414374 }, { "acc": 0.90629101, "epoch": 1.0018140957950885, "grad_norm": 5.844128131866455, "learning_rate": 8.941219828842791e-06, "loss": 0.48656015, "memory(GiB)": 34.88, "step": 37000, "train_speed(iter/s)": 0.414376 }, { "acc": 0.88623295, "epoch": 1.0019494760783039, "grad_norm": 11.995558738708496, "learning_rate": 8.940875454285116e-06, "loss": 0.64658475, "memory(GiB)": 34.88, "step": 37005, "train_speed(iter/s)": 0.414379 }, { "acc": 0.88305426, "epoch": 1.0020848563615194, "grad_norm": 5.459467887878418, "learning_rate": 8.940531030366499e-06, "loss": 0.62453442, "memory(GiB)": 34.88, "step": 37010, "train_speed(iter/s)": 0.414381 }, { "acc": 0.90992937, "epoch": 1.002220236644735, "grad_norm": 15.012457847595215, "learning_rate": 8.940186557091248e-06, "loss": 0.4761889, "memory(GiB)": 34.88, "step": 37015, "train_speed(iter/s)": 0.414384 }, { "acc": 0.89827747, "epoch": 1.0023556169279506, "grad_norm": 7.9330363273620605, "learning_rate": 8.939842034463682e-06, "loss": 0.48891077, "memory(GiB)": 34.88, "step": 37020, "train_speed(iter/s)": 0.414386 }, { "acc": 0.86886997, "epoch": 1.0024909972111662, "grad_norm": 6.733742713928223, "learning_rate": 8.939497462488115e-06, "loss": 0.79064655, "memory(GiB)": 34.88, "step": 37025, "train_speed(iter/s)": 0.414388 }, { "acc": 0.87861843, "epoch": 1.0026263774943818, "grad_norm": 6.455490589141846, "learning_rate": 8.939152841168868e-06, "loss": 0.60310307, "memory(GiB)": 34.88, "step": 37030, "train_speed(iter/s)": 0.414391 }, { "acc": 0.88179989, "epoch": 1.0027617577775974, "grad_norm": 11.183976173400879, "learning_rate": 8.938808170510253e-06, "loss": 0.59081516, "memory(GiB)": 34.88, "step": 37035, "train_speed(iter/s)": 0.414393 }, { "acc": 0.89824696, "epoch": 1.002897138060813, "grad_norm": 14.69905948638916, "learning_rate": 8.938463450516589e-06, "loss": 0.56354733, "memory(GiB)": 34.88, "step": 37040, "train_speed(iter/s)": 0.414396 }, { "acc": 0.88940582, "epoch": 1.0030325183440283, "grad_norm": 8.36115550994873, "learning_rate": 8.938118681192195e-06, "loss": 0.5645771, "memory(GiB)": 34.88, "step": 37045, "train_speed(iter/s)": 0.414398 }, { "acc": 0.86146851, "epoch": 1.0031678986272439, "grad_norm": 8.241982460021973, "learning_rate": 8.937773862541391e-06, "loss": 0.85468273, "memory(GiB)": 34.88, "step": 37050, "train_speed(iter/s)": 0.414401 }, { "acc": 0.89256783, "epoch": 1.0033032789104595, "grad_norm": 6.344368934631348, "learning_rate": 8.937428994568494e-06, "loss": 0.56548643, "memory(GiB)": 34.88, "step": 37055, "train_speed(iter/s)": 0.414404 }, { "acc": 0.88242006, "epoch": 1.003438659193675, "grad_norm": 13.413211822509766, "learning_rate": 8.937084077277824e-06, "loss": 0.57420044, "memory(GiB)": 34.88, "step": 37060, "train_speed(iter/s)": 0.414406 }, { "acc": 0.89444904, "epoch": 1.0035740394768906, "grad_norm": 5.159535884857178, "learning_rate": 8.936739110673705e-06, "loss": 0.55859246, "memory(GiB)": 34.88, "step": 37065, "train_speed(iter/s)": 0.414409 }, { "acc": 0.87036018, "epoch": 1.0037094197601062, "grad_norm": 7.441877365112305, "learning_rate": 8.936394094760456e-06, "loss": 0.65005574, "memory(GiB)": 34.88, "step": 37070, "train_speed(iter/s)": 0.414412 }, { "acc": 0.88542557, "epoch": 1.0038448000433218, "grad_norm": 10.116533279418945, "learning_rate": 8.936049029542398e-06, "loss": 0.61311779, "memory(GiB)": 34.88, "step": 37075, "train_speed(iter/s)": 0.414414 }, { "acc": 0.88498993, "epoch": 1.0039801803265374, "grad_norm": 11.068194389343262, "learning_rate": 8.935703915023856e-06, "loss": 0.5635613, "memory(GiB)": 34.88, "step": 37080, "train_speed(iter/s)": 0.414417 }, { "acc": 0.88744984, "epoch": 1.0041155606097527, "grad_norm": 30.49250602722168, "learning_rate": 8.935358751209153e-06, "loss": 0.54769931, "memory(GiB)": 34.88, "step": 37085, "train_speed(iter/s)": 0.414419 }, { "acc": 0.89940281, "epoch": 1.0042509408929683, "grad_norm": 9.985427856445312, "learning_rate": 8.935013538102612e-06, "loss": 0.47941375, "memory(GiB)": 34.88, "step": 37090, "train_speed(iter/s)": 0.414422 }, { "acc": 0.87085543, "epoch": 1.0043863211761839, "grad_norm": 7.260748863220215, "learning_rate": 8.934668275708557e-06, "loss": 0.7184, "memory(GiB)": 34.88, "step": 37095, "train_speed(iter/s)": 0.414425 }, { "acc": 0.85913353, "epoch": 1.0045217014593995, "grad_norm": 11.718353271484375, "learning_rate": 8.934322964031313e-06, "loss": 0.72021179, "memory(GiB)": 34.88, "step": 37100, "train_speed(iter/s)": 0.414428 }, { "acc": 0.87858524, "epoch": 1.004657081742615, "grad_norm": 6.727770805358887, "learning_rate": 8.933977603075209e-06, "loss": 0.61950579, "memory(GiB)": 34.88, "step": 37105, "train_speed(iter/s)": 0.41443 }, { "acc": 0.88096476, "epoch": 1.0047924620258306, "grad_norm": 2.692021131515503, "learning_rate": 8.933632192844567e-06, "loss": 0.61997857, "memory(GiB)": 34.88, "step": 37110, "train_speed(iter/s)": 0.414432 }, { "acc": 0.88429804, "epoch": 1.0049278423090462, "grad_norm": 8.208669662475586, "learning_rate": 8.933286733343714e-06, "loss": 0.72880459, "memory(GiB)": 34.88, "step": 37115, "train_speed(iter/s)": 0.414434 }, { "acc": 0.89639015, "epoch": 1.0050632225922618, "grad_norm": 6.603367328643799, "learning_rate": 8.932941224576982e-06, "loss": 0.58043814, "memory(GiB)": 34.88, "step": 37120, "train_speed(iter/s)": 0.414437 }, { "acc": 0.87851839, "epoch": 1.0051986028754771, "grad_norm": 10.231675148010254, "learning_rate": 8.932595666548696e-06, "loss": 0.66378098, "memory(GiB)": 34.88, "step": 37125, "train_speed(iter/s)": 0.41444 }, { "acc": 0.89802914, "epoch": 1.0053339831586927, "grad_norm": 9.936914443969727, "learning_rate": 8.932250059263184e-06, "loss": 0.58255839, "memory(GiB)": 34.88, "step": 37130, "train_speed(iter/s)": 0.414442 }, { "acc": 0.87364187, "epoch": 1.0054693634419083, "grad_norm": 15.246819496154785, "learning_rate": 8.931904402724779e-06, "loss": 0.65615749, "memory(GiB)": 34.88, "step": 37135, "train_speed(iter/s)": 0.414445 }, { "acc": 0.89370422, "epoch": 1.0056047437251239, "grad_norm": 4.524639129638672, "learning_rate": 8.931558696937805e-06, "loss": 0.51065106, "memory(GiB)": 34.88, "step": 37140, "train_speed(iter/s)": 0.414447 }, { "acc": 0.89835243, "epoch": 1.0057401240083395, "grad_norm": 7.464092254638672, "learning_rate": 8.931212941906598e-06, "loss": 0.60040174, "memory(GiB)": 34.88, "step": 37145, "train_speed(iter/s)": 0.41445 }, { "acc": 0.8671999, "epoch": 1.005875504291555, "grad_norm": 11.886781692504883, "learning_rate": 8.930867137635489e-06, "loss": 0.70066376, "memory(GiB)": 34.88, "step": 37150, "train_speed(iter/s)": 0.414452 }, { "acc": 0.88986387, "epoch": 1.0060108845747706, "grad_norm": 6.026655673980713, "learning_rate": 8.930521284128807e-06, "loss": 0.52503281, "memory(GiB)": 34.88, "step": 37155, "train_speed(iter/s)": 0.414455 }, { "acc": 0.87421045, "epoch": 1.006146264857986, "grad_norm": 6.51495361328125, "learning_rate": 8.930175381390887e-06, "loss": 0.65101671, "memory(GiB)": 34.88, "step": 37160, "train_speed(iter/s)": 0.414458 }, { "acc": 0.88187113, "epoch": 1.0062816451412016, "grad_norm": 12.952872276306152, "learning_rate": 8.92982942942606e-06, "loss": 0.66796055, "memory(GiB)": 34.88, "step": 37165, "train_speed(iter/s)": 0.41446 }, { "acc": 0.87757349, "epoch": 1.0064170254244171, "grad_norm": 14.523767471313477, "learning_rate": 8.92948342823866e-06, "loss": 0.71920619, "memory(GiB)": 34.88, "step": 37170, "train_speed(iter/s)": 0.414463 }, { "acc": 0.87760468, "epoch": 1.0065524057076327, "grad_norm": 11.85883617401123, "learning_rate": 8.929137377833024e-06, "loss": 0.65444136, "memory(GiB)": 34.88, "step": 37175, "train_speed(iter/s)": 0.414465 }, { "acc": 0.88446903, "epoch": 1.0066877859908483, "grad_norm": 12.187798500061035, "learning_rate": 8.928791278213484e-06, "loss": 0.55498829, "memory(GiB)": 34.88, "step": 37180, "train_speed(iter/s)": 0.414468 }, { "acc": 0.88796539, "epoch": 1.0068231662740639, "grad_norm": 5.731698989868164, "learning_rate": 8.928445129384378e-06, "loss": 0.60273361, "memory(GiB)": 34.88, "step": 37185, "train_speed(iter/s)": 0.41447 }, { "acc": 0.87469234, "epoch": 1.0069585465572795, "grad_norm": 16.180700302124023, "learning_rate": 8.928098931350039e-06, "loss": 0.69879589, "memory(GiB)": 34.88, "step": 37190, "train_speed(iter/s)": 0.414473 }, { "acc": 0.87737694, "epoch": 1.007093926840495, "grad_norm": 5.083471775054932, "learning_rate": 8.927752684114806e-06, "loss": 0.59866018, "memory(GiB)": 34.88, "step": 37195, "train_speed(iter/s)": 0.414476 }, { "acc": 0.88534336, "epoch": 1.0072293071237104, "grad_norm": 8.965038299560547, "learning_rate": 8.927406387683018e-06, "loss": 0.6598084, "memory(GiB)": 34.88, "step": 37200, "train_speed(iter/s)": 0.414478 }, { "acc": 0.86031418, "epoch": 1.007364687406926, "grad_norm": 8.903295516967773, "learning_rate": 8.927060042059008e-06, "loss": 0.71183748, "memory(GiB)": 34.88, "step": 37205, "train_speed(iter/s)": 0.414481 }, { "acc": 0.90188923, "epoch": 1.0075000676901416, "grad_norm": 8.113879203796387, "learning_rate": 8.926713647247119e-06, "loss": 0.48446126, "memory(GiB)": 34.88, "step": 37210, "train_speed(iter/s)": 0.414483 }, { "acc": 0.90034237, "epoch": 1.0076354479733571, "grad_norm": 8.121219635009766, "learning_rate": 8.92636720325169e-06, "loss": 0.58289204, "memory(GiB)": 34.88, "step": 37215, "train_speed(iter/s)": 0.414486 }, { "acc": 0.87975798, "epoch": 1.0077708282565727, "grad_norm": 13.418241500854492, "learning_rate": 8.92602071007706e-06, "loss": 0.62559395, "memory(GiB)": 34.88, "step": 37220, "train_speed(iter/s)": 0.414489 }, { "acc": 0.90182314, "epoch": 1.0079062085397883, "grad_norm": 5.334571361541748, "learning_rate": 8.92567416772757e-06, "loss": 0.51347599, "memory(GiB)": 34.88, "step": 37225, "train_speed(iter/s)": 0.414491 }, { "acc": 0.87295313, "epoch": 1.0080415888230039, "grad_norm": 6.928528785705566, "learning_rate": 8.925327576207559e-06, "loss": 0.62926245, "memory(GiB)": 34.88, "step": 37230, "train_speed(iter/s)": 0.414494 }, { "acc": 0.88393459, "epoch": 1.0081769691062195, "grad_norm": 6.038732528686523, "learning_rate": 8.924980935521372e-06, "loss": 0.54920597, "memory(GiB)": 34.88, "step": 37235, "train_speed(iter/s)": 0.414496 }, { "acc": 0.88274117, "epoch": 1.0083123493894348, "grad_norm": 23.9130802154541, "learning_rate": 8.924634245673349e-06, "loss": 0.61248579, "memory(GiB)": 34.88, "step": 37240, "train_speed(iter/s)": 0.414498 }, { "acc": 0.87983217, "epoch": 1.0084477296726504, "grad_norm": 6.491400241851807, "learning_rate": 8.924287506667833e-06, "loss": 0.72812848, "memory(GiB)": 34.88, "step": 37245, "train_speed(iter/s)": 0.4145 }, { "acc": 0.88473969, "epoch": 1.008583109955866, "grad_norm": 12.770554542541504, "learning_rate": 8.923940718509168e-06, "loss": 0.58384528, "memory(GiB)": 34.88, "step": 37250, "train_speed(iter/s)": 0.414503 }, { "acc": 0.88326511, "epoch": 1.0087184902390816, "grad_norm": 12.935713768005371, "learning_rate": 8.923593881201697e-06, "loss": 0.65276361, "memory(GiB)": 34.88, "step": 37255, "train_speed(iter/s)": 0.414505 }, { "acc": 0.90199909, "epoch": 1.0088538705222971, "grad_norm": 7.741332054138184, "learning_rate": 8.92324699474977e-06, "loss": 0.49200802, "memory(GiB)": 34.88, "step": 37260, "train_speed(iter/s)": 0.414508 }, { "acc": 0.89756956, "epoch": 1.0089892508055127, "grad_norm": 10.736988067626953, "learning_rate": 8.922900059157727e-06, "loss": 0.59192619, "memory(GiB)": 34.88, "step": 37265, "train_speed(iter/s)": 0.414511 }, { "acc": 0.89785347, "epoch": 1.0091246310887283, "grad_norm": 22.689247131347656, "learning_rate": 8.922553074429916e-06, "loss": 0.52549419, "memory(GiB)": 34.88, "step": 37270, "train_speed(iter/s)": 0.414514 }, { "acc": 0.88586702, "epoch": 1.0092600113719439, "grad_norm": 9.185952186584473, "learning_rate": 8.922206040570683e-06, "loss": 0.63467655, "memory(GiB)": 34.88, "step": 37275, "train_speed(iter/s)": 0.414516 }, { "acc": 0.86544447, "epoch": 1.0093953916551592, "grad_norm": 6.957631587982178, "learning_rate": 8.921858957584376e-06, "loss": 0.71479797, "memory(GiB)": 34.88, "step": 37280, "train_speed(iter/s)": 0.414519 }, { "acc": 0.88787479, "epoch": 1.0095307719383748, "grad_norm": 10.454203605651855, "learning_rate": 8.921511825475341e-06, "loss": 0.64171038, "memory(GiB)": 34.88, "step": 37285, "train_speed(iter/s)": 0.414521 }, { "acc": 0.90154324, "epoch": 1.0096661522215904, "grad_norm": 7.315924167633057, "learning_rate": 8.92116464424793e-06, "loss": 0.47578039, "memory(GiB)": 34.88, "step": 37290, "train_speed(iter/s)": 0.414524 }, { "acc": 0.89796867, "epoch": 1.009801532504806, "grad_norm": 4.757676601409912, "learning_rate": 8.920817413906491e-06, "loss": 0.53148894, "memory(GiB)": 34.88, "step": 37295, "train_speed(iter/s)": 0.414526 }, { "acc": 0.87882776, "epoch": 1.0099369127880216, "grad_norm": 14.537627220153809, "learning_rate": 8.92047013445537e-06, "loss": 0.68543005, "memory(GiB)": 34.88, "step": 37300, "train_speed(iter/s)": 0.414529 }, { "acc": 0.90247078, "epoch": 1.0100722930712371, "grad_norm": 6.418614864349365, "learning_rate": 8.920122805898925e-06, "loss": 0.54627676, "memory(GiB)": 34.88, "step": 37305, "train_speed(iter/s)": 0.414531 }, { "acc": 0.87981224, "epoch": 1.0102076733544527, "grad_norm": 5.189750671386719, "learning_rate": 8.919775428241498e-06, "loss": 0.66857309, "memory(GiB)": 34.88, "step": 37310, "train_speed(iter/s)": 0.414534 }, { "acc": 0.87800131, "epoch": 1.0103430536376683, "grad_norm": 6.085308074951172, "learning_rate": 8.919428001487447e-06, "loss": 0.4986639, "memory(GiB)": 34.88, "step": 37315, "train_speed(iter/s)": 0.414536 }, { "acc": 0.86676941, "epoch": 1.0104784339208837, "grad_norm": 8.927721977233887, "learning_rate": 8.919080525641121e-06, "loss": 0.64624672, "memory(GiB)": 34.88, "step": 37320, "train_speed(iter/s)": 0.414539 }, { "acc": 0.88001366, "epoch": 1.0106138142040992, "grad_norm": 7.527516841888428, "learning_rate": 8.918733000706875e-06, "loss": 0.62782307, "memory(GiB)": 34.88, "step": 37325, "train_speed(iter/s)": 0.414542 }, { "acc": 0.90445232, "epoch": 1.0107491944873148, "grad_norm": 8.961297988891602, "learning_rate": 8.91838542668906e-06, "loss": 0.51160645, "memory(GiB)": 34.88, "step": 37330, "train_speed(iter/s)": 0.414544 }, { "acc": 0.86124172, "epoch": 1.0108845747705304, "grad_norm": 6.95699405670166, "learning_rate": 8.918037803592032e-06, "loss": 0.72515774, "memory(GiB)": 34.88, "step": 37335, "train_speed(iter/s)": 0.414547 }, { "acc": 0.89895287, "epoch": 1.011019955053746, "grad_norm": 11.078869819641113, "learning_rate": 8.917690131420145e-06, "loss": 0.48673239, "memory(GiB)": 34.88, "step": 37340, "train_speed(iter/s)": 0.414549 }, { "acc": 0.89180374, "epoch": 1.0111553353369616, "grad_norm": 8.545293807983398, "learning_rate": 8.917342410177756e-06, "loss": 0.57034259, "memory(GiB)": 34.88, "step": 37345, "train_speed(iter/s)": 0.414552 }, { "acc": 0.89373302, "epoch": 1.0112907156201771, "grad_norm": 8.114006042480469, "learning_rate": 8.916994639869217e-06, "loss": 0.54537654, "memory(GiB)": 34.88, "step": 37350, "train_speed(iter/s)": 0.414554 }, { "acc": 0.87457724, "epoch": 1.0114260959033927, "grad_norm": 7.193914413452148, "learning_rate": 8.916646820498887e-06, "loss": 0.59502826, "memory(GiB)": 34.88, "step": 37355, "train_speed(iter/s)": 0.414557 }, { "acc": 0.87721844, "epoch": 1.011561476186608, "grad_norm": 6.00902795791626, "learning_rate": 8.916298952071124e-06, "loss": 0.75053425, "memory(GiB)": 34.88, "step": 37360, "train_speed(iter/s)": 0.414559 }, { "acc": 0.87624321, "epoch": 1.0116968564698237, "grad_norm": 15.667283058166504, "learning_rate": 8.915951034590283e-06, "loss": 0.6554564, "memory(GiB)": 34.88, "step": 37365, "train_speed(iter/s)": 0.414562 }, { "acc": 0.88880529, "epoch": 1.0118322367530392, "grad_norm": 11.586654663085938, "learning_rate": 8.915603068060725e-06, "loss": 0.58266511, "memory(GiB)": 34.88, "step": 37370, "train_speed(iter/s)": 0.414564 }, { "acc": 0.87879982, "epoch": 1.0119676170362548, "grad_norm": 7.0732951164245605, "learning_rate": 8.915255052486808e-06, "loss": 0.55159731, "memory(GiB)": 34.88, "step": 37375, "train_speed(iter/s)": 0.414567 }, { "acc": 0.86825819, "epoch": 1.0121029973194704, "grad_norm": 11.65307903289795, "learning_rate": 8.914906987872893e-06, "loss": 0.68985248, "memory(GiB)": 34.88, "step": 37380, "train_speed(iter/s)": 0.414569 }, { "acc": 0.88991852, "epoch": 1.012238377602686, "grad_norm": 7.664618492126465, "learning_rate": 8.914558874223336e-06, "loss": 0.58735428, "memory(GiB)": 34.88, "step": 37385, "train_speed(iter/s)": 0.414572 }, { "acc": 0.86945562, "epoch": 1.0123737578859016, "grad_norm": 22.997692108154297, "learning_rate": 8.914210711542503e-06, "loss": 0.64311571, "memory(GiB)": 34.88, "step": 37390, "train_speed(iter/s)": 0.414574 }, { "acc": 0.88270931, "epoch": 1.0125091381691171, "grad_norm": 7.818882465362549, "learning_rate": 8.913862499834751e-06, "loss": 0.63595161, "memory(GiB)": 34.88, "step": 37395, "train_speed(iter/s)": 0.414577 }, { "acc": 0.91148129, "epoch": 1.0126445184523325, "grad_norm": 5.292921543121338, "learning_rate": 8.913514239104443e-06, "loss": 0.47005963, "memory(GiB)": 34.88, "step": 37400, "train_speed(iter/s)": 0.414579 }, { "acc": 0.90069647, "epoch": 1.012779898735548, "grad_norm": 14.342639923095703, "learning_rate": 8.913165929355944e-06, "loss": 0.46068544, "memory(GiB)": 34.88, "step": 37405, "train_speed(iter/s)": 0.414582 }, { "acc": 0.89084673, "epoch": 1.0129152790187637, "grad_norm": 13.91251277923584, "learning_rate": 8.912817570593616e-06, "loss": 0.5789474, "memory(GiB)": 34.88, "step": 37410, "train_speed(iter/s)": 0.414584 }, { "acc": 0.90538101, "epoch": 1.0130506593019792, "grad_norm": 6.7218499183654785, "learning_rate": 8.912469162821821e-06, "loss": 0.4948288, "memory(GiB)": 34.88, "step": 37415, "train_speed(iter/s)": 0.414587 }, { "acc": 0.89988613, "epoch": 1.0131860395851948, "grad_norm": 9.019180297851562, "learning_rate": 8.912120706044928e-06, "loss": 0.54155407, "memory(GiB)": 34.88, "step": 37420, "train_speed(iter/s)": 0.41459 }, { "acc": 0.85629158, "epoch": 1.0133214198684104, "grad_norm": 9.696961402893066, "learning_rate": 8.911772200267298e-06, "loss": 0.77904673, "memory(GiB)": 34.88, "step": 37425, "train_speed(iter/s)": 0.414592 }, { "acc": 0.86829033, "epoch": 1.013456800151626, "grad_norm": 8.873876571655273, "learning_rate": 8.9114236454933e-06, "loss": 0.75564303, "memory(GiB)": 34.88, "step": 37430, "train_speed(iter/s)": 0.414595 }, { "acc": 0.88807869, "epoch": 1.0135921804348416, "grad_norm": 10.090993881225586, "learning_rate": 8.911075041727294e-06, "loss": 0.62168784, "memory(GiB)": 34.88, "step": 37435, "train_speed(iter/s)": 0.414598 }, { "acc": 0.89473591, "epoch": 1.013727560718057, "grad_norm": 12.781231880187988, "learning_rate": 8.910726388973654e-06, "loss": 0.58358035, "memory(GiB)": 34.88, "step": 37440, "train_speed(iter/s)": 0.4146 }, { "acc": 0.88419142, "epoch": 1.0138629410012725, "grad_norm": 11.604629516601562, "learning_rate": 8.910377687236747e-06, "loss": 0.68032279, "memory(GiB)": 34.88, "step": 37445, "train_speed(iter/s)": 0.414603 }, { "acc": 0.8893548, "epoch": 1.013998321284488, "grad_norm": 6.944962501525879, "learning_rate": 8.910028936520936e-06, "loss": 0.60837584, "memory(GiB)": 34.88, "step": 37450, "train_speed(iter/s)": 0.414605 }, { "acc": 0.86950665, "epoch": 1.0141337015677037, "grad_norm": 9.671881675720215, "learning_rate": 8.909680136830592e-06, "loss": 0.75172596, "memory(GiB)": 34.88, "step": 37455, "train_speed(iter/s)": 0.414608 }, { "acc": 0.8831831, "epoch": 1.0142690818509192, "grad_norm": 6.481299877166748, "learning_rate": 8.90933128817009e-06, "loss": 0.67301388, "memory(GiB)": 34.88, "step": 37460, "train_speed(iter/s)": 0.41461 }, { "acc": 0.87456741, "epoch": 1.0144044621341348, "grad_norm": 11.434123992919922, "learning_rate": 8.908982390543791e-06, "loss": 0.72000761, "memory(GiB)": 34.88, "step": 37465, "train_speed(iter/s)": 0.414612 }, { "acc": 0.89855633, "epoch": 1.0145398424173504, "grad_norm": 4.806570529937744, "learning_rate": 8.90863344395607e-06, "loss": 0.48881345, "memory(GiB)": 34.88, "step": 37470, "train_speed(iter/s)": 0.414615 }, { "acc": 0.8804636, "epoch": 1.014675222700566, "grad_norm": 10.862404823303223, "learning_rate": 8.908284448411299e-06, "loss": 0.5305687, "memory(GiB)": 34.88, "step": 37475, "train_speed(iter/s)": 0.414617 }, { "acc": 0.86773033, "epoch": 1.0148106029837813, "grad_norm": 7.782657623291016, "learning_rate": 8.90793540391385e-06, "loss": 0.68286734, "memory(GiB)": 34.88, "step": 37480, "train_speed(iter/s)": 0.41462 }, { "acc": 0.9036582, "epoch": 1.014945983266997, "grad_norm": 5.818487644195557, "learning_rate": 8.907586310468095e-06, "loss": 0.51091113, "memory(GiB)": 34.88, "step": 37485, "train_speed(iter/s)": 0.414622 }, { "acc": 0.88151531, "epoch": 1.0150813635502125, "grad_norm": 7.187263011932373, "learning_rate": 8.907237168078404e-06, "loss": 0.59572449, "memory(GiB)": 34.88, "step": 37490, "train_speed(iter/s)": 0.414625 }, { "acc": 0.86610451, "epoch": 1.015216743833428, "grad_norm": 7.559378147125244, "learning_rate": 8.906887976749154e-06, "loss": 0.67989907, "memory(GiB)": 34.88, "step": 37495, "train_speed(iter/s)": 0.414627 }, { "acc": 0.87269745, "epoch": 1.0153521241166437, "grad_norm": 17.869041442871094, "learning_rate": 8.906538736484721e-06, "loss": 0.68978868, "memory(GiB)": 34.88, "step": 37500, "train_speed(iter/s)": 0.41463 }, { "acc": 0.8975666, "epoch": 1.0154875043998592, "grad_norm": 6.429849624633789, "learning_rate": 8.906189447289474e-06, "loss": 0.50747004, "memory(GiB)": 34.88, "step": 37505, "train_speed(iter/s)": 0.414633 }, { "acc": 0.87605247, "epoch": 1.0156228846830748, "grad_norm": 6.908369064331055, "learning_rate": 8.905840109167793e-06, "loss": 0.71367173, "memory(GiB)": 34.88, "step": 37510, "train_speed(iter/s)": 0.414635 }, { "acc": 0.8868412, "epoch": 1.0157582649662904, "grad_norm": 9.762985229492188, "learning_rate": 8.905490722124055e-06, "loss": 0.59739985, "memory(GiB)": 34.88, "step": 37515, "train_speed(iter/s)": 0.414637 }, { "acc": 0.86573601, "epoch": 1.0158936452495058, "grad_norm": 10.499791145324707, "learning_rate": 8.905141286162633e-06, "loss": 0.7031745, "memory(GiB)": 34.88, "step": 37520, "train_speed(iter/s)": 0.41464 }, { "acc": 0.90129957, "epoch": 1.0160290255327213, "grad_norm": 5.755733013153076, "learning_rate": 8.904791801287908e-06, "loss": 0.48599606, "memory(GiB)": 34.88, "step": 37525, "train_speed(iter/s)": 0.414643 }, { "acc": 0.87602654, "epoch": 1.016164405815937, "grad_norm": 7.032808780670166, "learning_rate": 8.904442267504254e-06, "loss": 0.62381163, "memory(GiB)": 34.88, "step": 37530, "train_speed(iter/s)": 0.414645 }, { "acc": 0.86619473, "epoch": 1.0162997860991525, "grad_norm": 9.49197006225586, "learning_rate": 8.904092684816053e-06, "loss": 0.66266837, "memory(GiB)": 34.88, "step": 37535, "train_speed(iter/s)": 0.414647 }, { "acc": 0.87435322, "epoch": 1.016435166382368, "grad_norm": 10.719549179077148, "learning_rate": 8.903743053227684e-06, "loss": 0.55073748, "memory(GiB)": 34.88, "step": 37540, "train_speed(iter/s)": 0.41465 }, { "acc": 0.87537422, "epoch": 1.0165705466655837, "grad_norm": 7.97046422958374, "learning_rate": 8.903393372743526e-06, "loss": 0.68027172, "memory(GiB)": 34.88, "step": 37545, "train_speed(iter/s)": 0.414652 }, { "acc": 0.89703026, "epoch": 1.0167059269487992, "grad_norm": 5.462719917297363, "learning_rate": 8.903043643367956e-06, "loss": 0.4673461, "memory(GiB)": 34.88, "step": 37550, "train_speed(iter/s)": 0.414655 }, { "acc": 0.87142429, "epoch": 1.0168413072320148, "grad_norm": 12.245999336242676, "learning_rate": 8.902693865105364e-06, "loss": 0.63831339, "memory(GiB)": 34.88, "step": 37555, "train_speed(iter/s)": 0.414657 }, { "acc": 0.87358589, "epoch": 1.0169766875152302, "grad_norm": 7.694332122802734, "learning_rate": 8.902344037960122e-06, "loss": 0.64809942, "memory(GiB)": 34.88, "step": 37560, "train_speed(iter/s)": 0.41466 }, { "acc": 0.87888222, "epoch": 1.0171120677984458, "grad_norm": 7.036419868469238, "learning_rate": 8.901994161936617e-06, "loss": 0.63622208, "memory(GiB)": 34.88, "step": 37565, "train_speed(iter/s)": 0.414662 }, { "acc": 0.88496399, "epoch": 1.0172474480816613, "grad_norm": 4.786320686340332, "learning_rate": 8.901644237039231e-06, "loss": 0.52580805, "memory(GiB)": 34.88, "step": 37570, "train_speed(iter/s)": 0.414665 }, { "acc": 0.89735241, "epoch": 1.017382828364877, "grad_norm": 12.165327072143555, "learning_rate": 8.901294263272349e-06, "loss": 0.54511166, "memory(GiB)": 34.88, "step": 37575, "train_speed(iter/s)": 0.414668 }, { "acc": 0.86952209, "epoch": 1.0175182086480925, "grad_norm": 12.336421966552734, "learning_rate": 8.900944240640351e-06, "loss": 0.64820719, "memory(GiB)": 34.88, "step": 37580, "train_speed(iter/s)": 0.41467 }, { "acc": 0.86563644, "epoch": 1.017653588931308, "grad_norm": 13.613280296325684, "learning_rate": 8.900594169147625e-06, "loss": 0.7359901, "memory(GiB)": 34.88, "step": 37585, "train_speed(iter/s)": 0.414672 }, { "acc": 0.87511883, "epoch": 1.0177889692145237, "grad_norm": 4.788947582244873, "learning_rate": 8.90024404879856e-06, "loss": 0.72160211, "memory(GiB)": 34.88, "step": 37590, "train_speed(iter/s)": 0.414675 }, { "acc": 0.8787426, "epoch": 1.0179243494977392, "grad_norm": 9.492087364196777, "learning_rate": 8.899893879597534e-06, "loss": 0.66548457, "memory(GiB)": 34.88, "step": 37595, "train_speed(iter/s)": 0.414677 }, { "acc": 0.9180151, "epoch": 1.0180597297809546, "grad_norm": 5.741060733795166, "learning_rate": 8.899543661548938e-06, "loss": 0.46942797, "memory(GiB)": 34.88, "step": 37600, "train_speed(iter/s)": 0.41468 }, { "acc": 0.86460333, "epoch": 1.0181951100641702, "grad_norm": 11.7116117477417, "learning_rate": 8.89919339465716e-06, "loss": 0.70061955, "memory(GiB)": 34.88, "step": 37605, "train_speed(iter/s)": 0.414683 }, { "acc": 0.84819679, "epoch": 1.0183304903473858, "grad_norm": 10.377008438110352, "learning_rate": 8.898843078926584e-06, "loss": 0.69810195, "memory(GiB)": 34.88, "step": 37610, "train_speed(iter/s)": 0.414685 }, { "acc": 0.89138155, "epoch": 1.0184658706306013, "grad_norm": 4.646606922149658, "learning_rate": 8.898492714361604e-06, "loss": 0.54589214, "memory(GiB)": 34.88, "step": 37615, "train_speed(iter/s)": 0.414688 }, { "acc": 0.9146246, "epoch": 1.018601250913817, "grad_norm": 4.235711574554443, "learning_rate": 8.898142300966602e-06, "loss": 0.42092447, "memory(GiB)": 34.88, "step": 37620, "train_speed(iter/s)": 0.41469 }, { "acc": 0.89106121, "epoch": 1.0187366311970325, "grad_norm": 8.867575645446777, "learning_rate": 8.897791838745973e-06, "loss": 0.62760167, "memory(GiB)": 34.88, "step": 37625, "train_speed(iter/s)": 0.414693 }, { "acc": 0.87717247, "epoch": 1.018872011480248, "grad_norm": 6.364133834838867, "learning_rate": 8.897441327704106e-06, "loss": 0.58702149, "memory(GiB)": 34.88, "step": 37630, "train_speed(iter/s)": 0.414696 }, { "acc": 0.87538128, "epoch": 1.0190073917634637, "grad_norm": 5.343918800354004, "learning_rate": 8.897090767845391e-06, "loss": 0.67501769, "memory(GiB)": 34.88, "step": 37635, "train_speed(iter/s)": 0.414698 }, { "acc": 0.88013802, "epoch": 1.019142772046679, "grad_norm": 8.161834716796875, "learning_rate": 8.89674015917422e-06, "loss": 0.60402207, "memory(GiB)": 34.88, "step": 37640, "train_speed(iter/s)": 0.414701 }, { "acc": 0.88208618, "epoch": 1.0192781523298946, "grad_norm": 9.904475212097168, "learning_rate": 8.896389501694984e-06, "loss": 0.67360277, "memory(GiB)": 34.88, "step": 37645, "train_speed(iter/s)": 0.414703 }, { "acc": 0.87383595, "epoch": 1.0194135326131102, "grad_norm": 9.316851615905762, "learning_rate": 8.896038795412078e-06, "loss": 0.67017326, "memory(GiB)": 34.88, "step": 37650, "train_speed(iter/s)": 0.414706 }, { "acc": 0.86871176, "epoch": 1.0195489128963258, "grad_norm": 10.710481643676758, "learning_rate": 8.895688040329892e-06, "loss": 0.74260387, "memory(GiB)": 34.88, "step": 37655, "train_speed(iter/s)": 0.414708 }, { "acc": 0.89553337, "epoch": 1.0196842931795413, "grad_norm": 8.515676498413086, "learning_rate": 8.895337236452824e-06, "loss": 0.61313043, "memory(GiB)": 34.88, "step": 37660, "train_speed(iter/s)": 0.414711 }, { "acc": 0.88285007, "epoch": 1.019819673462757, "grad_norm": 4.4361443519592285, "learning_rate": 8.894986383785265e-06, "loss": 0.54093409, "memory(GiB)": 34.88, "step": 37665, "train_speed(iter/s)": 0.414714 }, { "acc": 0.88717175, "epoch": 1.0199550537459725, "grad_norm": 12.017754554748535, "learning_rate": 8.894635482331611e-06, "loss": 0.52106175, "memory(GiB)": 34.88, "step": 37670, "train_speed(iter/s)": 0.414716 }, { "acc": 0.88641901, "epoch": 1.020090434029188, "grad_norm": 11.407780647277832, "learning_rate": 8.894284532096258e-06, "loss": 0.67908425, "memory(GiB)": 34.88, "step": 37675, "train_speed(iter/s)": 0.414719 }, { "acc": 0.86874371, "epoch": 1.0202258143124034, "grad_norm": 4.742300510406494, "learning_rate": 8.893933533083604e-06, "loss": 0.72160912, "memory(GiB)": 34.88, "step": 37680, "train_speed(iter/s)": 0.414721 }, { "acc": 0.88407803, "epoch": 1.020361194595619, "grad_norm": 8.149471282958984, "learning_rate": 8.893582485298045e-06, "loss": 0.57106514, "memory(GiB)": 34.88, "step": 37685, "train_speed(iter/s)": 0.414724 }, { "acc": 0.87850351, "epoch": 1.0204965748788346, "grad_norm": 10.86622142791748, "learning_rate": 8.893231388743975e-06, "loss": 0.62765126, "memory(GiB)": 34.88, "step": 37690, "train_speed(iter/s)": 0.414726 }, { "acc": 0.89982681, "epoch": 1.0206319551620502, "grad_norm": 8.022775650024414, "learning_rate": 8.892880243425798e-06, "loss": 0.4990449, "memory(GiB)": 34.88, "step": 37695, "train_speed(iter/s)": 0.414728 }, { "acc": 0.87647133, "epoch": 1.0207673354452658, "grad_norm": 8.376112937927246, "learning_rate": 8.892529049347908e-06, "loss": 0.72741313, "memory(GiB)": 34.88, "step": 37700, "train_speed(iter/s)": 0.414731 }, { "acc": 0.87904644, "epoch": 1.0209027157284813, "grad_norm": 9.4972562789917, "learning_rate": 8.892177806514708e-06, "loss": 0.73768997, "memory(GiB)": 34.88, "step": 37705, "train_speed(iter/s)": 0.414734 }, { "acc": 0.89446011, "epoch": 1.021038096011697, "grad_norm": 6.627746105194092, "learning_rate": 8.891826514930593e-06, "loss": 0.50510812, "memory(GiB)": 34.88, "step": 37710, "train_speed(iter/s)": 0.414736 }, { "acc": 0.89757061, "epoch": 1.0211734762949125, "grad_norm": 11.542776107788086, "learning_rate": 8.891475174599972e-06, "loss": 0.50366368, "memory(GiB)": 34.88, "step": 37715, "train_speed(iter/s)": 0.414738 }, { "acc": 0.86166716, "epoch": 1.0213088565781279, "grad_norm": 33.55169677734375, "learning_rate": 8.891123785527237e-06, "loss": 0.7498558, "memory(GiB)": 34.88, "step": 37720, "train_speed(iter/s)": 0.414741 }, { "acc": 0.89244823, "epoch": 1.0214442368613434, "grad_norm": 8.277615547180176, "learning_rate": 8.890772347716796e-06, "loss": 0.51816745, "memory(GiB)": 34.88, "step": 37725, "train_speed(iter/s)": 0.414743 }, { "acc": 0.87220249, "epoch": 1.021579617144559, "grad_norm": 6.358576774597168, "learning_rate": 8.89042086117305e-06, "loss": 0.68702269, "memory(GiB)": 34.88, "step": 37730, "train_speed(iter/s)": 0.414745 }, { "acc": 0.86291075, "epoch": 1.0217149974277746, "grad_norm": 8.451841354370117, "learning_rate": 8.890069325900399e-06, "loss": 0.75733709, "memory(GiB)": 34.88, "step": 37735, "train_speed(iter/s)": 0.414748 }, { "acc": 0.90945187, "epoch": 1.0218503777109902, "grad_norm": 4.170707702636719, "learning_rate": 8.88971774190325e-06, "loss": 0.404352, "memory(GiB)": 34.88, "step": 37740, "train_speed(iter/s)": 0.41475 }, { "acc": 0.91591721, "epoch": 1.0219857579942058, "grad_norm": 7.740670204162598, "learning_rate": 8.889366109186008e-06, "loss": 0.41326056, "memory(GiB)": 34.88, "step": 37745, "train_speed(iter/s)": 0.414753 }, { "acc": 0.88843727, "epoch": 1.0221211382774213, "grad_norm": 7.203803539276123, "learning_rate": 8.889014427753076e-06, "loss": 0.48841248, "memory(GiB)": 34.88, "step": 37750, "train_speed(iter/s)": 0.414755 }, { "acc": 0.88734388, "epoch": 1.022256518560637, "grad_norm": 8.676729202270508, "learning_rate": 8.888662697608859e-06, "loss": 0.57364264, "memory(GiB)": 34.88, "step": 37755, "train_speed(iter/s)": 0.414758 }, { "acc": 0.88779163, "epoch": 1.0223918988438523, "grad_norm": 7.458384037017822, "learning_rate": 8.888310918757764e-06, "loss": 0.56153703, "memory(GiB)": 34.88, "step": 37760, "train_speed(iter/s)": 0.41476 }, { "acc": 0.88529978, "epoch": 1.0225272791270679, "grad_norm": 9.461971282958984, "learning_rate": 8.887959091204198e-06, "loss": 0.58114605, "memory(GiB)": 34.88, "step": 37765, "train_speed(iter/s)": 0.414762 }, { "acc": 0.89383593, "epoch": 1.0226626594102834, "grad_norm": 7.535345554351807, "learning_rate": 8.887607214952568e-06, "loss": 0.51706705, "memory(GiB)": 34.88, "step": 37770, "train_speed(iter/s)": 0.414765 }, { "acc": 0.87681332, "epoch": 1.022798039693499, "grad_norm": 10.099774360656738, "learning_rate": 8.887255290007283e-06, "loss": 0.69283953, "memory(GiB)": 34.88, "step": 37775, "train_speed(iter/s)": 0.414767 }, { "acc": 0.88877058, "epoch": 1.0229334199767146, "grad_norm": 6.920489311218262, "learning_rate": 8.88690331637275e-06, "loss": 0.54455738, "memory(GiB)": 34.88, "step": 37780, "train_speed(iter/s)": 0.41477 }, { "acc": 0.88950624, "epoch": 1.0230688002599302, "grad_norm": 9.330635070800781, "learning_rate": 8.886551294053378e-06, "loss": 0.48277397, "memory(GiB)": 34.88, "step": 37785, "train_speed(iter/s)": 0.414772 }, { "acc": 0.87663937, "epoch": 1.0232041805431458, "grad_norm": 7.3126325607299805, "learning_rate": 8.886199223053577e-06, "loss": 0.57403941, "memory(GiB)": 34.88, "step": 37790, "train_speed(iter/s)": 0.414775 }, { "acc": 0.87604446, "epoch": 1.0233395608263613, "grad_norm": 5.8793792724609375, "learning_rate": 8.88584710337776e-06, "loss": 0.61576977, "memory(GiB)": 34.88, "step": 37795, "train_speed(iter/s)": 0.414777 }, { "acc": 0.87582006, "epoch": 1.0234749411095767, "grad_norm": 13.478242874145508, "learning_rate": 8.885494935030333e-06, "loss": 0.66473522, "memory(GiB)": 34.88, "step": 37800, "train_speed(iter/s)": 0.414779 }, { "acc": 0.8832077, "epoch": 1.0236103213927923, "grad_norm": 5.839123725891113, "learning_rate": 8.885142718015713e-06, "loss": 0.58234549, "memory(GiB)": 34.88, "step": 37805, "train_speed(iter/s)": 0.414782 }, { "acc": 0.89781551, "epoch": 1.0237457016760079, "grad_norm": 3.155548095703125, "learning_rate": 8.884790452338309e-06, "loss": 0.48673735, "memory(GiB)": 34.88, "step": 37810, "train_speed(iter/s)": 0.414784 }, { "acc": 0.86114807, "epoch": 1.0238810819592234, "grad_norm": 7.35123872756958, "learning_rate": 8.884438138002533e-06, "loss": 0.69685402, "memory(GiB)": 34.88, "step": 37815, "train_speed(iter/s)": 0.414787 }, { "acc": 0.88552589, "epoch": 1.024016462242439, "grad_norm": 8.069001197814941, "learning_rate": 8.884085775012801e-06, "loss": 0.65664587, "memory(GiB)": 34.88, "step": 37820, "train_speed(iter/s)": 0.414789 }, { "acc": 0.89851646, "epoch": 1.0241518425256546, "grad_norm": 8.479427337646484, "learning_rate": 8.883733363373527e-06, "loss": 0.53803601, "memory(GiB)": 34.88, "step": 37825, "train_speed(iter/s)": 0.414792 }, { "acc": 0.87803698, "epoch": 1.0242872228088702, "grad_norm": 8.70118522644043, "learning_rate": 8.883380903089122e-06, "loss": 0.65525618, "memory(GiB)": 34.88, "step": 37830, "train_speed(iter/s)": 0.414794 }, { "acc": 0.8673048, "epoch": 1.0244226030920858, "grad_norm": 6.0209527015686035, "learning_rate": 8.883028394164007e-06, "loss": 0.82282181, "memory(GiB)": 34.88, "step": 37835, "train_speed(iter/s)": 0.414796 }, { "acc": 0.87628326, "epoch": 1.0245579833753011, "grad_norm": 6.475090026855469, "learning_rate": 8.882675836602592e-06, "loss": 0.67696877, "memory(GiB)": 34.88, "step": 37840, "train_speed(iter/s)": 0.414799 }, { "acc": 0.8699461, "epoch": 1.0246933636585167, "grad_norm": 12.876474380493164, "learning_rate": 8.882323230409297e-06, "loss": 0.79141464, "memory(GiB)": 34.88, "step": 37845, "train_speed(iter/s)": 0.414801 }, { "acc": 0.88086929, "epoch": 1.0248287439417323, "grad_norm": 24.745019912719727, "learning_rate": 8.88197057558854e-06, "loss": 0.6999835, "memory(GiB)": 34.88, "step": 37850, "train_speed(iter/s)": 0.414803 }, { "acc": 0.87613707, "epoch": 1.0249641242249479, "grad_norm": 11.723231315612793, "learning_rate": 8.881617872144733e-06, "loss": 0.59724264, "memory(GiB)": 34.88, "step": 37855, "train_speed(iter/s)": 0.414805 }, { "acc": 0.90785065, "epoch": 1.0250995045081635, "grad_norm": 13.436180114746094, "learning_rate": 8.881265120082302e-06, "loss": 0.52822642, "memory(GiB)": 34.88, "step": 37860, "train_speed(iter/s)": 0.414808 }, { "acc": 0.88917618, "epoch": 1.025234884791379, "grad_norm": 4.8916521072387695, "learning_rate": 8.880912319405661e-06, "loss": 0.57951775, "memory(GiB)": 34.88, "step": 37865, "train_speed(iter/s)": 0.41481 }, { "acc": 0.89626617, "epoch": 1.0253702650745946, "grad_norm": 9.000588417053223, "learning_rate": 8.88055947011923e-06, "loss": 0.57249942, "memory(GiB)": 34.88, "step": 37870, "train_speed(iter/s)": 0.414812 }, { "acc": 0.87302055, "epoch": 1.0255056453578102, "grad_norm": 7.4251604080200195, "learning_rate": 8.88020657222743e-06, "loss": 0.58767195, "memory(GiB)": 34.88, "step": 37875, "train_speed(iter/s)": 0.414815 }, { "acc": 0.86284637, "epoch": 1.0256410256410255, "grad_norm": 13.878453254699707, "learning_rate": 8.879853625734683e-06, "loss": 0.77452984, "memory(GiB)": 34.88, "step": 37880, "train_speed(iter/s)": 0.414817 }, { "acc": 0.90376034, "epoch": 1.0257764059242411, "grad_norm": 6.373943328857422, "learning_rate": 8.879500630645408e-06, "loss": 0.50984879, "memory(GiB)": 34.88, "step": 37885, "train_speed(iter/s)": 0.41482 }, { "acc": 0.88936863, "epoch": 1.0259117862074567, "grad_norm": 6.898395538330078, "learning_rate": 8.879147586964028e-06, "loss": 0.57638402, "memory(GiB)": 34.88, "step": 37890, "train_speed(iter/s)": 0.414823 }, { "acc": 0.87599049, "epoch": 1.0260471664906723, "grad_norm": 4.175314426422119, "learning_rate": 8.878794494694965e-06, "loss": 0.66496358, "memory(GiB)": 34.88, "step": 37895, "train_speed(iter/s)": 0.414825 }, { "acc": 0.88246241, "epoch": 1.0261825467738879, "grad_norm": 9.574873924255371, "learning_rate": 8.878441353842644e-06, "loss": 0.60619135, "memory(GiB)": 34.88, "step": 37900, "train_speed(iter/s)": 0.414827 }, { "acc": 0.88603363, "epoch": 1.0263179270571035, "grad_norm": 5.015311241149902, "learning_rate": 8.878088164411487e-06, "loss": 0.62657499, "memory(GiB)": 34.88, "step": 37905, "train_speed(iter/s)": 0.414829 }, { "acc": 0.88280697, "epoch": 1.026453307340319, "grad_norm": 8.673966407775879, "learning_rate": 8.877734926405917e-06, "loss": 0.56112547, "memory(GiB)": 34.88, "step": 37910, "train_speed(iter/s)": 0.414832 }, { "acc": 0.89346619, "epoch": 1.0265886876235346, "grad_norm": 6.039122104644775, "learning_rate": 8.87738163983036e-06, "loss": 0.56484938, "memory(GiB)": 34.88, "step": 37915, "train_speed(iter/s)": 0.414834 }, { "acc": 0.88750916, "epoch": 1.02672406790675, "grad_norm": 10.745894432067871, "learning_rate": 8.877028304689245e-06, "loss": 0.63118343, "memory(GiB)": 34.88, "step": 37920, "train_speed(iter/s)": 0.414837 }, { "acc": 0.90766983, "epoch": 1.0268594481899656, "grad_norm": 5.483631610870361, "learning_rate": 8.876674920986996e-06, "loss": 0.44721045, "memory(GiB)": 34.88, "step": 37925, "train_speed(iter/s)": 0.414839 }, { "acc": 0.89625998, "epoch": 1.0269948284731811, "grad_norm": 6.327512741088867, "learning_rate": 8.876321488728038e-06, "loss": 0.5592474, "memory(GiB)": 34.88, "step": 37930, "train_speed(iter/s)": 0.414842 }, { "acc": 0.86978741, "epoch": 1.0271302087563967, "grad_norm": 16.51812744140625, "learning_rate": 8.8759680079168e-06, "loss": 0.66494718, "memory(GiB)": 34.88, "step": 37935, "train_speed(iter/s)": 0.414844 }, { "acc": 0.88280029, "epoch": 1.0272655890396123, "grad_norm": 10.59117603302002, "learning_rate": 8.87561447855771e-06, "loss": 0.56433506, "memory(GiB)": 34.88, "step": 37940, "train_speed(iter/s)": 0.414847 }, { "acc": 0.86737289, "epoch": 1.0274009693228279, "grad_norm": 6.9207024574279785, "learning_rate": 8.875260900655197e-06, "loss": 0.6704442, "memory(GiB)": 34.88, "step": 37945, "train_speed(iter/s)": 0.414849 }, { "acc": 0.9097599, "epoch": 1.0275363496060435, "grad_norm": 5.275101661682129, "learning_rate": 8.87490727421369e-06, "loss": 0.42895303, "memory(GiB)": 34.88, "step": 37950, "train_speed(iter/s)": 0.414852 }, { "acc": 0.85963469, "epoch": 1.027671729889259, "grad_norm": 7.441474437713623, "learning_rate": 8.874553599237619e-06, "loss": 0.79356337, "memory(GiB)": 34.88, "step": 37955, "train_speed(iter/s)": 0.414855 }, { "acc": 0.88717022, "epoch": 1.0278071101724744, "grad_norm": 6.046213626861572, "learning_rate": 8.874199875731412e-06, "loss": 0.59914379, "memory(GiB)": 34.88, "step": 37960, "train_speed(iter/s)": 0.414857 }, { "acc": 0.90744333, "epoch": 1.02794249045569, "grad_norm": 9.266548156738281, "learning_rate": 8.873846103699503e-06, "loss": 0.49820595, "memory(GiB)": 34.88, "step": 37965, "train_speed(iter/s)": 0.41486 }, { "acc": 0.86352558, "epoch": 1.0280778707389056, "grad_norm": 6.901295185089111, "learning_rate": 8.873492283146325e-06, "loss": 0.71982431, "memory(GiB)": 34.88, "step": 37970, "train_speed(iter/s)": 0.414862 }, { "acc": 0.87894564, "epoch": 1.0282132510221211, "grad_norm": 11.783746719360352, "learning_rate": 8.873138414076307e-06, "loss": 0.67014999, "memory(GiB)": 34.88, "step": 37975, "train_speed(iter/s)": 0.414864 }, { "acc": 0.88136253, "epoch": 1.0283486313053367, "grad_norm": 6.46390962600708, "learning_rate": 8.872784496493882e-06, "loss": 0.56852274, "memory(GiB)": 34.88, "step": 37980, "train_speed(iter/s)": 0.414866 }, { "acc": 0.86063089, "epoch": 1.0284840115885523, "grad_norm": 8.903071403503418, "learning_rate": 8.872430530403487e-06, "loss": 0.78714561, "memory(GiB)": 34.88, "step": 37985, "train_speed(iter/s)": 0.414869 }, { "acc": 0.89315615, "epoch": 1.0286193918717679, "grad_norm": 9.019078254699707, "learning_rate": 8.872076515809552e-06, "loss": 0.51976929, "memory(GiB)": 34.88, "step": 37990, "train_speed(iter/s)": 0.414872 }, { "acc": 0.86700506, "epoch": 1.0287547721549835, "grad_norm": 14.059484481811523, "learning_rate": 8.871722452716512e-06, "loss": 0.73025212, "memory(GiB)": 34.88, "step": 37995, "train_speed(iter/s)": 0.414874 }, { "acc": 0.88031731, "epoch": 1.0288901524381988, "grad_norm": 9.82673168182373, "learning_rate": 8.871368341128808e-06, "loss": 0.64806833, "memory(GiB)": 34.88, "step": 38000, "train_speed(iter/s)": 0.414876 }, { "acc": 0.87281122, "epoch": 1.0290255327214144, "grad_norm": 11.121286392211914, "learning_rate": 8.871014181050869e-06, "loss": 0.67043986, "memory(GiB)": 34.88, "step": 38005, "train_speed(iter/s)": 0.414879 }, { "acc": 0.87135115, "epoch": 1.02916091300463, "grad_norm": 11.063029289245605, "learning_rate": 8.870659972487135e-06, "loss": 0.66229587, "memory(GiB)": 34.88, "step": 38010, "train_speed(iter/s)": 0.414881 }, { "acc": 0.89478092, "epoch": 1.0292962932878456, "grad_norm": 4.56942081451416, "learning_rate": 8.870305715442043e-06, "loss": 0.58259521, "memory(GiB)": 34.88, "step": 38015, "train_speed(iter/s)": 0.414883 }, { "acc": 0.8862936, "epoch": 1.0294316735710611, "grad_norm": 12.031976699829102, "learning_rate": 8.869951409920029e-06, "loss": 0.63555984, "memory(GiB)": 34.88, "step": 38020, "train_speed(iter/s)": 0.414886 }, { "acc": 0.87187233, "epoch": 1.0295670538542767, "grad_norm": 12.351286888122559, "learning_rate": 8.869597055925532e-06, "loss": 0.70141907, "memory(GiB)": 34.88, "step": 38025, "train_speed(iter/s)": 0.414888 }, { "acc": 0.88043213, "epoch": 1.0297024341374923, "grad_norm": 9.410386085510254, "learning_rate": 8.869242653462994e-06, "loss": 0.62800808, "memory(GiB)": 34.88, "step": 38030, "train_speed(iter/s)": 0.414891 }, { "acc": 0.88464203, "epoch": 1.0298378144207079, "grad_norm": 8.67508602142334, "learning_rate": 8.868888202536851e-06, "loss": 0.52840395, "memory(GiB)": 34.88, "step": 38035, "train_speed(iter/s)": 0.414893 }, { "acc": 0.91054983, "epoch": 1.0299731947039232, "grad_norm": 6.603875637054443, "learning_rate": 8.868533703151543e-06, "loss": 0.50619774, "memory(GiB)": 34.88, "step": 38040, "train_speed(iter/s)": 0.414896 }, { "acc": 0.89224319, "epoch": 1.0301085749871388, "grad_norm": 6.763006210327148, "learning_rate": 8.868179155311514e-06, "loss": 0.52311125, "memory(GiB)": 34.88, "step": 38045, "train_speed(iter/s)": 0.414898 }, { "acc": 0.89132671, "epoch": 1.0302439552703544, "grad_norm": 6.437229633331299, "learning_rate": 8.867824559021205e-06, "loss": 0.49914236, "memory(GiB)": 34.88, "step": 38050, "train_speed(iter/s)": 0.4149 }, { "acc": 0.89595413, "epoch": 1.03037933555357, "grad_norm": 9.835874557495117, "learning_rate": 8.867469914285056e-06, "loss": 0.56047649, "memory(GiB)": 34.88, "step": 38055, "train_speed(iter/s)": 0.414903 }, { "acc": 0.88126822, "epoch": 1.0305147158367856, "grad_norm": 13.399887084960938, "learning_rate": 8.867115221107507e-06, "loss": 0.70246253, "memory(GiB)": 34.88, "step": 38060, "train_speed(iter/s)": 0.414905 }, { "acc": 0.88476124, "epoch": 1.0306500961200011, "grad_norm": 7.1369404792785645, "learning_rate": 8.866760479493007e-06, "loss": 0.65470037, "memory(GiB)": 34.88, "step": 38065, "train_speed(iter/s)": 0.414908 }, { "acc": 0.87926569, "epoch": 1.0307854764032167, "grad_norm": 7.4752912521362305, "learning_rate": 8.866405689445997e-06, "loss": 0.66851616, "memory(GiB)": 34.88, "step": 38070, "train_speed(iter/s)": 0.414911 }, { "acc": 0.86222334, "epoch": 1.0309208566864323, "grad_norm": 12.242349624633789, "learning_rate": 8.866050850970922e-06, "loss": 0.75466413, "memory(GiB)": 34.88, "step": 38075, "train_speed(iter/s)": 0.414913 }, { "acc": 0.8920929, "epoch": 1.0310562369696477, "grad_norm": 6.741575717926025, "learning_rate": 8.865695964072226e-06, "loss": 0.54384556, "memory(GiB)": 34.88, "step": 38080, "train_speed(iter/s)": 0.414916 }, { "acc": 0.88132238, "epoch": 1.0311916172528632, "grad_norm": 10.04307746887207, "learning_rate": 8.865341028754355e-06, "loss": 0.59031405, "memory(GiB)": 34.88, "step": 38085, "train_speed(iter/s)": 0.414918 }, { "acc": 0.90019245, "epoch": 1.0313269975360788, "grad_norm": 6.530912399291992, "learning_rate": 8.864986045021757e-06, "loss": 0.43819027, "memory(GiB)": 34.88, "step": 38090, "train_speed(iter/s)": 0.414921 }, { "acc": 0.88152142, "epoch": 1.0314623778192944, "grad_norm": 13.448836326599121, "learning_rate": 8.864631012878878e-06, "loss": 0.6456655, "memory(GiB)": 34.88, "step": 38095, "train_speed(iter/s)": 0.414922 }, { "acc": 0.88052235, "epoch": 1.03159775810251, "grad_norm": 17.965412139892578, "learning_rate": 8.864275932330164e-06, "loss": 0.61031394, "memory(GiB)": 34.88, "step": 38100, "train_speed(iter/s)": 0.414924 }, { "acc": 0.90382137, "epoch": 1.0317331383857256, "grad_norm": 10.077263832092285, "learning_rate": 8.863920803380064e-06, "loss": 0.48283391, "memory(GiB)": 34.88, "step": 38105, "train_speed(iter/s)": 0.414927 }, { "acc": 0.8946537, "epoch": 1.0318685186689411, "grad_norm": 6.29707145690918, "learning_rate": 8.863565626033027e-06, "loss": 0.49909763, "memory(GiB)": 34.88, "step": 38110, "train_speed(iter/s)": 0.414929 }, { "acc": 0.87973251, "epoch": 1.0320038989521567, "grad_norm": 16.82526206970215, "learning_rate": 8.863210400293504e-06, "loss": 0.68023233, "memory(GiB)": 34.88, "step": 38115, "train_speed(iter/s)": 0.414932 }, { "acc": 0.87460098, "epoch": 1.032139279235372, "grad_norm": 6.665896415710449, "learning_rate": 8.86285512616594e-06, "loss": 0.69906559, "memory(GiB)": 34.88, "step": 38120, "train_speed(iter/s)": 0.414934 }, { "acc": 0.89236183, "epoch": 1.0322746595185877, "grad_norm": 5.879065036773682, "learning_rate": 8.86249980365479e-06, "loss": 0.51976008, "memory(GiB)": 34.88, "step": 38125, "train_speed(iter/s)": 0.414937 }, { "acc": 0.88951368, "epoch": 1.0324100398018032, "grad_norm": 12.609177589416504, "learning_rate": 8.862144432764504e-06, "loss": 0.50992546, "memory(GiB)": 34.88, "step": 38130, "train_speed(iter/s)": 0.414939 }, { "acc": 0.85954885, "epoch": 1.0325454200850188, "grad_norm": 16.001405715942383, "learning_rate": 8.861789013499534e-06, "loss": 0.79364057, "memory(GiB)": 34.88, "step": 38135, "train_speed(iter/s)": 0.414942 }, { "acc": 0.8840621, "epoch": 1.0326808003682344, "grad_norm": 13.96235179901123, "learning_rate": 8.86143354586433e-06, "loss": 0.56508131, "memory(GiB)": 34.88, "step": 38140, "train_speed(iter/s)": 0.414944 }, { "acc": 0.8732914, "epoch": 1.03281618065145, "grad_norm": 12.842829704284668, "learning_rate": 8.861078029863346e-06, "loss": 0.7571506, "memory(GiB)": 34.88, "step": 38145, "train_speed(iter/s)": 0.414946 }, { "acc": 0.87369947, "epoch": 1.0329515609346656, "grad_norm": 9.060312271118164, "learning_rate": 8.860722465501039e-06, "loss": 0.674124, "memory(GiB)": 34.88, "step": 38150, "train_speed(iter/s)": 0.414948 }, { "acc": 0.90270863, "epoch": 1.033086941217881, "grad_norm": 6.629187107086182, "learning_rate": 8.860366852781858e-06, "loss": 0.46427155, "memory(GiB)": 34.88, "step": 38155, "train_speed(iter/s)": 0.414951 }, { "acc": 0.89480085, "epoch": 1.0332223215010965, "grad_norm": 4.985917568206787, "learning_rate": 8.860011191710261e-06, "loss": 0.48062491, "memory(GiB)": 34.88, "step": 38160, "train_speed(iter/s)": 0.414953 }, { "acc": 0.88935833, "epoch": 1.033357701784312, "grad_norm": 8.470671653747559, "learning_rate": 8.859655482290702e-06, "loss": 0.57074089, "memory(GiB)": 34.88, "step": 38165, "train_speed(iter/s)": 0.414956 }, { "acc": 0.86813898, "epoch": 1.0334930820675277, "grad_norm": 6.010828018188477, "learning_rate": 8.859299724527637e-06, "loss": 0.62528019, "memory(GiB)": 34.88, "step": 38170, "train_speed(iter/s)": 0.414958 }, { "acc": 0.87635889, "epoch": 1.0336284623507432, "grad_norm": 9.846138000488281, "learning_rate": 8.858943918425523e-06, "loss": 0.626752, "memory(GiB)": 34.88, "step": 38175, "train_speed(iter/s)": 0.41496 }, { "acc": 0.91396637, "epoch": 1.0337638426339588, "grad_norm": 5.984855651855469, "learning_rate": 8.858588063988818e-06, "loss": 0.45482912, "memory(GiB)": 34.88, "step": 38180, "train_speed(iter/s)": 0.414963 }, { "acc": 0.88951559, "epoch": 1.0338992229171744, "grad_norm": 4.592411994934082, "learning_rate": 8.858232161221978e-06, "loss": 0.64431095, "memory(GiB)": 34.88, "step": 38185, "train_speed(iter/s)": 0.414965 }, { "acc": 0.90375309, "epoch": 1.03403460320039, "grad_norm": 2.52296781539917, "learning_rate": 8.857876210129463e-06, "loss": 0.46090403, "memory(GiB)": 34.88, "step": 38190, "train_speed(iter/s)": 0.414968 }, { "acc": 0.87547331, "epoch": 1.0341699834836056, "grad_norm": 12.214597702026367, "learning_rate": 8.857520210715732e-06, "loss": 0.66267281, "memory(GiB)": 34.88, "step": 38195, "train_speed(iter/s)": 0.41497 }, { "acc": 0.85855875, "epoch": 1.034305363766821, "grad_norm": 16.4928035736084, "learning_rate": 8.857164162985242e-06, "loss": 0.79516206, "memory(GiB)": 34.88, "step": 38200, "train_speed(iter/s)": 0.414972 }, { "acc": 0.86313219, "epoch": 1.0344407440500365, "grad_norm": 10.435916900634766, "learning_rate": 8.856808066942455e-06, "loss": 0.7033309, "memory(GiB)": 34.88, "step": 38205, "train_speed(iter/s)": 0.414975 }, { "acc": 0.88098717, "epoch": 1.034576124333252, "grad_norm": 7.81487512588501, "learning_rate": 8.856451922591832e-06, "loss": 0.62732821, "memory(GiB)": 34.88, "step": 38210, "train_speed(iter/s)": 0.414977 }, { "acc": 0.88981543, "epoch": 1.0347115046164677, "grad_norm": 10.162261962890625, "learning_rate": 8.856095729937836e-06, "loss": 0.60174828, "memory(GiB)": 34.88, "step": 38215, "train_speed(iter/s)": 0.41498 }, { "acc": 0.87962246, "epoch": 1.0348468848996832, "grad_norm": 11.907100677490234, "learning_rate": 8.855739488984926e-06, "loss": 0.57914963, "memory(GiB)": 34.88, "step": 38220, "train_speed(iter/s)": 0.414983 }, { "acc": 0.89185972, "epoch": 1.0349822651828988, "grad_norm": 9.866690635681152, "learning_rate": 8.855383199737565e-06, "loss": 0.53844366, "memory(GiB)": 34.88, "step": 38225, "train_speed(iter/s)": 0.414985 }, { "acc": 0.88611994, "epoch": 1.0351176454661144, "grad_norm": 14.129805564880371, "learning_rate": 8.85502686220022e-06, "loss": 0.60186243, "memory(GiB)": 34.88, "step": 38230, "train_speed(iter/s)": 0.414988 }, { "acc": 0.86391087, "epoch": 1.0352530257493298, "grad_norm": 10.200177192687988, "learning_rate": 8.854670476377348e-06, "loss": 0.66166363, "memory(GiB)": 34.88, "step": 38235, "train_speed(iter/s)": 0.41499 }, { "acc": 0.88841267, "epoch": 1.0353884060325453, "grad_norm": 9.88891315460205, "learning_rate": 8.85431404227342e-06, "loss": 0.59103727, "memory(GiB)": 34.88, "step": 38240, "train_speed(iter/s)": 0.414992 }, { "acc": 0.89116154, "epoch": 1.035523786315761, "grad_norm": 7.575701713562012, "learning_rate": 8.853957559892897e-06, "loss": 0.53307762, "memory(GiB)": 34.88, "step": 38245, "train_speed(iter/s)": 0.414994 }, { "acc": 0.83121548, "epoch": 1.0356591665989765, "grad_norm": 12.394176483154297, "learning_rate": 8.853601029240247e-06, "loss": 1.01021595, "memory(GiB)": 34.88, "step": 38250, "train_speed(iter/s)": 0.414997 }, { "acc": 0.89723396, "epoch": 1.035794546882192, "grad_norm": 10.060104370117188, "learning_rate": 8.853244450319934e-06, "loss": 0.50085649, "memory(GiB)": 34.88, "step": 38255, "train_speed(iter/s)": 0.414999 }, { "acc": 0.86452675, "epoch": 1.0359299271654077, "grad_norm": 12.858074188232422, "learning_rate": 8.852887823136429e-06, "loss": 0.66392646, "memory(GiB)": 34.88, "step": 38260, "train_speed(iter/s)": 0.415002 }, { "acc": 0.89711905, "epoch": 1.0360653074486232, "grad_norm": 7.297204494476318, "learning_rate": 8.852531147694194e-06, "loss": 0.49605169, "memory(GiB)": 34.88, "step": 38265, "train_speed(iter/s)": 0.415004 }, { "acc": 0.8897459, "epoch": 1.0362006877318388, "grad_norm": 8.995878219604492, "learning_rate": 8.8521744239977e-06, "loss": 0.6438118, "memory(GiB)": 34.88, "step": 38270, "train_speed(iter/s)": 0.415007 }, { "acc": 0.87603331, "epoch": 1.0363360680150544, "grad_norm": 5.420494079589844, "learning_rate": 8.851817652051416e-06, "loss": 0.54290476, "memory(GiB)": 34.88, "step": 38275, "train_speed(iter/s)": 0.415009 }, { "acc": 0.90807962, "epoch": 1.0364714482982698, "grad_norm": 8.10566234588623, "learning_rate": 8.851460831859811e-06, "loss": 0.58302917, "memory(GiB)": 34.88, "step": 38280, "train_speed(iter/s)": 0.415011 }, { "acc": 0.88845539, "epoch": 1.0366068285814853, "grad_norm": 29.8663330078125, "learning_rate": 8.851103963427352e-06, "loss": 0.55334592, "memory(GiB)": 34.88, "step": 38285, "train_speed(iter/s)": 0.415013 }, { "acc": 0.90727644, "epoch": 1.036742208864701, "grad_norm": 6.543661117553711, "learning_rate": 8.850747046758516e-06, "loss": 0.49326124, "memory(GiB)": 34.88, "step": 38290, "train_speed(iter/s)": 0.415016 }, { "acc": 0.88514833, "epoch": 1.0368775891479165, "grad_norm": 7.893460273742676, "learning_rate": 8.850390081857767e-06, "loss": 0.54890323, "memory(GiB)": 34.88, "step": 38295, "train_speed(iter/s)": 0.415018 }, { "acc": 0.87180758, "epoch": 1.037012969431132, "grad_norm": 9.209010124206543, "learning_rate": 8.85003306872958e-06, "loss": 0.67581844, "memory(GiB)": 34.88, "step": 38300, "train_speed(iter/s)": 0.41502 }, { "acc": 0.88590889, "epoch": 1.0371483497143477, "grad_norm": 8.520352363586426, "learning_rate": 8.849676007378426e-06, "loss": 0.69546094, "memory(GiB)": 34.88, "step": 38305, "train_speed(iter/s)": 0.415023 }, { "acc": 0.86630421, "epoch": 1.0372837299975632, "grad_norm": 7.977405548095703, "learning_rate": 8.849318897808782e-06, "loss": 0.75505295, "memory(GiB)": 34.88, "step": 38310, "train_speed(iter/s)": 0.415025 }, { "acc": 0.88037024, "epoch": 1.0374191102807786, "grad_norm": 8.073823928833008, "learning_rate": 8.848961740025116e-06, "loss": 0.59725981, "memory(GiB)": 34.88, "step": 38315, "train_speed(iter/s)": 0.415027 }, { "acc": 0.90095987, "epoch": 1.0375544905639942, "grad_norm": 23.39939308166504, "learning_rate": 8.848604534031904e-06, "loss": 0.48956966, "memory(GiB)": 34.88, "step": 38320, "train_speed(iter/s)": 0.41503 }, { "acc": 0.89846745, "epoch": 1.0376898708472098, "grad_norm": 8.6735200881958, "learning_rate": 8.848247279833624e-06, "loss": 0.529566, "memory(GiB)": 34.88, "step": 38325, "train_speed(iter/s)": 0.415032 }, { "acc": 0.89474497, "epoch": 1.0378252511304253, "grad_norm": 9.362578392028809, "learning_rate": 8.847889977434747e-06, "loss": 0.51957164, "memory(GiB)": 34.88, "step": 38330, "train_speed(iter/s)": 0.415035 }, { "acc": 0.87362232, "epoch": 1.037960631413641, "grad_norm": 9.359681129455566, "learning_rate": 8.84753262683975e-06, "loss": 0.61788111, "memory(GiB)": 34.88, "step": 38335, "train_speed(iter/s)": 0.415037 }, { "acc": 0.86028051, "epoch": 1.0380960116968565, "grad_norm": 19.73184585571289, "learning_rate": 8.847175228053108e-06, "loss": 0.72856417, "memory(GiB)": 34.88, "step": 38340, "train_speed(iter/s)": 0.415039 }, { "acc": 0.86972427, "epoch": 1.038231391980072, "grad_norm": 9.702607154846191, "learning_rate": 8.846817781079304e-06, "loss": 0.6989665, "memory(GiB)": 34.88, "step": 38345, "train_speed(iter/s)": 0.415041 }, { "acc": 0.91904116, "epoch": 1.0383667722632877, "grad_norm": 7.885833263397217, "learning_rate": 8.84646028592281e-06, "loss": 0.39727495, "memory(GiB)": 34.88, "step": 38350, "train_speed(iter/s)": 0.415043 }, { "acc": 0.89974728, "epoch": 1.038502152546503, "grad_norm": 8.623080253601074, "learning_rate": 8.846102742588107e-06, "loss": 0.53818531, "memory(GiB)": 34.88, "step": 38355, "train_speed(iter/s)": 0.415046 }, { "acc": 0.86300526, "epoch": 1.0386375328297186, "grad_norm": 23.21466827392578, "learning_rate": 8.845745151079672e-06, "loss": 0.80705376, "memory(GiB)": 34.88, "step": 38360, "train_speed(iter/s)": 0.415048 }, { "acc": 0.87711973, "epoch": 1.0387729131129342, "grad_norm": 3.904482841491699, "learning_rate": 8.845387511401984e-06, "loss": 0.64411764, "memory(GiB)": 34.88, "step": 38365, "train_speed(iter/s)": 0.415051 }, { "acc": 0.90547009, "epoch": 1.0389082933961498, "grad_norm": 5.5944671630859375, "learning_rate": 8.845029823559528e-06, "loss": 0.43961058, "memory(GiB)": 34.88, "step": 38370, "train_speed(iter/s)": 0.415053 }, { "acc": 0.87329197, "epoch": 1.0390436736793653, "grad_norm": 9.36296558380127, "learning_rate": 8.84467208755678e-06, "loss": 0.79221668, "memory(GiB)": 34.88, "step": 38375, "train_speed(iter/s)": 0.415055 }, { "acc": 0.88824844, "epoch": 1.039179053962581, "grad_norm": 7.690299987792969, "learning_rate": 8.844314303398222e-06, "loss": 0.62131448, "memory(GiB)": 34.88, "step": 38380, "train_speed(iter/s)": 0.415057 }, { "acc": 0.85737495, "epoch": 1.0393144342457965, "grad_norm": 8.926047325134277, "learning_rate": 8.843956471088338e-06, "loss": 0.86263428, "memory(GiB)": 34.88, "step": 38385, "train_speed(iter/s)": 0.415059 }, { "acc": 0.87151003, "epoch": 1.039449814529012, "grad_norm": 21.467187881469727, "learning_rate": 8.843598590631611e-06, "loss": 0.6514493, "memory(GiB)": 34.88, "step": 38390, "train_speed(iter/s)": 0.415062 }, { "acc": 0.89673557, "epoch": 1.0395851948122274, "grad_norm": 10.156475067138672, "learning_rate": 8.843240662032518e-06, "loss": 0.54202337, "memory(GiB)": 34.88, "step": 38395, "train_speed(iter/s)": 0.415065 }, { "acc": 0.87599087, "epoch": 1.039720575095443, "grad_norm": 18.252378463745117, "learning_rate": 8.84288268529555e-06, "loss": 0.68624787, "memory(GiB)": 34.88, "step": 38400, "train_speed(iter/s)": 0.415067 }, { "acc": 0.91802664, "epoch": 1.0398559553786586, "grad_norm": 13.032415390014648, "learning_rate": 8.84252466042519e-06, "loss": 0.41389971, "memory(GiB)": 34.88, "step": 38405, "train_speed(iter/s)": 0.41507 }, { "acc": 0.8726243, "epoch": 1.0399913356618742, "grad_norm": 13.116676330566406, "learning_rate": 8.84216658742592e-06, "loss": 0.7544014, "memory(GiB)": 34.88, "step": 38410, "train_speed(iter/s)": 0.415072 }, { "acc": 0.87617569, "epoch": 1.0401267159450898, "grad_norm": 5.921605110168457, "learning_rate": 8.841808466302225e-06, "loss": 0.65006542, "memory(GiB)": 34.88, "step": 38415, "train_speed(iter/s)": 0.415074 }, { "acc": 0.89005604, "epoch": 1.0402620962283053, "grad_norm": 7.262845993041992, "learning_rate": 8.841450297058596e-06, "loss": 0.58447094, "memory(GiB)": 34.88, "step": 38420, "train_speed(iter/s)": 0.415076 }, { "acc": 0.87294722, "epoch": 1.040397476511521, "grad_norm": 17.43552017211914, "learning_rate": 8.841092079699516e-06, "loss": 0.61460361, "memory(GiB)": 34.88, "step": 38425, "train_speed(iter/s)": 0.415079 }, { "acc": 0.87815838, "epoch": 1.0405328567947365, "grad_norm": 8.904397010803223, "learning_rate": 8.840733814229474e-06, "loss": 0.68618875, "memory(GiB)": 34.88, "step": 38430, "train_speed(iter/s)": 0.415081 }, { "acc": 0.88756008, "epoch": 1.0406682370779519, "grad_norm": 7.6637797355651855, "learning_rate": 8.840375500652956e-06, "loss": 0.60688481, "memory(GiB)": 34.88, "step": 38435, "train_speed(iter/s)": 0.415083 }, { "acc": 0.88970413, "epoch": 1.0408036173611674, "grad_norm": 5.37631368637085, "learning_rate": 8.840017138974454e-06, "loss": 0.56613092, "memory(GiB)": 34.88, "step": 38440, "train_speed(iter/s)": 0.415085 }, { "acc": 0.91186962, "epoch": 1.040938997644383, "grad_norm": 5.324510097503662, "learning_rate": 8.839658729198455e-06, "loss": 0.46349001, "memory(GiB)": 34.88, "step": 38445, "train_speed(iter/s)": 0.415088 }, { "acc": 0.89699364, "epoch": 1.0410743779275986, "grad_norm": 4.093430042266846, "learning_rate": 8.839300271329448e-06, "loss": 0.51037874, "memory(GiB)": 34.88, "step": 38450, "train_speed(iter/s)": 0.41509 }, { "acc": 0.89046268, "epoch": 1.0412097582108142, "grad_norm": 6.884222984313965, "learning_rate": 8.838941765371924e-06, "loss": 0.49998989, "memory(GiB)": 34.88, "step": 38455, "train_speed(iter/s)": 0.415093 }, { "acc": 0.87231407, "epoch": 1.0413451384940298, "grad_norm": 11.425057411193848, "learning_rate": 8.838583211330376e-06, "loss": 0.6589119, "memory(GiB)": 34.88, "step": 38460, "train_speed(iter/s)": 0.415095 }, { "acc": 0.88338852, "epoch": 1.0414805187772453, "grad_norm": 7.066217422485352, "learning_rate": 8.838224609209293e-06, "loss": 0.69925036, "memory(GiB)": 34.88, "step": 38465, "train_speed(iter/s)": 0.415097 }, { "acc": 0.90021553, "epoch": 1.041615899060461, "grad_norm": 8.480876922607422, "learning_rate": 8.83786595901317e-06, "loss": 0.42081466, "memory(GiB)": 34.88, "step": 38470, "train_speed(iter/s)": 0.415099 }, { "acc": 0.86143188, "epoch": 1.0417512793436763, "grad_norm": 9.363036155700684, "learning_rate": 8.837507260746495e-06, "loss": 0.82612925, "memory(GiB)": 34.88, "step": 38475, "train_speed(iter/s)": 0.415102 }, { "acc": 0.88644915, "epoch": 1.0418866596268919, "grad_norm": 7.456128120422363, "learning_rate": 8.837148514413765e-06, "loss": 0.62472343, "memory(GiB)": 34.88, "step": 38480, "train_speed(iter/s)": 0.415104 }, { "acc": 0.89166794, "epoch": 1.0420220399101074, "grad_norm": 5.551084041595459, "learning_rate": 8.836789720019474e-06, "loss": 0.49473867, "memory(GiB)": 34.88, "step": 38485, "train_speed(iter/s)": 0.415107 }, { "acc": 0.87573147, "epoch": 1.042157420193323, "grad_norm": 9.863886833190918, "learning_rate": 8.836430877568115e-06, "loss": 0.57633724, "memory(GiB)": 34.88, "step": 38490, "train_speed(iter/s)": 0.415109 }, { "acc": 0.8928792, "epoch": 1.0422928004765386, "grad_norm": 5.653395175933838, "learning_rate": 8.836071987064186e-06, "loss": 0.56397572, "memory(GiB)": 34.88, "step": 38495, "train_speed(iter/s)": 0.415112 }, { "acc": 0.86843863, "epoch": 1.0424281807597542, "grad_norm": 4.959477424621582, "learning_rate": 8.83571304851218e-06, "loss": 0.62668962, "memory(GiB)": 34.88, "step": 38500, "train_speed(iter/s)": 0.415114 }, { "acc": 0.87987766, "epoch": 1.0425635610429698, "grad_norm": 12.748259544372559, "learning_rate": 8.835354061916595e-06, "loss": 0.64744377, "memory(GiB)": 34.88, "step": 38505, "train_speed(iter/s)": 0.415116 }, { "acc": 0.91362133, "epoch": 1.0426989413261853, "grad_norm": 11.962552070617676, "learning_rate": 8.834995027281927e-06, "loss": 0.44062471, "memory(GiB)": 34.88, "step": 38510, "train_speed(iter/s)": 0.415119 }, { "acc": 0.87586517, "epoch": 1.0428343216094007, "grad_norm": 25.054243087768555, "learning_rate": 8.834635944612673e-06, "loss": 0.7026515, "memory(GiB)": 34.88, "step": 38515, "train_speed(iter/s)": 0.415121 }, { "acc": 0.89624968, "epoch": 1.0429697018926163, "grad_norm": 5.271387100219727, "learning_rate": 8.834276813913333e-06, "loss": 0.47399411, "memory(GiB)": 34.88, "step": 38520, "train_speed(iter/s)": 0.415124 }, { "acc": 0.8596076, "epoch": 1.0431050821758319, "grad_norm": 13.302734375, "learning_rate": 8.833917635188405e-06, "loss": 0.73656778, "memory(GiB)": 34.88, "step": 38525, "train_speed(iter/s)": 0.415126 }, { "acc": 0.89126816, "epoch": 1.0432404624590474, "grad_norm": 6.209989547729492, "learning_rate": 8.833558408442389e-06, "loss": 0.60017767, "memory(GiB)": 34.88, "step": 38530, "train_speed(iter/s)": 0.415128 }, { "acc": 0.87890663, "epoch": 1.043375842742263, "grad_norm": 14.441869735717773, "learning_rate": 8.833199133679784e-06, "loss": 0.74438791, "memory(GiB)": 34.88, "step": 38535, "train_speed(iter/s)": 0.415131 }, { "acc": 0.88766947, "epoch": 1.0435112230254786, "grad_norm": 6.659463405609131, "learning_rate": 8.832839810905091e-06, "loss": 0.66645451, "memory(GiB)": 34.88, "step": 38540, "train_speed(iter/s)": 0.415133 }, { "acc": 0.89785976, "epoch": 1.0436466033086942, "grad_norm": 8.513096809387207, "learning_rate": 8.832480440122811e-06, "loss": 0.49028497, "memory(GiB)": 34.88, "step": 38545, "train_speed(iter/s)": 0.415136 }, { "acc": 0.87373333, "epoch": 1.0437819835919098, "grad_norm": 13.255013465881348, "learning_rate": 8.832121021337447e-06, "loss": 0.57339559, "memory(GiB)": 34.88, "step": 38550, "train_speed(iter/s)": 0.415138 }, { "acc": 0.88777885, "epoch": 1.0439173638751251, "grad_norm": 13.755060195922852, "learning_rate": 8.831761554553501e-06, "loss": 0.50037169, "memory(GiB)": 34.88, "step": 38555, "train_speed(iter/s)": 0.41514 }, { "acc": 0.88390541, "epoch": 1.0440527441583407, "grad_norm": 10.954681396484375, "learning_rate": 8.831402039775475e-06, "loss": 0.65514932, "memory(GiB)": 34.88, "step": 38560, "train_speed(iter/s)": 0.415143 }, { "acc": 0.89974823, "epoch": 1.0441881244415563, "grad_norm": 6.964783668518066, "learning_rate": 8.831042477007874e-06, "loss": 0.44937892, "memory(GiB)": 34.88, "step": 38565, "train_speed(iter/s)": 0.415145 }, { "acc": 0.88077202, "epoch": 1.0443235047247719, "grad_norm": 11.493309020996094, "learning_rate": 8.830682866255199e-06, "loss": 0.64061766, "memory(GiB)": 34.88, "step": 38570, "train_speed(iter/s)": 0.415147 }, { "acc": 0.87591181, "epoch": 1.0444588850079874, "grad_norm": 12.334027290344238, "learning_rate": 8.83032320752196e-06, "loss": 0.71078119, "memory(GiB)": 34.88, "step": 38575, "train_speed(iter/s)": 0.415149 }, { "acc": 0.91135397, "epoch": 1.044594265291203, "grad_norm": 5.840157985687256, "learning_rate": 8.829963500812659e-06, "loss": 0.41160803, "memory(GiB)": 34.88, "step": 38580, "train_speed(iter/s)": 0.415151 }, { "acc": 0.88186617, "epoch": 1.0447296455744186, "grad_norm": 7.407968044281006, "learning_rate": 8.8296037461318e-06, "loss": 0.57406878, "memory(GiB)": 34.88, "step": 38585, "train_speed(iter/s)": 0.415154 }, { "acc": 0.87699833, "epoch": 1.0448650258576342, "grad_norm": 9.408209800720215, "learning_rate": 8.829243943483895e-06, "loss": 0.66566601, "memory(GiB)": 34.88, "step": 38590, "train_speed(iter/s)": 0.415156 }, { "acc": 0.88811846, "epoch": 1.0450004061408495, "grad_norm": 15.601776123046875, "learning_rate": 8.828884092873448e-06, "loss": 0.56308413, "memory(GiB)": 34.88, "step": 38595, "train_speed(iter/s)": 0.415159 }, { "acc": 0.87467737, "epoch": 1.0451357864240651, "grad_norm": 9.076518058776855, "learning_rate": 8.82852419430497e-06, "loss": 0.65828371, "memory(GiB)": 34.88, "step": 38600, "train_speed(iter/s)": 0.41516 }, { "acc": 0.91011848, "epoch": 1.0452711667072807, "grad_norm": 11.068793296813965, "learning_rate": 8.828164247782963e-06, "loss": 0.487709, "memory(GiB)": 34.88, "step": 38605, "train_speed(iter/s)": 0.415163 }, { "acc": 0.86359138, "epoch": 1.0454065469904963, "grad_norm": 6.748088359832764, "learning_rate": 8.827804253311942e-06, "loss": 0.69009314, "memory(GiB)": 34.88, "step": 38610, "train_speed(iter/s)": 0.415166 }, { "acc": 0.87341433, "epoch": 1.0455419272737119, "grad_norm": 5.235990047454834, "learning_rate": 8.827444210896415e-06, "loss": 0.55825348, "memory(GiB)": 34.88, "step": 38615, "train_speed(iter/s)": 0.415168 }, { "acc": 0.87866249, "epoch": 1.0456773075569274, "grad_norm": 6.470832824707031, "learning_rate": 8.827084120540891e-06, "loss": 0.69444981, "memory(GiB)": 34.88, "step": 38620, "train_speed(iter/s)": 0.41517 }, { "acc": 0.8853981, "epoch": 1.045812687840143, "grad_norm": 15.79156494140625, "learning_rate": 8.826723982249881e-06, "loss": 0.57141247, "memory(GiB)": 34.88, "step": 38625, "train_speed(iter/s)": 0.415172 }, { "acc": 0.90096674, "epoch": 1.0459480681233586, "grad_norm": 7.283397674560547, "learning_rate": 8.826363796027899e-06, "loss": 0.52216835, "memory(GiB)": 34.88, "step": 38630, "train_speed(iter/s)": 0.415174 }, { "acc": 0.85698509, "epoch": 1.046083448406574, "grad_norm": 9.301689147949219, "learning_rate": 8.826003561879453e-06, "loss": 0.79299273, "memory(GiB)": 34.88, "step": 38635, "train_speed(iter/s)": 0.415177 }, { "acc": 0.89132061, "epoch": 1.0462188286897895, "grad_norm": 18.627613067626953, "learning_rate": 8.82564327980906e-06, "loss": 0.58337383, "memory(GiB)": 34.88, "step": 38640, "train_speed(iter/s)": 0.415179 }, { "acc": 0.87805014, "epoch": 1.0463542089730051, "grad_norm": 17.805164337158203, "learning_rate": 8.82528294982123e-06, "loss": 0.56534934, "memory(GiB)": 34.88, "step": 38645, "train_speed(iter/s)": 0.415181 }, { "acc": 0.88188019, "epoch": 1.0464895892562207, "grad_norm": 13.241097450256348, "learning_rate": 8.824922571920477e-06, "loss": 0.61983204, "memory(GiB)": 34.88, "step": 38650, "train_speed(iter/s)": 0.415183 }, { "acc": 0.87607489, "epoch": 1.0466249695394363, "grad_norm": 5.7613844871521, "learning_rate": 8.824562146111318e-06, "loss": 0.65493603, "memory(GiB)": 34.88, "step": 38655, "train_speed(iter/s)": 0.415185 }, { "acc": 0.87829943, "epoch": 1.0467603498226519, "grad_norm": 12.290838241577148, "learning_rate": 8.824201672398264e-06, "loss": 0.63695707, "memory(GiB)": 34.88, "step": 38660, "train_speed(iter/s)": 0.415188 }, { "acc": 0.87516479, "epoch": 1.0468957301058675, "grad_norm": 7.910211086273193, "learning_rate": 8.823841150785836e-06, "loss": 0.75679502, "memory(GiB)": 34.88, "step": 38665, "train_speed(iter/s)": 0.41519 }, { "acc": 0.88426208, "epoch": 1.047031110389083, "grad_norm": 11.585653305053711, "learning_rate": 8.823480581278545e-06, "loss": 0.60592604, "memory(GiB)": 34.88, "step": 38670, "train_speed(iter/s)": 0.415192 }, { "acc": 0.89084377, "epoch": 1.0471664906722984, "grad_norm": 5.756180286407471, "learning_rate": 8.82311996388091e-06, "loss": 0.50279675, "memory(GiB)": 34.88, "step": 38675, "train_speed(iter/s)": 0.415195 }, { "acc": 0.8956274, "epoch": 1.047301870955514, "grad_norm": 8.821089744567871, "learning_rate": 8.822759298597451e-06, "loss": 0.56915154, "memory(GiB)": 34.88, "step": 38680, "train_speed(iter/s)": 0.415197 }, { "acc": 0.89895554, "epoch": 1.0474372512387295, "grad_norm": 3.320221185684204, "learning_rate": 8.822398585432681e-06, "loss": 0.42959623, "memory(GiB)": 34.88, "step": 38685, "train_speed(iter/s)": 0.415199 }, { "acc": 0.89093628, "epoch": 1.0475726315219451, "grad_norm": 10.970064163208008, "learning_rate": 8.822037824391124e-06, "loss": 0.55728116, "memory(GiB)": 34.88, "step": 38690, "train_speed(iter/s)": 0.415202 }, { "acc": 0.90870762, "epoch": 1.0477080118051607, "grad_norm": 13.542941093444824, "learning_rate": 8.821677015477292e-06, "loss": 0.44235125, "memory(GiB)": 34.88, "step": 38695, "train_speed(iter/s)": 0.415204 }, { "acc": 0.86257515, "epoch": 1.0478433920883763, "grad_norm": 10.5419340133667, "learning_rate": 8.821316158695712e-06, "loss": 0.73611026, "memory(GiB)": 34.88, "step": 38700, "train_speed(iter/s)": 0.415206 }, { "acc": 0.8890378, "epoch": 1.0479787723715919, "grad_norm": 8.643146514892578, "learning_rate": 8.820955254050901e-06, "loss": 0.56943035, "memory(GiB)": 34.88, "step": 38705, "train_speed(iter/s)": 0.415209 }, { "acc": 0.90286827, "epoch": 1.0481141526548075, "grad_norm": 6.464821815490723, "learning_rate": 8.820594301547383e-06, "loss": 0.53276749, "memory(GiB)": 34.88, "step": 38710, "train_speed(iter/s)": 0.415211 }, { "acc": 0.92353764, "epoch": 1.0482495329380228, "grad_norm": 6.345776557922363, "learning_rate": 8.820233301189674e-06, "loss": 0.34858971, "memory(GiB)": 34.88, "step": 38715, "train_speed(iter/s)": 0.415213 }, { "acc": 0.89932289, "epoch": 1.0483849132212384, "grad_norm": 11.30691146850586, "learning_rate": 8.819872252982302e-06, "loss": 0.54195271, "memory(GiB)": 34.88, "step": 38720, "train_speed(iter/s)": 0.415216 }, { "acc": 0.88732262, "epoch": 1.048520293504454, "grad_norm": 7.828784942626953, "learning_rate": 8.819511156929787e-06, "loss": 0.6706727, "memory(GiB)": 34.88, "step": 38725, "train_speed(iter/s)": 0.415218 }, { "acc": 0.85809841, "epoch": 1.0486556737876696, "grad_norm": 32.478111267089844, "learning_rate": 8.819150013036652e-06, "loss": 0.73683085, "memory(GiB)": 34.88, "step": 38730, "train_speed(iter/s)": 0.41522 }, { "acc": 0.85349741, "epoch": 1.0487910540708851, "grad_norm": 12.527682304382324, "learning_rate": 8.818788821307423e-06, "loss": 0.88261509, "memory(GiB)": 34.88, "step": 38735, "train_speed(iter/s)": 0.415223 }, { "acc": 0.87289267, "epoch": 1.0489264343541007, "grad_norm": 9.465681076049805, "learning_rate": 8.818427581746623e-06, "loss": 0.71873217, "memory(GiB)": 34.88, "step": 38740, "train_speed(iter/s)": 0.415225 }, { "acc": 0.88406429, "epoch": 1.0490618146373163, "grad_norm": 11.774989128112793, "learning_rate": 8.818066294358779e-06, "loss": 0.55840473, "memory(GiB)": 34.88, "step": 38745, "train_speed(iter/s)": 0.415227 }, { "acc": 0.89366245, "epoch": 1.0491971949205319, "grad_norm": 8.916116714477539, "learning_rate": 8.817704959148415e-06, "loss": 0.59011192, "memory(GiB)": 34.88, "step": 38750, "train_speed(iter/s)": 0.415229 }, { "acc": 0.8786871, "epoch": 1.0493325752037472, "grad_norm": 6.020536422729492, "learning_rate": 8.817343576120056e-06, "loss": 0.57091804, "memory(GiB)": 34.88, "step": 38755, "train_speed(iter/s)": 0.415232 }, { "acc": 0.88438368, "epoch": 1.0494679554869628, "grad_norm": 5.222094535827637, "learning_rate": 8.816982145278234e-06, "loss": 0.56229916, "memory(GiB)": 34.88, "step": 38760, "train_speed(iter/s)": 0.415234 }, { "acc": 0.8737915, "epoch": 1.0496033357701784, "grad_norm": 11.478858947753906, "learning_rate": 8.816620666627473e-06, "loss": 0.59993, "memory(GiB)": 34.88, "step": 38765, "train_speed(iter/s)": 0.415237 }, { "acc": 0.86826458, "epoch": 1.049738716053394, "grad_norm": 10.529339790344238, "learning_rate": 8.816259140172301e-06, "loss": 0.75696783, "memory(GiB)": 34.88, "step": 38770, "train_speed(iter/s)": 0.415239 }, { "acc": 0.88013897, "epoch": 1.0498740963366096, "grad_norm": 9.763943672180176, "learning_rate": 8.815897565917249e-06, "loss": 0.57628059, "memory(GiB)": 34.88, "step": 38775, "train_speed(iter/s)": 0.415241 }, { "acc": 0.88841095, "epoch": 1.0500094766198251, "grad_norm": 8.026742935180664, "learning_rate": 8.815535943866844e-06, "loss": 0.61579361, "memory(GiB)": 34.88, "step": 38780, "train_speed(iter/s)": 0.415243 }, { "acc": 0.90237598, "epoch": 1.0501448569030407, "grad_norm": 7.100026607513428, "learning_rate": 8.815174274025618e-06, "loss": 0.48560886, "memory(GiB)": 34.88, "step": 38785, "train_speed(iter/s)": 0.415246 }, { "acc": 0.88872662, "epoch": 1.0502802371862563, "grad_norm": 6.861867427825928, "learning_rate": 8.8148125563981e-06, "loss": 0.57927513, "memory(GiB)": 34.88, "step": 38790, "train_speed(iter/s)": 0.415248 }, { "acc": 0.89770927, "epoch": 1.0504156174694717, "grad_norm": 7.3838725090026855, "learning_rate": 8.814450790988826e-06, "loss": 0.49152546, "memory(GiB)": 34.88, "step": 38795, "train_speed(iter/s)": 0.41525 }, { "acc": 0.86368446, "epoch": 1.0505509977526872, "grad_norm": 13.809107780456543, "learning_rate": 8.814088977802318e-06, "loss": 0.79144764, "memory(GiB)": 34.88, "step": 38800, "train_speed(iter/s)": 0.415253 }, { "acc": 0.8762558, "epoch": 1.0506863780359028, "grad_norm": 5.134865760803223, "learning_rate": 8.813727116843118e-06, "loss": 0.62114496, "memory(GiB)": 34.88, "step": 38805, "train_speed(iter/s)": 0.415255 }, { "acc": 0.90497551, "epoch": 1.0508217583191184, "grad_norm": 4.698051452636719, "learning_rate": 8.813365208115757e-06, "loss": 0.47891331, "memory(GiB)": 34.88, "step": 38810, "train_speed(iter/s)": 0.415258 }, { "acc": 0.89014225, "epoch": 1.050957138602334, "grad_norm": 4.598780155181885, "learning_rate": 8.813003251624762e-06, "loss": 0.57158337, "memory(GiB)": 34.88, "step": 38815, "train_speed(iter/s)": 0.41526 }, { "acc": 0.90497627, "epoch": 1.0510925188855496, "grad_norm": 8.010964393615723, "learning_rate": 8.812641247374676e-06, "loss": 0.51625395, "memory(GiB)": 34.88, "step": 38820, "train_speed(iter/s)": 0.415262 }, { "acc": 0.88394642, "epoch": 1.0512278991687651, "grad_norm": 5.853139400482178, "learning_rate": 8.81227919537003e-06, "loss": 0.51288438, "memory(GiB)": 34.88, "step": 38825, "train_speed(iter/s)": 0.415265 }, { "acc": 0.88568268, "epoch": 1.0513632794519807, "grad_norm": 5.646280288696289, "learning_rate": 8.811917095615357e-06, "loss": 0.6656714, "memory(GiB)": 34.88, "step": 38830, "train_speed(iter/s)": 0.415267 }, { "acc": 0.86917257, "epoch": 1.051498659735196, "grad_norm": 10.099136352539062, "learning_rate": 8.811554948115197e-06, "loss": 0.63082309, "memory(GiB)": 34.88, "step": 38835, "train_speed(iter/s)": 0.41527 }, { "acc": 0.89216776, "epoch": 1.0516340400184117, "grad_norm": 6.706290245056152, "learning_rate": 8.811192752874084e-06, "loss": 0.53061924, "memory(GiB)": 34.88, "step": 38840, "train_speed(iter/s)": 0.415272 }, { "acc": 0.89408665, "epoch": 1.0517694203016272, "grad_norm": 9.922689437866211, "learning_rate": 8.810830509896559e-06, "loss": 0.57357597, "memory(GiB)": 34.88, "step": 38845, "train_speed(iter/s)": 0.415274 }, { "acc": 0.88239365, "epoch": 1.0519048005848428, "grad_norm": 5.158347129821777, "learning_rate": 8.810468219187157e-06, "loss": 0.58289371, "memory(GiB)": 34.88, "step": 38850, "train_speed(iter/s)": 0.415277 }, { "acc": 0.91348782, "epoch": 1.0520401808680584, "grad_norm": 5.474571704864502, "learning_rate": 8.810105880750414e-06, "loss": 0.45332065, "memory(GiB)": 34.88, "step": 38855, "train_speed(iter/s)": 0.415279 }, { "acc": 0.88388023, "epoch": 1.052175561151274, "grad_norm": 4.4036383628845215, "learning_rate": 8.80974349459087e-06, "loss": 0.60751824, "memory(GiB)": 34.88, "step": 38860, "train_speed(iter/s)": 0.415282 }, { "acc": 0.88632584, "epoch": 1.0523109414344896, "grad_norm": 8.747809410095215, "learning_rate": 8.809381060713069e-06, "loss": 0.60879784, "memory(GiB)": 34.88, "step": 38865, "train_speed(iter/s)": 0.415284 }, { "acc": 0.89805918, "epoch": 1.0524463217177051, "grad_norm": 7.831678867340088, "learning_rate": 8.809018579121548e-06, "loss": 0.52238908, "memory(GiB)": 34.88, "step": 38870, "train_speed(iter/s)": 0.415286 }, { "acc": 0.87497635, "epoch": 1.0525817020009205, "grad_norm": 26.504858016967773, "learning_rate": 8.808656049820845e-06, "loss": 0.64809175, "memory(GiB)": 34.88, "step": 38875, "train_speed(iter/s)": 0.415289 }, { "acc": 0.88676443, "epoch": 1.052717082284136, "grad_norm": 7.215841770172119, "learning_rate": 8.808293472815508e-06, "loss": 0.70525689, "memory(GiB)": 34.88, "step": 38880, "train_speed(iter/s)": 0.415291 }, { "acc": 0.89711895, "epoch": 1.0528524625673517, "grad_norm": 8.59821605682373, "learning_rate": 8.807930848110076e-06, "loss": 0.5257576, "memory(GiB)": 34.88, "step": 38885, "train_speed(iter/s)": 0.415294 }, { "acc": 0.87715206, "epoch": 1.0529878428505672, "grad_norm": 7.921141147613525, "learning_rate": 8.80756817570909e-06, "loss": 0.60240288, "memory(GiB)": 34.88, "step": 38890, "train_speed(iter/s)": 0.415296 }, { "acc": 0.88855762, "epoch": 1.0531232231337828, "grad_norm": 24.813962936401367, "learning_rate": 8.807205455617094e-06, "loss": 0.59665003, "memory(GiB)": 34.88, "step": 38895, "train_speed(iter/s)": 0.415299 }, { "acc": 0.88644638, "epoch": 1.0532586034169984, "grad_norm": 8.499070167541504, "learning_rate": 8.806842687838631e-06, "loss": 0.6214695, "memory(GiB)": 34.88, "step": 38900, "train_speed(iter/s)": 0.415301 }, { "acc": 0.87652454, "epoch": 1.053393983700214, "grad_norm": 6.708542823791504, "learning_rate": 8.806479872378247e-06, "loss": 0.67195716, "memory(GiB)": 34.88, "step": 38905, "train_speed(iter/s)": 0.415303 }, { "acc": 0.90195045, "epoch": 1.0535293639834296, "grad_norm": 5.512512683868408, "learning_rate": 8.806117009240488e-06, "loss": 0.52599821, "memory(GiB)": 34.88, "step": 38910, "train_speed(iter/s)": 0.415305 }, { "acc": 0.88411846, "epoch": 1.053664744266645, "grad_norm": 10.02165699005127, "learning_rate": 8.805754098429896e-06, "loss": 0.62163086, "memory(GiB)": 34.88, "step": 38915, "train_speed(iter/s)": 0.415306 }, { "acc": 0.89630938, "epoch": 1.0538001245498605, "grad_norm": 13.558528900146484, "learning_rate": 8.805391139951021e-06, "loss": 0.58638277, "memory(GiB)": 34.88, "step": 38920, "train_speed(iter/s)": 0.415309 }, { "acc": 0.87466373, "epoch": 1.053935504833076, "grad_norm": 5.920779228210449, "learning_rate": 8.805028133808405e-06, "loss": 0.60750675, "memory(GiB)": 34.88, "step": 38925, "train_speed(iter/s)": 0.415311 }, { "acc": 0.89946966, "epoch": 1.0540708851162917, "grad_norm": 19.50156593322754, "learning_rate": 8.8046650800066e-06, "loss": 0.49512939, "memory(GiB)": 34.88, "step": 38930, "train_speed(iter/s)": 0.415314 }, { "acc": 0.89804115, "epoch": 1.0542062653995072, "grad_norm": 10.619224548339844, "learning_rate": 8.804301978550151e-06, "loss": 0.55924196, "memory(GiB)": 34.88, "step": 38935, "train_speed(iter/s)": 0.415316 }, { "acc": 0.86738253, "epoch": 1.0543416456827228, "grad_norm": 7.440145969390869, "learning_rate": 8.80393882944361e-06, "loss": 0.73849497, "memory(GiB)": 34.88, "step": 38940, "train_speed(iter/s)": 0.415318 }, { "acc": 0.90650721, "epoch": 1.0544770259659384, "grad_norm": 8.171278953552246, "learning_rate": 8.80357563269152e-06, "loss": 0.54372854, "memory(GiB)": 34.88, "step": 38945, "train_speed(iter/s)": 0.415319 }, { "acc": 0.89416008, "epoch": 1.054612406249154, "grad_norm": 5.983828544616699, "learning_rate": 8.80321238829844e-06, "loss": 0.53950348, "memory(GiB)": 34.88, "step": 38950, "train_speed(iter/s)": 0.41532 }, { "acc": 0.89232674, "epoch": 1.0547477865323693, "grad_norm": 5.715209484100342, "learning_rate": 8.802849096268912e-06, "loss": 0.47002516, "memory(GiB)": 34.88, "step": 38955, "train_speed(iter/s)": 0.415323 }, { "acc": 0.88492517, "epoch": 1.054883166815585, "grad_norm": 7.4747138023376465, "learning_rate": 8.80248575660749e-06, "loss": 0.66013656, "memory(GiB)": 34.88, "step": 38960, "train_speed(iter/s)": 0.415325 }, { "acc": 0.88849564, "epoch": 1.0550185470988005, "grad_norm": 10.907853126525879, "learning_rate": 8.802122369318727e-06, "loss": 0.56431494, "memory(GiB)": 34.88, "step": 38965, "train_speed(iter/s)": 0.415326 }, { "acc": 0.88526707, "epoch": 1.055153927382016, "grad_norm": 5.583466529846191, "learning_rate": 8.80175893440717e-06, "loss": 0.6015729, "memory(GiB)": 34.88, "step": 38970, "train_speed(iter/s)": 0.415327 }, { "acc": 0.88963528, "epoch": 1.0552893076652317, "grad_norm": 9.26816463470459, "learning_rate": 8.801395451877378e-06, "loss": 0.56917429, "memory(GiB)": 34.88, "step": 38975, "train_speed(iter/s)": 0.415329 }, { "acc": 0.89511509, "epoch": 1.0554246879484472, "grad_norm": 6.22952127456665, "learning_rate": 8.801031921733903e-06, "loss": 0.53204956, "memory(GiB)": 34.88, "step": 38980, "train_speed(iter/s)": 0.41533 }, { "acc": 0.8852932, "epoch": 1.0555600682316628, "grad_norm": 6.7628068923950195, "learning_rate": 8.800668343981296e-06, "loss": 0.6234549, "memory(GiB)": 34.88, "step": 38985, "train_speed(iter/s)": 0.415333 }, { "acc": 0.87925587, "epoch": 1.0556954485148784, "grad_norm": 6.821954250335693, "learning_rate": 8.800304718624113e-06, "loss": 0.67402, "memory(GiB)": 34.88, "step": 38990, "train_speed(iter/s)": 0.415335 }, { "acc": 0.87105331, "epoch": 1.0558308287980938, "grad_norm": 12.618706703186035, "learning_rate": 8.79994104566691e-06, "loss": 0.70599718, "memory(GiB)": 34.88, "step": 38995, "train_speed(iter/s)": 0.415337 }, { "acc": 0.91180305, "epoch": 1.0559662090813093, "grad_norm": 7.743525981903076, "learning_rate": 8.799577325114242e-06, "loss": 0.43502274, "memory(GiB)": 34.88, "step": 39000, "train_speed(iter/s)": 0.415338 }, { "acc": 0.89880962, "epoch": 1.056101589364525, "grad_norm": 6.839019298553467, "learning_rate": 8.799213556970664e-06, "loss": 0.55917206, "memory(GiB)": 34.88, "step": 39005, "train_speed(iter/s)": 0.41534 }, { "acc": 0.88954411, "epoch": 1.0562369696477405, "grad_norm": 8.723579406738281, "learning_rate": 8.798849741240736e-06, "loss": 0.51405916, "memory(GiB)": 34.88, "step": 39010, "train_speed(iter/s)": 0.415342 }, { "acc": 0.89597645, "epoch": 1.056372349930956, "grad_norm": 7.449071407318115, "learning_rate": 8.798485877929012e-06, "loss": 0.5806088, "memory(GiB)": 34.88, "step": 39015, "train_speed(iter/s)": 0.415344 }, { "acc": 0.88572464, "epoch": 1.0565077302141717, "grad_norm": 10.729837417602539, "learning_rate": 8.798121967040052e-06, "loss": 0.67593431, "memory(GiB)": 34.88, "step": 39020, "train_speed(iter/s)": 0.415347 }, { "acc": 0.87916222, "epoch": 1.0566431104973872, "grad_norm": 20.479467391967773, "learning_rate": 8.797758008578418e-06, "loss": 0.59013081, "memory(GiB)": 34.88, "step": 39025, "train_speed(iter/s)": 0.415348 }, { "acc": 0.88397713, "epoch": 1.0567784907806028, "grad_norm": 11.016867637634277, "learning_rate": 8.797394002548663e-06, "loss": 0.57281919, "memory(GiB)": 34.88, "step": 39030, "train_speed(iter/s)": 0.415349 }, { "acc": 0.89003849, "epoch": 1.0569138710638182, "grad_norm": 11.238348007202148, "learning_rate": 8.79702994895535e-06, "loss": 0.55464501, "memory(GiB)": 34.88, "step": 39035, "train_speed(iter/s)": 0.415352 }, { "acc": 0.89206209, "epoch": 1.0570492513470338, "grad_norm": 6.3975019454956055, "learning_rate": 8.79666584780304e-06, "loss": 0.52112112, "memory(GiB)": 34.88, "step": 39040, "train_speed(iter/s)": 0.415354 }, { "acc": 0.89762669, "epoch": 1.0571846316302493, "grad_norm": 7.600982666015625, "learning_rate": 8.796301699096292e-06, "loss": 0.49075084, "memory(GiB)": 34.88, "step": 39045, "train_speed(iter/s)": 0.415356 }, { "acc": 0.87548542, "epoch": 1.057320011913465, "grad_norm": 8.062139511108398, "learning_rate": 8.795937502839669e-06, "loss": 0.58754034, "memory(GiB)": 34.88, "step": 39050, "train_speed(iter/s)": 0.415359 }, { "acc": 0.88430977, "epoch": 1.0574553921966805, "grad_norm": 12.89348316192627, "learning_rate": 8.795573259037733e-06, "loss": 0.56294794, "memory(GiB)": 34.88, "step": 39055, "train_speed(iter/s)": 0.41536 }, { "acc": 0.87669964, "epoch": 1.057590772479896, "grad_norm": 6.7232666015625, "learning_rate": 8.795208967695049e-06, "loss": 0.57985401, "memory(GiB)": 34.88, "step": 39060, "train_speed(iter/s)": 0.41536 }, { "acc": 0.88954353, "epoch": 1.0577261527631117, "grad_norm": 6.780157566070557, "learning_rate": 8.794844628816177e-06, "loss": 0.63818026, "memory(GiB)": 34.88, "step": 39065, "train_speed(iter/s)": 0.415362 }, { "acc": 0.86202221, "epoch": 1.0578615330463272, "grad_norm": 13.08198070526123, "learning_rate": 8.794480242405683e-06, "loss": 0.76014595, "memory(GiB)": 34.88, "step": 39070, "train_speed(iter/s)": 0.415365 }, { "acc": 0.8765378, "epoch": 1.0579969133295426, "grad_norm": 7.145833969116211, "learning_rate": 8.79411580846813e-06, "loss": 0.68504429, "memory(GiB)": 34.88, "step": 39075, "train_speed(iter/s)": 0.415367 }, { "acc": 0.9108717, "epoch": 1.0581322936127582, "grad_norm": 4.600131034851074, "learning_rate": 8.793751327008087e-06, "loss": 0.44396276, "memory(GiB)": 34.88, "step": 39080, "train_speed(iter/s)": 0.415369 }, { "acc": 0.88244019, "epoch": 1.0582676738959738, "grad_norm": 8.35002613067627, "learning_rate": 8.793386798030117e-06, "loss": 0.58550701, "memory(GiB)": 34.88, "step": 39085, "train_speed(iter/s)": 0.415372 }, { "acc": 0.87778206, "epoch": 1.0584030541791893, "grad_norm": 11.534174919128418, "learning_rate": 8.793022221538786e-06, "loss": 0.65907106, "memory(GiB)": 34.88, "step": 39090, "train_speed(iter/s)": 0.415374 }, { "acc": 0.89933424, "epoch": 1.058538434462405, "grad_norm": 6.503966808319092, "learning_rate": 8.792657597538663e-06, "loss": 0.57092543, "memory(GiB)": 34.88, "step": 39095, "train_speed(iter/s)": 0.415376 }, { "acc": 0.89729414, "epoch": 1.0586738147456205, "grad_norm": 8.33030891418457, "learning_rate": 8.792292926034313e-06, "loss": 0.51875582, "memory(GiB)": 34.88, "step": 39100, "train_speed(iter/s)": 0.415377 }, { "acc": 0.88878517, "epoch": 1.058809195028836, "grad_norm": 11.433676719665527, "learning_rate": 8.791928207030307e-06, "loss": 0.68064814, "memory(GiB)": 34.88, "step": 39105, "train_speed(iter/s)": 0.41538 }, { "acc": 0.89024439, "epoch": 1.0589445753120517, "grad_norm": 7.772906303405762, "learning_rate": 8.791563440531212e-06, "loss": 0.50972939, "memory(GiB)": 34.88, "step": 39110, "train_speed(iter/s)": 0.415382 }, { "acc": 0.87364559, "epoch": 1.059079955595267, "grad_norm": 10.979668617248535, "learning_rate": 8.791198626541597e-06, "loss": 0.69786358, "memory(GiB)": 34.88, "step": 39115, "train_speed(iter/s)": 0.415384 }, { "acc": 0.87581158, "epoch": 1.0592153358784826, "grad_norm": 7.566807270050049, "learning_rate": 8.790833765066034e-06, "loss": 0.58897705, "memory(GiB)": 34.88, "step": 39120, "train_speed(iter/s)": 0.415386 }, { "acc": 0.85478401, "epoch": 1.0593507161616982, "grad_norm": 22.6336612701416, "learning_rate": 8.790468856109094e-06, "loss": 0.76310134, "memory(GiB)": 34.88, "step": 39125, "train_speed(iter/s)": 0.415388 }, { "acc": 0.90319033, "epoch": 1.0594860964449138, "grad_norm": 5.230920314788818, "learning_rate": 8.790103899675346e-06, "loss": 0.44808168, "memory(GiB)": 34.88, "step": 39130, "train_speed(iter/s)": 0.415391 }, { "acc": 0.87738028, "epoch": 1.0596214767281293, "grad_norm": 27.683963775634766, "learning_rate": 8.78973889576936e-06, "loss": 0.71257939, "memory(GiB)": 34.88, "step": 39135, "train_speed(iter/s)": 0.415393 }, { "acc": 0.88525028, "epoch": 1.059756857011345, "grad_norm": 16.75619125366211, "learning_rate": 8.789373844395714e-06, "loss": 0.61283765, "memory(GiB)": 34.88, "step": 39140, "train_speed(iter/s)": 0.415395 }, { "acc": 0.89292088, "epoch": 1.0598922372945605, "grad_norm": 5.959201812744141, "learning_rate": 8.789008745558976e-06, "loss": 0.54457407, "memory(GiB)": 34.88, "step": 39145, "train_speed(iter/s)": 0.415398 }, { "acc": 0.89983578, "epoch": 1.0600276175777759, "grad_norm": 3.195039987564087, "learning_rate": 8.788643599263723e-06, "loss": 0.4983655, "memory(GiB)": 34.88, "step": 39150, "train_speed(iter/s)": 0.4154 }, { "acc": 0.90100517, "epoch": 1.0601629978609914, "grad_norm": 7.940361976623535, "learning_rate": 8.788278405514526e-06, "loss": 0.50632615, "memory(GiB)": 34.88, "step": 39155, "train_speed(iter/s)": 0.415402 }, { "acc": 0.87684832, "epoch": 1.060298378144207, "grad_norm": 12.458794593811035, "learning_rate": 8.787913164315964e-06, "loss": 0.70068297, "memory(GiB)": 34.88, "step": 39160, "train_speed(iter/s)": 0.415405 }, { "acc": 0.88375053, "epoch": 1.0604337584274226, "grad_norm": 10.237693786621094, "learning_rate": 8.787547875672609e-06, "loss": 0.59514451, "memory(GiB)": 34.88, "step": 39165, "train_speed(iter/s)": 0.415407 }, { "acc": 0.8915575, "epoch": 1.0605691387106382, "grad_norm": 8.733705520629883, "learning_rate": 8.787182539589038e-06, "loss": 0.57488413, "memory(GiB)": 34.88, "step": 39170, "train_speed(iter/s)": 0.415409 }, { "acc": 0.90200653, "epoch": 1.0607045189938538, "grad_norm": 6.462183952331543, "learning_rate": 8.786817156069825e-06, "loss": 0.49539542, "memory(GiB)": 34.88, "step": 39175, "train_speed(iter/s)": 0.415412 }, { "acc": 0.90790424, "epoch": 1.0608398992770693, "grad_norm": 3.397928237915039, "learning_rate": 8.78645172511955e-06, "loss": 0.45207896, "memory(GiB)": 34.88, "step": 39180, "train_speed(iter/s)": 0.415414 }, { "acc": 0.89147968, "epoch": 1.060975279560285, "grad_norm": 7.091253280639648, "learning_rate": 8.786086246742792e-06, "loss": 0.58546658, "memory(GiB)": 34.88, "step": 39185, "train_speed(iter/s)": 0.415416 }, { "acc": 0.88969555, "epoch": 1.0611106598435005, "grad_norm": 9.819334030151367, "learning_rate": 8.785720720944127e-06, "loss": 0.61383972, "memory(GiB)": 34.88, "step": 39190, "train_speed(iter/s)": 0.415418 }, { "acc": 0.89719095, "epoch": 1.0612460401267159, "grad_norm": 8.835036277770996, "learning_rate": 8.785355147728136e-06, "loss": 0.62857246, "memory(GiB)": 34.88, "step": 39195, "train_speed(iter/s)": 0.415421 }, { "acc": 0.89879999, "epoch": 1.0613814204099314, "grad_norm": 8.109749794006348, "learning_rate": 8.784989527099394e-06, "loss": 0.53011017, "memory(GiB)": 34.88, "step": 39200, "train_speed(iter/s)": 0.415423 }, { "acc": 0.88003149, "epoch": 1.061516800693147, "grad_norm": 6.894897937774658, "learning_rate": 8.784623859062487e-06, "loss": 0.56204662, "memory(GiB)": 34.88, "step": 39205, "train_speed(iter/s)": 0.415425 }, { "acc": 0.88680763, "epoch": 1.0616521809763626, "grad_norm": 7.927652835845947, "learning_rate": 8.784258143621993e-06, "loss": 0.5635581, "memory(GiB)": 34.88, "step": 39210, "train_speed(iter/s)": 0.415428 }, { "acc": 0.8583025, "epoch": 1.0617875612595782, "grad_norm": 16.781810760498047, "learning_rate": 8.783892380782491e-06, "loss": 0.70090432, "memory(GiB)": 34.88, "step": 39215, "train_speed(iter/s)": 0.41543 }, { "acc": 0.88366671, "epoch": 1.0619229415427938, "grad_norm": 26.511592864990234, "learning_rate": 8.783526570548567e-06, "loss": 0.66562057, "memory(GiB)": 34.88, "step": 39220, "train_speed(iter/s)": 0.415433 }, { "acc": 0.88388796, "epoch": 1.0620583218260093, "grad_norm": 14.68570613861084, "learning_rate": 8.7831607129248e-06, "loss": 0.57999234, "memory(GiB)": 34.88, "step": 39225, "train_speed(iter/s)": 0.415435 }, { "acc": 0.88088036, "epoch": 1.0621937021092247, "grad_norm": 6.836246490478516, "learning_rate": 8.782794807915775e-06, "loss": 0.61924028, "memory(GiB)": 34.88, "step": 39230, "train_speed(iter/s)": 0.415437 }, { "acc": 0.88527527, "epoch": 1.0623290823924403, "grad_norm": 8.953149795532227, "learning_rate": 8.782428855526075e-06, "loss": 0.7371336, "memory(GiB)": 34.88, "step": 39235, "train_speed(iter/s)": 0.415439 }, { "acc": 0.90550213, "epoch": 1.0624644626756559, "grad_norm": 12.59799861907959, "learning_rate": 8.782062855760286e-06, "loss": 0.47141552, "memory(GiB)": 34.88, "step": 39240, "train_speed(iter/s)": 0.415442 }, { "acc": 0.89483681, "epoch": 1.0625998429588714, "grad_norm": 10.444833755493164, "learning_rate": 8.78169680862299e-06, "loss": 0.53831787, "memory(GiB)": 34.88, "step": 39245, "train_speed(iter/s)": 0.415444 }, { "acc": 0.88523808, "epoch": 1.062735223242087, "grad_norm": 8.788199424743652, "learning_rate": 8.781330714118776e-06, "loss": 0.59342141, "memory(GiB)": 34.88, "step": 39250, "train_speed(iter/s)": 0.415447 }, { "acc": 0.89409237, "epoch": 1.0628706035253026, "grad_norm": 8.64614200592041, "learning_rate": 8.780964572252224e-06, "loss": 0.65252275, "memory(GiB)": 34.88, "step": 39255, "train_speed(iter/s)": 0.415449 }, { "acc": 0.89749746, "epoch": 1.0630059838085182, "grad_norm": 8.000065803527832, "learning_rate": 8.78059838302793e-06, "loss": 0.47281742, "memory(GiB)": 34.88, "step": 39260, "train_speed(iter/s)": 0.415451 }, { "acc": 0.87193985, "epoch": 1.0631413640917338, "grad_norm": 7.97413969039917, "learning_rate": 8.780232146450472e-06, "loss": 0.65518522, "memory(GiB)": 34.88, "step": 39265, "train_speed(iter/s)": 0.415454 }, { "acc": 0.90596247, "epoch": 1.0632767443749493, "grad_norm": 3.793832778930664, "learning_rate": 8.779865862524443e-06, "loss": 0.40678263, "memory(GiB)": 34.88, "step": 39270, "train_speed(iter/s)": 0.415456 }, { "acc": 0.88307219, "epoch": 1.0634121246581647, "grad_norm": 19.809839248657227, "learning_rate": 8.779499531254428e-06, "loss": 0.64327912, "memory(GiB)": 34.88, "step": 39275, "train_speed(iter/s)": 0.415458 }, { "acc": 0.88608046, "epoch": 1.0635475049413803, "grad_norm": 6.301867485046387, "learning_rate": 8.779133152645021e-06, "loss": 0.59592047, "memory(GiB)": 34.88, "step": 39280, "train_speed(iter/s)": 0.415461 }, { "acc": 0.91270475, "epoch": 1.0636828852245959, "grad_norm": 11.45946979522705, "learning_rate": 8.778766726700807e-06, "loss": 0.44834666, "memory(GiB)": 34.88, "step": 39285, "train_speed(iter/s)": 0.415463 }, { "acc": 0.89272051, "epoch": 1.0638182655078114, "grad_norm": 11.093997955322266, "learning_rate": 8.77840025342638e-06, "loss": 0.57558265, "memory(GiB)": 34.88, "step": 39290, "train_speed(iter/s)": 0.415465 }, { "acc": 0.88785, "epoch": 1.063953645791027, "grad_norm": 10.126855850219727, "learning_rate": 8.778033732826326e-06, "loss": 0.64645939, "memory(GiB)": 34.88, "step": 39295, "train_speed(iter/s)": 0.415467 }, { "acc": 0.8706068, "epoch": 1.0640890260742426, "grad_norm": 17.329051971435547, "learning_rate": 8.777667164905243e-06, "loss": 0.71720295, "memory(GiB)": 34.88, "step": 39300, "train_speed(iter/s)": 0.41547 }, { "acc": 0.89523525, "epoch": 1.0642244063574582, "grad_norm": 9.990286827087402, "learning_rate": 8.777300549667716e-06, "loss": 0.53025312, "memory(GiB)": 34.88, "step": 39305, "train_speed(iter/s)": 0.415472 }, { "acc": 0.88285637, "epoch": 1.0643597866406735, "grad_norm": 9.57045841217041, "learning_rate": 8.77693388711834e-06, "loss": 0.66937809, "memory(GiB)": 34.88, "step": 39310, "train_speed(iter/s)": 0.415475 }, { "acc": 0.8854208, "epoch": 1.0644951669238891, "grad_norm": 6.828786373138428, "learning_rate": 8.776567177261712e-06, "loss": 0.49357738, "memory(GiB)": 34.88, "step": 39315, "train_speed(iter/s)": 0.415477 }, { "acc": 0.89986649, "epoch": 1.0646305472071047, "grad_norm": 14.97418212890625, "learning_rate": 8.77620042010242e-06, "loss": 0.54018607, "memory(GiB)": 34.88, "step": 39320, "train_speed(iter/s)": 0.415479 }, { "acc": 0.87981033, "epoch": 1.0647659274903203, "grad_norm": 11.219864845275879, "learning_rate": 8.775833615645063e-06, "loss": 0.68239894, "memory(GiB)": 34.88, "step": 39325, "train_speed(iter/s)": 0.415481 }, { "acc": 0.87747536, "epoch": 1.0649013077735359, "grad_norm": 8.900113105773926, "learning_rate": 8.775466763894232e-06, "loss": 0.5645299, "memory(GiB)": 34.88, "step": 39330, "train_speed(iter/s)": 0.415483 }, { "acc": 0.88959494, "epoch": 1.0650366880567514, "grad_norm": 7.150771617889404, "learning_rate": 8.775099864854528e-06, "loss": 0.53334064, "memory(GiB)": 34.88, "step": 39335, "train_speed(iter/s)": 0.415486 }, { "acc": 0.86313763, "epoch": 1.065172068339967, "grad_norm": 9.262136459350586, "learning_rate": 8.77473291853054e-06, "loss": 0.71446409, "memory(GiB)": 34.88, "step": 39340, "train_speed(iter/s)": 0.415487 }, { "acc": 0.87433834, "epoch": 1.0653074486231826, "grad_norm": 5.051617622375488, "learning_rate": 8.774365924926869e-06, "loss": 0.68655868, "memory(GiB)": 34.88, "step": 39345, "train_speed(iter/s)": 0.41549 }, { "acc": 0.89243851, "epoch": 1.0654428289063982, "grad_norm": 5.450623989105225, "learning_rate": 8.773998884048113e-06, "loss": 0.49023781, "memory(GiB)": 34.88, "step": 39350, "train_speed(iter/s)": 0.415492 }, { "acc": 0.88372955, "epoch": 1.0655782091896135, "grad_norm": 11.583861351013184, "learning_rate": 8.773631795898868e-06, "loss": 0.53794298, "memory(GiB)": 34.88, "step": 39355, "train_speed(iter/s)": 0.415494 }, { "acc": 0.90431232, "epoch": 1.0657135894728291, "grad_norm": 11.146806716918945, "learning_rate": 8.773264660483732e-06, "loss": 0.43948736, "memory(GiB)": 34.88, "step": 39360, "train_speed(iter/s)": 0.415496 }, { "acc": 0.85574656, "epoch": 1.0658489697560447, "grad_norm": 54.64267349243164, "learning_rate": 8.772897477807305e-06, "loss": 0.7824904, "memory(GiB)": 34.88, "step": 39365, "train_speed(iter/s)": 0.415498 }, { "acc": 0.87728214, "epoch": 1.0659843500392603, "grad_norm": 22.355093002319336, "learning_rate": 8.772530247874188e-06, "loss": 0.68272715, "memory(GiB)": 34.88, "step": 39370, "train_speed(iter/s)": 0.4155 }, { "acc": 0.88465996, "epoch": 1.0661197303224759, "grad_norm": 19.027667999267578, "learning_rate": 8.77216297068898e-06, "loss": 0.65338802, "memory(GiB)": 34.88, "step": 39375, "train_speed(iter/s)": 0.415503 }, { "acc": 0.87047577, "epoch": 1.0662551106056914, "grad_norm": 9.527612686157227, "learning_rate": 8.771795646256282e-06, "loss": 0.67155714, "memory(GiB)": 34.88, "step": 39380, "train_speed(iter/s)": 0.415505 }, { "acc": 0.87476635, "epoch": 1.066390490888907, "grad_norm": 8.996623992919922, "learning_rate": 8.771428274580696e-06, "loss": 0.73342595, "memory(GiB)": 34.88, "step": 39385, "train_speed(iter/s)": 0.415508 }, { "acc": 0.86647186, "epoch": 1.0665258711721224, "grad_norm": 11.901894569396973, "learning_rate": 8.771060855666822e-06, "loss": 0.73750591, "memory(GiB)": 34.88, "step": 39390, "train_speed(iter/s)": 0.41551 }, { "acc": 0.91130505, "epoch": 1.066661251455338, "grad_norm": 6.7510986328125, "learning_rate": 8.770693389519266e-06, "loss": 0.43347149, "memory(GiB)": 34.88, "step": 39395, "train_speed(iter/s)": 0.415513 }, { "acc": 0.88852243, "epoch": 1.0667966317385535, "grad_norm": 17.085697174072266, "learning_rate": 8.770325876142628e-06, "loss": 0.61585579, "memory(GiB)": 34.88, "step": 39400, "train_speed(iter/s)": 0.415515 }, { "acc": 0.89035492, "epoch": 1.0669320120217691, "grad_norm": 4.227520942687988, "learning_rate": 8.769958315541514e-06, "loss": 0.51215258, "memory(GiB)": 34.88, "step": 39405, "train_speed(iter/s)": 0.415518 }, { "acc": 0.89748602, "epoch": 1.0670673923049847, "grad_norm": 12.194405555725098, "learning_rate": 8.769590707720526e-06, "loss": 0.5935379, "memory(GiB)": 34.88, "step": 39410, "train_speed(iter/s)": 0.41552 }, { "acc": 0.88256016, "epoch": 1.0672027725882003, "grad_norm": 9.777103424072266, "learning_rate": 8.769223052684272e-06, "loss": 0.62465582, "memory(GiB)": 34.88, "step": 39415, "train_speed(iter/s)": 0.415522 }, { "acc": 0.88373852, "epoch": 1.0673381528714159, "grad_norm": 8.834369659423828, "learning_rate": 8.768855350437356e-06, "loss": 0.60362167, "memory(GiB)": 34.88, "step": 39420, "train_speed(iter/s)": 0.415525 }, { "acc": 0.8984108, "epoch": 1.0674735331546314, "grad_norm": 4.3580732345581055, "learning_rate": 8.768487600984384e-06, "loss": 0.44945979, "memory(GiB)": 34.88, "step": 39425, "train_speed(iter/s)": 0.415527 }, { "acc": 0.87481537, "epoch": 1.067608913437847, "grad_norm": 5.83779764175415, "learning_rate": 8.768119804329964e-06, "loss": 0.72933645, "memory(GiB)": 34.88, "step": 39430, "train_speed(iter/s)": 0.415529 }, { "acc": 0.90301418, "epoch": 1.0677442937210624, "grad_norm": 5.103248119354248, "learning_rate": 8.767751960478703e-06, "loss": 0.5098382, "memory(GiB)": 34.88, "step": 39435, "train_speed(iter/s)": 0.415531 }, { "acc": 0.89122639, "epoch": 1.067879674004278, "grad_norm": 4.520986080169678, "learning_rate": 8.767384069435207e-06, "loss": 0.51195941, "memory(GiB)": 34.88, "step": 39440, "train_speed(iter/s)": 0.415533 }, { "acc": 0.89119434, "epoch": 1.0680150542874935, "grad_norm": 6.212924003601074, "learning_rate": 8.767016131204089e-06, "loss": 0.63116961, "memory(GiB)": 34.88, "step": 39445, "train_speed(iter/s)": 0.415536 }, { "acc": 0.88753195, "epoch": 1.0681504345707091, "grad_norm": 7.105027198791504, "learning_rate": 8.766648145789952e-06, "loss": 0.4773613, "memory(GiB)": 34.88, "step": 39450, "train_speed(iter/s)": 0.415538 }, { "acc": 0.88577795, "epoch": 1.0682858148539247, "grad_norm": 9.378750801086426, "learning_rate": 8.76628011319741e-06, "loss": 0.69177885, "memory(GiB)": 34.88, "step": 39455, "train_speed(iter/s)": 0.41554 }, { "acc": 0.88464441, "epoch": 1.0684211951371403, "grad_norm": 9.026055335998535, "learning_rate": 8.765912033431074e-06, "loss": 0.66628537, "memory(GiB)": 34.88, "step": 39460, "train_speed(iter/s)": 0.415542 }, { "acc": 0.89048796, "epoch": 1.0685565754203559, "grad_norm": 10.956482887268066, "learning_rate": 8.76554390649555e-06, "loss": 0.59438872, "memory(GiB)": 34.88, "step": 39465, "train_speed(iter/s)": 0.415545 }, { "acc": 0.88078632, "epoch": 1.0686919557035712, "grad_norm": 11.85310173034668, "learning_rate": 8.765175732395456e-06, "loss": 0.69034319, "memory(GiB)": 34.88, "step": 39470, "train_speed(iter/s)": 0.415547 }, { "acc": 0.90496559, "epoch": 1.0688273359867868, "grad_norm": 12.802542686462402, "learning_rate": 8.764807511135398e-06, "loss": 0.37834387, "memory(GiB)": 34.88, "step": 39475, "train_speed(iter/s)": 0.415549 }, { "acc": 0.87662411, "epoch": 1.0689627162700024, "grad_norm": 12.021344184875488, "learning_rate": 8.764439242719993e-06, "loss": 0.62513332, "memory(GiB)": 34.88, "step": 39480, "train_speed(iter/s)": 0.415551 }, { "acc": 0.87824459, "epoch": 1.069098096553218, "grad_norm": 9.686911582946777, "learning_rate": 8.764070927153854e-06, "loss": 0.68275371, "memory(GiB)": 34.88, "step": 39485, "train_speed(iter/s)": 0.415554 }, { "acc": 0.89349127, "epoch": 1.0692334768364335, "grad_norm": 8.497353553771973, "learning_rate": 8.763702564441593e-06, "loss": 0.66322861, "memory(GiB)": 34.88, "step": 39490, "train_speed(iter/s)": 0.415556 }, { "acc": 0.88025188, "epoch": 1.0693688571196491, "grad_norm": 23.292312622070312, "learning_rate": 8.763334154587825e-06, "loss": 0.68538485, "memory(GiB)": 34.88, "step": 39495, "train_speed(iter/s)": 0.415558 }, { "acc": 0.86852055, "epoch": 1.0695042374028647, "grad_norm": 10.393710136413574, "learning_rate": 8.762965697597165e-06, "loss": 0.7222724, "memory(GiB)": 34.88, "step": 39500, "train_speed(iter/s)": 0.41556 }, { "acc": 0.88730888, "epoch": 1.0696396176860803, "grad_norm": 7.377163410186768, "learning_rate": 8.76259719347423e-06, "loss": 0.57734776, "memory(GiB)": 34.88, "step": 39505, "train_speed(iter/s)": 0.415562 }, { "acc": 0.88192158, "epoch": 1.0697749979692956, "grad_norm": 13.814397811889648, "learning_rate": 8.762228642223632e-06, "loss": 0.60592899, "memory(GiB)": 34.88, "step": 39510, "train_speed(iter/s)": 0.415564 }, { "acc": 0.86869106, "epoch": 1.0699103782525112, "grad_norm": 13.349080085754395, "learning_rate": 8.761860043849995e-06, "loss": 0.609519, "memory(GiB)": 34.88, "step": 39515, "train_speed(iter/s)": 0.415567 }, { "acc": 0.88444691, "epoch": 1.0700457585357268, "grad_norm": 12.224133491516113, "learning_rate": 8.76149139835793e-06, "loss": 0.62278175, "memory(GiB)": 34.88, "step": 39520, "train_speed(iter/s)": 0.415569 }, { "acc": 0.87725334, "epoch": 1.0701811388189424, "grad_norm": 8.342558860778809, "learning_rate": 8.761122705752058e-06, "loss": 0.5577364, "memory(GiB)": 34.88, "step": 39525, "train_speed(iter/s)": 0.415571 }, { "acc": 0.88550568, "epoch": 1.070316519102158, "grad_norm": 17.429691314697266, "learning_rate": 8.760753966036996e-06, "loss": 0.61586514, "memory(GiB)": 34.88, "step": 39530, "train_speed(iter/s)": 0.415573 }, { "acc": 0.88769608, "epoch": 1.0704518993853736, "grad_norm": 12.264715194702148, "learning_rate": 8.760385179217366e-06, "loss": 0.68438168, "memory(GiB)": 34.88, "step": 39535, "train_speed(iter/s)": 0.415576 }, { "acc": 0.87795162, "epoch": 1.0705872796685891, "grad_norm": 9.928439140319824, "learning_rate": 8.760016345297783e-06, "loss": 0.68625622, "memory(GiB)": 34.88, "step": 39540, "train_speed(iter/s)": 0.415578 }, { "acc": 0.86516857, "epoch": 1.0707226599518047, "grad_norm": 13.77204418182373, "learning_rate": 8.759647464282874e-06, "loss": 0.78221292, "memory(GiB)": 34.88, "step": 39545, "train_speed(iter/s)": 0.415581 }, { "acc": 0.88111677, "epoch": 1.07085804023502, "grad_norm": 7.99819803237915, "learning_rate": 8.759278536177255e-06, "loss": 0.64193501, "memory(GiB)": 34.88, "step": 39550, "train_speed(iter/s)": 0.415583 }, { "acc": 0.89681129, "epoch": 1.0709934205182357, "grad_norm": 13.990997314453125, "learning_rate": 8.758909560985547e-06, "loss": 0.5708776, "memory(GiB)": 34.88, "step": 39555, "train_speed(iter/s)": 0.415585 }, { "acc": 0.87094069, "epoch": 1.0711288008014512, "grad_norm": 9.422945976257324, "learning_rate": 8.758540538712377e-06, "loss": 0.65492134, "memory(GiB)": 34.88, "step": 39560, "train_speed(iter/s)": 0.415587 }, { "acc": 0.89492216, "epoch": 1.0712641810846668, "grad_norm": 4.526244640350342, "learning_rate": 8.758171469362362e-06, "loss": 0.53096504, "memory(GiB)": 34.88, "step": 39565, "train_speed(iter/s)": 0.415589 }, { "acc": 0.88268194, "epoch": 1.0713995613678824, "grad_norm": 9.35882568359375, "learning_rate": 8.75780235294013e-06, "loss": 0.54037023, "memory(GiB)": 34.88, "step": 39570, "train_speed(iter/s)": 0.415592 }, { "acc": 0.8785243, "epoch": 1.071534941651098, "grad_norm": 10.734764099121094, "learning_rate": 8.757433189450303e-06, "loss": 0.63864999, "memory(GiB)": 34.88, "step": 39575, "train_speed(iter/s)": 0.415594 }, { "acc": 0.88330164, "epoch": 1.0716703219343136, "grad_norm": 44.07365417480469, "learning_rate": 8.757063978897505e-06, "loss": 0.6029778, "memory(GiB)": 34.88, "step": 39580, "train_speed(iter/s)": 0.415596 }, { "acc": 0.85316849, "epoch": 1.0718057022175291, "grad_norm": 9.42086124420166, "learning_rate": 8.75669472128636e-06, "loss": 0.71740723, "memory(GiB)": 34.88, "step": 39585, "train_speed(iter/s)": 0.415599 }, { "acc": 0.88636436, "epoch": 1.0719410825007445, "grad_norm": 7.031391620635986, "learning_rate": 8.756325416621498e-06, "loss": 0.54107881, "memory(GiB)": 34.88, "step": 39590, "train_speed(iter/s)": 0.415601 }, { "acc": 0.87369051, "epoch": 1.07207646278396, "grad_norm": 9.155632019042969, "learning_rate": 8.75595606490754e-06, "loss": 0.7394146, "memory(GiB)": 34.88, "step": 39595, "train_speed(iter/s)": 0.415603 }, { "acc": 0.87251091, "epoch": 1.0722118430671757, "grad_norm": 8.381733894348145, "learning_rate": 8.755586666149117e-06, "loss": 0.71914272, "memory(GiB)": 34.88, "step": 39600, "train_speed(iter/s)": 0.415606 }, { "acc": 0.8928093, "epoch": 1.0723472233503912, "grad_norm": 18.23735237121582, "learning_rate": 8.755217220350853e-06, "loss": 0.6136095, "memory(GiB)": 34.88, "step": 39605, "train_speed(iter/s)": 0.415608 }, { "acc": 0.88453884, "epoch": 1.0724826036336068, "grad_norm": 7.608911037445068, "learning_rate": 8.75484772751738e-06, "loss": 0.64131861, "memory(GiB)": 34.88, "step": 39610, "train_speed(iter/s)": 0.41561 }, { "acc": 0.87131567, "epoch": 1.0726179839168224, "grad_norm": 10.512371063232422, "learning_rate": 8.754478187653321e-06, "loss": 0.69136982, "memory(GiB)": 34.88, "step": 39615, "train_speed(iter/s)": 0.415612 }, { "acc": 0.89949074, "epoch": 1.072753364200038, "grad_norm": 4.332676887512207, "learning_rate": 8.754108600763312e-06, "loss": 0.45480413, "memory(GiB)": 34.88, "step": 39620, "train_speed(iter/s)": 0.415615 }, { "acc": 0.89883785, "epoch": 1.0728887444832536, "grad_norm": 4.585373878479004, "learning_rate": 8.75373896685198e-06, "loss": 0.57837386, "memory(GiB)": 34.88, "step": 39625, "train_speed(iter/s)": 0.415617 }, { "acc": 0.88506069, "epoch": 1.073024124766469, "grad_norm": 9.670575141906738, "learning_rate": 8.753369285923952e-06, "loss": 0.6376153, "memory(GiB)": 34.88, "step": 39630, "train_speed(iter/s)": 0.41562 }, { "acc": 0.88292332, "epoch": 1.0731595050496845, "grad_norm": 5.100419998168945, "learning_rate": 8.752999557983863e-06, "loss": 0.62773905, "memory(GiB)": 34.88, "step": 39635, "train_speed(iter/s)": 0.415622 }, { "acc": 0.8918623, "epoch": 1.0732948853329, "grad_norm": 22.3031005859375, "learning_rate": 8.752629783036344e-06, "loss": 0.55893812, "memory(GiB)": 34.88, "step": 39640, "train_speed(iter/s)": 0.415624 }, { "acc": 0.89146385, "epoch": 1.0734302656161157, "grad_norm": 9.72570514678955, "learning_rate": 8.752259961086025e-06, "loss": 0.57996798, "memory(GiB)": 34.88, "step": 39645, "train_speed(iter/s)": 0.415626 }, { "acc": 0.88362865, "epoch": 1.0735656458993312, "grad_norm": 9.847268104553223, "learning_rate": 8.751890092137541e-06, "loss": 0.52727804, "memory(GiB)": 34.88, "step": 39650, "train_speed(iter/s)": 0.415629 }, { "acc": 0.8806551, "epoch": 1.0737010261825468, "grad_norm": 5.733822822570801, "learning_rate": 8.751520176195525e-06, "loss": 0.56240726, "memory(GiB)": 34.88, "step": 39655, "train_speed(iter/s)": 0.415631 }, { "acc": 0.88691616, "epoch": 1.0738364064657624, "grad_norm": 4.292734622955322, "learning_rate": 8.75115021326461e-06, "loss": 0.55245275, "memory(GiB)": 34.88, "step": 39660, "train_speed(iter/s)": 0.415633 }, { "acc": 0.89217958, "epoch": 1.073971786748978, "grad_norm": 7.479504585266113, "learning_rate": 8.750780203349432e-06, "loss": 0.64269862, "memory(GiB)": 34.88, "step": 39665, "train_speed(iter/s)": 0.415635 }, { "acc": 0.87192888, "epoch": 1.0741071670321933, "grad_norm": 8.329706192016602, "learning_rate": 8.750410146454625e-06, "loss": 0.67931337, "memory(GiB)": 34.88, "step": 39670, "train_speed(iter/s)": 0.415637 }, { "acc": 0.90182257, "epoch": 1.074242547315409, "grad_norm": 4.753078460693359, "learning_rate": 8.750040042584824e-06, "loss": 0.46923523, "memory(GiB)": 34.88, "step": 39675, "train_speed(iter/s)": 0.41564 }, { "acc": 0.89461126, "epoch": 1.0743779275986245, "grad_norm": 8.237678527832031, "learning_rate": 8.749669891744666e-06, "loss": 0.50010023, "memory(GiB)": 34.88, "step": 39680, "train_speed(iter/s)": 0.415642 }, { "acc": 0.86923637, "epoch": 1.07451330788184, "grad_norm": 9.910221099853516, "learning_rate": 8.74929969393879e-06, "loss": 0.62745371, "memory(GiB)": 34.88, "step": 39685, "train_speed(iter/s)": 0.415644 }, { "acc": 0.90495672, "epoch": 1.0746486881650557, "grad_norm": 15.735604286193848, "learning_rate": 8.748929449171829e-06, "loss": 0.50643635, "memory(GiB)": 34.88, "step": 39690, "train_speed(iter/s)": 0.415646 }, { "acc": 0.89476299, "epoch": 1.0747840684482712, "grad_norm": 6.499788284301758, "learning_rate": 8.748559157448425e-06, "loss": 0.49155588, "memory(GiB)": 34.88, "step": 39695, "train_speed(iter/s)": 0.415649 }, { "acc": 0.88602867, "epoch": 1.0749194487314868, "grad_norm": 3.0350358486175537, "learning_rate": 8.748188818773215e-06, "loss": 0.51819167, "memory(GiB)": 34.88, "step": 39700, "train_speed(iter/s)": 0.415651 }, { "acc": 0.88604307, "epoch": 1.0750548290147024, "grad_norm": 13.801410675048828, "learning_rate": 8.747818433150839e-06, "loss": 0.59453506, "memory(GiB)": 34.88, "step": 39705, "train_speed(iter/s)": 0.415654 }, { "acc": 0.88737698, "epoch": 1.0751902092979178, "grad_norm": 7.8408403396606445, "learning_rate": 8.747448000585933e-06, "loss": 0.57978873, "memory(GiB)": 34.88, "step": 39710, "train_speed(iter/s)": 0.415656 }, { "acc": 0.88600531, "epoch": 1.0753255895811333, "grad_norm": 8.199591636657715, "learning_rate": 8.747077521083144e-06, "loss": 0.60699825, "memory(GiB)": 34.88, "step": 39715, "train_speed(iter/s)": 0.415658 }, { "acc": 0.88936253, "epoch": 1.075460969864349, "grad_norm": 6.0778326988220215, "learning_rate": 8.746706994647109e-06, "loss": 0.53568325, "memory(GiB)": 34.88, "step": 39720, "train_speed(iter/s)": 0.415661 }, { "acc": 0.88642788, "epoch": 1.0755963501475645, "grad_norm": 4.953455448150635, "learning_rate": 8.746336421282468e-06, "loss": 0.66705022, "memory(GiB)": 34.88, "step": 39725, "train_speed(iter/s)": 0.415663 }, { "acc": 0.87582846, "epoch": 1.07573173043078, "grad_norm": 11.56834888458252, "learning_rate": 8.745965800993869e-06, "loss": 0.69000616, "memory(GiB)": 34.88, "step": 39730, "train_speed(iter/s)": 0.415666 }, { "acc": 0.8852541, "epoch": 1.0758671107139957, "grad_norm": 9.27344036102295, "learning_rate": 8.745595133785948e-06, "loss": 0.66051655, "memory(GiB)": 34.88, "step": 39735, "train_speed(iter/s)": 0.415668 }, { "acc": 0.87210426, "epoch": 1.0760024909972112, "grad_norm": 8.338618278503418, "learning_rate": 8.745224419663352e-06, "loss": 0.71755524, "memory(GiB)": 34.88, "step": 39740, "train_speed(iter/s)": 0.41567 }, { "acc": 0.90023394, "epoch": 1.0761378712804268, "grad_norm": 6.846222400665283, "learning_rate": 8.744853658630724e-06, "loss": 0.5176188, "memory(GiB)": 34.88, "step": 39745, "train_speed(iter/s)": 0.415673 }, { "acc": 0.87829018, "epoch": 1.0762732515636422, "grad_norm": 18.060840606689453, "learning_rate": 8.74448285069271e-06, "loss": 0.57279458, "memory(GiB)": 34.88, "step": 39750, "train_speed(iter/s)": 0.415675 }, { "acc": 0.89119987, "epoch": 1.0764086318468578, "grad_norm": 7.927086353302002, "learning_rate": 8.744111995853953e-06, "loss": 0.62001548, "memory(GiB)": 34.88, "step": 39755, "train_speed(iter/s)": 0.415677 }, { "acc": 0.91023483, "epoch": 1.0765440121300733, "grad_norm": 6.944666385650635, "learning_rate": 8.743741094119099e-06, "loss": 0.45582838, "memory(GiB)": 34.88, "step": 39760, "train_speed(iter/s)": 0.41568 }, { "acc": 0.8845377, "epoch": 1.076679392413289, "grad_norm": 6.605086803436279, "learning_rate": 8.743370145492794e-06, "loss": 0.67128572, "memory(GiB)": 34.88, "step": 39765, "train_speed(iter/s)": 0.415682 }, { "acc": 0.88600559, "epoch": 1.0768147726965045, "grad_norm": 7.557388782501221, "learning_rate": 8.742999149979689e-06, "loss": 0.61531377, "memory(GiB)": 34.88, "step": 39770, "train_speed(iter/s)": 0.415684 }, { "acc": 0.89238863, "epoch": 1.07695015297972, "grad_norm": 6.882914066314697, "learning_rate": 8.742628107584426e-06, "loss": 0.50414801, "memory(GiB)": 34.88, "step": 39775, "train_speed(iter/s)": 0.415687 }, { "acc": 0.89041567, "epoch": 1.0770855332629357, "grad_norm": 7.652161598205566, "learning_rate": 8.742257018311655e-06, "loss": 0.48749504, "memory(GiB)": 34.88, "step": 39780, "train_speed(iter/s)": 0.415689 }, { "acc": 0.89801311, "epoch": 1.0772209135461512, "grad_norm": 6.998603343963623, "learning_rate": 8.741885882166025e-06, "loss": 0.55137234, "memory(GiB)": 34.88, "step": 39785, "train_speed(iter/s)": 0.415691 }, { "acc": 0.90237198, "epoch": 1.0773562938293666, "grad_norm": 6.568413257598877, "learning_rate": 8.741514699152185e-06, "loss": 0.49913616, "memory(GiB)": 34.88, "step": 39790, "train_speed(iter/s)": 0.415694 }, { "acc": 0.88102627, "epoch": 1.0774916741125822, "grad_norm": 9.276298522949219, "learning_rate": 8.741143469274785e-06, "loss": 0.61320686, "memory(GiB)": 34.88, "step": 39795, "train_speed(iter/s)": 0.415696 }, { "acc": 0.92314816, "epoch": 1.0776270543957978, "grad_norm": 8.23410415649414, "learning_rate": 8.740772192538474e-06, "loss": 0.37118855, "memory(GiB)": 34.88, "step": 39800, "train_speed(iter/s)": 0.415699 }, { "acc": 0.85950165, "epoch": 1.0777624346790133, "grad_norm": 12.702621459960938, "learning_rate": 8.740400868947907e-06, "loss": 0.74120841, "memory(GiB)": 34.88, "step": 39805, "train_speed(iter/s)": 0.415701 }, { "acc": 0.87844467, "epoch": 1.077897814962229, "grad_norm": 8.74164867401123, "learning_rate": 8.740029498507731e-06, "loss": 0.54967928, "memory(GiB)": 34.88, "step": 39810, "train_speed(iter/s)": 0.415703 }, { "acc": 0.85680666, "epoch": 1.0780331952454445, "grad_norm": 10.987045288085938, "learning_rate": 8.739658081222601e-06, "loss": 0.78560109, "memory(GiB)": 34.88, "step": 39815, "train_speed(iter/s)": 0.415706 }, { "acc": 0.90176926, "epoch": 1.07816857552866, "grad_norm": 4.333967208862305, "learning_rate": 8.739286617097168e-06, "loss": 0.47618303, "memory(GiB)": 34.88, "step": 39820, "train_speed(iter/s)": 0.415707 }, { "acc": 0.88817892, "epoch": 1.0783039558118757, "grad_norm": 8.334131240844727, "learning_rate": 8.738915106136084e-06, "loss": 0.63113842, "memory(GiB)": 34.88, "step": 39825, "train_speed(iter/s)": 0.41571 }, { "acc": 0.89454308, "epoch": 1.078439336095091, "grad_norm": 10.647345542907715, "learning_rate": 8.738543548344007e-06, "loss": 0.50510635, "memory(GiB)": 34.88, "step": 39830, "train_speed(iter/s)": 0.415712 }, { "acc": 0.89567213, "epoch": 1.0785747163783066, "grad_norm": 9.511686325073242, "learning_rate": 8.738171943725588e-06, "loss": 0.60165787, "memory(GiB)": 34.88, "step": 39835, "train_speed(iter/s)": 0.415714 }, { "acc": 0.89127598, "epoch": 1.0787100966615222, "grad_norm": 6.286984920501709, "learning_rate": 8.737800292285486e-06, "loss": 0.57812662, "memory(GiB)": 34.88, "step": 39840, "train_speed(iter/s)": 0.415716 }, { "acc": 0.8849432, "epoch": 1.0788454769447378, "grad_norm": 8.558130264282227, "learning_rate": 8.737428594028352e-06, "loss": 0.61743364, "memory(GiB)": 34.88, "step": 39845, "train_speed(iter/s)": 0.415718 }, { "acc": 0.86840649, "epoch": 1.0789808572279533, "grad_norm": 7.624571323394775, "learning_rate": 8.737056848958844e-06, "loss": 0.64725227, "memory(GiB)": 34.88, "step": 39850, "train_speed(iter/s)": 0.415721 }, { "acc": 0.85901871, "epoch": 1.079116237511169, "grad_norm": 7.770055294036865, "learning_rate": 8.73668505708162e-06, "loss": 0.84469194, "memory(GiB)": 34.88, "step": 39855, "train_speed(iter/s)": 0.415723 }, { "acc": 0.90331707, "epoch": 1.0792516177943845, "grad_norm": 6.348667144775391, "learning_rate": 8.736313218401335e-06, "loss": 0.45133133, "memory(GiB)": 34.88, "step": 39860, "train_speed(iter/s)": 0.415725 }, { "acc": 0.89589443, "epoch": 1.0793869980776, "grad_norm": 29.91124725341797, "learning_rate": 8.73594133292265e-06, "loss": 0.58256311, "memory(GiB)": 34.88, "step": 39865, "train_speed(iter/s)": 0.415728 }, { "acc": 0.8977808, "epoch": 1.0795223783608154, "grad_norm": 5.778337001800537, "learning_rate": 8.735569400650223e-06, "loss": 0.51646991, "memory(GiB)": 34.88, "step": 39870, "train_speed(iter/s)": 0.415729 }, { "acc": 0.89744549, "epoch": 1.079657758644031, "grad_norm": 6.101346969604492, "learning_rate": 8.735197421588712e-06, "loss": 0.51383457, "memory(GiB)": 34.88, "step": 39875, "train_speed(iter/s)": 0.415732 }, { "acc": 0.89504128, "epoch": 1.0797931389272466, "grad_norm": 14.101744651794434, "learning_rate": 8.734825395742775e-06, "loss": 0.513869, "memory(GiB)": 34.88, "step": 39880, "train_speed(iter/s)": 0.415734 }, { "acc": 0.87511587, "epoch": 1.0799285192104622, "grad_norm": 7.329251766204834, "learning_rate": 8.734453323117076e-06, "loss": 0.72324157, "memory(GiB)": 34.88, "step": 39885, "train_speed(iter/s)": 0.415736 }, { "acc": 0.90093565, "epoch": 1.0800638994936778, "grad_norm": 13.718268394470215, "learning_rate": 8.734081203716274e-06, "loss": 0.57737012, "memory(GiB)": 34.88, "step": 39890, "train_speed(iter/s)": 0.415739 }, { "acc": 0.90125666, "epoch": 1.0801992797768933, "grad_norm": 8.712386131286621, "learning_rate": 8.733709037545032e-06, "loss": 0.50590725, "memory(GiB)": 34.88, "step": 39895, "train_speed(iter/s)": 0.415741 }, { "acc": 0.9003376, "epoch": 1.080334660060109, "grad_norm": 3.737799882888794, "learning_rate": 8.733336824608011e-06, "loss": 0.5092474, "memory(GiB)": 34.88, "step": 39900, "train_speed(iter/s)": 0.415743 }, { "acc": 0.88955164, "epoch": 1.0804700403433245, "grad_norm": 5.962831497192383, "learning_rate": 8.732964564909873e-06, "loss": 0.56227531, "memory(GiB)": 34.88, "step": 39905, "train_speed(iter/s)": 0.415746 }, { "acc": 0.89018745, "epoch": 1.0806054206265399, "grad_norm": 8.240604400634766, "learning_rate": 8.73259225845528e-06, "loss": 0.63462605, "memory(GiB)": 34.88, "step": 39910, "train_speed(iter/s)": 0.415748 }, { "acc": 0.8706399, "epoch": 1.0807408009097554, "grad_norm": 14.872519493103027, "learning_rate": 8.732219905248901e-06, "loss": 0.68615632, "memory(GiB)": 34.88, "step": 39915, "train_speed(iter/s)": 0.41575 }, { "acc": 0.8951601, "epoch": 1.080876181192971, "grad_norm": 6.63050651550293, "learning_rate": 8.731847505295397e-06, "loss": 0.61658125, "memory(GiB)": 34.88, "step": 39920, "train_speed(iter/s)": 0.415752 }, { "acc": 0.85469704, "epoch": 1.0810115614761866, "grad_norm": 43.15192794799805, "learning_rate": 8.731475058599433e-06, "loss": 0.81367083, "memory(GiB)": 34.88, "step": 39925, "train_speed(iter/s)": 0.415754 }, { "acc": 0.8980751, "epoch": 1.0811469417594022, "grad_norm": 16.735733032226562, "learning_rate": 8.731102565165674e-06, "loss": 0.55697775, "memory(GiB)": 34.88, "step": 39930, "train_speed(iter/s)": 0.415757 }, { "acc": 0.8965848, "epoch": 1.0812823220426178, "grad_norm": 9.481072425842285, "learning_rate": 8.730730024998788e-06, "loss": 0.57446003, "memory(GiB)": 34.88, "step": 39935, "train_speed(iter/s)": 0.415758 }, { "acc": 0.89647951, "epoch": 1.0814177023258333, "grad_norm": 9.054862022399902, "learning_rate": 8.730357438103442e-06, "loss": 0.54450188, "memory(GiB)": 34.88, "step": 39940, "train_speed(iter/s)": 0.41576 }, { "acc": 0.8835516, "epoch": 1.081553082609049, "grad_norm": 10.546102523803711, "learning_rate": 8.729984804484301e-06, "loss": 0.61356597, "memory(GiB)": 34.88, "step": 39945, "train_speed(iter/s)": 0.415763 }, { "acc": 0.88179893, "epoch": 1.0816884628922643, "grad_norm": 9.457489967346191, "learning_rate": 8.729612124146036e-06, "loss": 0.72930536, "memory(GiB)": 34.88, "step": 39950, "train_speed(iter/s)": 0.415765 }, { "acc": 0.87792072, "epoch": 1.0818238431754799, "grad_norm": 10.595529556274414, "learning_rate": 8.72923939709331e-06, "loss": 0.65457134, "memory(GiB)": 34.88, "step": 39955, "train_speed(iter/s)": 0.415768 }, { "acc": 0.89470654, "epoch": 1.0819592234586954, "grad_norm": 3.539088249206543, "learning_rate": 8.728866623330801e-06, "loss": 0.45122452, "memory(GiB)": 34.88, "step": 39960, "train_speed(iter/s)": 0.41577 }, { "acc": 0.88596764, "epoch": 1.082094603741911, "grad_norm": 7.501980781555176, "learning_rate": 8.728493802863173e-06, "loss": 0.53007498, "memory(GiB)": 34.88, "step": 39965, "train_speed(iter/s)": 0.415772 }, { "acc": 0.86696568, "epoch": 1.0822299840251266, "grad_norm": 5.149384021759033, "learning_rate": 8.728120935695094e-06, "loss": 0.68593531, "memory(GiB)": 34.88, "step": 39970, "train_speed(iter/s)": 0.415774 }, { "acc": 0.87796936, "epoch": 1.0823653643083422, "grad_norm": 9.158536911010742, "learning_rate": 8.72774802183124e-06, "loss": 0.62032185, "memory(GiB)": 34.88, "step": 39975, "train_speed(iter/s)": 0.415776 }, { "acc": 0.8631897, "epoch": 1.0825007445915578, "grad_norm": 6.044171333312988, "learning_rate": 8.727375061276282e-06, "loss": 0.60214596, "memory(GiB)": 34.88, "step": 39980, "train_speed(iter/s)": 0.415779 }, { "acc": 0.89463062, "epoch": 1.0826361248747731, "grad_norm": 10.859077453613281, "learning_rate": 8.727002054034888e-06, "loss": 0.54114666, "memory(GiB)": 34.88, "step": 39985, "train_speed(iter/s)": 0.415781 }, { "acc": 0.8801775, "epoch": 1.0827715051579887, "grad_norm": 26.47530174255371, "learning_rate": 8.726629000111734e-06, "loss": 0.70260415, "memory(GiB)": 34.88, "step": 39990, "train_speed(iter/s)": 0.415783 }, { "acc": 0.8877306, "epoch": 1.0829068854412043, "grad_norm": 6.525222301483154, "learning_rate": 8.726255899511494e-06, "loss": 0.57152023, "memory(GiB)": 34.88, "step": 39995, "train_speed(iter/s)": 0.415786 }, { "acc": 0.88361683, "epoch": 1.0830422657244199, "grad_norm": 5.503684997558594, "learning_rate": 8.72588275223884e-06, "loss": 0.62839279, "memory(GiB)": 34.88, "step": 40000, "train_speed(iter/s)": 0.415788 }, { "epoch": 1.0830422657244199, "eval_acc": 0.5942632665096724, "eval_loss": 1.091830849647522, "eval_runtime": 1297.9873, "eval_samples_per_second": 66.491, "eval_steps_per_second": 2.079, "step": 40000 }, { "acc": 0.87170515, "epoch": 1.0831776460076354, "grad_norm": 10.052457809448242, "learning_rate": 8.725509558298444e-06, "loss": 0.70296025, "memory(GiB)": 34.88, "step": 40005, "train_speed(iter/s)": 0.410144 }, { "acc": 0.88024673, "epoch": 1.083313026290851, "grad_norm": 11.953695297241211, "learning_rate": 8.725136317694985e-06, "loss": 0.58166752, "memory(GiB)": 34.88, "step": 40010, "train_speed(iter/s)": 0.410146 }, { "acc": 0.89030514, "epoch": 1.0834484065740666, "grad_norm": 6.591681003570557, "learning_rate": 8.724763030433137e-06, "loss": 0.5438056, "memory(GiB)": 34.88, "step": 40015, "train_speed(iter/s)": 0.410149 }, { "acc": 0.86487656, "epoch": 1.0835837868572822, "grad_norm": 10.56796932220459, "learning_rate": 8.724389696517578e-06, "loss": 0.74084048, "memory(GiB)": 34.88, "step": 40020, "train_speed(iter/s)": 0.410152 }, { "acc": 0.88685093, "epoch": 1.0837191671404978, "grad_norm": 7.559480667114258, "learning_rate": 8.724016315952983e-06, "loss": 0.64876719, "memory(GiB)": 34.88, "step": 40025, "train_speed(iter/s)": 0.410155 }, { "acc": 0.88926516, "epoch": 1.0838545474237131, "grad_norm": 10.026715278625488, "learning_rate": 8.723642888744026e-06, "loss": 0.62742682, "memory(GiB)": 34.88, "step": 40030, "train_speed(iter/s)": 0.410158 }, { "acc": 0.87847729, "epoch": 1.0839899277069287, "grad_norm": 5.6219282150268555, "learning_rate": 8.723269414895391e-06, "loss": 0.5736577, "memory(GiB)": 34.88, "step": 40035, "train_speed(iter/s)": 0.410161 }, { "acc": 0.89455242, "epoch": 1.0841253079901443, "grad_norm": 12.588384628295898, "learning_rate": 8.722895894411754e-06, "loss": 0.5672267, "memory(GiB)": 34.88, "step": 40040, "train_speed(iter/s)": 0.410164 }, { "acc": 0.90399323, "epoch": 1.0842606882733599, "grad_norm": 5.874068737030029, "learning_rate": 8.722522327297795e-06, "loss": 0.5117156, "memory(GiB)": 34.88, "step": 40045, "train_speed(iter/s)": 0.410166 }, { "acc": 0.88103714, "epoch": 1.0843960685565754, "grad_norm": 15.152658462524414, "learning_rate": 8.72214871355819e-06, "loss": 0.70439014, "memory(GiB)": 34.88, "step": 40050, "train_speed(iter/s)": 0.410169 }, { "acc": 0.88922319, "epoch": 1.084531448839791, "grad_norm": 12.09937858581543, "learning_rate": 8.721775053197625e-06, "loss": 0.61288114, "memory(GiB)": 34.88, "step": 40055, "train_speed(iter/s)": 0.410172 }, { "acc": 0.87873354, "epoch": 1.0846668291230066, "grad_norm": 12.1670503616333, "learning_rate": 8.721401346220775e-06, "loss": 0.65806937, "memory(GiB)": 34.88, "step": 40060, "train_speed(iter/s)": 0.410175 }, { "acc": 0.86968498, "epoch": 1.084802209406222, "grad_norm": 14.391724586486816, "learning_rate": 8.721027592632325e-06, "loss": 0.75543957, "memory(GiB)": 34.88, "step": 40065, "train_speed(iter/s)": 0.410178 }, { "acc": 0.89041824, "epoch": 1.0849375896894375, "grad_norm": 8.625935554504395, "learning_rate": 8.720653792436955e-06, "loss": 0.58713255, "memory(GiB)": 34.88, "step": 40070, "train_speed(iter/s)": 0.41018 }, { "acc": 0.85700397, "epoch": 1.0850729699726531, "grad_norm": 6.498554229736328, "learning_rate": 8.720279945639349e-06, "loss": 0.72133389, "memory(GiB)": 34.88, "step": 40075, "train_speed(iter/s)": 0.410184 }, { "acc": 0.87180405, "epoch": 1.0852083502558687, "grad_norm": 9.435857772827148, "learning_rate": 8.719906052244193e-06, "loss": 0.7612843, "memory(GiB)": 34.88, "step": 40080, "train_speed(iter/s)": 0.410187 }, { "acc": 0.89195175, "epoch": 1.0853437305390843, "grad_norm": 8.809589385986328, "learning_rate": 8.719532112256165e-06, "loss": 0.5658926, "memory(GiB)": 34.88, "step": 40085, "train_speed(iter/s)": 0.41019 }, { "acc": 0.8811903, "epoch": 1.0854791108222999, "grad_norm": 10.24527645111084, "learning_rate": 8.719158125679951e-06, "loss": 0.67953153, "memory(GiB)": 34.88, "step": 40090, "train_speed(iter/s)": 0.410192 }, { "acc": 0.91273184, "epoch": 1.0856144911055154, "grad_norm": 10.76793384552002, "learning_rate": 8.718784092520236e-06, "loss": 0.44788113, "memory(GiB)": 34.88, "step": 40095, "train_speed(iter/s)": 0.410195 }, { "acc": 0.8812521, "epoch": 1.085749871388731, "grad_norm": 9.96669864654541, "learning_rate": 8.718410012781709e-06, "loss": 0.59485726, "memory(GiB)": 34.88, "step": 40100, "train_speed(iter/s)": 0.410198 }, { "acc": 0.90158434, "epoch": 1.0858852516719466, "grad_norm": 4.432513236999512, "learning_rate": 8.718035886469052e-06, "loss": 0.50415831, "memory(GiB)": 34.88, "step": 40105, "train_speed(iter/s)": 0.4102 }, { "acc": 0.91001167, "epoch": 1.086020631955162, "grad_norm": 10.073612213134766, "learning_rate": 8.717661713586955e-06, "loss": 0.44849024, "memory(GiB)": 34.88, "step": 40110, "train_speed(iter/s)": 0.410203 }, { "acc": 0.87611637, "epoch": 1.0861560122383775, "grad_norm": 4.572251796722412, "learning_rate": 8.7172874941401e-06, "loss": 0.61729355, "memory(GiB)": 34.88, "step": 40115, "train_speed(iter/s)": 0.410206 }, { "acc": 0.87619553, "epoch": 1.0862913925215931, "grad_norm": 9.654542922973633, "learning_rate": 8.71691322813318e-06, "loss": 0.63430181, "memory(GiB)": 34.88, "step": 40120, "train_speed(iter/s)": 0.410209 }, { "acc": 0.90528879, "epoch": 1.0864267728048087, "grad_norm": 5.362098693847656, "learning_rate": 8.716538915570882e-06, "loss": 0.53535814, "memory(GiB)": 34.88, "step": 40125, "train_speed(iter/s)": 0.410212 }, { "acc": 0.91817608, "epoch": 1.0865621530880243, "grad_norm": 6.334131717681885, "learning_rate": 8.716164556457894e-06, "loss": 0.41513491, "memory(GiB)": 34.88, "step": 40130, "train_speed(iter/s)": 0.410215 }, { "acc": 0.87295895, "epoch": 1.0866975333712399, "grad_norm": 10.954686164855957, "learning_rate": 8.715790150798907e-06, "loss": 0.72054043, "memory(GiB)": 34.88, "step": 40135, "train_speed(iter/s)": 0.410217 }, { "acc": 0.89274731, "epoch": 1.0868329136544554, "grad_norm": 5.662597179412842, "learning_rate": 8.715415698598608e-06, "loss": 0.55056963, "memory(GiB)": 34.88, "step": 40140, "train_speed(iter/s)": 0.41022 }, { "acc": 0.89680729, "epoch": 1.0869682939376708, "grad_norm": 8.77053165435791, "learning_rate": 8.715041199861692e-06, "loss": 0.51172361, "memory(GiB)": 34.88, "step": 40145, "train_speed(iter/s)": 0.410223 }, { "acc": 0.88962078, "epoch": 1.0871036742208864, "grad_norm": 5.054896354675293, "learning_rate": 8.714666654592849e-06, "loss": 0.52034321, "memory(GiB)": 34.88, "step": 40150, "train_speed(iter/s)": 0.410226 }, { "acc": 0.89717674, "epoch": 1.087239054504102, "grad_norm": 11.000219345092773, "learning_rate": 8.71429206279677e-06, "loss": 0.56981974, "memory(GiB)": 34.88, "step": 40155, "train_speed(iter/s)": 0.410229 }, { "acc": 0.88467617, "epoch": 1.0873744347873175, "grad_norm": 7.497223854064941, "learning_rate": 8.713917424478147e-06, "loss": 0.62612624, "memory(GiB)": 34.88, "step": 40160, "train_speed(iter/s)": 0.410232 }, { "acc": 0.87462616, "epoch": 1.0875098150705331, "grad_norm": 7.209803104400635, "learning_rate": 8.713542739641674e-06, "loss": 0.66639547, "memory(GiB)": 34.88, "step": 40165, "train_speed(iter/s)": 0.410235 }, { "acc": 0.89090261, "epoch": 1.0876451953537487, "grad_norm": 11.8729248046875, "learning_rate": 8.713168008292045e-06, "loss": 0.54116673, "memory(GiB)": 34.88, "step": 40170, "train_speed(iter/s)": 0.410238 }, { "acc": 0.87509899, "epoch": 1.0877805756369643, "grad_norm": 15.4212064743042, "learning_rate": 8.712793230433953e-06, "loss": 0.61728477, "memory(GiB)": 34.88, "step": 40175, "train_speed(iter/s)": 0.410241 }, { "acc": 0.89054813, "epoch": 1.0879159559201799, "grad_norm": 10.816106796264648, "learning_rate": 8.712418406072093e-06, "loss": 0.61375599, "memory(GiB)": 34.88, "step": 40180, "train_speed(iter/s)": 0.410244 }, { "acc": 0.89975986, "epoch": 1.0880513362033954, "grad_norm": 5.960590839385986, "learning_rate": 8.712043535211164e-06, "loss": 0.45592623, "memory(GiB)": 34.88, "step": 40185, "train_speed(iter/s)": 0.410247 }, { "acc": 0.87772465, "epoch": 1.0881867164866108, "grad_norm": 13.162507057189941, "learning_rate": 8.711668617855858e-06, "loss": 0.72881575, "memory(GiB)": 34.88, "step": 40190, "train_speed(iter/s)": 0.410249 }, { "acc": 0.87860136, "epoch": 1.0883220967698264, "grad_norm": 6.940426349639893, "learning_rate": 8.71129365401087e-06, "loss": 0.61628976, "memory(GiB)": 34.88, "step": 40195, "train_speed(iter/s)": 0.410252 }, { "acc": 0.87426291, "epoch": 1.088457477053042, "grad_norm": 6.805625915527344, "learning_rate": 8.710918643680902e-06, "loss": 0.54633904, "memory(GiB)": 34.88, "step": 40200, "train_speed(iter/s)": 0.410255 }, { "acc": 0.88919554, "epoch": 1.0885928573362575, "grad_norm": 8.711479187011719, "learning_rate": 8.71054358687065e-06, "loss": 0.58451376, "memory(GiB)": 34.88, "step": 40205, "train_speed(iter/s)": 0.410258 }, { "acc": 0.89458113, "epoch": 1.0887282376194731, "grad_norm": 8.217120170593262, "learning_rate": 8.71016848358481e-06, "loss": 0.52001009, "memory(GiB)": 34.88, "step": 40210, "train_speed(iter/s)": 0.410261 }, { "acc": 0.85773659, "epoch": 1.0888636179026887, "grad_norm": 5.769830226898193, "learning_rate": 8.709793333828087e-06, "loss": 0.75371599, "memory(GiB)": 34.88, "step": 40215, "train_speed(iter/s)": 0.410263 }, { "acc": 0.8933939, "epoch": 1.0889989981859043, "grad_norm": 5.339576244354248, "learning_rate": 8.70941813760517e-06, "loss": 0.60335965, "memory(GiB)": 34.88, "step": 40220, "train_speed(iter/s)": 0.410266 }, { "acc": 0.87848587, "epoch": 1.0891343784691196, "grad_norm": 12.814857482910156, "learning_rate": 8.709042894920769e-06, "loss": 0.60174303, "memory(GiB)": 34.88, "step": 40225, "train_speed(iter/s)": 0.410269 }, { "acc": 0.88732052, "epoch": 1.0892697587523352, "grad_norm": 16.741859436035156, "learning_rate": 8.708667605779581e-06, "loss": 0.61312189, "memory(GiB)": 34.88, "step": 40230, "train_speed(iter/s)": 0.410272 }, { "acc": 0.88961926, "epoch": 1.0894051390355508, "grad_norm": 7.9934539794921875, "learning_rate": 8.708292270186308e-06, "loss": 0.56305456, "memory(GiB)": 34.88, "step": 40235, "train_speed(iter/s)": 0.410275 }, { "acc": 0.87053146, "epoch": 1.0895405193187664, "grad_norm": 9.220807075500488, "learning_rate": 8.707916888145649e-06, "loss": 0.72425098, "memory(GiB)": 34.88, "step": 40240, "train_speed(iter/s)": 0.410277 }, { "acc": 0.86596022, "epoch": 1.089675899601982, "grad_norm": 17.46892738342285, "learning_rate": 8.707541459662308e-06, "loss": 0.69990826, "memory(GiB)": 34.88, "step": 40245, "train_speed(iter/s)": 0.41028 }, { "acc": 0.88489113, "epoch": 1.0898112798851975, "grad_norm": 11.34963321685791, "learning_rate": 8.70716598474099e-06, "loss": 0.54174924, "memory(GiB)": 34.88, "step": 40250, "train_speed(iter/s)": 0.410283 }, { "acc": 0.89536047, "epoch": 1.0899466601684131, "grad_norm": 7.406686305999756, "learning_rate": 8.706790463386396e-06, "loss": 0.5239296, "memory(GiB)": 34.88, "step": 40255, "train_speed(iter/s)": 0.410286 }, { "acc": 0.86230564, "epoch": 1.0900820404516287, "grad_norm": 19.971097946166992, "learning_rate": 8.706414895603231e-06, "loss": 0.84083729, "memory(GiB)": 34.88, "step": 40260, "train_speed(iter/s)": 0.410289 }, { "acc": 0.90759926, "epoch": 1.0902174207348443, "grad_norm": 7.862791061401367, "learning_rate": 8.7060392813962e-06, "loss": 0.54820328, "memory(GiB)": 34.88, "step": 40265, "train_speed(iter/s)": 0.410292 }, { "acc": 0.88715534, "epoch": 1.0903528010180596, "grad_norm": 13.326574325561523, "learning_rate": 8.705663620770009e-06, "loss": 0.56725092, "memory(GiB)": 34.88, "step": 40270, "train_speed(iter/s)": 0.410295 }, { "acc": 0.89723091, "epoch": 1.0904881813012752, "grad_norm": 4.6614484786987305, "learning_rate": 8.705287913729364e-06, "loss": 0.49474902, "memory(GiB)": 34.88, "step": 40275, "train_speed(iter/s)": 0.410298 }, { "acc": 0.87765217, "epoch": 1.0906235615844908, "grad_norm": 8.355908393859863, "learning_rate": 8.704912160278967e-06, "loss": 0.72757378, "memory(GiB)": 34.88, "step": 40280, "train_speed(iter/s)": 0.410301 }, { "acc": 0.88384438, "epoch": 1.0907589418677064, "grad_norm": 28.430402755737305, "learning_rate": 8.704536360423532e-06, "loss": 0.56175261, "memory(GiB)": 34.88, "step": 40285, "train_speed(iter/s)": 0.410303 }, { "acc": 0.897052, "epoch": 1.090894322150922, "grad_norm": 9.99283504486084, "learning_rate": 8.704160514167762e-06, "loss": 0.56917086, "memory(GiB)": 34.88, "step": 40290, "train_speed(iter/s)": 0.410306 }, { "acc": 0.9032443, "epoch": 1.0910297024341375, "grad_norm": 5.665802001953125, "learning_rate": 8.703784621516365e-06, "loss": 0.46728449, "memory(GiB)": 34.88, "step": 40295, "train_speed(iter/s)": 0.410309 }, { "acc": 0.90805073, "epoch": 1.0911650827173531, "grad_norm": 8.717476844787598, "learning_rate": 8.703408682474053e-06, "loss": 0.4388876, "memory(GiB)": 34.88, "step": 40300, "train_speed(iter/s)": 0.410312 }, { "acc": 0.85597105, "epoch": 1.0913004630005685, "grad_norm": 21.94056510925293, "learning_rate": 8.703032697045532e-06, "loss": 0.77324886, "memory(GiB)": 34.88, "step": 40305, "train_speed(iter/s)": 0.410315 }, { "acc": 0.86492195, "epoch": 1.091435843283784, "grad_norm": 11.396286964416504, "learning_rate": 8.702656665235514e-06, "loss": 0.77899566, "memory(GiB)": 34.88, "step": 40310, "train_speed(iter/s)": 0.410317 }, { "acc": 0.89911194, "epoch": 1.0915712235669996, "grad_norm": 23.12609100341797, "learning_rate": 8.70228058704871e-06, "loss": 0.50686646, "memory(GiB)": 34.88, "step": 40315, "train_speed(iter/s)": 0.41032 }, { "acc": 0.89762793, "epoch": 1.0917066038502152, "grad_norm": 7.717397689819336, "learning_rate": 8.701904462489828e-06, "loss": 0.48073359, "memory(GiB)": 34.88, "step": 40320, "train_speed(iter/s)": 0.410323 }, { "acc": 0.9022666, "epoch": 1.0918419841334308, "grad_norm": 9.977461814880371, "learning_rate": 8.701528291563583e-06, "loss": 0.49126081, "memory(GiB)": 34.88, "step": 40325, "train_speed(iter/s)": 0.410325 }, { "acc": 0.88276825, "epoch": 1.0919773644166464, "grad_norm": 7.085948467254639, "learning_rate": 8.701152074274689e-06, "loss": 0.66874471, "memory(GiB)": 34.88, "step": 40330, "train_speed(iter/s)": 0.410328 }, { "acc": 0.88583288, "epoch": 1.092112744699862, "grad_norm": 11.070391654968262, "learning_rate": 8.700775810627852e-06, "loss": 0.57938213, "memory(GiB)": 34.88, "step": 40335, "train_speed(iter/s)": 0.410331 }, { "acc": 0.90434361, "epoch": 1.0922481249830776, "grad_norm": 5.337146282196045, "learning_rate": 8.700399500627791e-06, "loss": 0.4414782, "memory(GiB)": 34.88, "step": 40340, "train_speed(iter/s)": 0.410334 }, { "acc": 0.85476027, "epoch": 1.0923835052662931, "grad_norm": 20.41042137145996, "learning_rate": 8.700023144279219e-06, "loss": 0.80884132, "memory(GiB)": 34.88, "step": 40345, "train_speed(iter/s)": 0.410336 }, { "acc": 0.89622116, "epoch": 1.0925188855495085, "grad_norm": 6.879304885864258, "learning_rate": 8.699646741586849e-06, "loss": 0.45014076, "memory(GiB)": 34.88, "step": 40350, "train_speed(iter/s)": 0.410339 }, { "acc": 0.85880919, "epoch": 1.092654265832724, "grad_norm": 15.799638748168945, "learning_rate": 8.699270292555398e-06, "loss": 0.79984274, "memory(GiB)": 34.88, "step": 40355, "train_speed(iter/s)": 0.410342 }, { "acc": 0.86900015, "epoch": 1.0927896461159397, "grad_norm": 4.486942291259766, "learning_rate": 8.69889379718958e-06, "loss": 0.7076478, "memory(GiB)": 34.88, "step": 40360, "train_speed(iter/s)": 0.410345 }, { "acc": 0.89070024, "epoch": 1.0929250263991552, "grad_norm": 5.493224620819092, "learning_rate": 8.698517255494112e-06, "loss": 0.63241196, "memory(GiB)": 34.88, "step": 40365, "train_speed(iter/s)": 0.410348 }, { "acc": 0.87762289, "epoch": 1.0930604066823708, "grad_norm": 6.037600517272949, "learning_rate": 8.698140667473714e-06, "loss": 0.65165091, "memory(GiB)": 34.88, "step": 40370, "train_speed(iter/s)": 0.410351 }, { "acc": 0.88910217, "epoch": 1.0931957869655864, "grad_norm": 4.335313320159912, "learning_rate": 8.697764033133098e-06, "loss": 0.55828781, "memory(GiB)": 34.88, "step": 40375, "train_speed(iter/s)": 0.410354 }, { "acc": 0.88027792, "epoch": 1.093331167248802, "grad_norm": 5.101975440979004, "learning_rate": 8.697387352476988e-06, "loss": 0.6429369, "memory(GiB)": 34.88, "step": 40380, "train_speed(iter/s)": 0.410357 }, { "acc": 0.88776598, "epoch": 1.0934665475320173, "grad_norm": 9.098104476928711, "learning_rate": 8.697010625510097e-06, "loss": 0.60077782, "memory(GiB)": 34.88, "step": 40385, "train_speed(iter/s)": 0.410359 }, { "acc": 0.88438015, "epoch": 1.093601927815233, "grad_norm": 8.858513832092285, "learning_rate": 8.696633852237147e-06, "loss": 0.65799837, "memory(GiB)": 34.88, "step": 40390, "train_speed(iter/s)": 0.410362 }, { "acc": 0.90830832, "epoch": 1.0937373080984485, "grad_norm": 19.49602508544922, "learning_rate": 8.696257032662858e-06, "loss": 0.47665491, "memory(GiB)": 34.88, "step": 40395, "train_speed(iter/s)": 0.410365 }, { "acc": 0.87456598, "epoch": 1.093872688381664, "grad_norm": 16.9588680267334, "learning_rate": 8.69588016679195e-06, "loss": 0.66649389, "memory(GiB)": 34.88, "step": 40400, "train_speed(iter/s)": 0.410368 }, { "acc": 0.90197945, "epoch": 1.0940080686648797, "grad_norm": 5.955593585968018, "learning_rate": 8.695503254629142e-06, "loss": 0.46526241, "memory(GiB)": 34.88, "step": 40405, "train_speed(iter/s)": 0.410371 }, { "acc": 0.90267906, "epoch": 1.0941434489480952, "grad_norm": 6.754891872406006, "learning_rate": 8.695126296179162e-06, "loss": 0.53071799, "memory(GiB)": 34.88, "step": 40410, "train_speed(iter/s)": 0.410374 }, { "acc": 0.8918417, "epoch": 1.0942788292313108, "grad_norm": 32.57933807373047, "learning_rate": 8.694749291446724e-06, "loss": 0.51673126, "memory(GiB)": 34.88, "step": 40415, "train_speed(iter/s)": 0.410377 }, { "acc": 0.89994764, "epoch": 1.0944142095145264, "grad_norm": 8.845102310180664, "learning_rate": 8.694372240436558e-06, "loss": 0.55079064, "memory(GiB)": 34.88, "step": 40420, "train_speed(iter/s)": 0.410379 }, { "acc": 0.88837662, "epoch": 1.094549589797742, "grad_norm": 8.418821334838867, "learning_rate": 8.693995143153379e-06, "loss": 0.52736669, "memory(GiB)": 34.88, "step": 40425, "train_speed(iter/s)": 0.410382 }, { "acc": 0.86645412, "epoch": 1.0946849700809573, "grad_norm": 6.9695868492126465, "learning_rate": 8.69361799960192e-06, "loss": 0.71215916, "memory(GiB)": 34.88, "step": 40430, "train_speed(iter/s)": 0.410385 }, { "acc": 0.89608173, "epoch": 1.094820350364173, "grad_norm": 7.148428440093994, "learning_rate": 8.693240809786899e-06, "loss": 0.51540937, "memory(GiB)": 34.88, "step": 40435, "train_speed(iter/s)": 0.410388 }, { "acc": 0.89519625, "epoch": 1.0949557306473885, "grad_norm": 8.452441215515137, "learning_rate": 8.692863573713043e-06, "loss": 0.5412518, "memory(GiB)": 34.88, "step": 40440, "train_speed(iter/s)": 0.41039 }, { "acc": 0.88076077, "epoch": 1.095091110930604, "grad_norm": 10.847556114196777, "learning_rate": 8.692486291385077e-06, "loss": 0.69160886, "memory(GiB)": 34.88, "step": 40445, "train_speed(iter/s)": 0.410393 }, { "acc": 0.87615585, "epoch": 1.0952264912138197, "grad_norm": 9.742144584655762, "learning_rate": 8.69210896280773e-06, "loss": 0.68377652, "memory(GiB)": 34.88, "step": 40450, "train_speed(iter/s)": 0.410395 }, { "acc": 0.89836464, "epoch": 1.0953618714970352, "grad_norm": 5.235113143920898, "learning_rate": 8.691731587985725e-06, "loss": 0.46947789, "memory(GiB)": 34.88, "step": 40455, "train_speed(iter/s)": 0.410398 }, { "acc": 0.89548721, "epoch": 1.0954972517802508, "grad_norm": 5.763591289520264, "learning_rate": 8.691354166923792e-06, "loss": 0.59687295, "memory(GiB)": 34.88, "step": 40460, "train_speed(iter/s)": 0.410401 }, { "acc": 0.88314438, "epoch": 1.0956326320634662, "grad_norm": 14.181884765625, "learning_rate": 8.690976699626658e-06, "loss": 0.58793964, "memory(GiB)": 34.88, "step": 40465, "train_speed(iter/s)": 0.410404 }, { "acc": 0.89231262, "epoch": 1.0957680123466818, "grad_norm": 8.843961715698242, "learning_rate": 8.690599186099053e-06, "loss": 0.53948307, "memory(GiB)": 34.88, "step": 40470, "train_speed(iter/s)": 0.410407 }, { "acc": 0.87092495, "epoch": 1.0959033926298973, "grad_norm": 4.267063617706299, "learning_rate": 8.690221626345702e-06, "loss": 0.6105392, "memory(GiB)": 34.88, "step": 40475, "train_speed(iter/s)": 0.41041 }, { "acc": 0.90228767, "epoch": 1.096038772913113, "grad_norm": 3.336174249649048, "learning_rate": 8.689844020371337e-06, "loss": 0.43394136, "memory(GiB)": 34.88, "step": 40480, "train_speed(iter/s)": 0.410413 }, { "acc": 0.87752352, "epoch": 1.0961741531963285, "grad_norm": 13.411967277526855, "learning_rate": 8.68946636818069e-06, "loss": 0.63932171, "memory(GiB)": 34.88, "step": 40485, "train_speed(iter/s)": 0.410416 }, { "acc": 0.87308969, "epoch": 1.096309533479544, "grad_norm": 21.338804244995117, "learning_rate": 8.689088669778491e-06, "loss": 0.66093483, "memory(GiB)": 34.88, "step": 40490, "train_speed(iter/s)": 0.410418 }, { "acc": 0.90119658, "epoch": 1.0964449137627597, "grad_norm": 23.445842742919922, "learning_rate": 8.688710925169471e-06, "loss": 0.5430881, "memory(GiB)": 34.88, "step": 40495, "train_speed(iter/s)": 0.410421 }, { "acc": 0.90104446, "epoch": 1.0965802940459752, "grad_norm": 9.425333976745605, "learning_rate": 8.688333134358361e-06, "loss": 0.50270185, "memory(GiB)": 34.88, "step": 40500, "train_speed(iter/s)": 0.410424 }, { "acc": 0.8728301, "epoch": 1.0967156743291906, "grad_norm": 15.610610961914062, "learning_rate": 8.687955297349895e-06, "loss": 0.63319168, "memory(GiB)": 34.88, "step": 40505, "train_speed(iter/s)": 0.410427 }, { "acc": 0.89435797, "epoch": 1.0968510546124062, "grad_norm": 10.524940490722656, "learning_rate": 8.687577414148804e-06, "loss": 0.5000453, "memory(GiB)": 34.88, "step": 40510, "train_speed(iter/s)": 0.410429 }, { "acc": 0.87867908, "epoch": 1.0969864348956218, "grad_norm": 4.672108173370361, "learning_rate": 8.687199484759825e-06, "loss": 0.57613039, "memory(GiB)": 34.88, "step": 40515, "train_speed(iter/s)": 0.410432 }, { "acc": 0.87525673, "epoch": 1.0971218151788373, "grad_norm": 5.852295875549316, "learning_rate": 8.68682150918769e-06, "loss": 0.65461569, "memory(GiB)": 34.88, "step": 40520, "train_speed(iter/s)": 0.410435 }, { "acc": 0.89021244, "epoch": 1.097257195462053, "grad_norm": 25.00901222229004, "learning_rate": 8.686443487437135e-06, "loss": 0.57145505, "memory(GiB)": 34.88, "step": 40525, "train_speed(iter/s)": 0.410438 }, { "acc": 0.89428663, "epoch": 1.0973925757452685, "grad_norm": 9.609865188598633, "learning_rate": 8.686065419512895e-06, "loss": 0.52801371, "memory(GiB)": 34.88, "step": 40530, "train_speed(iter/s)": 0.410441 }, { "acc": 0.85656414, "epoch": 1.097527956028484, "grad_norm": 12.681300163269043, "learning_rate": 8.685687305419705e-06, "loss": 0.7208807, "memory(GiB)": 34.88, "step": 40535, "train_speed(iter/s)": 0.410443 }, { "acc": 0.87513981, "epoch": 1.0976633363116997, "grad_norm": 9.515331268310547, "learning_rate": 8.685309145162303e-06, "loss": 0.65008464, "memory(GiB)": 34.88, "step": 40540, "train_speed(iter/s)": 0.410446 }, { "acc": 0.89395752, "epoch": 1.097798716594915, "grad_norm": 15.709298133850098, "learning_rate": 8.684930938745427e-06, "loss": 0.50573683, "memory(GiB)": 34.88, "step": 40545, "train_speed(iter/s)": 0.410449 }, { "acc": 0.90548019, "epoch": 1.0979340968781306, "grad_norm": 5.896389484405518, "learning_rate": 8.68455268617381e-06, "loss": 0.5104795, "memory(GiB)": 34.88, "step": 40550, "train_speed(iter/s)": 0.410452 }, { "acc": 0.91071587, "epoch": 1.0980694771613462, "grad_norm": 7.094045639038086, "learning_rate": 8.684174387452197e-06, "loss": 0.41936598, "memory(GiB)": 34.88, "step": 40555, "train_speed(iter/s)": 0.410454 }, { "acc": 0.8938736, "epoch": 1.0982048574445618, "grad_norm": 18.244661331176758, "learning_rate": 8.683796042585324e-06, "loss": 0.55923443, "memory(GiB)": 34.88, "step": 40560, "train_speed(iter/s)": 0.410457 }, { "acc": 0.88402472, "epoch": 1.0983402377277773, "grad_norm": 9.104506492614746, "learning_rate": 8.683417651577932e-06, "loss": 0.62261767, "memory(GiB)": 34.88, "step": 40565, "train_speed(iter/s)": 0.41046 }, { "acc": 0.89384212, "epoch": 1.098475618010993, "grad_norm": 10.890484809875488, "learning_rate": 8.683039214434756e-06, "loss": 0.60204287, "memory(GiB)": 34.88, "step": 40570, "train_speed(iter/s)": 0.410463 }, { "acc": 0.87237301, "epoch": 1.0986109982942085, "grad_norm": 10.347485542297363, "learning_rate": 8.68266073116054e-06, "loss": 0.76661024, "memory(GiB)": 34.88, "step": 40575, "train_speed(iter/s)": 0.410466 }, { "acc": 0.91724997, "epoch": 1.098746378577424, "grad_norm": 5.724654197692871, "learning_rate": 8.682282201760027e-06, "loss": 0.36006505, "memory(GiB)": 34.88, "step": 40580, "train_speed(iter/s)": 0.410468 }, { "acc": 0.88582325, "epoch": 1.0988817588606394, "grad_norm": 15.821112632751465, "learning_rate": 8.681903626237957e-06, "loss": 0.5909112, "memory(GiB)": 34.88, "step": 40585, "train_speed(iter/s)": 0.410471 }, { "acc": 0.90792637, "epoch": 1.099017139143855, "grad_norm": 8.401007652282715, "learning_rate": 8.681525004599074e-06, "loss": 0.47520742, "memory(GiB)": 34.88, "step": 40590, "train_speed(iter/s)": 0.410474 }, { "acc": 0.86879578, "epoch": 1.0991525194270706, "grad_norm": 13.522610664367676, "learning_rate": 8.681146336848118e-06, "loss": 0.75123634, "memory(GiB)": 34.88, "step": 40595, "train_speed(iter/s)": 0.410476 }, { "acc": 0.89424133, "epoch": 1.0992878997102862, "grad_norm": 4.769608974456787, "learning_rate": 8.680767622989834e-06, "loss": 0.55597081, "memory(GiB)": 34.88, "step": 40600, "train_speed(iter/s)": 0.410479 }, { "acc": 0.88136978, "epoch": 1.0994232799935018, "grad_norm": 11.899898529052734, "learning_rate": 8.680388863028968e-06, "loss": 0.65795531, "memory(GiB)": 34.88, "step": 40605, "train_speed(iter/s)": 0.410482 }, { "acc": 0.89396801, "epoch": 1.0995586602767173, "grad_norm": 4.622782230377197, "learning_rate": 8.680010056970261e-06, "loss": 0.47842712, "memory(GiB)": 34.88, "step": 40610, "train_speed(iter/s)": 0.410484 }, { "acc": 0.89739952, "epoch": 1.099694040559933, "grad_norm": 11.196732521057129, "learning_rate": 8.67963120481846e-06, "loss": 0.54476585, "memory(GiB)": 34.88, "step": 40615, "train_speed(iter/s)": 0.410487 }, { "acc": 0.88871613, "epoch": 1.0998294208431485, "grad_norm": 6.3928446769714355, "learning_rate": 8.679252306578314e-06, "loss": 0.51026134, "memory(GiB)": 34.88, "step": 40620, "train_speed(iter/s)": 0.41049 }, { "acc": 0.88090782, "epoch": 1.0999648011263639, "grad_norm": 6.517024517059326, "learning_rate": 8.678873362254564e-06, "loss": 0.55419598, "memory(GiB)": 34.88, "step": 40625, "train_speed(iter/s)": 0.410493 }, { "acc": 0.87816219, "epoch": 1.1001001814095794, "grad_norm": 5.756251335144043, "learning_rate": 8.67849437185196e-06, "loss": 0.61466017, "memory(GiB)": 34.88, "step": 40630, "train_speed(iter/s)": 0.410496 }, { "acc": 0.86541834, "epoch": 1.100235561692795, "grad_norm": 12.09754753112793, "learning_rate": 8.678115335375252e-06, "loss": 0.76335716, "memory(GiB)": 34.88, "step": 40635, "train_speed(iter/s)": 0.410498 }, { "acc": 0.87501678, "epoch": 1.1003709419760106, "grad_norm": 8.126811981201172, "learning_rate": 8.677736252829182e-06, "loss": 0.71693306, "memory(GiB)": 34.88, "step": 40640, "train_speed(iter/s)": 0.410501 }, { "acc": 0.88981247, "epoch": 1.1005063222592262, "grad_norm": 3.7648282051086426, "learning_rate": 8.677357124218503e-06, "loss": 0.57109232, "memory(GiB)": 34.88, "step": 40645, "train_speed(iter/s)": 0.410504 }, { "acc": 0.88367214, "epoch": 1.1006417025424418, "grad_norm": 10.687302589416504, "learning_rate": 8.676977949547965e-06, "loss": 0.64929833, "memory(GiB)": 34.88, "step": 40650, "train_speed(iter/s)": 0.410507 }, { "acc": 0.88148069, "epoch": 1.1007770828256573, "grad_norm": 6.073428153991699, "learning_rate": 8.676598728822317e-06, "loss": 0.62567515, "memory(GiB)": 34.88, "step": 40655, "train_speed(iter/s)": 0.410509 }, { "acc": 0.89484463, "epoch": 1.100912463108873, "grad_norm": 10.007518768310547, "learning_rate": 8.676219462046305e-06, "loss": 0.51252861, "memory(GiB)": 34.88, "step": 40660, "train_speed(iter/s)": 0.410512 }, { "acc": 0.88505936, "epoch": 1.1010478433920883, "grad_norm": 6.325104236602783, "learning_rate": 8.675840149224686e-06, "loss": 0.66800976, "memory(GiB)": 34.88, "step": 40665, "train_speed(iter/s)": 0.410515 }, { "acc": 0.89566593, "epoch": 1.1011832236753039, "grad_norm": 13.471616744995117, "learning_rate": 8.67546079036221e-06, "loss": 0.49971504, "memory(GiB)": 34.88, "step": 40670, "train_speed(iter/s)": 0.410518 }, { "acc": 0.87877226, "epoch": 1.1013186039585194, "grad_norm": 10.272791862487793, "learning_rate": 8.67508138546363e-06, "loss": 0.70494943, "memory(GiB)": 34.88, "step": 40675, "train_speed(iter/s)": 0.41052 }, { "acc": 0.88443546, "epoch": 1.101453984241735, "grad_norm": 7.561551094055176, "learning_rate": 8.674701934533695e-06, "loss": 0.61635399, "memory(GiB)": 34.88, "step": 40680, "train_speed(iter/s)": 0.410523 }, { "acc": 0.87747841, "epoch": 1.1015893645249506, "grad_norm": 3.4392216205596924, "learning_rate": 8.674322437577162e-06, "loss": 0.58922892, "memory(GiB)": 34.88, "step": 40685, "train_speed(iter/s)": 0.410526 }, { "acc": 0.87752666, "epoch": 1.1017247448081662, "grad_norm": 4.788519859313965, "learning_rate": 8.673942894598785e-06, "loss": 0.69016857, "memory(GiB)": 34.88, "step": 40690, "train_speed(iter/s)": 0.410528 }, { "acc": 0.89648857, "epoch": 1.1018601250913818, "grad_norm": 7.374904632568359, "learning_rate": 8.673563305603317e-06, "loss": 0.48791776, "memory(GiB)": 34.88, "step": 40695, "train_speed(iter/s)": 0.410531 }, { "acc": 0.86270466, "epoch": 1.1019955053745973, "grad_norm": 10.355900764465332, "learning_rate": 8.673183670595514e-06, "loss": 0.82145557, "memory(GiB)": 34.88, "step": 40700, "train_speed(iter/s)": 0.410534 }, { "acc": 0.87939663, "epoch": 1.1021308856578127, "grad_norm": 2.6505589485168457, "learning_rate": 8.67280398958013e-06, "loss": 0.63157139, "memory(GiB)": 34.88, "step": 40705, "train_speed(iter/s)": 0.410537 }, { "acc": 0.88843622, "epoch": 1.1022662659410283, "grad_norm": 5.901360034942627, "learning_rate": 8.672424262561922e-06, "loss": 0.53049874, "memory(GiB)": 34.88, "step": 40710, "train_speed(iter/s)": 0.41054 }, { "acc": 0.8911974, "epoch": 1.1024016462242439, "grad_norm": 10.38512897491455, "learning_rate": 8.672044489545648e-06, "loss": 0.50792599, "memory(GiB)": 34.88, "step": 40715, "train_speed(iter/s)": 0.410542 }, { "acc": 0.86423626, "epoch": 1.1025370265074594, "grad_norm": 7.189640998840332, "learning_rate": 8.671664670536065e-06, "loss": 0.69386587, "memory(GiB)": 34.88, "step": 40720, "train_speed(iter/s)": 0.410545 }, { "acc": 0.8878541, "epoch": 1.102672406790675, "grad_norm": 10.29068374633789, "learning_rate": 8.67128480553793e-06, "loss": 0.60230865, "memory(GiB)": 34.88, "step": 40725, "train_speed(iter/s)": 0.410548 }, { "acc": 0.88632431, "epoch": 1.1028077870738906, "grad_norm": 7.552194118499756, "learning_rate": 8.670904894556003e-06, "loss": 0.57109728, "memory(GiB)": 34.88, "step": 40730, "train_speed(iter/s)": 0.410551 }, { "acc": 0.87015247, "epoch": 1.1029431673571062, "grad_norm": 11.78126335144043, "learning_rate": 8.670524937595042e-06, "loss": 0.72326536, "memory(GiB)": 34.88, "step": 40735, "train_speed(iter/s)": 0.410553 }, { "acc": 0.87901602, "epoch": 1.1030785476403218, "grad_norm": 9.402948379516602, "learning_rate": 8.670144934659807e-06, "loss": 0.56378875, "memory(GiB)": 34.88, "step": 40740, "train_speed(iter/s)": 0.410556 }, { "acc": 0.88843098, "epoch": 1.1032139279235371, "grad_norm": 5.928048133850098, "learning_rate": 8.66976488575506e-06, "loss": 0.64523935, "memory(GiB)": 34.88, "step": 40745, "train_speed(iter/s)": 0.410559 }, { "acc": 0.89474754, "epoch": 1.1033493082067527, "grad_norm": 9.884775161743164, "learning_rate": 8.669384790885557e-06, "loss": 0.62422485, "memory(GiB)": 34.88, "step": 40750, "train_speed(iter/s)": 0.410562 }, { "acc": 0.8547637, "epoch": 1.1034846884899683, "grad_norm": 8.601764678955078, "learning_rate": 8.669004650056066e-06, "loss": 0.78201046, "memory(GiB)": 34.88, "step": 40755, "train_speed(iter/s)": 0.410565 }, { "acc": 0.88476133, "epoch": 1.1036200687731839, "grad_norm": 7.204484462738037, "learning_rate": 8.668624463271344e-06, "loss": 0.68545284, "memory(GiB)": 34.88, "step": 40760, "train_speed(iter/s)": 0.410568 }, { "acc": 0.88820763, "epoch": 1.1037554490563994, "grad_norm": 7.448675632476807, "learning_rate": 8.668244230536154e-06, "loss": 0.57967563, "memory(GiB)": 34.88, "step": 40765, "train_speed(iter/s)": 0.41057 }, { "acc": 0.88013287, "epoch": 1.103890829339615, "grad_norm": 20.184831619262695, "learning_rate": 8.667863951855261e-06, "loss": 0.72658844, "memory(GiB)": 34.88, "step": 40770, "train_speed(iter/s)": 0.410573 }, { "acc": 0.89052105, "epoch": 1.1040262096228306, "grad_norm": 9.693138122558594, "learning_rate": 8.66748362723343e-06, "loss": 0.64074602, "memory(GiB)": 34.88, "step": 40775, "train_speed(iter/s)": 0.410576 }, { "acc": 0.87661314, "epoch": 1.1041615899060462, "grad_norm": 34.754024505615234, "learning_rate": 8.667103256675422e-06, "loss": 0.60600123, "memory(GiB)": 34.88, "step": 40780, "train_speed(iter/s)": 0.410579 }, { "acc": 0.90689201, "epoch": 1.1042969701892615, "grad_norm": 25.675373077392578, "learning_rate": 8.666722840186004e-06, "loss": 0.49912281, "memory(GiB)": 34.88, "step": 40785, "train_speed(iter/s)": 0.410581 }, { "acc": 0.89605742, "epoch": 1.1044323504724771, "grad_norm": 7.6865692138671875, "learning_rate": 8.66634237776994e-06, "loss": 0.49467297, "memory(GiB)": 34.88, "step": 40790, "train_speed(iter/s)": 0.410584 }, { "acc": 0.88231344, "epoch": 1.1045677307556927, "grad_norm": 10.574973106384277, "learning_rate": 8.665961869431996e-06, "loss": 0.69768276, "memory(GiB)": 34.88, "step": 40795, "train_speed(iter/s)": 0.410587 }, { "acc": 0.88385048, "epoch": 1.1047031110389083, "grad_norm": 16.386653900146484, "learning_rate": 8.665581315176943e-06, "loss": 0.6014019, "memory(GiB)": 34.88, "step": 40800, "train_speed(iter/s)": 0.410589 }, { "acc": 0.87624302, "epoch": 1.1048384913221239, "grad_norm": 8.736629486083984, "learning_rate": 8.665200715009542e-06, "loss": 0.64689474, "memory(GiB)": 34.88, "step": 40805, "train_speed(iter/s)": 0.410591 }, { "acc": 0.89042063, "epoch": 1.1049738716053394, "grad_norm": 11.409653663635254, "learning_rate": 8.664820068934564e-06, "loss": 0.55348911, "memory(GiB)": 34.88, "step": 40810, "train_speed(iter/s)": 0.410594 }, { "acc": 0.87559042, "epoch": 1.105109251888555, "grad_norm": 10.966800689697266, "learning_rate": 8.664439376956776e-06, "loss": 0.75616779, "memory(GiB)": 34.88, "step": 40815, "train_speed(iter/s)": 0.410597 }, { "acc": 0.8861208, "epoch": 1.1052446321717706, "grad_norm": 39.28802490234375, "learning_rate": 8.664058639080949e-06, "loss": 0.56803489, "memory(GiB)": 34.88, "step": 40820, "train_speed(iter/s)": 0.4106 }, { "acc": 0.88571949, "epoch": 1.105380012454986, "grad_norm": 10.422321319580078, "learning_rate": 8.66367785531185e-06, "loss": 0.59401274, "memory(GiB)": 34.88, "step": 40825, "train_speed(iter/s)": 0.410602 }, { "acc": 0.87385197, "epoch": 1.1055153927382015, "grad_norm": 12.487198829650879, "learning_rate": 8.663297025654252e-06, "loss": 0.66139789, "memory(GiB)": 34.88, "step": 40830, "train_speed(iter/s)": 0.410605 }, { "acc": 0.86386261, "epoch": 1.1056507730214171, "grad_norm": 15.150727272033691, "learning_rate": 8.66291615011292e-06, "loss": 0.85255651, "memory(GiB)": 34.88, "step": 40835, "train_speed(iter/s)": 0.410606 }, { "acc": 0.88240147, "epoch": 1.1057861533046327, "grad_norm": 8.621899604797363, "learning_rate": 8.662535228692634e-06, "loss": 0.57193823, "memory(GiB)": 34.88, "step": 40840, "train_speed(iter/s)": 0.410608 }, { "acc": 0.86590824, "epoch": 1.1059215335878483, "grad_norm": 5.203829765319824, "learning_rate": 8.662154261398158e-06, "loss": 0.7673068, "memory(GiB)": 34.88, "step": 40845, "train_speed(iter/s)": 0.410611 }, { "acc": 0.89476213, "epoch": 1.1060569138710639, "grad_norm": 12.63127613067627, "learning_rate": 8.661773248234266e-06, "loss": 0.55038929, "memory(GiB)": 34.88, "step": 40850, "train_speed(iter/s)": 0.410613 }, { "acc": 0.89011459, "epoch": 1.1061922941542794, "grad_norm": 8.539752960205078, "learning_rate": 8.661392189205737e-06, "loss": 0.6228096, "memory(GiB)": 34.88, "step": 40855, "train_speed(iter/s)": 0.410616 }, { "acc": 0.90806856, "epoch": 1.106327674437495, "grad_norm": 10.239779472351074, "learning_rate": 8.661011084317336e-06, "loss": 0.4269865, "memory(GiB)": 34.88, "step": 40860, "train_speed(iter/s)": 0.410618 }, { "acc": 0.8931366, "epoch": 1.1064630547207104, "grad_norm": 7.924241542816162, "learning_rate": 8.66062993357384e-06, "loss": 0.561689, "memory(GiB)": 34.88, "step": 40865, "train_speed(iter/s)": 0.41062 }, { "acc": 0.89071703, "epoch": 1.106598435003926, "grad_norm": 4.5795111656188965, "learning_rate": 8.660248736980027e-06, "loss": 0.53953834, "memory(GiB)": 34.88, "step": 40870, "train_speed(iter/s)": 0.410623 }, { "acc": 0.88505449, "epoch": 1.1067338152871415, "grad_norm": 7.446602821350098, "learning_rate": 8.659867494540669e-06, "loss": 0.62355022, "memory(GiB)": 34.88, "step": 40875, "train_speed(iter/s)": 0.410626 }, { "acc": 0.89355602, "epoch": 1.1068691955703571, "grad_norm": 7.0926432609558105, "learning_rate": 8.659486206260541e-06, "loss": 0.47884445, "memory(GiB)": 34.88, "step": 40880, "train_speed(iter/s)": 0.410628 }, { "acc": 0.8772171, "epoch": 1.1070045758535727, "grad_norm": 17.282577514648438, "learning_rate": 8.659104872144422e-06, "loss": 0.78815279, "memory(GiB)": 34.88, "step": 40885, "train_speed(iter/s)": 0.410631 }, { "acc": 0.88034945, "epoch": 1.1071399561367883, "grad_norm": 8.126029014587402, "learning_rate": 8.658723492197088e-06, "loss": 0.66989579, "memory(GiB)": 34.88, "step": 40890, "train_speed(iter/s)": 0.410633 }, { "acc": 0.89309931, "epoch": 1.1072753364200039, "grad_norm": 8.33126163482666, "learning_rate": 8.658342066423315e-06, "loss": 0.56535277, "memory(GiB)": 34.88, "step": 40895, "train_speed(iter/s)": 0.410636 }, { "acc": 0.90444527, "epoch": 1.1074107167032194, "grad_norm": 6.110560417175293, "learning_rate": 8.657960594827884e-06, "loss": 0.40369463, "memory(GiB)": 34.88, "step": 40900, "train_speed(iter/s)": 0.410639 }, { "acc": 0.90317287, "epoch": 1.1075460969864348, "grad_norm": 6.557853698730469, "learning_rate": 8.657579077415571e-06, "loss": 0.49015341, "memory(GiB)": 34.88, "step": 40905, "train_speed(iter/s)": 0.410641 }, { "acc": 0.88037567, "epoch": 1.1076814772696504, "grad_norm": 12.91069507598877, "learning_rate": 8.657197514191158e-06, "loss": 0.66114717, "memory(GiB)": 34.88, "step": 40910, "train_speed(iter/s)": 0.410643 }, { "acc": 0.88308916, "epoch": 1.107816857552866, "grad_norm": 5.475634574890137, "learning_rate": 8.656815905159422e-06, "loss": 0.6623292, "memory(GiB)": 34.88, "step": 40915, "train_speed(iter/s)": 0.410645 }, { "acc": 0.86771698, "epoch": 1.1079522378360815, "grad_norm": 15.46630573272705, "learning_rate": 8.656434250325145e-06, "loss": 0.65562849, "memory(GiB)": 34.88, "step": 40920, "train_speed(iter/s)": 0.410648 }, { "acc": 0.86203737, "epoch": 1.1080876181192971, "grad_norm": 9.630782127380371, "learning_rate": 8.656052549693107e-06, "loss": 0.71149826, "memory(GiB)": 34.88, "step": 40925, "train_speed(iter/s)": 0.41065 }, { "acc": 0.92681046, "epoch": 1.1082229984025127, "grad_norm": 5.592296123504639, "learning_rate": 8.655670803268092e-06, "loss": 0.4070375, "memory(GiB)": 34.88, "step": 40930, "train_speed(iter/s)": 0.410653 }, { "acc": 0.85673704, "epoch": 1.1083583786857283, "grad_norm": 11.200965881347656, "learning_rate": 8.65528901105488e-06, "loss": 0.72897253, "memory(GiB)": 34.88, "step": 40935, "train_speed(iter/s)": 0.410655 }, { "acc": 0.87514296, "epoch": 1.1084937589689439, "grad_norm": 10.608256340026855, "learning_rate": 8.654907173058252e-06, "loss": 0.65898008, "memory(GiB)": 34.88, "step": 40940, "train_speed(iter/s)": 0.410656 }, { "acc": 0.87155838, "epoch": 1.1086291392521592, "grad_norm": 10.458794593811035, "learning_rate": 8.654525289282995e-06, "loss": 0.66521659, "memory(GiB)": 34.88, "step": 40945, "train_speed(iter/s)": 0.410657 }, { "acc": 0.8829216, "epoch": 1.1087645195353748, "grad_norm": 12.558752059936523, "learning_rate": 8.65414335973389e-06, "loss": 0.61537132, "memory(GiB)": 34.88, "step": 40950, "train_speed(iter/s)": 0.41066 }, { "acc": 0.90456028, "epoch": 1.1088998998185904, "grad_norm": 22.85147476196289, "learning_rate": 8.653761384415722e-06, "loss": 0.45100775, "memory(GiB)": 34.88, "step": 40955, "train_speed(iter/s)": 0.410662 }, { "acc": 0.88020153, "epoch": 1.109035280101806, "grad_norm": 9.399136543273926, "learning_rate": 8.653379363333277e-06, "loss": 0.7032692, "memory(GiB)": 34.88, "step": 40960, "train_speed(iter/s)": 0.410665 }, { "acc": 0.91410341, "epoch": 1.1091706603850215, "grad_norm": 4.9294867515563965, "learning_rate": 8.652997296491342e-06, "loss": 0.47186689, "memory(GiB)": 34.88, "step": 40965, "train_speed(iter/s)": 0.410668 }, { "acc": 0.89143887, "epoch": 1.1093060406682371, "grad_norm": 6.857596397399902, "learning_rate": 8.652615183894699e-06, "loss": 0.5915596, "memory(GiB)": 34.88, "step": 40970, "train_speed(iter/s)": 0.410671 }, { "acc": 0.8635169, "epoch": 1.1094414209514527, "grad_norm": 10.4247465133667, "learning_rate": 8.652233025548139e-06, "loss": 0.7792017, "memory(GiB)": 34.88, "step": 40975, "train_speed(iter/s)": 0.410674 }, { "acc": 0.91600056, "epoch": 1.1095768012346683, "grad_norm": 6.0903215408325195, "learning_rate": 8.651850821456444e-06, "loss": 0.46205282, "memory(GiB)": 34.88, "step": 40980, "train_speed(iter/s)": 0.410676 }, { "acc": 0.86498489, "epoch": 1.1097121815178836, "grad_norm": 17.387104034423828, "learning_rate": 8.651468571624409e-06, "loss": 0.72553878, "memory(GiB)": 34.88, "step": 40985, "train_speed(iter/s)": 0.410678 }, { "acc": 0.88734627, "epoch": 1.1098475618010992, "grad_norm": 9.500259399414062, "learning_rate": 8.651086276056815e-06, "loss": 0.57545962, "memory(GiB)": 34.88, "step": 40990, "train_speed(iter/s)": 0.410681 }, { "acc": 0.88287878, "epoch": 1.1099829420843148, "grad_norm": 14.736488342285156, "learning_rate": 8.650703934758457e-06, "loss": 0.5950604, "memory(GiB)": 34.88, "step": 40995, "train_speed(iter/s)": 0.410684 }, { "acc": 0.87090282, "epoch": 1.1101183223675304, "grad_norm": 6.606959819793701, "learning_rate": 8.65032154773412e-06, "loss": 0.76074862, "memory(GiB)": 34.88, "step": 41000, "train_speed(iter/s)": 0.410687 }, { "acc": 0.90682678, "epoch": 1.110253702650746, "grad_norm": 6.299574375152588, "learning_rate": 8.649939114988599e-06, "loss": 0.55507994, "memory(GiB)": 34.88, "step": 41005, "train_speed(iter/s)": 0.41069 }, { "acc": 0.88813305, "epoch": 1.1103890829339615, "grad_norm": 6.26638126373291, "learning_rate": 8.649556636526678e-06, "loss": 0.59362583, "memory(GiB)": 34.88, "step": 41010, "train_speed(iter/s)": 0.410692 }, { "acc": 0.8957902, "epoch": 1.1105244632171771, "grad_norm": 6.934609413146973, "learning_rate": 8.649174112353156e-06, "loss": 0.56545682, "memory(GiB)": 34.88, "step": 41015, "train_speed(iter/s)": 0.410695 }, { "acc": 0.88911028, "epoch": 1.1106598435003927, "grad_norm": 8.055442810058594, "learning_rate": 8.648791542472818e-06, "loss": 0.64701138, "memory(GiB)": 34.88, "step": 41020, "train_speed(iter/s)": 0.410697 }, { "acc": 0.89478607, "epoch": 1.110795223783608, "grad_norm": 6.710092067718506, "learning_rate": 8.648408926890463e-06, "loss": 0.46598282, "memory(GiB)": 34.88, "step": 41025, "train_speed(iter/s)": 0.4107 }, { "acc": 0.89404297, "epoch": 1.1109306040668236, "grad_norm": 9.148700714111328, "learning_rate": 8.648026265610878e-06, "loss": 0.52433496, "memory(GiB)": 34.88, "step": 41030, "train_speed(iter/s)": 0.410702 }, { "acc": 0.90271645, "epoch": 1.1110659843500392, "grad_norm": 4.271521091461182, "learning_rate": 8.64764355863886e-06, "loss": 0.46920795, "memory(GiB)": 34.88, "step": 41035, "train_speed(iter/s)": 0.410705 }, { "acc": 0.89049454, "epoch": 1.1112013646332548, "grad_norm": 5.367644309997559, "learning_rate": 8.6472608059792e-06, "loss": 0.5024909, "memory(GiB)": 34.88, "step": 41040, "train_speed(iter/s)": 0.410708 }, { "acc": 0.88003101, "epoch": 1.1113367449164704, "grad_norm": 9.473100662231445, "learning_rate": 8.646878007636698e-06, "loss": 0.60636721, "memory(GiB)": 34.88, "step": 41045, "train_speed(iter/s)": 0.41071 }, { "acc": 0.8763361, "epoch": 1.111472125199686, "grad_norm": 10.715907096862793, "learning_rate": 8.646495163616147e-06, "loss": 0.7328908, "memory(GiB)": 34.88, "step": 41050, "train_speed(iter/s)": 0.410713 }, { "acc": 0.88897514, "epoch": 1.1116075054829015, "grad_norm": 11.493971824645996, "learning_rate": 8.64611227392234e-06, "loss": 0.56912203, "memory(GiB)": 34.88, "step": 41055, "train_speed(iter/s)": 0.410716 }, { "acc": 0.88567114, "epoch": 1.111742885766117, "grad_norm": 8.922345161437988, "learning_rate": 8.645729338560077e-06, "loss": 0.50615168, "memory(GiB)": 34.88, "step": 41060, "train_speed(iter/s)": 0.410719 }, { "acc": 0.89534836, "epoch": 1.1118782660493325, "grad_norm": 6.566896915435791, "learning_rate": 8.645346357534153e-06, "loss": 0.48992682, "memory(GiB)": 34.88, "step": 41065, "train_speed(iter/s)": 0.410721 }, { "acc": 0.87566662, "epoch": 1.112013646332548, "grad_norm": 8.964760780334473, "learning_rate": 8.644963330849367e-06, "loss": 0.58616796, "memory(GiB)": 34.88, "step": 41070, "train_speed(iter/s)": 0.410724 }, { "acc": 0.88254814, "epoch": 1.1121490266157636, "grad_norm": 10.80127239227295, "learning_rate": 8.644580258510516e-06, "loss": 0.65838208, "memory(GiB)": 34.88, "step": 41075, "train_speed(iter/s)": 0.410727 }, { "acc": 0.8862298, "epoch": 1.1122844068989792, "grad_norm": 14.28444766998291, "learning_rate": 8.644197140522396e-06, "loss": 0.56704512, "memory(GiB)": 34.88, "step": 41080, "train_speed(iter/s)": 0.41073 }, { "acc": 0.86342068, "epoch": 1.1124197871821948, "grad_norm": 16.658353805541992, "learning_rate": 8.643813976889814e-06, "loss": 0.7841939, "memory(GiB)": 34.88, "step": 41085, "train_speed(iter/s)": 0.410732 }, { "acc": 0.89432049, "epoch": 1.1125551674654104, "grad_norm": 7.25650691986084, "learning_rate": 8.643430767617564e-06, "loss": 0.55212312, "memory(GiB)": 34.88, "step": 41090, "train_speed(iter/s)": 0.410735 }, { "acc": 0.89598351, "epoch": 1.112690547748626, "grad_norm": 8.867816925048828, "learning_rate": 8.643047512710448e-06, "loss": 0.57772989, "memory(GiB)": 34.88, "step": 41095, "train_speed(iter/s)": 0.410738 }, { "acc": 0.88240557, "epoch": 1.1128259280318415, "grad_norm": 10.776275634765625, "learning_rate": 8.642664212173267e-06, "loss": 0.65486746, "memory(GiB)": 34.88, "step": 41100, "train_speed(iter/s)": 0.410741 }, { "acc": 0.89262428, "epoch": 1.112961308315057, "grad_norm": 8.194043159484863, "learning_rate": 8.642280866010821e-06, "loss": 0.5448494, "memory(GiB)": 34.88, "step": 41105, "train_speed(iter/s)": 0.410744 }, { "acc": 0.89013586, "epoch": 1.1130966885982725, "grad_norm": 5.858739376068115, "learning_rate": 8.641897474227912e-06, "loss": 0.59108424, "memory(GiB)": 34.88, "step": 41110, "train_speed(iter/s)": 0.410746 }, { "acc": 0.90059566, "epoch": 1.113232068881488, "grad_norm": 7.746875286102295, "learning_rate": 8.641514036829348e-06, "loss": 0.52143459, "memory(GiB)": 34.88, "step": 41115, "train_speed(iter/s)": 0.410749 }, { "acc": 0.88221989, "epoch": 1.1133674491647036, "grad_norm": 18.584861755371094, "learning_rate": 8.641130553819927e-06, "loss": 0.63373632, "memory(GiB)": 34.88, "step": 41120, "train_speed(iter/s)": 0.410752 }, { "acc": 0.91309547, "epoch": 1.1135028294479192, "grad_norm": 47.41482925415039, "learning_rate": 8.640747025204455e-06, "loss": 0.41370606, "memory(GiB)": 34.88, "step": 41125, "train_speed(iter/s)": 0.410754 }, { "acc": 0.89022989, "epoch": 1.1136382097311348, "grad_norm": 10.718731880187988, "learning_rate": 8.640363450987736e-06, "loss": 0.6091754, "memory(GiB)": 34.88, "step": 41130, "train_speed(iter/s)": 0.410757 }, { "acc": 0.88190804, "epoch": 1.1137735900143504, "grad_norm": 7.299324035644531, "learning_rate": 8.639979831174576e-06, "loss": 0.57444496, "memory(GiB)": 34.88, "step": 41135, "train_speed(iter/s)": 0.41076 }, { "acc": 0.90068588, "epoch": 1.1139089702975657, "grad_norm": 8.179597854614258, "learning_rate": 8.639596165769777e-06, "loss": 0.55642328, "memory(GiB)": 34.88, "step": 41140, "train_speed(iter/s)": 0.410763 }, { "acc": 0.89533205, "epoch": 1.1140443505807813, "grad_norm": 16.09147834777832, "learning_rate": 8.639212454778147e-06, "loss": 0.52639685, "memory(GiB)": 34.88, "step": 41145, "train_speed(iter/s)": 0.410765 }, { "acc": 0.87194118, "epoch": 1.114179730863997, "grad_norm": 6.353736877441406, "learning_rate": 8.638828698204496e-06, "loss": 0.67628932, "memory(GiB)": 34.88, "step": 41150, "train_speed(iter/s)": 0.410768 }, { "acc": 0.88925247, "epoch": 1.1143151111472125, "grad_norm": 6.455788612365723, "learning_rate": 8.638444896053628e-06, "loss": 0.55213771, "memory(GiB)": 34.88, "step": 41155, "train_speed(iter/s)": 0.410771 }, { "acc": 0.89861908, "epoch": 1.114450491430428, "grad_norm": 7.0450849533081055, "learning_rate": 8.638061048330351e-06, "loss": 0.53201189, "memory(GiB)": 34.88, "step": 41160, "train_speed(iter/s)": 0.410774 }, { "acc": 0.87964764, "epoch": 1.1145858717136436, "grad_norm": 14.974778175354004, "learning_rate": 8.637677155039475e-06, "loss": 0.53538561, "memory(GiB)": 34.88, "step": 41165, "train_speed(iter/s)": 0.410777 }, { "acc": 0.89966602, "epoch": 1.1147212519968592, "grad_norm": 6.264763832092285, "learning_rate": 8.637293216185809e-06, "loss": 0.49078255, "memory(GiB)": 34.88, "step": 41170, "train_speed(iter/s)": 0.410779 }, { "acc": 0.874403, "epoch": 1.1148566322800748, "grad_norm": 5.864406585693359, "learning_rate": 8.63690923177416e-06, "loss": 0.59994955, "memory(GiB)": 34.88, "step": 41175, "train_speed(iter/s)": 0.410782 }, { "acc": 0.88144703, "epoch": 1.1149920125632904, "grad_norm": 14.661746978759766, "learning_rate": 8.636525201809341e-06, "loss": 0.67729716, "memory(GiB)": 34.88, "step": 41180, "train_speed(iter/s)": 0.410785 }, { "acc": 0.89869862, "epoch": 1.1151273928465057, "grad_norm": 8.194869995117188, "learning_rate": 8.63614112629616e-06, "loss": 0.52993307, "memory(GiB)": 34.88, "step": 41185, "train_speed(iter/s)": 0.410788 }, { "acc": 0.8861784, "epoch": 1.1152627731297213, "grad_norm": 8.888936996459961, "learning_rate": 8.635757005239431e-06, "loss": 0.55959911, "memory(GiB)": 34.88, "step": 41190, "train_speed(iter/s)": 0.41079 }, { "acc": 0.89678726, "epoch": 1.115398153412937, "grad_norm": 6.925142765045166, "learning_rate": 8.635372838643966e-06, "loss": 0.61746254, "memory(GiB)": 34.88, "step": 41195, "train_speed(iter/s)": 0.410793 }, { "acc": 0.89493093, "epoch": 1.1155335336961525, "grad_norm": 5.611784934997559, "learning_rate": 8.634988626514574e-06, "loss": 0.5675704, "memory(GiB)": 34.88, "step": 41200, "train_speed(iter/s)": 0.410796 }, { "acc": 0.90110254, "epoch": 1.115668913979368, "grad_norm": 22.608104705810547, "learning_rate": 8.634604368856072e-06, "loss": 0.46344109, "memory(GiB)": 34.88, "step": 41205, "train_speed(iter/s)": 0.410799 }, { "acc": 0.88680687, "epoch": 1.1158042942625837, "grad_norm": 26.194320678710938, "learning_rate": 8.634220065673269e-06, "loss": 0.5590848, "memory(GiB)": 34.88, "step": 41210, "train_speed(iter/s)": 0.410801 }, { "acc": 0.88317938, "epoch": 1.1159396745457992, "grad_norm": 16.707307815551758, "learning_rate": 8.633835716970983e-06, "loss": 0.70403228, "memory(GiB)": 34.88, "step": 41215, "train_speed(iter/s)": 0.410804 }, { "acc": 0.89221096, "epoch": 1.1160750548290146, "grad_norm": 9.269295692443848, "learning_rate": 8.63345132275403e-06, "loss": 0.57420692, "memory(GiB)": 34.88, "step": 41220, "train_speed(iter/s)": 0.410807 }, { "acc": 0.87461624, "epoch": 1.1162104351122302, "grad_norm": 13.617300033569336, "learning_rate": 8.63306688302722e-06, "loss": 0.69457235, "memory(GiB)": 34.88, "step": 41225, "train_speed(iter/s)": 0.41081 }, { "acc": 0.89427042, "epoch": 1.1163458153954458, "grad_norm": 12.524796485900879, "learning_rate": 8.632682397795372e-06, "loss": 0.54575462, "memory(GiB)": 34.88, "step": 41230, "train_speed(iter/s)": 0.410812 }, { "acc": 0.88155785, "epoch": 1.1164811956786613, "grad_norm": 13.049567222595215, "learning_rate": 8.632297867063305e-06, "loss": 0.61322527, "memory(GiB)": 34.88, "step": 41235, "train_speed(iter/s)": 0.410815 }, { "acc": 0.89433346, "epoch": 1.116616575961877, "grad_norm": 8.173735618591309, "learning_rate": 8.631913290835832e-06, "loss": 0.50452394, "memory(GiB)": 34.88, "step": 41240, "train_speed(iter/s)": 0.410818 }, { "acc": 0.86953459, "epoch": 1.1167519562450925, "grad_norm": 10.507134437561035, "learning_rate": 8.631528669117771e-06, "loss": 0.72211881, "memory(GiB)": 34.88, "step": 41245, "train_speed(iter/s)": 0.41082 }, { "acc": 0.8676712, "epoch": 1.116887336528308, "grad_norm": 18.29280662536621, "learning_rate": 8.631144001913942e-06, "loss": 0.73702288, "memory(GiB)": 34.88, "step": 41250, "train_speed(iter/s)": 0.410823 }, { "acc": 0.89125099, "epoch": 1.1170227168115237, "grad_norm": 10.979547500610352, "learning_rate": 8.630759289229161e-06, "loss": 0.62555943, "memory(GiB)": 34.88, "step": 41255, "train_speed(iter/s)": 0.410825 }, { "acc": 0.86543579, "epoch": 1.1171580970947392, "grad_norm": 15.508190155029297, "learning_rate": 8.63037453106825e-06, "loss": 0.62799811, "memory(GiB)": 34.88, "step": 41260, "train_speed(iter/s)": 0.410828 }, { "acc": 0.86366615, "epoch": 1.1172934773779546, "grad_norm": 15.115728378295898, "learning_rate": 8.629989727436027e-06, "loss": 0.71168308, "memory(GiB)": 34.88, "step": 41265, "train_speed(iter/s)": 0.410831 }, { "acc": 0.87363958, "epoch": 1.1174288576611702, "grad_norm": 8.433809280395508, "learning_rate": 8.629604878337314e-06, "loss": 0.73895559, "memory(GiB)": 34.88, "step": 41270, "train_speed(iter/s)": 0.410834 }, { "acc": 0.87178192, "epoch": 1.1175642379443858, "grad_norm": 6.935451030731201, "learning_rate": 8.629219983776932e-06, "loss": 0.6734982, "memory(GiB)": 34.88, "step": 41275, "train_speed(iter/s)": 0.410837 }, { "acc": 0.90093174, "epoch": 1.1176996182276013, "grad_norm": 5.901663303375244, "learning_rate": 8.628835043759701e-06, "loss": 0.57388105, "memory(GiB)": 34.88, "step": 41280, "train_speed(iter/s)": 0.410839 }, { "acc": 0.88902035, "epoch": 1.117834998510817, "grad_norm": 8.402373313903809, "learning_rate": 8.628450058290445e-06, "loss": 0.62126265, "memory(GiB)": 34.88, "step": 41285, "train_speed(iter/s)": 0.410841 }, { "acc": 0.86665325, "epoch": 1.1179703787940325, "grad_norm": 7.671685218811035, "learning_rate": 8.628065027373983e-06, "loss": 0.68470364, "memory(GiB)": 34.88, "step": 41290, "train_speed(iter/s)": 0.410844 }, { "acc": 0.87814159, "epoch": 1.118105759077248, "grad_norm": 8.205713272094727, "learning_rate": 8.627679951015143e-06, "loss": 0.65363617, "memory(GiB)": 34.88, "step": 41295, "train_speed(iter/s)": 0.410847 }, { "acc": 0.88657036, "epoch": 1.1182411393604634, "grad_norm": 8.267634391784668, "learning_rate": 8.627294829218746e-06, "loss": 0.56231279, "memory(GiB)": 34.88, "step": 41300, "train_speed(iter/s)": 0.41085 }, { "acc": 0.85293751, "epoch": 1.118376519643679, "grad_norm": 17.426984786987305, "learning_rate": 8.626909661989616e-06, "loss": 0.82213593, "memory(GiB)": 34.88, "step": 41305, "train_speed(iter/s)": 0.410852 }, { "acc": 0.89343405, "epoch": 1.1185118999268946, "grad_norm": 6.239060401916504, "learning_rate": 8.626524449332581e-06, "loss": 0.50798435, "memory(GiB)": 34.88, "step": 41310, "train_speed(iter/s)": 0.410855 }, { "acc": 0.90631809, "epoch": 1.1186472802101102, "grad_norm": 5.885732173919678, "learning_rate": 8.626139191252464e-06, "loss": 0.47929907, "memory(GiB)": 34.88, "step": 41315, "train_speed(iter/s)": 0.410858 }, { "acc": 0.88566799, "epoch": 1.1187826604933258, "grad_norm": 13.885306358337402, "learning_rate": 8.625753887754093e-06, "loss": 0.57988706, "memory(GiB)": 34.88, "step": 41320, "train_speed(iter/s)": 0.41086 }, { "acc": 0.890131, "epoch": 1.1189180407765413, "grad_norm": 5.178289890289307, "learning_rate": 8.625368538842293e-06, "loss": 0.49105797, "memory(GiB)": 34.88, "step": 41325, "train_speed(iter/s)": 0.410863 }, { "acc": 0.85656757, "epoch": 1.119053421059757, "grad_norm": 12.396145820617676, "learning_rate": 8.62498314452189e-06, "loss": 0.84682865, "memory(GiB)": 34.88, "step": 41330, "train_speed(iter/s)": 0.410865 }, { "acc": 0.89522285, "epoch": 1.1191888013429725, "grad_norm": 7.251284122467041, "learning_rate": 8.624597704797713e-06, "loss": 0.62589331, "memory(GiB)": 34.88, "step": 41335, "train_speed(iter/s)": 0.410868 }, { "acc": 0.88282967, "epoch": 1.119324181626188, "grad_norm": 9.814724922180176, "learning_rate": 8.624212219674592e-06, "loss": 0.61960211, "memory(GiB)": 34.88, "step": 41340, "train_speed(iter/s)": 0.410871 }, { "acc": 0.88998184, "epoch": 1.1194595619094034, "grad_norm": 8.783378601074219, "learning_rate": 8.623826689157356e-06, "loss": 0.59735823, "memory(GiB)": 34.88, "step": 41345, "train_speed(iter/s)": 0.410873 }, { "acc": 0.87706966, "epoch": 1.119594942192619, "grad_norm": 8.45975112915039, "learning_rate": 8.623441113250833e-06, "loss": 0.61134205, "memory(GiB)": 34.88, "step": 41350, "train_speed(iter/s)": 0.410876 }, { "acc": 0.89528322, "epoch": 1.1197303224758346, "grad_norm": 3.6171066761016846, "learning_rate": 8.623055491959852e-06, "loss": 0.50376825, "memory(GiB)": 34.88, "step": 41355, "train_speed(iter/s)": 0.410879 }, { "acc": 0.89176922, "epoch": 1.1198657027590502, "grad_norm": 11.290072441101074, "learning_rate": 8.622669825289247e-06, "loss": 0.56534681, "memory(GiB)": 34.88, "step": 41360, "train_speed(iter/s)": 0.410881 }, { "acc": 0.89221191, "epoch": 1.1200010830422658, "grad_norm": 5.709841728210449, "learning_rate": 8.622284113243848e-06, "loss": 0.58754435, "memory(GiB)": 34.88, "step": 41365, "train_speed(iter/s)": 0.410884 }, { "acc": 0.88342247, "epoch": 1.1201364633254813, "grad_norm": 7.744701862335205, "learning_rate": 8.621898355828483e-06, "loss": 0.58534274, "memory(GiB)": 34.88, "step": 41370, "train_speed(iter/s)": 0.410887 }, { "acc": 0.91227074, "epoch": 1.120271843608697, "grad_norm": 7.9780707359313965, "learning_rate": 8.621512553047989e-06, "loss": 0.49855242, "memory(GiB)": 34.88, "step": 41375, "train_speed(iter/s)": 0.41089 }, { "acc": 0.87373028, "epoch": 1.1204072238919123, "grad_norm": 13.99600887298584, "learning_rate": 8.621126704907198e-06, "loss": 0.72395344, "memory(GiB)": 34.88, "step": 41380, "train_speed(iter/s)": 0.410893 }, { "acc": 0.88916626, "epoch": 1.1205426041751279, "grad_norm": 5.060513973236084, "learning_rate": 8.620740811410944e-06, "loss": 0.545544, "memory(GiB)": 34.88, "step": 41385, "train_speed(iter/s)": 0.410895 }, { "acc": 0.8995575, "epoch": 1.1206779844583434, "grad_norm": 10.523141860961914, "learning_rate": 8.620354872564059e-06, "loss": 0.56658578, "memory(GiB)": 34.88, "step": 41390, "train_speed(iter/s)": 0.410897 }, { "acc": 0.90163746, "epoch": 1.120813364741559, "grad_norm": 9.121180534362793, "learning_rate": 8.619968888371377e-06, "loss": 0.44176884, "memory(GiB)": 34.88, "step": 41395, "train_speed(iter/s)": 0.4109 }, { "acc": 0.89756947, "epoch": 1.1209487450247746, "grad_norm": 4.678865432739258, "learning_rate": 8.619582858837737e-06, "loss": 0.59728642, "memory(GiB)": 34.88, "step": 41400, "train_speed(iter/s)": 0.410903 }, { "acc": 0.88107281, "epoch": 1.1210841253079902, "grad_norm": 4.8796281814575195, "learning_rate": 8.619196783967973e-06, "loss": 0.65303812, "memory(GiB)": 34.88, "step": 41405, "train_speed(iter/s)": 0.410905 }, { "acc": 0.88159389, "epoch": 1.1212195055912058, "grad_norm": 12.78593921661377, "learning_rate": 8.61881066376692e-06, "loss": 0.67996426, "memory(GiB)": 34.88, "step": 41410, "train_speed(iter/s)": 0.410908 }, { "acc": 0.89946957, "epoch": 1.1213548858744213, "grad_norm": 5.12140417098999, "learning_rate": 8.618424498239416e-06, "loss": 0.49139385, "memory(GiB)": 34.88, "step": 41415, "train_speed(iter/s)": 0.410911 }, { "acc": 0.88774643, "epoch": 1.121490266157637, "grad_norm": 8.073816299438477, "learning_rate": 8.618038287390298e-06, "loss": 0.62710876, "memory(GiB)": 34.88, "step": 41420, "train_speed(iter/s)": 0.410913 }, { "acc": 0.88855648, "epoch": 1.1216256464408523, "grad_norm": 9.721198081970215, "learning_rate": 8.617652031224407e-06, "loss": 0.57606096, "memory(GiB)": 34.88, "step": 41425, "train_speed(iter/s)": 0.410916 }, { "acc": 0.87114449, "epoch": 1.1217610267240679, "grad_norm": 10.564888954162598, "learning_rate": 8.617265729746578e-06, "loss": 0.66679544, "memory(GiB)": 34.88, "step": 41430, "train_speed(iter/s)": 0.410918 }, { "acc": 0.8533226, "epoch": 1.1218964070072834, "grad_norm": 7.649325847625732, "learning_rate": 8.616879382961649e-06, "loss": 0.83012371, "memory(GiB)": 34.88, "step": 41435, "train_speed(iter/s)": 0.410921 }, { "acc": 0.89176035, "epoch": 1.122031787290499, "grad_norm": 7.19600772857666, "learning_rate": 8.616492990874464e-06, "loss": 0.56289597, "memory(GiB)": 34.88, "step": 41440, "train_speed(iter/s)": 0.410924 }, { "acc": 0.85845776, "epoch": 1.1221671675737146, "grad_norm": 4.0471510887146, "learning_rate": 8.616106553489861e-06, "loss": 0.74909577, "memory(GiB)": 34.88, "step": 41445, "train_speed(iter/s)": 0.410926 }, { "acc": 0.87961521, "epoch": 1.1223025478569302, "grad_norm": 8.413666725158691, "learning_rate": 8.61572007081268e-06, "loss": 0.58152189, "memory(GiB)": 34.88, "step": 41450, "train_speed(iter/s)": 0.410929 }, { "acc": 0.88280334, "epoch": 1.1224379281401458, "grad_norm": 5.983710765838623, "learning_rate": 8.615333542847767e-06, "loss": 0.62978029, "memory(GiB)": 34.88, "step": 41455, "train_speed(iter/s)": 0.410932 }, { "acc": 0.88078604, "epoch": 1.1225733084233611, "grad_norm": 12.23012924194336, "learning_rate": 8.614946969599958e-06, "loss": 0.62228241, "memory(GiB)": 34.88, "step": 41460, "train_speed(iter/s)": 0.410935 }, { "acc": 0.8719326, "epoch": 1.1227086887065767, "grad_norm": 8.972220420837402, "learning_rate": 8.614560351074099e-06, "loss": 0.69550705, "memory(GiB)": 34.88, "step": 41465, "train_speed(iter/s)": 0.410937 }, { "acc": 0.87440796, "epoch": 1.1228440689897923, "grad_norm": 15.000832557678223, "learning_rate": 8.614173687275033e-06, "loss": 0.68758502, "memory(GiB)": 34.88, "step": 41470, "train_speed(iter/s)": 0.41094 }, { "acc": 0.88370275, "epoch": 1.1229794492730079, "grad_norm": 8.794771194458008, "learning_rate": 8.613786978207604e-06, "loss": 0.59240646, "memory(GiB)": 34.88, "step": 41475, "train_speed(iter/s)": 0.410943 }, { "acc": 0.87711477, "epoch": 1.1231148295562234, "grad_norm": 7.4391770362854, "learning_rate": 8.613400223876654e-06, "loss": 0.60875454, "memory(GiB)": 34.88, "step": 41480, "train_speed(iter/s)": 0.410945 }, { "acc": 0.86832294, "epoch": 1.123250209839439, "grad_norm": 7.637993335723877, "learning_rate": 8.61301342428703e-06, "loss": 0.67714028, "memory(GiB)": 34.88, "step": 41485, "train_speed(iter/s)": 0.410948 }, { "acc": 0.89039097, "epoch": 1.1233855901226546, "grad_norm": 5.555266380310059, "learning_rate": 8.612626579443576e-06, "loss": 0.53396854, "memory(GiB)": 34.88, "step": 41490, "train_speed(iter/s)": 0.41095 }, { "acc": 0.90971985, "epoch": 1.1235209704058702, "grad_norm": 4.331420421600342, "learning_rate": 8.612239689351139e-06, "loss": 0.43225784, "memory(GiB)": 34.88, "step": 41495, "train_speed(iter/s)": 0.410953 }, { "acc": 0.89141674, "epoch": 1.1236563506890858, "grad_norm": 13.394838333129883, "learning_rate": 8.611852754014564e-06, "loss": 0.52437916, "memory(GiB)": 34.88, "step": 41500, "train_speed(iter/s)": 0.410956 }, { "acc": 0.88077192, "epoch": 1.1237917309723011, "grad_norm": 11.292413711547852, "learning_rate": 8.611465773438703e-06, "loss": 0.71357107, "memory(GiB)": 34.88, "step": 41505, "train_speed(iter/s)": 0.410959 }, { "acc": 0.85680523, "epoch": 1.1239271112555167, "grad_norm": 6.879952430725098, "learning_rate": 8.611078747628399e-06, "loss": 0.78089423, "memory(GiB)": 34.88, "step": 41510, "train_speed(iter/s)": 0.410962 }, { "acc": 0.8890316, "epoch": 1.1240624915387323, "grad_norm": 7.210666656494141, "learning_rate": 8.6106916765885e-06, "loss": 0.59896002, "memory(GiB)": 34.88, "step": 41515, "train_speed(iter/s)": 0.410964 }, { "acc": 0.87090721, "epoch": 1.1241978718219479, "grad_norm": 9.2251558303833, "learning_rate": 8.610304560323855e-06, "loss": 0.61140032, "memory(GiB)": 34.88, "step": 41520, "train_speed(iter/s)": 0.410967 }, { "acc": 0.89677467, "epoch": 1.1243332521051634, "grad_norm": 11.59229564666748, "learning_rate": 8.609917398839317e-06, "loss": 0.56006193, "memory(GiB)": 34.88, "step": 41525, "train_speed(iter/s)": 0.410969 }, { "acc": 0.86727972, "epoch": 1.124468632388379, "grad_norm": 7.903563022613525, "learning_rate": 8.609530192139734e-06, "loss": 0.69305634, "memory(GiB)": 34.88, "step": 41530, "train_speed(iter/s)": 0.410972 }, { "acc": 0.90018291, "epoch": 1.1246040126715946, "grad_norm": 7.217320919036865, "learning_rate": 8.609142940229955e-06, "loss": 0.49905581, "memory(GiB)": 34.88, "step": 41535, "train_speed(iter/s)": 0.410975 }, { "acc": 0.89049826, "epoch": 1.12473939295481, "grad_norm": 16.181787490844727, "learning_rate": 8.608755643114832e-06, "loss": 0.62971616, "memory(GiB)": 34.88, "step": 41540, "train_speed(iter/s)": 0.410978 }, { "acc": 0.87322502, "epoch": 1.1248747732380255, "grad_norm": 11.65361499786377, "learning_rate": 8.608368300799217e-06, "loss": 0.59433336, "memory(GiB)": 34.88, "step": 41545, "train_speed(iter/s)": 0.410981 }, { "acc": 0.88606358, "epoch": 1.1250101535212411, "grad_norm": 7.368857383728027, "learning_rate": 8.607980913287963e-06, "loss": 0.54259071, "memory(GiB)": 34.88, "step": 41550, "train_speed(iter/s)": 0.410983 }, { "acc": 0.89087887, "epoch": 1.1251455338044567, "grad_norm": 8.277533531188965, "learning_rate": 8.607593480585921e-06, "loss": 0.6006464, "memory(GiB)": 34.88, "step": 41555, "train_speed(iter/s)": 0.410986 }, { "acc": 0.89852066, "epoch": 1.1252809140876723, "grad_norm": 9.179338455200195, "learning_rate": 8.607206002697947e-06, "loss": 0.48011885, "memory(GiB)": 34.88, "step": 41560, "train_speed(iter/s)": 0.410988 }, { "acc": 0.89014807, "epoch": 1.1254162943708879, "grad_norm": 6.996517181396484, "learning_rate": 8.60681847962889e-06, "loss": 0.66562328, "memory(GiB)": 34.88, "step": 41565, "train_speed(iter/s)": 0.410991 }, { "acc": 0.91701736, "epoch": 1.1255516746541034, "grad_norm": 13.346526145935059, "learning_rate": 8.60643091138361e-06, "loss": 0.39029782, "memory(GiB)": 34.88, "step": 41570, "train_speed(iter/s)": 0.410994 }, { "acc": 0.89353409, "epoch": 1.125687054937319, "grad_norm": 6.924015522003174, "learning_rate": 8.606043297966958e-06, "loss": 0.57358556, "memory(GiB)": 34.88, "step": 41575, "train_speed(iter/s)": 0.410996 }, { "acc": 0.89157324, "epoch": 1.1258224352205346, "grad_norm": 6.627196788787842, "learning_rate": 8.605655639383792e-06, "loss": 0.52229633, "memory(GiB)": 34.88, "step": 41580, "train_speed(iter/s)": 0.410999 }, { "acc": 0.89986305, "epoch": 1.12595781550375, "grad_norm": 7.34337043762207, "learning_rate": 8.605267935638968e-06, "loss": 0.46712608, "memory(GiB)": 34.88, "step": 41585, "train_speed(iter/s)": 0.411002 }, { "acc": 0.86911869, "epoch": 1.1260931957869655, "grad_norm": 13.56994342803955, "learning_rate": 8.604880186737342e-06, "loss": 0.74848971, "memory(GiB)": 34.88, "step": 41590, "train_speed(iter/s)": 0.411005 }, { "acc": 0.89502926, "epoch": 1.1262285760701811, "grad_norm": 4.058531761169434, "learning_rate": 8.604492392683771e-06, "loss": 0.61662583, "memory(GiB)": 34.88, "step": 41595, "train_speed(iter/s)": 0.411008 }, { "acc": 0.86783094, "epoch": 1.1263639563533967, "grad_norm": 7.007258415222168, "learning_rate": 8.604104553483114e-06, "loss": 0.75573773, "memory(GiB)": 34.88, "step": 41600, "train_speed(iter/s)": 0.41101 }, { "acc": 0.89331322, "epoch": 1.1264993366366123, "grad_norm": 9.700311660766602, "learning_rate": 8.60371666914023e-06, "loss": 0.54123445, "memory(GiB)": 34.88, "step": 41605, "train_speed(iter/s)": 0.411013 }, { "acc": 0.90261478, "epoch": 1.1266347169198279, "grad_norm": 9.05102825164795, "learning_rate": 8.603328739659975e-06, "loss": 0.51443706, "memory(GiB)": 34.88, "step": 41610, "train_speed(iter/s)": 0.411016 }, { "acc": 0.86765671, "epoch": 1.1267700972030434, "grad_norm": 10.96094036102295, "learning_rate": 8.602940765047212e-06, "loss": 0.66777253, "memory(GiB)": 34.88, "step": 41615, "train_speed(iter/s)": 0.411018 }, { "acc": 0.88911572, "epoch": 1.1269054774862588, "grad_norm": 9.382287979125977, "learning_rate": 8.6025527453068e-06, "loss": 0.57098093, "memory(GiB)": 34.88, "step": 41620, "train_speed(iter/s)": 0.41102 }, { "acc": 0.89370537, "epoch": 1.1270408577694744, "grad_norm": 7.830651760101318, "learning_rate": 8.602164680443598e-06, "loss": 0.52817464, "memory(GiB)": 34.88, "step": 41625, "train_speed(iter/s)": 0.411023 }, { "acc": 0.89225388, "epoch": 1.12717623805269, "grad_norm": 17.67457389831543, "learning_rate": 8.601776570462469e-06, "loss": 0.50857515, "memory(GiB)": 34.88, "step": 41630, "train_speed(iter/s)": 0.411026 }, { "acc": 0.87261705, "epoch": 1.1273116183359055, "grad_norm": 9.226180076599121, "learning_rate": 8.601388415368273e-06, "loss": 0.65552759, "memory(GiB)": 34.88, "step": 41635, "train_speed(iter/s)": 0.411029 }, { "acc": 0.89791813, "epoch": 1.1274469986191211, "grad_norm": 11.520642280578613, "learning_rate": 8.601000215165875e-06, "loss": 0.5500484, "memory(GiB)": 34.88, "step": 41640, "train_speed(iter/s)": 0.411032 }, { "acc": 0.8666914, "epoch": 1.1275823789023367, "grad_norm": 12.202301979064941, "learning_rate": 8.600611969860138e-06, "loss": 0.69124908, "memory(GiB)": 34.88, "step": 41645, "train_speed(iter/s)": 0.411034 }, { "acc": 0.89351826, "epoch": 1.1277177591855523, "grad_norm": 11.122332572937012, "learning_rate": 8.600223679455924e-06, "loss": 0.59819603, "memory(GiB)": 34.88, "step": 41650, "train_speed(iter/s)": 0.411036 }, { "acc": 0.89719152, "epoch": 1.1278531394687679, "grad_norm": 10.0139741897583, "learning_rate": 8.599835343958097e-06, "loss": 0.56004419, "memory(GiB)": 34.88, "step": 41655, "train_speed(iter/s)": 0.411039 }, { "acc": 0.87612171, "epoch": 1.1279885197519834, "grad_norm": 12.872489929199219, "learning_rate": 8.599446963371522e-06, "loss": 0.7307354, "memory(GiB)": 34.88, "step": 41660, "train_speed(iter/s)": 0.411042 }, { "acc": 0.87938414, "epoch": 1.1281239000351988, "grad_norm": 8.655335426330566, "learning_rate": 8.599058537701064e-06, "loss": 0.69543958, "memory(GiB)": 34.88, "step": 41665, "train_speed(iter/s)": 0.411044 }, { "acc": 0.89743099, "epoch": 1.1282592803184144, "grad_norm": 15.170011520385742, "learning_rate": 8.59867006695159e-06, "loss": 0.46614232, "memory(GiB)": 34.88, "step": 41670, "train_speed(iter/s)": 0.411047 }, { "acc": 0.90477314, "epoch": 1.12839466060163, "grad_norm": 16.016841888427734, "learning_rate": 8.598281551127966e-06, "loss": 0.48871679, "memory(GiB)": 34.88, "step": 41675, "train_speed(iter/s)": 0.411049 }, { "acc": 0.87997351, "epoch": 1.1285300408848455, "grad_norm": 7.988922595977783, "learning_rate": 8.597892990235056e-06, "loss": 0.5139812, "memory(GiB)": 34.88, "step": 41680, "train_speed(iter/s)": 0.411052 }, { "acc": 0.879846, "epoch": 1.1286654211680611, "grad_norm": 4.2704973220825195, "learning_rate": 8.597504384277733e-06, "loss": 0.56494923, "memory(GiB)": 34.88, "step": 41685, "train_speed(iter/s)": 0.411055 }, { "acc": 0.8848999, "epoch": 1.1288008014512767, "grad_norm": 7.164211273193359, "learning_rate": 8.59711573326086e-06, "loss": 0.54533467, "memory(GiB)": 34.88, "step": 41690, "train_speed(iter/s)": 0.411057 }, { "acc": 0.8772007, "epoch": 1.1289361817344923, "grad_norm": 7.072514057159424, "learning_rate": 8.596727037189308e-06, "loss": 0.68350134, "memory(GiB)": 34.88, "step": 41695, "train_speed(iter/s)": 0.411059 }, { "acc": 0.89083328, "epoch": 1.1290715620177076, "grad_norm": 9.305572509765625, "learning_rate": 8.596338296067946e-06, "loss": 0.52512684, "memory(GiB)": 34.88, "step": 41700, "train_speed(iter/s)": 0.411062 }, { "acc": 0.88312244, "epoch": 1.1292069423009232, "grad_norm": 5.5261006355285645, "learning_rate": 8.595949509901643e-06, "loss": 0.62622519, "memory(GiB)": 34.88, "step": 41705, "train_speed(iter/s)": 0.411065 }, { "acc": 0.87863808, "epoch": 1.1293423225841388, "grad_norm": 5.810835361480713, "learning_rate": 8.595560678695271e-06, "loss": 0.65858684, "memory(GiB)": 34.88, "step": 41710, "train_speed(iter/s)": 0.411067 }, { "acc": 0.87791986, "epoch": 1.1294777028673544, "grad_norm": 9.840807914733887, "learning_rate": 8.595171802453697e-06, "loss": 0.6038178, "memory(GiB)": 34.88, "step": 41715, "train_speed(iter/s)": 0.41107 }, { "acc": 0.8702364, "epoch": 1.12961308315057, "grad_norm": 10.808097839355469, "learning_rate": 8.594782881181797e-06, "loss": 0.65688353, "memory(GiB)": 34.88, "step": 41720, "train_speed(iter/s)": 0.411073 }, { "acc": 0.91317501, "epoch": 1.1297484634337855, "grad_norm": 4.381174087524414, "learning_rate": 8.594393914884441e-06, "loss": 0.39535415, "memory(GiB)": 34.88, "step": 41725, "train_speed(iter/s)": 0.411076 }, { "acc": 0.87855301, "epoch": 1.1298838437170011, "grad_norm": 8.843292236328125, "learning_rate": 8.594004903566505e-06, "loss": 0.63608489, "memory(GiB)": 34.88, "step": 41730, "train_speed(iter/s)": 0.411077 }, { "acc": 0.88742714, "epoch": 1.1300192240002167, "grad_norm": 7.8314056396484375, "learning_rate": 8.593615847232855e-06, "loss": 0.56751027, "memory(GiB)": 34.88, "step": 41735, "train_speed(iter/s)": 0.41108 }, { "acc": 0.87347622, "epoch": 1.1301546042834323, "grad_norm": 7.993477821350098, "learning_rate": 8.593226745888367e-06, "loss": 0.62441235, "memory(GiB)": 34.88, "step": 41740, "train_speed(iter/s)": 0.411082 }, { "acc": 0.8889122, "epoch": 1.1302899845666476, "grad_norm": 6.980541706085205, "learning_rate": 8.59283759953792e-06, "loss": 0.5303237, "memory(GiB)": 34.88, "step": 41745, "train_speed(iter/s)": 0.411085 }, { "acc": 0.87754803, "epoch": 1.1304253648498632, "grad_norm": 21.567455291748047, "learning_rate": 8.592448408186387e-06, "loss": 0.7155652, "memory(GiB)": 34.88, "step": 41750, "train_speed(iter/s)": 0.411088 }, { "acc": 0.89297771, "epoch": 1.1305607451330788, "grad_norm": 4.538789749145508, "learning_rate": 8.592059171838638e-06, "loss": 0.54343896, "memory(GiB)": 34.88, "step": 41755, "train_speed(iter/s)": 0.411091 }, { "acc": 0.892167, "epoch": 1.1306961254162944, "grad_norm": 8.631712913513184, "learning_rate": 8.591669890499557e-06, "loss": 0.55226707, "memory(GiB)": 34.88, "step": 41760, "train_speed(iter/s)": 0.411093 }, { "acc": 0.88489323, "epoch": 1.13083150569951, "grad_norm": 10.112151145935059, "learning_rate": 8.591280564174013e-06, "loss": 0.57978125, "memory(GiB)": 34.88, "step": 41765, "train_speed(iter/s)": 0.411096 }, { "acc": 0.90625095, "epoch": 1.1309668859827255, "grad_norm": 7.484875202178955, "learning_rate": 8.590891192866888e-06, "loss": 0.42012424, "memory(GiB)": 34.88, "step": 41770, "train_speed(iter/s)": 0.411099 }, { "acc": 0.85944462, "epoch": 1.1311022662659411, "grad_norm": 9.019417762756348, "learning_rate": 8.590501776583058e-06, "loss": 0.80513706, "memory(GiB)": 34.88, "step": 41775, "train_speed(iter/s)": 0.411101 }, { "acc": 0.88180065, "epoch": 1.1312376465491565, "grad_norm": 6.7318949699401855, "learning_rate": 8.590112315327402e-06, "loss": 0.64352713, "memory(GiB)": 34.88, "step": 41780, "train_speed(iter/s)": 0.411104 }, { "acc": 0.89707327, "epoch": 1.131373026832372, "grad_norm": 6.812259674072266, "learning_rate": 8.589722809104798e-06, "loss": 0.48088241, "memory(GiB)": 34.88, "step": 41785, "train_speed(iter/s)": 0.411107 }, { "acc": 0.87947836, "epoch": 1.1315084071155876, "grad_norm": 7.461617469787598, "learning_rate": 8.589333257920124e-06, "loss": 0.65781307, "memory(GiB)": 34.88, "step": 41790, "train_speed(iter/s)": 0.41111 }, { "acc": 0.90000019, "epoch": 1.1316437873988032, "grad_norm": 3.994507312774658, "learning_rate": 8.588943661778262e-06, "loss": 0.50256891, "memory(GiB)": 34.88, "step": 41795, "train_speed(iter/s)": 0.411112 }, { "acc": 0.89671488, "epoch": 1.1317791676820188, "grad_norm": 6.476803302764893, "learning_rate": 8.588554020684094e-06, "loss": 0.51943097, "memory(GiB)": 34.88, "step": 41800, "train_speed(iter/s)": 0.411115 }, { "acc": 0.90321274, "epoch": 1.1319145479652344, "grad_norm": 10.521650314331055, "learning_rate": 8.588164334642495e-06, "loss": 0.55528712, "memory(GiB)": 34.88, "step": 41805, "train_speed(iter/s)": 0.411117 }, { "acc": 0.89368944, "epoch": 1.13204992824845, "grad_norm": 5.1267008781433105, "learning_rate": 8.587774603658353e-06, "loss": 0.43781738, "memory(GiB)": 34.88, "step": 41810, "train_speed(iter/s)": 0.41112 }, { "acc": 0.87684193, "epoch": 1.1321853085316653, "grad_norm": 4.415940761566162, "learning_rate": 8.587384827736547e-06, "loss": 0.61382475, "memory(GiB)": 34.88, "step": 41815, "train_speed(iter/s)": 0.411123 }, { "acc": 0.90527506, "epoch": 1.132320688814881, "grad_norm": 3.116901397705078, "learning_rate": 8.586995006881964e-06, "loss": 0.48731089, "memory(GiB)": 34.88, "step": 41820, "train_speed(iter/s)": 0.411125 }, { "acc": 0.87004185, "epoch": 1.1324560690980965, "grad_norm": 10.34760856628418, "learning_rate": 8.58660514109948e-06, "loss": 0.73199902, "memory(GiB)": 34.88, "step": 41825, "train_speed(iter/s)": 0.411128 }, { "acc": 0.89869041, "epoch": 1.132591449381312, "grad_norm": 7.1513872146606445, "learning_rate": 8.586215230393982e-06, "loss": 0.54511251, "memory(GiB)": 34.88, "step": 41830, "train_speed(iter/s)": 0.41113 }, { "acc": 0.88019466, "epoch": 1.1327268296645276, "grad_norm": 13.27668285369873, "learning_rate": 8.585825274770354e-06, "loss": 0.69784021, "memory(GiB)": 34.88, "step": 41835, "train_speed(iter/s)": 0.411133 }, { "acc": 0.89468174, "epoch": 1.1328622099477432, "grad_norm": 6.694453239440918, "learning_rate": 8.585435274233484e-06, "loss": 0.55377364, "memory(GiB)": 34.88, "step": 41840, "train_speed(iter/s)": 0.411135 }, { "acc": 0.87133627, "epoch": 1.1329975902309588, "grad_norm": 8.869535446166992, "learning_rate": 8.585045228788256e-06, "loss": 0.75215726, "memory(GiB)": 34.88, "step": 41845, "train_speed(iter/s)": 0.411138 }, { "acc": 0.89117374, "epoch": 1.1331329705141744, "grad_norm": 23.698986053466797, "learning_rate": 8.584655138439554e-06, "loss": 0.51990891, "memory(GiB)": 34.88, "step": 41850, "train_speed(iter/s)": 0.411141 }, { "acc": 0.88927355, "epoch": 1.13326835079739, "grad_norm": 12.62006950378418, "learning_rate": 8.584265003192265e-06, "loss": 0.58147578, "memory(GiB)": 34.88, "step": 41855, "train_speed(iter/s)": 0.411143 }, { "acc": 0.897966, "epoch": 1.1334037310806053, "grad_norm": 7.895991325378418, "learning_rate": 8.58387482305128e-06, "loss": 0.53862448, "memory(GiB)": 34.88, "step": 41860, "train_speed(iter/s)": 0.411146 }, { "acc": 0.88241482, "epoch": 1.133539111363821, "grad_norm": 11.297341346740723, "learning_rate": 8.583484598021483e-06, "loss": 0.6852994, "memory(GiB)": 34.88, "step": 41865, "train_speed(iter/s)": 0.411148 }, { "acc": 0.87805176, "epoch": 1.1336744916470365, "grad_norm": 4.877869129180908, "learning_rate": 8.583094328107761e-06, "loss": 0.70356631, "memory(GiB)": 34.88, "step": 41870, "train_speed(iter/s)": 0.411151 }, { "acc": 0.8814312, "epoch": 1.133809871930252, "grad_norm": 11.238481521606445, "learning_rate": 8.582704013315007e-06, "loss": 0.64815688, "memory(GiB)": 34.88, "step": 41875, "train_speed(iter/s)": 0.411154 }, { "acc": 0.8925045, "epoch": 1.1339452522134676, "grad_norm": 7.239121437072754, "learning_rate": 8.582313653648108e-06, "loss": 0.54341912, "memory(GiB)": 34.88, "step": 41880, "train_speed(iter/s)": 0.411156 }, { "acc": 0.86120672, "epoch": 1.1340806324966832, "grad_norm": 11.929839134216309, "learning_rate": 8.581923249111956e-06, "loss": 0.68304071, "memory(GiB)": 34.88, "step": 41885, "train_speed(iter/s)": 0.411159 }, { "acc": 0.88137589, "epoch": 1.1342160127798988, "grad_norm": 10.533183097839355, "learning_rate": 8.58153279971144e-06, "loss": 0.63938437, "memory(GiB)": 34.88, "step": 41890, "train_speed(iter/s)": 0.411162 }, { "acc": 0.89000578, "epoch": 1.1343513930631142, "grad_norm": 5.124636650085449, "learning_rate": 8.58114230545145e-06, "loss": 0.54836559, "memory(GiB)": 34.88, "step": 41895, "train_speed(iter/s)": 0.411164 }, { "acc": 0.86032772, "epoch": 1.1344867733463297, "grad_norm": 10.384431838989258, "learning_rate": 8.580751766336881e-06, "loss": 0.81854973, "memory(GiB)": 34.88, "step": 41900, "train_speed(iter/s)": 0.411167 }, { "acc": 0.90398006, "epoch": 1.1346221536295453, "grad_norm": 4.093724727630615, "learning_rate": 8.580361182372623e-06, "loss": 0.50730124, "memory(GiB)": 34.88, "step": 41905, "train_speed(iter/s)": 0.411169 }, { "acc": 0.89837132, "epoch": 1.134757533912761, "grad_norm": 9.352670669555664, "learning_rate": 8.579970553563569e-06, "loss": 0.49249601, "memory(GiB)": 34.88, "step": 41910, "train_speed(iter/s)": 0.411172 }, { "acc": 0.91192226, "epoch": 1.1348929141959765, "grad_norm": 4.992238521575928, "learning_rate": 8.579579879914613e-06, "loss": 0.49597731, "memory(GiB)": 34.88, "step": 41915, "train_speed(iter/s)": 0.411175 }, { "acc": 0.89450293, "epoch": 1.135028294479192, "grad_norm": 11.330446243286133, "learning_rate": 8.579189161430648e-06, "loss": 0.56406126, "memory(GiB)": 34.88, "step": 41920, "train_speed(iter/s)": 0.411178 }, { "acc": 0.89633932, "epoch": 1.1351636747624076, "grad_norm": 6.31464147567749, "learning_rate": 8.578798398116569e-06, "loss": 0.5230063, "memory(GiB)": 34.88, "step": 41925, "train_speed(iter/s)": 0.41118 }, { "acc": 0.89304199, "epoch": 1.1352990550456232, "grad_norm": 5.435029983520508, "learning_rate": 8.578407589977274e-06, "loss": 0.52424941, "memory(GiB)": 34.88, "step": 41930, "train_speed(iter/s)": 0.411183 }, { "acc": 0.88778915, "epoch": 1.1354344353288388, "grad_norm": 7.1013407707214355, "learning_rate": 8.578016737017652e-06, "loss": 0.65664349, "memory(GiB)": 34.88, "step": 41935, "train_speed(iter/s)": 0.411186 }, { "acc": 0.89861145, "epoch": 1.1355698156120542, "grad_norm": 6.8444366455078125, "learning_rate": 8.577625839242605e-06, "loss": 0.50616164, "memory(GiB)": 34.88, "step": 41940, "train_speed(iter/s)": 0.411188 }, { "acc": 0.88972054, "epoch": 1.1357051958952697, "grad_norm": 4.717678546905518, "learning_rate": 8.577234896657026e-06, "loss": 0.5146944, "memory(GiB)": 34.88, "step": 41945, "train_speed(iter/s)": 0.411191 }, { "acc": 0.88045273, "epoch": 1.1358405761784853, "grad_norm": 12.442296981811523, "learning_rate": 8.576843909265818e-06, "loss": 0.57954445, "memory(GiB)": 34.88, "step": 41950, "train_speed(iter/s)": 0.411194 }, { "acc": 0.90249424, "epoch": 1.135975956461701, "grad_norm": 10.339295387268066, "learning_rate": 8.57645287707387e-06, "loss": 0.51056309, "memory(GiB)": 34.88, "step": 41955, "train_speed(iter/s)": 0.411196 }, { "acc": 0.88376083, "epoch": 1.1361113367449165, "grad_norm": 12.969034194946289, "learning_rate": 8.57606180008609e-06, "loss": 0.62233133, "memory(GiB)": 34.88, "step": 41960, "train_speed(iter/s)": 0.411199 }, { "acc": 0.89493885, "epoch": 1.136246717028132, "grad_norm": 14.716389656066895, "learning_rate": 8.57567067830737e-06, "loss": 0.55724397, "memory(GiB)": 34.88, "step": 41965, "train_speed(iter/s)": 0.411201 }, { "acc": 0.89525776, "epoch": 1.1363820973113476, "grad_norm": 6.675181865692139, "learning_rate": 8.575279511742614e-06, "loss": 0.58744764, "memory(GiB)": 34.88, "step": 41970, "train_speed(iter/s)": 0.411204 }, { "acc": 0.87627649, "epoch": 1.136517477594563, "grad_norm": 9.27798843383789, "learning_rate": 8.574888300396718e-06, "loss": 0.67340031, "memory(GiB)": 34.88, "step": 41975, "train_speed(iter/s)": 0.411206 }, { "acc": 0.88058863, "epoch": 1.1366528578777786, "grad_norm": 9.585814476013184, "learning_rate": 8.574497044274586e-06, "loss": 0.56476984, "memory(GiB)": 34.88, "step": 41980, "train_speed(iter/s)": 0.411209 }, { "acc": 0.90559568, "epoch": 1.1367882381609942, "grad_norm": 11.733597755432129, "learning_rate": 8.574105743381117e-06, "loss": 0.4836072, "memory(GiB)": 34.88, "step": 41985, "train_speed(iter/s)": 0.411212 }, { "acc": 0.89566336, "epoch": 1.1369236184442097, "grad_norm": 7.770042419433594, "learning_rate": 8.573714397721216e-06, "loss": 0.49974036, "memory(GiB)": 34.88, "step": 41990, "train_speed(iter/s)": 0.411214 }, { "acc": 0.88978558, "epoch": 1.1370589987274253, "grad_norm": 7.958850383758545, "learning_rate": 8.573323007299782e-06, "loss": 0.56000013, "memory(GiB)": 34.88, "step": 41995, "train_speed(iter/s)": 0.411217 }, { "acc": 0.91339016, "epoch": 1.137194379010641, "grad_norm": 11.026658058166504, "learning_rate": 8.572931572121718e-06, "loss": 0.45146198, "memory(GiB)": 34.88, "step": 42000, "train_speed(iter/s)": 0.411219 }, { "acc": 0.88979216, "epoch": 1.1373297592938565, "grad_norm": 9.397883415222168, "learning_rate": 8.57254009219193e-06, "loss": 0.51158485, "memory(GiB)": 34.88, "step": 42005, "train_speed(iter/s)": 0.411221 }, { "acc": 0.88352489, "epoch": 1.137465139577072, "grad_norm": 7.045567035675049, "learning_rate": 8.572148567515322e-06, "loss": 0.71324701, "memory(GiB)": 34.88, "step": 42010, "train_speed(iter/s)": 0.411224 }, { "acc": 0.90711555, "epoch": 1.1376005198602877, "grad_norm": 8.334187507629395, "learning_rate": 8.571756998096797e-06, "loss": 0.52877226, "memory(GiB)": 34.88, "step": 42015, "train_speed(iter/s)": 0.411227 }, { "acc": 0.87747355, "epoch": 1.137735900143503, "grad_norm": 10.907044410705566, "learning_rate": 8.57136538394126e-06, "loss": 0.59914398, "memory(GiB)": 34.88, "step": 42020, "train_speed(iter/s)": 0.411229 }, { "acc": 0.86738863, "epoch": 1.1378712804267186, "grad_norm": 7.733216285705566, "learning_rate": 8.570973725053616e-06, "loss": 0.71505685, "memory(GiB)": 34.88, "step": 42025, "train_speed(iter/s)": 0.411232 }, { "acc": 0.87346077, "epoch": 1.1380066607099342, "grad_norm": 13.769582748413086, "learning_rate": 8.570582021438773e-06, "loss": 0.68217244, "memory(GiB)": 34.88, "step": 42030, "train_speed(iter/s)": 0.411234 }, { "acc": 0.88676815, "epoch": 1.1381420409931498, "grad_norm": 9.460411071777344, "learning_rate": 8.57019027310164e-06, "loss": 0.60512476, "memory(GiB)": 34.88, "step": 42035, "train_speed(iter/s)": 0.411237 }, { "acc": 0.90807114, "epoch": 1.1382774212763653, "grad_norm": 7.265880107879639, "learning_rate": 8.56979848004712e-06, "loss": 0.47968869, "memory(GiB)": 34.88, "step": 42040, "train_speed(iter/s)": 0.41124 }, { "acc": 0.88093729, "epoch": 1.138412801559581, "grad_norm": 6.175409317016602, "learning_rate": 8.569406642280123e-06, "loss": 0.64219294, "memory(GiB)": 34.88, "step": 42045, "train_speed(iter/s)": 0.411242 }, { "acc": 0.8921133, "epoch": 1.1385481818427965, "grad_norm": 12.870292663574219, "learning_rate": 8.569014759805557e-06, "loss": 0.58621025, "memory(GiB)": 34.88, "step": 42050, "train_speed(iter/s)": 0.411245 }, { "acc": 0.89869041, "epoch": 1.1386835621260119, "grad_norm": 10.982645988464355, "learning_rate": 8.568622832628334e-06, "loss": 0.48308244, "memory(GiB)": 34.88, "step": 42055, "train_speed(iter/s)": 0.411248 }, { "acc": 0.90824585, "epoch": 1.1388189424092274, "grad_norm": 4.394686698913574, "learning_rate": 8.568230860753359e-06, "loss": 0.42807646, "memory(GiB)": 34.88, "step": 42060, "train_speed(iter/s)": 0.41125 }, { "acc": 0.88130913, "epoch": 1.138954322692443, "grad_norm": 8.914009094238281, "learning_rate": 8.567838844185545e-06, "loss": 0.58066187, "memory(GiB)": 34.88, "step": 42065, "train_speed(iter/s)": 0.411253 }, { "acc": 0.87188606, "epoch": 1.1390897029756586, "grad_norm": 8.36454963684082, "learning_rate": 8.5674467829298e-06, "loss": 0.65570049, "memory(GiB)": 34.88, "step": 42070, "train_speed(iter/s)": 0.411255 }, { "acc": 0.87750082, "epoch": 1.1392250832588742, "grad_norm": 6.944628715515137, "learning_rate": 8.567054676991041e-06, "loss": 0.72854714, "memory(GiB)": 34.88, "step": 42075, "train_speed(iter/s)": 0.411258 }, { "acc": 0.90518208, "epoch": 1.1393604635420898, "grad_norm": 9.98475456237793, "learning_rate": 8.566662526374175e-06, "loss": 0.54653244, "memory(GiB)": 34.88, "step": 42080, "train_speed(iter/s)": 0.411261 }, { "acc": 0.8891613, "epoch": 1.1394958438253053, "grad_norm": 9.993171691894531, "learning_rate": 8.566270331084114e-06, "loss": 0.58811636, "memory(GiB)": 34.88, "step": 42085, "train_speed(iter/s)": 0.411263 }, { "acc": 0.88180981, "epoch": 1.139631224108521, "grad_norm": 10.693647384643555, "learning_rate": 8.565878091125775e-06, "loss": 0.6872508, "memory(GiB)": 34.88, "step": 42090, "train_speed(iter/s)": 0.411266 }, { "acc": 0.87177105, "epoch": 1.1397666043917365, "grad_norm": 8.541563987731934, "learning_rate": 8.56548580650407e-06, "loss": 0.63689375, "memory(GiB)": 34.88, "step": 42095, "train_speed(iter/s)": 0.411268 }, { "acc": 0.88489685, "epoch": 1.1399019846749519, "grad_norm": 12.935697555541992, "learning_rate": 8.56509347722391e-06, "loss": 0.60185938, "memory(GiB)": 34.88, "step": 42100, "train_speed(iter/s)": 0.411271 }, { "acc": 0.88383884, "epoch": 1.1400373649581674, "grad_norm": 10.352627754211426, "learning_rate": 8.564701103290215e-06, "loss": 0.55494232, "memory(GiB)": 34.88, "step": 42105, "train_speed(iter/s)": 0.411273 }, { "acc": 0.91111336, "epoch": 1.140172745241383, "grad_norm": 12.237682342529297, "learning_rate": 8.564308684707896e-06, "loss": 0.49977026, "memory(GiB)": 34.88, "step": 42110, "train_speed(iter/s)": 0.411276 }, { "acc": 0.88086109, "epoch": 1.1403081255245986, "grad_norm": 13.890127182006836, "learning_rate": 8.563916221481872e-06, "loss": 0.61846581, "memory(GiB)": 34.88, "step": 42115, "train_speed(iter/s)": 0.411279 }, { "acc": 0.88606825, "epoch": 1.1404435058078142, "grad_norm": 10.310629844665527, "learning_rate": 8.563523713617055e-06, "loss": 0.62045531, "memory(GiB)": 34.88, "step": 42120, "train_speed(iter/s)": 0.411281 }, { "acc": 0.89006252, "epoch": 1.1405788860910298, "grad_norm": 7.754816055297852, "learning_rate": 8.563131161118366e-06, "loss": 0.60116177, "memory(GiB)": 34.88, "step": 42125, "train_speed(iter/s)": 0.411284 }, { "acc": 0.86995029, "epoch": 1.1407142663742453, "grad_norm": 8.859517097473145, "learning_rate": 8.56273856399072e-06, "loss": 0.69879465, "memory(GiB)": 34.88, "step": 42130, "train_speed(iter/s)": 0.411286 }, { "acc": 0.90715036, "epoch": 1.1408496466574607, "grad_norm": 8.975590705871582, "learning_rate": 8.56234592223904e-06, "loss": 0.47029648, "memory(GiB)": 34.88, "step": 42135, "train_speed(iter/s)": 0.411289 }, { "acc": 0.87999916, "epoch": 1.1409850269406763, "grad_norm": 12.147801399230957, "learning_rate": 8.561953235868237e-06, "loss": 0.68709359, "memory(GiB)": 34.88, "step": 42140, "train_speed(iter/s)": 0.411291 }, { "acc": 0.8784379, "epoch": 1.1411204072238919, "grad_norm": 11.516439437866211, "learning_rate": 8.561560504883236e-06, "loss": 0.72101812, "memory(GiB)": 34.88, "step": 42145, "train_speed(iter/s)": 0.411294 }, { "acc": 0.90130701, "epoch": 1.1412557875071074, "grad_norm": 10.310981750488281, "learning_rate": 8.561167729288954e-06, "loss": 0.44593792, "memory(GiB)": 34.88, "step": 42150, "train_speed(iter/s)": 0.411296 }, { "acc": 0.87291498, "epoch": 1.141391167790323, "grad_norm": 4.254757404327393, "learning_rate": 8.56077490909031e-06, "loss": 0.71412172, "memory(GiB)": 34.88, "step": 42155, "train_speed(iter/s)": 0.411299 }, { "acc": 0.89031696, "epoch": 1.1415265480735386, "grad_norm": 14.044432640075684, "learning_rate": 8.56038204429223e-06, "loss": 0.61996984, "memory(GiB)": 34.88, "step": 42160, "train_speed(iter/s)": 0.411301 }, { "acc": 0.89145193, "epoch": 1.1416619283567542, "grad_norm": 7.51284646987915, "learning_rate": 8.55998913489963e-06, "loss": 0.58980284, "memory(GiB)": 34.88, "step": 42165, "train_speed(iter/s)": 0.411304 }, { "acc": 0.87572708, "epoch": 1.1417973086399698, "grad_norm": 7.9355010986328125, "learning_rate": 8.559596180917436e-06, "loss": 0.55900769, "memory(GiB)": 34.88, "step": 42170, "train_speed(iter/s)": 0.411307 }, { "acc": 0.8867733, "epoch": 1.1419326889231853, "grad_norm": 8.033130645751953, "learning_rate": 8.559203182350568e-06, "loss": 0.58423309, "memory(GiB)": 34.88, "step": 42175, "train_speed(iter/s)": 0.411309 }, { "acc": 0.88657904, "epoch": 1.1420680692064007, "grad_norm": 7.062610626220703, "learning_rate": 8.558810139203948e-06, "loss": 0.59675798, "memory(GiB)": 34.88, "step": 42180, "train_speed(iter/s)": 0.411311 }, { "acc": 0.89142141, "epoch": 1.1422034494896163, "grad_norm": 6.853145122528076, "learning_rate": 8.558417051482504e-06, "loss": 0.57212744, "memory(GiB)": 34.88, "step": 42185, "train_speed(iter/s)": 0.411313 }, { "acc": 0.88824978, "epoch": 1.1423388297728319, "grad_norm": 9.981169700622559, "learning_rate": 8.558023919191157e-06, "loss": 0.58643017, "memory(GiB)": 34.88, "step": 42190, "train_speed(iter/s)": 0.411316 }, { "acc": 0.89674759, "epoch": 1.1424742100560474, "grad_norm": 3.7695207595825195, "learning_rate": 8.557630742334833e-06, "loss": 0.56245799, "memory(GiB)": 34.88, "step": 42195, "train_speed(iter/s)": 0.411318 }, { "acc": 0.87602034, "epoch": 1.142609590339263, "grad_norm": 17.97606086730957, "learning_rate": 8.557237520918455e-06, "loss": 0.64817595, "memory(GiB)": 34.88, "step": 42200, "train_speed(iter/s)": 0.411321 }, { "acc": 0.90426121, "epoch": 1.1427449706224786, "grad_norm": 4.369909286499023, "learning_rate": 8.556844254946954e-06, "loss": 0.48033972, "memory(GiB)": 34.88, "step": 42205, "train_speed(iter/s)": 0.411323 }, { "acc": 0.91174202, "epoch": 1.1428803509056942, "grad_norm": 2.922882080078125, "learning_rate": 8.556450944425248e-06, "loss": 0.46039791, "memory(GiB)": 34.88, "step": 42210, "train_speed(iter/s)": 0.411326 }, { "acc": 0.87920828, "epoch": 1.1430157311889095, "grad_norm": 8.1356782913208, "learning_rate": 8.556057589358272e-06, "loss": 0.68559656, "memory(GiB)": 34.88, "step": 42215, "train_speed(iter/s)": 0.411328 }, { "acc": 0.88880501, "epoch": 1.1431511114721251, "grad_norm": 7.763473033905029, "learning_rate": 8.55566418975095e-06, "loss": 0.57827725, "memory(GiB)": 34.88, "step": 42220, "train_speed(iter/s)": 0.411331 }, { "acc": 0.87752113, "epoch": 1.1432864917553407, "grad_norm": 10.642367362976074, "learning_rate": 8.555270745608211e-06, "loss": 0.65963717, "memory(GiB)": 34.88, "step": 42225, "train_speed(iter/s)": 0.411333 }, { "acc": 0.90439053, "epoch": 1.1434218720385563, "grad_norm": 6.94122838973999, "learning_rate": 8.554877256934983e-06, "loss": 0.55308895, "memory(GiB)": 34.88, "step": 42230, "train_speed(iter/s)": 0.411336 }, { "acc": 0.88329983, "epoch": 1.1435572523217719, "grad_norm": 6.165004730224609, "learning_rate": 8.554483723736195e-06, "loss": 0.59455643, "memory(GiB)": 34.88, "step": 42235, "train_speed(iter/s)": 0.411338 }, { "acc": 0.88427238, "epoch": 1.1436926326049874, "grad_norm": 6.9066009521484375, "learning_rate": 8.554090146016778e-06, "loss": 0.68291965, "memory(GiB)": 34.88, "step": 42240, "train_speed(iter/s)": 0.411341 }, { "acc": 0.89759235, "epoch": 1.143828012888203, "grad_norm": 9.755316734313965, "learning_rate": 8.55369652378166e-06, "loss": 0.5141901, "memory(GiB)": 34.88, "step": 42245, "train_speed(iter/s)": 0.411344 }, { "acc": 0.8782053, "epoch": 1.1439633931714186, "grad_norm": 7.441192150115967, "learning_rate": 8.553302857035777e-06, "loss": 0.63616552, "memory(GiB)": 34.88, "step": 42250, "train_speed(iter/s)": 0.411346 }, { "acc": 0.88557405, "epoch": 1.1440987734546342, "grad_norm": 12.425028800964355, "learning_rate": 8.552909145784054e-06, "loss": 0.62698236, "memory(GiB)": 34.88, "step": 42255, "train_speed(iter/s)": 0.411349 }, { "acc": 0.89137707, "epoch": 1.1442341537378495, "grad_norm": 8.76666259765625, "learning_rate": 8.552515390031427e-06, "loss": 0.61375189, "memory(GiB)": 34.88, "step": 42260, "train_speed(iter/s)": 0.411351 }, { "acc": 0.8845048, "epoch": 1.1443695340210651, "grad_norm": 4.8866071701049805, "learning_rate": 8.552121589782827e-06, "loss": 0.58122578, "memory(GiB)": 34.88, "step": 42265, "train_speed(iter/s)": 0.411354 }, { "acc": 0.90079174, "epoch": 1.1445049143042807, "grad_norm": 20.101640701293945, "learning_rate": 8.551727745043188e-06, "loss": 0.51175609, "memory(GiB)": 34.88, "step": 42270, "train_speed(iter/s)": 0.411356 }, { "acc": 0.88893776, "epoch": 1.1446402945874963, "grad_norm": 5.394027233123779, "learning_rate": 8.551333855817444e-06, "loss": 0.56436253, "memory(GiB)": 34.88, "step": 42275, "train_speed(iter/s)": 0.411359 }, { "acc": 0.89346294, "epoch": 1.1447756748707119, "grad_norm": 22.72386932373047, "learning_rate": 8.550939922110529e-06, "loss": 0.56582561, "memory(GiB)": 34.88, "step": 42280, "train_speed(iter/s)": 0.411361 }, { "acc": 0.88013773, "epoch": 1.1449110551539274, "grad_norm": 9.433338165283203, "learning_rate": 8.550545943927376e-06, "loss": 0.58410544, "memory(GiB)": 34.88, "step": 42285, "train_speed(iter/s)": 0.411364 }, { "acc": 0.89488039, "epoch": 1.145046435437143, "grad_norm": 7.3519206047058105, "learning_rate": 8.550151921272921e-06, "loss": 0.51508617, "memory(GiB)": 34.88, "step": 42290, "train_speed(iter/s)": 0.411367 }, { "acc": 0.89895477, "epoch": 1.1451818157203584, "grad_norm": 5.006467342376709, "learning_rate": 8.549757854152101e-06, "loss": 0.46733198, "memory(GiB)": 34.88, "step": 42295, "train_speed(iter/s)": 0.411369 }, { "acc": 0.88420057, "epoch": 1.145317196003574, "grad_norm": 29.519245147705078, "learning_rate": 8.549363742569853e-06, "loss": 0.68531694, "memory(GiB)": 34.88, "step": 42300, "train_speed(iter/s)": 0.411372 }, { "acc": 0.87675209, "epoch": 1.1454525762867895, "grad_norm": 11.844406127929688, "learning_rate": 8.548969586531114e-06, "loss": 0.59825845, "memory(GiB)": 34.88, "step": 42305, "train_speed(iter/s)": 0.411375 }, { "acc": 0.89801331, "epoch": 1.1455879565700051, "grad_norm": 12.414759635925293, "learning_rate": 8.548575386040819e-06, "loss": 0.54945016, "memory(GiB)": 34.88, "step": 42310, "train_speed(iter/s)": 0.411377 }, { "acc": 0.87899399, "epoch": 1.1457233368532207, "grad_norm": 12.702226638793945, "learning_rate": 8.548181141103909e-06, "loss": 0.74914012, "memory(GiB)": 34.88, "step": 42315, "train_speed(iter/s)": 0.41138 }, { "acc": 0.88846102, "epoch": 1.1458587171364363, "grad_norm": 8.353981018066406, "learning_rate": 8.54778685172532e-06, "loss": 0.5739574, "memory(GiB)": 34.88, "step": 42320, "train_speed(iter/s)": 0.411383 }, { "acc": 0.86338501, "epoch": 1.1459940974196519, "grad_norm": 7.225763320922852, "learning_rate": 8.547392517909995e-06, "loss": 0.84306154, "memory(GiB)": 34.88, "step": 42325, "train_speed(iter/s)": 0.411385 }, { "acc": 0.89327888, "epoch": 1.1461294777028674, "grad_norm": 6.660994529724121, "learning_rate": 8.546998139662869e-06, "loss": 0.5748332, "memory(GiB)": 34.88, "step": 42330, "train_speed(iter/s)": 0.411388 }, { "acc": 0.90709629, "epoch": 1.146264857986083, "grad_norm": 4.59322452545166, "learning_rate": 8.546603716988887e-06, "loss": 0.43325047, "memory(GiB)": 34.88, "step": 42335, "train_speed(iter/s)": 0.41139 }, { "acc": 0.89006138, "epoch": 1.1464002382692984, "grad_norm": 13.765023231506348, "learning_rate": 8.546209249892987e-06, "loss": 0.56743579, "memory(GiB)": 34.88, "step": 42340, "train_speed(iter/s)": 0.411392 }, { "acc": 0.86974087, "epoch": 1.146535618552514, "grad_norm": 5.1106791496276855, "learning_rate": 8.545814738380113e-06, "loss": 0.7140151, "memory(GiB)": 34.88, "step": 42345, "train_speed(iter/s)": 0.411395 }, { "acc": 0.89257078, "epoch": 1.1466709988357295, "grad_norm": 2.140888214111328, "learning_rate": 8.545420182455202e-06, "loss": 0.5970233, "memory(GiB)": 34.88, "step": 42350, "train_speed(iter/s)": 0.411397 }, { "acc": 0.87980518, "epoch": 1.1468063791189451, "grad_norm": 10.501813888549805, "learning_rate": 8.545025582123202e-06, "loss": 0.58551235, "memory(GiB)": 34.88, "step": 42355, "train_speed(iter/s)": 0.4114 }, { "acc": 0.88935966, "epoch": 1.1469417594021607, "grad_norm": 7.001199722290039, "learning_rate": 8.544630937389055e-06, "loss": 0.59842834, "memory(GiB)": 34.88, "step": 42360, "train_speed(iter/s)": 0.411403 }, { "acc": 0.89355974, "epoch": 1.1470771396853763, "grad_norm": 60.43791961669922, "learning_rate": 8.544236248257702e-06, "loss": 0.57811241, "memory(GiB)": 34.88, "step": 42365, "train_speed(iter/s)": 0.411405 }, { "acc": 0.8817812, "epoch": 1.1472125199685919, "grad_norm": 4.905215740203857, "learning_rate": 8.54384151473409e-06, "loss": 0.55325866, "memory(GiB)": 34.88, "step": 42370, "train_speed(iter/s)": 0.411408 }, { "acc": 0.87730885, "epoch": 1.1473479002518072, "grad_norm": 12.314812660217285, "learning_rate": 8.543446736823164e-06, "loss": 0.70728788, "memory(GiB)": 34.88, "step": 42375, "train_speed(iter/s)": 0.41141 }, { "acc": 0.8915988, "epoch": 1.1474832805350228, "grad_norm": 8.300689697265625, "learning_rate": 8.543051914529866e-06, "loss": 0.57270522, "memory(GiB)": 34.88, "step": 42380, "train_speed(iter/s)": 0.411413 }, { "acc": 0.87955742, "epoch": 1.1476186608182384, "grad_norm": 9.582794189453125, "learning_rate": 8.542657047859145e-06, "loss": 0.66700053, "memory(GiB)": 34.88, "step": 42385, "train_speed(iter/s)": 0.411416 }, { "acc": 0.88073692, "epoch": 1.147754041101454, "grad_norm": 14.830881118774414, "learning_rate": 8.542262136815947e-06, "loss": 0.70558758, "memory(GiB)": 34.88, "step": 42390, "train_speed(iter/s)": 0.411418 }, { "acc": 0.91395912, "epoch": 1.1478894213846695, "grad_norm": 3.0727343559265137, "learning_rate": 8.541867181405218e-06, "loss": 0.47556992, "memory(GiB)": 34.88, "step": 42395, "train_speed(iter/s)": 0.411421 }, { "acc": 0.89327583, "epoch": 1.1480248016678851, "grad_norm": 5.905099391937256, "learning_rate": 8.541472181631907e-06, "loss": 0.55977411, "memory(GiB)": 34.88, "step": 42400, "train_speed(iter/s)": 0.411424 }, { "acc": 0.88957891, "epoch": 1.1481601819511007, "grad_norm": 7.890448570251465, "learning_rate": 8.54107713750096e-06, "loss": 0.51853242, "memory(GiB)": 34.88, "step": 42405, "train_speed(iter/s)": 0.411426 }, { "acc": 0.89201765, "epoch": 1.1482955622343163, "grad_norm": 2.0756900310516357, "learning_rate": 8.54068204901733e-06, "loss": 0.6257884, "memory(GiB)": 34.88, "step": 42410, "train_speed(iter/s)": 0.411429 }, { "acc": 0.89152012, "epoch": 1.1484309425175319, "grad_norm": 13.827563285827637, "learning_rate": 8.54028691618596e-06, "loss": 0.57833252, "memory(GiB)": 34.88, "step": 42415, "train_speed(iter/s)": 0.411431 }, { "acc": 0.90811863, "epoch": 1.1485663228007472, "grad_norm": 6.624017715454102, "learning_rate": 8.539891739011806e-06, "loss": 0.46972685, "memory(GiB)": 34.88, "step": 42420, "train_speed(iter/s)": 0.411434 }, { "acc": 0.90129662, "epoch": 1.1487017030839628, "grad_norm": 13.628294944763184, "learning_rate": 8.539496517499813e-06, "loss": 0.5117691, "memory(GiB)": 34.88, "step": 42425, "train_speed(iter/s)": 0.411436 }, { "acc": 0.87646561, "epoch": 1.1488370833671784, "grad_norm": 13.965314865112305, "learning_rate": 8.539101251654937e-06, "loss": 0.8006196, "memory(GiB)": 34.88, "step": 42430, "train_speed(iter/s)": 0.411439 }, { "acc": 0.88447142, "epoch": 1.148972463650394, "grad_norm": 5.1956915855407715, "learning_rate": 8.538705941482126e-06, "loss": 0.63027678, "memory(GiB)": 34.88, "step": 42435, "train_speed(iter/s)": 0.411441 }, { "acc": 0.88786583, "epoch": 1.1491078439336095, "grad_norm": 7.796669006347656, "learning_rate": 8.538310586986333e-06, "loss": 0.51503124, "memory(GiB)": 34.88, "step": 42440, "train_speed(iter/s)": 0.411444 }, { "acc": 0.89878902, "epoch": 1.1492432242168251, "grad_norm": 4.18449068069458, "learning_rate": 8.53791518817251e-06, "loss": 0.49403949, "memory(GiB)": 34.88, "step": 42445, "train_speed(iter/s)": 0.411447 }, { "acc": 0.89265137, "epoch": 1.1493786045000407, "grad_norm": 9.42597770690918, "learning_rate": 8.537519745045611e-06, "loss": 0.52966938, "memory(GiB)": 34.88, "step": 42450, "train_speed(iter/s)": 0.411449 }, { "acc": 0.87015829, "epoch": 1.149513984783256, "grad_norm": 12.599045753479004, "learning_rate": 8.53712425761059e-06, "loss": 0.77139559, "memory(GiB)": 34.88, "step": 42455, "train_speed(iter/s)": 0.411452 }, { "acc": 0.88814049, "epoch": 1.1496493650664716, "grad_norm": 8.5376558303833, "learning_rate": 8.536728725872401e-06, "loss": 0.59571657, "memory(GiB)": 34.88, "step": 42460, "train_speed(iter/s)": 0.411454 }, { "acc": 0.88630552, "epoch": 1.1497847453496872, "grad_norm": 8.746440887451172, "learning_rate": 8.536333149835997e-06, "loss": 0.54138594, "memory(GiB)": 34.88, "step": 42465, "train_speed(iter/s)": 0.411457 }, { "acc": 0.87471733, "epoch": 1.1499201256329028, "grad_norm": 15.766332626342773, "learning_rate": 8.535937529506337e-06, "loss": 0.70574613, "memory(GiB)": 34.88, "step": 42470, "train_speed(iter/s)": 0.411459 }, { "acc": 0.87688465, "epoch": 1.1500555059161184, "grad_norm": 8.566895484924316, "learning_rate": 8.535541864888373e-06, "loss": 0.6853857, "memory(GiB)": 34.88, "step": 42475, "train_speed(iter/s)": 0.411462 }, { "acc": 0.89033823, "epoch": 1.150190886199334, "grad_norm": 6.11886739730835, "learning_rate": 8.535146155987065e-06, "loss": 0.61794024, "memory(GiB)": 34.88, "step": 42480, "train_speed(iter/s)": 0.411464 }, { "acc": 0.89715033, "epoch": 1.1503262664825495, "grad_norm": 7.882668495178223, "learning_rate": 8.534750402807367e-06, "loss": 0.58830352, "memory(GiB)": 34.88, "step": 42485, "train_speed(iter/s)": 0.411467 }, { "acc": 0.89707556, "epoch": 1.1504616467657651, "grad_norm": 13.758879661560059, "learning_rate": 8.534354605354239e-06, "loss": 0.54746227, "memory(GiB)": 34.88, "step": 42490, "train_speed(iter/s)": 0.411469 }, { "acc": 0.89694328, "epoch": 1.1505970270489807, "grad_norm": 10.05325984954834, "learning_rate": 8.533958763632637e-06, "loss": 0.57424507, "memory(GiB)": 34.88, "step": 42495, "train_speed(iter/s)": 0.411472 }, { "acc": 0.90195484, "epoch": 1.150732407332196, "grad_norm": 3.72001051902771, "learning_rate": 8.533562877647522e-06, "loss": 0.48469095, "memory(GiB)": 34.88, "step": 42500, "train_speed(iter/s)": 0.411474 }, { "acc": 0.90871382, "epoch": 1.1508677876154116, "grad_norm": 11.156946182250977, "learning_rate": 8.53316694740385e-06, "loss": 0.51549158, "memory(GiB)": 34.88, "step": 42505, "train_speed(iter/s)": 0.411477 }, { "acc": 0.90216751, "epoch": 1.1510031678986272, "grad_norm": 10.894428253173828, "learning_rate": 8.532770972906584e-06, "loss": 0.49789529, "memory(GiB)": 34.88, "step": 42510, "train_speed(iter/s)": 0.411479 }, { "acc": 0.89794941, "epoch": 1.1511385481818428, "grad_norm": 34.52965545654297, "learning_rate": 8.532374954160682e-06, "loss": 0.5869873, "memory(GiB)": 34.88, "step": 42515, "train_speed(iter/s)": 0.411482 }, { "acc": 0.8889782, "epoch": 1.1512739284650584, "grad_norm": 7.701929092407227, "learning_rate": 8.531978891171108e-06, "loss": 0.65709805, "memory(GiB)": 34.88, "step": 42520, "train_speed(iter/s)": 0.411484 }, { "acc": 0.86918621, "epoch": 1.151409308748274, "grad_norm": 13.178031921386719, "learning_rate": 8.53158278394282e-06, "loss": 0.66387091, "memory(GiB)": 34.88, "step": 42525, "train_speed(iter/s)": 0.411487 }, { "acc": 0.87781563, "epoch": 1.1515446890314895, "grad_norm": 7.883853912353516, "learning_rate": 8.531186632480783e-06, "loss": 0.68129959, "memory(GiB)": 34.88, "step": 42530, "train_speed(iter/s)": 0.411489 }, { "acc": 0.89505157, "epoch": 1.151680069314705, "grad_norm": 12.232550621032715, "learning_rate": 8.530790436789957e-06, "loss": 0.48063412, "memory(GiB)": 34.88, "step": 42535, "train_speed(iter/s)": 0.411492 }, { "acc": 0.87719145, "epoch": 1.1518154495979205, "grad_norm": 5.974499702453613, "learning_rate": 8.530394196875308e-06, "loss": 0.66131167, "memory(GiB)": 34.88, "step": 42540, "train_speed(iter/s)": 0.411494 }, { "acc": 0.90426731, "epoch": 1.151950829881136, "grad_norm": 6.103494644165039, "learning_rate": 8.529997912741796e-06, "loss": 0.43060617, "memory(GiB)": 34.88, "step": 42545, "train_speed(iter/s)": 0.411497 }, { "acc": 0.91041298, "epoch": 1.1520862101643516, "grad_norm": 7.573764324188232, "learning_rate": 8.529601584394386e-06, "loss": 0.48865333, "memory(GiB)": 34.88, "step": 42550, "train_speed(iter/s)": 0.4115 }, { "acc": 0.8683794, "epoch": 1.1522215904475672, "grad_norm": 7.59497594833374, "learning_rate": 8.529205211838048e-06, "loss": 0.62271342, "memory(GiB)": 34.88, "step": 42555, "train_speed(iter/s)": 0.411502 }, { "acc": 0.87452774, "epoch": 1.1523569707307828, "grad_norm": 11.233736991882324, "learning_rate": 8.52880879507774e-06, "loss": 0.65360684, "memory(GiB)": 34.88, "step": 42560, "train_speed(iter/s)": 0.411505 }, { "acc": 0.90820494, "epoch": 1.1524923510139984, "grad_norm": 7.724059104919434, "learning_rate": 8.528412334118433e-06, "loss": 0.50780191, "memory(GiB)": 34.88, "step": 42565, "train_speed(iter/s)": 0.411507 }, { "acc": 0.88174171, "epoch": 1.152627731297214, "grad_norm": 10.802892684936523, "learning_rate": 8.52801582896509e-06, "loss": 0.56237259, "memory(GiB)": 34.88, "step": 42570, "train_speed(iter/s)": 0.411509 }, { "acc": 0.90163803, "epoch": 1.1527631115804295, "grad_norm": 7.729024410247803, "learning_rate": 8.527619279622682e-06, "loss": 0.53172112, "memory(GiB)": 34.88, "step": 42575, "train_speed(iter/s)": 0.411512 }, { "acc": 0.88773661, "epoch": 1.152898491863645, "grad_norm": 20.617284774780273, "learning_rate": 8.527222686096171e-06, "loss": 0.63704109, "memory(GiB)": 34.88, "step": 42580, "train_speed(iter/s)": 0.411514 }, { "acc": 0.89312506, "epoch": 1.1530338721468605, "grad_norm": 15.373608589172363, "learning_rate": 8.526826048390532e-06, "loss": 0.65688438, "memory(GiB)": 34.88, "step": 42585, "train_speed(iter/s)": 0.411517 }, { "acc": 0.89072285, "epoch": 1.153169252430076, "grad_norm": 8.923059463500977, "learning_rate": 8.526429366510727e-06, "loss": 0.50929012, "memory(GiB)": 34.88, "step": 42590, "train_speed(iter/s)": 0.411519 }, { "acc": 0.89720364, "epoch": 1.1533046327132916, "grad_norm": 4.902995586395264, "learning_rate": 8.52603264046173e-06, "loss": 0.55681958, "memory(GiB)": 34.88, "step": 42595, "train_speed(iter/s)": 0.411522 }, { "acc": 0.89798746, "epoch": 1.1534400129965072, "grad_norm": 9.535074234008789, "learning_rate": 8.52563587024851e-06, "loss": 0.52124615, "memory(GiB)": 34.88, "step": 42600, "train_speed(iter/s)": 0.411525 }, { "acc": 0.92528915, "epoch": 1.1535753932797228, "grad_norm": 4.941612720489502, "learning_rate": 8.525239055876034e-06, "loss": 0.37551277, "memory(GiB)": 34.88, "step": 42605, "train_speed(iter/s)": 0.411527 }, { "acc": 0.90631847, "epoch": 1.1537107735629384, "grad_norm": 3.6067137718200684, "learning_rate": 8.524842197349276e-06, "loss": 0.49330654, "memory(GiB)": 34.88, "step": 42610, "train_speed(iter/s)": 0.41153 }, { "acc": 0.8637002, "epoch": 1.1538461538461537, "grad_norm": 12.491840362548828, "learning_rate": 8.524445294673207e-06, "loss": 0.7294445, "memory(GiB)": 34.88, "step": 42615, "train_speed(iter/s)": 0.411532 }, { "acc": 0.90494423, "epoch": 1.1539815341293693, "grad_norm": 13.164518356323242, "learning_rate": 8.524048347852799e-06, "loss": 0.49473553, "memory(GiB)": 34.88, "step": 42620, "train_speed(iter/s)": 0.411535 }, { "acc": 0.90564995, "epoch": 1.154116914412585, "grad_norm": 6.769892692565918, "learning_rate": 8.523651356893025e-06, "loss": 0.47196779, "memory(GiB)": 34.88, "step": 42625, "train_speed(iter/s)": 0.411538 }, { "acc": 0.88548222, "epoch": 1.1542522946958005, "grad_norm": 11.21270751953125, "learning_rate": 8.523254321798855e-06, "loss": 0.61976328, "memory(GiB)": 34.88, "step": 42630, "train_speed(iter/s)": 0.41154 }, { "acc": 0.89311228, "epoch": 1.154387674979016, "grad_norm": 9.74463176727295, "learning_rate": 8.522857242575266e-06, "loss": 0.51502471, "memory(GiB)": 34.88, "step": 42635, "train_speed(iter/s)": 0.411542 }, { "acc": 0.86811361, "epoch": 1.1545230552622316, "grad_norm": 12.298417091369629, "learning_rate": 8.522460119227232e-06, "loss": 0.74729137, "memory(GiB)": 34.88, "step": 42640, "train_speed(iter/s)": 0.411545 }, { "acc": 0.8886507, "epoch": 1.1546584355454472, "grad_norm": 15.638031005859375, "learning_rate": 8.522062951759723e-06, "loss": 0.47213478, "memory(GiB)": 34.88, "step": 42645, "train_speed(iter/s)": 0.411547 }, { "acc": 0.90718813, "epoch": 1.1547938158286628, "grad_norm": 5.099422454833984, "learning_rate": 8.521665740177723e-06, "loss": 0.51834674, "memory(GiB)": 34.88, "step": 42650, "train_speed(iter/s)": 0.41155 }, { "acc": 0.87472153, "epoch": 1.1549291961118784, "grad_norm": 12.244223594665527, "learning_rate": 8.5212684844862e-06, "loss": 0.65543833, "memory(GiB)": 34.88, "step": 42655, "train_speed(iter/s)": 0.411552 }, { "acc": 0.90045528, "epoch": 1.1550645763950937, "grad_norm": 15.18913459777832, "learning_rate": 8.520871184690136e-06, "loss": 0.47003894, "memory(GiB)": 34.88, "step": 42660, "train_speed(iter/s)": 0.411555 }, { "acc": 0.89348869, "epoch": 1.1551999566783093, "grad_norm": 7.875404357910156, "learning_rate": 8.520473840794502e-06, "loss": 0.60511918, "memory(GiB)": 34.88, "step": 42665, "train_speed(iter/s)": 0.411557 }, { "acc": 0.86758757, "epoch": 1.155335336961525, "grad_norm": 4.035407066345215, "learning_rate": 8.520076452804282e-06, "loss": 0.77639666, "memory(GiB)": 34.88, "step": 42670, "train_speed(iter/s)": 0.411559 }, { "acc": 0.88749132, "epoch": 1.1554707172447405, "grad_norm": 5.217350959777832, "learning_rate": 8.51967902072445e-06, "loss": 0.5854526, "memory(GiB)": 34.88, "step": 42675, "train_speed(iter/s)": 0.411562 }, { "acc": 0.86343956, "epoch": 1.155606097527956, "grad_norm": 14.382601737976074, "learning_rate": 8.519281544559984e-06, "loss": 0.6947485, "memory(GiB)": 34.88, "step": 42680, "train_speed(iter/s)": 0.411564 }, { "acc": 0.88249168, "epoch": 1.1557414778111716, "grad_norm": 13.822063446044922, "learning_rate": 8.518884024315868e-06, "loss": 0.66639881, "memory(GiB)": 34.88, "step": 42685, "train_speed(iter/s)": 0.411567 }, { "acc": 0.90144043, "epoch": 1.1558768580943872, "grad_norm": 7.055079936981201, "learning_rate": 8.518486459997075e-06, "loss": 0.55134702, "memory(GiB)": 34.88, "step": 42690, "train_speed(iter/s)": 0.411569 }, { "acc": 0.88233986, "epoch": 1.1560122383776026, "grad_norm": 8.703510284423828, "learning_rate": 8.518088851608592e-06, "loss": 0.64807711, "memory(GiB)": 34.88, "step": 42695, "train_speed(iter/s)": 0.411572 }, { "acc": 0.88844051, "epoch": 1.1561476186608182, "grad_norm": 16.168933868408203, "learning_rate": 8.517691199155395e-06, "loss": 0.61467867, "memory(GiB)": 34.88, "step": 42700, "train_speed(iter/s)": 0.411575 }, { "acc": 0.89639587, "epoch": 1.1562829989440337, "grad_norm": 7.437227725982666, "learning_rate": 8.517293502642464e-06, "loss": 0.46499147, "memory(GiB)": 34.88, "step": 42705, "train_speed(iter/s)": 0.411577 }, { "acc": 0.89531116, "epoch": 1.1564183792272493, "grad_norm": 4.3869123458862305, "learning_rate": 8.51689576207479e-06, "loss": 0.53412023, "memory(GiB)": 34.88, "step": 42710, "train_speed(iter/s)": 0.41158 }, { "acc": 0.86845665, "epoch": 1.156553759510465, "grad_norm": 14.348933219909668, "learning_rate": 8.516497977457345e-06, "loss": 0.74839001, "memory(GiB)": 34.88, "step": 42715, "train_speed(iter/s)": 0.411582 }, { "acc": 0.88864193, "epoch": 1.1566891397936805, "grad_norm": 9.556076049804688, "learning_rate": 8.516100148795118e-06, "loss": 0.5838882, "memory(GiB)": 34.88, "step": 42720, "train_speed(iter/s)": 0.411585 }, { "acc": 0.86630192, "epoch": 1.156824520076896, "grad_norm": 19.747411727905273, "learning_rate": 8.51570227609309e-06, "loss": 0.69563847, "memory(GiB)": 34.88, "step": 42725, "train_speed(iter/s)": 0.411587 }, { "acc": 0.87852535, "epoch": 1.1569599003601116, "grad_norm": 7.297201633453369, "learning_rate": 8.515304359356248e-06, "loss": 0.61807532, "memory(GiB)": 34.88, "step": 42730, "train_speed(iter/s)": 0.41159 }, { "acc": 0.88096619, "epoch": 1.1570952806433272, "grad_norm": 6.869894504547119, "learning_rate": 8.514906398589571e-06, "loss": 0.63071795, "memory(GiB)": 34.88, "step": 42735, "train_speed(iter/s)": 0.411593 }, { "acc": 0.85957403, "epoch": 1.1572306609265426, "grad_norm": 9.801955223083496, "learning_rate": 8.514508393798051e-06, "loss": 0.6975975, "memory(GiB)": 34.88, "step": 42740, "train_speed(iter/s)": 0.411595 }, { "acc": 0.88944321, "epoch": 1.1573660412097582, "grad_norm": 6.77502965927124, "learning_rate": 8.514110344986671e-06, "loss": 0.60038033, "memory(GiB)": 34.88, "step": 42745, "train_speed(iter/s)": 0.411597 }, { "acc": 0.88299732, "epoch": 1.1575014214929737, "grad_norm": 8.392349243164062, "learning_rate": 8.513712252160416e-06, "loss": 0.58116436, "memory(GiB)": 34.88, "step": 42750, "train_speed(iter/s)": 0.4116 }, { "acc": 0.87464018, "epoch": 1.1576368017761893, "grad_norm": 10.997941970825195, "learning_rate": 8.513314115324276e-06, "loss": 0.61796517, "memory(GiB)": 34.88, "step": 42755, "train_speed(iter/s)": 0.411602 }, { "acc": 0.89986067, "epoch": 1.157772182059405, "grad_norm": 13.707465171813965, "learning_rate": 8.512915934483236e-06, "loss": 0.47904463, "memory(GiB)": 34.88, "step": 42760, "train_speed(iter/s)": 0.411605 }, { "acc": 0.88391476, "epoch": 1.1579075623426205, "grad_norm": 8.066165924072266, "learning_rate": 8.512517709642284e-06, "loss": 0.6377223, "memory(GiB)": 34.88, "step": 42765, "train_speed(iter/s)": 0.411607 }, { "acc": 0.90129089, "epoch": 1.158042942625836, "grad_norm": 5.683361530303955, "learning_rate": 8.512119440806408e-06, "loss": 0.47518988, "memory(GiB)": 34.88, "step": 42770, "train_speed(iter/s)": 0.411609 }, { "acc": 0.89086876, "epoch": 1.1581783229090514, "grad_norm": 7.048426628112793, "learning_rate": 8.5117211279806e-06, "loss": 0.59243517, "memory(GiB)": 34.88, "step": 42775, "train_speed(iter/s)": 0.411611 }, { "acc": 0.88279572, "epoch": 1.158313703192267, "grad_norm": 11.873150825500488, "learning_rate": 8.511322771169845e-06, "loss": 0.58882198, "memory(GiB)": 34.88, "step": 42780, "train_speed(iter/s)": 0.411614 }, { "acc": 0.90146427, "epoch": 1.1584490834754826, "grad_norm": 8.988362312316895, "learning_rate": 8.510924370379138e-06, "loss": 0.4972074, "memory(GiB)": 34.88, "step": 42785, "train_speed(iter/s)": 0.411617 }, { "acc": 0.90116014, "epoch": 1.1585844637586982, "grad_norm": 4.982300281524658, "learning_rate": 8.510525925613469e-06, "loss": 0.47676554, "memory(GiB)": 34.88, "step": 42790, "train_speed(iter/s)": 0.411619 }, { "acc": 0.8750536, "epoch": 1.1587198440419137, "grad_norm": 28.347103118896484, "learning_rate": 8.510127436877826e-06, "loss": 0.58037353, "memory(GiB)": 34.88, "step": 42795, "train_speed(iter/s)": 0.411622 }, { "acc": 0.89354448, "epoch": 1.1588552243251293, "grad_norm": 14.280777931213379, "learning_rate": 8.509728904177204e-06, "loss": 0.55193005, "memory(GiB)": 34.88, "step": 42800, "train_speed(iter/s)": 0.411625 }, { "acc": 0.89472008, "epoch": 1.158990604608345, "grad_norm": 10.219181060791016, "learning_rate": 8.509330327516594e-06, "loss": 0.58780775, "memory(GiB)": 34.88, "step": 42805, "train_speed(iter/s)": 0.411627 }, { "acc": 0.88602695, "epoch": 1.1591259848915605, "grad_norm": 7.956918239593506, "learning_rate": 8.50893170690099e-06, "loss": 0.58390808, "memory(GiB)": 34.88, "step": 42810, "train_speed(iter/s)": 0.411629 }, { "acc": 0.88392315, "epoch": 1.1592613651747758, "grad_norm": 8.268986701965332, "learning_rate": 8.508533042335385e-06, "loss": 0.62466874, "memory(GiB)": 34.88, "step": 42815, "train_speed(iter/s)": 0.411632 }, { "acc": 0.89015417, "epoch": 1.1593967454579914, "grad_norm": 4.25681734085083, "learning_rate": 8.508134333824772e-06, "loss": 0.60941591, "memory(GiB)": 34.88, "step": 42820, "train_speed(iter/s)": 0.411634 }, { "acc": 0.90306625, "epoch": 1.159532125741207, "grad_norm": 4.699159145355225, "learning_rate": 8.507735581374146e-06, "loss": 0.50868416, "memory(GiB)": 34.88, "step": 42825, "train_speed(iter/s)": 0.411637 }, { "acc": 0.87741756, "epoch": 1.1596675060244226, "grad_norm": 13.431941986083984, "learning_rate": 8.507336784988503e-06, "loss": 0.67396412, "memory(GiB)": 34.88, "step": 42830, "train_speed(iter/s)": 0.41164 }, { "acc": 0.86000748, "epoch": 1.1598028863076382, "grad_norm": 4.17280912399292, "learning_rate": 8.50693794467284e-06, "loss": 0.71183448, "memory(GiB)": 34.88, "step": 42835, "train_speed(iter/s)": 0.411642 }, { "acc": 0.89109764, "epoch": 1.1599382665908538, "grad_norm": 4.43743371963501, "learning_rate": 8.506539060432149e-06, "loss": 0.60582838, "memory(GiB)": 34.88, "step": 42840, "train_speed(iter/s)": 0.411645 }, { "acc": 0.88039503, "epoch": 1.1600736468740693, "grad_norm": 14.229220390319824, "learning_rate": 8.50614013227143e-06, "loss": 0.71921215, "memory(GiB)": 34.88, "step": 42845, "train_speed(iter/s)": 0.411647 }, { "acc": 0.89552231, "epoch": 1.160209027157285, "grad_norm": 6.2309112548828125, "learning_rate": 8.505741160195682e-06, "loss": 0.55331669, "memory(GiB)": 34.88, "step": 42850, "train_speed(iter/s)": 0.41165 }, { "acc": 0.8906765, "epoch": 1.1603444074405003, "grad_norm": 9.722129821777344, "learning_rate": 8.505342144209898e-06, "loss": 0.61746011, "memory(GiB)": 34.88, "step": 42855, "train_speed(iter/s)": 0.411652 }, { "acc": 0.88155422, "epoch": 1.1604797877237158, "grad_norm": 13.360773086547852, "learning_rate": 8.50494308431908e-06, "loss": 0.54714503, "memory(GiB)": 34.88, "step": 42860, "train_speed(iter/s)": 0.411654 }, { "acc": 0.89269476, "epoch": 1.1606151680069314, "grad_norm": 6.4034295082092285, "learning_rate": 8.504543980528225e-06, "loss": 0.57606835, "memory(GiB)": 34.88, "step": 42865, "train_speed(iter/s)": 0.411657 }, { "acc": 0.88981972, "epoch": 1.160750548290147, "grad_norm": 10.255681991577148, "learning_rate": 8.504144832842337e-06, "loss": 0.56211185, "memory(GiB)": 34.88, "step": 42870, "train_speed(iter/s)": 0.41166 }, { "acc": 0.89274693, "epoch": 1.1608859285733626, "grad_norm": 6.533815383911133, "learning_rate": 8.50374564126641e-06, "loss": 0.55845547, "memory(GiB)": 34.88, "step": 42875, "train_speed(iter/s)": 0.411662 }, { "acc": 0.86381798, "epoch": 1.1610213088565782, "grad_norm": 13.270166397094727, "learning_rate": 8.50334640580545e-06, "loss": 0.69157395, "memory(GiB)": 34.88, "step": 42880, "train_speed(iter/s)": 0.411665 }, { "acc": 0.87657442, "epoch": 1.1611566891397938, "grad_norm": 13.084609985351562, "learning_rate": 8.502947126464453e-06, "loss": 0.72640896, "memory(GiB)": 34.88, "step": 42885, "train_speed(iter/s)": 0.411667 }, { "acc": 0.89641953, "epoch": 1.161292069423009, "grad_norm": 12.23450756072998, "learning_rate": 8.502547803248424e-06, "loss": 0.62365522, "memory(GiB)": 34.88, "step": 42890, "train_speed(iter/s)": 0.41167 }, { "acc": 0.90588951, "epoch": 1.1614274497062247, "grad_norm": 6.76194953918457, "learning_rate": 8.502148436162366e-06, "loss": 0.44268398, "memory(GiB)": 34.88, "step": 42895, "train_speed(iter/s)": 0.411672 }, { "acc": 0.88679848, "epoch": 1.1615628299894403, "grad_norm": 8.613190650939941, "learning_rate": 8.50174902521128e-06, "loss": 0.65518856, "memory(GiB)": 34.88, "step": 42900, "train_speed(iter/s)": 0.411674 }, { "acc": 0.89465218, "epoch": 1.1616982102726559, "grad_norm": 12.819759368896484, "learning_rate": 8.501349570400169e-06, "loss": 0.49348431, "memory(GiB)": 34.88, "step": 42905, "train_speed(iter/s)": 0.411676 }, { "acc": 0.88921909, "epoch": 1.1618335905558714, "grad_norm": 23.57406997680664, "learning_rate": 8.50095007173404e-06, "loss": 0.66272068, "memory(GiB)": 34.88, "step": 42910, "train_speed(iter/s)": 0.411679 }, { "acc": 0.88048267, "epoch": 1.161968970839087, "grad_norm": 13.556525230407715, "learning_rate": 8.500550529217894e-06, "loss": 0.62658043, "memory(GiB)": 34.88, "step": 42915, "train_speed(iter/s)": 0.411681 }, { "acc": 0.88869953, "epoch": 1.1621043511223026, "grad_norm": 12.252198219299316, "learning_rate": 8.500150942856738e-06, "loss": 0.646416, "memory(GiB)": 34.88, "step": 42920, "train_speed(iter/s)": 0.411684 }, { "acc": 0.89878082, "epoch": 1.1622397314055182, "grad_norm": 11.98886489868164, "learning_rate": 8.499751312655577e-06, "loss": 0.55866194, "memory(GiB)": 34.88, "step": 42925, "train_speed(iter/s)": 0.411686 }, { "acc": 0.88009319, "epoch": 1.1623751116887338, "grad_norm": 5.1751179695129395, "learning_rate": 8.499351638619416e-06, "loss": 0.6282414, "memory(GiB)": 34.88, "step": 42930, "train_speed(iter/s)": 0.411689 }, { "acc": 0.90698414, "epoch": 1.1625104919719491, "grad_norm": 10.484984397888184, "learning_rate": 8.498951920753266e-06, "loss": 0.50613103, "memory(GiB)": 34.88, "step": 42935, "train_speed(iter/s)": 0.411691 }, { "acc": 0.88266563, "epoch": 1.1626458722551647, "grad_norm": 6.5447163581848145, "learning_rate": 8.49855215906213e-06, "loss": 0.66259718, "memory(GiB)": 34.88, "step": 42940, "train_speed(iter/s)": 0.411694 }, { "acc": 0.88898687, "epoch": 1.1627812525383803, "grad_norm": 10.089943885803223, "learning_rate": 8.498152353551015e-06, "loss": 0.53679795, "memory(GiB)": 34.88, "step": 42945, "train_speed(iter/s)": 0.411696 }, { "acc": 0.89476881, "epoch": 1.1629166328215959, "grad_norm": 8.14172077178955, "learning_rate": 8.497752504224935e-06, "loss": 0.56483669, "memory(GiB)": 34.88, "step": 42950, "train_speed(iter/s)": 0.411699 }, { "acc": 0.8966464, "epoch": 1.1630520131048114, "grad_norm": 23.709928512573242, "learning_rate": 8.497352611088892e-06, "loss": 0.48750505, "memory(GiB)": 34.88, "step": 42955, "train_speed(iter/s)": 0.411701 }, { "acc": 0.88397617, "epoch": 1.163187393388027, "grad_norm": 10.945677757263184, "learning_rate": 8.496952674147899e-06, "loss": 0.57410088, "memory(GiB)": 34.88, "step": 42960, "train_speed(iter/s)": 0.411704 }, { "acc": 0.88338671, "epoch": 1.1633227736712426, "grad_norm": 5.498937129974365, "learning_rate": 8.49655269340697e-06, "loss": 0.62534637, "memory(GiB)": 34.88, "step": 42965, "train_speed(iter/s)": 0.411706 }, { "acc": 0.89087734, "epoch": 1.163458153954458, "grad_norm": 9.094109535217285, "learning_rate": 8.496152668871106e-06, "loss": 0.5969985, "memory(GiB)": 34.88, "step": 42970, "train_speed(iter/s)": 0.411709 }, { "acc": 0.90197144, "epoch": 1.1635935342376735, "grad_norm": 8.231302261352539, "learning_rate": 8.495752600545326e-06, "loss": 0.46872492, "memory(GiB)": 34.88, "step": 42975, "train_speed(iter/s)": 0.411712 }, { "acc": 0.88735247, "epoch": 1.1637289145208891, "grad_norm": 8.474034309387207, "learning_rate": 8.495352488434638e-06, "loss": 0.59209456, "memory(GiB)": 34.88, "step": 42980, "train_speed(iter/s)": 0.411714 }, { "acc": 0.88955822, "epoch": 1.1638642948041047, "grad_norm": 8.284282684326172, "learning_rate": 8.494952332544057e-06, "loss": 0.58533487, "memory(GiB)": 34.88, "step": 42985, "train_speed(iter/s)": 0.411717 }, { "acc": 0.89043922, "epoch": 1.1639996750873203, "grad_norm": 6.604696273803711, "learning_rate": 8.49455213287859e-06, "loss": 0.53078661, "memory(GiB)": 34.88, "step": 42990, "train_speed(iter/s)": 0.411719 }, { "acc": 0.89421406, "epoch": 1.1641350553705359, "grad_norm": 9.625021934509277, "learning_rate": 8.494151889443258e-06, "loss": 0.58672838, "memory(GiB)": 34.88, "step": 42995, "train_speed(iter/s)": 0.411721 }, { "acc": 0.88643436, "epoch": 1.1642704356537514, "grad_norm": 22.243343353271484, "learning_rate": 8.493751602243069e-06, "loss": 0.67070704, "memory(GiB)": 34.88, "step": 43000, "train_speed(iter/s)": 0.411723 }, { "acc": 0.88622847, "epoch": 1.164405815936967, "grad_norm": 6.3834228515625, "learning_rate": 8.493351271283042e-06, "loss": 0.56508365, "memory(GiB)": 34.88, "step": 43005, "train_speed(iter/s)": 0.411725 }, { "acc": 0.9068819, "epoch": 1.1645411962201826, "grad_norm": 5.0891313552856445, "learning_rate": 8.492950896568188e-06, "loss": 0.49435034, "memory(GiB)": 34.88, "step": 43010, "train_speed(iter/s)": 0.411728 }, { "acc": 0.88144283, "epoch": 1.164676576503398, "grad_norm": 11.616328239440918, "learning_rate": 8.492550478103523e-06, "loss": 0.5295928, "memory(GiB)": 34.88, "step": 43015, "train_speed(iter/s)": 0.411731 }, { "acc": 0.86728153, "epoch": 1.1648119567866135, "grad_norm": 15.897995948791504, "learning_rate": 8.492150015894065e-06, "loss": 0.78381047, "memory(GiB)": 34.88, "step": 43020, "train_speed(iter/s)": 0.411733 }, { "acc": 0.90362797, "epoch": 1.1649473370698291, "grad_norm": 9.706873893737793, "learning_rate": 8.491749509944828e-06, "loss": 0.44567575, "memory(GiB)": 34.88, "step": 43025, "train_speed(iter/s)": 0.411735 }, { "acc": 0.87498722, "epoch": 1.1650827173530447, "grad_norm": 6.315020561218262, "learning_rate": 8.491348960260833e-06, "loss": 0.70528054, "memory(GiB)": 34.88, "step": 43030, "train_speed(iter/s)": 0.411738 }, { "acc": 0.86799736, "epoch": 1.1652180976362603, "grad_norm": 12.39573860168457, "learning_rate": 8.490948366847093e-06, "loss": 0.62507086, "memory(GiB)": 34.88, "step": 43035, "train_speed(iter/s)": 0.41174 }, { "acc": 0.90274391, "epoch": 1.1653534779194759, "grad_norm": 8.757796287536621, "learning_rate": 8.490547729708628e-06, "loss": 0.48708944, "memory(GiB)": 34.88, "step": 43040, "train_speed(iter/s)": 0.411743 }, { "acc": 0.87773542, "epoch": 1.1654888582026914, "grad_norm": 10.639963150024414, "learning_rate": 8.490147048850459e-06, "loss": 0.60956974, "memory(GiB)": 34.88, "step": 43045, "train_speed(iter/s)": 0.411745 }, { "acc": 0.87879543, "epoch": 1.1656242384859068, "grad_norm": 6.965953826904297, "learning_rate": 8.489746324277604e-06, "loss": 0.62641392, "memory(GiB)": 34.88, "step": 43050, "train_speed(iter/s)": 0.411748 }, { "acc": 0.87935381, "epoch": 1.1657596187691224, "grad_norm": 9.531820297241211, "learning_rate": 8.489345555995083e-06, "loss": 0.67707119, "memory(GiB)": 34.88, "step": 43055, "train_speed(iter/s)": 0.41175 }, { "acc": 0.87753773, "epoch": 1.165894999052338, "grad_norm": 10.474233627319336, "learning_rate": 8.488944744007913e-06, "loss": 0.61870027, "memory(GiB)": 34.88, "step": 43060, "train_speed(iter/s)": 0.411752 }, { "acc": 0.86115379, "epoch": 1.1660303793355535, "grad_norm": 11.099407196044922, "learning_rate": 8.488543888321119e-06, "loss": 0.71993742, "memory(GiB)": 34.88, "step": 43065, "train_speed(iter/s)": 0.411755 }, { "acc": 0.88615742, "epoch": 1.1661657596187691, "grad_norm": 7.268835067749023, "learning_rate": 8.488142988939722e-06, "loss": 0.58287244, "memory(GiB)": 34.88, "step": 43070, "train_speed(iter/s)": 0.411758 }, { "acc": 0.87907429, "epoch": 1.1663011399019847, "grad_norm": 7.82295036315918, "learning_rate": 8.487742045868742e-06, "loss": 0.59441204, "memory(GiB)": 34.88, "step": 43075, "train_speed(iter/s)": 0.41176 }, { "acc": 0.88788242, "epoch": 1.1664365201852003, "grad_norm": 6.1428351402282715, "learning_rate": 8.487341059113204e-06, "loss": 0.55509357, "memory(GiB)": 34.88, "step": 43080, "train_speed(iter/s)": 0.411763 }, { "acc": 0.88206263, "epoch": 1.1665719004684159, "grad_norm": 11.28367805480957, "learning_rate": 8.48694002867813e-06, "loss": 0.59647574, "memory(GiB)": 34.88, "step": 43085, "train_speed(iter/s)": 0.411766 }, { "acc": 0.88838139, "epoch": 1.1667072807516314, "grad_norm": 8.830516815185547, "learning_rate": 8.486538954568543e-06, "loss": 0.6017684, "memory(GiB)": 34.88, "step": 43090, "train_speed(iter/s)": 0.411768 }, { "acc": 0.88006029, "epoch": 1.1668426610348468, "grad_norm": 7.583841800689697, "learning_rate": 8.486137836789469e-06, "loss": 0.64668298, "memory(GiB)": 34.88, "step": 43095, "train_speed(iter/s)": 0.411771 }, { "acc": 0.90760956, "epoch": 1.1669780413180624, "grad_norm": 6.562000751495361, "learning_rate": 8.485736675345934e-06, "loss": 0.50840311, "memory(GiB)": 34.88, "step": 43100, "train_speed(iter/s)": 0.411773 }, { "acc": 0.876513, "epoch": 1.167113421601278, "grad_norm": 10.838778495788574, "learning_rate": 8.485335470242959e-06, "loss": 0.66232643, "memory(GiB)": 34.88, "step": 43105, "train_speed(iter/s)": 0.411775 }, { "acc": 0.8825489, "epoch": 1.1672488018844935, "grad_norm": 37.59815979003906, "learning_rate": 8.48493422148557e-06, "loss": 0.62065678, "memory(GiB)": 34.88, "step": 43110, "train_speed(iter/s)": 0.411778 }, { "acc": 0.87674217, "epoch": 1.1673841821677091, "grad_norm": 6.1233320236206055, "learning_rate": 8.484532929078799e-06, "loss": 0.67762151, "memory(GiB)": 34.88, "step": 43115, "train_speed(iter/s)": 0.41178 }, { "acc": 0.87132473, "epoch": 1.1675195624509247, "grad_norm": 4.756731033325195, "learning_rate": 8.484131593027667e-06, "loss": 0.72332311, "memory(GiB)": 34.88, "step": 43120, "train_speed(iter/s)": 0.411782 }, { "acc": 0.89868326, "epoch": 1.1676549427341403, "grad_norm": 20.907621383666992, "learning_rate": 8.483730213337205e-06, "loss": 0.58342824, "memory(GiB)": 34.88, "step": 43125, "train_speed(iter/s)": 0.411785 }, { "acc": 0.87619753, "epoch": 1.1677903230173556, "grad_norm": 7.877823829650879, "learning_rate": 8.48332879001244e-06, "loss": 0.64914103, "memory(GiB)": 34.88, "step": 43130, "train_speed(iter/s)": 0.411787 }, { "acc": 0.90029497, "epoch": 1.1679257033005712, "grad_norm": 9.897953033447266, "learning_rate": 8.4829273230584e-06, "loss": 0.51384792, "memory(GiB)": 34.88, "step": 43135, "train_speed(iter/s)": 0.41179 }, { "acc": 0.90409069, "epoch": 1.1680610835837868, "grad_norm": 6.1559648513793945, "learning_rate": 8.482525812480114e-06, "loss": 0.42883978, "memory(GiB)": 34.88, "step": 43140, "train_speed(iter/s)": 0.411793 }, { "acc": 0.88320541, "epoch": 1.1681964638670024, "grad_norm": 15.009014129638672, "learning_rate": 8.482124258282613e-06, "loss": 0.69932375, "memory(GiB)": 34.88, "step": 43145, "train_speed(iter/s)": 0.411795 }, { "acc": 0.90101395, "epoch": 1.168331844150218, "grad_norm": 9.45038890838623, "learning_rate": 8.48172266047093e-06, "loss": 0.58610706, "memory(GiB)": 34.88, "step": 43150, "train_speed(iter/s)": 0.411797 }, { "acc": 0.88539047, "epoch": 1.1684672244334335, "grad_norm": 5.7793474197387695, "learning_rate": 8.481321019050088e-06, "loss": 0.50585542, "memory(GiB)": 34.88, "step": 43155, "train_speed(iter/s)": 0.411799 }, { "acc": 0.88812981, "epoch": 1.1686026047166491, "grad_norm": 9.594977378845215, "learning_rate": 8.480919334025126e-06, "loss": 0.70354166, "memory(GiB)": 34.88, "step": 43160, "train_speed(iter/s)": 0.411801 }, { "acc": 0.87543764, "epoch": 1.1687379849998647, "grad_norm": 15.808441162109375, "learning_rate": 8.480517605401072e-06, "loss": 0.69681544, "memory(GiB)": 34.88, "step": 43165, "train_speed(iter/s)": 0.411804 }, { "acc": 0.89470558, "epoch": 1.1688733652830803, "grad_norm": 11.264317512512207, "learning_rate": 8.480115833182959e-06, "loss": 0.71913304, "memory(GiB)": 34.88, "step": 43170, "train_speed(iter/s)": 0.411806 }, { "acc": 0.86932354, "epoch": 1.1690087455662956, "grad_norm": 3.6653811931610107, "learning_rate": 8.479714017375822e-06, "loss": 0.6794395, "memory(GiB)": 34.88, "step": 43175, "train_speed(iter/s)": 0.411809 }, { "acc": 0.87259264, "epoch": 1.1691441258495112, "grad_norm": 9.919469833374023, "learning_rate": 8.47931215798469e-06, "loss": 0.65039501, "memory(GiB)": 34.88, "step": 43180, "train_speed(iter/s)": 0.411811 }, { "acc": 0.89219551, "epoch": 1.1692795061327268, "grad_norm": 6.665435791015625, "learning_rate": 8.478910255014602e-06, "loss": 0.56351609, "memory(GiB)": 34.88, "step": 43185, "train_speed(iter/s)": 0.411814 }, { "acc": 0.90975056, "epoch": 1.1694148864159424, "grad_norm": 7.906147003173828, "learning_rate": 8.47850830847059e-06, "loss": 0.44830785, "memory(GiB)": 34.88, "step": 43190, "train_speed(iter/s)": 0.411816 }, { "acc": 0.88098335, "epoch": 1.169550266699158, "grad_norm": 4.925190448760986, "learning_rate": 8.478106318357688e-06, "loss": 0.59243455, "memory(GiB)": 34.88, "step": 43195, "train_speed(iter/s)": 0.411819 }, { "acc": 0.90339699, "epoch": 1.1696856469823735, "grad_norm": 8.490331649780273, "learning_rate": 8.477704284680936e-06, "loss": 0.40448236, "memory(GiB)": 34.88, "step": 43200, "train_speed(iter/s)": 0.411821 }, { "acc": 0.88033762, "epoch": 1.1698210272655891, "grad_norm": 18.460281372070312, "learning_rate": 8.477302207445367e-06, "loss": 0.63931551, "memory(GiB)": 34.88, "step": 43205, "train_speed(iter/s)": 0.411823 }, { "acc": 0.87294235, "epoch": 1.1699564075488045, "grad_norm": 7.2346673011779785, "learning_rate": 8.476900086656017e-06, "loss": 0.74583263, "memory(GiB)": 34.88, "step": 43210, "train_speed(iter/s)": 0.411825 }, { "acc": 0.89968758, "epoch": 1.17009178783202, "grad_norm": 7.124056339263916, "learning_rate": 8.476497922317924e-06, "loss": 0.53226423, "memory(GiB)": 34.88, "step": 43215, "train_speed(iter/s)": 0.411827 }, { "acc": 0.89970207, "epoch": 1.1702271681152356, "grad_norm": 4.733862400054932, "learning_rate": 8.47609571443613e-06, "loss": 0.56357317, "memory(GiB)": 34.88, "step": 43220, "train_speed(iter/s)": 0.411829 }, { "acc": 0.88457642, "epoch": 1.1703625483984512, "grad_norm": 8.61914348602295, "learning_rate": 8.475693463015668e-06, "loss": 0.54740181, "memory(GiB)": 34.88, "step": 43225, "train_speed(iter/s)": 0.411831 }, { "acc": 0.88286848, "epoch": 1.1704979286816668, "grad_norm": 15.39330768585205, "learning_rate": 8.47529116806158e-06, "loss": 0.59011507, "memory(GiB)": 34.88, "step": 43230, "train_speed(iter/s)": 0.411834 }, { "acc": 0.89411755, "epoch": 1.1706333089648824, "grad_norm": 5.150308132171631, "learning_rate": 8.474888829578903e-06, "loss": 0.56779556, "memory(GiB)": 34.88, "step": 43235, "train_speed(iter/s)": 0.411836 }, { "acc": 0.90113049, "epoch": 1.170768689248098, "grad_norm": 6.22702693939209, "learning_rate": 8.47448644757268e-06, "loss": 0.49930029, "memory(GiB)": 34.88, "step": 43240, "train_speed(iter/s)": 0.411839 }, { "acc": 0.88522301, "epoch": 1.1709040695313135, "grad_norm": 10.529813766479492, "learning_rate": 8.474084022047951e-06, "loss": 0.57744732, "memory(GiB)": 34.88, "step": 43245, "train_speed(iter/s)": 0.411841 }, { "acc": 0.88609343, "epoch": 1.1710394498145291, "grad_norm": 9.737411499023438, "learning_rate": 8.473681553009755e-06, "loss": 0.5974092, "memory(GiB)": 34.88, "step": 43250, "train_speed(iter/s)": 0.411843 }, { "acc": 0.89914398, "epoch": 1.1711748300977445, "grad_norm": 5.28419303894043, "learning_rate": 8.473279040463136e-06, "loss": 0.54902081, "memory(GiB)": 34.88, "step": 43255, "train_speed(iter/s)": 0.411845 }, { "acc": 0.88035622, "epoch": 1.17131021038096, "grad_norm": 13.30286979675293, "learning_rate": 8.472876484413136e-06, "loss": 0.65673823, "memory(GiB)": 34.88, "step": 43260, "train_speed(iter/s)": 0.411846 }, { "acc": 0.87498293, "epoch": 1.1714455906641756, "grad_norm": 18.393295288085938, "learning_rate": 8.472473884864796e-06, "loss": 0.68929234, "memory(GiB)": 34.88, "step": 43265, "train_speed(iter/s)": 0.411848 }, { "acc": 0.88350925, "epoch": 1.1715809709473912, "grad_norm": 9.289365768432617, "learning_rate": 8.472071241823163e-06, "loss": 0.59352674, "memory(GiB)": 34.88, "step": 43270, "train_speed(iter/s)": 0.411851 }, { "acc": 0.91762733, "epoch": 1.1717163512306068, "grad_norm": 5.297266006469727, "learning_rate": 8.471668555293277e-06, "loss": 0.44817476, "memory(GiB)": 34.88, "step": 43275, "train_speed(iter/s)": 0.411854 }, { "acc": 0.89521275, "epoch": 1.1718517315138224, "grad_norm": 12.695059776306152, "learning_rate": 8.471265825280185e-06, "loss": 0.5587781, "memory(GiB)": 34.88, "step": 43280, "train_speed(iter/s)": 0.411856 }, { "acc": 0.87542706, "epoch": 1.171987111797038, "grad_norm": 9.790806770324707, "learning_rate": 8.470863051788929e-06, "loss": 0.72891202, "memory(GiB)": 34.88, "step": 43285, "train_speed(iter/s)": 0.411858 }, { "acc": 0.90067787, "epoch": 1.1721224920802533, "grad_norm": 12.999584197998047, "learning_rate": 8.470460234824559e-06, "loss": 0.59354181, "memory(GiB)": 34.88, "step": 43290, "train_speed(iter/s)": 0.41186 }, { "acc": 0.88753757, "epoch": 1.172257872363469, "grad_norm": 5.57363748550415, "learning_rate": 8.470057374392116e-06, "loss": 0.58181601, "memory(GiB)": 34.88, "step": 43295, "train_speed(iter/s)": 0.411862 }, { "acc": 0.88267021, "epoch": 1.1723932526466845, "grad_norm": 5.498176097869873, "learning_rate": 8.469654470496652e-06, "loss": 0.64356604, "memory(GiB)": 34.88, "step": 43300, "train_speed(iter/s)": 0.411865 }, { "acc": 0.89403706, "epoch": 1.1725286329299, "grad_norm": 8.336027145385742, "learning_rate": 8.46925152314321e-06, "loss": 0.63264313, "memory(GiB)": 34.88, "step": 43305, "train_speed(iter/s)": 0.411867 }, { "acc": 0.88024654, "epoch": 1.1726640132131156, "grad_norm": 11.119200706481934, "learning_rate": 8.468848532336838e-06, "loss": 0.70625339, "memory(GiB)": 34.88, "step": 43310, "train_speed(iter/s)": 0.411868 }, { "acc": 0.8638134, "epoch": 1.1727993934963312, "grad_norm": 11.480474472045898, "learning_rate": 8.468445498082586e-06, "loss": 0.7076705, "memory(GiB)": 34.88, "step": 43315, "train_speed(iter/s)": 0.41187 }, { "acc": 0.88013182, "epoch": 1.1729347737795468, "grad_norm": 7.268741607666016, "learning_rate": 8.468042420385504e-06, "loss": 0.56612425, "memory(GiB)": 34.88, "step": 43320, "train_speed(iter/s)": 0.411872 }, { "acc": 0.89559498, "epoch": 1.1730701540627624, "grad_norm": 11.838165283203125, "learning_rate": 8.467639299250637e-06, "loss": 0.62068601, "memory(GiB)": 34.88, "step": 43325, "train_speed(iter/s)": 0.411875 }, { "acc": 0.88601017, "epoch": 1.173205534345978, "grad_norm": 13.810225486755371, "learning_rate": 8.467236134683037e-06, "loss": 0.54738665, "memory(GiB)": 34.88, "step": 43330, "train_speed(iter/s)": 0.411876 }, { "acc": 0.89663534, "epoch": 1.1733409146291933, "grad_norm": 11.748552322387695, "learning_rate": 8.466832926687759e-06, "loss": 0.64164472, "memory(GiB)": 34.88, "step": 43335, "train_speed(iter/s)": 0.411878 }, { "acc": 0.89338322, "epoch": 1.173476294912409, "grad_norm": 8.434649467468262, "learning_rate": 8.466429675269846e-06, "loss": 0.58827596, "memory(GiB)": 34.88, "step": 43340, "train_speed(iter/s)": 0.411881 }, { "acc": 0.87644892, "epoch": 1.1736116751956245, "grad_norm": 12.264606475830078, "learning_rate": 8.466026380434353e-06, "loss": 0.67951298, "memory(GiB)": 34.88, "step": 43345, "train_speed(iter/s)": 0.411883 }, { "acc": 0.88875504, "epoch": 1.17374705547884, "grad_norm": 9.926063537597656, "learning_rate": 8.465623042186334e-06, "loss": 0.64226322, "memory(GiB)": 34.88, "step": 43350, "train_speed(iter/s)": 0.411886 }, { "acc": 0.89265242, "epoch": 1.1738824357620556, "grad_norm": 6.362768650054932, "learning_rate": 8.46521966053084e-06, "loss": 0.57710242, "memory(GiB)": 34.88, "step": 43355, "train_speed(iter/s)": 0.411888 }, { "acc": 0.8685648, "epoch": 1.1740178160452712, "grad_norm": 15.216714859008789, "learning_rate": 8.464816235472924e-06, "loss": 0.66119528, "memory(GiB)": 34.88, "step": 43360, "train_speed(iter/s)": 0.41189 }, { "acc": 0.88467836, "epoch": 1.1741531963284868, "grad_norm": 13.59544563293457, "learning_rate": 8.464412767017639e-06, "loss": 0.62945232, "memory(GiB)": 34.88, "step": 43365, "train_speed(iter/s)": 0.411893 }, { "acc": 0.86195822, "epoch": 1.1742885766117022, "grad_norm": 10.98449420928955, "learning_rate": 8.464009255170041e-06, "loss": 0.81678696, "memory(GiB)": 34.88, "step": 43370, "train_speed(iter/s)": 0.411895 }, { "acc": 0.88182344, "epoch": 1.1744239568949177, "grad_norm": 11.615896224975586, "learning_rate": 8.463605699935183e-06, "loss": 0.67731929, "memory(GiB)": 34.88, "step": 43375, "train_speed(iter/s)": 0.411898 }, { "acc": 0.88257046, "epoch": 1.1745593371781333, "grad_norm": 6.46737813949585, "learning_rate": 8.46320210131812e-06, "loss": 0.56279821, "memory(GiB)": 34.88, "step": 43380, "train_speed(iter/s)": 0.4119 }, { "acc": 0.90662956, "epoch": 1.174694717461349, "grad_norm": 10.780364990234375, "learning_rate": 8.46279845932391e-06, "loss": 0.54750643, "memory(GiB)": 34.88, "step": 43385, "train_speed(iter/s)": 0.411903 }, { "acc": 0.88833141, "epoch": 1.1748300977445645, "grad_norm": 7.870028972625732, "learning_rate": 8.46239477395761e-06, "loss": 0.5075192, "memory(GiB)": 34.88, "step": 43390, "train_speed(iter/s)": 0.411905 }, { "acc": 0.86863337, "epoch": 1.17496547802778, "grad_norm": 15.744297981262207, "learning_rate": 8.461991045224273e-06, "loss": 0.81577282, "memory(GiB)": 34.88, "step": 43395, "train_speed(iter/s)": 0.411908 }, { "acc": 0.90460119, "epoch": 1.1751008583109956, "grad_norm": 9.66019344329834, "learning_rate": 8.46158727312896e-06, "loss": 0.55066214, "memory(GiB)": 34.88, "step": 43400, "train_speed(iter/s)": 0.41191 }, { "acc": 0.88339901, "epoch": 1.1752362385942112, "grad_norm": 6.187833309173584, "learning_rate": 8.461183457676728e-06, "loss": 0.49799891, "memory(GiB)": 34.88, "step": 43405, "train_speed(iter/s)": 0.411913 }, { "acc": 0.90221624, "epoch": 1.1753716188774268, "grad_norm": 4.611557483673096, "learning_rate": 8.460779598872636e-06, "loss": 0.47635355, "memory(GiB)": 34.88, "step": 43410, "train_speed(iter/s)": 0.411914 }, { "acc": 0.88418446, "epoch": 1.1755069991606422, "grad_norm": 9.090843200683594, "learning_rate": 8.460375696721741e-06, "loss": 0.62619152, "memory(GiB)": 34.88, "step": 43415, "train_speed(iter/s)": 0.411917 }, { "acc": 0.90226116, "epoch": 1.1756423794438577, "grad_norm": 10.836058616638184, "learning_rate": 8.459971751229104e-06, "loss": 0.53091183, "memory(GiB)": 34.88, "step": 43420, "train_speed(iter/s)": 0.411919 }, { "acc": 0.90606403, "epoch": 1.1757777597270733, "grad_norm": 4.986924648284912, "learning_rate": 8.459567762399788e-06, "loss": 0.47636261, "memory(GiB)": 34.88, "step": 43425, "train_speed(iter/s)": 0.411922 }, { "acc": 0.89253635, "epoch": 1.175913140010289, "grad_norm": 6.036132335662842, "learning_rate": 8.459163730238848e-06, "loss": 0.52728825, "memory(GiB)": 34.88, "step": 43430, "train_speed(iter/s)": 0.411924 }, { "acc": 0.88277302, "epoch": 1.1760485202935045, "grad_norm": 9.9868745803833, "learning_rate": 8.458759654751351e-06, "loss": 0.66531091, "memory(GiB)": 34.88, "step": 43435, "train_speed(iter/s)": 0.411927 }, { "acc": 0.89445438, "epoch": 1.17618390057672, "grad_norm": 9.54418659210205, "learning_rate": 8.458355535942356e-06, "loss": 0.550526, "memory(GiB)": 34.88, "step": 43440, "train_speed(iter/s)": 0.411929 }, { "acc": 0.87594585, "epoch": 1.1763192808599356, "grad_norm": 5.5524492263793945, "learning_rate": 8.457951373816924e-06, "loss": 0.70463047, "memory(GiB)": 34.88, "step": 43445, "train_speed(iter/s)": 0.411931 }, { "acc": 0.89275513, "epoch": 1.176454661143151, "grad_norm": 5.40839958190918, "learning_rate": 8.457547168380123e-06, "loss": 0.58057146, "memory(GiB)": 34.88, "step": 43450, "train_speed(iter/s)": 0.411933 }, { "acc": 0.89791822, "epoch": 1.1765900414263666, "grad_norm": 5.418685436248779, "learning_rate": 8.45714291963701e-06, "loss": 0.4889019, "memory(GiB)": 34.88, "step": 43455, "train_speed(iter/s)": 0.411936 }, { "acc": 0.90667, "epoch": 1.1767254217095822, "grad_norm": 9.399312019348145, "learning_rate": 8.456738627592654e-06, "loss": 0.45258241, "memory(GiB)": 34.88, "step": 43460, "train_speed(iter/s)": 0.411938 }, { "acc": 0.88188896, "epoch": 1.1768608019927977, "grad_norm": 8.316740036010742, "learning_rate": 8.456334292252117e-06, "loss": 0.61367884, "memory(GiB)": 34.88, "step": 43465, "train_speed(iter/s)": 0.411941 }, { "acc": 0.88857479, "epoch": 1.1769961822760133, "grad_norm": 10.48261547088623, "learning_rate": 8.455929913620465e-06, "loss": 0.61385527, "memory(GiB)": 34.88, "step": 43470, "train_speed(iter/s)": 0.411944 }, { "acc": 0.88947029, "epoch": 1.177131562559229, "grad_norm": 6.700652599334717, "learning_rate": 8.455525491702762e-06, "loss": 0.56846728, "memory(GiB)": 34.88, "step": 43475, "train_speed(iter/s)": 0.411946 }, { "acc": 0.8913517, "epoch": 1.1772669428424445, "grad_norm": 5.650674343109131, "learning_rate": 8.455121026504076e-06, "loss": 0.50489984, "memory(GiB)": 34.88, "step": 43480, "train_speed(iter/s)": 0.411949 }, { "acc": 0.89286747, "epoch": 1.17740232312566, "grad_norm": 7.188247203826904, "learning_rate": 8.454716518029475e-06, "loss": 0.54113779, "memory(GiB)": 34.88, "step": 43485, "train_speed(iter/s)": 0.411951 }, { "acc": 0.89871798, "epoch": 1.1775377034088756, "grad_norm": 14.792076110839844, "learning_rate": 8.454311966284024e-06, "loss": 0.54772558, "memory(GiB)": 34.88, "step": 43490, "train_speed(iter/s)": 0.411953 }, { "acc": 0.89199219, "epoch": 1.177673083692091, "grad_norm": 4.9164137840271, "learning_rate": 8.45390737127279e-06, "loss": 0.61017857, "memory(GiB)": 34.88, "step": 43495, "train_speed(iter/s)": 0.411956 }, { "acc": 0.87777939, "epoch": 1.1778084639753066, "grad_norm": 5.083984375, "learning_rate": 8.453502733000844e-06, "loss": 0.62251568, "memory(GiB)": 34.88, "step": 43500, "train_speed(iter/s)": 0.411958 }, { "acc": 0.88791761, "epoch": 1.1779438442585222, "grad_norm": 7.759706497192383, "learning_rate": 8.453098051473254e-06, "loss": 0.61148343, "memory(GiB)": 34.88, "step": 43505, "train_speed(iter/s)": 0.411961 }, { "acc": 0.88358192, "epoch": 1.1780792245417377, "grad_norm": 9.78066635131836, "learning_rate": 8.452693326695089e-06, "loss": 0.60243368, "memory(GiB)": 34.88, "step": 43510, "train_speed(iter/s)": 0.411963 }, { "acc": 0.88218498, "epoch": 1.1782146048249533, "grad_norm": 20.60271453857422, "learning_rate": 8.452288558671418e-06, "loss": 0.54032011, "memory(GiB)": 34.88, "step": 43515, "train_speed(iter/s)": 0.411966 }, { "acc": 0.91793327, "epoch": 1.178349985108169, "grad_norm": 13.080093383789062, "learning_rate": 8.451883747407312e-06, "loss": 0.4069252, "memory(GiB)": 34.88, "step": 43520, "train_speed(iter/s)": 0.411969 }, { "acc": 0.89381094, "epoch": 1.1784853653913845, "grad_norm": 8.679769515991211, "learning_rate": 8.451478892907845e-06, "loss": 0.59916277, "memory(GiB)": 34.88, "step": 43525, "train_speed(iter/s)": 0.411971 }, { "acc": 0.89951, "epoch": 1.1786207456745998, "grad_norm": 9.611822128295898, "learning_rate": 8.451073995178084e-06, "loss": 0.4920577, "memory(GiB)": 34.88, "step": 43530, "train_speed(iter/s)": 0.411973 }, { "acc": 0.89843941, "epoch": 1.1787561259578154, "grad_norm": 5.5982465744018555, "learning_rate": 8.450669054223106e-06, "loss": 0.57768965, "memory(GiB)": 34.88, "step": 43535, "train_speed(iter/s)": 0.411976 }, { "acc": 0.89289207, "epoch": 1.178891506241031, "grad_norm": 7.959465026855469, "learning_rate": 8.450264070047977e-06, "loss": 0.51608834, "memory(GiB)": 34.88, "step": 43540, "train_speed(iter/s)": 0.411978 }, { "acc": 0.8904871, "epoch": 1.1790268865242466, "grad_norm": 18.064313888549805, "learning_rate": 8.449859042657778e-06, "loss": 0.62748494, "memory(GiB)": 34.88, "step": 43545, "train_speed(iter/s)": 0.411981 }, { "acc": 0.90836792, "epoch": 1.1791622668074622, "grad_norm": 6.696308135986328, "learning_rate": 8.449453972057576e-06, "loss": 0.39230614, "memory(GiB)": 34.88, "step": 43550, "train_speed(iter/s)": 0.411983 }, { "acc": 0.89380474, "epoch": 1.1792976470906777, "grad_norm": 31.174558639526367, "learning_rate": 8.44904885825245e-06, "loss": 0.5482605, "memory(GiB)": 34.88, "step": 43555, "train_speed(iter/s)": 0.411985 }, { "acc": 0.89348822, "epoch": 1.1794330273738933, "grad_norm": 14.74681568145752, "learning_rate": 8.448643701247474e-06, "loss": 0.6431869, "memory(GiB)": 34.88, "step": 43560, "train_speed(iter/s)": 0.411988 }, { "acc": 0.89253407, "epoch": 1.179568407657109, "grad_norm": 6.9834394454956055, "learning_rate": 8.448238501047722e-06, "loss": 0.55083766, "memory(GiB)": 34.88, "step": 43565, "train_speed(iter/s)": 0.411991 }, { "acc": 0.89388075, "epoch": 1.1797037879403245, "grad_norm": 8.387978553771973, "learning_rate": 8.44783325765827e-06, "loss": 0.49565287, "memory(GiB)": 34.88, "step": 43570, "train_speed(iter/s)": 0.411993 }, { "acc": 0.91098499, "epoch": 1.1798391682235398, "grad_norm": 4.448275089263916, "learning_rate": 8.447427971084194e-06, "loss": 0.447788, "memory(GiB)": 34.88, "step": 43575, "train_speed(iter/s)": 0.411995 }, { "acc": 0.87593126, "epoch": 1.1799745485067554, "grad_norm": 7.506588459014893, "learning_rate": 8.447022641330573e-06, "loss": 0.65246449, "memory(GiB)": 34.88, "step": 43580, "train_speed(iter/s)": 0.411997 }, { "acc": 0.88624363, "epoch": 1.180109928789971, "grad_norm": 27.015201568603516, "learning_rate": 8.446617268402486e-06, "loss": 0.58665237, "memory(GiB)": 34.88, "step": 43585, "train_speed(iter/s)": 0.412 }, { "acc": 0.91034851, "epoch": 1.1802453090731866, "grad_norm": 3.48506236076355, "learning_rate": 8.446211852305005e-06, "loss": 0.46169176, "memory(GiB)": 34.88, "step": 43590, "train_speed(iter/s)": 0.412002 }, { "acc": 0.90758915, "epoch": 1.1803806893564022, "grad_norm": 9.784177780151367, "learning_rate": 8.445806393043213e-06, "loss": 0.50981407, "memory(GiB)": 34.88, "step": 43595, "train_speed(iter/s)": 0.412005 }, { "acc": 0.89594202, "epoch": 1.1805160696396177, "grad_norm": 7.870082855224609, "learning_rate": 8.445400890622188e-06, "loss": 0.57457166, "memory(GiB)": 34.88, "step": 43600, "train_speed(iter/s)": 0.412007 }, { "acc": 0.88307247, "epoch": 1.1806514499228333, "grad_norm": 59.15522384643555, "learning_rate": 8.444995345047012e-06, "loss": 0.60208693, "memory(GiB)": 34.88, "step": 43605, "train_speed(iter/s)": 0.41201 }, { "acc": 0.87282448, "epoch": 1.1807868302060487, "grad_norm": 15.473928451538086, "learning_rate": 8.444589756322761e-06, "loss": 0.64802456, "memory(GiB)": 34.88, "step": 43610, "train_speed(iter/s)": 0.412012 }, { "acc": 0.90707645, "epoch": 1.1809222104892643, "grad_norm": 7.846367359161377, "learning_rate": 8.44418412445452e-06, "loss": 0.48985291, "memory(GiB)": 34.88, "step": 43615, "train_speed(iter/s)": 0.412014 }, { "acc": 0.88498821, "epoch": 1.1810575907724798, "grad_norm": 8.408188819885254, "learning_rate": 8.443778449447368e-06, "loss": 0.56771703, "memory(GiB)": 34.88, "step": 43620, "train_speed(iter/s)": 0.412017 }, { "acc": 0.89097214, "epoch": 1.1811929710556954, "grad_norm": 10.061176300048828, "learning_rate": 8.443372731306386e-06, "loss": 0.62260137, "memory(GiB)": 34.88, "step": 43625, "train_speed(iter/s)": 0.412019 }, { "acc": 0.8916048, "epoch": 1.181328351338911, "grad_norm": 11.464677810668945, "learning_rate": 8.44296697003666e-06, "loss": 0.59814897, "memory(GiB)": 34.88, "step": 43630, "train_speed(iter/s)": 0.412021 }, { "acc": 0.88055458, "epoch": 1.1814637316221266, "grad_norm": 13.679627418518066, "learning_rate": 8.44256116564327e-06, "loss": 0.65269403, "memory(GiB)": 34.88, "step": 43635, "train_speed(iter/s)": 0.412023 }, { "acc": 0.88945274, "epoch": 1.1815991119053422, "grad_norm": 6.16079044342041, "learning_rate": 8.4421553181313e-06, "loss": 0.60695534, "memory(GiB)": 34.88, "step": 43640, "train_speed(iter/s)": 0.412026 }, { "acc": 0.88716068, "epoch": 1.1817344921885578, "grad_norm": 8.411409378051758, "learning_rate": 8.441749427505834e-06, "loss": 0.52490225, "memory(GiB)": 34.88, "step": 43645, "train_speed(iter/s)": 0.412028 }, { "acc": 0.89865618, "epoch": 1.1818698724717733, "grad_norm": 6.0624823570251465, "learning_rate": 8.441343493771959e-06, "loss": 0.46164131, "memory(GiB)": 34.88, "step": 43650, "train_speed(iter/s)": 0.412031 }, { "acc": 0.8933671, "epoch": 1.1820052527549887, "grad_norm": 11.10735034942627, "learning_rate": 8.440937516934755e-06, "loss": 0.54748983, "memory(GiB)": 34.88, "step": 43655, "train_speed(iter/s)": 0.412033 }, { "acc": 0.8927742, "epoch": 1.1821406330382043, "grad_norm": 5.384072780609131, "learning_rate": 8.440531496999312e-06, "loss": 0.49855289, "memory(GiB)": 34.88, "step": 43660, "train_speed(iter/s)": 0.412035 }, { "acc": 0.89332418, "epoch": 1.1822760133214198, "grad_norm": 5.082404136657715, "learning_rate": 8.440125433970714e-06, "loss": 0.61431351, "memory(GiB)": 34.88, "step": 43665, "train_speed(iter/s)": 0.412037 }, { "acc": 0.89486656, "epoch": 1.1824113936046354, "grad_norm": 16.65279197692871, "learning_rate": 8.43971932785405e-06, "loss": 0.53410282, "memory(GiB)": 34.88, "step": 43670, "train_speed(iter/s)": 0.41204 }, { "acc": 0.89297237, "epoch": 1.182546773887851, "grad_norm": 8.781340599060059, "learning_rate": 8.439313178654407e-06, "loss": 0.500354, "memory(GiB)": 34.88, "step": 43675, "train_speed(iter/s)": 0.412042 }, { "acc": 0.89330158, "epoch": 1.1826821541710666, "grad_norm": 13.264022827148438, "learning_rate": 8.43890698637687e-06, "loss": 0.49135008, "memory(GiB)": 34.88, "step": 43680, "train_speed(iter/s)": 0.412045 }, { "acc": 0.8811883, "epoch": 1.1828175344542822, "grad_norm": 11.796731948852539, "learning_rate": 8.438500751026529e-06, "loss": 0.64168916, "memory(GiB)": 34.88, "step": 43685, "train_speed(iter/s)": 0.412047 }, { "acc": 0.8757493, "epoch": 1.1829529147374975, "grad_norm": 5.725927829742432, "learning_rate": 8.438094472608472e-06, "loss": 0.59473581, "memory(GiB)": 34.88, "step": 43690, "train_speed(iter/s)": 0.412049 }, { "acc": 0.90118484, "epoch": 1.183088295020713, "grad_norm": 8.559059143066406, "learning_rate": 8.43768815112779e-06, "loss": 0.52340221, "memory(GiB)": 34.88, "step": 43695, "train_speed(iter/s)": 0.412052 }, { "acc": 0.88946133, "epoch": 1.1832236753039287, "grad_norm": 9.595972061157227, "learning_rate": 8.437281786589573e-06, "loss": 0.52245169, "memory(GiB)": 34.88, "step": 43700, "train_speed(iter/s)": 0.412054 }, { "acc": 0.89776669, "epoch": 1.1833590555871443, "grad_norm": 10.851153373718262, "learning_rate": 8.43687537899891e-06, "loss": 0.55173893, "memory(GiB)": 34.88, "step": 43705, "train_speed(iter/s)": 0.412057 }, { "acc": 0.91039896, "epoch": 1.1834944358703599, "grad_norm": 6.739616394042969, "learning_rate": 8.436468928360894e-06, "loss": 0.43591003, "memory(GiB)": 34.88, "step": 43710, "train_speed(iter/s)": 0.41206 }, { "acc": 0.88632622, "epoch": 1.1836298161535754, "grad_norm": 17.321992874145508, "learning_rate": 8.436062434680613e-06, "loss": 0.63406782, "memory(GiB)": 34.88, "step": 43715, "train_speed(iter/s)": 0.412062 }, { "acc": 0.8748642, "epoch": 1.183765196436791, "grad_norm": 13.709515571594238, "learning_rate": 8.435655897963165e-06, "loss": 0.68888988, "memory(GiB)": 34.88, "step": 43720, "train_speed(iter/s)": 0.412064 }, { "acc": 0.8986537, "epoch": 1.1839005767200066, "grad_norm": 10.523858070373535, "learning_rate": 8.435249318213637e-06, "loss": 0.5761898, "memory(GiB)": 34.88, "step": 43725, "train_speed(iter/s)": 0.412067 }, { "acc": 0.87894154, "epoch": 1.1840359570032222, "grad_norm": 8.886649131774902, "learning_rate": 8.434842695437127e-06, "loss": 0.60738306, "memory(GiB)": 34.88, "step": 43730, "train_speed(iter/s)": 0.412069 }, { "acc": 0.90946321, "epoch": 1.1841713372864375, "grad_norm": 11.42800235748291, "learning_rate": 8.434436029638723e-06, "loss": 0.46984406, "memory(GiB)": 34.88, "step": 43735, "train_speed(iter/s)": 0.412072 }, { "acc": 0.90628462, "epoch": 1.1843067175696531, "grad_norm": 8.000638008117676, "learning_rate": 8.434029320823526e-06, "loss": 0.47288895, "memory(GiB)": 34.88, "step": 43740, "train_speed(iter/s)": 0.412074 }, { "acc": 0.87768631, "epoch": 1.1844420978528687, "grad_norm": 21.259336471557617, "learning_rate": 8.433622568996624e-06, "loss": 0.72528982, "memory(GiB)": 34.88, "step": 43745, "train_speed(iter/s)": 0.412077 }, { "acc": 0.88363438, "epoch": 1.1845774781360843, "grad_norm": 7.932959079742432, "learning_rate": 8.433215774163118e-06, "loss": 0.64493957, "memory(GiB)": 34.88, "step": 43750, "train_speed(iter/s)": 0.412079 }, { "acc": 0.90798016, "epoch": 1.1847128584192999, "grad_norm": 4.7396087646484375, "learning_rate": 8.432808936328103e-06, "loss": 0.43008776, "memory(GiB)": 34.88, "step": 43755, "train_speed(iter/s)": 0.412082 }, { "acc": 0.88780689, "epoch": 1.1848482387025154, "grad_norm": 9.378460884094238, "learning_rate": 8.432402055496672e-06, "loss": 0.60261827, "memory(GiB)": 34.88, "step": 43760, "train_speed(iter/s)": 0.412084 }, { "acc": 0.89543695, "epoch": 1.184983618985731, "grad_norm": 7.3449015617370605, "learning_rate": 8.431995131673924e-06, "loss": 0.58507519, "memory(GiB)": 34.88, "step": 43765, "train_speed(iter/s)": 0.412086 }, { "acc": 0.89587917, "epoch": 1.1851189992689464, "grad_norm": 11.640546798706055, "learning_rate": 8.431588164864954e-06, "loss": 0.5380455, "memory(GiB)": 34.88, "step": 43770, "train_speed(iter/s)": 0.412089 }, { "acc": 0.88934708, "epoch": 1.185254379552162, "grad_norm": 9.442830085754395, "learning_rate": 8.431181155074866e-06, "loss": 0.5716012, "memory(GiB)": 34.88, "step": 43775, "train_speed(iter/s)": 0.412091 }, { "acc": 0.88050537, "epoch": 1.1853897598353775, "grad_norm": 16.619142532348633, "learning_rate": 8.430774102308757e-06, "loss": 0.68446221, "memory(GiB)": 34.88, "step": 43780, "train_speed(iter/s)": 0.412094 }, { "acc": 0.8932745, "epoch": 1.1855251401185931, "grad_norm": 7.485621929168701, "learning_rate": 8.43036700657172e-06, "loss": 0.60569992, "memory(GiB)": 34.88, "step": 43785, "train_speed(iter/s)": 0.412096 }, { "acc": 0.89338131, "epoch": 1.1856605204018087, "grad_norm": 10.971394538879395, "learning_rate": 8.429959867868862e-06, "loss": 0.63838754, "memory(GiB)": 34.88, "step": 43790, "train_speed(iter/s)": 0.412098 }, { "acc": 0.89924335, "epoch": 1.1857959006850243, "grad_norm": 6.711667537689209, "learning_rate": 8.42955268620528e-06, "loss": 0.51644702, "memory(GiB)": 34.88, "step": 43795, "train_speed(iter/s)": 0.412101 }, { "acc": 0.9060873, "epoch": 1.1859312809682399, "grad_norm": 9.092352867126465, "learning_rate": 8.429145461586075e-06, "loss": 0.47158356, "memory(GiB)": 34.88, "step": 43800, "train_speed(iter/s)": 0.412103 }, { "acc": 0.9086832, "epoch": 1.1860666612514554, "grad_norm": 9.019532203674316, "learning_rate": 8.428738194016346e-06, "loss": 0.53006115, "memory(GiB)": 34.88, "step": 43805, "train_speed(iter/s)": 0.412105 }, { "acc": 0.85386305, "epoch": 1.186202041534671, "grad_norm": 14.36428451538086, "learning_rate": 8.4283308835012e-06, "loss": 0.74873629, "memory(GiB)": 34.88, "step": 43810, "train_speed(iter/s)": 0.412107 }, { "acc": 0.91311874, "epoch": 1.1863374218178864, "grad_norm": 7.846473217010498, "learning_rate": 8.427923530045735e-06, "loss": 0.37384694, "memory(GiB)": 34.88, "step": 43815, "train_speed(iter/s)": 0.41211 }, { "acc": 0.88757677, "epoch": 1.186472802101102, "grad_norm": 10.497882843017578, "learning_rate": 8.427516133655056e-06, "loss": 0.52347445, "memory(GiB)": 34.88, "step": 43820, "train_speed(iter/s)": 0.412112 }, { "acc": 0.8617487, "epoch": 1.1866081823843175, "grad_norm": 7.701376914978027, "learning_rate": 8.427108694334265e-06, "loss": 0.77829552, "memory(GiB)": 34.88, "step": 43825, "train_speed(iter/s)": 0.412115 }, { "acc": 0.85633125, "epoch": 1.1867435626675331, "grad_norm": 17.73583221435547, "learning_rate": 8.426701212088469e-06, "loss": 0.68550863, "memory(GiB)": 34.88, "step": 43830, "train_speed(iter/s)": 0.412117 }, { "acc": 0.89039679, "epoch": 1.1868789429507487, "grad_norm": 10.073110580444336, "learning_rate": 8.426293686922768e-06, "loss": 0.56090665, "memory(GiB)": 34.88, "step": 43835, "train_speed(iter/s)": 0.41212 }, { "acc": 0.92005119, "epoch": 1.1870143232339643, "grad_norm": 8.920726776123047, "learning_rate": 8.42588611884227e-06, "loss": 0.39247696, "memory(GiB)": 34.88, "step": 43840, "train_speed(iter/s)": 0.412122 }, { "acc": 0.90181103, "epoch": 1.1871497035171799, "grad_norm": 9.447102546691895, "learning_rate": 8.42547850785208e-06, "loss": 0.59697547, "memory(GiB)": 34.88, "step": 43845, "train_speed(iter/s)": 0.412125 }, { "acc": 0.88198528, "epoch": 1.1872850838003952, "grad_norm": 11.951092720031738, "learning_rate": 8.425070853957304e-06, "loss": 0.63997478, "memory(GiB)": 34.88, "step": 43850, "train_speed(iter/s)": 0.412127 }, { "acc": 0.89141197, "epoch": 1.1874204640836108, "grad_norm": 7.981534481048584, "learning_rate": 8.424663157163051e-06, "loss": 0.60744157, "memory(GiB)": 34.88, "step": 43855, "train_speed(iter/s)": 0.412129 }, { "acc": 0.90156803, "epoch": 1.1875558443668264, "grad_norm": 6.487203598022461, "learning_rate": 8.424255417474423e-06, "loss": 0.48580952, "memory(GiB)": 34.88, "step": 43860, "train_speed(iter/s)": 0.412132 }, { "acc": 0.87880058, "epoch": 1.187691224650042, "grad_norm": 8.407318115234375, "learning_rate": 8.423847634896533e-06, "loss": 0.73369102, "memory(GiB)": 34.88, "step": 43865, "train_speed(iter/s)": 0.412134 }, { "acc": 0.87424297, "epoch": 1.1878266049332575, "grad_norm": 12.614293098449707, "learning_rate": 8.423439809434484e-06, "loss": 0.69348774, "memory(GiB)": 34.88, "step": 43870, "train_speed(iter/s)": 0.412137 }, { "acc": 0.90699062, "epoch": 1.1879619852164731, "grad_norm": 5.619579315185547, "learning_rate": 8.42303194109339e-06, "loss": 0.45600405, "memory(GiB)": 34.88, "step": 43875, "train_speed(iter/s)": 0.412139 }, { "acc": 0.89286137, "epoch": 1.1880973654996887, "grad_norm": 6.150899410247803, "learning_rate": 8.422624029878359e-06, "loss": 0.5486948, "memory(GiB)": 34.88, "step": 43880, "train_speed(iter/s)": 0.412142 }, { "acc": 0.88742619, "epoch": 1.188232745782904, "grad_norm": 18.55419921875, "learning_rate": 8.4222160757945e-06, "loss": 0.60880013, "memory(GiB)": 34.88, "step": 43885, "train_speed(iter/s)": 0.412144 }, { "acc": 0.90582161, "epoch": 1.1883681260661196, "grad_norm": 6.6348772048950195, "learning_rate": 8.421808078846922e-06, "loss": 0.5033597, "memory(GiB)": 34.88, "step": 43890, "train_speed(iter/s)": 0.412147 }, { "acc": 0.89723358, "epoch": 1.1885035063493352, "grad_norm": 11.24925708770752, "learning_rate": 8.421400039040737e-06, "loss": 0.6061471, "memory(GiB)": 34.88, "step": 43895, "train_speed(iter/s)": 0.412149 }, { "acc": 0.90855579, "epoch": 1.1886388866325508, "grad_norm": 5.088613510131836, "learning_rate": 8.42099195638106e-06, "loss": 0.40987473, "memory(GiB)": 34.88, "step": 43900, "train_speed(iter/s)": 0.412151 }, { "acc": 0.87840939, "epoch": 1.1887742669157664, "grad_norm": 5.96268367767334, "learning_rate": 8.420583830872997e-06, "loss": 0.71067028, "memory(GiB)": 34.88, "step": 43905, "train_speed(iter/s)": 0.412154 }, { "acc": 0.88834314, "epoch": 1.188909647198982, "grad_norm": 11.736440658569336, "learning_rate": 8.420175662521662e-06, "loss": 0.53526402, "memory(GiB)": 34.88, "step": 43910, "train_speed(iter/s)": 0.412156 }, { "acc": 0.88327742, "epoch": 1.1890450274821975, "grad_norm": 13.587696075439453, "learning_rate": 8.419767451332173e-06, "loss": 0.67475777, "memory(GiB)": 34.88, "step": 43915, "train_speed(iter/s)": 0.412158 }, { "acc": 0.91283779, "epoch": 1.1891804077654131, "grad_norm": 4.4707417488098145, "learning_rate": 8.419359197309641e-06, "loss": 0.36634388, "memory(GiB)": 34.88, "step": 43920, "train_speed(iter/s)": 0.412161 }, { "acc": 0.90206594, "epoch": 1.1893157880486287, "grad_norm": 9.368577003479004, "learning_rate": 8.418950900459176e-06, "loss": 0.54909182, "memory(GiB)": 34.88, "step": 43925, "train_speed(iter/s)": 0.412163 }, { "acc": 0.89625149, "epoch": 1.189451168331844, "grad_norm": 11.563629150390625, "learning_rate": 8.418542560785899e-06, "loss": 0.5062777, "memory(GiB)": 34.88, "step": 43930, "train_speed(iter/s)": 0.412165 }, { "acc": 0.87932968, "epoch": 1.1895865486150596, "grad_norm": 3.1106348037719727, "learning_rate": 8.41813417829492e-06, "loss": 0.58587761, "memory(GiB)": 34.88, "step": 43935, "train_speed(iter/s)": 0.412168 }, { "acc": 0.88399734, "epoch": 1.1897219288982752, "grad_norm": 9.876896858215332, "learning_rate": 8.41772575299136e-06, "loss": 0.66984396, "memory(GiB)": 34.88, "step": 43940, "train_speed(iter/s)": 0.41217 }, { "acc": 0.89541931, "epoch": 1.1898573091814908, "grad_norm": 13.311366081237793, "learning_rate": 8.417317284880333e-06, "loss": 0.52081804, "memory(GiB)": 34.88, "step": 43945, "train_speed(iter/s)": 0.412173 }, { "acc": 0.87516613, "epoch": 1.1899926894647064, "grad_norm": 16.807043075561523, "learning_rate": 8.416908773966953e-06, "loss": 0.62172508, "memory(GiB)": 34.88, "step": 43950, "train_speed(iter/s)": 0.412175 }, { "acc": 0.87906141, "epoch": 1.190128069747922, "grad_norm": 12.709375381469727, "learning_rate": 8.41650022025634e-06, "loss": 0.56379495, "memory(GiB)": 34.88, "step": 43955, "train_speed(iter/s)": 0.412178 }, { "acc": 0.88014374, "epoch": 1.1902634500311375, "grad_norm": 10.55040168762207, "learning_rate": 8.416091623753613e-06, "loss": 0.63748155, "memory(GiB)": 34.88, "step": 43960, "train_speed(iter/s)": 0.41218 }, { "acc": 0.89481392, "epoch": 1.190398830314353, "grad_norm": 8.215100288391113, "learning_rate": 8.41568298446389e-06, "loss": 0.47727213, "memory(GiB)": 34.88, "step": 43965, "train_speed(iter/s)": 0.412182 }, { "acc": 0.89261208, "epoch": 1.1905342105975685, "grad_norm": 11.765375137329102, "learning_rate": 8.415274302392288e-06, "loss": 0.51084194, "memory(GiB)": 34.88, "step": 43970, "train_speed(iter/s)": 0.412185 }, { "acc": 0.90236034, "epoch": 1.190669590880784, "grad_norm": 13.007438659667969, "learning_rate": 8.414865577543929e-06, "loss": 0.54222641, "memory(GiB)": 34.88, "step": 43975, "train_speed(iter/s)": 0.412187 }, { "acc": 0.88302746, "epoch": 1.1908049711639996, "grad_norm": 13.53380012512207, "learning_rate": 8.414456809923929e-06, "loss": 0.6236639, "memory(GiB)": 34.88, "step": 43980, "train_speed(iter/s)": 0.41219 }, { "acc": 0.88961334, "epoch": 1.1909403514472152, "grad_norm": 7.5755696296691895, "learning_rate": 8.414047999537414e-06, "loss": 0.53459592, "memory(GiB)": 34.88, "step": 43985, "train_speed(iter/s)": 0.412192 }, { "acc": 0.89566669, "epoch": 1.1910757317304308, "grad_norm": 6.034481525421143, "learning_rate": 8.413639146389503e-06, "loss": 0.5020257, "memory(GiB)": 34.88, "step": 43990, "train_speed(iter/s)": 0.412195 }, { "acc": 0.88727531, "epoch": 1.1912111120136464, "grad_norm": 7.795986652374268, "learning_rate": 8.413230250485316e-06, "loss": 0.55323648, "memory(GiB)": 34.88, "step": 43995, "train_speed(iter/s)": 0.412197 }, { "acc": 0.8934021, "epoch": 1.191346492296862, "grad_norm": 8.582303047180176, "learning_rate": 8.412821311829978e-06, "loss": 0.58853741, "memory(GiB)": 34.88, "step": 44000, "train_speed(iter/s)": 0.412199 }, { "acc": 0.88114738, "epoch": 1.1914818725800775, "grad_norm": 25.4664249420166, "learning_rate": 8.41241233042861e-06, "loss": 0.66245193, "memory(GiB)": 34.88, "step": 44005, "train_speed(iter/s)": 0.412201 }, { "acc": 0.91560154, "epoch": 1.191617252863293, "grad_norm": 4.809462070465088, "learning_rate": 8.412003306286334e-06, "loss": 0.45020776, "memory(GiB)": 34.88, "step": 44010, "train_speed(iter/s)": 0.412204 }, { "acc": 0.88990784, "epoch": 1.1917526331465085, "grad_norm": 5.907113552093506, "learning_rate": 8.411594239408277e-06, "loss": 0.55375309, "memory(GiB)": 34.88, "step": 44015, "train_speed(iter/s)": 0.412206 }, { "acc": 0.88229027, "epoch": 1.191888013429724, "grad_norm": 6.944558620452881, "learning_rate": 8.411185129799563e-06, "loss": 0.62100191, "memory(GiB)": 34.88, "step": 44020, "train_speed(iter/s)": 0.412209 }, { "acc": 0.90774984, "epoch": 1.1920233937129396, "grad_norm": 12.525349617004395, "learning_rate": 8.410775977465314e-06, "loss": 0.42063656, "memory(GiB)": 34.88, "step": 44025, "train_speed(iter/s)": 0.412211 }, { "acc": 0.88611183, "epoch": 1.1921587739961552, "grad_norm": 10.575113296508789, "learning_rate": 8.410366782410658e-06, "loss": 0.63519926, "memory(GiB)": 34.88, "step": 44030, "train_speed(iter/s)": 0.412214 }, { "acc": 0.88948059, "epoch": 1.1922941542793708, "grad_norm": 11.060487747192383, "learning_rate": 8.40995754464072e-06, "loss": 0.52738967, "memory(GiB)": 34.88, "step": 44035, "train_speed(iter/s)": 0.412216 }, { "acc": 0.86398392, "epoch": 1.1924295345625864, "grad_norm": 10.440657615661621, "learning_rate": 8.409548264160626e-06, "loss": 0.77587285, "memory(GiB)": 34.88, "step": 44040, "train_speed(iter/s)": 0.412219 }, { "acc": 0.88810434, "epoch": 1.1925649148458017, "grad_norm": 8.219149589538574, "learning_rate": 8.409138940975504e-06, "loss": 0.68771167, "memory(GiB)": 34.88, "step": 44045, "train_speed(iter/s)": 0.412221 }, { "acc": 0.90321131, "epoch": 1.1927002951290173, "grad_norm": 10.89097785949707, "learning_rate": 8.408729575090482e-06, "loss": 0.50425053, "memory(GiB)": 34.88, "step": 44050, "train_speed(iter/s)": 0.412224 }, { "acc": 0.88724003, "epoch": 1.192835675412233, "grad_norm": 24.77772331237793, "learning_rate": 8.408320166510686e-06, "loss": 0.62110214, "memory(GiB)": 34.88, "step": 44055, "train_speed(iter/s)": 0.412226 }, { "acc": 0.89364548, "epoch": 1.1929710556954485, "grad_norm": 4.136007308959961, "learning_rate": 8.407910715241246e-06, "loss": 0.51725092, "memory(GiB)": 34.88, "step": 44060, "train_speed(iter/s)": 0.412229 }, { "acc": 0.88397846, "epoch": 1.193106435978664, "grad_norm": 14.983711242675781, "learning_rate": 8.407501221287293e-06, "loss": 0.6592536, "memory(GiB)": 34.88, "step": 44065, "train_speed(iter/s)": 0.412231 }, { "acc": 0.88395166, "epoch": 1.1932418162618796, "grad_norm": 6.81941032409668, "learning_rate": 8.407091684653955e-06, "loss": 0.55270739, "memory(GiB)": 34.88, "step": 44070, "train_speed(iter/s)": 0.412233 }, { "acc": 0.90232296, "epoch": 1.1933771965450952, "grad_norm": 8.190174102783203, "learning_rate": 8.406682105346363e-06, "loss": 0.56297174, "memory(GiB)": 34.88, "step": 44075, "train_speed(iter/s)": 0.412236 }, { "acc": 0.89454231, "epoch": 1.1935125768283108, "grad_norm": 10.30691146850586, "learning_rate": 8.406272483369642e-06, "loss": 0.59572458, "memory(GiB)": 34.88, "step": 44080, "train_speed(iter/s)": 0.412238 }, { "acc": 0.8788063, "epoch": 1.1936479571115264, "grad_norm": 7.790683269500732, "learning_rate": 8.405862818728934e-06, "loss": 0.61394978, "memory(GiB)": 34.88, "step": 44085, "train_speed(iter/s)": 0.412241 }, { "acc": 0.89151382, "epoch": 1.1937833373947417, "grad_norm": 7.3151702880859375, "learning_rate": 8.405453111429362e-06, "loss": 0.55115156, "memory(GiB)": 34.88, "step": 44090, "train_speed(iter/s)": 0.412243 }, { "acc": 0.88336163, "epoch": 1.1939187176779573, "grad_norm": 10.367741584777832, "learning_rate": 8.405043361476065e-06, "loss": 0.6333744, "memory(GiB)": 34.88, "step": 44095, "train_speed(iter/s)": 0.412245 }, { "acc": 0.90745239, "epoch": 1.194054097961173, "grad_norm": 11.141806602478027, "learning_rate": 8.404633568874167e-06, "loss": 0.49657159, "memory(GiB)": 34.88, "step": 44100, "train_speed(iter/s)": 0.412247 }, { "acc": 0.87856703, "epoch": 1.1941894782443885, "grad_norm": 5.783288955688477, "learning_rate": 8.40422373362881e-06, "loss": 0.74065833, "memory(GiB)": 34.88, "step": 44105, "train_speed(iter/s)": 0.412249 }, { "acc": 0.88008423, "epoch": 1.194324858527604, "grad_norm": 16.125320434570312, "learning_rate": 8.403813855745125e-06, "loss": 0.72880783, "memory(GiB)": 34.88, "step": 44110, "train_speed(iter/s)": 0.412252 }, { "acc": 0.89580803, "epoch": 1.1944602388108196, "grad_norm": 7.190464496612549, "learning_rate": 8.403403935228245e-06, "loss": 0.57367439, "memory(GiB)": 34.88, "step": 44115, "train_speed(iter/s)": 0.412254 }, { "acc": 0.87968102, "epoch": 1.1945956190940352, "grad_norm": 9.328902244567871, "learning_rate": 8.40299397208331e-06, "loss": 0.6026412, "memory(GiB)": 34.88, "step": 44120, "train_speed(iter/s)": 0.412257 }, { "acc": 0.90345516, "epoch": 1.1947309993772506, "grad_norm": 3.9160878658294678, "learning_rate": 8.402583966315449e-06, "loss": 0.51877546, "memory(GiB)": 34.88, "step": 44125, "train_speed(iter/s)": 0.412259 }, { "acc": 0.87760305, "epoch": 1.1948663796604662, "grad_norm": 14.483734130859375, "learning_rate": 8.402173917929803e-06, "loss": 0.72681427, "memory(GiB)": 34.88, "step": 44130, "train_speed(iter/s)": 0.412261 }, { "acc": 0.88210068, "epoch": 1.1950017599436817, "grad_norm": 7.164412498474121, "learning_rate": 8.401763826931505e-06, "loss": 0.70218949, "memory(GiB)": 34.88, "step": 44135, "train_speed(iter/s)": 0.412263 }, { "acc": 0.8941534, "epoch": 1.1951371402268973, "grad_norm": 16.525806427001953, "learning_rate": 8.401353693325697e-06, "loss": 0.5228478, "memory(GiB)": 34.88, "step": 44140, "train_speed(iter/s)": 0.412265 }, { "acc": 0.87938557, "epoch": 1.195272520510113, "grad_norm": 17.79673194885254, "learning_rate": 8.40094351711751e-06, "loss": 0.71662993, "memory(GiB)": 34.88, "step": 44145, "train_speed(iter/s)": 0.412267 }, { "acc": 0.89667072, "epoch": 1.1954079007933285, "grad_norm": 6.8604278564453125, "learning_rate": 8.400533298312087e-06, "loss": 0.51326752, "memory(GiB)": 34.88, "step": 44150, "train_speed(iter/s)": 0.412269 }, { "acc": 0.89475174, "epoch": 1.195543281076544, "grad_norm": 5.6418867111206055, "learning_rate": 8.400123036914569e-06, "loss": 0.54342384, "memory(GiB)": 34.88, "step": 44155, "train_speed(iter/s)": 0.412271 }, { "acc": 0.88916435, "epoch": 1.1956786613597596, "grad_norm": 8.584747314453125, "learning_rate": 8.399712732930089e-06, "loss": 0.59928594, "memory(GiB)": 34.88, "step": 44160, "train_speed(iter/s)": 0.412273 }, { "acc": 0.9020689, "epoch": 1.1958140416429752, "grad_norm": 8.950634002685547, "learning_rate": 8.39930238636379e-06, "loss": 0.48135066, "memory(GiB)": 34.88, "step": 44165, "train_speed(iter/s)": 0.412276 }, { "acc": 0.88421087, "epoch": 1.1959494219261906, "grad_norm": 5.302734375, "learning_rate": 8.398891997220815e-06, "loss": 0.63193674, "memory(GiB)": 34.88, "step": 44170, "train_speed(iter/s)": 0.412278 }, { "acc": 0.88968906, "epoch": 1.1960848022094062, "grad_norm": 7.78537654876709, "learning_rate": 8.398481565506303e-06, "loss": 0.60797815, "memory(GiB)": 34.88, "step": 44175, "train_speed(iter/s)": 0.41228 }, { "acc": 0.86639118, "epoch": 1.1962201824926217, "grad_norm": 5.219127655029297, "learning_rate": 8.398071091225393e-06, "loss": 0.72925491, "memory(GiB)": 34.88, "step": 44180, "train_speed(iter/s)": 0.412283 }, { "acc": 0.90809746, "epoch": 1.1963555627758373, "grad_norm": 7.024106979370117, "learning_rate": 8.397660574383228e-06, "loss": 0.439744, "memory(GiB)": 34.88, "step": 44185, "train_speed(iter/s)": 0.412285 }, { "acc": 0.89524994, "epoch": 1.196490943059053, "grad_norm": 9.460376739501953, "learning_rate": 8.397250014984953e-06, "loss": 0.61889153, "memory(GiB)": 34.88, "step": 44190, "train_speed(iter/s)": 0.412288 }, { "acc": 0.89696054, "epoch": 1.1966263233422685, "grad_norm": 6.082360744476318, "learning_rate": 8.396839413035707e-06, "loss": 0.53842897, "memory(GiB)": 34.88, "step": 44195, "train_speed(iter/s)": 0.41229 }, { "acc": 0.88904305, "epoch": 1.196761703625484, "grad_norm": 7.878046989440918, "learning_rate": 8.396428768540638e-06, "loss": 0.6443368, "memory(GiB)": 34.88, "step": 44200, "train_speed(iter/s)": 0.412293 }, { "acc": 0.88088894, "epoch": 1.1968970839086994, "grad_norm": 5.320539951324463, "learning_rate": 8.396018081504888e-06, "loss": 0.60989637, "memory(GiB)": 34.88, "step": 44205, "train_speed(iter/s)": 0.412295 }, { "acc": 0.89980927, "epoch": 1.197032464191915, "grad_norm": 20.647207260131836, "learning_rate": 8.395607351933603e-06, "loss": 0.51710882, "memory(GiB)": 34.88, "step": 44210, "train_speed(iter/s)": 0.412298 }, { "acc": 0.89964695, "epoch": 1.1971678444751306, "grad_norm": 5.7040228843688965, "learning_rate": 8.395196579831925e-06, "loss": 0.5656055, "memory(GiB)": 34.88, "step": 44215, "train_speed(iter/s)": 0.4123 }, { "acc": 0.89012184, "epoch": 1.1973032247583462, "grad_norm": 6.130700588226318, "learning_rate": 8.394785765205003e-06, "loss": 0.58116698, "memory(GiB)": 34.88, "step": 44220, "train_speed(iter/s)": 0.412303 }, { "acc": 0.90326347, "epoch": 1.1974386050415617, "grad_norm": 16.363985061645508, "learning_rate": 8.394374908057982e-06, "loss": 0.45016389, "memory(GiB)": 34.88, "step": 44225, "train_speed(iter/s)": 0.412305 }, { "acc": 0.89682903, "epoch": 1.1975739853247773, "grad_norm": 3.7317628860473633, "learning_rate": 8.393964008396007e-06, "loss": 0.53325567, "memory(GiB)": 34.88, "step": 44230, "train_speed(iter/s)": 0.412307 }, { "acc": 0.91095085, "epoch": 1.197709365607993, "grad_norm": 3.2234253883361816, "learning_rate": 8.39355306622423e-06, "loss": 0.41257944, "memory(GiB)": 34.88, "step": 44235, "train_speed(iter/s)": 0.41231 }, { "acc": 0.8822155, "epoch": 1.1978447458912085, "grad_norm": 14.99785041809082, "learning_rate": 8.393142081547793e-06, "loss": 0.67070661, "memory(GiB)": 34.88, "step": 44240, "train_speed(iter/s)": 0.412312 }, { "acc": 0.87443218, "epoch": 1.197980126174424, "grad_norm": 10.77750301361084, "learning_rate": 8.39273105437185e-06, "loss": 0.71208687, "memory(GiB)": 34.88, "step": 44245, "train_speed(iter/s)": 0.412315 }, { "acc": 0.89348993, "epoch": 1.1981155064576394, "grad_norm": 9.926565170288086, "learning_rate": 8.392319984701545e-06, "loss": 0.58191748, "memory(GiB)": 34.88, "step": 44250, "train_speed(iter/s)": 0.412317 }, { "acc": 0.86204844, "epoch": 1.198250886740855, "grad_norm": 4.953943252563477, "learning_rate": 8.391908872542033e-06, "loss": 0.78495522, "memory(GiB)": 34.88, "step": 44255, "train_speed(iter/s)": 0.412319 }, { "acc": 0.87927933, "epoch": 1.1983862670240706, "grad_norm": 8.32807731628418, "learning_rate": 8.391497717898457e-06, "loss": 0.60193701, "memory(GiB)": 34.88, "step": 44260, "train_speed(iter/s)": 0.412322 }, { "acc": 0.87907715, "epoch": 1.1985216473072862, "grad_norm": 9.093805313110352, "learning_rate": 8.391086520775975e-06, "loss": 0.73420796, "memory(GiB)": 34.88, "step": 44265, "train_speed(iter/s)": 0.412325 }, { "acc": 0.90367002, "epoch": 1.1986570275905017, "grad_norm": 6.768672466278076, "learning_rate": 8.390675281179731e-06, "loss": 0.52226338, "memory(GiB)": 34.88, "step": 44270, "train_speed(iter/s)": 0.412327 }, { "acc": 0.90067215, "epoch": 1.1987924078737173, "grad_norm": 5.788731575012207, "learning_rate": 8.390263999114883e-06, "loss": 0.51774302, "memory(GiB)": 34.88, "step": 44275, "train_speed(iter/s)": 0.412329 }, { "acc": 0.90672512, "epoch": 1.198927788156933, "grad_norm": 4.640342712402344, "learning_rate": 8.389852674586578e-06, "loss": 0.43882341, "memory(GiB)": 34.88, "step": 44280, "train_speed(iter/s)": 0.412332 }, { "acc": 0.8694541, "epoch": 1.1990631684401483, "grad_norm": 5.829956531524658, "learning_rate": 8.389441307599971e-06, "loss": 0.75739231, "memory(GiB)": 34.88, "step": 44285, "train_speed(iter/s)": 0.412334 }, { "acc": 0.91625299, "epoch": 1.1991985487233638, "grad_norm": 8.358990669250488, "learning_rate": 8.389029898160217e-06, "loss": 0.44886961, "memory(GiB)": 34.88, "step": 44290, "train_speed(iter/s)": 0.412336 }, { "acc": 0.88520832, "epoch": 1.1993339290065794, "grad_norm": 6.330922603607178, "learning_rate": 8.388618446272463e-06, "loss": 0.51697283, "memory(GiB)": 34.88, "step": 44295, "train_speed(iter/s)": 0.412339 }, { "acc": 0.87971115, "epoch": 1.199469309289795, "grad_norm": 8.622552871704102, "learning_rate": 8.388206951941871e-06, "loss": 0.57038865, "memory(GiB)": 34.88, "step": 44300, "train_speed(iter/s)": 0.412341 }, { "acc": 0.90964947, "epoch": 1.1996046895730106, "grad_norm": 9.230785369873047, "learning_rate": 8.387795415173591e-06, "loss": 0.4169569, "memory(GiB)": 34.88, "step": 44305, "train_speed(iter/s)": 0.412344 }, { "acc": 0.86903553, "epoch": 1.1997400698562262, "grad_norm": 13.867243766784668, "learning_rate": 8.387383835972783e-06, "loss": 0.64529314, "memory(GiB)": 34.88, "step": 44310, "train_speed(iter/s)": 0.412346 }, { "acc": 0.86945734, "epoch": 1.1998754501394417, "grad_norm": 11.264266014099121, "learning_rate": 8.386972214344596e-06, "loss": 0.73426113, "memory(GiB)": 34.88, "step": 44315, "train_speed(iter/s)": 0.412349 }, { "acc": 0.89494753, "epoch": 1.2000108304226573, "grad_norm": 7.939225673675537, "learning_rate": 8.386560550294193e-06, "loss": 0.5064332, "memory(GiB)": 34.88, "step": 44320, "train_speed(iter/s)": 0.412351 }, { "acc": 0.89608135, "epoch": 1.200146210705873, "grad_norm": 6.342667102813721, "learning_rate": 8.386148843826726e-06, "loss": 0.62303386, "memory(GiB)": 34.88, "step": 44325, "train_speed(iter/s)": 0.412353 }, { "acc": 0.88568239, "epoch": 1.2002815909890883, "grad_norm": 7.497658729553223, "learning_rate": 8.385737094947355e-06, "loss": 0.59769459, "memory(GiB)": 34.88, "step": 44330, "train_speed(iter/s)": 0.412356 }, { "acc": 0.89799728, "epoch": 1.2004169712723038, "grad_norm": 4.526721477508545, "learning_rate": 8.385325303661238e-06, "loss": 0.54604268, "memory(GiB)": 34.88, "step": 44335, "train_speed(iter/s)": 0.412358 }, { "acc": 0.89257717, "epoch": 1.2005523515555194, "grad_norm": 9.351029396057129, "learning_rate": 8.384913469973533e-06, "loss": 0.58421693, "memory(GiB)": 34.88, "step": 44340, "train_speed(iter/s)": 0.41236 }, { "acc": 0.87631178, "epoch": 1.200687731838735, "grad_norm": 12.196921348571777, "learning_rate": 8.384501593889397e-06, "loss": 0.73713756, "memory(GiB)": 34.88, "step": 44345, "train_speed(iter/s)": 0.412362 }, { "acc": 0.87334194, "epoch": 1.2008231121219506, "grad_norm": 10.7559175491333, "learning_rate": 8.384089675413994e-06, "loss": 0.73978987, "memory(GiB)": 34.88, "step": 44350, "train_speed(iter/s)": 0.412364 }, { "acc": 0.89013348, "epoch": 1.2009584924051662, "grad_norm": 9.78447437286377, "learning_rate": 8.38367771455248e-06, "loss": 0.57676196, "memory(GiB)": 34.88, "step": 44355, "train_speed(iter/s)": 0.412366 }, { "acc": 0.89239349, "epoch": 1.2010938726883817, "grad_norm": 10.063576698303223, "learning_rate": 8.383265711310018e-06, "loss": 0.56282983, "memory(GiB)": 34.88, "step": 44360, "train_speed(iter/s)": 0.412369 }, { "acc": 0.89712753, "epoch": 1.201229252971597, "grad_norm": 10.086130142211914, "learning_rate": 8.382853665691767e-06, "loss": 0.52480602, "memory(GiB)": 34.88, "step": 44365, "train_speed(iter/s)": 0.412371 }, { "acc": 0.90220623, "epoch": 1.2013646332548127, "grad_norm": 5.175327777862549, "learning_rate": 8.38244157770289e-06, "loss": 0.5079186, "memory(GiB)": 34.88, "step": 44370, "train_speed(iter/s)": 0.412373 }, { "acc": 0.88420744, "epoch": 1.2015000135380283, "grad_norm": 7.116308212280273, "learning_rate": 8.382029447348552e-06, "loss": 0.64646087, "memory(GiB)": 34.88, "step": 44375, "train_speed(iter/s)": 0.412376 }, { "acc": 0.89944859, "epoch": 1.2016353938212438, "grad_norm": 9.025099754333496, "learning_rate": 8.381617274633914e-06, "loss": 0.57308331, "memory(GiB)": 34.88, "step": 44380, "train_speed(iter/s)": 0.412379 }, { "acc": 0.87390118, "epoch": 1.2017707741044594, "grad_norm": 10.538647651672363, "learning_rate": 8.381205059564133e-06, "loss": 0.66186733, "memory(GiB)": 34.88, "step": 44385, "train_speed(iter/s)": 0.412381 }, { "acc": 0.90294304, "epoch": 1.201906154387675, "grad_norm": 7.814389228820801, "learning_rate": 8.38079280214438e-06, "loss": 0.45677185, "memory(GiB)": 34.88, "step": 44390, "train_speed(iter/s)": 0.412383 }, { "acc": 0.91031857, "epoch": 1.2020415346708906, "grad_norm": 9.925772666931152, "learning_rate": 8.380380502379819e-06, "loss": 0.42436018, "memory(GiB)": 34.88, "step": 44395, "train_speed(iter/s)": 0.412385 }, { "acc": 0.90027199, "epoch": 1.2021769149541062, "grad_norm": 4.371811389923096, "learning_rate": 8.379968160275612e-06, "loss": 0.48018055, "memory(GiB)": 34.88, "step": 44400, "train_speed(iter/s)": 0.412388 }, { "acc": 0.88588982, "epoch": 1.2023122952373217, "grad_norm": 12.641053199768066, "learning_rate": 8.379555775836927e-06, "loss": 0.48155909, "memory(GiB)": 34.88, "step": 44405, "train_speed(iter/s)": 0.41239 }, { "acc": 0.88999939, "epoch": 1.202447675520537, "grad_norm": 13.39423656463623, "learning_rate": 8.379143349068927e-06, "loss": 0.53434372, "memory(GiB)": 34.88, "step": 44410, "train_speed(iter/s)": 0.412392 }, { "acc": 0.88617764, "epoch": 1.2025830558037527, "grad_norm": 14.076208114624023, "learning_rate": 8.378730879976781e-06, "loss": 0.57572622, "memory(GiB)": 34.88, "step": 44415, "train_speed(iter/s)": 0.412394 }, { "acc": 0.88634682, "epoch": 1.2027184360869683, "grad_norm": 16.32305145263672, "learning_rate": 8.378318368565653e-06, "loss": 0.60951858, "memory(GiB)": 34.88, "step": 44420, "train_speed(iter/s)": 0.412397 }, { "acc": 0.87859669, "epoch": 1.2028538163701838, "grad_norm": 10.402164459228516, "learning_rate": 8.377905814840715e-06, "loss": 0.5958447, "memory(GiB)": 34.88, "step": 44425, "train_speed(iter/s)": 0.412399 }, { "acc": 0.88958073, "epoch": 1.2029891966533994, "grad_norm": 6.961845874786377, "learning_rate": 8.377493218807131e-06, "loss": 0.59114151, "memory(GiB)": 34.88, "step": 44430, "train_speed(iter/s)": 0.412401 }, { "acc": 0.88184338, "epoch": 1.203124576936615, "grad_norm": 9.287931442260742, "learning_rate": 8.377080580470071e-06, "loss": 0.67155142, "memory(GiB)": 34.88, "step": 44435, "train_speed(iter/s)": 0.412404 }, { "acc": 0.88026562, "epoch": 1.2032599572198306, "grad_norm": 6.253005504608154, "learning_rate": 8.376667899834705e-06, "loss": 0.65049334, "memory(GiB)": 34.88, "step": 44440, "train_speed(iter/s)": 0.412406 }, { "acc": 0.87597704, "epoch": 1.203395337503046, "grad_norm": 8.252875328063965, "learning_rate": 8.376255176906202e-06, "loss": 0.70198798, "memory(GiB)": 34.88, "step": 44445, "train_speed(iter/s)": 0.412409 }, { "acc": 0.89638414, "epoch": 1.2035307177862615, "grad_norm": 9.650784492492676, "learning_rate": 8.375842411689732e-06, "loss": 0.52934999, "memory(GiB)": 34.88, "step": 44450, "train_speed(iter/s)": 0.412411 }, { "acc": 0.89132185, "epoch": 1.203666098069477, "grad_norm": 11.967308044433594, "learning_rate": 8.375429604190467e-06, "loss": 0.54981699, "memory(GiB)": 34.88, "step": 44455, "train_speed(iter/s)": 0.412413 }, { "acc": 0.88078985, "epoch": 1.2038014783526927, "grad_norm": 14.040250778198242, "learning_rate": 8.375016754413575e-06, "loss": 0.56538529, "memory(GiB)": 34.88, "step": 44460, "train_speed(iter/s)": 0.412416 }, { "acc": 0.88696213, "epoch": 1.2039368586359083, "grad_norm": 6.967922210693359, "learning_rate": 8.374603862364231e-06, "loss": 0.63090544, "memory(GiB)": 34.88, "step": 44465, "train_speed(iter/s)": 0.412418 }, { "acc": 0.89804916, "epoch": 1.2040722389191238, "grad_norm": 11.647133827209473, "learning_rate": 8.374190928047604e-06, "loss": 0.50184402, "memory(GiB)": 34.88, "step": 44470, "train_speed(iter/s)": 0.41242 }, { "acc": 0.90263653, "epoch": 1.2042076192023394, "grad_norm": 4.693978786468506, "learning_rate": 8.373777951468872e-06, "loss": 0.53400846, "memory(GiB)": 34.88, "step": 44475, "train_speed(iter/s)": 0.412423 }, { "acc": 0.88705502, "epoch": 1.204342999485555, "grad_norm": 12.305262565612793, "learning_rate": 8.373364932633204e-06, "loss": 0.63486834, "memory(GiB)": 34.88, "step": 44480, "train_speed(iter/s)": 0.412425 }, { "acc": 0.89817352, "epoch": 1.2044783797687706, "grad_norm": 9.727559089660645, "learning_rate": 8.372951871545774e-06, "loss": 0.54828372, "memory(GiB)": 34.88, "step": 44485, "train_speed(iter/s)": 0.412427 }, { "acc": 0.89247036, "epoch": 1.204613760051986, "grad_norm": 7.588247776031494, "learning_rate": 8.372538768211758e-06, "loss": 0.52688398, "memory(GiB)": 34.88, "step": 44490, "train_speed(iter/s)": 0.41243 }, { "acc": 0.90481739, "epoch": 1.2047491403352015, "grad_norm": 6.580171585083008, "learning_rate": 8.37212562263633e-06, "loss": 0.55145502, "memory(GiB)": 34.88, "step": 44495, "train_speed(iter/s)": 0.412432 }, { "acc": 0.87287025, "epoch": 1.204884520618417, "grad_norm": 11.72333812713623, "learning_rate": 8.371712434824668e-06, "loss": 0.76069689, "memory(GiB)": 34.88, "step": 44500, "train_speed(iter/s)": 0.412434 }, { "acc": 0.90009136, "epoch": 1.2050199009016327, "grad_norm": 5.260614395141602, "learning_rate": 8.371299204781943e-06, "loss": 0.52547598, "memory(GiB)": 34.88, "step": 44505, "train_speed(iter/s)": 0.412437 }, { "acc": 0.87898464, "epoch": 1.2051552811848483, "grad_norm": 13.65104866027832, "learning_rate": 8.370885932513336e-06, "loss": 0.61968098, "memory(GiB)": 34.88, "step": 44510, "train_speed(iter/s)": 0.412439 }, { "acc": 0.90682669, "epoch": 1.2052906614680639, "grad_norm": 5.398930072784424, "learning_rate": 8.370472618024021e-06, "loss": 0.44870071, "memory(GiB)": 34.88, "step": 44515, "train_speed(iter/s)": 0.412441 }, { "acc": 0.89344311, "epoch": 1.2054260417512794, "grad_norm": 9.322661399841309, "learning_rate": 8.370059261319177e-06, "loss": 0.60502815, "memory(GiB)": 34.88, "step": 44520, "train_speed(iter/s)": 0.412443 }, { "acc": 0.88632212, "epoch": 1.2055614220344948, "grad_norm": 15.744617462158203, "learning_rate": 8.369645862403983e-06, "loss": 0.58089247, "memory(GiB)": 34.88, "step": 44525, "train_speed(iter/s)": 0.412446 }, { "acc": 0.91107502, "epoch": 1.2056968023177104, "grad_norm": 3.436383008956909, "learning_rate": 8.369232421283617e-06, "loss": 0.39107614, "memory(GiB)": 34.88, "step": 44530, "train_speed(iter/s)": 0.412448 }, { "acc": 0.87077522, "epoch": 1.205832182600926, "grad_norm": 7.714064121246338, "learning_rate": 8.368818937963258e-06, "loss": 0.76448236, "memory(GiB)": 34.88, "step": 44535, "train_speed(iter/s)": 0.412451 }, { "acc": 0.88289165, "epoch": 1.2059675628841415, "grad_norm": 8.048662185668945, "learning_rate": 8.368405412448085e-06, "loss": 0.59660263, "memory(GiB)": 34.88, "step": 44540, "train_speed(iter/s)": 0.412453 }, { "acc": 0.90399923, "epoch": 1.2061029431673571, "grad_norm": 11.066595077514648, "learning_rate": 8.367991844743277e-06, "loss": 0.57120891, "memory(GiB)": 34.88, "step": 44545, "train_speed(iter/s)": 0.412455 }, { "acc": 0.89523382, "epoch": 1.2062383234505727, "grad_norm": 13.517491340637207, "learning_rate": 8.36757823485402e-06, "loss": 0.53805795, "memory(GiB)": 34.88, "step": 44550, "train_speed(iter/s)": 0.412457 }, { "acc": 0.887537, "epoch": 1.2063737037337883, "grad_norm": 16.235111236572266, "learning_rate": 8.36716458278549e-06, "loss": 0.64987278, "memory(GiB)": 34.88, "step": 44555, "train_speed(iter/s)": 0.412459 }, { "acc": 0.89151011, "epoch": 1.2065090840170039, "grad_norm": 7.954161643981934, "learning_rate": 8.36675088854287e-06, "loss": 0.53700819, "memory(GiB)": 34.88, "step": 44560, "train_speed(iter/s)": 0.412462 }, { "acc": 0.86252851, "epoch": 1.2066444643002194, "grad_norm": 11.108817100524902, "learning_rate": 8.366337152131343e-06, "loss": 0.70843649, "memory(GiB)": 34.88, "step": 44565, "train_speed(iter/s)": 0.412464 }, { "acc": 0.90181046, "epoch": 1.2067798445834348, "grad_norm": 9.339865684509277, "learning_rate": 8.365923373556094e-06, "loss": 0.54978547, "memory(GiB)": 34.88, "step": 44570, "train_speed(iter/s)": 0.412466 }, { "acc": 0.88462219, "epoch": 1.2069152248666504, "grad_norm": 10.150613784790039, "learning_rate": 8.365509552822302e-06, "loss": 0.60713749, "memory(GiB)": 34.88, "step": 44575, "train_speed(iter/s)": 0.412469 }, { "acc": 0.87849379, "epoch": 1.207050605149866, "grad_norm": 10.860026359558105, "learning_rate": 8.365095689935155e-06, "loss": 0.60012417, "memory(GiB)": 34.88, "step": 44580, "train_speed(iter/s)": 0.412471 }, { "acc": 0.91375628, "epoch": 1.2071859854330815, "grad_norm": 3.455784320831299, "learning_rate": 8.364681784899834e-06, "loss": 0.45177307, "memory(GiB)": 34.88, "step": 44585, "train_speed(iter/s)": 0.412473 }, { "acc": 0.89624634, "epoch": 1.2073213657162971, "grad_norm": 30.603431701660156, "learning_rate": 8.364267837721526e-06, "loss": 0.6345274, "memory(GiB)": 34.88, "step": 44590, "train_speed(iter/s)": 0.412476 }, { "acc": 0.87936382, "epoch": 1.2074567459995127, "grad_norm": 20.58879280090332, "learning_rate": 8.363853848405416e-06, "loss": 0.6364779, "memory(GiB)": 34.88, "step": 44595, "train_speed(iter/s)": 0.412477 }, { "acc": 0.89356384, "epoch": 1.2075921262827283, "grad_norm": 7.390154838562012, "learning_rate": 8.36343981695669e-06, "loss": 0.54785128, "memory(GiB)": 34.88, "step": 44600, "train_speed(iter/s)": 0.41248 }, { "acc": 0.88035984, "epoch": 1.2077275065659436, "grad_norm": 8.410329818725586, "learning_rate": 8.363025743380536e-06, "loss": 0.69973621, "memory(GiB)": 34.88, "step": 44605, "train_speed(iter/s)": 0.412482 }, { "acc": 0.87908516, "epoch": 1.2078628868491592, "grad_norm": 9.484962463378906, "learning_rate": 8.362611627682138e-06, "loss": 0.62321587, "memory(GiB)": 34.88, "step": 44610, "train_speed(iter/s)": 0.412484 }, { "acc": 0.8954998, "epoch": 1.2079982671323748, "grad_norm": 9.08710765838623, "learning_rate": 8.362197469866685e-06, "loss": 0.58174987, "memory(GiB)": 34.88, "step": 44615, "train_speed(iter/s)": 0.412487 }, { "acc": 0.89030523, "epoch": 1.2081336474155904, "grad_norm": 14.867067337036133, "learning_rate": 8.361783269939367e-06, "loss": 0.51819096, "memory(GiB)": 34.88, "step": 44620, "train_speed(iter/s)": 0.412489 }, { "acc": 0.88742294, "epoch": 1.208269027698806, "grad_norm": 7.961309432983398, "learning_rate": 8.36136902790537e-06, "loss": 0.60855784, "memory(GiB)": 34.88, "step": 44625, "train_speed(iter/s)": 0.412491 }, { "acc": 0.9092577, "epoch": 1.2084044079820215, "grad_norm": 12.170244216918945, "learning_rate": 8.360954743769884e-06, "loss": 0.48976021, "memory(GiB)": 34.88, "step": 44630, "train_speed(iter/s)": 0.412494 }, { "acc": 0.88575459, "epoch": 1.2085397882652371, "grad_norm": 5.89935302734375, "learning_rate": 8.3605404175381e-06, "loss": 0.54691782, "memory(GiB)": 34.88, "step": 44635, "train_speed(iter/s)": 0.412496 }, { "acc": 0.90023232, "epoch": 1.2086751685484527, "grad_norm": 30.560697555541992, "learning_rate": 8.360126049215207e-06, "loss": 0.58592305, "memory(GiB)": 34.88, "step": 44640, "train_speed(iter/s)": 0.412499 }, { "acc": 0.88979359, "epoch": 1.2088105488316683, "grad_norm": 8.527091026306152, "learning_rate": 8.359711638806396e-06, "loss": 0.57588463, "memory(GiB)": 34.88, "step": 44645, "train_speed(iter/s)": 0.412501 }, { "acc": 0.91695671, "epoch": 1.2089459291148836, "grad_norm": 5.012368679046631, "learning_rate": 8.35929718631686e-06, "loss": 0.43584523, "memory(GiB)": 34.88, "step": 44650, "train_speed(iter/s)": 0.412503 }, { "acc": 0.89668217, "epoch": 1.2090813093980992, "grad_norm": 9.4873046875, "learning_rate": 8.358882691751787e-06, "loss": 0.53736949, "memory(GiB)": 34.88, "step": 44655, "train_speed(iter/s)": 0.412506 }, { "acc": 0.87471504, "epoch": 1.2092166896813148, "grad_norm": 9.199000358581543, "learning_rate": 8.358468155116373e-06, "loss": 0.7782856, "memory(GiB)": 34.88, "step": 44660, "train_speed(iter/s)": 0.412508 }, { "acc": 0.90612812, "epoch": 1.2093520699645304, "grad_norm": 5.858270168304443, "learning_rate": 8.358053576415807e-06, "loss": 0.47522612, "memory(GiB)": 34.88, "step": 44665, "train_speed(iter/s)": 0.41251 }, { "acc": 0.89761734, "epoch": 1.209487450247746, "grad_norm": 14.96181583404541, "learning_rate": 8.357638955655287e-06, "loss": 0.58795338, "memory(GiB)": 34.88, "step": 44670, "train_speed(iter/s)": 0.412513 }, { "acc": 0.88149605, "epoch": 1.2096228305309615, "grad_norm": 6.964900493621826, "learning_rate": 8.357224292840004e-06, "loss": 0.7026504, "memory(GiB)": 34.88, "step": 44675, "train_speed(iter/s)": 0.412515 }, { "acc": 0.85749378, "epoch": 1.2097582108141771, "grad_norm": 9.694719314575195, "learning_rate": 8.356809587975155e-06, "loss": 0.77720022, "memory(GiB)": 34.88, "step": 44680, "train_speed(iter/s)": 0.412518 }, { "acc": 0.90204697, "epoch": 1.2098935910973925, "grad_norm": 11.865130424499512, "learning_rate": 8.356394841065931e-06, "loss": 0.56944551, "memory(GiB)": 34.88, "step": 44685, "train_speed(iter/s)": 0.41252 }, { "acc": 0.8761219, "epoch": 1.210028971380608, "grad_norm": 13.004700660705566, "learning_rate": 8.355980052117532e-06, "loss": 0.7052835, "memory(GiB)": 34.88, "step": 44690, "train_speed(iter/s)": 0.412522 }, { "acc": 0.87674751, "epoch": 1.2101643516638236, "grad_norm": 9.796695709228516, "learning_rate": 8.355565221135152e-06, "loss": 0.69204464, "memory(GiB)": 34.88, "step": 44695, "train_speed(iter/s)": 0.412524 }, { "acc": 0.887216, "epoch": 1.2102997319470392, "grad_norm": 11.92297649383545, "learning_rate": 8.355150348123984e-06, "loss": 0.60537348, "memory(GiB)": 34.88, "step": 44700, "train_speed(iter/s)": 0.412527 }, { "acc": 0.90873508, "epoch": 1.2104351122302548, "grad_norm": 6.733121395111084, "learning_rate": 8.35473543308923e-06, "loss": 0.51974506, "memory(GiB)": 34.88, "step": 44705, "train_speed(iter/s)": 0.412529 }, { "acc": 0.8689002, "epoch": 1.2105704925134704, "grad_norm": 8.603499412536621, "learning_rate": 8.354320476036088e-06, "loss": 0.74975867, "memory(GiB)": 34.88, "step": 44710, "train_speed(iter/s)": 0.412531 }, { "acc": 0.88195019, "epoch": 1.210705872796686, "grad_norm": 8.190319061279297, "learning_rate": 8.353905476969753e-06, "loss": 0.54762611, "memory(GiB)": 34.88, "step": 44715, "train_speed(iter/s)": 0.412533 }, { "acc": 0.88985882, "epoch": 1.2108412530799015, "grad_norm": 11.252847671508789, "learning_rate": 8.353490435895425e-06, "loss": 0.61601586, "memory(GiB)": 34.88, "step": 44720, "train_speed(iter/s)": 0.412536 }, { "acc": 0.88994627, "epoch": 1.2109766333631171, "grad_norm": 9.707511901855469, "learning_rate": 8.353075352818303e-06, "loss": 0.52616868, "memory(GiB)": 34.88, "step": 44725, "train_speed(iter/s)": 0.412538 }, { "acc": 0.89718113, "epoch": 1.2111120136463325, "grad_norm": 13.471646308898926, "learning_rate": 8.352660227743586e-06, "loss": 0.58772402, "memory(GiB)": 34.88, "step": 44730, "train_speed(iter/s)": 0.41254 }, { "acc": 0.89140387, "epoch": 1.211247393929548, "grad_norm": 9.68913745880127, "learning_rate": 8.352245060676476e-06, "loss": 0.57656355, "memory(GiB)": 34.88, "step": 44735, "train_speed(iter/s)": 0.412542 }, { "acc": 0.89988861, "epoch": 1.2113827742127636, "grad_norm": 22.08296012878418, "learning_rate": 8.351829851622175e-06, "loss": 0.55351019, "memory(GiB)": 34.88, "step": 44740, "train_speed(iter/s)": 0.412545 }, { "acc": 0.88177166, "epoch": 1.2115181544959792, "grad_norm": 8.015323638916016, "learning_rate": 8.35141460058588e-06, "loss": 0.58349361, "memory(GiB)": 34.88, "step": 44745, "train_speed(iter/s)": 0.412546 }, { "acc": 0.89262724, "epoch": 1.2116535347791948, "grad_norm": 9.072362899780273, "learning_rate": 8.350999307572795e-06, "loss": 0.50954919, "memory(GiB)": 34.88, "step": 44750, "train_speed(iter/s)": 0.412549 }, { "acc": 0.89634323, "epoch": 1.2117889150624104, "grad_norm": 9.310044288635254, "learning_rate": 8.350583972588124e-06, "loss": 0.5599504, "memory(GiB)": 34.88, "step": 44755, "train_speed(iter/s)": 0.41255 }, { "acc": 0.88536568, "epoch": 1.211924295345626, "grad_norm": 19.09543228149414, "learning_rate": 8.350168595637068e-06, "loss": 0.69217968, "memory(GiB)": 34.88, "step": 44760, "train_speed(iter/s)": 0.412552 }, { "acc": 0.88184443, "epoch": 1.2120596756288413, "grad_norm": 7.250394821166992, "learning_rate": 8.349753176724828e-06, "loss": 0.60471926, "memory(GiB)": 34.88, "step": 44765, "train_speed(iter/s)": 0.412554 }, { "acc": 0.8946455, "epoch": 1.212195055912057, "grad_norm": 7.409794330596924, "learning_rate": 8.349337715856613e-06, "loss": 0.61280241, "memory(GiB)": 34.88, "step": 44770, "train_speed(iter/s)": 0.412557 }, { "acc": 0.88197842, "epoch": 1.2123304361952725, "grad_norm": 10.477399826049805, "learning_rate": 8.348922213037625e-06, "loss": 0.65369277, "memory(GiB)": 34.88, "step": 44775, "train_speed(iter/s)": 0.412559 }, { "acc": 0.88350468, "epoch": 1.212465816478488, "grad_norm": 7.0289082527160645, "learning_rate": 8.34850666827307e-06, "loss": 0.56670361, "memory(GiB)": 34.88, "step": 44780, "train_speed(iter/s)": 0.412561 }, { "acc": 0.88035383, "epoch": 1.2126011967617036, "grad_norm": 14.745031356811523, "learning_rate": 8.348091081568152e-06, "loss": 0.63923326, "memory(GiB)": 34.88, "step": 44785, "train_speed(iter/s)": 0.412563 }, { "acc": 0.8909709, "epoch": 1.2127365770449192, "grad_norm": 7.797100067138672, "learning_rate": 8.347675452928076e-06, "loss": 0.52621837, "memory(GiB)": 34.88, "step": 44790, "train_speed(iter/s)": 0.412566 }, { "acc": 0.89582891, "epoch": 1.2128719573281348, "grad_norm": 7.738694190979004, "learning_rate": 8.347259782358052e-06, "loss": 0.6068377, "memory(GiB)": 34.88, "step": 44795, "train_speed(iter/s)": 0.412568 }, { "acc": 0.88850479, "epoch": 1.2130073376113504, "grad_norm": 6.283220291137695, "learning_rate": 8.346844069863286e-06, "loss": 0.56230574, "memory(GiB)": 34.88, "step": 44800, "train_speed(iter/s)": 0.41257 }, { "acc": 0.87590237, "epoch": 1.213142717894566, "grad_norm": 8.23176383972168, "learning_rate": 8.346428315448983e-06, "loss": 0.72453718, "memory(GiB)": 34.88, "step": 44805, "train_speed(iter/s)": 0.412573 }, { "acc": 0.8921978, "epoch": 1.2132780981777813, "grad_norm": 10.67061996459961, "learning_rate": 8.346012519120354e-06, "loss": 0.61599998, "memory(GiB)": 34.88, "step": 44810, "train_speed(iter/s)": 0.412575 }, { "acc": 0.87864494, "epoch": 1.213413478460997, "grad_norm": 5.95351505279541, "learning_rate": 8.345596680882607e-06, "loss": 0.64384732, "memory(GiB)": 34.88, "step": 44815, "train_speed(iter/s)": 0.412577 }, { "acc": 0.899617, "epoch": 1.2135488587442125, "grad_norm": 7.319255828857422, "learning_rate": 8.345180800740951e-06, "loss": 0.51256895, "memory(GiB)": 34.88, "step": 44820, "train_speed(iter/s)": 0.412579 }, { "acc": 0.88758535, "epoch": 1.213684239027428, "grad_norm": 9.271032333374023, "learning_rate": 8.344764878700595e-06, "loss": 0.70313034, "memory(GiB)": 34.88, "step": 44825, "train_speed(iter/s)": 0.412582 }, { "acc": 0.89945545, "epoch": 1.2138196193106436, "grad_norm": 8.169388771057129, "learning_rate": 8.34434891476675e-06, "loss": 0.53579493, "memory(GiB)": 34.88, "step": 44830, "train_speed(iter/s)": 0.412584 }, { "acc": 0.89766808, "epoch": 1.2139549995938592, "grad_norm": 4.620377540588379, "learning_rate": 8.343932908944629e-06, "loss": 0.49833546, "memory(GiB)": 34.88, "step": 44835, "train_speed(iter/s)": 0.412586 }, { "acc": 0.87960949, "epoch": 1.2140903798770748, "grad_norm": 6.601670742034912, "learning_rate": 8.343516861239437e-06, "loss": 0.58199873, "memory(GiB)": 34.88, "step": 44840, "train_speed(iter/s)": 0.412589 }, { "acc": 0.90980511, "epoch": 1.2142257601602902, "grad_norm": 5.5500664710998535, "learning_rate": 8.343100771656393e-06, "loss": 0.45014048, "memory(GiB)": 34.88, "step": 44845, "train_speed(iter/s)": 0.412591 }, { "acc": 0.88060608, "epoch": 1.2143611404435057, "grad_norm": 15.336430549621582, "learning_rate": 8.342684640200705e-06, "loss": 0.64422774, "memory(GiB)": 34.88, "step": 44850, "train_speed(iter/s)": 0.412593 }, { "acc": 0.89658794, "epoch": 1.2144965207267213, "grad_norm": 17.60586929321289, "learning_rate": 8.342268466877588e-06, "loss": 0.56263146, "memory(GiB)": 34.88, "step": 44855, "train_speed(iter/s)": 0.412596 }, { "acc": 0.90308933, "epoch": 1.214631901009937, "grad_norm": 5.647272109985352, "learning_rate": 8.341852251692253e-06, "loss": 0.48954034, "memory(GiB)": 34.88, "step": 44860, "train_speed(iter/s)": 0.412597 }, { "acc": 0.89550447, "epoch": 1.2147672812931525, "grad_norm": 12.890151977539062, "learning_rate": 8.341435994649916e-06, "loss": 0.56750374, "memory(GiB)": 34.88, "step": 44865, "train_speed(iter/s)": 0.4126 }, { "acc": 0.87848883, "epoch": 1.214902661576368, "grad_norm": 7.177614688873291, "learning_rate": 8.34101969575579e-06, "loss": 0.71293359, "memory(GiB)": 34.88, "step": 44870, "train_speed(iter/s)": 0.412602 }, { "acc": 0.8688138, "epoch": 1.2150380418595836, "grad_norm": 6.118139266967773, "learning_rate": 8.34060335501509e-06, "loss": 0.61054821, "memory(GiB)": 34.88, "step": 44875, "train_speed(iter/s)": 0.412604 }, { "acc": 0.88694515, "epoch": 1.215173422142799, "grad_norm": 9.068597793579102, "learning_rate": 8.340186972433035e-06, "loss": 0.65385561, "memory(GiB)": 34.88, "step": 44880, "train_speed(iter/s)": 0.412607 }, { "acc": 0.88649635, "epoch": 1.2153088024260146, "grad_norm": 3.3924720287323, "learning_rate": 8.339770548014837e-06, "loss": 0.57222776, "memory(GiB)": 34.88, "step": 44885, "train_speed(iter/s)": 0.412609 }, { "acc": 0.88380756, "epoch": 1.2154441827092302, "grad_norm": 13.034856796264648, "learning_rate": 8.339354081765714e-06, "loss": 0.65736446, "memory(GiB)": 34.88, "step": 44890, "train_speed(iter/s)": 0.412612 }, { "acc": 0.88033829, "epoch": 1.2155795629924457, "grad_norm": 18.508352279663086, "learning_rate": 8.338937573690882e-06, "loss": 0.66744499, "memory(GiB)": 34.88, "step": 44895, "train_speed(iter/s)": 0.412613 }, { "acc": 0.89325027, "epoch": 1.2157149432756613, "grad_norm": 6.023049354553223, "learning_rate": 8.33852102379556e-06, "loss": 0.55497398, "memory(GiB)": 34.88, "step": 44900, "train_speed(iter/s)": 0.412616 }, { "acc": 0.88658524, "epoch": 1.215850323558877, "grad_norm": 4.566329002380371, "learning_rate": 8.338104432084964e-06, "loss": 0.60331783, "memory(GiB)": 34.88, "step": 44905, "train_speed(iter/s)": 0.412618 }, { "acc": 0.88855534, "epoch": 1.2159857038420925, "grad_norm": 13.505318641662598, "learning_rate": 8.337687798564315e-06, "loss": 0.55009947, "memory(GiB)": 34.88, "step": 44910, "train_speed(iter/s)": 0.41262 }, { "acc": 0.87540455, "epoch": 1.216121084125308, "grad_norm": 7.993583679199219, "learning_rate": 8.33727112323883e-06, "loss": 0.78127799, "memory(GiB)": 34.88, "step": 44915, "train_speed(iter/s)": 0.412623 }, { "acc": 0.89768963, "epoch": 1.2162564644085236, "grad_norm": 7.457418441772461, "learning_rate": 8.336854406113732e-06, "loss": 0.52713108, "memory(GiB)": 34.88, "step": 44920, "train_speed(iter/s)": 0.412625 }, { "acc": 0.89501743, "epoch": 1.216391844691739, "grad_norm": 8.44362735748291, "learning_rate": 8.33643764719424e-06, "loss": 0.53174934, "memory(GiB)": 34.88, "step": 44925, "train_speed(iter/s)": 0.412628 }, { "acc": 0.88524284, "epoch": 1.2165272249749546, "grad_norm": 4.396597385406494, "learning_rate": 8.336020846485572e-06, "loss": 0.5699626, "memory(GiB)": 34.88, "step": 44930, "train_speed(iter/s)": 0.41263 }, { "acc": 0.8966238, "epoch": 1.2166626052581702, "grad_norm": 7.650145053863525, "learning_rate": 8.33560400399295e-06, "loss": 0.61975169, "memory(GiB)": 34.88, "step": 44935, "train_speed(iter/s)": 0.412632 }, { "acc": 0.89444647, "epoch": 1.2167979855413857, "grad_norm": 16.92217445373535, "learning_rate": 8.335187119721599e-06, "loss": 0.5716814, "memory(GiB)": 34.88, "step": 44940, "train_speed(iter/s)": 0.412634 }, { "acc": 0.87048416, "epoch": 1.2169333658246013, "grad_norm": 33.46757888793945, "learning_rate": 8.33477019367674e-06, "loss": 0.68062038, "memory(GiB)": 34.88, "step": 44945, "train_speed(iter/s)": 0.412637 }, { "acc": 0.8702055, "epoch": 1.217068746107817, "grad_norm": 9.4734468460083, "learning_rate": 8.334353225863591e-06, "loss": 0.70422544, "memory(GiB)": 34.88, "step": 44950, "train_speed(iter/s)": 0.412639 }, { "acc": 0.89812984, "epoch": 1.2172041263910325, "grad_norm": 10.005067825317383, "learning_rate": 8.333936216287383e-06, "loss": 0.55707827, "memory(GiB)": 34.88, "step": 44955, "train_speed(iter/s)": 0.412641 }, { "acc": 0.89249125, "epoch": 1.2173395066742478, "grad_norm": 5.804970741271973, "learning_rate": 8.333519164953335e-06, "loss": 0.46082811, "memory(GiB)": 34.88, "step": 44960, "train_speed(iter/s)": 0.412643 }, { "acc": 0.89417992, "epoch": 1.2174748869574634, "grad_norm": 7.108333110809326, "learning_rate": 8.333102071866672e-06, "loss": 0.52767177, "memory(GiB)": 34.88, "step": 44965, "train_speed(iter/s)": 0.412646 }, { "acc": 0.89530697, "epoch": 1.217610267240679, "grad_norm": 7.49974250793457, "learning_rate": 8.33268493703262e-06, "loss": 0.60401106, "memory(GiB)": 34.88, "step": 44970, "train_speed(iter/s)": 0.412648 }, { "acc": 0.89363718, "epoch": 1.2177456475238946, "grad_norm": 10.82596206665039, "learning_rate": 8.332267760456403e-06, "loss": 0.53950276, "memory(GiB)": 34.88, "step": 44975, "train_speed(iter/s)": 0.412651 }, { "acc": 0.87308531, "epoch": 1.2178810278071102, "grad_norm": 9.503655433654785, "learning_rate": 8.33185054214325e-06, "loss": 0.65704279, "memory(GiB)": 34.88, "step": 44980, "train_speed(iter/s)": 0.412653 }, { "acc": 0.90852222, "epoch": 1.2180164080903257, "grad_norm": 15.902013778686523, "learning_rate": 8.331433282098385e-06, "loss": 0.51586771, "memory(GiB)": 34.88, "step": 44985, "train_speed(iter/s)": 0.412655 }, { "acc": 0.8812871, "epoch": 1.2181517883735413, "grad_norm": 8.941838264465332, "learning_rate": 8.331015980327034e-06, "loss": 0.59051571, "memory(GiB)": 34.88, "step": 44990, "train_speed(iter/s)": 0.412657 }, { "acc": 0.88251495, "epoch": 1.218287168656757, "grad_norm": 11.166728973388672, "learning_rate": 8.330598636834426e-06, "loss": 0.63738031, "memory(GiB)": 34.88, "step": 44995, "train_speed(iter/s)": 0.41266 }, { "acc": 0.89292755, "epoch": 1.2184225489399725, "grad_norm": 38.932151794433594, "learning_rate": 8.330181251625789e-06, "loss": 0.58724728, "memory(GiB)": 34.88, "step": 45000, "train_speed(iter/s)": 0.412662 }, { "acc": 0.89380569, "epoch": 1.2185579292231878, "grad_norm": 12.749410629272461, "learning_rate": 8.329763824706355e-06, "loss": 0.57862196, "memory(GiB)": 34.88, "step": 45005, "train_speed(iter/s)": 0.412664 }, { "acc": 0.89917383, "epoch": 1.2186933095064034, "grad_norm": 9.014808654785156, "learning_rate": 8.329346356081346e-06, "loss": 0.51599784, "memory(GiB)": 34.88, "step": 45010, "train_speed(iter/s)": 0.412666 }, { "acc": 0.88001728, "epoch": 1.218828689789619, "grad_norm": 10.607803344726562, "learning_rate": 8.328928845755993e-06, "loss": 0.71224084, "memory(GiB)": 34.88, "step": 45015, "train_speed(iter/s)": 0.412669 }, { "acc": 0.89988194, "epoch": 1.2189640700728346, "grad_norm": 10.415237426757812, "learning_rate": 8.328511293735532e-06, "loss": 0.46599302, "memory(GiB)": 34.88, "step": 45020, "train_speed(iter/s)": 0.412671 }, { "acc": 0.88984203, "epoch": 1.2190994503560502, "grad_norm": 7.049980163574219, "learning_rate": 8.32809370002519e-06, "loss": 0.63519573, "memory(GiB)": 34.88, "step": 45025, "train_speed(iter/s)": 0.412673 }, { "acc": 0.89835262, "epoch": 1.2192348306392657, "grad_norm": 5.93032169342041, "learning_rate": 8.327676064630199e-06, "loss": 0.463517, "memory(GiB)": 34.88, "step": 45030, "train_speed(iter/s)": 0.412676 }, { "acc": 0.88130016, "epoch": 1.2193702109224813, "grad_norm": 8.404311180114746, "learning_rate": 8.327258387555787e-06, "loss": 0.70090733, "memory(GiB)": 34.88, "step": 45035, "train_speed(iter/s)": 0.412678 }, { "acc": 0.89744415, "epoch": 1.2195055912056967, "grad_norm": 9.909331321716309, "learning_rate": 8.32684066880719e-06, "loss": 0.52313781, "memory(GiB)": 34.88, "step": 45040, "train_speed(iter/s)": 0.41268 }, { "acc": 0.90415649, "epoch": 1.2196409714889123, "grad_norm": 8.19991397857666, "learning_rate": 8.326422908389641e-06, "loss": 0.42752085, "memory(GiB)": 34.88, "step": 45045, "train_speed(iter/s)": 0.412683 }, { "acc": 0.89413462, "epoch": 1.2197763517721278, "grad_norm": 9.403093338012695, "learning_rate": 8.326005106308371e-06, "loss": 0.58993368, "memory(GiB)": 34.88, "step": 45050, "train_speed(iter/s)": 0.412685 }, { "acc": 0.89831619, "epoch": 1.2199117320553434, "grad_norm": 24.209728240966797, "learning_rate": 8.325587262568616e-06, "loss": 0.52443256, "memory(GiB)": 34.88, "step": 45055, "train_speed(iter/s)": 0.412687 }, { "acc": 0.89936981, "epoch": 1.220047112338559, "grad_norm": 5.5378851890563965, "learning_rate": 8.325169377175607e-06, "loss": 0.57225838, "memory(GiB)": 34.88, "step": 45060, "train_speed(iter/s)": 0.41269 }, { "acc": 0.9014204, "epoch": 1.2201824926217746, "grad_norm": 9.094088554382324, "learning_rate": 8.324751450134584e-06, "loss": 0.55533819, "memory(GiB)": 34.88, "step": 45065, "train_speed(iter/s)": 0.412692 }, { "acc": 0.88980541, "epoch": 1.2203178729049902, "grad_norm": 10.15760326385498, "learning_rate": 8.324333481450777e-06, "loss": 0.70156484, "memory(GiB)": 34.88, "step": 45070, "train_speed(iter/s)": 0.412694 }, { "acc": 0.89022818, "epoch": 1.2204532531882057, "grad_norm": 7.219549655914307, "learning_rate": 8.323915471129425e-06, "loss": 0.55205026, "memory(GiB)": 34.88, "step": 45075, "train_speed(iter/s)": 0.412696 }, { "acc": 0.86725969, "epoch": 1.2205886334714213, "grad_norm": 9.0263671875, "learning_rate": 8.323497419175766e-06, "loss": 0.68644581, "memory(GiB)": 34.88, "step": 45080, "train_speed(iter/s)": 0.412698 }, { "acc": 0.90576868, "epoch": 1.2207240137546367, "grad_norm": 14.142265319824219, "learning_rate": 8.323079325595032e-06, "loss": 0.44378133, "memory(GiB)": 34.88, "step": 45085, "train_speed(iter/s)": 0.412701 }, { "acc": 0.87440758, "epoch": 1.2208593940378523, "grad_norm": 7.059946060180664, "learning_rate": 8.322661190392463e-06, "loss": 0.62489424, "memory(GiB)": 34.88, "step": 45090, "train_speed(iter/s)": 0.412703 }, { "acc": 0.8946847, "epoch": 1.2209947743210678, "grad_norm": 8.67746639251709, "learning_rate": 8.3222430135733e-06, "loss": 0.61320343, "memory(GiB)": 34.88, "step": 45095, "train_speed(iter/s)": 0.412705 }, { "acc": 0.87944069, "epoch": 1.2211301546042834, "grad_norm": 6.287262439727783, "learning_rate": 8.321824795142775e-06, "loss": 0.63612251, "memory(GiB)": 34.88, "step": 45100, "train_speed(iter/s)": 0.412707 }, { "acc": 0.89602737, "epoch": 1.221265534887499, "grad_norm": 16.822826385498047, "learning_rate": 8.321406535106133e-06, "loss": 0.59572039, "memory(GiB)": 34.88, "step": 45105, "train_speed(iter/s)": 0.41271 }, { "acc": 0.91469784, "epoch": 1.2214009151707146, "grad_norm": 7.554628372192383, "learning_rate": 8.32098823346861e-06, "loss": 0.39680262, "memory(GiB)": 34.88, "step": 45110, "train_speed(iter/s)": 0.412712 }, { "acc": 0.90494013, "epoch": 1.2215362954539302, "grad_norm": 7.0016984939575195, "learning_rate": 8.320569890235447e-06, "loss": 0.48455257, "memory(GiB)": 34.88, "step": 45115, "train_speed(iter/s)": 0.412714 }, { "acc": 0.88789654, "epoch": 1.2216716757371455, "grad_norm": 7.00147008895874, "learning_rate": 8.320151505411886e-06, "loss": 0.60115814, "memory(GiB)": 34.88, "step": 45120, "train_speed(iter/s)": 0.412717 }, { "acc": 0.9133132, "epoch": 1.221807056020361, "grad_norm": 11.244131088256836, "learning_rate": 8.319733079003166e-06, "loss": 0.47229986, "memory(GiB)": 34.88, "step": 45125, "train_speed(iter/s)": 0.412719 }, { "acc": 0.9023159, "epoch": 1.2219424363035767, "grad_norm": 6.761031150817871, "learning_rate": 8.319314611014526e-06, "loss": 0.47561102, "memory(GiB)": 34.88, "step": 45130, "train_speed(iter/s)": 0.412721 }, { "acc": 0.87375546, "epoch": 1.2220778165867923, "grad_norm": 13.913207054138184, "learning_rate": 8.318896101451216e-06, "loss": 0.73596702, "memory(GiB)": 34.88, "step": 45135, "train_speed(iter/s)": 0.412723 }, { "acc": 0.89490414, "epoch": 1.2222131968700078, "grad_norm": 6.198687553405762, "learning_rate": 8.318477550318474e-06, "loss": 0.48478832, "memory(GiB)": 34.88, "step": 45140, "train_speed(iter/s)": 0.412726 }, { "acc": 0.9114109, "epoch": 1.2223485771532234, "grad_norm": 7.458199977874756, "learning_rate": 8.318058957621541e-06, "loss": 0.41503725, "memory(GiB)": 34.88, "step": 45145, "train_speed(iter/s)": 0.412728 }, { "acc": 0.88561211, "epoch": 1.222483957436439, "grad_norm": 12.788269996643066, "learning_rate": 8.317640323365665e-06, "loss": 0.59222832, "memory(GiB)": 34.88, "step": 45150, "train_speed(iter/s)": 0.41273 }, { "acc": 0.88549767, "epoch": 1.2226193377196546, "grad_norm": 13.324362754821777, "learning_rate": 8.317221647556088e-06, "loss": 0.61148171, "memory(GiB)": 34.88, "step": 45155, "train_speed(iter/s)": 0.412732 }, { "acc": 0.89014864, "epoch": 1.2227547180028702, "grad_norm": 4.924062728881836, "learning_rate": 8.316802930198056e-06, "loss": 0.5857728, "memory(GiB)": 34.88, "step": 45160, "train_speed(iter/s)": 0.412735 }, { "acc": 0.9007988, "epoch": 1.2228900982860855, "grad_norm": 7.924203872680664, "learning_rate": 8.316384171296811e-06, "loss": 0.50974617, "memory(GiB)": 34.88, "step": 45165, "train_speed(iter/s)": 0.412737 }, { "acc": 0.8684104, "epoch": 1.223025478569301, "grad_norm": 6.103691101074219, "learning_rate": 8.315965370857601e-06, "loss": 0.68354349, "memory(GiB)": 34.88, "step": 45170, "train_speed(iter/s)": 0.41274 }, { "acc": 0.8686841, "epoch": 1.2231608588525167, "grad_norm": 9.839695930480957, "learning_rate": 8.315546528885674e-06, "loss": 0.73199148, "memory(GiB)": 34.88, "step": 45175, "train_speed(iter/s)": 0.412742 }, { "acc": 0.91296234, "epoch": 1.2232962391357323, "grad_norm": 6.197756290435791, "learning_rate": 8.315127645386275e-06, "loss": 0.46617298, "memory(GiB)": 34.88, "step": 45180, "train_speed(iter/s)": 0.412744 }, { "acc": 0.87909069, "epoch": 1.2234316194189478, "grad_norm": 22.028844833374023, "learning_rate": 8.31470872036465e-06, "loss": 0.73468528, "memory(GiB)": 34.88, "step": 45185, "train_speed(iter/s)": 0.412747 }, { "acc": 0.90305033, "epoch": 1.2235669997021634, "grad_norm": 7.515604496002197, "learning_rate": 8.314289753826049e-06, "loss": 0.45556698, "memory(GiB)": 34.88, "step": 45190, "train_speed(iter/s)": 0.412749 }, { "acc": 0.89348488, "epoch": 1.223702379985379, "grad_norm": 6.313014984130859, "learning_rate": 8.313870745775721e-06, "loss": 0.50476322, "memory(GiB)": 34.88, "step": 45195, "train_speed(iter/s)": 0.412751 }, { "acc": 0.87817822, "epoch": 1.2238377602685944, "grad_norm": 11.668643951416016, "learning_rate": 8.313451696218914e-06, "loss": 0.67732015, "memory(GiB)": 34.88, "step": 45200, "train_speed(iter/s)": 0.412753 }, { "acc": 0.89618969, "epoch": 1.22397314055181, "grad_norm": 8.301387786865234, "learning_rate": 8.313032605160874e-06, "loss": 0.59897404, "memory(GiB)": 34.88, "step": 45205, "train_speed(iter/s)": 0.412756 }, { "acc": 0.86718206, "epoch": 1.2241085208350255, "grad_norm": 15.055113792419434, "learning_rate": 8.312613472606857e-06, "loss": 0.67848315, "memory(GiB)": 34.88, "step": 45210, "train_speed(iter/s)": 0.412758 }, { "acc": 0.87006721, "epoch": 1.224243901118241, "grad_norm": 11.743807792663574, "learning_rate": 8.312194298562109e-06, "loss": 0.70234671, "memory(GiB)": 34.88, "step": 45215, "train_speed(iter/s)": 0.41276 }, { "acc": 0.87644024, "epoch": 1.2243792814014567, "grad_norm": 12.429280281066895, "learning_rate": 8.311775083031882e-06, "loss": 0.65838456, "memory(GiB)": 34.88, "step": 45220, "train_speed(iter/s)": 0.412762 }, { "acc": 0.89424477, "epoch": 1.2245146616846723, "grad_norm": 6.961434364318848, "learning_rate": 8.31135582602143e-06, "loss": 0.52976885, "memory(GiB)": 34.88, "step": 45225, "train_speed(iter/s)": 0.412764 }, { "acc": 0.9083683, "epoch": 1.2246500419678878, "grad_norm": 7.506963729858398, "learning_rate": 8.310936527536001e-06, "loss": 0.44407587, "memory(GiB)": 34.88, "step": 45230, "train_speed(iter/s)": 0.412766 }, { "acc": 0.88660707, "epoch": 1.2247854222511034, "grad_norm": 10.228667259216309, "learning_rate": 8.31051718758085e-06, "loss": 0.62885284, "memory(GiB)": 34.88, "step": 45235, "train_speed(iter/s)": 0.412769 }, { "acc": 0.8995038, "epoch": 1.224920802534319, "grad_norm": 18.46358871459961, "learning_rate": 8.31009780616123e-06, "loss": 0.54187512, "memory(GiB)": 34.88, "step": 45240, "train_speed(iter/s)": 0.412771 }, { "acc": 0.91657267, "epoch": 1.2250561828175344, "grad_norm": 3.8899195194244385, "learning_rate": 8.309678383282394e-06, "loss": 0.3966239, "memory(GiB)": 34.88, "step": 45245, "train_speed(iter/s)": 0.412773 }, { "acc": 0.90183268, "epoch": 1.22519156310075, "grad_norm": 6.47100830078125, "learning_rate": 8.309258918949596e-06, "loss": 0.49690533, "memory(GiB)": 34.88, "step": 45250, "train_speed(iter/s)": 0.412775 }, { "acc": 0.87235518, "epoch": 1.2253269433839655, "grad_norm": 8.556814193725586, "learning_rate": 8.30883941316809e-06, "loss": 0.71465521, "memory(GiB)": 34.88, "step": 45255, "train_speed(iter/s)": 0.412777 }, { "acc": 0.90361824, "epoch": 1.225462323667181, "grad_norm": 5.631905555725098, "learning_rate": 8.308419865943134e-06, "loss": 0.48177447, "memory(GiB)": 34.88, "step": 45260, "train_speed(iter/s)": 0.412779 }, { "acc": 0.87274103, "epoch": 1.2255977039503967, "grad_norm": 11.331938743591309, "learning_rate": 8.30800027727998e-06, "loss": 0.7485384, "memory(GiB)": 34.88, "step": 45265, "train_speed(iter/s)": 0.412781 }, { "acc": 0.88711376, "epoch": 1.2257330842336123, "grad_norm": 4.795888900756836, "learning_rate": 8.307580647183887e-06, "loss": 0.58568511, "memory(GiB)": 34.88, "step": 45270, "train_speed(iter/s)": 0.412784 }, { "acc": 0.90322628, "epoch": 1.2258684645168278, "grad_norm": 5.687919616699219, "learning_rate": 8.30716097566011e-06, "loss": 0.51093249, "memory(GiB)": 34.88, "step": 45275, "train_speed(iter/s)": 0.412786 }, { "acc": 0.90657902, "epoch": 1.2260038448000432, "grad_norm": 4.666208267211914, "learning_rate": 8.306741262713906e-06, "loss": 0.39928584, "memory(GiB)": 34.88, "step": 45280, "train_speed(iter/s)": 0.412788 }, { "acc": 0.87515564, "epoch": 1.2261392250832588, "grad_norm": 8.972108840942383, "learning_rate": 8.306321508350533e-06, "loss": 0.62507334, "memory(GiB)": 34.88, "step": 45285, "train_speed(iter/s)": 0.41279 }, { "acc": 0.88617039, "epoch": 1.2262746053664744, "grad_norm": 12.652077674865723, "learning_rate": 8.305901712575251e-06, "loss": 0.59903097, "memory(GiB)": 34.88, "step": 45290, "train_speed(iter/s)": 0.412793 }, { "acc": 0.90224133, "epoch": 1.22640998564969, "grad_norm": 8.555771827697754, "learning_rate": 8.305481875393318e-06, "loss": 0.5933485, "memory(GiB)": 34.88, "step": 45295, "train_speed(iter/s)": 0.412795 }, { "acc": 0.89213543, "epoch": 1.2265453659329055, "grad_norm": 8.06934928894043, "learning_rate": 8.305061996809991e-06, "loss": 0.53005238, "memory(GiB)": 34.88, "step": 45300, "train_speed(iter/s)": 0.412797 }, { "acc": 0.88193188, "epoch": 1.226680746216121, "grad_norm": 14.666257858276367, "learning_rate": 8.304642076830533e-06, "loss": 0.59698629, "memory(GiB)": 34.88, "step": 45305, "train_speed(iter/s)": 0.412799 }, { "acc": 0.91270428, "epoch": 1.2268161264993367, "grad_norm": 5.58305549621582, "learning_rate": 8.304222115460202e-06, "loss": 0.44828019, "memory(GiB)": 34.88, "step": 45310, "train_speed(iter/s)": 0.412802 }, { "acc": 0.88927536, "epoch": 1.2269515067825523, "grad_norm": 10.535579681396484, "learning_rate": 8.30380211270426e-06, "loss": 0.58797474, "memory(GiB)": 34.88, "step": 45315, "train_speed(iter/s)": 0.412804 }, { "acc": 0.88944283, "epoch": 1.2270868870657679, "grad_norm": 9.420852661132812, "learning_rate": 8.303382068567968e-06, "loss": 0.62133169, "memory(GiB)": 34.88, "step": 45320, "train_speed(iter/s)": 0.412806 }, { "acc": 0.88715343, "epoch": 1.2272222673489832, "grad_norm": 6.262947082519531, "learning_rate": 8.302961983056588e-06, "loss": 0.56017027, "memory(GiB)": 34.88, "step": 45325, "train_speed(iter/s)": 0.412808 }, { "acc": 0.90171356, "epoch": 1.2273576476321988, "grad_norm": 5.976078033447266, "learning_rate": 8.302541856175383e-06, "loss": 0.49787912, "memory(GiB)": 34.88, "step": 45330, "train_speed(iter/s)": 0.41281 }, { "acc": 0.89697075, "epoch": 1.2274930279154144, "grad_norm": 5.90190315246582, "learning_rate": 8.302121687929613e-06, "loss": 0.54354429, "memory(GiB)": 34.88, "step": 45335, "train_speed(iter/s)": 0.412813 }, { "acc": 0.87684555, "epoch": 1.22762840819863, "grad_norm": 7.730188846588135, "learning_rate": 8.301701478324545e-06, "loss": 0.64285135, "memory(GiB)": 34.88, "step": 45340, "train_speed(iter/s)": 0.412814 }, { "acc": 0.90162525, "epoch": 1.2277637884818455, "grad_norm": 11.044716835021973, "learning_rate": 8.301281227365443e-06, "loss": 0.56904812, "memory(GiB)": 34.88, "step": 45345, "train_speed(iter/s)": 0.412816 }, { "acc": 0.89190254, "epoch": 1.227899168765061, "grad_norm": 10.37645149230957, "learning_rate": 8.300860935057568e-06, "loss": 0.60171385, "memory(GiB)": 34.88, "step": 45350, "train_speed(iter/s)": 0.412818 }, { "acc": 0.88059483, "epoch": 1.2280345490482767, "grad_norm": 16.953662872314453, "learning_rate": 8.300440601406188e-06, "loss": 0.58031702, "memory(GiB)": 34.88, "step": 45355, "train_speed(iter/s)": 0.412821 }, { "acc": 0.90419521, "epoch": 1.228169929331492, "grad_norm": 9.632587432861328, "learning_rate": 8.300020226416569e-06, "loss": 0.42813368, "memory(GiB)": 34.88, "step": 45360, "train_speed(iter/s)": 0.412823 }, { "acc": 0.8902688, "epoch": 1.2283053096147076, "grad_norm": 7.760126113891602, "learning_rate": 8.299599810093974e-06, "loss": 0.5849864, "memory(GiB)": 34.88, "step": 45365, "train_speed(iter/s)": 0.412824 }, { "acc": 0.90259247, "epoch": 1.2284406898979232, "grad_norm": 6.005525588989258, "learning_rate": 8.299179352443671e-06, "loss": 0.51601658, "memory(GiB)": 34.88, "step": 45370, "train_speed(iter/s)": 0.412827 }, { "acc": 0.90279942, "epoch": 1.2285760701811388, "grad_norm": 5.904856204986572, "learning_rate": 8.298758853470927e-06, "loss": 0.4431118, "memory(GiB)": 34.88, "step": 45375, "train_speed(iter/s)": 0.412829 }, { "acc": 0.87130203, "epoch": 1.2287114504643544, "grad_norm": 10.704177856445312, "learning_rate": 8.298338313181012e-06, "loss": 0.76331878, "memory(GiB)": 34.88, "step": 45380, "train_speed(iter/s)": 0.412831 }, { "acc": 0.88655682, "epoch": 1.22884683074757, "grad_norm": 4.368256092071533, "learning_rate": 8.297917731579189e-06, "loss": 0.5705246, "memory(GiB)": 34.88, "step": 45385, "train_speed(iter/s)": 0.412833 }, { "acc": 0.88006783, "epoch": 1.2289822110307855, "grad_norm": 19.929868698120117, "learning_rate": 8.29749710867073e-06, "loss": 0.62674284, "memory(GiB)": 34.88, "step": 45390, "train_speed(iter/s)": 0.412835 }, { "acc": 0.89795103, "epoch": 1.2291175913140011, "grad_norm": 6.233912944793701, "learning_rate": 8.297076444460905e-06, "loss": 0.52026663, "memory(GiB)": 34.88, "step": 45395, "train_speed(iter/s)": 0.412837 }, { "acc": 0.88713398, "epoch": 1.2292529715972167, "grad_norm": 7.472858905792236, "learning_rate": 8.296655738954982e-06, "loss": 0.59638653, "memory(GiB)": 34.88, "step": 45400, "train_speed(iter/s)": 0.412839 }, { "acc": 0.8846633, "epoch": 1.229388351880432, "grad_norm": 5.388561248779297, "learning_rate": 8.296234992158231e-06, "loss": 0.57555246, "memory(GiB)": 34.88, "step": 45405, "train_speed(iter/s)": 0.412842 }, { "acc": 0.86946554, "epoch": 1.2295237321636476, "grad_norm": 10.17664623260498, "learning_rate": 8.295814204075922e-06, "loss": 0.7370542, "memory(GiB)": 34.88, "step": 45410, "train_speed(iter/s)": 0.412844 }, { "acc": 0.88495321, "epoch": 1.2296591124468632, "grad_norm": 8.92165756225586, "learning_rate": 8.29539337471333e-06, "loss": 0.642834, "memory(GiB)": 34.88, "step": 45415, "train_speed(iter/s)": 0.412846 }, { "acc": 0.89604664, "epoch": 1.2297944927300788, "grad_norm": 6.750359058380127, "learning_rate": 8.294972504075722e-06, "loss": 0.480971, "memory(GiB)": 34.88, "step": 45420, "train_speed(iter/s)": 0.412848 }, { "acc": 0.90681953, "epoch": 1.2299298730132944, "grad_norm": 6.584060192108154, "learning_rate": 8.294551592168373e-06, "loss": 0.55562873, "memory(GiB)": 34.88, "step": 45425, "train_speed(iter/s)": 0.41285 }, { "acc": 0.90727921, "epoch": 1.23006525329651, "grad_norm": 3.6844711303710938, "learning_rate": 8.294130638996554e-06, "loss": 0.48864846, "memory(GiB)": 34.88, "step": 45430, "train_speed(iter/s)": 0.412853 }, { "acc": 0.89047251, "epoch": 1.2302006335797255, "grad_norm": 5.898309230804443, "learning_rate": 8.29370964456554e-06, "loss": 0.61121697, "memory(GiB)": 34.88, "step": 45435, "train_speed(iter/s)": 0.412854 }, { "acc": 0.87411194, "epoch": 1.230336013862941, "grad_norm": 6.948007583618164, "learning_rate": 8.293288608880606e-06, "loss": 0.59320173, "memory(GiB)": 34.88, "step": 45440, "train_speed(iter/s)": 0.412856 }, { "acc": 0.88364124, "epoch": 1.2304713941461565, "grad_norm": 14.524592399597168, "learning_rate": 8.292867531947023e-06, "loss": 0.56197195, "memory(GiB)": 34.88, "step": 45445, "train_speed(iter/s)": 0.412858 }, { "acc": 0.88889008, "epoch": 1.230606774429372, "grad_norm": 7.663805961608887, "learning_rate": 8.292446413770067e-06, "loss": 0.5490345, "memory(GiB)": 34.88, "step": 45450, "train_speed(iter/s)": 0.41286 }, { "acc": 0.90070038, "epoch": 1.2307421547125876, "grad_norm": 6.440911293029785, "learning_rate": 8.292025254355012e-06, "loss": 0.53621216, "memory(GiB)": 34.88, "step": 45455, "train_speed(iter/s)": 0.412863 }, { "acc": 0.87140026, "epoch": 1.2308775349958032, "grad_norm": 5.957942485809326, "learning_rate": 8.291604053707137e-06, "loss": 0.72923508, "memory(GiB)": 34.88, "step": 45460, "train_speed(iter/s)": 0.412865 }, { "acc": 0.8996191, "epoch": 1.2310129152790188, "grad_norm": 6.648718357086182, "learning_rate": 8.291182811831715e-06, "loss": 0.47610912, "memory(GiB)": 34.88, "step": 45465, "train_speed(iter/s)": 0.412868 }, { "acc": 0.86423893, "epoch": 1.2311482955622344, "grad_norm": 9.15996265411377, "learning_rate": 8.290761528734027e-06, "loss": 0.78108916, "memory(GiB)": 34.88, "step": 45470, "train_speed(iter/s)": 0.41287 }, { "acc": 0.89976196, "epoch": 1.23128367584545, "grad_norm": 10.591002464294434, "learning_rate": 8.290340204419347e-06, "loss": 0.57800064, "memory(GiB)": 34.88, "step": 45475, "train_speed(iter/s)": 0.412872 }, { "acc": 0.88552418, "epoch": 1.2314190561286655, "grad_norm": 17.53811264038086, "learning_rate": 8.289918838892953e-06, "loss": 0.59498529, "memory(GiB)": 34.88, "step": 45480, "train_speed(iter/s)": 0.412874 }, { "acc": 0.88580065, "epoch": 1.231554436411881, "grad_norm": 7.249420642852783, "learning_rate": 8.289497432160126e-06, "loss": 0.52326641, "memory(GiB)": 34.88, "step": 45485, "train_speed(iter/s)": 0.412876 }, { "acc": 0.91658211, "epoch": 1.2316898166950965, "grad_norm": 6.715843677520752, "learning_rate": 8.289075984226142e-06, "loss": 0.46042008, "memory(GiB)": 34.88, "step": 45490, "train_speed(iter/s)": 0.412878 }, { "acc": 0.89801712, "epoch": 1.231825196978312, "grad_norm": 7.728299140930176, "learning_rate": 8.288654495096282e-06, "loss": 0.60606871, "memory(GiB)": 34.88, "step": 45495, "train_speed(iter/s)": 0.412881 }, { "acc": 0.9036129, "epoch": 1.2319605772615276, "grad_norm": 11.081916809082031, "learning_rate": 8.288232964775825e-06, "loss": 0.45986786, "memory(GiB)": 34.88, "step": 45500, "train_speed(iter/s)": 0.412883 }, { "acc": 0.86780443, "epoch": 1.2320959575447432, "grad_norm": 16.883995056152344, "learning_rate": 8.287811393270053e-06, "loss": 0.71620913, "memory(GiB)": 34.88, "step": 45505, "train_speed(iter/s)": 0.412885 }, { "acc": 0.91135702, "epoch": 1.2322313378279588, "grad_norm": 5.320631504058838, "learning_rate": 8.287389780584247e-06, "loss": 0.46968393, "memory(GiB)": 34.88, "step": 45510, "train_speed(iter/s)": 0.412887 }, { "acc": 0.8896245, "epoch": 1.2323667181111744, "grad_norm": 7.050038814544678, "learning_rate": 8.286968126723687e-06, "loss": 0.59351015, "memory(GiB)": 34.88, "step": 45515, "train_speed(iter/s)": 0.412888 }, { "acc": 0.87909813, "epoch": 1.2325020983943897, "grad_norm": 17.302316665649414, "learning_rate": 8.286546431693655e-06, "loss": 0.70125213, "memory(GiB)": 34.88, "step": 45520, "train_speed(iter/s)": 0.412891 }, { "acc": 0.89428101, "epoch": 1.2326374786776053, "grad_norm": 22.08171272277832, "learning_rate": 8.286124695499437e-06, "loss": 0.56291742, "memory(GiB)": 34.88, "step": 45525, "train_speed(iter/s)": 0.412893 }, { "acc": 0.89123583, "epoch": 1.232772858960821, "grad_norm": 4.694798946380615, "learning_rate": 8.285702918146313e-06, "loss": 0.62539239, "memory(GiB)": 34.88, "step": 45530, "train_speed(iter/s)": 0.412895 }, { "acc": 0.89980059, "epoch": 1.2329082392440365, "grad_norm": 34.920379638671875, "learning_rate": 8.285281099639567e-06, "loss": 0.50456009, "memory(GiB)": 34.88, "step": 45535, "train_speed(iter/s)": 0.412898 }, { "acc": 0.89831085, "epoch": 1.233043619527252, "grad_norm": 8.048422813415527, "learning_rate": 8.284859239984479e-06, "loss": 0.49714851, "memory(GiB)": 34.88, "step": 45540, "train_speed(iter/s)": 0.4129 }, { "acc": 0.87773952, "epoch": 1.2331789998104676, "grad_norm": 8.273308753967285, "learning_rate": 8.284437339186342e-06, "loss": 0.63900037, "memory(GiB)": 34.88, "step": 45545, "train_speed(iter/s)": 0.412902 }, { "acc": 0.89246817, "epoch": 1.2333143800936832, "grad_norm": 6.6587910652160645, "learning_rate": 8.284015397250434e-06, "loss": 0.6121789, "memory(GiB)": 34.88, "step": 45550, "train_speed(iter/s)": 0.412905 }, { "acc": 0.89543667, "epoch": 1.2334497603768988, "grad_norm": 8.249961853027344, "learning_rate": 8.283593414182046e-06, "loss": 0.52056351, "memory(GiB)": 34.88, "step": 45555, "train_speed(iter/s)": 0.412907 }, { "acc": 0.88063736, "epoch": 1.2335851406601144, "grad_norm": 13.888017654418945, "learning_rate": 8.283171389986462e-06, "loss": 0.70236216, "memory(GiB)": 34.88, "step": 45560, "train_speed(iter/s)": 0.41291 }, { "acc": 0.89168243, "epoch": 1.2337205209433297, "grad_norm": 10.178956985473633, "learning_rate": 8.282749324668966e-06, "loss": 0.72171397, "memory(GiB)": 34.88, "step": 45565, "train_speed(iter/s)": 0.412912 }, { "acc": 0.90432072, "epoch": 1.2338559012265453, "grad_norm": 18.35786247253418, "learning_rate": 8.28232721823485e-06, "loss": 0.54849977, "memory(GiB)": 34.88, "step": 45570, "train_speed(iter/s)": 0.412914 }, { "acc": 0.89065342, "epoch": 1.233991281509761, "grad_norm": 4.2326531410217285, "learning_rate": 8.281905070689397e-06, "loss": 0.63869295, "memory(GiB)": 34.88, "step": 45575, "train_speed(iter/s)": 0.412916 }, { "acc": 0.91464348, "epoch": 1.2341266617929765, "grad_norm": 3.7571985721588135, "learning_rate": 8.2814828820379e-06, "loss": 0.46261296, "memory(GiB)": 34.88, "step": 45580, "train_speed(iter/s)": 0.412918 }, { "acc": 0.89173927, "epoch": 1.234262042076192, "grad_norm": 20.297821044921875, "learning_rate": 8.281060652285644e-06, "loss": 0.61227112, "memory(GiB)": 34.88, "step": 45585, "train_speed(iter/s)": 0.41292 }, { "acc": 0.87827816, "epoch": 1.2343974223594076, "grad_norm": 7.097497463226318, "learning_rate": 8.280638381437919e-06, "loss": 0.69522572, "memory(GiB)": 34.88, "step": 45590, "train_speed(iter/s)": 0.412922 }, { "acc": 0.88871422, "epoch": 1.2345328026426232, "grad_norm": 6.967200756072998, "learning_rate": 8.280216069500017e-06, "loss": 0.62022796, "memory(GiB)": 34.88, "step": 45595, "train_speed(iter/s)": 0.412923 }, { "acc": 0.8859251, "epoch": 1.2346681829258386, "grad_norm": 4.745894908905029, "learning_rate": 8.279793716477224e-06, "loss": 0.6171875, "memory(GiB)": 34.88, "step": 45600, "train_speed(iter/s)": 0.412925 }, { "acc": 0.90112886, "epoch": 1.2348035632090542, "grad_norm": 6.9837775230407715, "learning_rate": 8.279371322374836e-06, "loss": 0.49220958, "memory(GiB)": 34.88, "step": 45605, "train_speed(iter/s)": 0.412927 }, { "acc": 0.88726015, "epoch": 1.2349389434922697, "grad_norm": 65.55134582519531, "learning_rate": 8.278948887198141e-06, "loss": 0.58126774, "memory(GiB)": 34.88, "step": 45610, "train_speed(iter/s)": 0.412929 }, { "acc": 0.88338804, "epoch": 1.2350743237754853, "grad_norm": 12.96781063079834, "learning_rate": 8.27852641095243e-06, "loss": 0.59420357, "memory(GiB)": 34.88, "step": 45615, "train_speed(iter/s)": 0.41293 }, { "acc": 0.90485582, "epoch": 1.235209704058701, "grad_norm": 14.6688232421875, "learning_rate": 8.278103893643e-06, "loss": 0.40858488, "memory(GiB)": 34.88, "step": 45620, "train_speed(iter/s)": 0.412932 }, { "acc": 0.8977788, "epoch": 1.2353450843419165, "grad_norm": 3.6957030296325684, "learning_rate": 8.277681335275138e-06, "loss": 0.57320604, "memory(GiB)": 34.88, "step": 45625, "train_speed(iter/s)": 0.412935 }, { "acc": 0.87408981, "epoch": 1.235480464625132, "grad_norm": 13.843706130981445, "learning_rate": 8.27725873585414e-06, "loss": 0.67600608, "memory(GiB)": 34.88, "step": 45630, "train_speed(iter/s)": 0.412937 }, { "acc": 0.91571445, "epoch": 1.2356158449083476, "grad_norm": 4.784703254699707, "learning_rate": 8.2768360953853e-06, "loss": 0.3936034, "memory(GiB)": 34.88, "step": 45635, "train_speed(iter/s)": 0.412939 }, { "acc": 0.880546, "epoch": 1.2357512251915632, "grad_norm": 7.5825114250183105, "learning_rate": 8.276413413873915e-06, "loss": 0.69812727, "memory(GiB)": 34.88, "step": 45640, "train_speed(iter/s)": 0.412941 }, { "acc": 0.89439392, "epoch": 1.2358866054747786, "grad_norm": 14.257893562316895, "learning_rate": 8.275990691325277e-06, "loss": 0.55749474, "memory(GiB)": 34.88, "step": 45645, "train_speed(iter/s)": 0.412943 }, { "acc": 0.90253792, "epoch": 1.2360219857579942, "grad_norm": 8.444900512695312, "learning_rate": 8.27556792774468e-06, "loss": 0.5343236, "memory(GiB)": 34.88, "step": 45650, "train_speed(iter/s)": 0.412946 }, { "acc": 0.89186811, "epoch": 1.2361573660412097, "grad_norm": 10.77775764465332, "learning_rate": 8.275145123137421e-06, "loss": 0.5730485, "memory(GiB)": 34.88, "step": 45655, "train_speed(iter/s)": 0.412948 }, { "acc": 0.87455406, "epoch": 1.2362927463244253, "grad_norm": 13.18262004852295, "learning_rate": 8.274722277508799e-06, "loss": 0.70891323, "memory(GiB)": 34.88, "step": 45660, "train_speed(iter/s)": 0.41295 }, { "acc": 0.90621948, "epoch": 1.236428126607641, "grad_norm": 7.475658893585205, "learning_rate": 8.274299390864108e-06, "loss": 0.46795368, "memory(GiB)": 34.88, "step": 45665, "train_speed(iter/s)": 0.412953 }, { "acc": 0.89228249, "epoch": 1.2365635068908565, "grad_norm": 8.303881645202637, "learning_rate": 8.273876463208646e-06, "loss": 0.53292131, "memory(GiB)": 34.88, "step": 45670, "train_speed(iter/s)": 0.412954 }, { "acc": 0.88910789, "epoch": 1.236698887174072, "grad_norm": 6.843653202056885, "learning_rate": 8.273453494547714e-06, "loss": 0.57757239, "memory(GiB)": 34.88, "step": 45675, "train_speed(iter/s)": 0.412956 }, { "acc": 0.92530746, "epoch": 1.2368342674572874, "grad_norm": 5.4569315910339355, "learning_rate": 8.273030484886607e-06, "loss": 0.37431154, "memory(GiB)": 34.88, "step": 45680, "train_speed(iter/s)": 0.412958 }, { "acc": 0.88851805, "epoch": 1.236969647740503, "grad_norm": 14.604867935180664, "learning_rate": 8.272607434230626e-06, "loss": 0.69194989, "memory(GiB)": 34.88, "step": 45685, "train_speed(iter/s)": 0.41296 }, { "acc": 0.87465315, "epoch": 1.2371050280237186, "grad_norm": 8.396125793457031, "learning_rate": 8.272184342585067e-06, "loss": 0.64301672, "memory(GiB)": 34.88, "step": 45690, "train_speed(iter/s)": 0.412962 }, { "acc": 0.88931789, "epoch": 1.2372404083069342, "grad_norm": 4.297941207885742, "learning_rate": 8.271761209955235e-06, "loss": 0.59360666, "memory(GiB)": 34.88, "step": 45695, "train_speed(iter/s)": 0.412964 }, { "acc": 0.89016666, "epoch": 1.2373757885901497, "grad_norm": 8.652633666992188, "learning_rate": 8.271338036346428e-06, "loss": 0.53862576, "memory(GiB)": 34.88, "step": 45700, "train_speed(iter/s)": 0.412966 }, { "acc": 0.90776806, "epoch": 1.2375111688733653, "grad_norm": 6.790056228637695, "learning_rate": 8.270914821763948e-06, "loss": 0.53078742, "memory(GiB)": 34.88, "step": 45705, "train_speed(iter/s)": 0.412968 }, { "acc": 0.89413242, "epoch": 1.237646549156581, "grad_norm": 9.651641845703125, "learning_rate": 8.270491566213094e-06, "loss": 0.5401803, "memory(GiB)": 34.88, "step": 45710, "train_speed(iter/s)": 0.412971 }, { "acc": 0.89648895, "epoch": 1.2377819294397965, "grad_norm": 4.731078624725342, "learning_rate": 8.270068269699173e-06, "loss": 0.5610054, "memory(GiB)": 34.88, "step": 45715, "train_speed(iter/s)": 0.412972 }, { "acc": 0.89792604, "epoch": 1.237917309723012, "grad_norm": 6.718957901000977, "learning_rate": 8.269644932227484e-06, "loss": 0.56870284, "memory(GiB)": 34.88, "step": 45720, "train_speed(iter/s)": 0.412974 }, { "acc": 0.87510586, "epoch": 1.2380526900062274, "grad_norm": 12.004551887512207, "learning_rate": 8.269221553803329e-06, "loss": 0.62751899, "memory(GiB)": 34.88, "step": 45725, "train_speed(iter/s)": 0.412975 }, { "acc": 0.8826725, "epoch": 1.238188070289443, "grad_norm": 8.127464294433594, "learning_rate": 8.268798134432014e-06, "loss": 0.60708847, "memory(GiB)": 34.88, "step": 45730, "train_speed(iter/s)": 0.412977 }, { "acc": 0.90029526, "epoch": 1.2383234505726586, "grad_norm": 13.833662986755371, "learning_rate": 8.268374674118843e-06, "loss": 0.52720799, "memory(GiB)": 34.88, "step": 45735, "train_speed(iter/s)": 0.412978 }, { "acc": 0.90398064, "epoch": 1.2384588308558742, "grad_norm": 8.891571998596191, "learning_rate": 8.26795117286912e-06, "loss": 0.57433186, "memory(GiB)": 34.88, "step": 45740, "train_speed(iter/s)": 0.41298 }, { "acc": 0.90229597, "epoch": 1.2385942111390897, "grad_norm": 12.197493553161621, "learning_rate": 8.267527630688153e-06, "loss": 0.45839472, "memory(GiB)": 34.88, "step": 45745, "train_speed(iter/s)": 0.412983 }, { "acc": 0.89515057, "epoch": 1.2387295914223053, "grad_norm": 8.247386932373047, "learning_rate": 8.267104047581242e-06, "loss": 0.5187007, "memory(GiB)": 34.88, "step": 45750, "train_speed(iter/s)": 0.412984 }, { "acc": 0.88720341, "epoch": 1.238864971705521, "grad_norm": 4.321102142333984, "learning_rate": 8.266680423553698e-06, "loss": 0.55324154, "memory(GiB)": 34.88, "step": 45755, "train_speed(iter/s)": 0.412986 }, { "acc": 0.87994089, "epoch": 1.2390003519887363, "grad_norm": 7.281996726989746, "learning_rate": 8.266256758610826e-06, "loss": 0.65984259, "memory(GiB)": 34.88, "step": 45760, "train_speed(iter/s)": 0.412988 }, { "acc": 0.89680204, "epoch": 1.2391357322719518, "grad_norm": 15.50943374633789, "learning_rate": 8.265833052757933e-06, "loss": 0.56839848, "memory(GiB)": 34.88, "step": 45765, "train_speed(iter/s)": 0.41299 }, { "acc": 0.87596817, "epoch": 1.2392711125551674, "grad_norm": 12.882950782775879, "learning_rate": 8.265409306000327e-06, "loss": 0.60350847, "memory(GiB)": 34.88, "step": 45770, "train_speed(iter/s)": 0.412993 }, { "acc": 0.91276913, "epoch": 1.239406492838383, "grad_norm": 5.684508800506592, "learning_rate": 8.264985518343317e-06, "loss": 0.51038408, "memory(GiB)": 34.88, "step": 45775, "train_speed(iter/s)": 0.412995 }, { "acc": 0.89635077, "epoch": 1.2395418731215986, "grad_norm": 5.5617289543151855, "learning_rate": 8.264561689792211e-06, "loss": 0.49068813, "memory(GiB)": 34.88, "step": 45780, "train_speed(iter/s)": 0.412997 }, { "acc": 0.89039106, "epoch": 1.2396772534048142, "grad_norm": 13.02244758605957, "learning_rate": 8.26413782035232e-06, "loss": 0.62356753, "memory(GiB)": 34.88, "step": 45785, "train_speed(iter/s)": 0.412999 }, { "acc": 0.88836899, "epoch": 1.2398126336880297, "grad_norm": 6.538303375244141, "learning_rate": 8.26371391002895e-06, "loss": 0.56860542, "memory(GiB)": 34.88, "step": 45790, "train_speed(iter/s)": 0.413002 }, { "acc": 0.8886179, "epoch": 1.2399480139712453, "grad_norm": 11.46229362487793, "learning_rate": 8.263289958827413e-06, "loss": 0.65362978, "memory(GiB)": 34.88, "step": 45795, "train_speed(iter/s)": 0.413004 }, { "acc": 0.88371201, "epoch": 1.240083394254461, "grad_norm": 10.2171049118042, "learning_rate": 8.262865966753024e-06, "loss": 0.52498837, "memory(GiB)": 34.88, "step": 45800, "train_speed(iter/s)": 0.413006 }, { "acc": 0.8912508, "epoch": 1.2402187745376763, "grad_norm": 7.500174045562744, "learning_rate": 8.262441933811086e-06, "loss": 0.57180996, "memory(GiB)": 34.88, "step": 45805, "train_speed(iter/s)": 0.413008 }, { "acc": 0.90708714, "epoch": 1.2403541548208918, "grad_norm": 6.255893707275391, "learning_rate": 8.262017860006918e-06, "loss": 0.50827856, "memory(GiB)": 34.88, "step": 45810, "train_speed(iter/s)": 0.41301 }, { "acc": 0.89448547, "epoch": 1.2404895351041074, "grad_norm": 9.156390190124512, "learning_rate": 8.261593745345832e-06, "loss": 0.55797863, "memory(GiB)": 34.88, "step": 45815, "train_speed(iter/s)": 0.413012 }, { "acc": 0.88609524, "epoch": 1.240624915387323, "grad_norm": 6.206665515899658, "learning_rate": 8.261169589833136e-06, "loss": 0.56087089, "memory(GiB)": 34.88, "step": 45820, "train_speed(iter/s)": 0.413014 }, { "acc": 0.88699551, "epoch": 1.2407602956705386, "grad_norm": 30.401212692260742, "learning_rate": 8.260745393474146e-06, "loss": 0.56143351, "memory(GiB)": 34.88, "step": 45825, "train_speed(iter/s)": 0.413017 }, { "acc": 0.89886856, "epoch": 1.2408956759537542, "grad_norm": 5.706109046936035, "learning_rate": 8.260321156274177e-06, "loss": 0.51169925, "memory(GiB)": 34.88, "step": 45830, "train_speed(iter/s)": 0.413019 }, { "acc": 0.89065857, "epoch": 1.2410310562369697, "grad_norm": 4.464654922485352, "learning_rate": 8.259896878238543e-06, "loss": 0.52725697, "memory(GiB)": 34.88, "step": 45835, "train_speed(iter/s)": 0.413021 }, { "acc": 0.907409, "epoch": 1.241166436520185, "grad_norm": 4.0684733390808105, "learning_rate": 8.259472559372559e-06, "loss": 0.46952915, "memory(GiB)": 34.88, "step": 45840, "train_speed(iter/s)": 0.413023 }, { "acc": 0.8850687, "epoch": 1.2413018168034007, "grad_norm": 18.93104362487793, "learning_rate": 8.259048199681539e-06, "loss": 0.58016663, "memory(GiB)": 34.88, "step": 45845, "train_speed(iter/s)": 0.413025 }, { "acc": 0.90805349, "epoch": 1.2414371970866163, "grad_norm": 5.357059001922607, "learning_rate": 8.258623799170797e-06, "loss": 0.43369322, "memory(GiB)": 34.88, "step": 45850, "train_speed(iter/s)": 0.413027 }, { "acc": 0.89462948, "epoch": 1.2415725773698318, "grad_norm": 10.854412078857422, "learning_rate": 8.258199357845656e-06, "loss": 0.54230375, "memory(GiB)": 34.88, "step": 45855, "train_speed(iter/s)": 0.413029 }, { "acc": 0.89072504, "epoch": 1.2417079576530474, "grad_norm": 59.59682846069336, "learning_rate": 8.257774875711428e-06, "loss": 0.6281743, "memory(GiB)": 34.88, "step": 45860, "train_speed(iter/s)": 0.413031 }, { "acc": 0.8978363, "epoch": 1.241843337936263, "grad_norm": 3.743431329727173, "learning_rate": 8.257350352773432e-06, "loss": 0.48219986, "memory(GiB)": 34.88, "step": 45865, "train_speed(iter/s)": 0.413034 }, { "acc": 0.90170259, "epoch": 1.2419787182194786, "grad_norm": 5.653620719909668, "learning_rate": 8.256925789036985e-06, "loss": 0.43653193, "memory(GiB)": 34.88, "step": 45870, "train_speed(iter/s)": 0.413036 }, { "acc": 0.88202038, "epoch": 1.2421140985026942, "grad_norm": 7.374666690826416, "learning_rate": 8.256501184507405e-06, "loss": 0.56597424, "memory(GiB)": 34.88, "step": 45875, "train_speed(iter/s)": 0.413038 }, { "acc": 0.89120922, "epoch": 1.2422494787859097, "grad_norm": 12.560218811035156, "learning_rate": 8.256076539190015e-06, "loss": 0.53162012, "memory(GiB)": 34.88, "step": 45880, "train_speed(iter/s)": 0.413041 }, { "acc": 0.90230942, "epoch": 1.242384859069125, "grad_norm": 7.763488292694092, "learning_rate": 8.255651853090128e-06, "loss": 0.53899837, "memory(GiB)": 34.88, "step": 45885, "train_speed(iter/s)": 0.413043 }, { "acc": 0.90408077, "epoch": 1.2425202393523407, "grad_norm": 11.124801635742188, "learning_rate": 8.255227126213072e-06, "loss": 0.51720686, "memory(GiB)": 34.88, "step": 45890, "train_speed(iter/s)": 0.413045 }, { "acc": 0.89302855, "epoch": 1.2426556196355563, "grad_norm": 8.045431137084961, "learning_rate": 8.25480235856416e-06, "loss": 0.56991482, "memory(GiB)": 34.88, "step": 45895, "train_speed(iter/s)": 0.413047 }, { "acc": 0.88333492, "epoch": 1.2427909999187718, "grad_norm": 11.19533920288086, "learning_rate": 8.254377550148716e-06, "loss": 0.57498808, "memory(GiB)": 34.88, "step": 45900, "train_speed(iter/s)": 0.413049 }, { "acc": 0.86763382, "epoch": 1.2429263802019874, "grad_norm": 8.978872299194336, "learning_rate": 8.253952700972064e-06, "loss": 0.66848869, "memory(GiB)": 34.88, "step": 45905, "train_speed(iter/s)": 0.413051 }, { "acc": 0.90217524, "epoch": 1.243061760485203, "grad_norm": 7.758056640625, "learning_rate": 8.253527811039524e-06, "loss": 0.48016849, "memory(GiB)": 34.88, "step": 45910, "train_speed(iter/s)": 0.413054 }, { "acc": 0.88341599, "epoch": 1.2431971407684186, "grad_norm": 7.683410167694092, "learning_rate": 8.253102880356417e-06, "loss": 0.55063787, "memory(GiB)": 34.88, "step": 45915, "train_speed(iter/s)": 0.413056 }, { "acc": 0.88931751, "epoch": 1.243332521051634, "grad_norm": 9.073358535766602, "learning_rate": 8.252677908928067e-06, "loss": 0.56301489, "memory(GiB)": 34.88, "step": 45920, "train_speed(iter/s)": 0.413058 }, { "acc": 0.88894577, "epoch": 1.2434679013348495, "grad_norm": 7.360020637512207, "learning_rate": 8.2522528967598e-06, "loss": 0.59546041, "memory(GiB)": 34.88, "step": 45925, "train_speed(iter/s)": 0.41306 }, { "acc": 0.89915485, "epoch": 1.243603281618065, "grad_norm": 5.5978169441223145, "learning_rate": 8.251827843856936e-06, "loss": 0.45811682, "memory(GiB)": 34.88, "step": 45930, "train_speed(iter/s)": 0.413063 }, { "acc": 0.88751841, "epoch": 1.2437386619012807, "grad_norm": 13.553837776184082, "learning_rate": 8.251402750224805e-06, "loss": 0.67630606, "memory(GiB)": 34.88, "step": 45935, "train_speed(iter/s)": 0.413064 }, { "acc": 0.90751839, "epoch": 1.2438740421844963, "grad_norm": 7.1922149658203125, "learning_rate": 8.250977615868726e-06, "loss": 0.52667217, "memory(GiB)": 34.88, "step": 45940, "train_speed(iter/s)": 0.413067 }, { "acc": 0.89107513, "epoch": 1.2440094224677118, "grad_norm": 7.030262470245361, "learning_rate": 8.25055244079403e-06, "loss": 0.59459028, "memory(GiB)": 34.88, "step": 45945, "train_speed(iter/s)": 0.413069 }, { "acc": 0.87194109, "epoch": 1.2441448027509274, "grad_norm": 8.735325813293457, "learning_rate": 8.25012722500604e-06, "loss": 0.69393296, "memory(GiB)": 34.88, "step": 45950, "train_speed(iter/s)": 0.413071 }, { "acc": 0.87950974, "epoch": 1.2442801830341428, "grad_norm": 10.432391166687012, "learning_rate": 8.249701968510084e-06, "loss": 0.62875018, "memory(GiB)": 34.88, "step": 45955, "train_speed(iter/s)": 0.413074 }, { "acc": 0.89190731, "epoch": 1.2444155633173584, "grad_norm": 8.243121147155762, "learning_rate": 8.249276671311487e-06, "loss": 0.58183384, "memory(GiB)": 34.88, "step": 45960, "train_speed(iter/s)": 0.413076 }, { "acc": 0.89720287, "epoch": 1.244550943600574, "grad_norm": 12.093396186828613, "learning_rate": 8.24885133341558e-06, "loss": 0.5873188, "memory(GiB)": 34.88, "step": 45965, "train_speed(iter/s)": 0.413078 }, { "acc": 0.87905922, "epoch": 1.2446863238837895, "grad_norm": 6.145834922790527, "learning_rate": 8.248425954827689e-06, "loss": 0.63733444, "memory(GiB)": 34.88, "step": 45970, "train_speed(iter/s)": 0.41308 }, { "acc": 0.89386883, "epoch": 1.244821704167005, "grad_norm": 6.078123092651367, "learning_rate": 8.248000535553143e-06, "loss": 0.58609142, "memory(GiB)": 34.88, "step": 45975, "train_speed(iter/s)": 0.413082 }, { "acc": 0.88945751, "epoch": 1.2449570844502207, "grad_norm": 6.620100021362305, "learning_rate": 8.247575075597273e-06, "loss": 0.60056119, "memory(GiB)": 34.88, "step": 45980, "train_speed(iter/s)": 0.413085 }, { "acc": 0.88739758, "epoch": 1.2450924647334363, "grad_norm": 8.046758651733398, "learning_rate": 8.247149574965406e-06, "loss": 0.62217627, "memory(GiB)": 34.88, "step": 45985, "train_speed(iter/s)": 0.413087 }, { "acc": 0.88131495, "epoch": 1.2452278450166518, "grad_norm": 12.799596786499023, "learning_rate": 8.246724033662872e-06, "loss": 0.52541795, "memory(GiB)": 34.88, "step": 45990, "train_speed(iter/s)": 0.413089 }, { "acc": 0.89466267, "epoch": 1.2453632252998674, "grad_norm": 7.034102916717529, "learning_rate": 8.246298451695005e-06, "loss": 0.64414907, "memory(GiB)": 34.88, "step": 45995, "train_speed(iter/s)": 0.413091 }, { "acc": 0.89752493, "epoch": 1.2454986055830828, "grad_norm": 6.8545403480529785, "learning_rate": 8.245872829067135e-06, "loss": 0.53314953, "memory(GiB)": 34.88, "step": 46000, "train_speed(iter/s)": 0.413093 }, { "acc": 0.87483273, "epoch": 1.2456339858662984, "grad_norm": 9.30691146850586, "learning_rate": 8.245447165784591e-06, "loss": 0.60719476, "memory(GiB)": 34.88, "step": 46005, "train_speed(iter/s)": 0.413095 }, { "acc": 0.90192175, "epoch": 1.245769366149514, "grad_norm": 14.879396438598633, "learning_rate": 8.245021461852708e-06, "loss": 0.53375201, "memory(GiB)": 34.88, "step": 46010, "train_speed(iter/s)": 0.413097 }, { "acc": 0.90788069, "epoch": 1.2459047464327295, "grad_norm": 8.128730773925781, "learning_rate": 8.244595717276819e-06, "loss": 0.43165612, "memory(GiB)": 34.88, "step": 46015, "train_speed(iter/s)": 0.413099 }, { "acc": 0.86325207, "epoch": 1.246040126715945, "grad_norm": 9.588441848754883, "learning_rate": 8.244169932062256e-06, "loss": 0.79682465, "memory(GiB)": 34.88, "step": 46020, "train_speed(iter/s)": 0.413101 }, { "acc": 0.88806458, "epoch": 1.2461755069991607, "grad_norm": 11.058107376098633, "learning_rate": 8.243744106214354e-06, "loss": 0.73675532, "memory(GiB)": 34.88, "step": 46025, "train_speed(iter/s)": 0.413103 }, { "acc": 0.89410181, "epoch": 1.2463108872823763, "grad_norm": 6.754613876342773, "learning_rate": 8.243318239738448e-06, "loss": 0.51809983, "memory(GiB)": 34.88, "step": 46030, "train_speed(iter/s)": 0.413106 }, { "acc": 0.89581842, "epoch": 1.2464462675655916, "grad_norm": 5.856297492980957, "learning_rate": 8.242892332639867e-06, "loss": 0.52436132, "memory(GiB)": 34.88, "step": 46035, "train_speed(iter/s)": 0.413108 }, { "acc": 0.89280062, "epoch": 1.2465816478488072, "grad_norm": 13.954330444335938, "learning_rate": 8.242466384923955e-06, "loss": 0.58742785, "memory(GiB)": 34.88, "step": 46040, "train_speed(iter/s)": 0.41311 }, { "acc": 0.88760433, "epoch": 1.2467170281320228, "grad_norm": 7.6855292320251465, "learning_rate": 8.24204039659604e-06, "loss": 0.57278595, "memory(GiB)": 34.88, "step": 46045, "train_speed(iter/s)": 0.413112 }, { "acc": 0.8858099, "epoch": 1.2468524084152384, "grad_norm": 22.819793701171875, "learning_rate": 8.241614367661465e-06, "loss": 0.59398251, "memory(GiB)": 34.88, "step": 46050, "train_speed(iter/s)": 0.413115 }, { "acc": 0.89306126, "epoch": 1.246987788698454, "grad_norm": 5.540124893188477, "learning_rate": 8.241188298125562e-06, "loss": 0.51466808, "memory(GiB)": 34.88, "step": 46055, "train_speed(iter/s)": 0.413116 }, { "acc": 0.91531935, "epoch": 1.2471231689816695, "grad_norm": 7.377894401550293, "learning_rate": 8.24076218799367e-06, "loss": 0.40840697, "memory(GiB)": 34.88, "step": 46060, "train_speed(iter/s)": 0.413118 }, { "acc": 0.89897327, "epoch": 1.247258549264885, "grad_norm": 5.486326694488525, "learning_rate": 8.240336037271128e-06, "loss": 0.46196442, "memory(GiB)": 34.88, "step": 46065, "train_speed(iter/s)": 0.413121 }, { "acc": 0.90986862, "epoch": 1.2473939295481007, "grad_norm": 8.52928638458252, "learning_rate": 8.239909845963274e-06, "loss": 0.53736973, "memory(GiB)": 34.88, "step": 46070, "train_speed(iter/s)": 0.413123 }, { "acc": 0.90230293, "epoch": 1.2475293098313163, "grad_norm": 3.839334726333618, "learning_rate": 8.239483614075444e-06, "loss": 0.42428017, "memory(GiB)": 34.88, "step": 46075, "train_speed(iter/s)": 0.413125 }, { "acc": 0.89368839, "epoch": 1.2476646901145316, "grad_norm": 13.102836608886719, "learning_rate": 8.239057341612981e-06, "loss": 0.61128993, "memory(GiB)": 34.88, "step": 46080, "train_speed(iter/s)": 0.413127 }, { "acc": 0.90018215, "epoch": 1.2478000703977472, "grad_norm": 7.681959629058838, "learning_rate": 8.238631028581224e-06, "loss": 0.50437398, "memory(GiB)": 34.88, "step": 46085, "train_speed(iter/s)": 0.413129 }, { "acc": 0.90974617, "epoch": 1.2479354506809628, "grad_norm": 6.419723033905029, "learning_rate": 8.238204674985513e-06, "loss": 0.45402188, "memory(GiB)": 34.88, "step": 46090, "train_speed(iter/s)": 0.413131 }, { "acc": 0.89635906, "epoch": 1.2480708309641784, "grad_norm": 8.96529483795166, "learning_rate": 8.237778280831188e-06, "loss": 0.51576996, "memory(GiB)": 34.88, "step": 46095, "train_speed(iter/s)": 0.413133 }, { "acc": 0.9029377, "epoch": 1.248206211247394, "grad_norm": 7.638579368591309, "learning_rate": 8.237351846123592e-06, "loss": 0.4755744, "memory(GiB)": 34.88, "step": 46100, "train_speed(iter/s)": 0.413135 }, { "acc": 0.88973103, "epoch": 1.2483415915306095, "grad_norm": 7.0565924644470215, "learning_rate": 8.236925370868066e-06, "loss": 0.58397074, "memory(GiB)": 34.88, "step": 46105, "train_speed(iter/s)": 0.413138 }, { "acc": 0.8774704, "epoch": 1.248476971813825, "grad_norm": 7.897924423217773, "learning_rate": 8.236498855069953e-06, "loss": 0.61256905, "memory(GiB)": 34.88, "step": 46110, "train_speed(iter/s)": 0.41314 }, { "acc": 0.87545795, "epoch": 1.2486123520970405, "grad_norm": 12.088837623596191, "learning_rate": 8.236072298734594e-06, "loss": 0.70328007, "memory(GiB)": 34.88, "step": 46115, "train_speed(iter/s)": 0.413142 }, { "acc": 0.90731049, "epoch": 1.248747732380256, "grad_norm": 6.950376510620117, "learning_rate": 8.235645701867336e-06, "loss": 0.44119639, "memory(GiB)": 34.88, "step": 46120, "train_speed(iter/s)": 0.413144 }, { "acc": 0.90365181, "epoch": 1.2488831126634716, "grad_norm": 12.026183128356934, "learning_rate": 8.23521906447352e-06, "loss": 0.50206022, "memory(GiB)": 34.88, "step": 46125, "train_speed(iter/s)": 0.413146 }, { "acc": 0.88633137, "epoch": 1.2490184929466872, "grad_norm": 4.707333564758301, "learning_rate": 8.23479238655849e-06, "loss": 0.59618688, "memory(GiB)": 34.88, "step": 46130, "train_speed(iter/s)": 0.413148 }, { "acc": 0.87374468, "epoch": 1.2491538732299028, "grad_norm": 16.49530601501465, "learning_rate": 8.234365668127596e-06, "loss": 0.69026561, "memory(GiB)": 34.88, "step": 46135, "train_speed(iter/s)": 0.41315 }, { "acc": 0.90505838, "epoch": 1.2492892535131184, "grad_norm": 11.23263168334961, "learning_rate": 8.233938909186178e-06, "loss": 0.47944326, "memory(GiB)": 34.88, "step": 46140, "train_speed(iter/s)": 0.413152 }, { "acc": 0.91047935, "epoch": 1.249424633796334, "grad_norm": 4.033268451690674, "learning_rate": 8.233512109739582e-06, "loss": 0.41999788, "memory(GiB)": 34.88, "step": 46145, "train_speed(iter/s)": 0.413154 }, { "acc": 0.89506245, "epoch": 1.2495600140795495, "grad_norm": 10.607429504394531, "learning_rate": 8.233085269793161e-06, "loss": 0.6289834, "memory(GiB)": 34.88, "step": 46150, "train_speed(iter/s)": 0.413157 }, { "acc": 0.89565582, "epoch": 1.249695394362765, "grad_norm": 2.8635365962982178, "learning_rate": 8.232658389352254e-06, "loss": 0.5632195, "memory(GiB)": 34.88, "step": 46155, "train_speed(iter/s)": 0.413159 }, { "acc": 0.89760256, "epoch": 1.2498307746459805, "grad_norm": 8.032485961914062, "learning_rate": 8.232231468422214e-06, "loss": 0.51807804, "memory(GiB)": 34.88, "step": 46160, "train_speed(iter/s)": 0.413161 }, { "acc": 0.90118008, "epoch": 1.249966154929196, "grad_norm": 7.654090881347656, "learning_rate": 8.231804507008387e-06, "loss": 0.57239952, "memory(GiB)": 34.88, "step": 46165, "train_speed(iter/s)": 0.413163 }, { "acc": 0.9026289, "epoch": 1.2501015352124116, "grad_norm": 8.692169189453125, "learning_rate": 8.231377505116119e-06, "loss": 0.52042446, "memory(GiB)": 34.88, "step": 46170, "train_speed(iter/s)": 0.413165 }, { "acc": 0.88999205, "epoch": 1.2502369154956272, "grad_norm": 9.08998966217041, "learning_rate": 8.230950462750765e-06, "loss": 0.57393565, "memory(GiB)": 34.88, "step": 46175, "train_speed(iter/s)": 0.413167 }, { "acc": 0.88960495, "epoch": 1.2503722957788428, "grad_norm": 9.515687942504883, "learning_rate": 8.230523379917668e-06, "loss": 0.56220617, "memory(GiB)": 34.88, "step": 46180, "train_speed(iter/s)": 0.413169 }, { "acc": 0.89601974, "epoch": 1.2505076760620584, "grad_norm": 18.828371047973633, "learning_rate": 8.230096256622184e-06, "loss": 0.47952499, "memory(GiB)": 34.88, "step": 46185, "train_speed(iter/s)": 0.413172 }, { "acc": 0.87447834, "epoch": 1.250643056345274, "grad_norm": 10.151407241821289, "learning_rate": 8.229669092869658e-06, "loss": 0.61123123, "memory(GiB)": 34.88, "step": 46190, "train_speed(iter/s)": 0.413174 }, { "acc": 0.89165392, "epoch": 1.2507784366284893, "grad_norm": 14.698942184448242, "learning_rate": 8.229241888665445e-06, "loss": 0.52657413, "memory(GiB)": 34.88, "step": 46195, "train_speed(iter/s)": 0.413176 }, { "acc": 0.88238201, "epoch": 1.2509138169117051, "grad_norm": 9.321390151977539, "learning_rate": 8.228814644014896e-06, "loss": 0.68037233, "memory(GiB)": 34.88, "step": 46200, "train_speed(iter/s)": 0.413178 }, { "acc": 0.8755188, "epoch": 1.2510491971949205, "grad_norm": 14.66555118560791, "learning_rate": 8.228387358923362e-06, "loss": 0.61002154, "memory(GiB)": 34.88, "step": 46205, "train_speed(iter/s)": 0.413181 }, { "acc": 0.91096067, "epoch": 1.251184577478136, "grad_norm": 10.599218368530273, "learning_rate": 8.227960033396195e-06, "loss": 0.48775582, "memory(GiB)": 34.88, "step": 46210, "train_speed(iter/s)": 0.413183 }, { "acc": 0.91174221, "epoch": 1.2513199577613516, "grad_norm": 10.7135591506958, "learning_rate": 8.22753266743875e-06, "loss": 0.44161472, "memory(GiB)": 34.88, "step": 46215, "train_speed(iter/s)": 0.413185 }, { "acc": 0.87836342, "epoch": 1.2514553380445672, "grad_norm": 18.195087432861328, "learning_rate": 8.227105261056381e-06, "loss": 0.69367704, "memory(GiB)": 34.88, "step": 46220, "train_speed(iter/s)": 0.413187 }, { "acc": 0.89755259, "epoch": 1.2515907183277828, "grad_norm": 6.580015659332275, "learning_rate": 8.226677814254438e-06, "loss": 0.54894209, "memory(GiB)": 34.88, "step": 46225, "train_speed(iter/s)": 0.413189 }, { "acc": 0.89521046, "epoch": 1.2517260986109984, "grad_norm": 6.165887832641602, "learning_rate": 8.226250327038282e-06, "loss": 0.53655052, "memory(GiB)": 34.88, "step": 46230, "train_speed(iter/s)": 0.413192 }, { "acc": 0.89284201, "epoch": 1.251861478894214, "grad_norm": 4.53320837020874, "learning_rate": 8.225822799413262e-06, "loss": 0.57763839, "memory(GiB)": 34.88, "step": 46235, "train_speed(iter/s)": 0.413193 }, { "acc": 0.87855911, "epoch": 1.2519968591774293, "grad_norm": 10.211610794067383, "learning_rate": 8.225395231384738e-06, "loss": 0.72229733, "memory(GiB)": 34.88, "step": 46240, "train_speed(iter/s)": 0.413195 }, { "acc": 0.87943077, "epoch": 1.252132239460645, "grad_norm": 5.80930233001709, "learning_rate": 8.224967622958062e-06, "loss": 0.70052958, "memory(GiB)": 34.88, "step": 46245, "train_speed(iter/s)": 0.413198 }, { "acc": 0.896626, "epoch": 1.2522676197438605, "grad_norm": 7.978838920593262, "learning_rate": 8.224539974138593e-06, "loss": 0.52522359, "memory(GiB)": 34.88, "step": 46250, "train_speed(iter/s)": 0.4132 }, { "acc": 0.89611912, "epoch": 1.252403000027076, "grad_norm": 7.726938724517822, "learning_rate": 8.22411228493169e-06, "loss": 0.56244068, "memory(GiB)": 34.88, "step": 46255, "train_speed(iter/s)": 0.413202 }, { "acc": 0.8881794, "epoch": 1.2525383803102916, "grad_norm": 8.636898040771484, "learning_rate": 8.223684555342708e-06, "loss": 0.62473221, "memory(GiB)": 34.88, "step": 46260, "train_speed(iter/s)": 0.413204 }, { "acc": 0.89875517, "epoch": 1.2526737605935072, "grad_norm": 8.753504753112793, "learning_rate": 8.223256785377007e-06, "loss": 0.51120663, "memory(GiB)": 34.88, "step": 46265, "train_speed(iter/s)": 0.413207 }, { "acc": 0.89740353, "epoch": 1.2528091408767228, "grad_norm": 9.602381706237793, "learning_rate": 8.222828975039943e-06, "loss": 0.54240599, "memory(GiB)": 34.88, "step": 46270, "train_speed(iter/s)": 0.413209 }, { "acc": 0.88757782, "epoch": 1.2529445211599382, "grad_norm": 8.09627628326416, "learning_rate": 8.222401124336877e-06, "loss": 0.6678483, "memory(GiB)": 34.88, "step": 46275, "train_speed(iter/s)": 0.413211 }, { "acc": 0.88193016, "epoch": 1.253079901443154, "grad_norm": 12.871307373046875, "learning_rate": 8.22197323327317e-06, "loss": 0.60535092, "memory(GiB)": 34.88, "step": 46280, "train_speed(iter/s)": 0.413213 }, { "acc": 0.87951803, "epoch": 1.2532152817263693, "grad_norm": 19.227313995361328, "learning_rate": 8.221545301854178e-06, "loss": 0.61097546, "memory(GiB)": 34.88, "step": 46285, "train_speed(iter/s)": 0.413215 }, { "acc": 0.90284433, "epoch": 1.253350662009585, "grad_norm": 7.91107702255249, "learning_rate": 8.221117330085265e-06, "loss": 0.54071827, "memory(GiB)": 34.88, "step": 46290, "train_speed(iter/s)": 0.413217 }, { "acc": 0.87880268, "epoch": 1.2534860422928005, "grad_norm": 7.949373245239258, "learning_rate": 8.220689317971792e-06, "loss": 0.58655643, "memory(GiB)": 34.88, "step": 46295, "train_speed(iter/s)": 0.413219 }, { "acc": 0.8903616, "epoch": 1.253621422576016, "grad_norm": 4.474173069000244, "learning_rate": 8.22026126551912e-06, "loss": 0.5830348, "memory(GiB)": 34.88, "step": 46300, "train_speed(iter/s)": 0.413222 }, { "acc": 0.87556391, "epoch": 1.2537568028592316, "grad_norm": 8.220227241516113, "learning_rate": 8.219833172732612e-06, "loss": 0.72505112, "memory(GiB)": 34.88, "step": 46305, "train_speed(iter/s)": 0.413224 }, { "acc": 0.8832119, "epoch": 1.2538921831424472, "grad_norm": 13.244056701660156, "learning_rate": 8.219405039617628e-06, "loss": 0.66655607, "memory(GiB)": 34.88, "step": 46310, "train_speed(iter/s)": 0.413226 }, { "acc": 0.89191713, "epoch": 1.2540275634256628, "grad_norm": 13.49150562286377, "learning_rate": 8.218976866179536e-06, "loss": 0.51777124, "memory(GiB)": 34.88, "step": 46315, "train_speed(iter/s)": 0.413228 }, { "acc": 0.89721851, "epoch": 1.2541629437088782, "grad_norm": 12.42317008972168, "learning_rate": 8.218548652423694e-06, "loss": 0.59018373, "memory(GiB)": 34.88, "step": 46320, "train_speed(iter/s)": 0.41323 }, { "acc": 0.89935465, "epoch": 1.2542983239920937, "grad_norm": 9.310750961303711, "learning_rate": 8.21812039835547e-06, "loss": 0.51556048, "memory(GiB)": 34.88, "step": 46325, "train_speed(iter/s)": 0.413232 }, { "acc": 0.90657005, "epoch": 1.2544337042753093, "grad_norm": 5.352539539337158, "learning_rate": 8.217692103980229e-06, "loss": 0.49998498, "memory(GiB)": 34.88, "step": 46330, "train_speed(iter/s)": 0.413234 }, { "acc": 0.9056694, "epoch": 1.254569084558525, "grad_norm": 6.856354236602783, "learning_rate": 8.217263769303336e-06, "loss": 0.44140196, "memory(GiB)": 34.88, "step": 46335, "train_speed(iter/s)": 0.413237 }, { "acc": 0.88775826, "epoch": 1.2547044648417405, "grad_norm": 11.104561805725098, "learning_rate": 8.216835394330154e-06, "loss": 0.61112823, "memory(GiB)": 34.88, "step": 46340, "train_speed(iter/s)": 0.413239 }, { "acc": 0.84852448, "epoch": 1.254839845124956, "grad_norm": 12.10808277130127, "learning_rate": 8.216406979066052e-06, "loss": 0.87522812, "memory(GiB)": 34.88, "step": 46345, "train_speed(iter/s)": 0.413241 }, { "acc": 0.89695301, "epoch": 1.2549752254081716, "grad_norm": 13.99771785736084, "learning_rate": 8.215978523516394e-06, "loss": 0.55606451, "memory(GiB)": 34.88, "step": 46350, "train_speed(iter/s)": 0.413243 }, { "acc": 0.90180244, "epoch": 1.255110605691387, "grad_norm": 7.788909912109375, "learning_rate": 8.215550027686552e-06, "loss": 0.45810251, "memory(GiB)": 34.88, "step": 46355, "train_speed(iter/s)": 0.413245 }, { "acc": 0.90786572, "epoch": 1.2552459859746028, "grad_norm": 8.9488525390625, "learning_rate": 8.21512149158189e-06, "loss": 0.49305449, "memory(GiB)": 34.88, "step": 46360, "train_speed(iter/s)": 0.413247 }, { "acc": 0.88376904, "epoch": 1.2553813662578182, "grad_norm": 5.691237449645996, "learning_rate": 8.214692915207776e-06, "loss": 0.64025965, "memory(GiB)": 34.88, "step": 46365, "train_speed(iter/s)": 0.41325 }, { "acc": 0.87477379, "epoch": 1.2555167465410337, "grad_norm": 12.060750007629395, "learning_rate": 8.21426429856958e-06, "loss": 0.58322377, "memory(GiB)": 34.88, "step": 46370, "train_speed(iter/s)": 0.413251 }, { "acc": 0.8930006, "epoch": 1.2556521268242493, "grad_norm": 7.2179789543151855, "learning_rate": 8.213835641672672e-06, "loss": 0.63735838, "memory(GiB)": 34.88, "step": 46375, "train_speed(iter/s)": 0.413254 }, { "acc": 0.8995966, "epoch": 1.255787507107465, "grad_norm": 10.42602252960205, "learning_rate": 8.213406944522421e-06, "loss": 0.49793854, "memory(GiB)": 34.88, "step": 46380, "train_speed(iter/s)": 0.413256 }, { "acc": 0.88782282, "epoch": 1.2559228873906805, "grad_norm": 5.041364669799805, "learning_rate": 8.212978207124196e-06, "loss": 0.59839902, "memory(GiB)": 34.88, "step": 46385, "train_speed(iter/s)": 0.413258 }, { "acc": 0.89209919, "epoch": 1.256058267673896, "grad_norm": 11.011982917785645, "learning_rate": 8.212549429483369e-06, "loss": 0.58873882, "memory(GiB)": 34.88, "step": 46390, "train_speed(iter/s)": 0.41326 }, { "acc": 0.89910851, "epoch": 1.2561936479571116, "grad_norm": 21.1939697265625, "learning_rate": 8.212120611605313e-06, "loss": 0.524439, "memory(GiB)": 34.88, "step": 46395, "train_speed(iter/s)": 0.413263 }, { "acc": 0.88904266, "epoch": 1.256329028240327, "grad_norm": 6.762256145477295, "learning_rate": 8.211691753495395e-06, "loss": 0.55389795, "memory(GiB)": 34.88, "step": 46400, "train_speed(iter/s)": 0.413265 }, { "acc": 0.92302208, "epoch": 1.2564644085235426, "grad_norm": 4.775046348571777, "learning_rate": 8.211262855158993e-06, "loss": 0.40877256, "memory(GiB)": 34.88, "step": 46405, "train_speed(iter/s)": 0.413267 }, { "acc": 0.88512421, "epoch": 1.2565997888067582, "grad_norm": 11.467628479003906, "learning_rate": 8.210833916601476e-06, "loss": 0.64188852, "memory(GiB)": 34.88, "step": 46410, "train_speed(iter/s)": 0.413269 }, { "acc": 0.90373096, "epoch": 1.2567351690899737, "grad_norm": 12.87661361694336, "learning_rate": 8.21040493782822e-06, "loss": 0.48568621, "memory(GiB)": 34.88, "step": 46415, "train_speed(iter/s)": 0.413271 }, { "acc": 0.89781075, "epoch": 1.2568705493731893, "grad_norm": 2.4993600845336914, "learning_rate": 8.209975918844595e-06, "loss": 0.52094307, "memory(GiB)": 34.88, "step": 46420, "train_speed(iter/s)": 0.413273 }, { "acc": 0.87010984, "epoch": 1.257005929656405, "grad_norm": 6.726419925689697, "learning_rate": 8.209546859655978e-06, "loss": 0.70362329, "memory(GiB)": 34.88, "step": 46425, "train_speed(iter/s)": 0.413276 }, { "acc": 0.87496824, "epoch": 1.2571413099396205, "grad_norm": 9.557066917419434, "learning_rate": 8.209117760267745e-06, "loss": 0.62585478, "memory(GiB)": 34.88, "step": 46430, "train_speed(iter/s)": 0.413278 }, { "acc": 0.90095673, "epoch": 1.2572766902228358, "grad_norm": 7.926076889038086, "learning_rate": 8.208688620685268e-06, "loss": 0.53519435, "memory(GiB)": 34.88, "step": 46435, "train_speed(iter/s)": 0.41328 }, { "acc": 0.89863968, "epoch": 1.2574120705060514, "grad_norm": 5.784131050109863, "learning_rate": 8.208259440913924e-06, "loss": 0.59326239, "memory(GiB)": 34.88, "step": 46440, "train_speed(iter/s)": 0.413282 }, { "acc": 0.89545088, "epoch": 1.257547450789267, "grad_norm": 3.3359627723693848, "learning_rate": 8.20783022095909e-06, "loss": 0.47209506, "memory(GiB)": 34.88, "step": 46445, "train_speed(iter/s)": 0.413284 }, { "acc": 0.88252869, "epoch": 1.2576828310724826, "grad_norm": 11.527177810668945, "learning_rate": 8.207400960826142e-06, "loss": 0.62177663, "memory(GiB)": 34.88, "step": 46450, "train_speed(iter/s)": 0.413286 }, { "acc": 0.8958662, "epoch": 1.2578182113556982, "grad_norm": 6.2295918464660645, "learning_rate": 8.206971660520462e-06, "loss": 0.56141863, "memory(GiB)": 34.88, "step": 46455, "train_speed(iter/s)": 0.413288 }, { "acc": 0.90054426, "epoch": 1.2579535916389137, "grad_norm": 8.04110336303711, "learning_rate": 8.20654232004742e-06, "loss": 0.55209842, "memory(GiB)": 34.88, "step": 46460, "train_speed(iter/s)": 0.41329 }, { "acc": 0.88633051, "epoch": 1.2580889719221293, "grad_norm": 18.51313591003418, "learning_rate": 8.206112939412399e-06, "loss": 0.58608794, "memory(GiB)": 34.88, "step": 46465, "train_speed(iter/s)": 0.413293 }, { "acc": 0.90063038, "epoch": 1.2582243522053447, "grad_norm": 6.1955885887146, "learning_rate": 8.205683518620776e-06, "loss": 0.53874016, "memory(GiB)": 34.88, "step": 46470, "train_speed(iter/s)": 0.413294 }, { "acc": 0.88422737, "epoch": 1.2583597324885605, "grad_norm": 23.07221794128418, "learning_rate": 8.205254057677931e-06, "loss": 0.60626707, "memory(GiB)": 34.88, "step": 46475, "train_speed(iter/s)": 0.413297 }, { "acc": 0.87921829, "epoch": 1.2584951127717758, "grad_norm": 4.983969211578369, "learning_rate": 8.204824556589246e-06, "loss": 0.66320605, "memory(GiB)": 34.88, "step": 46480, "train_speed(iter/s)": 0.413299 }, { "acc": 0.88687115, "epoch": 1.2586304930549914, "grad_norm": 9.9083890914917, "learning_rate": 8.204395015360097e-06, "loss": 0.67045193, "memory(GiB)": 34.88, "step": 46485, "train_speed(iter/s)": 0.413301 }, { "acc": 0.89685526, "epoch": 1.258765873338207, "grad_norm": 11.658021926879883, "learning_rate": 8.20396543399587e-06, "loss": 0.51935391, "memory(GiB)": 34.88, "step": 46490, "train_speed(iter/s)": 0.413303 }, { "acc": 0.89114532, "epoch": 1.2589012536214226, "grad_norm": 9.456783294677734, "learning_rate": 8.203535812501943e-06, "loss": 0.58960724, "memory(GiB)": 34.88, "step": 46495, "train_speed(iter/s)": 0.413305 }, { "acc": 0.91106491, "epoch": 1.2590366339046382, "grad_norm": 6.940600872039795, "learning_rate": 8.203106150883698e-06, "loss": 0.41937814, "memory(GiB)": 34.88, "step": 46500, "train_speed(iter/s)": 0.413308 }, { "acc": 0.85701656, "epoch": 1.2591720141878537, "grad_norm": 13.526033401489258, "learning_rate": 8.202676449146518e-06, "loss": 0.81441498, "memory(GiB)": 34.88, "step": 46505, "train_speed(iter/s)": 0.41331 }, { "acc": 0.90647888, "epoch": 1.2593073944710693, "grad_norm": 6.045708656311035, "learning_rate": 8.202246707295784e-06, "loss": 0.54585342, "memory(GiB)": 34.88, "step": 46510, "train_speed(iter/s)": 0.413312 }, { "acc": 0.89737225, "epoch": 1.2594427747542847, "grad_norm": 5.267436981201172, "learning_rate": 8.201816925336883e-06, "loss": 0.57120371, "memory(GiB)": 34.88, "step": 46515, "train_speed(iter/s)": 0.413314 }, { "acc": 0.89181547, "epoch": 1.2595781550375003, "grad_norm": 5.7451171875, "learning_rate": 8.201387103275195e-06, "loss": 0.54635696, "memory(GiB)": 34.88, "step": 46520, "train_speed(iter/s)": 0.413316 }, { "acc": 0.90730038, "epoch": 1.2597135353207158, "grad_norm": 8.61093807220459, "learning_rate": 8.20095724111611e-06, "loss": 0.51510515, "memory(GiB)": 34.88, "step": 46525, "train_speed(iter/s)": 0.413319 }, { "acc": 0.90868263, "epoch": 1.2598489156039314, "grad_norm": 4.853018760681152, "learning_rate": 8.200527338865007e-06, "loss": 0.40178719, "memory(GiB)": 34.88, "step": 46530, "train_speed(iter/s)": 0.413321 }, { "acc": 0.89341164, "epoch": 1.259984295887147, "grad_norm": 11.543478012084961, "learning_rate": 8.200097396527273e-06, "loss": 0.52078061, "memory(GiB)": 34.88, "step": 46535, "train_speed(iter/s)": 0.413323 }, { "acc": 0.89605646, "epoch": 1.2601196761703626, "grad_norm": 24.368160247802734, "learning_rate": 8.199667414108296e-06, "loss": 0.5947084, "memory(GiB)": 34.88, "step": 46540, "train_speed(iter/s)": 0.413325 }, { "acc": 0.89861679, "epoch": 1.2602550564535782, "grad_norm": 7.158712387084961, "learning_rate": 8.19923739161346e-06, "loss": 0.55700703, "memory(GiB)": 34.88, "step": 46545, "train_speed(iter/s)": 0.413327 }, { "acc": 0.88278685, "epoch": 1.2603904367367935, "grad_norm": 7.859785079956055, "learning_rate": 8.19880732904815e-06, "loss": 0.71334734, "memory(GiB)": 34.88, "step": 46550, "train_speed(iter/s)": 0.413329 }, { "acc": 0.89173489, "epoch": 1.2605258170200093, "grad_norm": 7.6598310470581055, "learning_rate": 8.19837722641776e-06, "loss": 0.60671868, "memory(GiB)": 34.88, "step": 46555, "train_speed(iter/s)": 0.413331 }, { "acc": 0.87782888, "epoch": 1.2606611973032247, "grad_norm": 11.073080062866211, "learning_rate": 8.197947083727672e-06, "loss": 0.60586653, "memory(GiB)": 34.88, "step": 46560, "train_speed(iter/s)": 0.413333 }, { "acc": 0.88212566, "epoch": 1.2607965775864403, "grad_norm": 8.090089797973633, "learning_rate": 8.197516900983277e-06, "loss": 0.62311549, "memory(GiB)": 34.88, "step": 46565, "train_speed(iter/s)": 0.413336 }, { "acc": 0.90255013, "epoch": 1.2609319578696558, "grad_norm": 7.462934494018555, "learning_rate": 8.197086678189962e-06, "loss": 0.45178838, "memory(GiB)": 34.88, "step": 46570, "train_speed(iter/s)": 0.413338 }, { "acc": 0.89970341, "epoch": 1.2610673381528714, "grad_norm": 3.9485673904418945, "learning_rate": 8.19665641535312e-06, "loss": 0.5271471, "memory(GiB)": 34.88, "step": 46575, "train_speed(iter/s)": 0.41334 }, { "acc": 0.86508017, "epoch": 1.261202718436087, "grad_norm": 15.085877418518066, "learning_rate": 8.196226112478136e-06, "loss": 0.77057962, "memory(GiB)": 34.88, "step": 46580, "train_speed(iter/s)": 0.413342 }, { "acc": 0.88686714, "epoch": 1.2613380987193026, "grad_norm": 10.289758682250977, "learning_rate": 8.195795769570404e-06, "loss": 0.48409052, "memory(GiB)": 34.88, "step": 46585, "train_speed(iter/s)": 0.413344 }, { "acc": 0.88937759, "epoch": 1.2614734790025182, "grad_norm": 9.054524421691895, "learning_rate": 8.195365386635313e-06, "loss": 0.5657742, "memory(GiB)": 34.88, "step": 46590, "train_speed(iter/s)": 0.413346 }, { "acc": 0.88856773, "epoch": 1.2616088592857335, "grad_norm": 7.936374664306641, "learning_rate": 8.194934963678256e-06, "loss": 0.60553164, "memory(GiB)": 34.88, "step": 46595, "train_speed(iter/s)": 0.413349 }, { "acc": 0.88836842, "epoch": 1.261744239568949, "grad_norm": 8.486786842346191, "learning_rate": 8.194504500704623e-06, "loss": 0.66455975, "memory(GiB)": 34.88, "step": 46600, "train_speed(iter/s)": 0.413351 }, { "acc": 0.87156162, "epoch": 1.2618796198521647, "grad_norm": 10.878325462341309, "learning_rate": 8.194073997719808e-06, "loss": 0.68297515, "memory(GiB)": 34.88, "step": 46605, "train_speed(iter/s)": 0.413353 }, { "acc": 0.8912344, "epoch": 1.2620150001353803, "grad_norm": 15.78786849975586, "learning_rate": 8.193643454729204e-06, "loss": 0.58571472, "memory(GiB)": 34.88, "step": 46610, "train_speed(iter/s)": 0.413355 }, { "acc": 0.89636917, "epoch": 1.2621503804185958, "grad_norm": 17.08328628540039, "learning_rate": 8.193212871738202e-06, "loss": 0.51101418, "memory(GiB)": 34.88, "step": 46615, "train_speed(iter/s)": 0.413357 }, { "acc": 0.89809895, "epoch": 1.2622857607018114, "grad_norm": 10.484489440917969, "learning_rate": 8.192782248752199e-06, "loss": 0.5916811, "memory(GiB)": 34.88, "step": 46620, "train_speed(iter/s)": 0.413359 }, { "acc": 0.88158627, "epoch": 1.262421140985027, "grad_norm": 14.089640617370605, "learning_rate": 8.192351585776587e-06, "loss": 0.6675549, "memory(GiB)": 34.88, "step": 46625, "train_speed(iter/s)": 0.413362 }, { "acc": 0.90228596, "epoch": 1.2625565212682424, "grad_norm": 5.459266185760498, "learning_rate": 8.191920882816764e-06, "loss": 0.50606747, "memory(GiB)": 34.88, "step": 46630, "train_speed(iter/s)": 0.413364 }, { "acc": 0.91347885, "epoch": 1.2626919015514582, "grad_norm": 6.537477016448975, "learning_rate": 8.191490139878122e-06, "loss": 0.46205063, "memory(GiB)": 34.88, "step": 46635, "train_speed(iter/s)": 0.413366 }, { "acc": 0.87801008, "epoch": 1.2628272818346735, "grad_norm": 7.137939929962158, "learning_rate": 8.191059356966059e-06, "loss": 0.63363624, "memory(GiB)": 34.88, "step": 46640, "train_speed(iter/s)": 0.413368 }, { "acc": 0.87813606, "epoch": 1.262962662117889, "grad_norm": 6.700192928314209, "learning_rate": 8.19062853408597e-06, "loss": 0.68187675, "memory(GiB)": 34.88, "step": 46645, "train_speed(iter/s)": 0.41337 }, { "acc": 0.87843733, "epoch": 1.2630980424011047, "grad_norm": 18.014026641845703, "learning_rate": 8.190197671243251e-06, "loss": 0.76609974, "memory(GiB)": 34.88, "step": 46650, "train_speed(iter/s)": 0.413373 }, { "acc": 0.90504951, "epoch": 1.2632334226843203, "grad_norm": 7.723066329956055, "learning_rate": 8.189766768443302e-06, "loss": 0.47303295, "memory(GiB)": 34.88, "step": 46655, "train_speed(iter/s)": 0.413375 }, { "acc": 0.90701933, "epoch": 1.2633688029675358, "grad_norm": 9.060208320617676, "learning_rate": 8.18933582569152e-06, "loss": 0.48169293, "memory(GiB)": 34.88, "step": 46660, "train_speed(iter/s)": 0.413377 }, { "acc": 0.89878006, "epoch": 1.2635041832507514, "grad_norm": 8.914259910583496, "learning_rate": 8.188904842993303e-06, "loss": 0.61763678, "memory(GiB)": 34.88, "step": 46665, "train_speed(iter/s)": 0.413379 }, { "acc": 0.90366888, "epoch": 1.263639563533967, "grad_norm": 15.565712928771973, "learning_rate": 8.18847382035405e-06, "loss": 0.46980262, "memory(GiB)": 34.88, "step": 46670, "train_speed(iter/s)": 0.413382 }, { "acc": 0.90460463, "epoch": 1.2637749438171824, "grad_norm": 13.40576171875, "learning_rate": 8.188042757779163e-06, "loss": 0.56247263, "memory(GiB)": 34.88, "step": 46675, "train_speed(iter/s)": 0.413384 }, { "acc": 0.88221035, "epoch": 1.263910324100398, "grad_norm": 8.448882102966309, "learning_rate": 8.187611655274035e-06, "loss": 0.63788729, "memory(GiB)": 34.88, "step": 46680, "train_speed(iter/s)": 0.413386 }, { "acc": 0.91137123, "epoch": 1.2640457043836135, "grad_norm": 7.8746795654296875, "learning_rate": 8.187180512844076e-06, "loss": 0.38018053, "memory(GiB)": 34.88, "step": 46685, "train_speed(iter/s)": 0.413388 }, { "acc": 0.90746269, "epoch": 1.264181084666829, "grad_norm": 6.902766704559326, "learning_rate": 8.186749330494678e-06, "loss": 0.4714509, "memory(GiB)": 34.88, "step": 46690, "train_speed(iter/s)": 0.413391 }, { "acc": 0.9058569, "epoch": 1.2643164649500447, "grad_norm": 7.081726551055908, "learning_rate": 8.186318108231247e-06, "loss": 0.56230111, "memory(GiB)": 34.88, "step": 46695, "train_speed(iter/s)": 0.413393 }, { "acc": 0.89398098, "epoch": 1.2644518452332603, "grad_norm": 6.453060626983643, "learning_rate": 8.185886846059184e-06, "loss": 0.49158058, "memory(GiB)": 34.88, "step": 46700, "train_speed(iter/s)": 0.413395 }, { "acc": 0.88113251, "epoch": 1.2645872255164758, "grad_norm": 7.743429183959961, "learning_rate": 8.18545554398389e-06, "loss": 0.7304986, "memory(GiB)": 34.88, "step": 46705, "train_speed(iter/s)": 0.413397 }, { "acc": 0.88654051, "epoch": 1.2647226057996912, "grad_norm": 6.133615970611572, "learning_rate": 8.185024202010772e-06, "loss": 0.54370832, "memory(GiB)": 34.88, "step": 46710, "train_speed(iter/s)": 0.4134 }, { "acc": 0.88162518, "epoch": 1.264857986082907, "grad_norm": 15.470282554626465, "learning_rate": 8.18459282014523e-06, "loss": 0.58789825, "memory(GiB)": 34.88, "step": 46715, "train_speed(iter/s)": 0.413401 }, { "acc": 0.86820946, "epoch": 1.2649933663661224, "grad_norm": 9.141303062438965, "learning_rate": 8.184161398392669e-06, "loss": 0.65965505, "memory(GiB)": 34.88, "step": 46720, "train_speed(iter/s)": 0.413404 }, { "acc": 0.89921503, "epoch": 1.265128746649338, "grad_norm": 15.522472381591797, "learning_rate": 8.183729936758492e-06, "loss": 0.5661613, "memory(GiB)": 34.88, "step": 46725, "train_speed(iter/s)": 0.413406 }, { "acc": 0.88194151, "epoch": 1.2652641269325535, "grad_norm": 11.121652603149414, "learning_rate": 8.183298435248107e-06, "loss": 0.63712177, "memory(GiB)": 34.88, "step": 46730, "train_speed(iter/s)": 0.413408 }, { "acc": 0.88868532, "epoch": 1.265399507215769, "grad_norm": 6.839730739593506, "learning_rate": 8.182866893866916e-06, "loss": 0.55585046, "memory(GiB)": 34.88, "step": 46735, "train_speed(iter/s)": 0.41341 }, { "acc": 0.88892899, "epoch": 1.2655348874989847, "grad_norm": 15.305042266845703, "learning_rate": 8.182435312620325e-06, "loss": 0.58138461, "memory(GiB)": 34.88, "step": 46740, "train_speed(iter/s)": 0.413413 }, { "acc": 0.89112263, "epoch": 1.2656702677822003, "grad_norm": 9.85032844543457, "learning_rate": 8.182003691513745e-06, "loss": 0.5319541, "memory(GiB)": 34.88, "step": 46745, "train_speed(iter/s)": 0.413415 }, { "acc": 0.89095774, "epoch": 1.2658056480654158, "grad_norm": 14.968340873718262, "learning_rate": 8.181572030552576e-06, "loss": 0.61221313, "memory(GiB)": 34.88, "step": 46750, "train_speed(iter/s)": 0.413417 }, { "acc": 0.90111923, "epoch": 1.2659410283486312, "grad_norm": 8.540432929992676, "learning_rate": 8.181140329742232e-06, "loss": 0.52231617, "memory(GiB)": 34.88, "step": 46755, "train_speed(iter/s)": 0.413419 }, { "acc": 0.89014025, "epoch": 1.2660764086318468, "grad_norm": 8.797801971435547, "learning_rate": 8.180708589088116e-06, "loss": 0.67205005, "memory(GiB)": 34.88, "step": 46760, "train_speed(iter/s)": 0.413421 }, { "acc": 0.91104136, "epoch": 1.2662117889150624, "grad_norm": 4.863216876983643, "learning_rate": 8.18027680859564e-06, "loss": 0.41700096, "memory(GiB)": 34.88, "step": 46765, "train_speed(iter/s)": 0.413423 }, { "acc": 0.92332954, "epoch": 1.266347169198278, "grad_norm": 6.086979389190674, "learning_rate": 8.17984498827021e-06, "loss": 0.34447508, "memory(GiB)": 34.88, "step": 46770, "train_speed(iter/s)": 0.413426 }, { "acc": 0.88769941, "epoch": 1.2664825494814935, "grad_norm": 9.617431640625, "learning_rate": 8.179413128117237e-06, "loss": 0.57702904, "memory(GiB)": 34.88, "step": 46775, "train_speed(iter/s)": 0.413428 }, { "acc": 0.87665672, "epoch": 1.266617929764709, "grad_norm": 11.706277847290039, "learning_rate": 8.17898122814213e-06, "loss": 0.73687582, "memory(GiB)": 34.88, "step": 46780, "train_speed(iter/s)": 0.41343 }, { "acc": 0.88119116, "epoch": 1.2667533100479247, "grad_norm": 13.064480781555176, "learning_rate": 8.1785492883503e-06, "loss": 0.69857221, "memory(GiB)": 34.88, "step": 46785, "train_speed(iter/s)": 0.413432 }, { "acc": 0.8812335, "epoch": 1.26688869033114, "grad_norm": 8.05500602722168, "learning_rate": 8.178117308747157e-06, "loss": 0.57475009, "memory(GiB)": 34.88, "step": 46790, "train_speed(iter/s)": 0.413435 }, { "acc": 0.90664597, "epoch": 1.2670240706143558, "grad_norm": 5.3929572105407715, "learning_rate": 8.177685289338117e-06, "loss": 0.49041777, "memory(GiB)": 34.88, "step": 46795, "train_speed(iter/s)": 0.413437 }, { "acc": 0.90774879, "epoch": 1.2671594508975712, "grad_norm": 11.589765548706055, "learning_rate": 8.177253230128584e-06, "loss": 0.54823413, "memory(GiB)": 34.88, "step": 46800, "train_speed(iter/s)": 0.413439 }, { "acc": 0.89560547, "epoch": 1.2672948311807868, "grad_norm": 9.64924144744873, "learning_rate": 8.176821131123977e-06, "loss": 0.57126856, "memory(GiB)": 34.88, "step": 46805, "train_speed(iter/s)": 0.413441 }, { "acc": 0.87821522, "epoch": 1.2674302114640024, "grad_norm": 10.320713996887207, "learning_rate": 8.176388992329705e-06, "loss": 0.64483681, "memory(GiB)": 34.88, "step": 46810, "train_speed(iter/s)": 0.413443 }, { "acc": 0.89609632, "epoch": 1.267565591747218, "grad_norm": 10.137384414672852, "learning_rate": 8.175956813751182e-06, "loss": 0.56709576, "memory(GiB)": 34.88, "step": 46815, "train_speed(iter/s)": 0.413446 }, { "acc": 0.89304771, "epoch": 1.2677009720304335, "grad_norm": 14.02881908416748, "learning_rate": 8.175524595393825e-06, "loss": 0.58868189, "memory(GiB)": 34.88, "step": 46820, "train_speed(iter/s)": 0.413447 }, { "acc": 0.89950695, "epoch": 1.267836352313649, "grad_norm": 9.746756553649902, "learning_rate": 8.175092337263045e-06, "loss": 0.44776497, "memory(GiB)": 34.88, "step": 46825, "train_speed(iter/s)": 0.41345 }, { "acc": 0.89155531, "epoch": 1.2679717325968647, "grad_norm": 6.848628997802734, "learning_rate": 8.174660039364257e-06, "loss": 0.58660278, "memory(GiB)": 34.88, "step": 46830, "train_speed(iter/s)": 0.413452 }, { "acc": 0.90323601, "epoch": 1.26810711288008, "grad_norm": 5.107590675354004, "learning_rate": 8.174227701702879e-06, "loss": 0.4995698, "memory(GiB)": 34.88, "step": 46835, "train_speed(iter/s)": 0.413454 }, { "acc": 0.90315828, "epoch": 1.2682424931632956, "grad_norm": 7.7178521156311035, "learning_rate": 8.173795324284324e-06, "loss": 0.45875082, "memory(GiB)": 34.88, "step": 46840, "train_speed(iter/s)": 0.413456 }, { "acc": 0.90300245, "epoch": 1.2683778734465112, "grad_norm": 5.369653224945068, "learning_rate": 8.173362907114009e-06, "loss": 0.44439173, "memory(GiB)": 34.88, "step": 46845, "train_speed(iter/s)": 0.413458 }, { "acc": 0.89151058, "epoch": 1.2685132537297268, "grad_norm": 9.452107429504395, "learning_rate": 8.172930450197352e-06, "loss": 0.60280027, "memory(GiB)": 34.88, "step": 46850, "train_speed(iter/s)": 0.41346 }, { "acc": 0.89567556, "epoch": 1.2686486340129424, "grad_norm": 11.811020851135254, "learning_rate": 8.172497953539768e-06, "loss": 0.60062528, "memory(GiB)": 34.88, "step": 46855, "train_speed(iter/s)": 0.413462 }, { "acc": 0.88350134, "epoch": 1.268784014296158, "grad_norm": 7.126718997955322, "learning_rate": 8.17206541714668e-06, "loss": 0.67870722, "memory(GiB)": 34.88, "step": 46860, "train_speed(iter/s)": 0.413465 }, { "acc": 0.8877327, "epoch": 1.2689193945793735, "grad_norm": 6.361116409301758, "learning_rate": 8.171632841023502e-06, "loss": 0.63738079, "memory(GiB)": 34.88, "step": 46865, "train_speed(iter/s)": 0.413467 }, { "acc": 0.88270378, "epoch": 1.2690547748625889, "grad_norm": 18.44357681274414, "learning_rate": 8.171200225175653e-06, "loss": 0.63615484, "memory(GiB)": 34.88, "step": 46870, "train_speed(iter/s)": 0.413469 }, { "acc": 0.90438957, "epoch": 1.2691901551458047, "grad_norm": 17.545259475708008, "learning_rate": 8.170767569608554e-06, "loss": 0.46159573, "memory(GiB)": 34.88, "step": 46875, "train_speed(iter/s)": 0.413471 }, { "acc": 0.88870335, "epoch": 1.26932553542902, "grad_norm": 17.280256271362305, "learning_rate": 8.170334874327623e-06, "loss": 0.60971837, "memory(GiB)": 34.88, "step": 46880, "train_speed(iter/s)": 0.413473 }, { "acc": 0.87977657, "epoch": 1.2694609157122356, "grad_norm": 7.192968845367432, "learning_rate": 8.169902139338284e-06, "loss": 0.50078998, "memory(GiB)": 34.88, "step": 46885, "train_speed(iter/s)": 0.413475 }, { "acc": 0.88816719, "epoch": 1.2695962959954512, "grad_norm": 8.415306091308594, "learning_rate": 8.169469364645953e-06, "loss": 0.64384174, "memory(GiB)": 34.88, "step": 46890, "train_speed(iter/s)": 0.413477 }, { "acc": 0.86756973, "epoch": 1.2697316762786668, "grad_norm": 10.3350830078125, "learning_rate": 8.169036550256055e-06, "loss": 0.73618593, "memory(GiB)": 34.88, "step": 46895, "train_speed(iter/s)": 0.413479 }, { "acc": 0.8890955, "epoch": 1.2698670565618824, "grad_norm": 11.44654655456543, "learning_rate": 8.16860369617401e-06, "loss": 0.63021488, "memory(GiB)": 34.88, "step": 46900, "train_speed(iter/s)": 0.413482 }, { "acc": 0.89337101, "epoch": 1.270002436845098, "grad_norm": 5.765664100646973, "learning_rate": 8.16817080240524e-06, "loss": 0.49836912, "memory(GiB)": 34.88, "step": 46905, "train_speed(iter/s)": 0.413483 }, { "acc": 0.89542542, "epoch": 1.2701378171283135, "grad_norm": 11.424367904663086, "learning_rate": 8.167737868955169e-06, "loss": 0.51319542, "memory(GiB)": 34.88, "step": 46910, "train_speed(iter/s)": 0.413486 }, { "acc": 0.90027981, "epoch": 1.2702731974115289, "grad_norm": 5.651585102081299, "learning_rate": 8.16730489582922e-06, "loss": 0.46353951, "memory(GiB)": 34.88, "step": 46915, "train_speed(iter/s)": 0.413488 }, { "acc": 0.89375105, "epoch": 1.2704085776947445, "grad_norm": 8.854415893554688, "learning_rate": 8.166871883032817e-06, "loss": 0.60277452, "memory(GiB)": 34.88, "step": 46920, "train_speed(iter/s)": 0.41349 }, { "acc": 0.88726721, "epoch": 1.27054395797796, "grad_norm": 7.973564147949219, "learning_rate": 8.166438830571386e-06, "loss": 0.61556158, "memory(GiB)": 34.88, "step": 46925, "train_speed(iter/s)": 0.413492 }, { "acc": 0.8879158, "epoch": 1.2706793382611756, "grad_norm": 12.675677299499512, "learning_rate": 8.166005738450348e-06, "loss": 0.64570227, "memory(GiB)": 34.88, "step": 46930, "train_speed(iter/s)": 0.413494 }, { "acc": 0.90297308, "epoch": 1.2708147185443912, "grad_norm": 6.693952560424805, "learning_rate": 8.165572606675131e-06, "loss": 0.48261313, "memory(GiB)": 34.88, "step": 46935, "train_speed(iter/s)": 0.413497 }, { "acc": 0.88774595, "epoch": 1.2709500988276068, "grad_norm": 8.477581977844238, "learning_rate": 8.165139435251162e-06, "loss": 0.5756422, "memory(GiB)": 34.88, "step": 46940, "train_speed(iter/s)": 0.413499 }, { "acc": 0.893787, "epoch": 1.2710854791108224, "grad_norm": 5.492088794708252, "learning_rate": 8.164706224183864e-06, "loss": 0.5539897, "memory(GiB)": 34.88, "step": 46945, "train_speed(iter/s)": 0.413501 }, { "acc": 0.92726841, "epoch": 1.2712208593940377, "grad_norm": 5.0793070793151855, "learning_rate": 8.164272973478665e-06, "loss": 0.39110188, "memory(GiB)": 34.88, "step": 46950, "train_speed(iter/s)": 0.413503 }, { "acc": 0.89426699, "epoch": 1.2713562396772535, "grad_norm": 7.032703399658203, "learning_rate": 8.163839683140994e-06, "loss": 0.53070917, "memory(GiB)": 34.88, "step": 46955, "train_speed(iter/s)": 0.413505 }, { "acc": 0.87849636, "epoch": 1.2714916199604689, "grad_norm": 7.547943592071533, "learning_rate": 8.163406353176278e-06, "loss": 0.7247992, "memory(GiB)": 34.88, "step": 46960, "train_speed(iter/s)": 0.413507 }, { "acc": 0.87640085, "epoch": 1.2716270002436845, "grad_norm": 15.472299575805664, "learning_rate": 8.162972983589942e-06, "loss": 0.65453749, "memory(GiB)": 34.88, "step": 46965, "train_speed(iter/s)": 0.413509 }, { "acc": 0.87860603, "epoch": 1.2717623805269, "grad_norm": 9.088449478149414, "learning_rate": 8.16253957438742e-06, "loss": 0.71201782, "memory(GiB)": 34.88, "step": 46970, "train_speed(iter/s)": 0.413511 }, { "acc": 0.89684734, "epoch": 1.2718977608101156, "grad_norm": 10.147665023803711, "learning_rate": 8.162106125574138e-06, "loss": 0.59638104, "memory(GiB)": 34.88, "step": 46975, "train_speed(iter/s)": 0.413513 }, { "acc": 0.88751812, "epoch": 1.2720331410933312, "grad_norm": 7.286741256713867, "learning_rate": 8.161672637155527e-06, "loss": 0.6168736, "memory(GiB)": 34.88, "step": 46980, "train_speed(iter/s)": 0.413516 }, { "acc": 0.92166262, "epoch": 1.2721685213765468, "grad_norm": 5.537693977355957, "learning_rate": 8.161239109137016e-06, "loss": 0.37980528, "memory(GiB)": 34.88, "step": 46985, "train_speed(iter/s)": 0.413518 }, { "acc": 0.87950535, "epoch": 1.2723039016597624, "grad_norm": 16.61379623413086, "learning_rate": 8.160805541524038e-06, "loss": 0.63135223, "memory(GiB)": 34.88, "step": 46990, "train_speed(iter/s)": 0.41352 }, { "acc": 0.89126406, "epoch": 1.2724392819429777, "grad_norm": 9.91330337524414, "learning_rate": 8.160371934322021e-06, "loss": 0.60167961, "memory(GiB)": 34.88, "step": 46995, "train_speed(iter/s)": 0.413522 }, { "acc": 0.90054741, "epoch": 1.2725746622261933, "grad_norm": 14.92889404296875, "learning_rate": 8.159938287536401e-06, "loss": 0.59004378, "memory(GiB)": 34.88, "step": 47000, "train_speed(iter/s)": 0.413524 }, { "acc": 0.8985466, "epoch": 1.272710042509409, "grad_norm": 9.896199226379395, "learning_rate": 8.159504601172608e-06, "loss": 0.55808067, "memory(GiB)": 34.88, "step": 47005, "train_speed(iter/s)": 0.413526 }, { "acc": 0.87737484, "epoch": 1.2728454227926245, "grad_norm": 8.073044776916504, "learning_rate": 8.159070875236077e-06, "loss": 0.67472849, "memory(GiB)": 34.88, "step": 47010, "train_speed(iter/s)": 0.413529 }, { "acc": 0.90314865, "epoch": 1.27298080307584, "grad_norm": 12.04996109008789, "learning_rate": 8.158637109732235e-06, "loss": 0.56409001, "memory(GiB)": 34.88, "step": 47015, "train_speed(iter/s)": 0.413531 }, { "acc": 0.89548693, "epoch": 1.2731161833590556, "grad_norm": 23.66923713684082, "learning_rate": 8.158203304666524e-06, "loss": 0.6004283, "memory(GiB)": 34.88, "step": 47020, "train_speed(iter/s)": 0.413533 }, { "acc": 0.89118843, "epoch": 1.2732515636422712, "grad_norm": 9.151604652404785, "learning_rate": 8.157769460044372e-06, "loss": 0.49544306, "memory(GiB)": 34.88, "step": 47025, "train_speed(iter/s)": 0.413535 }, { "acc": 0.88229408, "epoch": 1.2733869439254866, "grad_norm": 10.286870002746582, "learning_rate": 8.157335575871216e-06, "loss": 0.68075409, "memory(GiB)": 34.88, "step": 47030, "train_speed(iter/s)": 0.413537 }, { "acc": 0.87675247, "epoch": 1.2735223242087024, "grad_norm": 9.343557357788086, "learning_rate": 8.156901652152492e-06, "loss": 0.67221518, "memory(GiB)": 34.88, "step": 47035, "train_speed(iter/s)": 0.413539 }, { "acc": 0.88944893, "epoch": 1.2736577044919177, "grad_norm": 10.230159759521484, "learning_rate": 8.156467688893633e-06, "loss": 0.59431987, "memory(GiB)": 34.88, "step": 47040, "train_speed(iter/s)": 0.413541 }, { "acc": 0.89294653, "epoch": 1.2737930847751333, "grad_norm": 19.424962997436523, "learning_rate": 8.156033686100079e-06, "loss": 0.61861086, "memory(GiB)": 34.88, "step": 47045, "train_speed(iter/s)": 0.413543 }, { "acc": 0.90200624, "epoch": 1.273928465058349, "grad_norm": 10.414081573486328, "learning_rate": 8.155599643777264e-06, "loss": 0.52672029, "memory(GiB)": 34.88, "step": 47050, "train_speed(iter/s)": 0.413545 }, { "acc": 0.85188255, "epoch": 1.2740638453415645, "grad_norm": 18.806175231933594, "learning_rate": 8.155165561930628e-06, "loss": 0.69832487, "memory(GiB)": 34.88, "step": 47055, "train_speed(iter/s)": 0.413547 }, { "acc": 0.89365129, "epoch": 1.27419922562478, "grad_norm": 11.812944412231445, "learning_rate": 8.154731440565605e-06, "loss": 0.51101046, "memory(GiB)": 34.88, "step": 47060, "train_speed(iter/s)": 0.413549 }, { "acc": 0.89657774, "epoch": 1.2743346059079956, "grad_norm": 10.901423454284668, "learning_rate": 8.154297279687636e-06, "loss": 0.59884624, "memory(GiB)": 34.88, "step": 47065, "train_speed(iter/s)": 0.413552 }, { "acc": 0.89150429, "epoch": 1.2744699861912112, "grad_norm": 7.491994380950928, "learning_rate": 8.15386307930216e-06, "loss": 0.63734055, "memory(GiB)": 34.88, "step": 47070, "train_speed(iter/s)": 0.413554 }, { "acc": 0.87440252, "epoch": 1.2746053664744266, "grad_norm": 8.740381240844727, "learning_rate": 8.153428839414611e-06, "loss": 0.62009668, "memory(GiB)": 34.88, "step": 47075, "train_speed(iter/s)": 0.413556 }, { "acc": 0.91022511, "epoch": 1.2747407467576422, "grad_norm": 5.816646575927734, "learning_rate": 8.152994560030438e-06, "loss": 0.45112391, "memory(GiB)": 34.88, "step": 47080, "train_speed(iter/s)": 0.413558 }, { "acc": 0.89706669, "epoch": 1.2748761270408577, "grad_norm": 6.784551620483398, "learning_rate": 8.152560241155074e-06, "loss": 0.56481128, "memory(GiB)": 34.88, "step": 47085, "train_speed(iter/s)": 0.41356 }, { "acc": 0.86243153, "epoch": 1.2750115073240733, "grad_norm": 10.660253524780273, "learning_rate": 8.15212588279396e-06, "loss": 0.82171383, "memory(GiB)": 34.88, "step": 47090, "train_speed(iter/s)": 0.413562 }, { "acc": 0.88781061, "epoch": 1.275146887607289, "grad_norm": 12.440569877624512, "learning_rate": 8.151691484952541e-06, "loss": 0.67842302, "memory(GiB)": 34.88, "step": 47095, "train_speed(iter/s)": 0.413564 }, { "acc": 0.88635206, "epoch": 1.2752822678905045, "grad_norm": 9.335600852966309, "learning_rate": 8.151257047636256e-06, "loss": 0.64310522, "memory(GiB)": 34.88, "step": 47100, "train_speed(iter/s)": 0.413566 }, { "acc": 0.89475727, "epoch": 1.27541764817372, "grad_norm": 8.710188865661621, "learning_rate": 8.150822570850547e-06, "loss": 0.52202997, "memory(GiB)": 34.88, "step": 47105, "train_speed(iter/s)": 0.413568 }, { "acc": 0.88986053, "epoch": 1.2755530284569354, "grad_norm": 7.757708549499512, "learning_rate": 8.150388054600858e-06, "loss": 0.60061455, "memory(GiB)": 34.88, "step": 47110, "train_speed(iter/s)": 0.41357 }, { "acc": 0.91209583, "epoch": 1.2756884087401512, "grad_norm": 14.058690071105957, "learning_rate": 8.14995349889263e-06, "loss": 0.47561827, "memory(GiB)": 34.88, "step": 47115, "train_speed(iter/s)": 0.413572 }, { "acc": 0.90878515, "epoch": 1.2758237890233666, "grad_norm": 8.846420288085938, "learning_rate": 8.149518903731311e-06, "loss": 0.51676602, "memory(GiB)": 34.88, "step": 47120, "train_speed(iter/s)": 0.413573 }, { "acc": 0.89016972, "epoch": 1.2759591693065822, "grad_norm": 7.734761714935303, "learning_rate": 8.149084269122343e-06, "loss": 0.63298874, "memory(GiB)": 34.88, "step": 47125, "train_speed(iter/s)": 0.413575 }, { "acc": 0.88397484, "epoch": 1.2760945495897977, "grad_norm": 19.58102798461914, "learning_rate": 8.148649595071167e-06, "loss": 0.71991901, "memory(GiB)": 34.88, "step": 47130, "train_speed(iter/s)": 0.413578 }, { "acc": 0.88922768, "epoch": 1.2762299298730133, "grad_norm": 18.033998489379883, "learning_rate": 8.148214881583232e-06, "loss": 0.65701613, "memory(GiB)": 34.88, "step": 47135, "train_speed(iter/s)": 0.41358 }, { "acc": 0.89300089, "epoch": 1.276365310156229, "grad_norm": 13.64214038848877, "learning_rate": 8.147780128663985e-06, "loss": 0.45934763, "memory(GiB)": 34.88, "step": 47140, "train_speed(iter/s)": 0.413582 }, { "acc": 0.88311329, "epoch": 1.2765006904394445, "grad_norm": 10.284148216247559, "learning_rate": 8.147345336318869e-06, "loss": 0.60609169, "memory(GiB)": 34.88, "step": 47145, "train_speed(iter/s)": 0.413584 }, { "acc": 0.88533916, "epoch": 1.27663607072266, "grad_norm": 5.304174423217773, "learning_rate": 8.146910504553331e-06, "loss": 0.69839725, "memory(GiB)": 34.88, "step": 47150, "train_speed(iter/s)": 0.413586 }, { "acc": 0.8698638, "epoch": 1.2767714510058754, "grad_norm": 6.186248302459717, "learning_rate": 8.146475633372817e-06, "loss": 0.7070354, "memory(GiB)": 34.88, "step": 47155, "train_speed(iter/s)": 0.413588 }, { "acc": 0.87195549, "epoch": 1.276906831289091, "grad_norm": 13.035503387451172, "learning_rate": 8.14604072278278e-06, "loss": 0.75834188, "memory(GiB)": 34.88, "step": 47160, "train_speed(iter/s)": 0.41359 }, { "acc": 0.88042393, "epoch": 1.2770422115723066, "grad_norm": 13.872544288635254, "learning_rate": 8.14560577278866e-06, "loss": 0.57574997, "memory(GiB)": 34.88, "step": 47165, "train_speed(iter/s)": 0.413592 }, { "acc": 0.91348572, "epoch": 1.2771775918555222, "grad_norm": 6.523688316345215, "learning_rate": 8.145170783395915e-06, "loss": 0.42101603, "memory(GiB)": 34.88, "step": 47170, "train_speed(iter/s)": 0.413595 }, { "acc": 0.90179033, "epoch": 1.2773129721387377, "grad_norm": 8.80711841583252, "learning_rate": 8.144735754609987e-06, "loss": 0.56513829, "memory(GiB)": 34.88, "step": 47175, "train_speed(iter/s)": 0.413596 }, { "acc": 0.89740791, "epoch": 1.2774483524219533, "grad_norm": 4.488645553588867, "learning_rate": 8.144300686436326e-06, "loss": 0.52345586, "memory(GiB)": 34.88, "step": 47180, "train_speed(iter/s)": 0.413599 }, { "acc": 0.87281685, "epoch": 1.277583732705169, "grad_norm": 14.81022834777832, "learning_rate": 8.143865578880386e-06, "loss": 0.76990633, "memory(GiB)": 34.88, "step": 47185, "train_speed(iter/s)": 0.413601 }, { "acc": 0.88790798, "epoch": 1.2777191129883843, "grad_norm": 24.920867919921875, "learning_rate": 8.143430431947615e-06, "loss": 0.59625978, "memory(GiB)": 34.88, "step": 47190, "train_speed(iter/s)": 0.413603 }, { "acc": 0.90406446, "epoch": 1.2778544932716, "grad_norm": 10.045807838439941, "learning_rate": 8.142995245643466e-06, "loss": 0.52200465, "memory(GiB)": 34.88, "step": 47195, "train_speed(iter/s)": 0.413606 }, { "acc": 0.89882345, "epoch": 1.2779898735548154, "grad_norm": 17.26451301574707, "learning_rate": 8.142560019973385e-06, "loss": 0.62799859, "memory(GiB)": 34.88, "step": 47200, "train_speed(iter/s)": 0.413608 }, { "acc": 0.87547035, "epoch": 1.278125253838031, "grad_norm": 8.135566711425781, "learning_rate": 8.142124754942833e-06, "loss": 0.66012878, "memory(GiB)": 34.88, "step": 47205, "train_speed(iter/s)": 0.41361 }, { "acc": 0.90685558, "epoch": 1.2782606341212466, "grad_norm": 5.280523300170898, "learning_rate": 8.141689450557255e-06, "loss": 0.54957685, "memory(GiB)": 34.88, "step": 47210, "train_speed(iter/s)": 0.413612 }, { "acc": 0.89915695, "epoch": 1.2783960144044622, "grad_norm": 6.413698673248291, "learning_rate": 8.141254106822107e-06, "loss": 0.50601349, "memory(GiB)": 34.88, "step": 47215, "train_speed(iter/s)": 0.413614 }, { "acc": 0.88477478, "epoch": 1.2785313946876777, "grad_norm": 6.12009859085083, "learning_rate": 8.14081872374284e-06, "loss": 0.55997105, "memory(GiB)": 34.88, "step": 47220, "train_speed(iter/s)": 0.413616 }, { "acc": 0.88947296, "epoch": 1.2786667749708933, "grad_norm": 11.922066688537598, "learning_rate": 8.140383301324913e-06, "loss": 0.6271595, "memory(GiB)": 34.88, "step": 47225, "train_speed(iter/s)": 0.413619 }, { "acc": 0.89206581, "epoch": 1.278802155254109, "grad_norm": 6.334896564483643, "learning_rate": 8.139947839573779e-06, "loss": 0.56555843, "memory(GiB)": 34.88, "step": 47230, "train_speed(iter/s)": 0.413621 }, { "acc": 0.90194559, "epoch": 1.2789375355373243, "grad_norm": 14.517704010009766, "learning_rate": 8.13951233849489e-06, "loss": 0.63132067, "memory(GiB)": 34.88, "step": 47235, "train_speed(iter/s)": 0.413623 }, { "acc": 0.89516077, "epoch": 1.2790729158205398, "grad_norm": 6.831577777862549, "learning_rate": 8.139076798093702e-06, "loss": 0.57217746, "memory(GiB)": 34.88, "step": 47240, "train_speed(iter/s)": 0.413625 }, { "acc": 0.91587677, "epoch": 1.2792082961037554, "grad_norm": 5.011009693145752, "learning_rate": 8.138641218375674e-06, "loss": 0.41604214, "memory(GiB)": 34.88, "step": 47245, "train_speed(iter/s)": 0.413627 }, { "acc": 0.89291897, "epoch": 1.279343676386971, "grad_norm": 4.757583141326904, "learning_rate": 8.138205599346261e-06, "loss": 0.57441702, "memory(GiB)": 34.88, "step": 47250, "train_speed(iter/s)": 0.413629 }, { "acc": 0.9180913, "epoch": 1.2794790566701866, "grad_norm": 11.170726776123047, "learning_rate": 8.13776994101092e-06, "loss": 0.42689242, "memory(GiB)": 34.88, "step": 47255, "train_speed(iter/s)": 0.413631 }, { "acc": 0.89585991, "epoch": 1.2796144369534022, "grad_norm": 12.650981903076172, "learning_rate": 8.137334243375108e-06, "loss": 0.62907619, "memory(GiB)": 34.88, "step": 47260, "train_speed(iter/s)": 0.413633 }, { "acc": 0.908076, "epoch": 1.2797498172366177, "grad_norm": 5.974756717681885, "learning_rate": 8.13689850644428e-06, "loss": 0.46087642, "memory(GiB)": 34.88, "step": 47265, "train_speed(iter/s)": 0.413636 }, { "acc": 0.90211906, "epoch": 1.279885197519833, "grad_norm": 6.004866123199463, "learning_rate": 8.1364627302239e-06, "loss": 0.46639795, "memory(GiB)": 34.88, "step": 47270, "train_speed(iter/s)": 0.413638 }, { "acc": 0.88874493, "epoch": 1.280020577803049, "grad_norm": 9.999189376831055, "learning_rate": 8.136026914719425e-06, "loss": 0.60306149, "memory(GiB)": 34.88, "step": 47275, "train_speed(iter/s)": 0.41364 }, { "acc": 0.89616127, "epoch": 1.2801559580862643, "grad_norm": 25.71917152404785, "learning_rate": 8.135591059936315e-06, "loss": 0.62024126, "memory(GiB)": 34.88, "step": 47280, "train_speed(iter/s)": 0.413642 }, { "acc": 0.88357201, "epoch": 1.2802913383694798, "grad_norm": 9.260185241699219, "learning_rate": 8.135155165880029e-06, "loss": 0.65091987, "memory(GiB)": 34.88, "step": 47285, "train_speed(iter/s)": 0.413644 }, { "acc": 0.88864937, "epoch": 1.2804267186526954, "grad_norm": 8.172918319702148, "learning_rate": 8.134719232556027e-06, "loss": 0.62549782, "memory(GiB)": 34.88, "step": 47290, "train_speed(iter/s)": 0.413646 }, { "acc": 0.89273767, "epoch": 1.280562098935911, "grad_norm": 6.74871301651001, "learning_rate": 8.13428325996977e-06, "loss": 0.61422491, "memory(GiB)": 34.88, "step": 47295, "train_speed(iter/s)": 0.413648 }, { "acc": 0.89671898, "epoch": 1.2806974792191266, "grad_norm": 7.334718227386475, "learning_rate": 8.13384724812672e-06, "loss": 0.53996992, "memory(GiB)": 34.88, "step": 47300, "train_speed(iter/s)": 0.41365 }, { "acc": 0.90080776, "epoch": 1.2808328595023422, "grad_norm": 7.64704704284668, "learning_rate": 8.133411197032338e-06, "loss": 0.52516785, "memory(GiB)": 34.88, "step": 47305, "train_speed(iter/s)": 0.413652 }, { "acc": 0.89773693, "epoch": 1.2809682397855577, "grad_norm": 20.239938735961914, "learning_rate": 8.132975106692089e-06, "loss": 0.582481, "memory(GiB)": 34.88, "step": 47310, "train_speed(iter/s)": 0.413654 }, { "acc": 0.87403889, "epoch": 1.281103620068773, "grad_norm": 10.833290100097656, "learning_rate": 8.132538977111432e-06, "loss": 0.73816214, "memory(GiB)": 34.88, "step": 47315, "train_speed(iter/s)": 0.413656 }, { "acc": 0.86813526, "epoch": 1.2812390003519887, "grad_norm": 15.218104362487793, "learning_rate": 8.132102808295832e-06, "loss": 0.69954643, "memory(GiB)": 34.88, "step": 47320, "train_speed(iter/s)": 0.413658 }, { "acc": 0.89484653, "epoch": 1.2813743806352043, "grad_norm": 14.336236953735352, "learning_rate": 8.131666600250754e-06, "loss": 0.55235395, "memory(GiB)": 34.88, "step": 47325, "train_speed(iter/s)": 0.41366 }, { "acc": 0.89933262, "epoch": 1.2815097609184198, "grad_norm": 7.362642288208008, "learning_rate": 8.131230352981661e-06, "loss": 0.50699167, "memory(GiB)": 34.88, "step": 47330, "train_speed(iter/s)": 0.413663 }, { "acc": 0.92279625, "epoch": 1.2816451412016354, "grad_norm": 4.8180694580078125, "learning_rate": 8.130794066494021e-06, "loss": 0.39040551, "memory(GiB)": 34.88, "step": 47335, "train_speed(iter/s)": 0.413665 }, { "acc": 0.89678202, "epoch": 1.281780521484851, "grad_norm": 13.3066987991333, "learning_rate": 8.130357740793296e-06, "loss": 0.56910725, "memory(GiB)": 34.88, "step": 47340, "train_speed(iter/s)": 0.413667 }, { "acc": 0.89964275, "epoch": 1.2819159017680666, "grad_norm": 9.321033477783203, "learning_rate": 8.12992137588495e-06, "loss": 0.55331497, "memory(GiB)": 34.88, "step": 47345, "train_speed(iter/s)": 0.413669 }, { "acc": 0.90328445, "epoch": 1.282051282051282, "grad_norm": 77.27735900878906, "learning_rate": 8.129484971774455e-06, "loss": 0.48989096, "memory(GiB)": 34.88, "step": 47350, "train_speed(iter/s)": 0.413671 }, { "acc": 0.90687857, "epoch": 1.2821866623344977, "grad_norm": 5.212990760803223, "learning_rate": 8.129048528467272e-06, "loss": 0.52028112, "memory(GiB)": 34.88, "step": 47355, "train_speed(iter/s)": 0.413673 }, { "acc": 0.8880312, "epoch": 1.282322042617713, "grad_norm": 8.66176700592041, "learning_rate": 8.128612045968874e-06, "loss": 0.59651499, "memory(GiB)": 34.88, "step": 47360, "train_speed(iter/s)": 0.413675 }, { "acc": 0.86913671, "epoch": 1.2824574229009287, "grad_norm": 8.177620887756348, "learning_rate": 8.128175524284723e-06, "loss": 0.73252831, "memory(GiB)": 34.88, "step": 47365, "train_speed(iter/s)": 0.413676 }, { "acc": 0.87599096, "epoch": 1.2825928031841443, "grad_norm": 8.621614456176758, "learning_rate": 8.127738963420293e-06, "loss": 0.69008474, "memory(GiB)": 34.88, "step": 47370, "train_speed(iter/s)": 0.413679 }, { "acc": 0.89598293, "epoch": 1.2827281834673598, "grad_norm": 8.839700698852539, "learning_rate": 8.127302363381049e-06, "loss": 0.51563911, "memory(GiB)": 34.88, "step": 47375, "train_speed(iter/s)": 0.41368 }, { "acc": 0.89772863, "epoch": 1.2828635637505754, "grad_norm": 4.579220771789551, "learning_rate": 8.12686572417246e-06, "loss": 0.47749739, "memory(GiB)": 34.88, "step": 47380, "train_speed(iter/s)": 0.413682 }, { "acc": 0.89563351, "epoch": 1.282998944033791, "grad_norm": 7.352900505065918, "learning_rate": 8.126429045799998e-06, "loss": 0.58016644, "memory(GiB)": 34.88, "step": 47385, "train_speed(iter/s)": 0.413684 }, { "acc": 0.8951807, "epoch": 1.2831343243170066, "grad_norm": 7.503134727478027, "learning_rate": 8.125992328269133e-06, "loss": 0.60763774, "memory(GiB)": 34.88, "step": 47390, "train_speed(iter/s)": 0.413686 }, { "acc": 0.8914814, "epoch": 1.283269704600222, "grad_norm": 16.527963638305664, "learning_rate": 8.125555571585333e-06, "loss": 0.56633387, "memory(GiB)": 34.88, "step": 47395, "train_speed(iter/s)": 0.413688 }, { "acc": 0.90888224, "epoch": 1.2834050848834375, "grad_norm": 6.983963489532471, "learning_rate": 8.125118775754073e-06, "loss": 0.47156782, "memory(GiB)": 34.88, "step": 47400, "train_speed(iter/s)": 0.41369 }, { "acc": 0.90841494, "epoch": 1.283540465166653, "grad_norm": 8.041340827941895, "learning_rate": 8.124681940780823e-06, "loss": 0.47274046, "memory(GiB)": 34.88, "step": 47405, "train_speed(iter/s)": 0.413692 }, { "acc": 0.88957138, "epoch": 1.2836758454498687, "grad_norm": 11.803627967834473, "learning_rate": 8.124245066671055e-06, "loss": 0.54791408, "memory(GiB)": 34.88, "step": 47410, "train_speed(iter/s)": 0.413695 }, { "acc": 0.896768, "epoch": 1.2838112257330843, "grad_norm": 9.99837875366211, "learning_rate": 8.12380815343024e-06, "loss": 0.56928577, "memory(GiB)": 34.88, "step": 47415, "train_speed(iter/s)": 0.413697 }, { "acc": 0.90631981, "epoch": 1.2839466060162998, "grad_norm": 7.15667200088501, "learning_rate": 8.123371201063854e-06, "loss": 0.46265407, "memory(GiB)": 34.88, "step": 47420, "train_speed(iter/s)": 0.413699 }, { "acc": 0.89469852, "epoch": 1.2840819862995154, "grad_norm": 6.0764994621276855, "learning_rate": 8.12293420957737e-06, "loss": 0.5301342, "memory(GiB)": 34.88, "step": 47425, "train_speed(iter/s)": 0.413701 }, { "acc": 0.88337498, "epoch": 1.2842173665827308, "grad_norm": 8.12617015838623, "learning_rate": 8.122497178976263e-06, "loss": 0.64237943, "memory(GiB)": 34.88, "step": 47430, "train_speed(iter/s)": 0.413703 }, { "acc": 0.90600986, "epoch": 1.2843527468659466, "grad_norm": 18.63707160949707, "learning_rate": 8.122060109266007e-06, "loss": 0.46809492, "memory(GiB)": 34.88, "step": 47435, "train_speed(iter/s)": 0.413705 }, { "acc": 0.90275402, "epoch": 1.284488127149162, "grad_norm": 6.762662887573242, "learning_rate": 8.121623000452075e-06, "loss": 0.5207375, "memory(GiB)": 34.88, "step": 47440, "train_speed(iter/s)": 0.413707 }, { "acc": 0.88663425, "epoch": 1.2846235074323775, "grad_norm": 6.405038833618164, "learning_rate": 8.121185852539946e-06, "loss": 0.55147166, "memory(GiB)": 34.88, "step": 47445, "train_speed(iter/s)": 0.41371 }, { "acc": 0.90015602, "epoch": 1.284758887715593, "grad_norm": 8.72689151763916, "learning_rate": 8.120748665535093e-06, "loss": 0.49128718, "memory(GiB)": 34.88, "step": 47450, "train_speed(iter/s)": 0.413711 }, { "acc": 0.90099773, "epoch": 1.2848942679988087, "grad_norm": 4.645042419433594, "learning_rate": 8.120311439442995e-06, "loss": 0.5120121, "memory(GiB)": 34.88, "step": 47455, "train_speed(iter/s)": 0.413714 }, { "acc": 0.90354328, "epoch": 1.2850296482820243, "grad_norm": 6.516239643096924, "learning_rate": 8.119874174269128e-06, "loss": 0.56483126, "memory(GiB)": 34.88, "step": 47460, "train_speed(iter/s)": 0.413716 }, { "acc": 0.9054986, "epoch": 1.2851650285652396, "grad_norm": 15.311664581298828, "learning_rate": 8.119436870018969e-06, "loss": 0.5918786, "memory(GiB)": 34.88, "step": 47465, "train_speed(iter/s)": 0.413718 }, { "acc": 0.88589258, "epoch": 1.2853004088484554, "grad_norm": 13.584444999694824, "learning_rate": 8.118999526697997e-06, "loss": 0.59962125, "memory(GiB)": 34.88, "step": 47470, "train_speed(iter/s)": 0.41372 }, { "acc": 0.91368284, "epoch": 1.2854357891316708, "grad_norm": 6.699871063232422, "learning_rate": 8.118562144311692e-06, "loss": 0.46160231, "memory(GiB)": 34.88, "step": 47475, "train_speed(iter/s)": 0.413722 }, { "acc": 0.88573828, "epoch": 1.2855711694148864, "grad_norm": 15.536643028259277, "learning_rate": 8.118124722865532e-06, "loss": 0.55719161, "memory(GiB)": 34.88, "step": 47480, "train_speed(iter/s)": 0.413725 }, { "acc": 0.8864933, "epoch": 1.285706549698102, "grad_norm": 11.114140510559082, "learning_rate": 8.117687262364997e-06, "loss": 0.61177616, "memory(GiB)": 34.88, "step": 47485, "train_speed(iter/s)": 0.413727 }, { "acc": 0.89824314, "epoch": 1.2858419299813175, "grad_norm": 10.795429229736328, "learning_rate": 8.117249762815565e-06, "loss": 0.58796339, "memory(GiB)": 34.88, "step": 47490, "train_speed(iter/s)": 0.413729 }, { "acc": 0.89535971, "epoch": 1.285977310264533, "grad_norm": 11.496622085571289, "learning_rate": 8.116812224222716e-06, "loss": 0.60861216, "memory(GiB)": 34.88, "step": 47495, "train_speed(iter/s)": 0.41373 }, { "acc": 0.90282278, "epoch": 1.2861126905477487, "grad_norm": 9.499368667602539, "learning_rate": 8.116374646591936e-06, "loss": 0.50633049, "memory(GiB)": 34.88, "step": 47500, "train_speed(iter/s)": 0.413733 }, { "acc": 0.89276247, "epoch": 1.2862480708309643, "grad_norm": 7.387739181518555, "learning_rate": 8.1159370299287e-06, "loss": 0.50535526, "memory(GiB)": 34.88, "step": 47505, "train_speed(iter/s)": 0.413734 }, { "acc": 0.89790773, "epoch": 1.2863834511141796, "grad_norm": 11.011490821838379, "learning_rate": 8.115499374238495e-06, "loss": 0.51108918, "memory(GiB)": 34.88, "step": 47510, "train_speed(iter/s)": 0.413737 }, { "acc": 0.89339218, "epoch": 1.2865188313973952, "grad_norm": 28.026060104370117, "learning_rate": 8.115061679526803e-06, "loss": 0.53894434, "memory(GiB)": 34.88, "step": 47515, "train_speed(iter/s)": 0.413739 }, { "acc": 0.90238171, "epoch": 1.2866542116806108, "grad_norm": 11.200943946838379, "learning_rate": 8.114623945799105e-06, "loss": 0.54621511, "memory(GiB)": 34.88, "step": 47520, "train_speed(iter/s)": 0.413741 }, { "acc": 0.88337536, "epoch": 1.2867895919638264, "grad_norm": 16.77532386779785, "learning_rate": 8.114186173060887e-06, "loss": 0.58796344, "memory(GiB)": 34.88, "step": 47525, "train_speed(iter/s)": 0.413743 }, { "acc": 0.89105444, "epoch": 1.286924972247042, "grad_norm": 10.442931175231934, "learning_rate": 8.113748361317628e-06, "loss": 0.5837163, "memory(GiB)": 34.88, "step": 47530, "train_speed(iter/s)": 0.413745 }, { "acc": 0.89627438, "epoch": 1.2870603525302575, "grad_norm": 4.457766056060791, "learning_rate": 8.113310510574816e-06, "loss": 0.5931725, "memory(GiB)": 34.88, "step": 47535, "train_speed(iter/s)": 0.413747 }, { "acc": 0.90348701, "epoch": 1.287195732813473, "grad_norm": 4.577847480773926, "learning_rate": 8.112872620837938e-06, "loss": 0.49687252, "memory(GiB)": 34.88, "step": 47540, "train_speed(iter/s)": 0.413749 }, { "acc": 0.87803802, "epoch": 1.2873311130966885, "grad_norm": 14.520620346069336, "learning_rate": 8.112434692112476e-06, "loss": 0.69158354, "memory(GiB)": 34.88, "step": 47545, "train_speed(iter/s)": 0.413751 }, { "acc": 0.88508148, "epoch": 1.2874664933799043, "grad_norm": 13.293828964233398, "learning_rate": 8.111996724403917e-06, "loss": 0.663727, "memory(GiB)": 34.88, "step": 47550, "train_speed(iter/s)": 0.413753 }, { "acc": 0.90453672, "epoch": 1.2876018736631196, "grad_norm": 6.557843208312988, "learning_rate": 8.111558717717749e-06, "loss": 0.51841946, "memory(GiB)": 34.88, "step": 47555, "train_speed(iter/s)": 0.413755 }, { "acc": 0.89307537, "epoch": 1.2877372539463352, "grad_norm": 7.627804279327393, "learning_rate": 8.111120672059455e-06, "loss": 0.52342854, "memory(GiB)": 34.88, "step": 47560, "train_speed(iter/s)": 0.413757 }, { "acc": 0.88488455, "epoch": 1.2878726342295508, "grad_norm": 14.330886840820312, "learning_rate": 8.110682587434526e-06, "loss": 0.652001, "memory(GiB)": 34.88, "step": 47565, "train_speed(iter/s)": 0.413759 }, { "acc": 0.90574112, "epoch": 1.2880080145127664, "grad_norm": 8.16535758972168, "learning_rate": 8.110244463848448e-06, "loss": 0.46190124, "memory(GiB)": 34.88, "step": 47570, "train_speed(iter/s)": 0.413761 }, { "acc": 0.89229164, "epoch": 1.288143394795982, "grad_norm": 15.000274658203125, "learning_rate": 8.10980630130671e-06, "loss": 0.60211105, "memory(GiB)": 34.88, "step": 47575, "train_speed(iter/s)": 0.413763 }, { "acc": 0.89919834, "epoch": 1.2882787750791975, "grad_norm": 6.9776530265808105, "learning_rate": 8.109368099814801e-06, "loss": 0.45933557, "memory(GiB)": 34.88, "step": 47580, "train_speed(iter/s)": 0.413765 }, { "acc": 0.90665283, "epoch": 1.288414155362413, "grad_norm": 4.5555949211120605, "learning_rate": 8.108929859378212e-06, "loss": 0.53696108, "memory(GiB)": 34.88, "step": 47585, "train_speed(iter/s)": 0.413767 }, { "acc": 0.89059401, "epoch": 1.2885495356456285, "grad_norm": 6.097501277923584, "learning_rate": 8.108491580002428e-06, "loss": 0.559021, "memory(GiB)": 34.88, "step": 47590, "train_speed(iter/s)": 0.413769 }, { "acc": 0.89821739, "epoch": 1.288684915928844, "grad_norm": 6.09620475769043, "learning_rate": 8.108053261692943e-06, "loss": 0.54769979, "memory(GiB)": 34.88, "step": 47595, "train_speed(iter/s)": 0.413771 }, { "acc": 0.91154919, "epoch": 1.2888202962120596, "grad_norm": 6.145139217376709, "learning_rate": 8.107614904455249e-06, "loss": 0.46309185, "memory(GiB)": 34.88, "step": 47600, "train_speed(iter/s)": 0.413773 }, { "acc": 0.88066788, "epoch": 1.2889556764952752, "grad_norm": 8.465988159179688, "learning_rate": 8.107176508294834e-06, "loss": 0.68510523, "memory(GiB)": 34.88, "step": 47605, "train_speed(iter/s)": 0.413775 }, { "acc": 0.90489988, "epoch": 1.2890910567784908, "grad_norm": 8.711288452148438, "learning_rate": 8.106738073217191e-06, "loss": 0.54824414, "memory(GiB)": 34.88, "step": 47610, "train_speed(iter/s)": 0.413777 }, { "acc": 0.89828405, "epoch": 1.2892264370617064, "grad_norm": 11.175654411315918, "learning_rate": 8.106299599227812e-06, "loss": 0.48055944, "memory(GiB)": 34.88, "step": 47615, "train_speed(iter/s)": 0.413779 }, { "acc": 0.8741209, "epoch": 1.289361817344922, "grad_norm": 11.715079307556152, "learning_rate": 8.10586108633219e-06, "loss": 0.71204453, "memory(GiB)": 34.88, "step": 47620, "train_speed(iter/s)": 0.41378 }, { "acc": 0.90698175, "epoch": 1.2894971976281373, "grad_norm": 4.73269510269165, "learning_rate": 8.105422534535819e-06, "loss": 0.5119525, "memory(GiB)": 34.88, "step": 47625, "train_speed(iter/s)": 0.413783 }, { "acc": 0.88860683, "epoch": 1.289632577911353, "grad_norm": 13.819302558898926, "learning_rate": 8.104983943844193e-06, "loss": 0.61660452, "memory(GiB)": 34.88, "step": 47630, "train_speed(iter/s)": 0.413785 }, { "acc": 0.88260069, "epoch": 1.2897679581945685, "grad_norm": 4.817414283752441, "learning_rate": 8.104545314262803e-06, "loss": 0.63956022, "memory(GiB)": 34.88, "step": 47635, "train_speed(iter/s)": 0.413787 }, { "acc": 0.88489399, "epoch": 1.289903338477784, "grad_norm": 14.259847640991211, "learning_rate": 8.104106645797146e-06, "loss": 0.64862185, "memory(GiB)": 34.88, "step": 47640, "train_speed(iter/s)": 0.413789 }, { "acc": 0.89601212, "epoch": 1.2900387187609996, "grad_norm": 5.161764621734619, "learning_rate": 8.10366793845272e-06, "loss": 0.57463412, "memory(GiB)": 34.88, "step": 47645, "train_speed(iter/s)": 0.41379 }, { "acc": 0.89446077, "epoch": 1.2901740990442152, "grad_norm": 9.28330135345459, "learning_rate": 8.103229192235017e-06, "loss": 0.55112357, "memory(GiB)": 34.88, "step": 47650, "train_speed(iter/s)": 0.413793 }, { "acc": 0.91240864, "epoch": 1.2903094793274308, "grad_norm": 5.848872184753418, "learning_rate": 8.102790407149533e-06, "loss": 0.40796452, "memory(GiB)": 34.88, "step": 47655, "train_speed(iter/s)": 0.413795 }, { "acc": 0.89548044, "epoch": 1.2904448596106464, "grad_norm": 5.342663288116455, "learning_rate": 8.102351583201766e-06, "loss": 0.60327063, "memory(GiB)": 34.88, "step": 47660, "train_speed(iter/s)": 0.413797 }, { "acc": 0.91499453, "epoch": 1.290580239893862, "grad_norm": 4.891487121582031, "learning_rate": 8.101912720397213e-06, "loss": 0.45819283, "memory(GiB)": 34.88, "step": 47665, "train_speed(iter/s)": 0.413799 }, { "acc": 0.88018827, "epoch": 1.2907156201770773, "grad_norm": 7.504177570343018, "learning_rate": 8.10147381874137e-06, "loss": 0.59348164, "memory(GiB)": 34.88, "step": 47670, "train_speed(iter/s)": 0.413801 }, { "acc": 0.88671379, "epoch": 1.2908510004602929, "grad_norm": 6.530342102050781, "learning_rate": 8.101034878239739e-06, "loss": 0.56892309, "memory(GiB)": 34.88, "step": 47675, "train_speed(iter/s)": 0.413803 }, { "acc": 0.90916367, "epoch": 1.2909863807435085, "grad_norm": 5.307924747467041, "learning_rate": 8.100595898897814e-06, "loss": 0.4367177, "memory(GiB)": 34.88, "step": 47680, "train_speed(iter/s)": 0.413805 }, { "acc": 0.90148726, "epoch": 1.291121761026724, "grad_norm": 4.856777667999268, "learning_rate": 8.100156880721098e-06, "loss": 0.51526346, "memory(GiB)": 34.88, "step": 47685, "train_speed(iter/s)": 0.413808 }, { "acc": 0.89266796, "epoch": 1.2912571413099396, "grad_norm": 6.620580196380615, "learning_rate": 8.099717823715087e-06, "loss": 0.49119782, "memory(GiB)": 34.88, "step": 47690, "train_speed(iter/s)": 0.41381 }, { "acc": 0.88271999, "epoch": 1.2913925215931552, "grad_norm": 13.106342315673828, "learning_rate": 8.099278727885283e-06, "loss": 0.701266, "memory(GiB)": 34.88, "step": 47695, "train_speed(iter/s)": 0.413812 }, { "acc": 0.89228477, "epoch": 1.2915279018763708, "grad_norm": 5.808468818664551, "learning_rate": 8.098839593237188e-06, "loss": 0.57276254, "memory(GiB)": 34.88, "step": 47700, "train_speed(iter/s)": 0.413814 }, { "acc": 0.87039509, "epoch": 1.2916632821595861, "grad_norm": 8.330491065979004, "learning_rate": 8.098400419776298e-06, "loss": 0.55399017, "memory(GiB)": 34.88, "step": 47705, "train_speed(iter/s)": 0.413817 }, { "acc": 0.88540478, "epoch": 1.291798662442802, "grad_norm": 14.022684097290039, "learning_rate": 8.097961207508123e-06, "loss": 0.59745307, "memory(GiB)": 34.88, "step": 47710, "train_speed(iter/s)": 0.413818 }, { "acc": 0.88002415, "epoch": 1.2919340427260173, "grad_norm": 9.363524436950684, "learning_rate": 8.097521956438156e-06, "loss": 0.65801458, "memory(GiB)": 34.88, "step": 47715, "train_speed(iter/s)": 0.413821 }, { "acc": 0.90113211, "epoch": 1.2920694230092329, "grad_norm": 10.093746185302734, "learning_rate": 8.097082666571905e-06, "loss": 0.55650706, "memory(GiB)": 34.88, "step": 47720, "train_speed(iter/s)": 0.413823 }, { "acc": 0.89758434, "epoch": 1.2922048032924485, "grad_norm": 9.978434562683105, "learning_rate": 8.09664333791487e-06, "loss": 0.57644663, "memory(GiB)": 34.88, "step": 47725, "train_speed(iter/s)": 0.413825 }, { "acc": 0.89207811, "epoch": 1.292340183575664, "grad_norm": 6.9195966720581055, "learning_rate": 8.096203970472557e-06, "loss": 0.64929333, "memory(GiB)": 34.88, "step": 47730, "train_speed(iter/s)": 0.413827 }, { "acc": 0.90522585, "epoch": 1.2924755638588796, "grad_norm": 14.804524421691895, "learning_rate": 8.095764564250469e-06, "loss": 0.54668188, "memory(GiB)": 34.88, "step": 47735, "train_speed(iter/s)": 0.413829 }, { "acc": 0.86951199, "epoch": 1.2926109441420952, "grad_norm": 7.912405014038086, "learning_rate": 8.09532511925411e-06, "loss": 0.79250116, "memory(GiB)": 34.88, "step": 47740, "train_speed(iter/s)": 0.413831 }, { "acc": 0.90233984, "epoch": 1.2927463244253108, "grad_norm": 8.67551040649414, "learning_rate": 8.094885635488983e-06, "loss": 0.4895071, "memory(GiB)": 34.88, "step": 47745, "train_speed(iter/s)": 0.413832 }, { "acc": 0.90455971, "epoch": 1.2928817047085261, "grad_norm": 10.496343612670898, "learning_rate": 8.094446112960596e-06, "loss": 0.50291629, "memory(GiB)": 34.88, "step": 47750, "train_speed(iter/s)": 0.413834 }, { "acc": 0.89244881, "epoch": 1.2930170849917417, "grad_norm": 10.274420738220215, "learning_rate": 8.094006551674456e-06, "loss": 0.53943887, "memory(GiB)": 34.88, "step": 47755, "train_speed(iter/s)": 0.413836 }, { "acc": 0.89218121, "epoch": 1.2931524652749573, "grad_norm": 8.646997451782227, "learning_rate": 8.093566951636067e-06, "loss": 0.5827529, "memory(GiB)": 34.88, "step": 47760, "train_speed(iter/s)": 0.413839 }, { "acc": 0.90535126, "epoch": 1.2932878455581729, "grad_norm": 6.791635990142822, "learning_rate": 8.093127312850937e-06, "loss": 0.37363853, "memory(GiB)": 34.88, "step": 47765, "train_speed(iter/s)": 0.413841 }, { "acc": 0.89725742, "epoch": 1.2934232258413885, "grad_norm": 9.660240173339844, "learning_rate": 8.092687635324571e-06, "loss": 0.53993921, "memory(GiB)": 34.88, "step": 47770, "train_speed(iter/s)": 0.413843 }, { "acc": 0.89422302, "epoch": 1.293558606124604, "grad_norm": 9.42612361907959, "learning_rate": 8.092247919062481e-06, "loss": 0.49822826, "memory(GiB)": 34.88, "step": 47775, "train_speed(iter/s)": 0.413845 }, { "acc": 0.89293318, "epoch": 1.2936939864078196, "grad_norm": 7.680364608764648, "learning_rate": 8.091808164070171e-06, "loss": 0.57274437, "memory(GiB)": 34.88, "step": 47780, "train_speed(iter/s)": 0.413847 }, { "acc": 0.90365009, "epoch": 1.293829366691035, "grad_norm": 5.0513763427734375, "learning_rate": 8.091368370353155e-06, "loss": 0.4851016, "memory(GiB)": 34.88, "step": 47785, "train_speed(iter/s)": 0.413848 }, { "acc": 0.90152016, "epoch": 1.2939647469742508, "grad_norm": 6.955642223358154, "learning_rate": 8.090928537916936e-06, "loss": 0.61309443, "memory(GiB)": 34.88, "step": 47790, "train_speed(iter/s)": 0.41385 }, { "acc": 0.89635439, "epoch": 1.2941001272574661, "grad_norm": 7.900267124176025, "learning_rate": 8.090488666767028e-06, "loss": 0.5277935, "memory(GiB)": 34.88, "step": 47795, "train_speed(iter/s)": 0.413852 }, { "acc": 0.89934282, "epoch": 1.2942355075406817, "grad_norm": 8.043022155761719, "learning_rate": 8.090048756908942e-06, "loss": 0.54102197, "memory(GiB)": 34.88, "step": 47800, "train_speed(iter/s)": 0.413854 }, { "acc": 0.8878624, "epoch": 1.2943708878238973, "grad_norm": 7.994552135467529, "learning_rate": 8.089608808348183e-06, "loss": 0.58488464, "memory(GiB)": 34.88, "step": 47805, "train_speed(iter/s)": 0.413856 }, { "acc": 0.8967556, "epoch": 1.294506268107113, "grad_norm": 11.269288063049316, "learning_rate": 8.089168821090269e-06, "loss": 0.62597075, "memory(GiB)": 34.88, "step": 47810, "train_speed(iter/s)": 0.413858 }, { "acc": 0.89026375, "epoch": 1.2946416483903285, "grad_norm": 5.732298851013184, "learning_rate": 8.088728795140706e-06, "loss": 0.56953001, "memory(GiB)": 34.88, "step": 47815, "train_speed(iter/s)": 0.41386 }, { "acc": 0.88784161, "epoch": 1.294777028673544, "grad_norm": 11.73357105255127, "learning_rate": 8.088288730505013e-06, "loss": 0.61946249, "memory(GiB)": 34.88, "step": 47820, "train_speed(iter/s)": 0.413862 }, { "acc": 0.88461628, "epoch": 1.2949124089567596, "grad_norm": 10.41729736328125, "learning_rate": 8.087848627188695e-06, "loss": 0.51409378, "memory(GiB)": 34.88, "step": 47825, "train_speed(iter/s)": 0.413864 }, { "acc": 0.89330902, "epoch": 1.295047789239975, "grad_norm": 9.450708389282227, "learning_rate": 8.08740848519727e-06, "loss": 0.55580077, "memory(GiB)": 34.88, "step": 47830, "train_speed(iter/s)": 0.413866 }, { "acc": 0.92460766, "epoch": 1.2951831695231906, "grad_norm": 5.449965476989746, "learning_rate": 8.08696830453625e-06, "loss": 0.43996792, "memory(GiB)": 34.88, "step": 47835, "train_speed(iter/s)": 0.413868 }, { "acc": 0.88966055, "epoch": 1.2953185498064061, "grad_norm": 13.095471382141113, "learning_rate": 8.086528085211153e-06, "loss": 0.62046661, "memory(GiB)": 34.88, "step": 47840, "train_speed(iter/s)": 0.41387 }, { "acc": 0.90737019, "epoch": 1.2954539300896217, "grad_norm": 3.4338126182556152, "learning_rate": 8.086087827227487e-06, "loss": 0.40206537, "memory(GiB)": 34.88, "step": 47845, "train_speed(iter/s)": 0.413872 }, { "acc": 0.90458069, "epoch": 1.2955893103728373, "grad_norm": 4.215738296508789, "learning_rate": 8.08564753059077e-06, "loss": 0.43626947, "memory(GiB)": 34.88, "step": 47850, "train_speed(iter/s)": 0.413874 }, { "acc": 0.89476671, "epoch": 1.295724690656053, "grad_norm": 8.703835487365723, "learning_rate": 8.085207195306518e-06, "loss": 0.53616343, "memory(GiB)": 34.88, "step": 47855, "train_speed(iter/s)": 0.413876 }, { "acc": 0.88784924, "epoch": 1.2958600709392685, "grad_norm": 9.6321439743042, "learning_rate": 8.084766821380249e-06, "loss": 0.64408307, "memory(GiB)": 34.88, "step": 47860, "train_speed(iter/s)": 0.413878 }, { "acc": 0.87842579, "epoch": 1.2959954512224838, "grad_norm": 7.580129623413086, "learning_rate": 8.084326408817476e-06, "loss": 0.60901928, "memory(GiB)": 34.88, "step": 47865, "train_speed(iter/s)": 0.41388 }, { "acc": 0.88175611, "epoch": 1.2961308315056996, "grad_norm": 5.571812629699707, "learning_rate": 8.083885957623718e-06, "loss": 0.69784708, "memory(GiB)": 34.88, "step": 47870, "train_speed(iter/s)": 0.413882 }, { "acc": 0.87860556, "epoch": 1.296266211788915, "grad_norm": 11.000173568725586, "learning_rate": 8.083445467804492e-06, "loss": 0.62916613, "memory(GiB)": 34.88, "step": 47875, "train_speed(iter/s)": 0.413884 }, { "acc": 0.86896687, "epoch": 1.2964015920721306, "grad_norm": 15.652093887329102, "learning_rate": 8.083004939365315e-06, "loss": 0.61854782, "memory(GiB)": 34.88, "step": 47880, "train_speed(iter/s)": 0.413886 }, { "acc": 0.90183029, "epoch": 1.2965369723553462, "grad_norm": 20.11720848083496, "learning_rate": 8.082564372311708e-06, "loss": 0.50675712, "memory(GiB)": 34.88, "step": 47885, "train_speed(iter/s)": 0.413888 }, { "acc": 0.89471331, "epoch": 1.2966723526385617, "grad_norm": 9.565125465393066, "learning_rate": 8.082123766649188e-06, "loss": 0.52575769, "memory(GiB)": 34.88, "step": 47890, "train_speed(iter/s)": 0.41389 }, { "acc": 0.88796673, "epoch": 1.2968077329217773, "grad_norm": 7.800455093383789, "learning_rate": 8.081683122383275e-06, "loss": 0.65426416, "memory(GiB)": 34.88, "step": 47895, "train_speed(iter/s)": 0.413892 }, { "acc": 0.91601686, "epoch": 1.296943113204993, "grad_norm": 9.69859790802002, "learning_rate": 8.081242439519487e-06, "loss": 0.51057911, "memory(GiB)": 34.88, "step": 47900, "train_speed(iter/s)": 0.413894 }, { "acc": 0.90377636, "epoch": 1.2970784934882085, "grad_norm": 5.368228912353516, "learning_rate": 8.08080171806335e-06, "loss": 0.44992867, "memory(GiB)": 34.88, "step": 47905, "train_speed(iter/s)": 0.413896 }, { "acc": 0.90549107, "epoch": 1.2972138737714238, "grad_norm": 5.27070951461792, "learning_rate": 8.080360958020378e-06, "loss": 0.5427876, "memory(GiB)": 34.88, "step": 47910, "train_speed(iter/s)": 0.413899 }, { "acc": 0.87242432, "epoch": 1.2973492540546394, "grad_norm": 14.855378150939941, "learning_rate": 8.079920159396097e-06, "loss": 0.84382191, "memory(GiB)": 34.88, "step": 47915, "train_speed(iter/s)": 0.413901 }, { "acc": 0.90810461, "epoch": 1.297484634337855, "grad_norm": 113.64859771728516, "learning_rate": 8.079479322196028e-06, "loss": 0.50585575, "memory(GiB)": 34.88, "step": 47920, "train_speed(iter/s)": 0.413902 }, { "acc": 0.9146143, "epoch": 1.2976200146210706, "grad_norm": 4.778736591339111, "learning_rate": 8.079038446425692e-06, "loss": 0.44624538, "memory(GiB)": 34.88, "step": 47925, "train_speed(iter/s)": 0.413904 }, { "acc": 0.88827438, "epoch": 1.2977553949042862, "grad_norm": 4.706627368927002, "learning_rate": 8.078597532090613e-06, "loss": 0.59829006, "memory(GiB)": 34.88, "step": 47930, "train_speed(iter/s)": 0.413906 }, { "acc": 0.8838068, "epoch": 1.2978907751875017, "grad_norm": 8.028438568115234, "learning_rate": 8.078156579196313e-06, "loss": 0.76022391, "memory(GiB)": 34.88, "step": 47935, "train_speed(iter/s)": 0.413908 }, { "acc": 0.92155752, "epoch": 1.2980261554707173, "grad_norm": 6.672358512878418, "learning_rate": 8.077715587748317e-06, "loss": 0.37008054, "memory(GiB)": 34.88, "step": 47940, "train_speed(iter/s)": 0.41391 }, { "acc": 0.87870836, "epoch": 1.2981615357539327, "grad_norm": 11.124351501464844, "learning_rate": 8.07727455775215e-06, "loss": 0.64325781, "memory(GiB)": 34.88, "step": 47945, "train_speed(iter/s)": 0.413912 }, { "acc": 0.88394985, "epoch": 1.2982969160371485, "grad_norm": 17.870128631591797, "learning_rate": 8.076833489213336e-06, "loss": 0.63812504, "memory(GiB)": 34.88, "step": 47950, "train_speed(iter/s)": 0.413914 }, { "acc": 0.91396141, "epoch": 1.2984322963203638, "grad_norm": 6.974645137786865, "learning_rate": 8.076392382137397e-06, "loss": 0.4327199, "memory(GiB)": 34.88, "step": 47955, "train_speed(iter/s)": 0.413916 }, { "acc": 0.87902927, "epoch": 1.2985676766035794, "grad_norm": 9.113829612731934, "learning_rate": 8.075951236529864e-06, "loss": 0.57427793, "memory(GiB)": 34.88, "step": 47960, "train_speed(iter/s)": 0.413918 }, { "acc": 0.89595337, "epoch": 1.298703056886795, "grad_norm": 14.408730506896973, "learning_rate": 8.075510052396262e-06, "loss": 0.51446085, "memory(GiB)": 34.88, "step": 47965, "train_speed(iter/s)": 0.41392 }, { "acc": 0.87576656, "epoch": 1.2988384371700106, "grad_norm": 9.515708923339844, "learning_rate": 8.075068829742117e-06, "loss": 0.74682298, "memory(GiB)": 34.88, "step": 47970, "train_speed(iter/s)": 0.413921 }, { "acc": 0.87661085, "epoch": 1.2989738174532262, "grad_norm": 11.014384269714355, "learning_rate": 8.074627568572954e-06, "loss": 0.60273037, "memory(GiB)": 34.88, "step": 47975, "train_speed(iter/s)": 0.413922 }, { "acc": 0.89657173, "epoch": 1.2991091977364417, "grad_norm": 3.625821828842163, "learning_rate": 8.074186268894304e-06, "loss": 0.62524385, "memory(GiB)": 34.88, "step": 47980, "train_speed(iter/s)": 0.413925 }, { "acc": 0.88304472, "epoch": 1.2992445780196573, "grad_norm": 6.94021463394165, "learning_rate": 8.073744930711692e-06, "loss": 0.60687399, "memory(GiB)": 34.88, "step": 47985, "train_speed(iter/s)": 0.413927 }, { "acc": 0.89450121, "epoch": 1.2993799583028727, "grad_norm": 8.87065315246582, "learning_rate": 8.073303554030648e-06, "loss": 0.49713755, "memory(GiB)": 34.88, "step": 47990, "train_speed(iter/s)": 0.413929 }, { "acc": 0.89573212, "epoch": 1.2995153385860883, "grad_norm": 14.81333065032959, "learning_rate": 8.072862138856703e-06, "loss": 0.62907543, "memory(GiB)": 34.88, "step": 47995, "train_speed(iter/s)": 0.413931 }, { "acc": 0.88640919, "epoch": 1.2996507188693038, "grad_norm": 4.949281215667725, "learning_rate": 8.072420685195384e-06, "loss": 0.5033864, "memory(GiB)": 34.88, "step": 48000, "train_speed(iter/s)": 0.413933 }, { "acc": 0.91205635, "epoch": 1.2997860991525194, "grad_norm": 7.803915500640869, "learning_rate": 8.071979193052223e-06, "loss": 0.4024384, "memory(GiB)": 34.88, "step": 48005, "train_speed(iter/s)": 0.413934 }, { "acc": 0.89708939, "epoch": 1.299921479435735, "grad_norm": 11.830528259277344, "learning_rate": 8.071537662432752e-06, "loss": 0.56769686, "memory(GiB)": 34.88, "step": 48010, "train_speed(iter/s)": 0.413936 }, { "acc": 0.90340023, "epoch": 1.3000568597189506, "grad_norm": 25.569169998168945, "learning_rate": 8.071096093342496e-06, "loss": 0.57188358, "memory(GiB)": 34.88, "step": 48015, "train_speed(iter/s)": 0.413938 }, { "acc": 0.8856102, "epoch": 1.3001922400021662, "grad_norm": 16.217273712158203, "learning_rate": 8.070654485786991e-06, "loss": 0.77708373, "memory(GiB)": 34.88, "step": 48020, "train_speed(iter/s)": 0.413939 }, { "acc": 0.90822344, "epoch": 1.3003276202853815, "grad_norm": 6.272834300994873, "learning_rate": 8.07021283977177e-06, "loss": 0.46291313, "memory(GiB)": 34.88, "step": 48025, "train_speed(iter/s)": 0.413941 }, { "acc": 0.88191509, "epoch": 1.3004630005685973, "grad_norm": 8.104884147644043, "learning_rate": 8.069771155302362e-06, "loss": 0.69505053, "memory(GiB)": 34.88, "step": 48030, "train_speed(iter/s)": 0.413943 }, { "acc": 0.85780659, "epoch": 1.3005983808518127, "grad_norm": 17.339069366455078, "learning_rate": 8.0693294323843e-06, "loss": 0.84843273, "memory(GiB)": 34.88, "step": 48035, "train_speed(iter/s)": 0.413945 }, { "acc": 0.89494343, "epoch": 1.3007337611350283, "grad_norm": 5.106635570526123, "learning_rate": 8.068887671023124e-06, "loss": 0.47367287, "memory(GiB)": 34.88, "step": 48040, "train_speed(iter/s)": 0.413947 }, { "acc": 0.90894146, "epoch": 1.3008691414182438, "grad_norm": 6.07127046585083, "learning_rate": 8.06844587122436e-06, "loss": 0.49844227, "memory(GiB)": 34.88, "step": 48045, "train_speed(iter/s)": 0.413948 }, { "acc": 0.9059906, "epoch": 1.3010045217014594, "grad_norm": 9.151581764221191, "learning_rate": 8.068004032993545e-06, "loss": 0.59079204, "memory(GiB)": 34.88, "step": 48050, "train_speed(iter/s)": 0.41395 }, { "acc": 0.88884172, "epoch": 1.301139901984675, "grad_norm": 11.42907428741455, "learning_rate": 8.067562156336214e-06, "loss": 0.64411993, "memory(GiB)": 34.88, "step": 48055, "train_speed(iter/s)": 0.413952 }, { "acc": 0.8932889, "epoch": 1.3012752822678906, "grad_norm": 7.475404739379883, "learning_rate": 8.067120241257905e-06, "loss": 0.53658328, "memory(GiB)": 34.88, "step": 48060, "train_speed(iter/s)": 0.413954 }, { "acc": 0.88523903, "epoch": 1.3014106625511062, "grad_norm": 16.759662628173828, "learning_rate": 8.06667828776415e-06, "loss": 0.61873789, "memory(GiB)": 34.88, "step": 48065, "train_speed(iter/s)": 0.413955 }, { "acc": 0.8774971, "epoch": 1.3015460428343215, "grad_norm": 5.938663482666016, "learning_rate": 8.066236295860486e-06, "loss": 0.5840292, "memory(GiB)": 34.88, "step": 48070, "train_speed(iter/s)": 0.413956 }, { "acc": 0.87615843, "epoch": 1.301681423117537, "grad_norm": 9.533353805541992, "learning_rate": 8.065794265552452e-06, "loss": 0.69629216, "memory(GiB)": 34.88, "step": 48075, "train_speed(iter/s)": 0.413958 }, { "acc": 0.89176769, "epoch": 1.3018168034007527, "grad_norm": 48.49466323852539, "learning_rate": 8.065352196845584e-06, "loss": 0.60453577, "memory(GiB)": 34.88, "step": 48080, "train_speed(iter/s)": 0.41396 }, { "acc": 0.88414307, "epoch": 1.3019521836839683, "grad_norm": 5.276429653167725, "learning_rate": 8.06491008974542e-06, "loss": 0.5863605, "memory(GiB)": 34.88, "step": 48085, "train_speed(iter/s)": 0.413962 }, { "acc": 0.88223171, "epoch": 1.3020875639671838, "grad_norm": 12.082667350769043, "learning_rate": 8.064467944257498e-06, "loss": 0.64037919, "memory(GiB)": 34.88, "step": 48090, "train_speed(iter/s)": 0.413964 }, { "acc": 0.87920284, "epoch": 1.3022229442503994, "grad_norm": 25.167675018310547, "learning_rate": 8.064025760387358e-06, "loss": 0.63542681, "memory(GiB)": 34.88, "step": 48095, "train_speed(iter/s)": 0.413965 }, { "acc": 0.8834404, "epoch": 1.302358324533615, "grad_norm": 7.14066743850708, "learning_rate": 8.063583538140535e-06, "loss": 0.60304008, "memory(GiB)": 34.88, "step": 48100, "train_speed(iter/s)": 0.413967 }, { "acc": 0.8775528, "epoch": 1.3024937048168304, "grad_norm": 8.691230773925781, "learning_rate": 8.063141277522574e-06, "loss": 0.68310375, "memory(GiB)": 34.88, "step": 48105, "train_speed(iter/s)": 0.413968 }, { "acc": 0.89017582, "epoch": 1.3026290851000462, "grad_norm": 5.741418361663818, "learning_rate": 8.062698978539013e-06, "loss": 0.53301625, "memory(GiB)": 34.88, "step": 48110, "train_speed(iter/s)": 0.41397 }, { "acc": 0.88760424, "epoch": 1.3027644653832615, "grad_norm": 9.3705472946167, "learning_rate": 8.062256641195391e-06, "loss": 0.58051214, "memory(GiB)": 34.88, "step": 48115, "train_speed(iter/s)": 0.413972 }, { "acc": 0.88792944, "epoch": 1.302899845666477, "grad_norm": 7.308605670928955, "learning_rate": 8.061814265497252e-06, "loss": 0.53992205, "memory(GiB)": 34.88, "step": 48120, "train_speed(iter/s)": 0.413974 }, { "acc": 0.87943687, "epoch": 1.3030352259496927, "grad_norm": 7.159127235412598, "learning_rate": 8.061371851450135e-06, "loss": 0.65700264, "memory(GiB)": 34.88, "step": 48125, "train_speed(iter/s)": 0.413976 }, { "acc": 0.88749828, "epoch": 1.3031706062329083, "grad_norm": 7.779754161834717, "learning_rate": 8.060929399059587e-06, "loss": 0.70504618, "memory(GiB)": 34.88, "step": 48130, "train_speed(iter/s)": 0.413977 }, { "acc": 0.88968334, "epoch": 1.3033059865161238, "grad_norm": 8.440030097961426, "learning_rate": 8.060486908331141e-06, "loss": 0.55368147, "memory(GiB)": 34.88, "step": 48135, "train_speed(iter/s)": 0.413979 }, { "acc": 0.89214106, "epoch": 1.3034413667993394, "grad_norm": 8.07866096496582, "learning_rate": 8.060044379270352e-06, "loss": 0.59191265, "memory(GiB)": 34.88, "step": 48140, "train_speed(iter/s)": 0.41398 }, { "acc": 0.88212357, "epoch": 1.303576747082555, "grad_norm": 15.656099319458008, "learning_rate": 8.059601811882756e-06, "loss": 0.75264683, "memory(GiB)": 34.88, "step": 48145, "train_speed(iter/s)": 0.413982 }, { "acc": 0.904459, "epoch": 1.3037121273657704, "grad_norm": 8.394510269165039, "learning_rate": 8.059159206173896e-06, "loss": 0.46889515, "memory(GiB)": 34.88, "step": 48150, "train_speed(iter/s)": 0.413984 }, { "acc": 0.88781548, "epoch": 1.303847507648986, "grad_norm": 7.491031646728516, "learning_rate": 8.058716562149323e-06, "loss": 0.58341646, "memory(GiB)": 34.88, "step": 48155, "train_speed(iter/s)": 0.413986 }, { "acc": 0.88880272, "epoch": 1.3039828879322015, "grad_norm": 7.650571346282959, "learning_rate": 8.058273879814576e-06, "loss": 0.58258972, "memory(GiB)": 34.88, "step": 48160, "train_speed(iter/s)": 0.413988 }, { "acc": 0.88718548, "epoch": 1.304118268215417, "grad_norm": 13.71600341796875, "learning_rate": 8.057831159175203e-06, "loss": 0.66693745, "memory(GiB)": 34.88, "step": 48165, "train_speed(iter/s)": 0.41399 }, { "acc": 0.87780342, "epoch": 1.3042536484986327, "grad_norm": 9.713218688964844, "learning_rate": 8.05738840023675e-06, "loss": 0.63136845, "memory(GiB)": 34.88, "step": 48170, "train_speed(iter/s)": 0.413992 }, { "acc": 0.89250231, "epoch": 1.3043890287818483, "grad_norm": 9.73067855834961, "learning_rate": 8.056945603004761e-06, "loss": 0.536166, "memory(GiB)": 34.88, "step": 48175, "train_speed(iter/s)": 0.413993 }, { "acc": 0.86909122, "epoch": 1.3045244090650638, "grad_norm": 5.1975531578063965, "learning_rate": 8.056502767484787e-06, "loss": 0.73611403, "memory(GiB)": 34.88, "step": 48180, "train_speed(iter/s)": 0.413995 }, { "acc": 0.89524364, "epoch": 1.3046597893482792, "grad_norm": 14.683143615722656, "learning_rate": 8.056059893682374e-06, "loss": 0.51215854, "memory(GiB)": 34.88, "step": 48185, "train_speed(iter/s)": 0.413997 }, { "acc": 0.88782511, "epoch": 1.304795169631495, "grad_norm": 5.190059185028076, "learning_rate": 8.055616981603067e-06, "loss": 0.63471088, "memory(GiB)": 34.88, "step": 48190, "train_speed(iter/s)": 0.413999 }, { "acc": 0.91252747, "epoch": 1.3049305499147104, "grad_norm": 7.221526622772217, "learning_rate": 8.055174031252417e-06, "loss": 0.47610016, "memory(GiB)": 34.88, "step": 48195, "train_speed(iter/s)": 0.414001 }, { "acc": 0.90975561, "epoch": 1.305065930197926, "grad_norm": 4.655961990356445, "learning_rate": 8.054731042635975e-06, "loss": 0.45839691, "memory(GiB)": 34.88, "step": 48200, "train_speed(iter/s)": 0.414003 }, { "acc": 0.88912659, "epoch": 1.3052013104811415, "grad_norm": 7.105992317199707, "learning_rate": 8.054288015759284e-06, "loss": 0.70508561, "memory(GiB)": 34.88, "step": 48205, "train_speed(iter/s)": 0.414004 }, { "acc": 0.92518263, "epoch": 1.305336690764357, "grad_norm": 6.316390514373779, "learning_rate": 8.0538449506279e-06, "loss": 0.37856331, "memory(GiB)": 34.88, "step": 48210, "train_speed(iter/s)": 0.414006 }, { "acc": 0.88268261, "epoch": 1.3054720710475727, "grad_norm": 27.247377395629883, "learning_rate": 8.053401847247368e-06, "loss": 0.66021309, "memory(GiB)": 34.88, "step": 48215, "train_speed(iter/s)": 0.414008 }, { "acc": 0.89814796, "epoch": 1.3056074513307883, "grad_norm": 9.72153091430664, "learning_rate": 8.052958705623242e-06, "loss": 0.53950987, "memory(GiB)": 34.88, "step": 48220, "train_speed(iter/s)": 0.41401 }, { "acc": 0.88956146, "epoch": 1.3057428316140038, "grad_norm": 9.782230377197266, "learning_rate": 8.052515525761076e-06, "loss": 0.63620615, "memory(GiB)": 34.88, "step": 48225, "train_speed(iter/s)": 0.414012 }, { "acc": 0.90992069, "epoch": 1.3058782118972192, "grad_norm": 4.341070175170898, "learning_rate": 8.052072307666414e-06, "loss": 0.500776, "memory(GiB)": 34.88, "step": 48230, "train_speed(iter/s)": 0.414014 }, { "acc": 0.91034002, "epoch": 1.3060135921804348, "grad_norm": 8.882204055786133, "learning_rate": 8.051629051344815e-06, "loss": 0.47137499, "memory(GiB)": 34.88, "step": 48235, "train_speed(iter/s)": 0.414015 }, { "acc": 0.88759327, "epoch": 1.3061489724636504, "grad_norm": 16.235614776611328, "learning_rate": 8.051185756801828e-06, "loss": 0.67576561, "memory(GiB)": 34.88, "step": 48240, "train_speed(iter/s)": 0.414017 }, { "acc": 0.91041384, "epoch": 1.306284352746866, "grad_norm": 7.4554829597473145, "learning_rate": 8.05074242404301e-06, "loss": 0.43708339, "memory(GiB)": 34.88, "step": 48245, "train_speed(iter/s)": 0.414019 }, { "acc": 0.90430317, "epoch": 1.3064197330300815, "grad_norm": 6.631117820739746, "learning_rate": 8.05029905307391e-06, "loss": 0.47202463, "memory(GiB)": 34.88, "step": 48250, "train_speed(iter/s)": 0.414021 }, { "acc": 0.91143255, "epoch": 1.306555113313297, "grad_norm": 34.76923751831055, "learning_rate": 8.049855643900082e-06, "loss": 0.49966025, "memory(GiB)": 34.88, "step": 48255, "train_speed(iter/s)": 0.414023 }, { "acc": 0.89858379, "epoch": 1.3066904935965127, "grad_norm": 8.178800582885742, "learning_rate": 8.049412196527086e-06, "loss": 0.57881255, "memory(GiB)": 34.88, "step": 48260, "train_speed(iter/s)": 0.414025 }, { "acc": 0.91293507, "epoch": 1.306825873879728, "grad_norm": 4.4431633949279785, "learning_rate": 8.048968710960472e-06, "loss": 0.5021801, "memory(GiB)": 34.88, "step": 48265, "train_speed(iter/s)": 0.414027 }, { "acc": 0.88462439, "epoch": 1.3069612541629438, "grad_norm": 6.9391770362854, "learning_rate": 8.048525187205797e-06, "loss": 0.63905048, "memory(GiB)": 34.88, "step": 48270, "train_speed(iter/s)": 0.414029 }, { "acc": 0.86134834, "epoch": 1.3070966344461592, "grad_norm": 7.657658576965332, "learning_rate": 8.048081625268619e-06, "loss": 0.7011651, "memory(GiB)": 34.88, "step": 48275, "train_speed(iter/s)": 0.414031 }, { "acc": 0.90687466, "epoch": 1.3072320147293748, "grad_norm": 4.935122966766357, "learning_rate": 8.04763802515449e-06, "loss": 0.47138424, "memory(GiB)": 34.88, "step": 48280, "train_speed(iter/s)": 0.414033 }, { "acc": 0.91350784, "epoch": 1.3073673950125904, "grad_norm": 9.032851219177246, "learning_rate": 8.04719438686897e-06, "loss": 0.40251856, "memory(GiB)": 34.88, "step": 48285, "train_speed(iter/s)": 0.414035 }, { "acc": 0.89448805, "epoch": 1.307502775295806, "grad_norm": 12.605831146240234, "learning_rate": 8.046750710417618e-06, "loss": 0.56695747, "memory(GiB)": 34.88, "step": 48290, "train_speed(iter/s)": 0.414037 }, { "acc": 0.88289452, "epoch": 1.3076381555790215, "grad_norm": 5.723515510559082, "learning_rate": 8.046306995805988e-06, "loss": 0.53623495, "memory(GiB)": 34.88, "step": 48295, "train_speed(iter/s)": 0.414039 }, { "acc": 0.89040337, "epoch": 1.307773535862237, "grad_norm": 6.339138031005859, "learning_rate": 8.04586324303964e-06, "loss": 0.54736171, "memory(GiB)": 34.88, "step": 48300, "train_speed(iter/s)": 0.414041 }, { "acc": 0.90416813, "epoch": 1.3079089161454527, "grad_norm": 7.609899044036865, "learning_rate": 8.045419452124135e-06, "loss": 0.50868487, "memory(GiB)": 34.88, "step": 48305, "train_speed(iter/s)": 0.414043 }, { "acc": 0.90616703, "epoch": 1.308044296428668, "grad_norm": 12.654119491577148, "learning_rate": 8.04497562306503e-06, "loss": 0.51074219, "memory(GiB)": 34.88, "step": 48310, "train_speed(iter/s)": 0.414045 }, { "acc": 0.89900589, "epoch": 1.3081796767118836, "grad_norm": 7.324193000793457, "learning_rate": 8.044531755867886e-06, "loss": 0.47488279, "memory(GiB)": 34.88, "step": 48315, "train_speed(iter/s)": 0.414047 }, { "acc": 0.88020067, "epoch": 1.3083150569950992, "grad_norm": 7.503477573394775, "learning_rate": 8.044087850538262e-06, "loss": 0.71244693, "memory(GiB)": 34.88, "step": 48320, "train_speed(iter/s)": 0.41405 }, { "acc": 0.88430691, "epoch": 1.3084504372783148, "grad_norm": 5.785887241363525, "learning_rate": 8.043643907081721e-06, "loss": 0.68704586, "memory(GiB)": 34.88, "step": 48325, "train_speed(iter/s)": 0.414051 }, { "acc": 0.91018848, "epoch": 1.3085858175615304, "grad_norm": 7.324754238128662, "learning_rate": 8.043199925503821e-06, "loss": 0.44563413, "memory(GiB)": 34.88, "step": 48330, "train_speed(iter/s)": 0.414053 }, { "acc": 0.8865118, "epoch": 1.308721197844746, "grad_norm": 7.091097354888916, "learning_rate": 8.042755905810126e-06, "loss": 0.58772373, "memory(GiB)": 34.88, "step": 48335, "train_speed(iter/s)": 0.414055 }, { "acc": 0.91727066, "epoch": 1.3088565781279615, "grad_norm": 4.315550327301025, "learning_rate": 8.042311848006198e-06, "loss": 0.4348423, "memory(GiB)": 34.88, "step": 48340, "train_speed(iter/s)": 0.414057 }, { "acc": 0.86799879, "epoch": 1.3089919584111769, "grad_norm": 11.176060676574707, "learning_rate": 8.0418677520976e-06, "loss": 0.77527437, "memory(GiB)": 34.88, "step": 48345, "train_speed(iter/s)": 0.414059 }, { "acc": 0.89082508, "epoch": 1.3091273386943927, "grad_norm": 11.618339538574219, "learning_rate": 8.041423618089893e-06, "loss": 0.61543655, "memory(GiB)": 34.88, "step": 48350, "train_speed(iter/s)": 0.414061 }, { "acc": 0.87868948, "epoch": 1.309262718977608, "grad_norm": 9.09152889251709, "learning_rate": 8.040979445988643e-06, "loss": 0.6159194, "memory(GiB)": 34.88, "step": 48355, "train_speed(iter/s)": 0.414063 }, { "acc": 0.90624905, "epoch": 1.3093980992608236, "grad_norm": 9.542470932006836, "learning_rate": 8.040535235799413e-06, "loss": 0.47409034, "memory(GiB)": 34.88, "step": 48360, "train_speed(iter/s)": 0.414065 }, { "acc": 0.89575253, "epoch": 1.3095334795440392, "grad_norm": 12.164813995361328, "learning_rate": 8.040090987527768e-06, "loss": 0.55789423, "memory(GiB)": 34.88, "step": 48365, "train_speed(iter/s)": 0.414067 }, { "acc": 0.8916832, "epoch": 1.3096688598272548, "grad_norm": 8.69968318939209, "learning_rate": 8.039646701179274e-06, "loss": 0.55846643, "memory(GiB)": 34.88, "step": 48370, "train_speed(iter/s)": 0.414069 }, { "acc": 0.91224308, "epoch": 1.3098042401104704, "grad_norm": 7.9700775146484375, "learning_rate": 8.039202376759497e-06, "loss": 0.48393416, "memory(GiB)": 34.88, "step": 48375, "train_speed(iter/s)": 0.414071 }, { "acc": 0.89690495, "epoch": 1.309939620393686, "grad_norm": 13.957879066467285, "learning_rate": 8.038758014274e-06, "loss": 0.54524775, "memory(GiB)": 34.88, "step": 48380, "train_speed(iter/s)": 0.414073 }, { "acc": 0.87568502, "epoch": 1.3100750006769015, "grad_norm": 11.160513877868652, "learning_rate": 8.038313613728352e-06, "loss": 0.5467885, "memory(GiB)": 34.88, "step": 48385, "train_speed(iter/s)": 0.414075 }, { "acc": 0.90882187, "epoch": 1.3102103809601169, "grad_norm": 9.152226448059082, "learning_rate": 8.037869175128117e-06, "loss": 0.42013936, "memory(GiB)": 34.88, "step": 48390, "train_speed(iter/s)": 0.414076 }, { "acc": 0.90139389, "epoch": 1.3103457612433325, "grad_norm": 7.153623104095459, "learning_rate": 8.037424698478867e-06, "loss": 0.4659688, "memory(GiB)": 34.88, "step": 48395, "train_speed(iter/s)": 0.414078 }, { "acc": 0.89577246, "epoch": 1.310481141526548, "grad_norm": 15.139204978942871, "learning_rate": 8.036980183786166e-06, "loss": 0.58657317, "memory(GiB)": 34.88, "step": 48400, "train_speed(iter/s)": 0.41408 }, { "acc": 0.90239048, "epoch": 1.3106165218097636, "grad_norm": 16.218746185302734, "learning_rate": 8.036535631055584e-06, "loss": 0.61647444, "memory(GiB)": 34.88, "step": 48405, "train_speed(iter/s)": 0.414082 }, { "acc": 0.87810202, "epoch": 1.3107519020929792, "grad_norm": 10.623562812805176, "learning_rate": 8.036091040292691e-06, "loss": 0.68712893, "memory(GiB)": 34.88, "step": 48410, "train_speed(iter/s)": 0.414084 }, { "acc": 0.91089706, "epoch": 1.3108872823761948, "grad_norm": 8.454322814941406, "learning_rate": 8.035646411503055e-06, "loss": 0.4745512, "memory(GiB)": 34.88, "step": 48415, "train_speed(iter/s)": 0.414086 }, { "acc": 0.88005114, "epoch": 1.3110226626594104, "grad_norm": 13.010004997253418, "learning_rate": 8.035201744692246e-06, "loss": 0.6011878, "memory(GiB)": 34.88, "step": 48420, "train_speed(iter/s)": 0.414088 }, { "acc": 0.89062862, "epoch": 1.3111580429426257, "grad_norm": 21.98396873474121, "learning_rate": 8.034757039865835e-06, "loss": 0.62924309, "memory(GiB)": 34.88, "step": 48425, "train_speed(iter/s)": 0.41409 }, { "acc": 0.8840765, "epoch": 1.3112934232258415, "grad_norm": 9.7241792678833, "learning_rate": 8.034312297029392e-06, "loss": 0.60382919, "memory(GiB)": 34.88, "step": 48430, "train_speed(iter/s)": 0.414092 }, { "acc": 0.89717627, "epoch": 1.3114288035090569, "grad_norm": 4.166248798370361, "learning_rate": 8.033867516188487e-06, "loss": 0.50164881, "memory(GiB)": 34.88, "step": 48435, "train_speed(iter/s)": 0.414094 }, { "acc": 0.90742912, "epoch": 1.3115641837922725, "grad_norm": 4.912347316741943, "learning_rate": 8.033422697348695e-06, "loss": 0.4129755, "memory(GiB)": 34.88, "step": 48440, "train_speed(iter/s)": 0.414096 }, { "acc": 0.90748806, "epoch": 1.311699564075488, "grad_norm": 25.927536010742188, "learning_rate": 8.032977840515587e-06, "loss": 0.50730772, "memory(GiB)": 34.88, "step": 48445, "train_speed(iter/s)": 0.414098 }, { "acc": 0.90543413, "epoch": 1.3118349443587036, "grad_norm": 7.8422369956970215, "learning_rate": 8.032532945694736e-06, "loss": 0.47996902, "memory(GiB)": 34.88, "step": 48450, "train_speed(iter/s)": 0.4141 }, { "acc": 0.89928894, "epoch": 1.3119703246419192, "grad_norm": 6.653850078582764, "learning_rate": 8.032088012891712e-06, "loss": 0.49494152, "memory(GiB)": 34.88, "step": 48455, "train_speed(iter/s)": 0.414102 }, { "acc": 0.92000332, "epoch": 1.3121057049251348, "grad_norm": 4.197278022766113, "learning_rate": 8.031643042112092e-06, "loss": 0.38495319, "memory(GiB)": 34.88, "step": 48460, "train_speed(iter/s)": 0.414104 }, { "acc": 0.90101585, "epoch": 1.3122410852083504, "grad_norm": 11.08135986328125, "learning_rate": 8.031198033361448e-06, "loss": 0.47859492, "memory(GiB)": 34.88, "step": 48465, "train_speed(iter/s)": 0.414106 }, { "acc": 0.86845064, "epoch": 1.3123764654915657, "grad_norm": 10.637046813964844, "learning_rate": 8.030752986645358e-06, "loss": 0.73219175, "memory(GiB)": 34.88, "step": 48470, "train_speed(iter/s)": 0.414108 }, { "acc": 0.89461594, "epoch": 1.3125118457747813, "grad_norm": 3.825302839279175, "learning_rate": 8.030307901969394e-06, "loss": 0.55387955, "memory(GiB)": 34.88, "step": 48475, "train_speed(iter/s)": 0.41411 }, { "acc": 0.91364784, "epoch": 1.3126472260579969, "grad_norm": 8.851006507873535, "learning_rate": 8.029862779339133e-06, "loss": 0.41072378, "memory(GiB)": 34.88, "step": 48480, "train_speed(iter/s)": 0.414112 }, { "acc": 0.90968609, "epoch": 1.3127826063412125, "grad_norm": 4.5161895751953125, "learning_rate": 8.02941761876015e-06, "loss": 0.50087595, "memory(GiB)": 34.88, "step": 48485, "train_speed(iter/s)": 0.414114 }, { "acc": 0.87817993, "epoch": 1.312917986624428, "grad_norm": 11.47383975982666, "learning_rate": 8.028972420238023e-06, "loss": 0.660355, "memory(GiB)": 34.88, "step": 48490, "train_speed(iter/s)": 0.414116 }, { "acc": 0.86894293, "epoch": 1.3130533669076436, "grad_norm": 12.415069580078125, "learning_rate": 8.028527183778326e-06, "loss": 0.78946662, "memory(GiB)": 34.88, "step": 48495, "train_speed(iter/s)": 0.414118 }, { "acc": 0.8887949, "epoch": 1.3131887471908592, "grad_norm": 42.02979278564453, "learning_rate": 8.028081909386639e-06, "loss": 0.62048831, "memory(GiB)": 34.88, "step": 48500, "train_speed(iter/s)": 0.41412 }, { "acc": 0.92126675, "epoch": 1.3133241274740746, "grad_norm": 5.674601078033447, "learning_rate": 8.027636597068538e-06, "loss": 0.39399579, "memory(GiB)": 34.88, "step": 48505, "train_speed(iter/s)": 0.414122 }, { "acc": 0.89006701, "epoch": 1.3134595077572901, "grad_norm": 7.405944347381592, "learning_rate": 8.027191246829606e-06, "loss": 0.52476206, "memory(GiB)": 34.88, "step": 48510, "train_speed(iter/s)": 0.414124 }, { "acc": 0.885849, "epoch": 1.3135948880405057, "grad_norm": 15.251303672790527, "learning_rate": 8.026745858675417e-06, "loss": 0.58940086, "memory(GiB)": 34.88, "step": 48515, "train_speed(iter/s)": 0.414126 }, { "acc": 0.89057055, "epoch": 1.3137302683237213, "grad_norm": 6.58709192276001, "learning_rate": 8.026300432611554e-06, "loss": 0.65441914, "memory(GiB)": 34.88, "step": 48520, "train_speed(iter/s)": 0.414128 }, { "acc": 0.9001749, "epoch": 1.3138656486069369, "grad_norm": 5.844932556152344, "learning_rate": 8.025854968643592e-06, "loss": 0.45724797, "memory(GiB)": 34.88, "step": 48525, "train_speed(iter/s)": 0.41413 }, { "acc": 0.89267159, "epoch": 1.3140010288901525, "grad_norm": 14.33560848236084, "learning_rate": 8.025409466777117e-06, "loss": 0.63549528, "memory(GiB)": 34.88, "step": 48530, "train_speed(iter/s)": 0.414132 }, { "acc": 0.90133839, "epoch": 1.314136409173368, "grad_norm": 8.27591609954834, "learning_rate": 8.024963927017703e-06, "loss": 0.60125513, "memory(GiB)": 34.88, "step": 48535, "train_speed(iter/s)": 0.414134 }, { "acc": 0.88398294, "epoch": 1.3142717894565834, "grad_norm": 9.22323989868164, "learning_rate": 8.024518349370938e-06, "loss": 0.67700214, "memory(GiB)": 34.88, "step": 48540, "train_speed(iter/s)": 0.414136 }, { "acc": 0.87533798, "epoch": 1.3144071697397992, "grad_norm": 7.373898506164551, "learning_rate": 8.024072733842401e-06, "loss": 0.64781885, "memory(GiB)": 34.88, "step": 48545, "train_speed(iter/s)": 0.414138 }, { "acc": 0.89378605, "epoch": 1.3145425500230146, "grad_norm": 14.537861824035645, "learning_rate": 8.023627080437673e-06, "loss": 0.60895066, "memory(GiB)": 34.88, "step": 48550, "train_speed(iter/s)": 0.41414 }, { "acc": 0.91258774, "epoch": 1.3146779303062301, "grad_norm": 14.3084716796875, "learning_rate": 8.02318138916234e-06, "loss": 0.52940936, "memory(GiB)": 34.88, "step": 48555, "train_speed(iter/s)": 0.414142 }, { "acc": 0.91232738, "epoch": 1.3148133105894457, "grad_norm": 6.752827167510986, "learning_rate": 8.022735660021982e-06, "loss": 0.53852725, "memory(GiB)": 34.88, "step": 48560, "train_speed(iter/s)": 0.414144 }, { "acc": 0.89145985, "epoch": 1.3149486908726613, "grad_norm": 5.981058597564697, "learning_rate": 8.022289893022182e-06, "loss": 0.62653551, "memory(GiB)": 34.88, "step": 48565, "train_speed(iter/s)": 0.414145 }, { "acc": 0.90149317, "epoch": 1.3150840711558769, "grad_norm": 6.899142742156982, "learning_rate": 8.021844088168527e-06, "loss": 0.49830713, "memory(GiB)": 34.88, "step": 48570, "train_speed(iter/s)": 0.414147 }, { "acc": 0.89801016, "epoch": 1.3152194514390925, "grad_norm": 10.35013484954834, "learning_rate": 8.021398245466601e-06, "loss": 0.55348911, "memory(GiB)": 34.88, "step": 48575, "train_speed(iter/s)": 0.414149 }, { "acc": 0.88120728, "epoch": 1.315354831722308, "grad_norm": 7.957623481750488, "learning_rate": 8.020952364921989e-06, "loss": 0.6461525, "memory(GiB)": 34.88, "step": 48580, "train_speed(iter/s)": 0.414151 }, { "acc": 0.89176674, "epoch": 1.3154902120055234, "grad_norm": 8.272233963012695, "learning_rate": 8.020506446540274e-06, "loss": 0.46817021, "memory(GiB)": 34.88, "step": 48585, "train_speed(iter/s)": 0.414153 }, { "acc": 0.90145121, "epoch": 1.315625592288739, "grad_norm": 9.058786392211914, "learning_rate": 8.020060490327045e-06, "loss": 0.57459126, "memory(GiB)": 34.88, "step": 48590, "train_speed(iter/s)": 0.414155 }, { "acc": 0.91382618, "epoch": 1.3157609725719546, "grad_norm": 5.445359230041504, "learning_rate": 8.019614496287888e-06, "loss": 0.38017075, "memory(GiB)": 34.88, "step": 48595, "train_speed(iter/s)": 0.414157 }, { "acc": 0.90193052, "epoch": 1.3158963528551701, "grad_norm": 8.768669128417969, "learning_rate": 8.019168464428389e-06, "loss": 0.55436201, "memory(GiB)": 34.88, "step": 48600, "train_speed(iter/s)": 0.414159 }, { "acc": 0.89099655, "epoch": 1.3160317331383857, "grad_norm": 17.212284088134766, "learning_rate": 8.018722394754135e-06, "loss": 0.57627916, "memory(GiB)": 34.88, "step": 48605, "train_speed(iter/s)": 0.414161 }, { "acc": 0.89649086, "epoch": 1.3161671134216013, "grad_norm": 5.660506248474121, "learning_rate": 8.018276287270716e-06, "loss": 0.53667293, "memory(GiB)": 34.88, "step": 48610, "train_speed(iter/s)": 0.414164 }, { "acc": 0.88588734, "epoch": 1.316302493704817, "grad_norm": 16.317459106445312, "learning_rate": 8.017830141983718e-06, "loss": 0.59291496, "memory(GiB)": 34.88, "step": 48615, "train_speed(iter/s)": 0.414166 }, { "acc": 0.90627613, "epoch": 1.3164378739880322, "grad_norm": 10.705503463745117, "learning_rate": 8.017383958898731e-06, "loss": 0.45352316, "memory(GiB)": 34.88, "step": 48620, "train_speed(iter/s)": 0.414167 }, { "acc": 0.88126574, "epoch": 1.316573254271248, "grad_norm": 21.057729721069336, "learning_rate": 8.016937738021346e-06, "loss": 0.67858257, "memory(GiB)": 34.88, "step": 48625, "train_speed(iter/s)": 0.41417 }, { "acc": 0.8899435, "epoch": 1.3167086345544634, "grad_norm": 6.970509052276611, "learning_rate": 8.016491479357148e-06, "loss": 0.6495585, "memory(GiB)": 34.88, "step": 48630, "train_speed(iter/s)": 0.414172 }, { "acc": 0.89814262, "epoch": 1.316844014837679, "grad_norm": 10.992460250854492, "learning_rate": 8.016045182911732e-06, "loss": 0.57124557, "memory(GiB)": 34.88, "step": 48635, "train_speed(iter/s)": 0.414174 }, { "acc": 0.89054422, "epoch": 1.3169793951208946, "grad_norm": 6.973571300506592, "learning_rate": 8.015598848690688e-06, "loss": 0.55863876, "memory(GiB)": 34.88, "step": 48640, "train_speed(iter/s)": 0.414176 }, { "acc": 0.88644466, "epoch": 1.3171147754041101, "grad_norm": 6.902926445007324, "learning_rate": 8.015152476699605e-06, "loss": 0.59595547, "memory(GiB)": 34.88, "step": 48645, "train_speed(iter/s)": 0.414178 }, { "acc": 0.90667715, "epoch": 1.3172501556873257, "grad_norm": 13.551931381225586, "learning_rate": 8.014706066944076e-06, "loss": 0.53365216, "memory(GiB)": 34.88, "step": 48650, "train_speed(iter/s)": 0.41418 }, { "acc": 0.88945656, "epoch": 1.3173855359705413, "grad_norm": 8.261685371398926, "learning_rate": 8.014259619429694e-06, "loss": 0.60871539, "memory(GiB)": 34.88, "step": 48655, "train_speed(iter/s)": 0.414182 }, { "acc": 0.89645548, "epoch": 1.317520916253757, "grad_norm": 5.888625144958496, "learning_rate": 8.01381313416205e-06, "loss": 0.57853718, "memory(GiB)": 34.88, "step": 48660, "train_speed(iter/s)": 0.414184 }, { "acc": 0.9004755, "epoch": 1.3176562965369722, "grad_norm": 6.369472026824951, "learning_rate": 8.013366611146739e-06, "loss": 0.50108242, "memory(GiB)": 34.88, "step": 48665, "train_speed(iter/s)": 0.414186 }, { "acc": 0.89211884, "epoch": 1.3177916768201878, "grad_norm": 5.934520244598389, "learning_rate": 8.012920050389349e-06, "loss": 0.49277277, "memory(GiB)": 34.88, "step": 48670, "train_speed(iter/s)": 0.414188 }, { "acc": 0.88131466, "epoch": 1.3179270571034034, "grad_norm": 14.210970878601074, "learning_rate": 8.012473451895484e-06, "loss": 0.67956362, "memory(GiB)": 34.88, "step": 48675, "train_speed(iter/s)": 0.41419 }, { "acc": 0.88283958, "epoch": 1.318062437386619, "grad_norm": 7.897933483123779, "learning_rate": 8.01202681567073e-06, "loss": 0.58896632, "memory(GiB)": 34.88, "step": 48680, "train_speed(iter/s)": 0.414192 }, { "acc": 0.89871731, "epoch": 1.3181978176698346, "grad_norm": 7.822031497955322, "learning_rate": 8.011580141720686e-06, "loss": 0.53564262, "memory(GiB)": 34.88, "step": 48685, "train_speed(iter/s)": 0.414194 }, { "acc": 0.89780502, "epoch": 1.3183331979530502, "grad_norm": 17.674962997436523, "learning_rate": 8.011133430050945e-06, "loss": 0.59112577, "memory(GiB)": 34.88, "step": 48690, "train_speed(iter/s)": 0.414196 }, { "acc": 0.90541239, "epoch": 1.3184685782362657, "grad_norm": 7.201760292053223, "learning_rate": 8.010686680667107e-06, "loss": 0.49612055, "memory(GiB)": 34.88, "step": 48695, "train_speed(iter/s)": 0.414198 }, { "acc": 0.89911861, "epoch": 1.318603958519481, "grad_norm": 4.988587856292725, "learning_rate": 8.010239893574763e-06, "loss": 0.4816556, "memory(GiB)": 34.88, "step": 48700, "train_speed(iter/s)": 0.4142 }, { "acc": 0.90254116, "epoch": 1.318739338802697, "grad_norm": 12.980838775634766, "learning_rate": 8.009793068779514e-06, "loss": 0.59566498, "memory(GiB)": 34.88, "step": 48705, "train_speed(iter/s)": 0.414202 }, { "acc": 0.90780649, "epoch": 1.3188747190859123, "grad_norm": 5.305335521697998, "learning_rate": 8.009346206286957e-06, "loss": 0.54370861, "memory(GiB)": 34.88, "step": 48710, "train_speed(iter/s)": 0.414204 }, { "acc": 0.86958656, "epoch": 1.3190100993691278, "grad_norm": 12.079974174499512, "learning_rate": 8.008899306102689e-06, "loss": 0.69569259, "memory(GiB)": 34.88, "step": 48715, "train_speed(iter/s)": 0.414206 }, { "acc": 0.87544346, "epoch": 1.3191454796523434, "grad_norm": 10.261617660522461, "learning_rate": 8.008452368232309e-06, "loss": 0.68649187, "memory(GiB)": 34.88, "step": 48720, "train_speed(iter/s)": 0.414208 }, { "acc": 0.8966114, "epoch": 1.319280859935559, "grad_norm": 9.235487937927246, "learning_rate": 8.008005392681412e-06, "loss": 0.57732143, "memory(GiB)": 34.88, "step": 48725, "train_speed(iter/s)": 0.41421 }, { "acc": 0.9015398, "epoch": 1.3194162402187746, "grad_norm": 5.694642543792725, "learning_rate": 8.0075583794556e-06, "loss": 0.48046856, "memory(GiB)": 34.88, "step": 48730, "train_speed(iter/s)": 0.414211 }, { "acc": 0.88559694, "epoch": 1.3195516205019902, "grad_norm": 10.041600227355957, "learning_rate": 8.007111328560474e-06, "loss": 0.64811049, "memory(GiB)": 34.88, "step": 48735, "train_speed(iter/s)": 0.414213 }, { "acc": 0.89663105, "epoch": 1.3196870007852057, "grad_norm": 9.80285930633545, "learning_rate": 8.006664240001635e-06, "loss": 0.55826244, "memory(GiB)": 34.88, "step": 48740, "train_speed(iter/s)": 0.414215 }, { "acc": 0.89312344, "epoch": 1.319822381068421, "grad_norm": 6.775171279907227, "learning_rate": 8.00621711378468e-06, "loss": 0.58290706, "memory(GiB)": 34.88, "step": 48745, "train_speed(iter/s)": 0.414217 }, { "acc": 0.88475761, "epoch": 1.3199577613516367, "grad_norm": 12.714242935180664, "learning_rate": 8.005769949915211e-06, "loss": 0.54341912, "memory(GiB)": 34.88, "step": 48750, "train_speed(iter/s)": 0.414219 }, { "acc": 0.87656031, "epoch": 1.3200931416348523, "grad_norm": 9.339423179626465, "learning_rate": 8.005322748398833e-06, "loss": 0.65738707, "memory(GiB)": 34.88, "step": 48755, "train_speed(iter/s)": 0.414221 }, { "acc": 0.90801964, "epoch": 1.3202285219180678, "grad_norm": 8.60901927947998, "learning_rate": 8.004875509241145e-06, "loss": 0.43117371, "memory(GiB)": 34.88, "step": 48760, "train_speed(iter/s)": 0.414222 }, { "acc": 0.88147869, "epoch": 1.3203639022012834, "grad_norm": 10.828285217285156, "learning_rate": 8.004428232447749e-06, "loss": 0.75900326, "memory(GiB)": 34.88, "step": 48765, "train_speed(iter/s)": 0.414224 }, { "acc": 0.8990036, "epoch": 1.320499282484499, "grad_norm": 5.921069145202637, "learning_rate": 8.00398091802425e-06, "loss": 0.55273461, "memory(GiB)": 34.88, "step": 48770, "train_speed(iter/s)": 0.414226 }, { "acc": 0.89017467, "epoch": 1.3206346627677146, "grad_norm": 7.900566101074219, "learning_rate": 8.003533565976249e-06, "loss": 0.57961998, "memory(GiB)": 34.88, "step": 48775, "train_speed(iter/s)": 0.414228 }, { "acc": 0.90878773, "epoch": 1.32077004305093, "grad_norm": 17.63787269592285, "learning_rate": 8.003086176309354e-06, "loss": 0.46231384, "memory(GiB)": 34.88, "step": 48780, "train_speed(iter/s)": 0.41423 }, { "acc": 0.88493767, "epoch": 1.3209054233341457, "grad_norm": 5.288390636444092, "learning_rate": 8.002638749029166e-06, "loss": 0.65176907, "memory(GiB)": 34.88, "step": 48785, "train_speed(iter/s)": 0.414232 }, { "acc": 0.89965467, "epoch": 1.321040803617361, "grad_norm": 8.364426612854004, "learning_rate": 8.002191284141291e-06, "loss": 0.54926333, "memory(GiB)": 34.88, "step": 48790, "train_speed(iter/s)": 0.414234 }, { "acc": 0.9248847, "epoch": 1.3211761839005767, "grad_norm": 8.394631385803223, "learning_rate": 8.001743781651336e-06, "loss": 0.38499832, "memory(GiB)": 34.88, "step": 48795, "train_speed(iter/s)": 0.414236 }, { "acc": 0.90168905, "epoch": 1.3213115641837923, "grad_norm": 11.308355331420898, "learning_rate": 8.001296241564903e-06, "loss": 0.49499779, "memory(GiB)": 34.88, "step": 48800, "train_speed(iter/s)": 0.414237 }, { "acc": 0.89561214, "epoch": 1.3214469444670078, "grad_norm": 4.832115173339844, "learning_rate": 8.0008486638876e-06, "loss": 0.44699764, "memory(GiB)": 34.88, "step": 48805, "train_speed(iter/s)": 0.414239 }, { "acc": 0.88332052, "epoch": 1.3215823247502234, "grad_norm": 7.641927242279053, "learning_rate": 8.000401048625036e-06, "loss": 0.60655713, "memory(GiB)": 34.88, "step": 48810, "train_speed(iter/s)": 0.414242 }, { "acc": 0.89866705, "epoch": 1.321717705033439, "grad_norm": 10.015862464904785, "learning_rate": 7.999953395782815e-06, "loss": 0.5703001, "memory(GiB)": 34.88, "step": 48815, "train_speed(iter/s)": 0.414244 }, { "acc": 0.90323792, "epoch": 1.3218530853166546, "grad_norm": 21.36278533935547, "learning_rate": 7.999505705366548e-06, "loss": 0.49157763, "memory(GiB)": 34.88, "step": 48820, "train_speed(iter/s)": 0.414245 }, { "acc": 0.87949648, "epoch": 1.32198846559987, "grad_norm": 6.876397132873535, "learning_rate": 7.999057977381843e-06, "loss": 0.59582787, "memory(GiB)": 34.88, "step": 48825, "train_speed(iter/s)": 0.414247 }, { "acc": 0.9168973, "epoch": 1.3221238458830855, "grad_norm": 5.976983070373535, "learning_rate": 7.998610211834303e-06, "loss": 0.36145883, "memory(GiB)": 34.88, "step": 48830, "train_speed(iter/s)": 0.414249 }, { "acc": 0.90149755, "epoch": 1.322259226166301, "grad_norm": 8.165229797363281, "learning_rate": 7.998162408729543e-06, "loss": 0.51920929, "memory(GiB)": 34.88, "step": 48835, "train_speed(iter/s)": 0.414251 }, { "acc": 0.88426018, "epoch": 1.3223946064495167, "grad_norm": 8.90834903717041, "learning_rate": 7.997714568073171e-06, "loss": 0.57309542, "memory(GiB)": 34.88, "step": 48840, "train_speed(iter/s)": 0.414253 }, { "acc": 0.87915955, "epoch": 1.3225299867327323, "grad_norm": 6.065849781036377, "learning_rate": 7.997266689870798e-06, "loss": 0.63369884, "memory(GiB)": 34.88, "step": 48845, "train_speed(iter/s)": 0.414255 }, { "acc": 0.89768429, "epoch": 1.3226653670159478, "grad_norm": 11.604728698730469, "learning_rate": 7.996818774128034e-06, "loss": 0.62585287, "memory(GiB)": 34.88, "step": 48850, "train_speed(iter/s)": 0.414257 }, { "acc": 0.92093849, "epoch": 1.3228007472991634, "grad_norm": 4.045223712921143, "learning_rate": 7.996370820850488e-06, "loss": 0.39624491, "memory(GiB)": 34.88, "step": 48855, "train_speed(iter/s)": 0.414259 }, { "acc": 0.87135639, "epoch": 1.3229361275823788, "grad_norm": 10.638993263244629, "learning_rate": 7.995922830043771e-06, "loss": 0.69719386, "memory(GiB)": 34.88, "step": 48860, "train_speed(iter/s)": 0.414261 }, { "acc": 0.88392334, "epoch": 1.3230715078655946, "grad_norm": 7.8333611488342285, "learning_rate": 7.995474801713502e-06, "loss": 0.70852408, "memory(GiB)": 34.88, "step": 48865, "train_speed(iter/s)": 0.414262 }, { "acc": 0.90202255, "epoch": 1.32320688814881, "grad_norm": 13.47928524017334, "learning_rate": 7.995026735865287e-06, "loss": 0.55569401, "memory(GiB)": 34.88, "step": 48870, "train_speed(iter/s)": 0.414264 }, { "acc": 0.88422451, "epoch": 1.3233422684320255, "grad_norm": 8.17175006866455, "learning_rate": 7.99457863250474e-06, "loss": 0.69036717, "memory(GiB)": 34.88, "step": 48875, "train_speed(iter/s)": 0.414266 }, { "acc": 0.90021248, "epoch": 1.323477648715241, "grad_norm": 8.810521125793457, "learning_rate": 7.994130491637473e-06, "loss": 0.46001196, "memory(GiB)": 34.88, "step": 48880, "train_speed(iter/s)": 0.414269 }, { "acc": 0.87836046, "epoch": 1.3236130289984567, "grad_norm": 14.827188491821289, "learning_rate": 7.993682313269101e-06, "loss": 0.65263472, "memory(GiB)": 34.88, "step": 48885, "train_speed(iter/s)": 0.414271 }, { "acc": 0.91039295, "epoch": 1.3237484092816723, "grad_norm": 10.466072082519531, "learning_rate": 7.993234097405243e-06, "loss": 0.42205524, "memory(GiB)": 34.88, "step": 48890, "train_speed(iter/s)": 0.414273 }, { "acc": 0.90827227, "epoch": 1.3238837895648878, "grad_norm": 7.428018569946289, "learning_rate": 7.992785844051508e-06, "loss": 0.51094565, "memory(GiB)": 34.88, "step": 48895, "train_speed(iter/s)": 0.414275 }, { "acc": 0.90259972, "epoch": 1.3240191698481034, "grad_norm": 5.349980354309082, "learning_rate": 7.992337553213512e-06, "loss": 0.4078393, "memory(GiB)": 34.88, "step": 48900, "train_speed(iter/s)": 0.414277 }, { "acc": 0.89989519, "epoch": 1.3241545501313188, "grad_norm": 21.12730598449707, "learning_rate": 7.991889224896872e-06, "loss": 0.47201681, "memory(GiB)": 34.88, "step": 48905, "train_speed(iter/s)": 0.414279 }, { "acc": 0.9086092, "epoch": 1.3242899304145344, "grad_norm": 6.6676435470581055, "learning_rate": 7.991440859107204e-06, "loss": 0.52656937, "memory(GiB)": 34.88, "step": 48910, "train_speed(iter/s)": 0.414281 }, { "acc": 0.9014864, "epoch": 1.32442531069775, "grad_norm": 13.050919532775879, "learning_rate": 7.990992455850125e-06, "loss": 0.50928078, "memory(GiB)": 34.88, "step": 48915, "train_speed(iter/s)": 0.414283 }, { "acc": 0.87487774, "epoch": 1.3245606909809655, "grad_norm": 6.531683921813965, "learning_rate": 7.990544015131252e-06, "loss": 0.61854935, "memory(GiB)": 34.88, "step": 48920, "train_speed(iter/s)": 0.414285 }, { "acc": 0.90879097, "epoch": 1.324696071264181, "grad_norm": 11.455857276916504, "learning_rate": 7.990095536956201e-06, "loss": 0.43866582, "memory(GiB)": 34.88, "step": 48925, "train_speed(iter/s)": 0.414287 }, { "acc": 0.89565897, "epoch": 1.3248314515473967, "grad_norm": 25.530567169189453, "learning_rate": 7.989647021330593e-06, "loss": 0.57008748, "memory(GiB)": 34.88, "step": 48930, "train_speed(iter/s)": 0.414289 }, { "acc": 0.90947514, "epoch": 1.3249668318306123, "grad_norm": 5.497183799743652, "learning_rate": 7.989198468260043e-06, "loss": 0.43673444, "memory(GiB)": 34.88, "step": 48935, "train_speed(iter/s)": 0.414291 }, { "acc": 0.91438084, "epoch": 1.3251022121138276, "grad_norm": 7.334897041320801, "learning_rate": 7.988749877750175e-06, "loss": 0.50519524, "memory(GiB)": 34.88, "step": 48940, "train_speed(iter/s)": 0.414293 }, { "acc": 0.898843, "epoch": 1.3252375923970434, "grad_norm": 10.718835830688477, "learning_rate": 7.988301249806602e-06, "loss": 0.53194838, "memory(GiB)": 34.88, "step": 48945, "train_speed(iter/s)": 0.414295 }, { "acc": 0.90808315, "epoch": 1.3253729726802588, "grad_norm": 10.060097694396973, "learning_rate": 7.987852584434948e-06, "loss": 0.44055834, "memory(GiB)": 34.88, "step": 48950, "train_speed(iter/s)": 0.414297 }, { "acc": 0.88541012, "epoch": 1.3255083529634744, "grad_norm": 8.254288673400879, "learning_rate": 7.987403881640832e-06, "loss": 0.56516666, "memory(GiB)": 34.88, "step": 48955, "train_speed(iter/s)": 0.414299 }, { "acc": 0.87631111, "epoch": 1.32564373324669, "grad_norm": 7.729429244995117, "learning_rate": 7.986955141429877e-06, "loss": 0.70169926, "memory(GiB)": 34.88, "step": 48960, "train_speed(iter/s)": 0.4143 }, { "acc": 0.89991856, "epoch": 1.3257791135299055, "grad_norm": 94.04386901855469, "learning_rate": 7.986506363807705e-06, "loss": 0.49030795, "memory(GiB)": 34.88, "step": 48965, "train_speed(iter/s)": 0.414302 }, { "acc": 0.89699526, "epoch": 1.325914493813121, "grad_norm": 6.5564961433410645, "learning_rate": 7.986057548779931e-06, "loss": 0.53210588, "memory(GiB)": 34.88, "step": 48970, "train_speed(iter/s)": 0.414304 }, { "acc": 0.90522003, "epoch": 1.3260498740963367, "grad_norm": 9.375592231750488, "learning_rate": 7.985608696352187e-06, "loss": 0.49065785, "memory(GiB)": 34.88, "step": 48975, "train_speed(iter/s)": 0.414306 }, { "acc": 0.88801956, "epoch": 1.3261852543795523, "grad_norm": 10.717913627624512, "learning_rate": 7.985159806530089e-06, "loss": 0.62004466, "memory(GiB)": 34.88, "step": 48980, "train_speed(iter/s)": 0.414308 }, { "acc": 0.89081812, "epoch": 1.3263206346627676, "grad_norm": 4.245068550109863, "learning_rate": 7.98471087931926e-06, "loss": 0.53106127, "memory(GiB)": 34.88, "step": 48985, "train_speed(iter/s)": 0.41431 }, { "acc": 0.90782595, "epoch": 1.3264560149459832, "grad_norm": 3.614576578140259, "learning_rate": 7.984261914725328e-06, "loss": 0.4026792, "memory(GiB)": 34.88, "step": 48990, "train_speed(iter/s)": 0.414312 }, { "acc": 0.86754522, "epoch": 1.3265913952291988, "grad_norm": 10.645975112915039, "learning_rate": 7.983812912753914e-06, "loss": 0.79976606, "memory(GiB)": 34.88, "step": 48995, "train_speed(iter/s)": 0.414314 }, { "acc": 0.88265915, "epoch": 1.3267267755124144, "grad_norm": 15.26258373260498, "learning_rate": 7.983363873410645e-06, "loss": 0.63421144, "memory(GiB)": 34.88, "step": 49000, "train_speed(iter/s)": 0.414316 }, { "acc": 0.87998676, "epoch": 1.32686215579563, "grad_norm": 20.830745697021484, "learning_rate": 7.982914796701143e-06, "loss": 0.70187426, "memory(GiB)": 34.88, "step": 49005, "train_speed(iter/s)": 0.414318 }, { "acc": 0.88693256, "epoch": 1.3269975360788455, "grad_norm": 7.155261039733887, "learning_rate": 7.982465682631036e-06, "loss": 0.56390934, "memory(GiB)": 34.88, "step": 49010, "train_speed(iter/s)": 0.41432 }, { "acc": 0.88537369, "epoch": 1.327132916362061, "grad_norm": 7.449040412902832, "learning_rate": 7.982016531205948e-06, "loss": 0.62039623, "memory(GiB)": 34.88, "step": 49015, "train_speed(iter/s)": 0.414322 }, { "acc": 0.8957695, "epoch": 1.3272682966452765, "grad_norm": 7.886623382568359, "learning_rate": 7.981567342431509e-06, "loss": 0.52066212, "memory(GiB)": 34.88, "step": 49020, "train_speed(iter/s)": 0.414324 }, { "acc": 0.91590271, "epoch": 1.3274036769284923, "grad_norm": 6.9272589683532715, "learning_rate": 7.981118116313343e-06, "loss": 0.46218567, "memory(GiB)": 34.88, "step": 49025, "train_speed(iter/s)": 0.414326 }, { "acc": 0.90815153, "epoch": 1.3275390572117076, "grad_norm": 7.491617202758789, "learning_rate": 7.980668852857077e-06, "loss": 0.54048095, "memory(GiB)": 34.88, "step": 49030, "train_speed(iter/s)": 0.414328 }, { "acc": 0.90009537, "epoch": 1.3276744374949232, "grad_norm": 16.93580436706543, "learning_rate": 7.980219552068342e-06, "loss": 0.55921364, "memory(GiB)": 34.88, "step": 49035, "train_speed(iter/s)": 0.41433 }, { "acc": 0.8757761, "epoch": 1.3278098177781388, "grad_norm": 13.79605484008789, "learning_rate": 7.979770213952763e-06, "loss": 0.74290018, "memory(GiB)": 34.88, "step": 49040, "train_speed(iter/s)": 0.414332 }, { "acc": 0.90195522, "epoch": 1.3279451980613544, "grad_norm": 4.975107192993164, "learning_rate": 7.97932083851597e-06, "loss": 0.46535931, "memory(GiB)": 34.88, "step": 49045, "train_speed(iter/s)": 0.414334 }, { "acc": 0.9025589, "epoch": 1.32808057834457, "grad_norm": 16.733428955078125, "learning_rate": 7.978871425763594e-06, "loss": 0.52043381, "memory(GiB)": 34.88, "step": 49050, "train_speed(iter/s)": 0.414336 }, { "acc": 0.8994833, "epoch": 1.3282159586277855, "grad_norm": 8.607913970947266, "learning_rate": 7.978421975701262e-06, "loss": 0.55128288, "memory(GiB)": 34.88, "step": 49055, "train_speed(iter/s)": 0.414338 }, { "acc": 0.91786633, "epoch": 1.328351338911001, "grad_norm": 5.945791721343994, "learning_rate": 7.977972488334608e-06, "loss": 0.42721844, "memory(GiB)": 34.88, "step": 49060, "train_speed(iter/s)": 0.41434 }, { "acc": 0.91820812, "epoch": 1.3284867191942165, "grad_norm": 5.079983234405518, "learning_rate": 7.977522963669257e-06, "loss": 0.47617717, "memory(GiB)": 34.88, "step": 49065, "train_speed(iter/s)": 0.414342 }, { "acc": 0.89699678, "epoch": 1.328622099477432, "grad_norm": 15.049212455749512, "learning_rate": 7.977073401710847e-06, "loss": 0.61923709, "memory(GiB)": 34.88, "step": 49070, "train_speed(iter/s)": 0.414344 }, { "acc": 0.87790232, "epoch": 1.3287574797606476, "grad_norm": 13.393935203552246, "learning_rate": 7.976623802465005e-06, "loss": 0.59556918, "memory(GiB)": 34.88, "step": 49075, "train_speed(iter/s)": 0.414346 }, { "acc": 0.88087759, "epoch": 1.3288928600438632, "grad_norm": 11.72084903717041, "learning_rate": 7.976174165937363e-06, "loss": 0.58321581, "memory(GiB)": 34.88, "step": 49080, "train_speed(iter/s)": 0.414348 }, { "acc": 0.92870779, "epoch": 1.3290282403270788, "grad_norm": 3.2640910148620605, "learning_rate": 7.975724492133555e-06, "loss": 0.33837676, "memory(GiB)": 34.88, "step": 49085, "train_speed(iter/s)": 0.41435 }, { "acc": 0.88270531, "epoch": 1.3291636206102944, "grad_norm": 28.6503849029541, "learning_rate": 7.975274781059215e-06, "loss": 0.62524915, "memory(GiB)": 34.88, "step": 49090, "train_speed(iter/s)": 0.414352 }, { "acc": 0.88810978, "epoch": 1.32929900089351, "grad_norm": 7.758147716522217, "learning_rate": 7.974825032719975e-06, "loss": 0.63640738, "memory(GiB)": 34.88, "step": 49095, "train_speed(iter/s)": 0.414354 }, { "acc": 0.88739529, "epoch": 1.3294343811767253, "grad_norm": 8.523346900939941, "learning_rate": 7.97437524712147e-06, "loss": 0.60181818, "memory(GiB)": 34.88, "step": 49100, "train_speed(iter/s)": 0.414356 }, { "acc": 0.87693481, "epoch": 1.329569761459941, "grad_norm": 13.21802806854248, "learning_rate": 7.973925424269334e-06, "loss": 0.64827824, "memory(GiB)": 34.88, "step": 49105, "train_speed(iter/s)": 0.414358 }, { "acc": 0.89364834, "epoch": 1.3297051417431565, "grad_norm": 8.710793495178223, "learning_rate": 7.973475564169199e-06, "loss": 0.55263653, "memory(GiB)": 34.88, "step": 49110, "train_speed(iter/s)": 0.41436 }, { "acc": 0.89693642, "epoch": 1.329840522026372, "grad_norm": 90.61897277832031, "learning_rate": 7.973025666826706e-06, "loss": 0.48647394, "memory(GiB)": 34.88, "step": 49115, "train_speed(iter/s)": 0.414362 }, { "acc": 0.91285868, "epoch": 1.3299759023095876, "grad_norm": 7.204343795776367, "learning_rate": 7.972575732247488e-06, "loss": 0.41964164, "memory(GiB)": 34.88, "step": 49120, "train_speed(iter/s)": 0.414364 }, { "acc": 0.89671669, "epoch": 1.3301112825928032, "grad_norm": 11.32481575012207, "learning_rate": 7.97212576043718e-06, "loss": 0.52377763, "memory(GiB)": 34.88, "step": 49125, "train_speed(iter/s)": 0.414366 }, { "acc": 0.90209198, "epoch": 1.3302466628760188, "grad_norm": 7.323117733001709, "learning_rate": 7.971675751401422e-06, "loss": 0.47022996, "memory(GiB)": 34.88, "step": 49130, "train_speed(iter/s)": 0.414368 }, { "acc": 0.88608789, "epoch": 1.3303820431592344, "grad_norm": 7.039689540863037, "learning_rate": 7.971225705145848e-06, "loss": 0.59727321, "memory(GiB)": 34.88, "step": 49135, "train_speed(iter/s)": 0.41437 }, { "acc": 0.89948349, "epoch": 1.33051742344245, "grad_norm": 11.584285736083984, "learning_rate": 7.970775621676096e-06, "loss": 0.56283832, "memory(GiB)": 34.88, "step": 49140, "train_speed(iter/s)": 0.414371 }, { "acc": 0.86372051, "epoch": 1.3306528037256653, "grad_norm": 9.693875312805176, "learning_rate": 7.970325500997807e-06, "loss": 0.84416275, "memory(GiB)": 34.88, "step": 49145, "train_speed(iter/s)": 0.414373 }, { "acc": 0.90602713, "epoch": 1.3307881840088809, "grad_norm": 6.818587779998779, "learning_rate": 7.96987534311662e-06, "loss": 0.43846679, "memory(GiB)": 34.88, "step": 49150, "train_speed(iter/s)": 0.414375 }, { "acc": 0.89503765, "epoch": 1.3309235642920965, "grad_norm": 17.20860481262207, "learning_rate": 7.969425148038168e-06, "loss": 0.6096879, "memory(GiB)": 34.88, "step": 49155, "train_speed(iter/s)": 0.414377 }, { "acc": 0.89292078, "epoch": 1.331058944575312, "grad_norm": 6.557055950164795, "learning_rate": 7.968974915768095e-06, "loss": 0.49681492, "memory(GiB)": 34.88, "step": 49160, "train_speed(iter/s)": 0.414379 }, { "acc": 0.88961372, "epoch": 1.3311943248585276, "grad_norm": 10.93297290802002, "learning_rate": 7.968524646312042e-06, "loss": 0.58127384, "memory(GiB)": 34.88, "step": 49165, "train_speed(iter/s)": 0.414381 }, { "acc": 0.87096157, "epoch": 1.3313297051417432, "grad_norm": 5.972321033477783, "learning_rate": 7.968074339675648e-06, "loss": 0.67810488, "memory(GiB)": 34.88, "step": 49170, "train_speed(iter/s)": 0.414383 }, { "acc": 0.87968292, "epoch": 1.3314650854249588, "grad_norm": 12.947497367858887, "learning_rate": 7.967623995864553e-06, "loss": 0.64083729, "memory(GiB)": 34.88, "step": 49175, "train_speed(iter/s)": 0.414385 }, { "acc": 0.89922695, "epoch": 1.3316004657081741, "grad_norm": 8.357641220092773, "learning_rate": 7.9671736148844e-06, "loss": 0.55466423, "memory(GiB)": 34.88, "step": 49180, "train_speed(iter/s)": 0.414387 }, { "acc": 0.88449612, "epoch": 1.33173584599139, "grad_norm": 8.038678169250488, "learning_rate": 7.966723196740831e-06, "loss": 0.60885239, "memory(GiB)": 34.88, "step": 49185, "train_speed(iter/s)": 0.414389 }, { "acc": 0.90001202, "epoch": 1.3318712262746053, "grad_norm": 7.0734148025512695, "learning_rate": 7.96627274143949e-06, "loss": 0.47481403, "memory(GiB)": 34.88, "step": 49190, "train_speed(iter/s)": 0.414391 }, { "acc": 0.88543081, "epoch": 1.3320066065578209, "grad_norm": 10.090353012084961, "learning_rate": 7.965822248986015e-06, "loss": 0.50624924, "memory(GiB)": 34.88, "step": 49195, "train_speed(iter/s)": 0.414393 }, { "acc": 0.90498657, "epoch": 1.3321419868410365, "grad_norm": 18.882766723632812, "learning_rate": 7.965371719386052e-06, "loss": 0.56575227, "memory(GiB)": 34.88, "step": 49200, "train_speed(iter/s)": 0.414395 }, { "acc": 0.89198494, "epoch": 1.332277367124252, "grad_norm": 7.754002571105957, "learning_rate": 7.964921152645245e-06, "loss": 0.56690874, "memory(GiB)": 34.88, "step": 49205, "train_speed(iter/s)": 0.414397 }, { "acc": 0.86740875, "epoch": 1.3324127474074676, "grad_norm": 27.241744995117188, "learning_rate": 7.964470548769239e-06, "loss": 0.72730622, "memory(GiB)": 34.88, "step": 49210, "train_speed(iter/s)": 0.414399 }, { "acc": 0.89870844, "epoch": 1.3325481276906832, "grad_norm": 6.65516996383667, "learning_rate": 7.964019907763677e-06, "loss": 0.5925025, "memory(GiB)": 34.88, "step": 49215, "train_speed(iter/s)": 0.414401 }, { "acc": 0.91499405, "epoch": 1.3326835079738988, "grad_norm": 6.714751243591309, "learning_rate": 7.963569229634204e-06, "loss": 0.48098125, "memory(GiB)": 34.88, "step": 49220, "train_speed(iter/s)": 0.414403 }, { "acc": 0.89914722, "epoch": 1.3328188882571141, "grad_norm": 8.227828979492188, "learning_rate": 7.963118514386466e-06, "loss": 0.48654952, "memory(GiB)": 34.88, "step": 49225, "train_speed(iter/s)": 0.414405 }, { "acc": 0.89770393, "epoch": 1.3329542685403297, "grad_norm": 7.00921106338501, "learning_rate": 7.962667762026112e-06, "loss": 0.56927152, "memory(GiB)": 34.88, "step": 49230, "train_speed(iter/s)": 0.414406 }, { "acc": 0.87497101, "epoch": 1.3330896488235453, "grad_norm": 7.770803451538086, "learning_rate": 7.962216972558785e-06, "loss": 0.6707274, "memory(GiB)": 34.88, "step": 49235, "train_speed(iter/s)": 0.414408 }, { "acc": 0.89204769, "epoch": 1.3332250291067609, "grad_norm": 7.0430803298950195, "learning_rate": 7.961766145990134e-06, "loss": 0.63129458, "memory(GiB)": 34.88, "step": 49240, "train_speed(iter/s)": 0.41441 }, { "acc": 0.90472813, "epoch": 1.3333604093899765, "grad_norm": 19.118690490722656, "learning_rate": 7.961315282325804e-06, "loss": 0.53693457, "memory(GiB)": 34.88, "step": 49245, "train_speed(iter/s)": 0.414412 }, { "acc": 0.89746609, "epoch": 1.333495789673192, "grad_norm": 7.282479763031006, "learning_rate": 7.960864381571446e-06, "loss": 0.49871531, "memory(GiB)": 34.88, "step": 49250, "train_speed(iter/s)": 0.414413 }, { "acc": 0.90400724, "epoch": 1.3336311699564076, "grad_norm": 9.828513145446777, "learning_rate": 7.960413443732707e-06, "loss": 0.56134582, "memory(GiB)": 34.88, "step": 49255, "train_speed(iter/s)": 0.414415 }, { "acc": 0.89858618, "epoch": 1.333766550239623, "grad_norm": 13.637886047363281, "learning_rate": 7.959962468815234e-06, "loss": 0.48663516, "memory(GiB)": 34.88, "step": 49260, "train_speed(iter/s)": 0.414417 }, { "acc": 0.90481291, "epoch": 1.3339019305228388, "grad_norm": 4.308648109436035, "learning_rate": 7.95951145682468e-06, "loss": 0.49794626, "memory(GiB)": 34.88, "step": 49265, "train_speed(iter/s)": 0.414419 }, { "acc": 0.88308001, "epoch": 1.3340373108060541, "grad_norm": 5.953770160675049, "learning_rate": 7.95906040776669e-06, "loss": 0.63471794, "memory(GiB)": 34.88, "step": 49270, "train_speed(iter/s)": 0.414421 }, { "acc": 0.89562893, "epoch": 1.3341726910892697, "grad_norm": 9.638184547424316, "learning_rate": 7.958609321646921e-06, "loss": 0.52841597, "memory(GiB)": 34.88, "step": 49275, "train_speed(iter/s)": 0.414423 }, { "acc": 0.90189228, "epoch": 1.3343080713724853, "grad_norm": 4.393795013427734, "learning_rate": 7.95815819847102e-06, "loss": 0.57029762, "memory(GiB)": 34.88, "step": 49280, "train_speed(iter/s)": 0.414424 }, { "acc": 0.91150036, "epoch": 1.3344434516557009, "grad_norm": 7.800928115844727, "learning_rate": 7.957707038244638e-06, "loss": 0.45545921, "memory(GiB)": 34.88, "step": 49285, "train_speed(iter/s)": 0.414426 }, { "acc": 0.86869717, "epoch": 1.3345788319389165, "grad_norm": 9.818851470947266, "learning_rate": 7.957255840973425e-06, "loss": 0.73061657, "memory(GiB)": 34.88, "step": 49290, "train_speed(iter/s)": 0.414428 }, { "acc": 0.89722452, "epoch": 1.334714212222132, "grad_norm": 11.737303733825684, "learning_rate": 7.956804606663036e-06, "loss": 0.52628479, "memory(GiB)": 34.88, "step": 49295, "train_speed(iter/s)": 0.41443 }, { "acc": 0.92259264, "epoch": 1.3348495925053476, "grad_norm": 7.269991397857666, "learning_rate": 7.956353335319123e-06, "loss": 0.41847787, "memory(GiB)": 34.88, "step": 49300, "train_speed(iter/s)": 0.414432 }, { "acc": 0.89932747, "epoch": 1.334984972788563, "grad_norm": 12.5238618850708, "learning_rate": 7.955902026947338e-06, "loss": 0.5796288, "memory(GiB)": 34.88, "step": 49305, "train_speed(iter/s)": 0.414434 }, { "acc": 0.87824688, "epoch": 1.3351203530717786, "grad_norm": 9.299164772033691, "learning_rate": 7.955450681553337e-06, "loss": 0.54464598, "memory(GiB)": 34.88, "step": 49310, "train_speed(iter/s)": 0.414436 }, { "acc": 0.87941227, "epoch": 1.3352557333549941, "grad_norm": 11.491448402404785, "learning_rate": 7.954999299142772e-06, "loss": 0.55212374, "memory(GiB)": 34.88, "step": 49315, "train_speed(iter/s)": 0.414438 }, { "acc": 0.91043434, "epoch": 1.3353911136382097, "grad_norm": 15.348984718322754, "learning_rate": 7.954547879721296e-06, "loss": 0.4904911, "memory(GiB)": 34.88, "step": 49320, "train_speed(iter/s)": 0.41444 }, { "acc": 0.89120932, "epoch": 1.3355264939214253, "grad_norm": 4.959876537322998, "learning_rate": 7.954096423294568e-06, "loss": 0.56532297, "memory(GiB)": 34.88, "step": 49325, "train_speed(iter/s)": 0.414442 }, { "acc": 0.89077091, "epoch": 1.3356618742046409, "grad_norm": 9.336734771728516, "learning_rate": 7.953644929868238e-06, "loss": 0.68604574, "memory(GiB)": 34.88, "step": 49330, "train_speed(iter/s)": 0.414444 }, { "acc": 0.88657532, "epoch": 1.3357972544878565, "grad_norm": 6.823627471923828, "learning_rate": 7.953193399447968e-06, "loss": 0.58391771, "memory(GiB)": 34.88, "step": 49335, "train_speed(iter/s)": 0.414446 }, { "acc": 0.90529146, "epoch": 1.3359326347710718, "grad_norm": 7.980939865112305, "learning_rate": 7.952741832039408e-06, "loss": 0.48987713, "memory(GiB)": 34.88, "step": 49340, "train_speed(iter/s)": 0.414448 }, { "acc": 0.90023804, "epoch": 1.3360680150542876, "grad_norm": 7.027641773223877, "learning_rate": 7.95229022764822e-06, "loss": 0.59981594, "memory(GiB)": 34.88, "step": 49345, "train_speed(iter/s)": 0.414449 }, { "acc": 0.90055799, "epoch": 1.336203395337503, "grad_norm": 7.309653282165527, "learning_rate": 7.951838586280058e-06, "loss": 0.55845814, "memory(GiB)": 34.88, "step": 49350, "train_speed(iter/s)": 0.414451 }, { "acc": 0.8956007, "epoch": 1.3363387756207186, "grad_norm": 5.883120059967041, "learning_rate": 7.95138690794058e-06, "loss": 0.52154307, "memory(GiB)": 34.88, "step": 49355, "train_speed(iter/s)": 0.414453 }, { "acc": 0.89149876, "epoch": 1.3364741559039341, "grad_norm": 8.759262084960938, "learning_rate": 7.950935192635448e-06, "loss": 0.5816206, "memory(GiB)": 34.88, "step": 49360, "train_speed(iter/s)": 0.414455 }, { "acc": 0.90842886, "epoch": 1.3366095361871497, "grad_norm": 6.869811058044434, "learning_rate": 7.950483440370315e-06, "loss": 0.46865754, "memory(GiB)": 34.88, "step": 49365, "train_speed(iter/s)": 0.414457 }, { "acc": 0.89703274, "epoch": 1.3367449164703653, "grad_norm": 3.384418249130249, "learning_rate": 7.950031651150845e-06, "loss": 0.53494573, "memory(GiB)": 34.88, "step": 49370, "train_speed(iter/s)": 0.414458 }, { "acc": 0.91179695, "epoch": 1.3368802967535809, "grad_norm": 6.365828990936279, "learning_rate": 7.949579824982693e-06, "loss": 0.47256088, "memory(GiB)": 34.88, "step": 49375, "train_speed(iter/s)": 0.41446 }, { "acc": 0.90978909, "epoch": 1.3370156770367965, "grad_norm": 12.172764778137207, "learning_rate": 7.949127961871521e-06, "loss": 0.48588839, "memory(GiB)": 34.88, "step": 49380, "train_speed(iter/s)": 0.414462 }, { "acc": 0.86686096, "epoch": 1.3371510573200118, "grad_norm": 6.87034273147583, "learning_rate": 7.948676061822992e-06, "loss": 0.68174124, "memory(GiB)": 34.88, "step": 49385, "train_speed(iter/s)": 0.414464 }, { "acc": 0.88517513, "epoch": 1.3372864376032274, "grad_norm": 9.22991943359375, "learning_rate": 7.948224124842763e-06, "loss": 0.6343483, "memory(GiB)": 34.88, "step": 49390, "train_speed(iter/s)": 0.414466 }, { "acc": 0.92033863, "epoch": 1.337421817886443, "grad_norm": 3.787694215774536, "learning_rate": 7.947772150936496e-06, "loss": 0.42405128, "memory(GiB)": 34.88, "step": 49395, "train_speed(iter/s)": 0.414468 }, { "acc": 0.91659222, "epoch": 1.3375571981696586, "grad_norm": 7.476479530334473, "learning_rate": 7.947320140109858e-06, "loss": 0.39008336, "memory(GiB)": 34.88, "step": 49400, "train_speed(iter/s)": 0.41447 }, { "acc": 0.88832092, "epoch": 1.3376925784528741, "grad_norm": 14.824267387390137, "learning_rate": 7.946868092368505e-06, "loss": 0.6059989, "memory(GiB)": 34.88, "step": 49405, "train_speed(iter/s)": 0.414471 }, { "acc": 0.90957098, "epoch": 1.3378279587360897, "grad_norm": 5.615873336791992, "learning_rate": 7.946416007718101e-06, "loss": 0.51590586, "memory(GiB)": 34.88, "step": 49410, "train_speed(iter/s)": 0.414474 }, { "acc": 0.87758808, "epoch": 1.3379633390193053, "grad_norm": 33.65502166748047, "learning_rate": 7.945963886164311e-06, "loss": 0.68876028, "memory(GiB)": 34.88, "step": 49415, "train_speed(iter/s)": 0.414476 }, { "acc": 0.88875141, "epoch": 1.3380987193025207, "grad_norm": 9.871241569519043, "learning_rate": 7.945511727712799e-06, "loss": 0.56514678, "memory(GiB)": 34.88, "step": 49420, "train_speed(iter/s)": 0.414477 }, { "acc": 0.89472275, "epoch": 1.3382340995857365, "grad_norm": 5.169256687164307, "learning_rate": 7.945059532369227e-06, "loss": 0.4848834, "memory(GiB)": 34.88, "step": 49425, "train_speed(iter/s)": 0.41448 }, { "acc": 0.89416084, "epoch": 1.3383694798689518, "grad_norm": 9.161900520324707, "learning_rate": 7.944607300139261e-06, "loss": 0.53133149, "memory(GiB)": 34.88, "step": 49430, "train_speed(iter/s)": 0.414482 }, { "acc": 0.90701027, "epoch": 1.3385048601521674, "grad_norm": 7.561184406280518, "learning_rate": 7.944155031028565e-06, "loss": 0.52920847, "memory(GiB)": 34.88, "step": 49435, "train_speed(iter/s)": 0.414484 }, { "acc": 0.91196556, "epoch": 1.338640240435383, "grad_norm": 5.41917085647583, "learning_rate": 7.943702725042809e-06, "loss": 0.53849945, "memory(GiB)": 34.88, "step": 49440, "train_speed(iter/s)": 0.414486 }, { "acc": 0.90442371, "epoch": 1.3387756207185986, "grad_norm": 4.288778781890869, "learning_rate": 7.943250382187653e-06, "loss": 0.57492781, "memory(GiB)": 34.88, "step": 49445, "train_speed(iter/s)": 0.414487 }, { "acc": 0.88729992, "epoch": 1.3389110010018141, "grad_norm": 8.692299842834473, "learning_rate": 7.942798002468766e-06, "loss": 0.68665814, "memory(GiB)": 34.88, "step": 49450, "train_speed(iter/s)": 0.414489 }, { "acc": 0.90668793, "epoch": 1.3390463812850297, "grad_norm": 3.1713814735412598, "learning_rate": 7.942345585891814e-06, "loss": 0.45771828, "memory(GiB)": 34.88, "step": 49455, "train_speed(iter/s)": 0.414491 }, { "acc": 0.87288151, "epoch": 1.3391817615682453, "grad_norm": 6.688612937927246, "learning_rate": 7.941893132462465e-06, "loss": 0.71082129, "memory(GiB)": 34.88, "step": 49460, "train_speed(iter/s)": 0.414493 }, { "acc": 0.88981819, "epoch": 1.3393171418514607, "grad_norm": 6.809170722961426, "learning_rate": 7.941440642186388e-06, "loss": 0.59814949, "memory(GiB)": 34.88, "step": 49465, "train_speed(iter/s)": 0.414495 }, { "acc": 0.91993999, "epoch": 1.3394525221346762, "grad_norm": 5.164942264556885, "learning_rate": 7.940988115069253e-06, "loss": 0.36475286, "memory(GiB)": 34.88, "step": 49470, "train_speed(iter/s)": 0.414497 }, { "acc": 0.89170952, "epoch": 1.3395879024178918, "grad_norm": 11.081912994384766, "learning_rate": 7.940535551116722e-06, "loss": 0.59268031, "memory(GiB)": 34.88, "step": 49475, "train_speed(iter/s)": 0.414499 }, { "acc": 0.87950096, "epoch": 1.3397232827011074, "grad_norm": 8.749364852905273, "learning_rate": 7.940082950334473e-06, "loss": 0.68970623, "memory(GiB)": 34.88, "step": 49480, "train_speed(iter/s)": 0.414501 }, { "acc": 0.85902367, "epoch": 1.339858662984323, "grad_norm": 10.50621509552002, "learning_rate": 7.939630312728169e-06, "loss": 0.75841002, "memory(GiB)": 34.88, "step": 49485, "train_speed(iter/s)": 0.414503 }, { "acc": 0.88486862, "epoch": 1.3399940432675386, "grad_norm": 6.813855171203613, "learning_rate": 7.93917763830348e-06, "loss": 0.62992144, "memory(GiB)": 34.88, "step": 49490, "train_speed(iter/s)": 0.414505 }, { "acc": 0.91537209, "epoch": 1.3401294235507542, "grad_norm": 4.14884090423584, "learning_rate": 7.938724927066083e-06, "loss": 0.38914309, "memory(GiB)": 34.88, "step": 49495, "train_speed(iter/s)": 0.414507 }, { "acc": 0.8905592, "epoch": 1.3402648038339695, "grad_norm": 13.828161239624023, "learning_rate": 7.938272179021645e-06, "loss": 0.57649765, "memory(GiB)": 34.88, "step": 49500, "train_speed(iter/s)": 0.414509 }, { "acc": 0.89616756, "epoch": 1.3404001841171853, "grad_norm": 11.501481056213379, "learning_rate": 7.937819394175836e-06, "loss": 0.53430977, "memory(GiB)": 34.88, "step": 49505, "train_speed(iter/s)": 0.414511 }, { "acc": 0.8642271, "epoch": 1.3405355644004007, "grad_norm": 12.715560913085938, "learning_rate": 7.93736657253433e-06, "loss": 0.75270443, "memory(GiB)": 34.88, "step": 49510, "train_speed(iter/s)": 0.414512 }, { "acc": 0.90005274, "epoch": 1.3406709446836163, "grad_norm": 5.428807735443115, "learning_rate": 7.936913714102799e-06, "loss": 0.44034534, "memory(GiB)": 34.88, "step": 49515, "train_speed(iter/s)": 0.414514 }, { "acc": 0.90952044, "epoch": 1.3408063249668318, "grad_norm": 11.34469985961914, "learning_rate": 7.936460818886918e-06, "loss": 0.47506256, "memory(GiB)": 34.88, "step": 49520, "train_speed(iter/s)": 0.414517 }, { "acc": 0.91563263, "epoch": 1.3409417052500474, "grad_norm": 14.402010917663574, "learning_rate": 7.936007886892355e-06, "loss": 0.47659655, "memory(GiB)": 34.88, "step": 49525, "train_speed(iter/s)": 0.414518 }, { "acc": 0.89408636, "epoch": 1.341077085533263, "grad_norm": 11.020061492919922, "learning_rate": 7.935554918124792e-06, "loss": 0.5273901, "memory(GiB)": 34.88, "step": 49530, "train_speed(iter/s)": 0.41452 }, { "acc": 0.90139303, "epoch": 1.3412124658164783, "grad_norm": 41.421844482421875, "learning_rate": 7.935101912589896e-06, "loss": 0.51098766, "memory(GiB)": 34.88, "step": 49535, "train_speed(iter/s)": 0.414522 }, { "acc": 0.90260162, "epoch": 1.3413478460996942, "grad_norm": 9.737525939941406, "learning_rate": 7.934648870293344e-06, "loss": 0.48696833, "memory(GiB)": 34.88, "step": 49540, "train_speed(iter/s)": 0.414524 }, { "acc": 0.88638821, "epoch": 1.3414832263829095, "grad_norm": 8.58952808380127, "learning_rate": 7.934195791240814e-06, "loss": 0.63789063, "memory(GiB)": 34.88, "step": 49545, "train_speed(iter/s)": 0.414526 }, { "acc": 0.89150438, "epoch": 1.341618606666125, "grad_norm": 8.553069114685059, "learning_rate": 7.933742675437979e-06, "loss": 0.60332251, "memory(GiB)": 34.88, "step": 49550, "train_speed(iter/s)": 0.414527 }, { "acc": 0.90412788, "epoch": 1.3417539869493407, "grad_norm": 5.529123783111572, "learning_rate": 7.933289522890514e-06, "loss": 0.52795401, "memory(GiB)": 34.88, "step": 49555, "train_speed(iter/s)": 0.414529 }, { "acc": 0.89323635, "epoch": 1.3418893672325563, "grad_norm": 22.220430374145508, "learning_rate": 7.932836333604099e-06, "loss": 0.55994654, "memory(GiB)": 34.88, "step": 49560, "train_speed(iter/s)": 0.414531 }, { "acc": 0.90377922, "epoch": 1.3420247475157718, "grad_norm": 12.69288158416748, "learning_rate": 7.932383107584406e-06, "loss": 0.49968495, "memory(GiB)": 34.88, "step": 49565, "train_speed(iter/s)": 0.414533 }, { "acc": 0.88485699, "epoch": 1.3421601277989874, "grad_norm": 8.094533920288086, "learning_rate": 7.931929844837121e-06, "loss": 0.57521062, "memory(GiB)": 34.88, "step": 49570, "train_speed(iter/s)": 0.414535 }, { "acc": 0.8948513, "epoch": 1.342295508082203, "grad_norm": 11.28966999053955, "learning_rate": 7.931476545367916e-06, "loss": 0.60274773, "memory(GiB)": 34.88, "step": 49575, "train_speed(iter/s)": 0.414537 }, { "acc": 0.88527699, "epoch": 1.3424308883654184, "grad_norm": 15.169938087463379, "learning_rate": 7.931023209182468e-06, "loss": 0.62860689, "memory(GiB)": 34.88, "step": 49580, "train_speed(iter/s)": 0.414539 }, { "acc": 0.90261421, "epoch": 1.342566268648634, "grad_norm": 7.808049201965332, "learning_rate": 7.930569836286459e-06, "loss": 0.55991187, "memory(GiB)": 34.88, "step": 49585, "train_speed(iter/s)": 0.41454 }, { "acc": 0.88944407, "epoch": 1.3427016489318495, "grad_norm": 13.018501281738281, "learning_rate": 7.930116426685569e-06, "loss": 0.50963755, "memory(GiB)": 34.88, "step": 49590, "train_speed(iter/s)": 0.414542 }, { "acc": 0.90127554, "epoch": 1.342837029215065, "grad_norm": 9.439398765563965, "learning_rate": 7.929662980385475e-06, "loss": 0.46605363, "memory(GiB)": 34.88, "step": 49595, "train_speed(iter/s)": 0.414544 }, { "acc": 0.88898144, "epoch": 1.3429724094982807, "grad_norm": 10.742955207824707, "learning_rate": 7.929209497391858e-06, "loss": 0.6464644, "memory(GiB)": 34.88, "step": 49600, "train_speed(iter/s)": 0.414546 }, { "acc": 0.90740967, "epoch": 1.3431077897814963, "grad_norm": 4.9925947189331055, "learning_rate": 7.928755977710403e-06, "loss": 0.42233839, "memory(GiB)": 34.88, "step": 49605, "train_speed(iter/s)": 0.414548 }, { "acc": 0.90698032, "epoch": 1.3432431700647118, "grad_norm": 11.876768112182617, "learning_rate": 7.928302421346787e-06, "loss": 0.51652637, "memory(GiB)": 34.88, "step": 49610, "train_speed(iter/s)": 0.41455 }, { "acc": 0.89795189, "epoch": 1.3433785503479272, "grad_norm": 10.390992164611816, "learning_rate": 7.92784882830669e-06, "loss": 0.59476953, "memory(GiB)": 34.88, "step": 49615, "train_speed(iter/s)": 0.414552 }, { "acc": 0.9040246, "epoch": 1.343513930631143, "grad_norm": 6.586333751678467, "learning_rate": 7.9273951985958e-06, "loss": 0.48525763, "memory(GiB)": 34.88, "step": 49620, "train_speed(iter/s)": 0.414554 }, { "acc": 0.91487999, "epoch": 1.3436493109143584, "grad_norm": 3.6970393657684326, "learning_rate": 7.926941532219794e-06, "loss": 0.44534302, "memory(GiB)": 34.88, "step": 49625, "train_speed(iter/s)": 0.414556 }, { "acc": 0.88112907, "epoch": 1.343784691197574, "grad_norm": 14.760725021362305, "learning_rate": 7.926487829184358e-06, "loss": 0.76262722, "memory(GiB)": 34.88, "step": 49630, "train_speed(iter/s)": 0.414558 }, { "acc": 0.90953455, "epoch": 1.3439200714807895, "grad_norm": 3.4654622077941895, "learning_rate": 7.926034089495175e-06, "loss": 0.39835551, "memory(GiB)": 34.88, "step": 49635, "train_speed(iter/s)": 0.41456 }, { "acc": 0.90653677, "epoch": 1.344055451764005, "grad_norm": 25.91468620300293, "learning_rate": 7.92558031315793e-06, "loss": 0.45423918, "memory(GiB)": 34.88, "step": 49640, "train_speed(iter/s)": 0.414562 }, { "acc": 0.89890633, "epoch": 1.3441908320472207, "grad_norm": 11.41103458404541, "learning_rate": 7.925126500178304e-06, "loss": 0.54649692, "memory(GiB)": 34.88, "step": 49645, "train_speed(iter/s)": 0.414564 }, { "acc": 0.88159313, "epoch": 1.3443262123304363, "grad_norm": 7.49636697769165, "learning_rate": 7.924672650561984e-06, "loss": 0.62507896, "memory(GiB)": 34.88, "step": 49650, "train_speed(iter/s)": 0.414566 }, { "acc": 0.89803467, "epoch": 1.3444615926136518, "grad_norm": 8.13499927520752, "learning_rate": 7.92421876431466e-06, "loss": 0.4717412, "memory(GiB)": 34.88, "step": 49655, "train_speed(iter/s)": 0.414567 }, { "acc": 0.88669786, "epoch": 1.3445969728968672, "grad_norm": 4.689084053039551, "learning_rate": 7.923764841442008e-06, "loss": 0.64523363, "memory(GiB)": 34.88, "step": 49660, "train_speed(iter/s)": 0.414569 }, { "acc": 0.87855206, "epoch": 1.3447323531800828, "grad_norm": 12.62399959564209, "learning_rate": 7.923310881949722e-06, "loss": 0.62223611, "memory(GiB)": 34.88, "step": 49665, "train_speed(iter/s)": 0.414571 }, { "acc": 0.8905139, "epoch": 1.3448677334632984, "grad_norm": 9.032352447509766, "learning_rate": 7.922856885843487e-06, "loss": 0.56254334, "memory(GiB)": 34.88, "step": 49670, "train_speed(iter/s)": 0.414573 }, { "acc": 0.88963242, "epoch": 1.345003113746514, "grad_norm": 9.859160423278809, "learning_rate": 7.922402853128987e-06, "loss": 0.59439235, "memory(GiB)": 34.88, "step": 49675, "train_speed(iter/s)": 0.414575 }, { "acc": 0.88833742, "epoch": 1.3451384940297295, "grad_norm": 8.919943809509277, "learning_rate": 7.921948783811917e-06, "loss": 0.648806, "memory(GiB)": 34.88, "step": 49680, "train_speed(iter/s)": 0.414577 }, { "acc": 0.88246193, "epoch": 1.345273874312945, "grad_norm": 16.029918670654297, "learning_rate": 7.921494677897956e-06, "loss": 0.5356679, "memory(GiB)": 34.88, "step": 49685, "train_speed(iter/s)": 0.414579 }, { "acc": 0.90623074, "epoch": 1.3454092545961607, "grad_norm": 20.673246383666992, "learning_rate": 7.921040535392797e-06, "loss": 0.49054766, "memory(GiB)": 34.88, "step": 49690, "train_speed(iter/s)": 0.414581 }, { "acc": 0.9065711, "epoch": 1.345544634879376, "grad_norm": 5.66855001449585, "learning_rate": 7.92058635630213e-06, "loss": 0.4982883, "memory(GiB)": 34.88, "step": 49695, "train_speed(iter/s)": 0.414583 }, { "acc": 0.9271471, "epoch": 1.3456800151625918, "grad_norm": 4.753189563751221, "learning_rate": 7.920132140631645e-06, "loss": 0.35636718, "memory(GiB)": 34.88, "step": 49700, "train_speed(iter/s)": 0.414585 }, { "acc": 0.8868494, "epoch": 1.3458153954458072, "grad_norm": 13.700533866882324, "learning_rate": 7.919677888387029e-06, "loss": 0.62892885, "memory(GiB)": 34.88, "step": 49705, "train_speed(iter/s)": 0.414587 }, { "acc": 0.90212708, "epoch": 1.3459507757290228, "grad_norm": 9.165067672729492, "learning_rate": 7.919223599573974e-06, "loss": 0.59241343, "memory(GiB)": 34.88, "step": 49710, "train_speed(iter/s)": 0.414589 }, { "acc": 0.92082891, "epoch": 1.3460861560122384, "grad_norm": 4.483486175537109, "learning_rate": 7.91876927419817e-06, "loss": 0.39796593, "memory(GiB)": 34.88, "step": 49715, "train_speed(iter/s)": 0.41459 }, { "acc": 0.88337193, "epoch": 1.346221536295454, "grad_norm": 18.67742347717285, "learning_rate": 7.918314912265311e-06, "loss": 0.54730663, "memory(GiB)": 34.88, "step": 49720, "train_speed(iter/s)": 0.414592 }, { "acc": 0.87389936, "epoch": 1.3463569165786695, "grad_norm": 10.088976860046387, "learning_rate": 7.917860513781083e-06, "loss": 0.55825334, "memory(GiB)": 34.88, "step": 49725, "train_speed(iter/s)": 0.414593 }, { "acc": 0.91369095, "epoch": 1.346492296861885, "grad_norm": 9.606492042541504, "learning_rate": 7.917406078751184e-06, "loss": 0.44231358, "memory(GiB)": 34.88, "step": 49730, "train_speed(iter/s)": 0.414595 }, { "acc": 0.89631414, "epoch": 1.3466276771451007, "grad_norm": 14.750253677368164, "learning_rate": 7.916951607181307e-06, "loss": 0.54514322, "memory(GiB)": 34.88, "step": 49735, "train_speed(iter/s)": 0.414597 }, { "acc": 0.90608654, "epoch": 1.346763057428316, "grad_norm": 7.910755634307861, "learning_rate": 7.91649709907714e-06, "loss": 0.47108078, "memory(GiB)": 34.88, "step": 49740, "train_speed(iter/s)": 0.414599 }, { "acc": 0.90869904, "epoch": 1.3468984377115316, "grad_norm": 10.136595726013184, "learning_rate": 7.916042554444379e-06, "loss": 0.42070594, "memory(GiB)": 34.88, "step": 49745, "train_speed(iter/s)": 0.414601 }, { "acc": 0.89014359, "epoch": 1.3470338179947472, "grad_norm": 13.595531463623047, "learning_rate": 7.915587973288721e-06, "loss": 0.64134922, "memory(GiB)": 34.88, "step": 49750, "train_speed(iter/s)": 0.414603 }, { "acc": 0.87956047, "epoch": 1.3471691982779628, "grad_norm": 5.786032676696777, "learning_rate": 7.915133355615858e-06, "loss": 0.63090057, "memory(GiB)": 34.88, "step": 49755, "train_speed(iter/s)": 0.414605 }, { "acc": 0.90525589, "epoch": 1.3473045785611784, "grad_norm": 12.06521224975586, "learning_rate": 7.914678701431482e-06, "loss": 0.48425989, "memory(GiB)": 34.88, "step": 49760, "train_speed(iter/s)": 0.414607 }, { "acc": 0.89232426, "epoch": 1.347439958844394, "grad_norm": 4.924671173095703, "learning_rate": 7.914224010741293e-06, "loss": 0.52320089, "memory(GiB)": 34.88, "step": 49765, "train_speed(iter/s)": 0.414608 }, { "acc": 0.8724762, "epoch": 1.3475753391276095, "grad_norm": 8.015158653259277, "learning_rate": 7.913769283550983e-06, "loss": 0.62206526, "memory(GiB)": 34.88, "step": 49770, "train_speed(iter/s)": 0.41461 }, { "acc": 0.87961483, "epoch": 1.3477107194108249, "grad_norm": 24.7181339263916, "learning_rate": 7.913314519866254e-06, "loss": 0.64728861, "memory(GiB)": 34.88, "step": 49775, "train_speed(iter/s)": 0.414612 }, { "acc": 0.91051292, "epoch": 1.3478460996940407, "grad_norm": 3.458608865737915, "learning_rate": 7.912859719692799e-06, "loss": 0.47278481, "memory(GiB)": 34.88, "step": 49780, "train_speed(iter/s)": 0.414614 }, { "acc": 0.90323906, "epoch": 1.347981479977256, "grad_norm": 4.695001602172852, "learning_rate": 7.912404883036315e-06, "loss": 0.42327237, "memory(GiB)": 34.88, "step": 49785, "train_speed(iter/s)": 0.414616 }, { "acc": 0.89772816, "epoch": 1.3481168602604716, "grad_norm": 7.23183012008667, "learning_rate": 7.911950009902501e-06, "loss": 0.58033075, "memory(GiB)": 34.88, "step": 49790, "train_speed(iter/s)": 0.414617 }, { "acc": 0.89885969, "epoch": 1.3482522405436872, "grad_norm": 8.82544994354248, "learning_rate": 7.911495100297053e-06, "loss": 0.55932493, "memory(GiB)": 34.88, "step": 49795, "train_speed(iter/s)": 0.414619 }, { "acc": 0.89297581, "epoch": 1.3483876208269028, "grad_norm": 14.615466117858887, "learning_rate": 7.911040154225672e-06, "loss": 0.67876444, "memory(GiB)": 34.88, "step": 49800, "train_speed(iter/s)": 0.414621 }, { "acc": 0.89551163, "epoch": 1.3485230011101184, "grad_norm": 6.657776355743408, "learning_rate": 7.910585171694057e-06, "loss": 0.48775287, "memory(GiB)": 34.88, "step": 49805, "train_speed(iter/s)": 0.414623 }, { "acc": 0.88119049, "epoch": 1.348658381393334, "grad_norm": 6.651215076446533, "learning_rate": 7.910130152707907e-06, "loss": 0.66364336, "memory(GiB)": 34.88, "step": 49810, "train_speed(iter/s)": 0.414624 }, { "acc": 0.88213263, "epoch": 1.3487937616765495, "grad_norm": 6.985583782196045, "learning_rate": 7.909675097272921e-06, "loss": 0.64426823, "memory(GiB)": 34.88, "step": 49815, "train_speed(iter/s)": 0.414626 }, { "acc": 0.87746773, "epoch": 1.3489291419597649, "grad_norm": 10.866357803344727, "learning_rate": 7.909220005394802e-06, "loss": 0.67847681, "memory(GiB)": 34.88, "step": 49820, "train_speed(iter/s)": 0.414628 }, { "acc": 0.90568209, "epoch": 1.3490645222429805, "grad_norm": 6.735967636108398, "learning_rate": 7.908764877079248e-06, "loss": 0.48124342, "memory(GiB)": 34.88, "step": 49825, "train_speed(iter/s)": 0.41463 }, { "acc": 0.87951918, "epoch": 1.349199902526196, "grad_norm": 16.266979217529297, "learning_rate": 7.908309712331962e-06, "loss": 0.67593946, "memory(GiB)": 34.88, "step": 49830, "train_speed(iter/s)": 0.414632 }, { "acc": 0.87419577, "epoch": 1.3493352828094116, "grad_norm": 6.300751686096191, "learning_rate": 7.907854511158648e-06, "loss": 0.62580156, "memory(GiB)": 34.88, "step": 49835, "train_speed(iter/s)": 0.414634 }, { "acc": 0.89580803, "epoch": 1.3494706630926272, "grad_norm": 6.57274055480957, "learning_rate": 7.907399273565e-06, "loss": 0.4968523, "memory(GiB)": 34.88, "step": 49840, "train_speed(iter/s)": 0.414636 }, { "acc": 0.8918684, "epoch": 1.3496060433758428, "grad_norm": 7.8925347328186035, "learning_rate": 7.906943999556732e-06, "loss": 0.51361127, "memory(GiB)": 34.88, "step": 49845, "train_speed(iter/s)": 0.414638 }, { "acc": 0.87758427, "epoch": 1.3497414236590584, "grad_norm": 53.077964782714844, "learning_rate": 7.906488689139538e-06, "loss": 0.69748816, "memory(GiB)": 34.88, "step": 49850, "train_speed(iter/s)": 0.41464 }, { "acc": 0.89213409, "epoch": 1.3498768039422737, "grad_norm": 6.972925186157227, "learning_rate": 7.906033342319127e-06, "loss": 0.53310914, "memory(GiB)": 34.88, "step": 49855, "train_speed(iter/s)": 0.414642 }, { "acc": 0.90048656, "epoch": 1.3500121842254895, "grad_norm": 12.80560302734375, "learning_rate": 7.905577959101201e-06, "loss": 0.61923714, "memory(GiB)": 34.88, "step": 49860, "train_speed(iter/s)": 0.414644 }, { "acc": 0.88309135, "epoch": 1.3501475645087049, "grad_norm": 17.26230812072754, "learning_rate": 7.905122539491468e-06, "loss": 0.63931456, "memory(GiB)": 34.88, "step": 49865, "train_speed(iter/s)": 0.414645 }, { "acc": 0.91701336, "epoch": 1.3502829447919205, "grad_norm": 6.876560688018799, "learning_rate": 7.904667083495628e-06, "loss": 0.41931691, "memory(GiB)": 34.88, "step": 49870, "train_speed(iter/s)": 0.414647 }, { "acc": 0.89394302, "epoch": 1.350418325075136, "grad_norm": 9.956768035888672, "learning_rate": 7.904211591119387e-06, "loss": 0.59110498, "memory(GiB)": 34.88, "step": 49875, "train_speed(iter/s)": 0.414649 }, { "acc": 0.87741995, "epoch": 1.3505537053583516, "grad_norm": 12.490574836730957, "learning_rate": 7.903756062368454e-06, "loss": 0.6453342, "memory(GiB)": 34.88, "step": 49880, "train_speed(iter/s)": 0.41465 }, { "acc": 0.88836384, "epoch": 1.3506890856415672, "grad_norm": 12.15542984008789, "learning_rate": 7.903300497248535e-06, "loss": 0.61930585, "memory(GiB)": 34.88, "step": 49885, "train_speed(iter/s)": 0.414652 }, { "acc": 0.8942915, "epoch": 1.3508244659247828, "grad_norm": 14.621941566467285, "learning_rate": 7.902844895765333e-06, "loss": 0.52359214, "memory(GiB)": 34.88, "step": 49890, "train_speed(iter/s)": 0.414654 }, { "acc": 0.89100552, "epoch": 1.3509598462079984, "grad_norm": 8.469293594360352, "learning_rate": 7.90238925792456e-06, "loss": 0.59046373, "memory(GiB)": 34.88, "step": 49895, "train_speed(iter/s)": 0.414656 }, { "acc": 0.88510141, "epoch": 1.3510952264912137, "grad_norm": 8.968442916870117, "learning_rate": 7.90193358373192e-06, "loss": 0.59859409, "memory(GiB)": 34.88, "step": 49900, "train_speed(iter/s)": 0.414659 }, { "acc": 0.8849844, "epoch": 1.3512306067744293, "grad_norm": 6.051826000213623, "learning_rate": 7.901477873193127e-06, "loss": 0.58184991, "memory(GiB)": 34.88, "step": 49905, "train_speed(iter/s)": 0.41466 }, { "acc": 0.90346413, "epoch": 1.3513659870576449, "grad_norm": 7.140491008758545, "learning_rate": 7.901022126313883e-06, "loss": 0.49679604, "memory(GiB)": 34.88, "step": 49910, "train_speed(iter/s)": 0.414662 }, { "acc": 0.89564304, "epoch": 1.3515013673408605, "grad_norm": 10.642366409301758, "learning_rate": 7.900566343099901e-06, "loss": 0.5259068, "memory(GiB)": 34.88, "step": 49915, "train_speed(iter/s)": 0.414664 }, { "acc": 0.90109062, "epoch": 1.351636747624076, "grad_norm": 16.68497085571289, "learning_rate": 7.900110523556889e-06, "loss": 0.55344973, "memory(GiB)": 34.88, "step": 49920, "train_speed(iter/s)": 0.414667 }, { "acc": 0.87709951, "epoch": 1.3517721279072916, "grad_norm": 12.59011173248291, "learning_rate": 7.899654667690557e-06, "loss": 0.69327002, "memory(GiB)": 34.88, "step": 49925, "train_speed(iter/s)": 0.414669 }, { "acc": 0.8945859, "epoch": 1.3519075081905072, "grad_norm": 5.646408557891846, "learning_rate": 7.899198775506615e-06, "loss": 0.58034501, "memory(GiB)": 34.88, "step": 49930, "train_speed(iter/s)": 0.41467 }, { "acc": 0.89936218, "epoch": 1.3520428884737226, "grad_norm": 16.95070457458496, "learning_rate": 7.898742847010775e-06, "loss": 0.54811764, "memory(GiB)": 34.88, "step": 49935, "train_speed(iter/s)": 0.414672 }, { "acc": 0.91618481, "epoch": 1.3521782687569384, "grad_norm": 5.630530834197998, "learning_rate": 7.89828688220875e-06, "loss": 0.38497834, "memory(GiB)": 34.88, "step": 49940, "train_speed(iter/s)": 0.414674 }, { "acc": 0.90782795, "epoch": 1.3523136490401537, "grad_norm": 3.9575815200805664, "learning_rate": 7.89783088110625e-06, "loss": 0.46549006, "memory(GiB)": 34.88, "step": 49945, "train_speed(iter/s)": 0.414676 }, { "acc": 0.87660551, "epoch": 1.3524490293233693, "grad_norm": 7.4148173332214355, "learning_rate": 7.897374843708987e-06, "loss": 0.687708, "memory(GiB)": 34.88, "step": 49950, "train_speed(iter/s)": 0.414678 }, { "acc": 0.89530306, "epoch": 1.3525844096065849, "grad_norm": 10.404261589050293, "learning_rate": 7.896918770022673e-06, "loss": 0.5974298, "memory(GiB)": 34.88, "step": 49955, "train_speed(iter/s)": 0.414679 }, { "acc": 0.88929424, "epoch": 1.3527197898898005, "grad_norm": 8.95445728302002, "learning_rate": 7.896462660053025e-06, "loss": 0.58102994, "memory(GiB)": 34.88, "step": 49960, "train_speed(iter/s)": 0.414681 }, { "acc": 0.92043896, "epoch": 1.352855170173016, "grad_norm": 12.216828346252441, "learning_rate": 7.896006513805752e-06, "loss": 0.45962825, "memory(GiB)": 34.88, "step": 49965, "train_speed(iter/s)": 0.414683 }, { "acc": 0.90231562, "epoch": 1.3529905504562316, "grad_norm": 6.172807216644287, "learning_rate": 7.89555033128657e-06, "loss": 0.51814165, "memory(GiB)": 34.88, "step": 49970, "train_speed(iter/s)": 0.414685 }, { "acc": 0.88980055, "epoch": 1.3531259307394472, "grad_norm": 11.023921012878418, "learning_rate": 7.895094112501195e-06, "loss": 0.60875583, "memory(GiB)": 34.88, "step": 49975, "train_speed(iter/s)": 0.414687 }, { "acc": 0.88483448, "epoch": 1.3532613110226626, "grad_norm": 6.440934658050537, "learning_rate": 7.894637857455342e-06, "loss": 0.65125446, "memory(GiB)": 34.88, "step": 49980, "train_speed(iter/s)": 0.414689 }, { "acc": 0.90676708, "epoch": 1.3533966913058781, "grad_norm": 11.709197998046875, "learning_rate": 7.894181566154722e-06, "loss": 0.47783017, "memory(GiB)": 34.88, "step": 49985, "train_speed(iter/s)": 0.414691 }, { "acc": 0.90074148, "epoch": 1.3535320715890937, "grad_norm": 6.306578159332275, "learning_rate": 7.893725238605058e-06, "loss": 0.5399241, "memory(GiB)": 34.88, "step": 49990, "train_speed(iter/s)": 0.414693 }, { "acc": 0.91014271, "epoch": 1.3536674518723093, "grad_norm": 7.011313438415527, "learning_rate": 7.89326887481206e-06, "loss": 0.40342774, "memory(GiB)": 34.88, "step": 49995, "train_speed(iter/s)": 0.414695 }, { "acc": 0.88616886, "epoch": 1.3538028321555249, "grad_norm": 6.2997965812683105, "learning_rate": 7.89281247478145e-06, "loss": 0.57811203, "memory(GiB)": 34.88, "step": 50000, "train_speed(iter/s)": 0.414697 }, { "epoch": 1.3538028321555249, "eval_acc": 0.5989868453305008, "eval_loss": 1.1144152879714966, "eval_runtime": 1301.8085, "eval_samples_per_second": 66.296, "eval_steps_per_second": 2.073, "step": 50000 }, { "acc": 0.88680973, "epoch": 1.3539382124387405, "grad_norm": 11.100438117980957, "learning_rate": 7.89235603851894e-06, "loss": 0.52829571, "memory(GiB)": 34.88, "step": 50005, "train_speed(iter/s)": 0.410184 }, { "acc": 0.9121809, "epoch": 1.354073592721956, "grad_norm": 6.762966632843018, "learning_rate": 7.891899566030254e-06, "loss": 0.39044445, "memory(GiB)": 34.88, "step": 50010, "train_speed(iter/s)": 0.410186 }, { "acc": 0.89094238, "epoch": 1.3542089730051714, "grad_norm": 11.586774826049805, "learning_rate": 7.891443057321104e-06, "loss": 0.56006446, "memory(GiB)": 34.88, "step": 50015, "train_speed(iter/s)": 0.410188 }, { "acc": 0.87235355, "epoch": 1.3543443532883872, "grad_norm": 16.626079559326172, "learning_rate": 7.890986512397212e-06, "loss": 0.71780763, "memory(GiB)": 34.88, "step": 50020, "train_speed(iter/s)": 0.410191 }, { "acc": 0.89515676, "epoch": 1.3544797335716026, "grad_norm": 6.5237717628479, "learning_rate": 7.890529931264298e-06, "loss": 0.57905321, "memory(GiB)": 34.88, "step": 50025, "train_speed(iter/s)": 0.410193 }, { "acc": 0.89126911, "epoch": 1.3546151138548181, "grad_norm": 12.053192138671875, "learning_rate": 7.89007331392808e-06, "loss": 0.58441219, "memory(GiB)": 34.88, "step": 50030, "train_speed(iter/s)": 0.410195 }, { "acc": 0.91945572, "epoch": 1.3547504941380337, "grad_norm": 9.098840713500977, "learning_rate": 7.889616660394277e-06, "loss": 0.35772798, "memory(GiB)": 34.88, "step": 50035, "train_speed(iter/s)": 0.410197 }, { "acc": 0.88428316, "epoch": 1.3548858744212493, "grad_norm": 11.443464279174805, "learning_rate": 7.88915997066861e-06, "loss": 0.60856819, "memory(GiB)": 34.88, "step": 50040, "train_speed(iter/s)": 0.4102 }, { "acc": 0.90686893, "epoch": 1.3550212547044649, "grad_norm": 5.946112632751465, "learning_rate": 7.888703244756803e-06, "loss": 0.43377762, "memory(GiB)": 34.88, "step": 50045, "train_speed(iter/s)": 0.410202 }, { "acc": 0.91415501, "epoch": 1.3551566349876805, "grad_norm": 4.2995500564575195, "learning_rate": 7.888246482664574e-06, "loss": 0.43506784, "memory(GiB)": 34.88, "step": 50050, "train_speed(iter/s)": 0.410204 }, { "acc": 0.91467581, "epoch": 1.355292015270896, "grad_norm": 8.388270378112793, "learning_rate": 7.887789684397645e-06, "loss": 0.50484829, "memory(GiB)": 34.88, "step": 50055, "train_speed(iter/s)": 0.410207 }, { "acc": 0.89365616, "epoch": 1.3554273955541114, "grad_norm": 5.980961322784424, "learning_rate": 7.887332849961739e-06, "loss": 0.53780589, "memory(GiB)": 34.88, "step": 50060, "train_speed(iter/s)": 0.410209 }, { "acc": 0.88702908, "epoch": 1.355562775837327, "grad_norm": 9.646467208862305, "learning_rate": 7.886875979362581e-06, "loss": 0.62698116, "memory(GiB)": 34.88, "step": 50065, "train_speed(iter/s)": 0.410211 }, { "acc": 0.89639339, "epoch": 1.3556981561205426, "grad_norm": 8.75200366973877, "learning_rate": 7.886419072605892e-06, "loss": 0.53090706, "memory(GiB)": 34.88, "step": 50070, "train_speed(iter/s)": 0.410214 }, { "acc": 0.88470459, "epoch": 1.3558335364037581, "grad_norm": 5.615845203399658, "learning_rate": 7.885962129697395e-06, "loss": 0.62702279, "memory(GiB)": 34.88, "step": 50075, "train_speed(iter/s)": 0.410216 }, { "acc": 0.88168011, "epoch": 1.3559689166869737, "grad_norm": 9.52607536315918, "learning_rate": 7.885505150642815e-06, "loss": 0.70403199, "memory(GiB)": 34.88, "step": 50080, "train_speed(iter/s)": 0.410218 }, { "acc": 0.89279995, "epoch": 1.3561042969701893, "grad_norm": 7.614367485046387, "learning_rate": 7.885048135447878e-06, "loss": 0.60290303, "memory(GiB)": 34.88, "step": 50085, "train_speed(iter/s)": 0.41022 }, { "acc": 0.88707094, "epoch": 1.3562396772534049, "grad_norm": 14.289319038391113, "learning_rate": 7.884591084118304e-06, "loss": 0.55823917, "memory(GiB)": 34.88, "step": 50090, "train_speed(iter/s)": 0.410222 }, { "acc": 0.89939117, "epoch": 1.3563750575366202, "grad_norm": 8.309113502502441, "learning_rate": 7.884133996659825e-06, "loss": 0.53152595, "memory(GiB)": 34.88, "step": 50095, "train_speed(iter/s)": 0.410225 }, { "acc": 0.89713144, "epoch": 1.356510437819836, "grad_norm": 7.278443336486816, "learning_rate": 7.883676873078162e-06, "loss": 0.49025297, "memory(GiB)": 34.88, "step": 50100, "train_speed(iter/s)": 0.410226 }, { "acc": 0.86962519, "epoch": 1.3566458181030514, "grad_norm": 10.311613082885742, "learning_rate": 7.883219713379044e-06, "loss": 0.74220972, "memory(GiB)": 34.88, "step": 50105, "train_speed(iter/s)": 0.410228 }, { "acc": 0.87159901, "epoch": 1.356781198386267, "grad_norm": 11.300841331481934, "learning_rate": 7.882762517568196e-06, "loss": 0.64128027, "memory(GiB)": 34.88, "step": 50110, "train_speed(iter/s)": 0.41023 }, { "acc": 0.90484447, "epoch": 1.3569165786694826, "grad_norm": 8.199052810668945, "learning_rate": 7.882305285651347e-06, "loss": 0.56367793, "memory(GiB)": 34.88, "step": 50115, "train_speed(iter/s)": 0.410233 }, { "acc": 0.89549828, "epoch": 1.3570519589526981, "grad_norm": 7.513024806976318, "learning_rate": 7.881848017634226e-06, "loss": 0.49751787, "memory(GiB)": 34.88, "step": 50120, "train_speed(iter/s)": 0.410235 }, { "acc": 0.8970479, "epoch": 1.3571873392359137, "grad_norm": 8.09619140625, "learning_rate": 7.881390713522557e-06, "loss": 0.55753531, "memory(GiB)": 34.88, "step": 50125, "train_speed(iter/s)": 0.410237 }, { "acc": 0.92962484, "epoch": 1.3573227195191293, "grad_norm": 3.7581865787506104, "learning_rate": 7.88093337332207e-06, "loss": 0.41342316, "memory(GiB)": 34.88, "step": 50130, "train_speed(iter/s)": 0.410239 }, { "acc": 0.88822269, "epoch": 1.3574580998023449, "grad_norm": 7.235535144805908, "learning_rate": 7.880475997038498e-06, "loss": 0.64971633, "memory(GiB)": 34.88, "step": 50135, "train_speed(iter/s)": 0.410242 }, { "acc": 0.91573544, "epoch": 1.3575934800855602, "grad_norm": 5.663949489593506, "learning_rate": 7.880018584677565e-06, "loss": 0.45279627, "memory(GiB)": 34.88, "step": 50140, "train_speed(iter/s)": 0.410244 }, { "acc": 0.90825529, "epoch": 1.3577288603687758, "grad_norm": 6.6093339920043945, "learning_rate": 7.879561136245006e-06, "loss": 0.52235718, "memory(GiB)": 34.88, "step": 50145, "train_speed(iter/s)": 0.410246 }, { "acc": 0.91104918, "epoch": 1.3578642406519914, "grad_norm": 5.450991153717041, "learning_rate": 7.879103651746545e-06, "loss": 0.41180434, "memory(GiB)": 34.88, "step": 50150, "train_speed(iter/s)": 0.410249 }, { "acc": 0.8992341, "epoch": 1.357999620935207, "grad_norm": 6.347855091094971, "learning_rate": 7.878646131187921e-06, "loss": 0.50860538, "memory(GiB)": 34.88, "step": 50155, "train_speed(iter/s)": 0.410251 }, { "acc": 0.89394379, "epoch": 1.3581350012184226, "grad_norm": 5.903591632843018, "learning_rate": 7.878188574574858e-06, "loss": 0.54253092, "memory(GiB)": 34.88, "step": 50160, "train_speed(iter/s)": 0.410253 }, { "acc": 0.89729376, "epoch": 1.3582703815016381, "grad_norm": 11.889044761657715, "learning_rate": 7.877730981913093e-06, "loss": 0.54730291, "memory(GiB)": 34.88, "step": 50165, "train_speed(iter/s)": 0.410255 }, { "acc": 0.8944355, "epoch": 1.3584057617848537, "grad_norm": 7.289543151855469, "learning_rate": 7.877273353208354e-06, "loss": 0.57032423, "memory(GiB)": 34.88, "step": 50170, "train_speed(iter/s)": 0.410258 }, { "acc": 0.91623049, "epoch": 1.358541142068069, "grad_norm": 4.20674991607666, "learning_rate": 7.876815688466379e-06, "loss": 0.42093329, "memory(GiB)": 34.88, "step": 50175, "train_speed(iter/s)": 0.41026 }, { "acc": 0.88088179, "epoch": 1.3586765223512849, "grad_norm": 7.763387680053711, "learning_rate": 7.876357987692896e-06, "loss": 0.59193449, "memory(GiB)": 34.88, "step": 50180, "train_speed(iter/s)": 0.410262 }, { "acc": 0.91378708, "epoch": 1.3588119026345002, "grad_norm": 7.884191036224365, "learning_rate": 7.875900250893641e-06, "loss": 0.45479689, "memory(GiB)": 34.88, "step": 50185, "train_speed(iter/s)": 0.410264 }, { "acc": 0.89202785, "epoch": 1.3589472829177158, "grad_norm": 6.393599987030029, "learning_rate": 7.875442478074346e-06, "loss": 0.61237726, "memory(GiB)": 34.88, "step": 50190, "train_speed(iter/s)": 0.410266 }, { "acc": 0.89089565, "epoch": 1.3590826632009314, "grad_norm": 7.453497886657715, "learning_rate": 7.87498466924075e-06, "loss": 0.59296751, "memory(GiB)": 34.88, "step": 50195, "train_speed(iter/s)": 0.410269 }, { "acc": 0.88266115, "epoch": 1.359218043484147, "grad_norm": 8.988574028015137, "learning_rate": 7.874526824398585e-06, "loss": 0.63498793, "memory(GiB)": 34.88, "step": 50200, "train_speed(iter/s)": 0.410271 }, { "acc": 0.89775076, "epoch": 1.3593534237673626, "grad_norm": 13.483855247497559, "learning_rate": 7.874068943553585e-06, "loss": 0.66762915, "memory(GiB)": 34.88, "step": 50205, "train_speed(iter/s)": 0.410273 }, { "acc": 0.87122879, "epoch": 1.3594888040505781, "grad_norm": 16.271892547607422, "learning_rate": 7.87361102671149e-06, "loss": 0.63611803, "memory(GiB)": 34.88, "step": 50210, "train_speed(iter/s)": 0.410276 }, { "acc": 0.90788965, "epoch": 1.3596241843337937, "grad_norm": 6.185720920562744, "learning_rate": 7.87315307387803e-06, "loss": 0.4866117, "memory(GiB)": 34.88, "step": 50215, "train_speed(iter/s)": 0.410278 }, { "acc": 0.890765, "epoch": 1.359759564617009, "grad_norm": 6.619177341461182, "learning_rate": 7.872695085058948e-06, "loss": 0.49167781, "memory(GiB)": 34.88, "step": 50220, "train_speed(iter/s)": 0.41028 }, { "acc": 0.88149881, "epoch": 1.3598949449002247, "grad_norm": 11.7072172164917, "learning_rate": 7.872237060259977e-06, "loss": 0.65763597, "memory(GiB)": 34.88, "step": 50225, "train_speed(iter/s)": 0.410283 }, { "acc": 0.8707058, "epoch": 1.3600303251834402, "grad_norm": 9.708168029785156, "learning_rate": 7.871778999486858e-06, "loss": 0.72516804, "memory(GiB)": 34.88, "step": 50230, "train_speed(iter/s)": 0.410284 }, { "acc": 0.87626247, "epoch": 1.3601657054666558, "grad_norm": 11.09493350982666, "learning_rate": 7.871320902745326e-06, "loss": 0.64218292, "memory(GiB)": 34.88, "step": 50235, "train_speed(iter/s)": 0.410286 }, { "acc": 0.9115839, "epoch": 1.3603010857498714, "grad_norm": 7.320267200469971, "learning_rate": 7.870862770041124e-06, "loss": 0.40411901, "memory(GiB)": 34.88, "step": 50240, "train_speed(iter/s)": 0.410288 }, { "acc": 0.89029751, "epoch": 1.360436466033087, "grad_norm": 9.050777435302734, "learning_rate": 7.870404601379985e-06, "loss": 0.56593599, "memory(GiB)": 34.88, "step": 50245, "train_speed(iter/s)": 0.410291 }, { "acc": 0.89732971, "epoch": 1.3605718463163026, "grad_norm": 19.061019897460938, "learning_rate": 7.869946396767652e-06, "loss": 0.58448091, "memory(GiB)": 34.88, "step": 50250, "train_speed(iter/s)": 0.410293 }, { "acc": 0.8916048, "epoch": 1.360707226599518, "grad_norm": 7.715241432189941, "learning_rate": 7.869488156209866e-06, "loss": 0.6134778, "memory(GiB)": 34.88, "step": 50255, "train_speed(iter/s)": 0.410295 }, { "acc": 0.89635696, "epoch": 1.3608426068827337, "grad_norm": 5.421105861663818, "learning_rate": 7.869029879712362e-06, "loss": 0.52596989, "memory(GiB)": 34.88, "step": 50260, "train_speed(iter/s)": 0.410298 }, { "acc": 0.90467949, "epoch": 1.360977987165949, "grad_norm": 8.519139289855957, "learning_rate": 7.868571567280889e-06, "loss": 0.54642773, "memory(GiB)": 34.88, "step": 50265, "train_speed(iter/s)": 0.4103 }, { "acc": 0.87922478, "epoch": 1.3611133674491647, "grad_norm": 13.744627952575684, "learning_rate": 7.868113218921182e-06, "loss": 0.72299395, "memory(GiB)": 34.88, "step": 50270, "train_speed(iter/s)": 0.410302 }, { "acc": 0.88720427, "epoch": 1.3612487477323802, "grad_norm": 40.123199462890625, "learning_rate": 7.867654834638985e-06, "loss": 0.604496, "memory(GiB)": 34.88, "step": 50275, "train_speed(iter/s)": 0.410304 }, { "acc": 0.89758968, "epoch": 1.3613841280155958, "grad_norm": 3.695119857788086, "learning_rate": 7.86719641444004e-06, "loss": 0.61078572, "memory(GiB)": 34.88, "step": 50280, "train_speed(iter/s)": 0.410307 }, { "acc": 0.90254002, "epoch": 1.3615195082988114, "grad_norm": 9.469910621643066, "learning_rate": 7.86673795833009e-06, "loss": 0.54529819, "memory(GiB)": 34.88, "step": 50285, "train_speed(iter/s)": 0.410309 }, { "acc": 0.88775692, "epoch": 1.361654888582027, "grad_norm": 10.44472885131836, "learning_rate": 7.866279466314874e-06, "loss": 0.63375559, "memory(GiB)": 34.88, "step": 50290, "train_speed(iter/s)": 0.410311 }, { "acc": 0.89025402, "epoch": 1.3617902688652426, "grad_norm": 31.279008865356445, "learning_rate": 7.865820938400143e-06, "loss": 0.64488826, "memory(GiB)": 34.88, "step": 50295, "train_speed(iter/s)": 0.410314 }, { "acc": 0.90721779, "epoch": 1.361925649148458, "grad_norm": 5.068754196166992, "learning_rate": 7.865362374591636e-06, "loss": 0.4729784, "memory(GiB)": 34.88, "step": 50300, "train_speed(iter/s)": 0.410316 }, { "acc": 0.89038506, "epoch": 1.3620610294316735, "grad_norm": 6.756390571594238, "learning_rate": 7.864903774895096e-06, "loss": 0.57356429, "memory(GiB)": 34.88, "step": 50305, "train_speed(iter/s)": 0.410318 }, { "acc": 0.8872447, "epoch": 1.362196409714889, "grad_norm": 14.515091896057129, "learning_rate": 7.864445139316271e-06, "loss": 0.65898767, "memory(GiB)": 34.88, "step": 50310, "train_speed(iter/s)": 0.41032 }, { "acc": 0.89302683, "epoch": 1.3623317899981047, "grad_norm": 13.61366081237793, "learning_rate": 7.863986467860906e-06, "loss": 0.58368716, "memory(GiB)": 34.88, "step": 50315, "train_speed(iter/s)": 0.410323 }, { "acc": 0.90389957, "epoch": 1.3624671702813203, "grad_norm": 8.19449520111084, "learning_rate": 7.863527760534746e-06, "loss": 0.46635857, "memory(GiB)": 34.88, "step": 50320, "train_speed(iter/s)": 0.410325 }, { "acc": 0.88926659, "epoch": 1.3626025505645358, "grad_norm": 27.736125946044922, "learning_rate": 7.86306901734354e-06, "loss": 0.5850471, "memory(GiB)": 34.88, "step": 50325, "train_speed(iter/s)": 0.410327 }, { "acc": 0.89665604, "epoch": 1.3627379308477514, "grad_norm": 10.18011474609375, "learning_rate": 7.862610238293028e-06, "loss": 0.53886471, "memory(GiB)": 34.88, "step": 50330, "train_speed(iter/s)": 0.410329 }, { "acc": 0.88505907, "epoch": 1.3628733111309668, "grad_norm": 18.311994552612305, "learning_rate": 7.862151423388965e-06, "loss": 0.62587023, "memory(GiB)": 34.88, "step": 50335, "train_speed(iter/s)": 0.410331 }, { "acc": 0.89141407, "epoch": 1.3630086914141826, "grad_norm": 6.977340221405029, "learning_rate": 7.861692572637093e-06, "loss": 0.60229197, "memory(GiB)": 34.88, "step": 50340, "train_speed(iter/s)": 0.410334 }, { "acc": 0.89967947, "epoch": 1.363144071697398, "grad_norm": 9.573169708251953, "learning_rate": 7.86123368604316e-06, "loss": 0.43881407, "memory(GiB)": 34.88, "step": 50345, "train_speed(iter/s)": 0.410336 }, { "acc": 0.91049938, "epoch": 1.3632794519806135, "grad_norm": 5.351937770843506, "learning_rate": 7.86077476361292e-06, "loss": 0.43888407, "memory(GiB)": 34.88, "step": 50350, "train_speed(iter/s)": 0.410338 }, { "acc": 0.90254526, "epoch": 1.363414832263829, "grad_norm": 14.04706859588623, "learning_rate": 7.860315805352114e-06, "loss": 0.51868467, "memory(GiB)": 34.88, "step": 50355, "train_speed(iter/s)": 0.410341 }, { "acc": 0.90058718, "epoch": 1.3635502125470447, "grad_norm": 8.467144012451172, "learning_rate": 7.859856811266499e-06, "loss": 0.50653648, "memory(GiB)": 34.88, "step": 50360, "train_speed(iter/s)": 0.410343 }, { "acc": 0.89136295, "epoch": 1.3636855928302603, "grad_norm": 8.045713424682617, "learning_rate": 7.85939778136182e-06, "loss": 0.60962534, "memory(GiB)": 34.88, "step": 50365, "train_speed(iter/s)": 0.410345 }, { "acc": 0.89713392, "epoch": 1.3638209731134758, "grad_norm": 10.36906623840332, "learning_rate": 7.858938715643828e-06, "loss": 0.54583664, "memory(GiB)": 34.88, "step": 50370, "train_speed(iter/s)": 0.410348 }, { "acc": 0.87524357, "epoch": 1.3639563533966914, "grad_norm": 9.696273803710938, "learning_rate": 7.858479614118276e-06, "loss": 0.72467422, "memory(GiB)": 34.88, "step": 50375, "train_speed(iter/s)": 0.41035 }, { "acc": 0.90663309, "epoch": 1.3640917336799068, "grad_norm": 6.44887113571167, "learning_rate": 7.858020476790912e-06, "loss": 0.45982609, "memory(GiB)": 34.88, "step": 50380, "train_speed(iter/s)": 0.410352 }, { "acc": 0.88831968, "epoch": 1.3642271139631224, "grad_norm": 7.457441329956055, "learning_rate": 7.857561303667487e-06, "loss": 0.60712709, "memory(GiB)": 34.88, "step": 50385, "train_speed(iter/s)": 0.410355 }, { "acc": 0.87755833, "epoch": 1.364362494246338, "grad_norm": 9.536857604980469, "learning_rate": 7.85710209475376e-06, "loss": 0.69541159, "memory(GiB)": 34.88, "step": 50390, "train_speed(iter/s)": 0.410356 }, { "acc": 0.88073826, "epoch": 1.3644978745295535, "grad_norm": 13.785582542419434, "learning_rate": 7.856642850055474e-06, "loss": 0.67366619, "memory(GiB)": 34.88, "step": 50395, "train_speed(iter/s)": 0.410358 }, { "acc": 0.88961716, "epoch": 1.364633254812769, "grad_norm": 3.589754819869995, "learning_rate": 7.856183569578389e-06, "loss": 0.58276072, "memory(GiB)": 34.88, "step": 50400, "train_speed(iter/s)": 0.41036 }, { "acc": 0.90000277, "epoch": 1.3647686350959847, "grad_norm": 18.363239288330078, "learning_rate": 7.855724253328255e-06, "loss": 0.45733433, "memory(GiB)": 34.88, "step": 50405, "train_speed(iter/s)": 0.410363 }, { "acc": 0.90474033, "epoch": 1.3649040153792003, "grad_norm": 8.059941291809082, "learning_rate": 7.855264901310828e-06, "loss": 0.59829497, "memory(GiB)": 34.88, "step": 50410, "train_speed(iter/s)": 0.410365 }, { "acc": 0.89352961, "epoch": 1.3650393956624156, "grad_norm": 6.20117712020874, "learning_rate": 7.854805513531858e-06, "loss": 0.55180821, "memory(GiB)": 34.88, "step": 50415, "train_speed(iter/s)": 0.410367 }, { "acc": 0.91539211, "epoch": 1.3651747759456314, "grad_norm": 9.353401184082031, "learning_rate": 7.854346089997106e-06, "loss": 0.39101906, "memory(GiB)": 34.88, "step": 50420, "train_speed(iter/s)": 0.410369 }, { "acc": 0.90383949, "epoch": 1.3653101562288468, "grad_norm": 7.334719181060791, "learning_rate": 7.853886630712321e-06, "loss": 0.5062891, "memory(GiB)": 34.88, "step": 50425, "train_speed(iter/s)": 0.410372 }, { "acc": 0.9111845, "epoch": 1.3654455365120624, "grad_norm": 17.97559356689453, "learning_rate": 7.853427135683264e-06, "loss": 0.47460642, "memory(GiB)": 34.88, "step": 50430, "train_speed(iter/s)": 0.410374 }, { "acc": 0.87231007, "epoch": 1.365580916795278, "grad_norm": 10.878119468688965, "learning_rate": 7.852967604915688e-06, "loss": 0.72196012, "memory(GiB)": 34.88, "step": 50435, "train_speed(iter/s)": 0.410376 }, { "acc": 0.87642403, "epoch": 1.3657162970784935, "grad_norm": 12.9032564163208, "learning_rate": 7.852508038415351e-06, "loss": 0.76252551, "memory(GiB)": 34.88, "step": 50440, "train_speed(iter/s)": 0.410379 }, { "acc": 0.90267239, "epoch": 1.365851677361709, "grad_norm": 8.333932876586914, "learning_rate": 7.852048436188007e-06, "loss": 0.51457968, "memory(GiB)": 34.88, "step": 50445, "train_speed(iter/s)": 0.410381 }, { "acc": 0.87802429, "epoch": 1.3659870576449247, "grad_norm": 8.887232780456543, "learning_rate": 7.85158879823942e-06, "loss": 0.61398492, "memory(GiB)": 34.88, "step": 50450, "train_speed(iter/s)": 0.410383 }, { "acc": 0.90570259, "epoch": 1.3661224379281403, "grad_norm": 7.48096227645874, "learning_rate": 7.851129124575337e-06, "loss": 0.46922817, "memory(GiB)": 34.88, "step": 50455, "train_speed(iter/s)": 0.410385 }, { "acc": 0.91740627, "epoch": 1.3662578182113556, "grad_norm": 10.610736846923828, "learning_rate": 7.850669415201527e-06, "loss": 0.39109395, "memory(GiB)": 34.88, "step": 50460, "train_speed(iter/s)": 0.410388 }, { "acc": 0.88795986, "epoch": 1.3663931984945712, "grad_norm": 7.362958908081055, "learning_rate": 7.850209670123743e-06, "loss": 0.54919157, "memory(GiB)": 34.88, "step": 50465, "train_speed(iter/s)": 0.41039 }, { "acc": 0.91958923, "epoch": 1.3665285787777868, "grad_norm": 14.816308975219727, "learning_rate": 7.849749889347746e-06, "loss": 0.42913747, "memory(GiB)": 34.88, "step": 50470, "train_speed(iter/s)": 0.410392 }, { "acc": 0.88421278, "epoch": 1.3666639590610024, "grad_norm": 12.34233283996582, "learning_rate": 7.849290072879295e-06, "loss": 0.61673374, "memory(GiB)": 34.88, "step": 50475, "train_speed(iter/s)": 0.410395 }, { "acc": 0.87829609, "epoch": 1.366799339344218, "grad_norm": 12.288139343261719, "learning_rate": 7.84883022072415e-06, "loss": 0.61047053, "memory(GiB)": 34.88, "step": 50480, "train_speed(iter/s)": 0.410397 }, { "acc": 0.89473343, "epoch": 1.3669347196274335, "grad_norm": 9.518983840942383, "learning_rate": 7.848370332888074e-06, "loss": 0.6227109, "memory(GiB)": 34.88, "step": 50485, "train_speed(iter/s)": 0.410399 }, { "acc": 0.9067728, "epoch": 1.367070099910649, "grad_norm": 4.182662010192871, "learning_rate": 7.847910409376824e-06, "loss": 0.44504814, "memory(GiB)": 34.88, "step": 50490, "train_speed(iter/s)": 0.410401 }, { "acc": 0.91124325, "epoch": 1.3672054801938645, "grad_norm": 8.04407024383545, "learning_rate": 7.847450450196165e-06, "loss": 0.54067268, "memory(GiB)": 34.88, "step": 50495, "train_speed(iter/s)": 0.410403 }, { "acc": 0.88947744, "epoch": 1.3673408604770803, "grad_norm": 9.702913284301758, "learning_rate": 7.846990455351857e-06, "loss": 0.56122494, "memory(GiB)": 34.88, "step": 50500, "train_speed(iter/s)": 0.410405 }, { "acc": 0.89839134, "epoch": 1.3674762407602956, "grad_norm": 4.748228549957275, "learning_rate": 7.846530424849662e-06, "loss": 0.64389114, "memory(GiB)": 34.88, "step": 50505, "train_speed(iter/s)": 0.410408 }, { "acc": 0.90484667, "epoch": 1.3676116210435112, "grad_norm": 4.518547534942627, "learning_rate": 7.846070358695345e-06, "loss": 0.50691833, "memory(GiB)": 34.88, "step": 50510, "train_speed(iter/s)": 0.41041 }, { "acc": 0.88798943, "epoch": 1.3677470013267268, "grad_norm": 9.284289360046387, "learning_rate": 7.845610256894667e-06, "loss": 0.59238477, "memory(GiB)": 34.88, "step": 50515, "train_speed(iter/s)": 0.410412 }, { "acc": 0.89094591, "epoch": 1.3678823816099424, "grad_norm": 6.952794075012207, "learning_rate": 7.845150119453391e-06, "loss": 0.60717382, "memory(GiB)": 34.88, "step": 50520, "train_speed(iter/s)": 0.410415 }, { "acc": 0.8859972, "epoch": 1.368017761893158, "grad_norm": 6.175424098968506, "learning_rate": 7.844689946377284e-06, "loss": 0.60064936, "memory(GiB)": 34.88, "step": 50525, "train_speed(iter/s)": 0.410417 }, { "acc": 0.89059391, "epoch": 1.3681531421763735, "grad_norm": 7.186837196350098, "learning_rate": 7.84422973767211e-06, "loss": 0.52030578, "memory(GiB)": 34.88, "step": 50530, "train_speed(iter/s)": 0.410419 }, { "acc": 0.89697037, "epoch": 1.368288522459589, "grad_norm": 8.092758178710938, "learning_rate": 7.843769493343631e-06, "loss": 0.54653149, "memory(GiB)": 34.88, "step": 50535, "train_speed(iter/s)": 0.410421 }, { "acc": 0.90501156, "epoch": 1.3684239027428045, "grad_norm": 7.539225101470947, "learning_rate": 7.843309213397614e-06, "loss": 0.56977067, "memory(GiB)": 34.88, "step": 50540, "train_speed(iter/s)": 0.410423 }, { "acc": 0.88855352, "epoch": 1.36855928302602, "grad_norm": 9.466296195983887, "learning_rate": 7.842848897839828e-06, "loss": 0.62570667, "memory(GiB)": 34.88, "step": 50545, "train_speed(iter/s)": 0.410425 }, { "acc": 0.86744595, "epoch": 1.3686946633092356, "grad_norm": 12.989184379577637, "learning_rate": 7.842388546676037e-06, "loss": 0.64915295, "memory(GiB)": 34.88, "step": 50550, "train_speed(iter/s)": 0.410427 }, { "acc": 0.90376091, "epoch": 1.3688300435924512, "grad_norm": 10.444252967834473, "learning_rate": 7.841928159912004e-06, "loss": 0.43140392, "memory(GiB)": 34.88, "step": 50555, "train_speed(iter/s)": 0.410429 }, { "acc": 0.90117798, "epoch": 1.3689654238756668, "grad_norm": 7.45375919342041, "learning_rate": 7.841467737553504e-06, "loss": 0.38310716, "memory(GiB)": 34.88, "step": 50560, "train_speed(iter/s)": 0.410432 }, { "acc": 0.89202747, "epoch": 1.3691008041588824, "grad_norm": 7.5553789138793945, "learning_rate": 7.841007279606298e-06, "loss": 0.52197948, "memory(GiB)": 34.88, "step": 50565, "train_speed(iter/s)": 0.410434 }, { "acc": 0.90179768, "epoch": 1.369236184442098, "grad_norm": 8.660561561584473, "learning_rate": 7.840546786076158e-06, "loss": 0.51406002, "memory(GiB)": 34.88, "step": 50570, "train_speed(iter/s)": 0.410436 }, { "acc": 0.89648743, "epoch": 1.3693715647253133, "grad_norm": 21.368934631347656, "learning_rate": 7.84008625696885e-06, "loss": 0.60586443, "memory(GiB)": 34.88, "step": 50575, "train_speed(iter/s)": 0.410438 }, { "acc": 0.88490601, "epoch": 1.3695069450085289, "grad_norm": 16.68216323852539, "learning_rate": 7.839625692290147e-06, "loss": 0.59503107, "memory(GiB)": 34.88, "step": 50580, "train_speed(iter/s)": 0.410441 }, { "acc": 0.89360046, "epoch": 1.3696423252917445, "grad_norm": 11.405237197875977, "learning_rate": 7.839165092045812e-06, "loss": 0.55967655, "memory(GiB)": 34.88, "step": 50585, "train_speed(iter/s)": 0.410443 }, { "acc": 0.88439026, "epoch": 1.36977770557496, "grad_norm": 6.473206520080566, "learning_rate": 7.838704456241623e-06, "loss": 0.55880055, "memory(GiB)": 34.88, "step": 50590, "train_speed(iter/s)": 0.410445 }, { "acc": 0.88692665, "epoch": 1.3699130858581756, "grad_norm": 5.270341396331787, "learning_rate": 7.838243784883343e-06, "loss": 0.54306536, "memory(GiB)": 34.88, "step": 50595, "train_speed(iter/s)": 0.410447 }, { "acc": 0.87365828, "epoch": 1.3700484661413912, "grad_norm": 14.9976167678833, "learning_rate": 7.837783077976747e-06, "loss": 0.70044637, "memory(GiB)": 34.88, "step": 50600, "train_speed(iter/s)": 0.410449 }, { "acc": 0.91255245, "epoch": 1.3701838464246068, "grad_norm": 5.200503826141357, "learning_rate": 7.837322335527606e-06, "loss": 0.45814719, "memory(GiB)": 34.88, "step": 50605, "train_speed(iter/s)": 0.410451 }, { "acc": 0.89295349, "epoch": 1.3703192267078221, "grad_norm": 6.390551567077637, "learning_rate": 7.83686155754169e-06, "loss": 0.56211758, "memory(GiB)": 34.88, "step": 50610, "train_speed(iter/s)": 0.410453 }, { "acc": 0.9052496, "epoch": 1.370454606991038, "grad_norm": 8.86761474609375, "learning_rate": 7.83640074402477e-06, "loss": 0.53676758, "memory(GiB)": 34.88, "step": 50615, "train_speed(iter/s)": 0.410455 }, { "acc": 0.89559984, "epoch": 1.3705899872742533, "grad_norm": 4.572853088378906, "learning_rate": 7.835939894982624e-06, "loss": 0.56275864, "memory(GiB)": 34.88, "step": 50620, "train_speed(iter/s)": 0.410458 }, { "acc": 0.87996178, "epoch": 1.3707253675574689, "grad_norm": 8.419262886047363, "learning_rate": 7.835479010421022e-06, "loss": 0.75765276, "memory(GiB)": 34.88, "step": 50625, "train_speed(iter/s)": 0.41046 }, { "acc": 0.89199333, "epoch": 1.3708607478406845, "grad_norm": 25.632265090942383, "learning_rate": 7.835018090345735e-06, "loss": 0.64624786, "memory(GiB)": 34.88, "step": 50630, "train_speed(iter/s)": 0.410462 }, { "acc": 0.89211826, "epoch": 1.3709961281239, "grad_norm": 6.760631084442139, "learning_rate": 7.83455713476254e-06, "loss": 0.54569812, "memory(GiB)": 34.88, "step": 50635, "train_speed(iter/s)": 0.410464 }, { "acc": 0.90862093, "epoch": 1.3711315084071156, "grad_norm": 7.021582126617432, "learning_rate": 7.83409614367721e-06, "loss": 0.49534035, "memory(GiB)": 34.88, "step": 50640, "train_speed(iter/s)": 0.410466 }, { "acc": 0.89609547, "epoch": 1.3712668886903312, "grad_norm": 12.49400520324707, "learning_rate": 7.83363511709552e-06, "loss": 0.58053856, "memory(GiB)": 34.88, "step": 50645, "train_speed(iter/s)": 0.410468 }, { "acc": 0.87876749, "epoch": 1.3714022689735468, "grad_norm": 10.603145599365234, "learning_rate": 7.833174055023245e-06, "loss": 0.7501647, "memory(GiB)": 34.88, "step": 50650, "train_speed(iter/s)": 0.410471 }, { "acc": 0.91331511, "epoch": 1.3715376492567621, "grad_norm": 9.771459579467773, "learning_rate": 7.832712957466164e-06, "loss": 0.49060698, "memory(GiB)": 34.88, "step": 50655, "train_speed(iter/s)": 0.410473 }, { "acc": 0.87943783, "epoch": 1.3716730295399777, "grad_norm": 8.714702606201172, "learning_rate": 7.83225182443005e-06, "loss": 0.59664679, "memory(GiB)": 34.88, "step": 50660, "train_speed(iter/s)": 0.410475 }, { "acc": 0.88568192, "epoch": 1.3718084098231933, "grad_norm": 6.928164005279541, "learning_rate": 7.831790655920677e-06, "loss": 0.64195995, "memory(GiB)": 34.88, "step": 50665, "train_speed(iter/s)": 0.410477 }, { "acc": 0.89101734, "epoch": 1.3719437901064089, "grad_norm": 5.413261890411377, "learning_rate": 7.831329451943828e-06, "loss": 0.61040163, "memory(GiB)": 34.88, "step": 50670, "train_speed(iter/s)": 0.41048 }, { "acc": 0.88790112, "epoch": 1.3720791703896245, "grad_norm": 13.562106132507324, "learning_rate": 7.830868212505276e-06, "loss": 0.61492414, "memory(GiB)": 34.88, "step": 50675, "train_speed(iter/s)": 0.410482 }, { "acc": 0.88680353, "epoch": 1.37221455067284, "grad_norm": 7.703670024871826, "learning_rate": 7.8304069376108e-06, "loss": 0.61008444, "memory(GiB)": 34.88, "step": 50680, "train_speed(iter/s)": 0.410484 }, { "acc": 0.89682455, "epoch": 1.3723499309560556, "grad_norm": 7.954565525054932, "learning_rate": 7.829945627266179e-06, "loss": 0.52508869, "memory(GiB)": 34.88, "step": 50685, "train_speed(iter/s)": 0.410486 }, { "acc": 0.89956646, "epoch": 1.372485311239271, "grad_norm": 8.888897895812988, "learning_rate": 7.829484281477193e-06, "loss": 0.5059845, "memory(GiB)": 34.88, "step": 50690, "train_speed(iter/s)": 0.410488 }, { "acc": 0.90549526, "epoch": 1.3726206915224868, "grad_norm": 6.032255172729492, "learning_rate": 7.82902290024962e-06, "loss": 0.48399839, "memory(GiB)": 34.88, "step": 50695, "train_speed(iter/s)": 0.410491 }, { "acc": 0.88640575, "epoch": 1.3727560718057021, "grad_norm": 8.923982620239258, "learning_rate": 7.82856148358924e-06, "loss": 0.64424858, "memory(GiB)": 34.88, "step": 50700, "train_speed(iter/s)": 0.410493 }, { "acc": 0.91473513, "epoch": 1.3728914520889177, "grad_norm": 8.394447326660156, "learning_rate": 7.828100031501832e-06, "loss": 0.44216413, "memory(GiB)": 34.88, "step": 50705, "train_speed(iter/s)": 0.410496 }, { "acc": 0.89362535, "epoch": 1.3730268323721333, "grad_norm": 21.651660919189453, "learning_rate": 7.827638543993175e-06, "loss": 0.55519361, "memory(GiB)": 34.88, "step": 50710, "train_speed(iter/s)": 0.410498 }, { "acc": 0.89313517, "epoch": 1.3731622126553489, "grad_norm": 7.7887420654296875, "learning_rate": 7.827177021069054e-06, "loss": 0.54198246, "memory(GiB)": 34.88, "step": 50715, "train_speed(iter/s)": 0.4105 }, { "acc": 0.90303497, "epoch": 1.3732975929385645, "grad_norm": 13.255433082580566, "learning_rate": 7.826715462735251e-06, "loss": 0.50869365, "memory(GiB)": 34.88, "step": 50720, "train_speed(iter/s)": 0.410502 }, { "acc": 0.88827772, "epoch": 1.37343297322178, "grad_norm": 9.215597152709961, "learning_rate": 7.826253868997543e-06, "loss": 0.62576723, "memory(GiB)": 34.88, "step": 50725, "train_speed(iter/s)": 0.410505 }, { "acc": 0.89775801, "epoch": 1.3735683535049956, "grad_norm": 13.392463684082031, "learning_rate": 7.825792239861716e-06, "loss": 0.62049894, "memory(GiB)": 34.88, "step": 50730, "train_speed(iter/s)": 0.410507 }, { "acc": 0.89790325, "epoch": 1.373703733788211, "grad_norm": 9.454721450805664, "learning_rate": 7.825330575333553e-06, "loss": 0.49871655, "memory(GiB)": 34.88, "step": 50735, "train_speed(iter/s)": 0.410509 }, { "acc": 0.88349028, "epoch": 1.3738391140714266, "grad_norm": 11.793089866638184, "learning_rate": 7.824868875418837e-06, "loss": 0.71485319, "memory(GiB)": 34.88, "step": 50740, "train_speed(iter/s)": 0.410511 }, { "acc": 0.91846714, "epoch": 1.3739744943546421, "grad_norm": 4.976813793182373, "learning_rate": 7.82440714012335e-06, "loss": 0.41963549, "memory(GiB)": 34.88, "step": 50745, "train_speed(iter/s)": 0.410513 }, { "acc": 0.9039607, "epoch": 1.3741098746378577, "grad_norm": 13.692585945129395, "learning_rate": 7.823945369452876e-06, "loss": 0.43996305, "memory(GiB)": 34.88, "step": 50750, "train_speed(iter/s)": 0.410516 }, { "acc": 0.87646294, "epoch": 1.3742452549210733, "grad_norm": 11.167536735534668, "learning_rate": 7.8234835634132e-06, "loss": 0.61923428, "memory(GiB)": 34.88, "step": 50755, "train_speed(iter/s)": 0.410518 }, { "acc": 0.87695408, "epoch": 1.3743806352042889, "grad_norm": 12.188100814819336, "learning_rate": 7.82302172201011e-06, "loss": 0.71557665, "memory(GiB)": 34.88, "step": 50760, "train_speed(iter/s)": 0.41052 }, { "acc": 0.90711422, "epoch": 1.3745160154875045, "grad_norm": 9.723535537719727, "learning_rate": 7.822559845249389e-06, "loss": 0.52191706, "memory(GiB)": 34.88, "step": 50765, "train_speed(iter/s)": 0.410522 }, { "acc": 0.89041595, "epoch": 1.3746513957707198, "grad_norm": 14.47048282623291, "learning_rate": 7.822097933136822e-06, "loss": 0.58962641, "memory(GiB)": 34.88, "step": 50770, "train_speed(iter/s)": 0.410524 }, { "acc": 0.90890703, "epoch": 1.3747867760539356, "grad_norm": 27.299776077270508, "learning_rate": 7.821635985678196e-06, "loss": 0.43821211, "memory(GiB)": 34.88, "step": 50775, "train_speed(iter/s)": 0.410526 }, { "acc": 0.8841218, "epoch": 1.374922156337151, "grad_norm": 7.685687065124512, "learning_rate": 7.8211740028793e-06, "loss": 0.55982695, "memory(GiB)": 34.88, "step": 50780, "train_speed(iter/s)": 0.410529 }, { "acc": 0.92809544, "epoch": 1.3750575366203666, "grad_norm": 6.044332504272461, "learning_rate": 7.82071198474592e-06, "loss": 0.36679142, "memory(GiB)": 34.88, "step": 50785, "train_speed(iter/s)": 0.410531 }, { "acc": 0.88704462, "epoch": 1.3751929169035821, "grad_norm": 6.955505847930908, "learning_rate": 7.820249931283839e-06, "loss": 0.70515432, "memory(GiB)": 34.88, "step": 50790, "train_speed(iter/s)": 0.410533 }, { "acc": 0.89515266, "epoch": 1.3753282971867977, "grad_norm": 13.559789657592773, "learning_rate": 7.819787842498853e-06, "loss": 0.54723845, "memory(GiB)": 34.88, "step": 50795, "train_speed(iter/s)": 0.410535 }, { "acc": 0.88354797, "epoch": 1.3754636774700133, "grad_norm": 8.321163177490234, "learning_rate": 7.819325718396747e-06, "loss": 0.70922832, "memory(GiB)": 34.88, "step": 50800, "train_speed(iter/s)": 0.410538 }, { "acc": 0.90135441, "epoch": 1.3755990577532289, "grad_norm": 3.463498592376709, "learning_rate": 7.818863558983308e-06, "loss": 0.52582006, "memory(GiB)": 34.88, "step": 50805, "train_speed(iter/s)": 0.41054 }, { "acc": 0.90884914, "epoch": 1.3757344380364445, "grad_norm": 6.05127477645874, "learning_rate": 7.81840136426433e-06, "loss": 0.58221436, "memory(GiB)": 34.88, "step": 50810, "train_speed(iter/s)": 0.410542 }, { "acc": 0.89686794, "epoch": 1.3758698183196598, "grad_norm": 19.187713623046875, "learning_rate": 7.817939134245598e-06, "loss": 0.60269947, "memory(GiB)": 34.88, "step": 50815, "train_speed(iter/s)": 0.410544 }, { "acc": 0.90509338, "epoch": 1.3760051986028754, "grad_norm": 5.220651626586914, "learning_rate": 7.817476868932905e-06, "loss": 0.4791965, "memory(GiB)": 34.88, "step": 50820, "train_speed(iter/s)": 0.410546 }, { "acc": 0.90157032, "epoch": 1.376140578886091, "grad_norm": 15.086231231689453, "learning_rate": 7.817014568332041e-06, "loss": 0.52477345, "memory(GiB)": 34.88, "step": 50825, "train_speed(iter/s)": 0.410549 }, { "acc": 0.91055641, "epoch": 1.3762759591693066, "grad_norm": 9.618184089660645, "learning_rate": 7.816552232448798e-06, "loss": 0.4518847, "memory(GiB)": 34.88, "step": 50830, "train_speed(iter/s)": 0.410551 }, { "acc": 0.88445549, "epoch": 1.3764113394525221, "grad_norm": 9.798823356628418, "learning_rate": 7.816089861288969e-06, "loss": 0.5540318, "memory(GiB)": 34.88, "step": 50835, "train_speed(iter/s)": 0.410553 }, { "acc": 0.9003336, "epoch": 1.3765467197357377, "grad_norm": 4.821801662445068, "learning_rate": 7.815627454858342e-06, "loss": 0.53907514, "memory(GiB)": 34.88, "step": 50840, "train_speed(iter/s)": 0.410555 }, { "acc": 0.87641468, "epoch": 1.3766821000189533, "grad_norm": 11.745224952697754, "learning_rate": 7.815165013162713e-06, "loss": 0.59359298, "memory(GiB)": 34.88, "step": 50845, "train_speed(iter/s)": 0.410558 }, { "acc": 0.88651047, "epoch": 1.3768174803021687, "grad_norm": 15.187320709228516, "learning_rate": 7.814702536207874e-06, "loss": 0.5939992, "memory(GiB)": 34.88, "step": 50850, "train_speed(iter/s)": 0.41056 }, { "acc": 0.88463879, "epoch": 1.3769528605853845, "grad_norm": 6.847804546356201, "learning_rate": 7.814240023999616e-06, "loss": 0.57136774, "memory(GiB)": 34.88, "step": 50855, "train_speed(iter/s)": 0.410563 }, { "acc": 0.87499847, "epoch": 1.3770882408685998, "grad_norm": 8.262794494628906, "learning_rate": 7.813777476543736e-06, "loss": 0.64915771, "memory(GiB)": 34.88, "step": 50860, "train_speed(iter/s)": 0.410565 }, { "acc": 0.9138176, "epoch": 1.3772236211518154, "grad_norm": 6.869470119476318, "learning_rate": 7.81331489384603e-06, "loss": 0.53221807, "memory(GiB)": 34.88, "step": 50865, "train_speed(iter/s)": 0.410567 }, { "acc": 0.86386433, "epoch": 1.377359001435031, "grad_norm": 15.696683883666992, "learning_rate": 7.81285227591229e-06, "loss": 0.78368139, "memory(GiB)": 34.88, "step": 50870, "train_speed(iter/s)": 0.410569 }, { "acc": 0.89088955, "epoch": 1.3774943817182466, "grad_norm": 13.069172859191895, "learning_rate": 7.812389622748311e-06, "loss": 0.60535259, "memory(GiB)": 34.88, "step": 50875, "train_speed(iter/s)": 0.410571 }, { "acc": 0.89361868, "epoch": 1.3776297620014621, "grad_norm": 12.558976173400879, "learning_rate": 7.811926934359888e-06, "loss": 0.58159456, "memory(GiB)": 34.88, "step": 50880, "train_speed(iter/s)": 0.410573 }, { "acc": 0.88644037, "epoch": 1.3777651422846777, "grad_norm": 13.82315444946289, "learning_rate": 7.811464210752817e-06, "loss": 0.63796005, "memory(GiB)": 34.88, "step": 50885, "train_speed(iter/s)": 0.410575 }, { "acc": 0.88936348, "epoch": 1.3779005225678933, "grad_norm": 3.904845952987671, "learning_rate": 7.811001451932897e-06, "loss": 0.55738754, "memory(GiB)": 34.88, "step": 50890, "train_speed(iter/s)": 0.410577 }, { "acc": 0.91371994, "epoch": 1.3780359028511087, "grad_norm": 3.7073123455047607, "learning_rate": 7.810538657905925e-06, "loss": 0.38420768, "memory(GiB)": 34.88, "step": 50895, "train_speed(iter/s)": 0.41058 }, { "acc": 0.88444138, "epoch": 1.3781712831343242, "grad_norm": 31.473310470581055, "learning_rate": 7.810075828677697e-06, "loss": 0.58673639, "memory(GiB)": 34.88, "step": 50900, "train_speed(iter/s)": 0.410582 }, { "acc": 0.89336319, "epoch": 1.3783066634175398, "grad_norm": 8.276079177856445, "learning_rate": 7.809612964254008e-06, "loss": 0.60613327, "memory(GiB)": 34.88, "step": 50905, "train_speed(iter/s)": 0.410584 }, { "acc": 0.90679111, "epoch": 1.3784420437007554, "grad_norm": 9.50003719329834, "learning_rate": 7.809150064640663e-06, "loss": 0.4413043, "memory(GiB)": 34.88, "step": 50910, "train_speed(iter/s)": 0.410587 }, { "acc": 0.9057744, "epoch": 1.378577423983971, "grad_norm": 12.33259391784668, "learning_rate": 7.808687129843456e-06, "loss": 0.42787766, "memory(GiB)": 34.88, "step": 50915, "train_speed(iter/s)": 0.410589 }, { "acc": 0.90674534, "epoch": 1.3787128042671866, "grad_norm": 5.886458873748779, "learning_rate": 7.808224159868185e-06, "loss": 0.4558723, "memory(GiB)": 34.88, "step": 50920, "train_speed(iter/s)": 0.410591 }, { "acc": 0.88980417, "epoch": 1.3788481845504021, "grad_norm": 8.80009937286377, "learning_rate": 7.807761154720653e-06, "loss": 0.58157988, "memory(GiB)": 34.88, "step": 50925, "train_speed(iter/s)": 0.410593 }, { "acc": 0.89204321, "epoch": 1.3789835648336175, "grad_norm": 7.486247539520264, "learning_rate": 7.80729811440666e-06, "loss": 0.62954035, "memory(GiB)": 34.88, "step": 50930, "train_speed(iter/s)": 0.410596 }, { "acc": 0.88311691, "epoch": 1.3791189451168333, "grad_norm": 11.350421905517578, "learning_rate": 7.806835038932003e-06, "loss": 0.62689257, "memory(GiB)": 34.88, "step": 50935, "train_speed(iter/s)": 0.410598 }, { "acc": 0.90359306, "epoch": 1.3792543254000487, "grad_norm": 7.42994499206543, "learning_rate": 7.806371928302487e-06, "loss": 0.54541717, "memory(GiB)": 34.88, "step": 50940, "train_speed(iter/s)": 0.4106 }, { "acc": 0.89292488, "epoch": 1.3793897056832642, "grad_norm": 8.505350112915039, "learning_rate": 7.805908782523912e-06, "loss": 0.62682939, "memory(GiB)": 34.88, "step": 50945, "train_speed(iter/s)": 0.410603 }, { "acc": 0.89729805, "epoch": 1.3795250859664798, "grad_norm": 10.031237602233887, "learning_rate": 7.805445601602077e-06, "loss": 0.50719728, "memory(GiB)": 34.88, "step": 50950, "train_speed(iter/s)": 0.410605 }, { "acc": 0.88813763, "epoch": 1.3796604662496954, "grad_norm": 23.72296905517578, "learning_rate": 7.804982385542789e-06, "loss": 0.56149378, "memory(GiB)": 34.88, "step": 50955, "train_speed(iter/s)": 0.410607 }, { "acc": 0.89024277, "epoch": 1.379795846532911, "grad_norm": 22.012012481689453, "learning_rate": 7.804519134351846e-06, "loss": 0.56114454, "memory(GiB)": 34.88, "step": 50960, "train_speed(iter/s)": 0.410609 }, { "acc": 0.90343657, "epoch": 1.3799312268161266, "grad_norm": 6.652402877807617, "learning_rate": 7.804055848035056e-06, "loss": 0.4459393, "memory(GiB)": 34.88, "step": 50965, "train_speed(iter/s)": 0.410611 }, { "acc": 0.90001793, "epoch": 1.3800666070993421, "grad_norm": 9.42949104309082, "learning_rate": 7.803592526598219e-06, "loss": 0.51054544, "memory(GiB)": 34.88, "step": 50970, "train_speed(iter/s)": 0.410614 }, { "acc": 0.90766621, "epoch": 1.3802019873825575, "grad_norm": 10.640623092651367, "learning_rate": 7.80312917004714e-06, "loss": 0.46372867, "memory(GiB)": 34.88, "step": 50975, "train_speed(iter/s)": 0.410616 }, { "acc": 0.88112068, "epoch": 1.380337367665773, "grad_norm": 14.365002632141113, "learning_rate": 7.802665778387623e-06, "loss": 0.69939575, "memory(GiB)": 34.88, "step": 50980, "train_speed(iter/s)": 0.410618 }, { "acc": 0.90529118, "epoch": 1.3804727479489887, "grad_norm": 6.501030445098877, "learning_rate": 7.802202351625472e-06, "loss": 0.53217163, "memory(GiB)": 34.88, "step": 50985, "train_speed(iter/s)": 0.41062 }, { "acc": 0.87586899, "epoch": 1.3806081282322042, "grad_norm": 19.192380905151367, "learning_rate": 7.801738889766497e-06, "loss": 0.61553822, "memory(GiB)": 34.88, "step": 50990, "train_speed(iter/s)": 0.410623 }, { "acc": 0.88956175, "epoch": 1.3807435085154198, "grad_norm": 7.655126094818115, "learning_rate": 7.801275392816499e-06, "loss": 0.66888685, "memory(GiB)": 34.88, "step": 50995, "train_speed(iter/s)": 0.410625 }, { "acc": 0.87714376, "epoch": 1.3808788887986354, "grad_norm": 10.283599853515625, "learning_rate": 7.800811860781286e-06, "loss": 0.74394007, "memory(GiB)": 34.88, "step": 51000, "train_speed(iter/s)": 0.410627 }, { "acc": 0.908144, "epoch": 1.381014269081851, "grad_norm": 8.100500106811523, "learning_rate": 7.800348293666663e-06, "loss": 0.40530486, "memory(GiB)": 34.88, "step": 51005, "train_speed(iter/s)": 0.410629 }, { "acc": 0.90796843, "epoch": 1.3811496493650663, "grad_norm": 17.550952911376953, "learning_rate": 7.799884691478439e-06, "loss": 0.38364456, "memory(GiB)": 34.88, "step": 51010, "train_speed(iter/s)": 0.410631 }, { "acc": 0.89360428, "epoch": 1.3812850296482821, "grad_norm": 6.324429988861084, "learning_rate": 7.799421054222422e-06, "loss": 0.51752234, "memory(GiB)": 34.88, "step": 51015, "train_speed(iter/s)": 0.410633 }, { "acc": 0.90008984, "epoch": 1.3814204099314975, "grad_norm": 7.902379989624023, "learning_rate": 7.798957381904418e-06, "loss": 0.52327204, "memory(GiB)": 34.88, "step": 51020, "train_speed(iter/s)": 0.410636 }, { "acc": 0.89922714, "epoch": 1.381555790214713, "grad_norm": 5.370937347412109, "learning_rate": 7.798493674530236e-06, "loss": 0.50836334, "memory(GiB)": 34.88, "step": 51025, "train_speed(iter/s)": 0.410638 }, { "acc": 0.88704109, "epoch": 1.3816911704979287, "grad_norm": 8.051703453063965, "learning_rate": 7.798029932105685e-06, "loss": 0.57980051, "memory(GiB)": 34.88, "step": 51030, "train_speed(iter/s)": 0.41064 }, { "acc": 0.8901679, "epoch": 1.3818265507811442, "grad_norm": 7.629434108734131, "learning_rate": 7.797566154636578e-06, "loss": 0.5921464, "memory(GiB)": 34.88, "step": 51035, "train_speed(iter/s)": 0.410642 }, { "acc": 0.89043684, "epoch": 1.3819619310643598, "grad_norm": 9.312070846557617, "learning_rate": 7.797102342128719e-06, "loss": 0.59085789, "memory(GiB)": 34.88, "step": 51040, "train_speed(iter/s)": 0.410645 }, { "acc": 0.90447512, "epoch": 1.3820973113475754, "grad_norm": 12.29801082611084, "learning_rate": 7.79663849458792e-06, "loss": 0.45538673, "memory(GiB)": 34.88, "step": 51045, "train_speed(iter/s)": 0.410647 }, { "acc": 0.88718081, "epoch": 1.382232691630791, "grad_norm": 6.579158782958984, "learning_rate": 7.79617461201999e-06, "loss": 0.62037258, "memory(GiB)": 34.88, "step": 51050, "train_speed(iter/s)": 0.410649 }, { "acc": 0.89422331, "epoch": 1.3823680719140063, "grad_norm": 7.58036470413208, "learning_rate": 7.795710694430742e-06, "loss": 0.50040188, "memory(GiB)": 34.88, "step": 51055, "train_speed(iter/s)": 0.410651 }, { "acc": 0.87910013, "epoch": 1.382503452197222, "grad_norm": 11.496025085449219, "learning_rate": 7.795246741825992e-06, "loss": 0.63439183, "memory(GiB)": 34.88, "step": 51060, "train_speed(iter/s)": 0.410653 }, { "acc": 0.90306044, "epoch": 1.3826388324804375, "grad_norm": 4.605968952178955, "learning_rate": 7.794782754211542e-06, "loss": 0.4684701, "memory(GiB)": 34.88, "step": 51065, "train_speed(iter/s)": 0.410655 }, { "acc": 0.91606646, "epoch": 1.382774212763653, "grad_norm": 8.53222370147705, "learning_rate": 7.794318731593212e-06, "loss": 0.3954711, "memory(GiB)": 34.88, "step": 51070, "train_speed(iter/s)": 0.410657 }, { "acc": 0.88901529, "epoch": 1.3829095930468687, "grad_norm": 5.587375164031982, "learning_rate": 7.793854673976816e-06, "loss": 0.59044504, "memory(GiB)": 34.88, "step": 51075, "train_speed(iter/s)": 0.410659 }, { "acc": 0.90951719, "epoch": 1.3830449733300842, "grad_norm": 9.516656875610352, "learning_rate": 7.793390581368158e-06, "loss": 0.47207503, "memory(GiB)": 34.88, "step": 51080, "train_speed(iter/s)": 0.410662 }, { "acc": 0.9026556, "epoch": 1.3831803536132998, "grad_norm": 4.936168193817139, "learning_rate": 7.792926453773062e-06, "loss": 0.52968464, "memory(GiB)": 34.88, "step": 51085, "train_speed(iter/s)": 0.410664 }, { "acc": 0.88189583, "epoch": 1.3833157338965152, "grad_norm": 10.154748916625977, "learning_rate": 7.792462291197335e-06, "loss": 0.59847713, "memory(GiB)": 34.88, "step": 51090, "train_speed(iter/s)": 0.410666 }, { "acc": 0.91216259, "epoch": 1.383451114179731, "grad_norm": 10.509418487548828, "learning_rate": 7.791998093646796e-06, "loss": 0.45419159, "memory(GiB)": 34.88, "step": 51095, "train_speed(iter/s)": 0.410668 }, { "acc": 0.86966066, "epoch": 1.3835864944629463, "grad_norm": 13.404866218566895, "learning_rate": 7.791533861127255e-06, "loss": 0.69070005, "memory(GiB)": 34.88, "step": 51100, "train_speed(iter/s)": 0.41067 }, { "acc": 0.89278908, "epoch": 1.383721874746162, "grad_norm": 17.803726196289062, "learning_rate": 7.791069593644533e-06, "loss": 0.56271353, "memory(GiB)": 34.88, "step": 51105, "train_speed(iter/s)": 0.410672 }, { "acc": 0.88972111, "epoch": 1.3838572550293775, "grad_norm": 5.494040489196777, "learning_rate": 7.790605291204444e-06, "loss": 0.59038467, "memory(GiB)": 34.88, "step": 51110, "train_speed(iter/s)": 0.410674 }, { "acc": 0.87462826, "epoch": 1.383992635312593, "grad_norm": 12.23538875579834, "learning_rate": 7.7901409538128e-06, "loss": 0.68892612, "memory(GiB)": 34.88, "step": 51115, "train_speed(iter/s)": 0.410676 }, { "acc": 0.8978363, "epoch": 1.3841280155958087, "grad_norm": 13.809700012207031, "learning_rate": 7.789676581475427e-06, "loss": 0.63135896, "memory(GiB)": 34.88, "step": 51120, "train_speed(iter/s)": 0.410679 }, { "acc": 0.90726814, "epoch": 1.3842633958790242, "grad_norm": 8.80404281616211, "learning_rate": 7.789212174198132e-06, "loss": 0.52900782, "memory(GiB)": 34.88, "step": 51125, "train_speed(iter/s)": 0.410681 }, { "acc": 0.88491354, "epoch": 1.3843987761622398, "grad_norm": 8.443045616149902, "learning_rate": 7.78874773198674e-06, "loss": 0.60845842, "memory(GiB)": 34.88, "step": 51130, "train_speed(iter/s)": 0.410683 }, { "acc": 0.90186024, "epoch": 1.3845341564454552, "grad_norm": 16.724279403686523, "learning_rate": 7.788283254847062e-06, "loss": 0.48897963, "memory(GiB)": 34.88, "step": 51135, "train_speed(iter/s)": 0.410685 }, { "acc": 0.90266991, "epoch": 1.3846695367286708, "grad_norm": 5.780263900756836, "learning_rate": 7.787818742784926e-06, "loss": 0.50395899, "memory(GiB)": 34.88, "step": 51140, "train_speed(iter/s)": 0.410688 }, { "acc": 0.88208761, "epoch": 1.3848049170118863, "grad_norm": 11.182638168334961, "learning_rate": 7.787354195806142e-06, "loss": 0.67467017, "memory(GiB)": 34.88, "step": 51145, "train_speed(iter/s)": 0.41069 }, { "acc": 0.8711153, "epoch": 1.384940297295102, "grad_norm": 10.330623626708984, "learning_rate": 7.786889613916536e-06, "loss": 0.66492033, "memory(GiB)": 34.88, "step": 51150, "train_speed(iter/s)": 0.410692 }, { "acc": 0.89741688, "epoch": 1.3850756775783175, "grad_norm": 9.752246856689453, "learning_rate": 7.786424997121922e-06, "loss": 0.55452204, "memory(GiB)": 34.88, "step": 51155, "train_speed(iter/s)": 0.410694 }, { "acc": 0.90308495, "epoch": 1.385211057861533, "grad_norm": 4.646870136260986, "learning_rate": 7.785960345428124e-06, "loss": 0.46728363, "memory(GiB)": 34.88, "step": 51160, "train_speed(iter/s)": 0.410696 }, { "acc": 0.90652695, "epoch": 1.3853464381447487, "grad_norm": 3.0545835494995117, "learning_rate": 7.785495658840963e-06, "loss": 0.46324463, "memory(GiB)": 34.88, "step": 51165, "train_speed(iter/s)": 0.410699 }, { "acc": 0.89382925, "epoch": 1.385481818427964, "grad_norm": 8.715717315673828, "learning_rate": 7.785030937366256e-06, "loss": 0.58007221, "memory(GiB)": 34.88, "step": 51170, "train_speed(iter/s)": 0.4107 }, { "acc": 0.88571301, "epoch": 1.3856171987111798, "grad_norm": 12.409981727600098, "learning_rate": 7.78456618100983e-06, "loss": 0.65132914, "memory(GiB)": 34.88, "step": 51175, "train_speed(iter/s)": 0.410703 }, { "acc": 0.89734325, "epoch": 1.3857525789943952, "grad_norm": 10.759941101074219, "learning_rate": 7.784101389777505e-06, "loss": 0.57969971, "memory(GiB)": 34.88, "step": 51180, "train_speed(iter/s)": 0.410705 }, { "acc": 0.89411926, "epoch": 1.3858879592776108, "grad_norm": 4.051016330718994, "learning_rate": 7.783636563675101e-06, "loss": 0.44727812, "memory(GiB)": 34.88, "step": 51185, "train_speed(iter/s)": 0.410707 }, { "acc": 0.88502474, "epoch": 1.3860233395608264, "grad_norm": 13.031404495239258, "learning_rate": 7.783171702708443e-06, "loss": 0.68810844, "memory(GiB)": 34.88, "step": 51190, "train_speed(iter/s)": 0.410709 }, { "acc": 0.90622807, "epoch": 1.386158719844042, "grad_norm": 14.306462287902832, "learning_rate": 7.782706806883354e-06, "loss": 0.47991695, "memory(GiB)": 34.88, "step": 51195, "train_speed(iter/s)": 0.410711 }, { "acc": 0.91461201, "epoch": 1.3862941001272575, "grad_norm": 6.384231090545654, "learning_rate": 7.782241876205657e-06, "loss": 0.42594514, "memory(GiB)": 34.88, "step": 51200, "train_speed(iter/s)": 0.410713 }, { "acc": 0.87402458, "epoch": 1.386429480410473, "grad_norm": 9.363616943359375, "learning_rate": 7.78177691068118e-06, "loss": 0.6566577, "memory(GiB)": 34.88, "step": 51205, "train_speed(iter/s)": 0.410716 }, { "acc": 0.88866653, "epoch": 1.3865648606936887, "grad_norm": 8.90698528289795, "learning_rate": 7.78131191031574e-06, "loss": 0.60571575, "memory(GiB)": 34.88, "step": 51210, "train_speed(iter/s)": 0.410718 }, { "acc": 0.89402227, "epoch": 1.386700240976904, "grad_norm": 8.454601287841797, "learning_rate": 7.780846875115172e-06, "loss": 0.5954946, "memory(GiB)": 34.88, "step": 51215, "train_speed(iter/s)": 0.410719 }, { "acc": 0.87191658, "epoch": 1.3868356212601196, "grad_norm": 5.827056884765625, "learning_rate": 7.780381805085291e-06, "loss": 0.66328077, "memory(GiB)": 34.88, "step": 51220, "train_speed(iter/s)": 0.410722 }, { "acc": 0.90194149, "epoch": 1.3869710015433352, "grad_norm": 5.092397689819336, "learning_rate": 7.779916700231931e-06, "loss": 0.49713678, "memory(GiB)": 34.88, "step": 51225, "train_speed(iter/s)": 0.410724 }, { "acc": 0.89172668, "epoch": 1.3871063818265508, "grad_norm": 13.802640914916992, "learning_rate": 7.779451560560914e-06, "loss": 0.62098818, "memory(GiB)": 34.88, "step": 51230, "train_speed(iter/s)": 0.410726 }, { "acc": 0.89542065, "epoch": 1.3872417621097664, "grad_norm": 7.976048946380615, "learning_rate": 7.778986386078068e-06, "loss": 0.60021744, "memory(GiB)": 34.88, "step": 51235, "train_speed(iter/s)": 0.410728 }, { "acc": 0.90576487, "epoch": 1.387377142392982, "grad_norm": 4.677953243255615, "learning_rate": 7.77852117678922e-06, "loss": 0.53663387, "memory(GiB)": 34.88, "step": 51240, "train_speed(iter/s)": 0.410731 }, { "acc": 0.9192008, "epoch": 1.3875125226761975, "grad_norm": 12.456300735473633, "learning_rate": 7.778055932700199e-06, "loss": 0.4730588, "memory(GiB)": 34.88, "step": 51245, "train_speed(iter/s)": 0.410733 }, { "acc": 0.88886633, "epoch": 1.3876479029594129, "grad_norm": 6.35309362411499, "learning_rate": 7.777590653816832e-06, "loss": 0.64861679, "memory(GiB)": 34.88, "step": 51250, "train_speed(iter/s)": 0.410735 }, { "acc": 0.87819471, "epoch": 1.3877832832426287, "grad_norm": 8.534676551818848, "learning_rate": 7.777125340144947e-06, "loss": 0.69344172, "memory(GiB)": 34.88, "step": 51255, "train_speed(iter/s)": 0.410737 }, { "acc": 0.90995407, "epoch": 1.387918663525844, "grad_norm": 8.786675453186035, "learning_rate": 7.776659991690371e-06, "loss": 0.54058342, "memory(GiB)": 34.88, "step": 51260, "train_speed(iter/s)": 0.410739 }, { "acc": 0.89908295, "epoch": 1.3880540438090596, "grad_norm": 11.416232109069824, "learning_rate": 7.776194608458939e-06, "loss": 0.51775646, "memory(GiB)": 34.88, "step": 51265, "train_speed(iter/s)": 0.410742 }, { "acc": 0.88427114, "epoch": 1.3881894240922752, "grad_norm": 11.635624885559082, "learning_rate": 7.77572919045648e-06, "loss": 0.61376915, "memory(GiB)": 34.88, "step": 51270, "train_speed(iter/s)": 0.410744 }, { "acc": 0.9238018, "epoch": 1.3883248043754908, "grad_norm": 4.833983421325684, "learning_rate": 7.775263737688819e-06, "loss": 0.38312063, "memory(GiB)": 34.88, "step": 51275, "train_speed(iter/s)": 0.410746 }, { "acc": 0.8848525, "epoch": 1.3884601846587064, "grad_norm": 9.488628387451172, "learning_rate": 7.774798250161789e-06, "loss": 0.67404833, "memory(GiB)": 34.88, "step": 51280, "train_speed(iter/s)": 0.410748 }, { "acc": 0.90224285, "epoch": 1.388595564941922, "grad_norm": 5.309837818145752, "learning_rate": 7.774332727881223e-06, "loss": 0.47927351, "memory(GiB)": 34.88, "step": 51285, "train_speed(iter/s)": 0.410751 }, { "acc": 0.8777277, "epoch": 1.3887309452251375, "grad_norm": 18.15584373474121, "learning_rate": 7.773867170852954e-06, "loss": 0.69950094, "memory(GiB)": 34.88, "step": 51290, "train_speed(iter/s)": 0.410753 }, { "acc": 0.88882141, "epoch": 1.3888663255083529, "grad_norm": 7.763003826141357, "learning_rate": 7.773401579082806e-06, "loss": 0.63659453, "memory(GiB)": 34.88, "step": 51295, "train_speed(iter/s)": 0.410755 }, { "acc": 0.90407534, "epoch": 1.3890017057915685, "grad_norm": 8.37415599822998, "learning_rate": 7.77293595257662e-06, "loss": 0.52259836, "memory(GiB)": 34.88, "step": 51300, "train_speed(iter/s)": 0.410757 }, { "acc": 0.90201893, "epoch": 1.389137086074784, "grad_norm": 10.652751922607422, "learning_rate": 7.772470291340228e-06, "loss": 0.52870026, "memory(GiB)": 34.88, "step": 51305, "train_speed(iter/s)": 0.41076 }, { "acc": 0.91032085, "epoch": 1.3892724663579996, "grad_norm": 5.341384410858154, "learning_rate": 7.772004595379459e-06, "loss": 0.46866179, "memory(GiB)": 34.88, "step": 51310, "train_speed(iter/s)": 0.410762 }, { "acc": 0.92128372, "epoch": 1.3894078466412152, "grad_norm": 9.607500076293945, "learning_rate": 7.77153886470015e-06, "loss": 0.47973413, "memory(GiB)": 34.88, "step": 51315, "train_speed(iter/s)": 0.410764 }, { "acc": 0.88884335, "epoch": 1.3895432269244308, "grad_norm": 4.596218585968018, "learning_rate": 7.771073099308132e-06, "loss": 0.56733775, "memory(GiB)": 34.88, "step": 51320, "train_speed(iter/s)": 0.410766 }, { "acc": 0.90733528, "epoch": 1.3896786072076464, "grad_norm": 6.457675933837891, "learning_rate": 7.770607299209243e-06, "loss": 0.48953314, "memory(GiB)": 34.88, "step": 51325, "train_speed(iter/s)": 0.410767 }, { "acc": 0.89305496, "epoch": 1.3898139874908617, "grad_norm": 5.479189395904541, "learning_rate": 7.770141464409318e-06, "loss": 0.52876186, "memory(GiB)": 34.88, "step": 51330, "train_speed(iter/s)": 0.410769 }, { "acc": 0.90519104, "epoch": 1.3899493677740775, "grad_norm": 13.180994033813477, "learning_rate": 7.769675594914191e-06, "loss": 0.55858054, "memory(GiB)": 34.88, "step": 51335, "train_speed(iter/s)": 0.410772 }, { "acc": 0.88790112, "epoch": 1.3900847480572929, "grad_norm": 14.065646171569824, "learning_rate": 7.769209690729698e-06, "loss": 0.63790951, "memory(GiB)": 34.88, "step": 51340, "train_speed(iter/s)": 0.410774 }, { "acc": 0.90072994, "epoch": 1.3902201283405085, "grad_norm": 7.92175817489624, "learning_rate": 7.768743751861672e-06, "loss": 0.57764602, "memory(GiB)": 34.88, "step": 51345, "train_speed(iter/s)": 0.410776 }, { "acc": 0.90210323, "epoch": 1.390355508623724, "grad_norm": 5.784627914428711, "learning_rate": 7.768277778315958e-06, "loss": 0.52211084, "memory(GiB)": 34.88, "step": 51350, "train_speed(iter/s)": 0.410779 }, { "acc": 0.90518589, "epoch": 1.3904908889069396, "grad_norm": 3.688988447189331, "learning_rate": 7.767811770098387e-06, "loss": 0.46369252, "memory(GiB)": 34.88, "step": 51355, "train_speed(iter/s)": 0.410781 }, { "acc": 0.87823219, "epoch": 1.3906262691901552, "grad_norm": 11.390876770019531, "learning_rate": 7.767345727214798e-06, "loss": 0.72985067, "memory(GiB)": 34.88, "step": 51360, "train_speed(iter/s)": 0.410783 }, { "acc": 0.89028549, "epoch": 1.3907616494733708, "grad_norm": 5.1101765632629395, "learning_rate": 7.76687964967103e-06, "loss": 0.56858082, "memory(GiB)": 34.88, "step": 51365, "train_speed(iter/s)": 0.410785 }, { "acc": 0.91059723, "epoch": 1.3908970297565864, "grad_norm": 7.445705890655518, "learning_rate": 7.766413537472922e-06, "loss": 0.41938376, "memory(GiB)": 34.88, "step": 51370, "train_speed(iter/s)": 0.410787 }, { "acc": 0.89284735, "epoch": 1.3910324100398017, "grad_norm": 6.3080291748046875, "learning_rate": 7.765947390626311e-06, "loss": 0.56085024, "memory(GiB)": 34.88, "step": 51375, "train_speed(iter/s)": 0.410789 }, { "acc": 0.90953588, "epoch": 1.3911677903230173, "grad_norm": 9.185355186462402, "learning_rate": 7.765481209137036e-06, "loss": 0.48466415, "memory(GiB)": 34.88, "step": 51380, "train_speed(iter/s)": 0.410792 }, { "acc": 0.8931385, "epoch": 1.3913031706062329, "grad_norm": 10.924220085144043, "learning_rate": 7.76501499301094e-06, "loss": 0.52828813, "memory(GiB)": 34.88, "step": 51385, "train_speed(iter/s)": 0.410794 }, { "acc": 0.88970013, "epoch": 1.3914385508894485, "grad_norm": 8.708784103393555, "learning_rate": 7.764548742253862e-06, "loss": 0.577388, "memory(GiB)": 34.88, "step": 51390, "train_speed(iter/s)": 0.410796 }, { "acc": 0.88419113, "epoch": 1.391573931172664, "grad_norm": 5.1594014167785645, "learning_rate": 7.76408245687164e-06, "loss": 0.60399971, "memory(GiB)": 34.88, "step": 51395, "train_speed(iter/s)": 0.410798 }, { "acc": 0.8962698, "epoch": 1.3917093114558796, "grad_norm": 11.681214332580566, "learning_rate": 7.76361613687012e-06, "loss": 0.4795476, "memory(GiB)": 34.88, "step": 51400, "train_speed(iter/s)": 0.410801 }, { "acc": 0.8942482, "epoch": 1.3918446917390952, "grad_norm": 9.794008255004883, "learning_rate": 7.763149782255138e-06, "loss": 0.60961466, "memory(GiB)": 34.88, "step": 51405, "train_speed(iter/s)": 0.410803 }, { "acc": 0.89954185, "epoch": 1.3919800720223106, "grad_norm": 4.648960113525391, "learning_rate": 7.762683393032538e-06, "loss": 0.53885221, "memory(GiB)": 34.88, "step": 51410, "train_speed(iter/s)": 0.410805 }, { "acc": 0.91629753, "epoch": 1.3921154523055264, "grad_norm": 7.050252914428711, "learning_rate": 7.762216969208167e-06, "loss": 0.39162755, "memory(GiB)": 34.88, "step": 51415, "train_speed(iter/s)": 0.410807 }, { "acc": 0.88657093, "epoch": 1.3922508325887417, "grad_norm": 4.87697696685791, "learning_rate": 7.761750510787863e-06, "loss": 0.66578698, "memory(GiB)": 34.88, "step": 51420, "train_speed(iter/s)": 0.410809 }, { "acc": 0.88394871, "epoch": 1.3923862128719573, "grad_norm": 13.839588165283203, "learning_rate": 7.76128401777747e-06, "loss": 0.64276886, "memory(GiB)": 34.88, "step": 51425, "train_speed(iter/s)": 0.410811 }, { "acc": 0.90145397, "epoch": 1.3925215931551729, "grad_norm": 7.003293037414551, "learning_rate": 7.760817490182832e-06, "loss": 0.52579112, "memory(GiB)": 34.88, "step": 51430, "train_speed(iter/s)": 0.410813 }, { "acc": 0.89060631, "epoch": 1.3926569734383885, "grad_norm": 16.010257720947266, "learning_rate": 7.760350928009793e-06, "loss": 0.60967989, "memory(GiB)": 34.88, "step": 51435, "train_speed(iter/s)": 0.410815 }, { "acc": 0.88800869, "epoch": 1.392792353721604, "grad_norm": 5.148932933807373, "learning_rate": 7.759884331264199e-06, "loss": 0.60503411, "memory(GiB)": 34.88, "step": 51440, "train_speed(iter/s)": 0.410818 }, { "acc": 0.90041065, "epoch": 1.3929277340048196, "grad_norm": 7.670952320098877, "learning_rate": 7.759417699951894e-06, "loss": 0.54682159, "memory(GiB)": 34.88, "step": 51445, "train_speed(iter/s)": 0.41082 }, { "acc": 0.90011654, "epoch": 1.3930631142880352, "grad_norm": 3.9445571899414062, "learning_rate": 7.758951034078723e-06, "loss": 0.47904344, "memory(GiB)": 34.88, "step": 51450, "train_speed(iter/s)": 0.410822 }, { "acc": 0.8675808, "epoch": 1.3931984945712506, "grad_norm": 26.585651397705078, "learning_rate": 7.758484333650533e-06, "loss": 0.67013826, "memory(GiB)": 34.88, "step": 51455, "train_speed(iter/s)": 0.410824 }, { "acc": 0.8846241, "epoch": 1.3933338748544661, "grad_norm": 10.893774032592773, "learning_rate": 7.75801759867317e-06, "loss": 0.56515265, "memory(GiB)": 34.88, "step": 51460, "train_speed(iter/s)": 0.410826 }, { "acc": 0.90801678, "epoch": 1.3934692551376817, "grad_norm": 15.005099296569824, "learning_rate": 7.75755082915248e-06, "loss": 0.43896899, "memory(GiB)": 34.88, "step": 51465, "train_speed(iter/s)": 0.410829 }, { "acc": 0.88937111, "epoch": 1.3936046354208973, "grad_norm": 11.435518264770508, "learning_rate": 7.757084025094312e-06, "loss": 0.61116347, "memory(GiB)": 34.88, "step": 51470, "train_speed(iter/s)": 0.410831 }, { "acc": 0.89734421, "epoch": 1.3937400157041129, "grad_norm": 8.889623641967773, "learning_rate": 7.75661718650451e-06, "loss": 0.62902398, "memory(GiB)": 34.88, "step": 51475, "train_speed(iter/s)": 0.410833 }, { "acc": 0.89179258, "epoch": 1.3938753959873285, "grad_norm": 16.597673416137695, "learning_rate": 7.756150313388926e-06, "loss": 0.5797266, "memory(GiB)": 34.88, "step": 51480, "train_speed(iter/s)": 0.410835 }, { "acc": 0.87726498, "epoch": 1.394010776270544, "grad_norm": 13.529924392700195, "learning_rate": 7.755683405753406e-06, "loss": 0.64565997, "memory(GiB)": 34.88, "step": 51485, "train_speed(iter/s)": 0.410837 }, { "acc": 0.8855484, "epoch": 1.3941461565537594, "grad_norm": 5.084700107574463, "learning_rate": 7.755216463603802e-06, "loss": 0.65625782, "memory(GiB)": 34.88, "step": 51490, "train_speed(iter/s)": 0.410839 }, { "acc": 0.87162952, "epoch": 1.3942815368369752, "grad_norm": 13.26539134979248, "learning_rate": 7.75474948694596e-06, "loss": 0.68360696, "memory(GiB)": 34.88, "step": 51495, "train_speed(iter/s)": 0.410841 }, { "acc": 0.90662565, "epoch": 1.3944169171201906, "grad_norm": 6.602733612060547, "learning_rate": 7.754282475785732e-06, "loss": 0.47570076, "memory(GiB)": 34.88, "step": 51500, "train_speed(iter/s)": 0.410844 }, { "acc": 0.89890318, "epoch": 1.3945522974034061, "grad_norm": 4.696345806121826, "learning_rate": 7.753815430128965e-06, "loss": 0.49769936, "memory(GiB)": 34.88, "step": 51505, "train_speed(iter/s)": 0.410846 }, { "acc": 0.88949213, "epoch": 1.3946876776866217, "grad_norm": 6.643763065338135, "learning_rate": 7.753348349981514e-06, "loss": 0.55999651, "memory(GiB)": 34.88, "step": 51510, "train_speed(iter/s)": 0.410848 }, { "acc": 0.88227806, "epoch": 1.3948230579698373, "grad_norm": 9.324258804321289, "learning_rate": 7.752881235349227e-06, "loss": 0.61539369, "memory(GiB)": 34.88, "step": 51515, "train_speed(iter/s)": 0.41085 }, { "acc": 0.8932312, "epoch": 1.3949584382530529, "grad_norm": 5.676517009735107, "learning_rate": 7.752414086237956e-06, "loss": 0.57326007, "memory(GiB)": 34.88, "step": 51520, "train_speed(iter/s)": 0.410852 }, { "acc": 0.90179691, "epoch": 1.3950938185362685, "grad_norm": 9.895890235900879, "learning_rate": 7.751946902653556e-06, "loss": 0.46928134, "memory(GiB)": 34.88, "step": 51525, "train_speed(iter/s)": 0.410854 }, { "acc": 0.88570709, "epoch": 1.395229198819484, "grad_norm": 12.572663307189941, "learning_rate": 7.751479684601875e-06, "loss": 0.62484407, "memory(GiB)": 34.88, "step": 51530, "train_speed(iter/s)": 0.410856 }, { "acc": 0.8828083, "epoch": 1.3953645791026994, "grad_norm": 11.156603813171387, "learning_rate": 7.751012432088768e-06, "loss": 0.61501842, "memory(GiB)": 34.88, "step": 51535, "train_speed(iter/s)": 0.410858 }, { "acc": 0.89967136, "epoch": 1.395499959385915, "grad_norm": 9.514786720275879, "learning_rate": 7.75054514512009e-06, "loss": 0.50864944, "memory(GiB)": 34.88, "step": 51540, "train_speed(iter/s)": 0.41086 }, { "acc": 0.91444654, "epoch": 1.3956353396691306, "grad_norm": 9.083909034729004, "learning_rate": 7.75007782370169e-06, "loss": 0.41294355, "memory(GiB)": 34.88, "step": 51545, "train_speed(iter/s)": 0.410862 }, { "acc": 0.89333572, "epoch": 1.3957707199523461, "grad_norm": 12.11956787109375, "learning_rate": 7.749610467839426e-06, "loss": 0.57704678, "memory(GiB)": 34.88, "step": 51550, "train_speed(iter/s)": 0.410865 }, { "acc": 0.89431934, "epoch": 1.3959061002355617, "grad_norm": 10.221821784973145, "learning_rate": 7.749143077539152e-06, "loss": 0.61078362, "memory(GiB)": 34.88, "step": 51555, "train_speed(iter/s)": 0.410867 }, { "acc": 0.90634356, "epoch": 1.3960414805187773, "grad_norm": 10.802898406982422, "learning_rate": 7.748675652806721e-06, "loss": 0.56379814, "memory(GiB)": 34.88, "step": 51560, "train_speed(iter/s)": 0.410869 }, { "acc": 0.87874069, "epoch": 1.3961768608019929, "grad_norm": 10.820287704467773, "learning_rate": 7.748208193647991e-06, "loss": 0.60460472, "memory(GiB)": 34.88, "step": 51565, "train_speed(iter/s)": 0.410871 }, { "acc": 0.90066185, "epoch": 1.3963122410852082, "grad_norm": 5.36245584487915, "learning_rate": 7.747740700068816e-06, "loss": 0.58189516, "memory(GiB)": 34.88, "step": 51570, "train_speed(iter/s)": 0.410873 }, { "acc": 0.91462297, "epoch": 1.396447621368424, "grad_norm": 9.551982879638672, "learning_rate": 7.747273172075054e-06, "loss": 0.48126235, "memory(GiB)": 34.88, "step": 51575, "train_speed(iter/s)": 0.410875 }, { "acc": 0.88776493, "epoch": 1.3965830016516394, "grad_norm": 16.065916061401367, "learning_rate": 7.746805609672559e-06, "loss": 0.60079856, "memory(GiB)": 34.88, "step": 51580, "train_speed(iter/s)": 0.410877 }, { "acc": 0.90549259, "epoch": 1.396718381934855, "grad_norm": 8.398831367492676, "learning_rate": 7.746338012867192e-06, "loss": 0.52645931, "memory(GiB)": 34.88, "step": 51585, "train_speed(iter/s)": 0.41088 }, { "acc": 0.86879425, "epoch": 1.3968537622180706, "grad_norm": 8.298446655273438, "learning_rate": 7.745870381664806e-06, "loss": 0.723033, "memory(GiB)": 34.88, "step": 51590, "train_speed(iter/s)": 0.410882 }, { "acc": 0.91477928, "epoch": 1.3969891425012861, "grad_norm": 7.354783535003662, "learning_rate": 7.745402716071261e-06, "loss": 0.38675802, "memory(GiB)": 34.88, "step": 51595, "train_speed(iter/s)": 0.410884 }, { "acc": 0.88060741, "epoch": 1.3971245227845017, "grad_norm": 14.086105346679688, "learning_rate": 7.744935016092418e-06, "loss": 0.70302153, "memory(GiB)": 34.88, "step": 51600, "train_speed(iter/s)": 0.410886 }, { "acc": 0.88007908, "epoch": 1.397259903067717, "grad_norm": 8.942390441894531, "learning_rate": 7.744467281734132e-06, "loss": 0.65651379, "memory(GiB)": 34.88, "step": 51605, "train_speed(iter/s)": 0.410888 }, { "acc": 0.88784885, "epoch": 1.3973952833509329, "grad_norm": 6.916423320770264, "learning_rate": 7.743999513002265e-06, "loss": 0.682128, "memory(GiB)": 34.88, "step": 51610, "train_speed(iter/s)": 0.41089 }, { "acc": 0.89569263, "epoch": 1.3975306636341482, "grad_norm": 10.204075813293457, "learning_rate": 7.743531709902675e-06, "loss": 0.57981071, "memory(GiB)": 34.88, "step": 51615, "train_speed(iter/s)": 0.410892 }, { "acc": 0.91353989, "epoch": 1.3976660439173638, "grad_norm": 6.99193811416626, "learning_rate": 7.743063872441223e-06, "loss": 0.43730688, "memory(GiB)": 34.88, "step": 51620, "train_speed(iter/s)": 0.410894 }, { "acc": 0.87706718, "epoch": 1.3978014242005794, "grad_norm": 10.136687278747559, "learning_rate": 7.742596000623769e-06, "loss": 0.65887361, "memory(GiB)": 34.88, "step": 51625, "train_speed(iter/s)": 0.410896 }, { "acc": 0.90355759, "epoch": 1.397936804483795, "grad_norm": 7.866124153137207, "learning_rate": 7.742128094456174e-06, "loss": 0.50622311, "memory(GiB)": 34.88, "step": 51630, "train_speed(iter/s)": 0.410898 }, { "acc": 0.88569622, "epoch": 1.3980721847670106, "grad_norm": 6.178188323974609, "learning_rate": 7.741660153944303e-06, "loss": 0.69377012, "memory(GiB)": 34.88, "step": 51635, "train_speed(iter/s)": 0.4109 }, { "acc": 0.89130688, "epoch": 1.3982075650502261, "grad_norm": 7.698821544647217, "learning_rate": 7.741192179094012e-06, "loss": 0.54422216, "memory(GiB)": 34.88, "step": 51640, "train_speed(iter/s)": 0.410903 }, { "acc": 0.88731499, "epoch": 1.3983429453334417, "grad_norm": 8.847380638122559, "learning_rate": 7.740724169911165e-06, "loss": 0.55719795, "memory(GiB)": 34.88, "step": 51645, "train_speed(iter/s)": 0.410904 }, { "acc": 0.90091124, "epoch": 1.398478325616657, "grad_norm": 4.747138977050781, "learning_rate": 7.740256126401628e-06, "loss": 0.48940067, "memory(GiB)": 34.88, "step": 51650, "train_speed(iter/s)": 0.410906 }, { "acc": 0.90622978, "epoch": 1.3986137058998727, "grad_norm": 5.7634358406066895, "learning_rate": 7.73978804857126e-06, "loss": 0.45881586, "memory(GiB)": 34.88, "step": 51655, "train_speed(iter/s)": 0.410908 }, { "acc": 0.90873699, "epoch": 1.3987490861830882, "grad_norm": 14.995244026184082, "learning_rate": 7.739319936425927e-06, "loss": 0.48115487, "memory(GiB)": 34.88, "step": 51660, "train_speed(iter/s)": 0.410911 }, { "acc": 0.87804976, "epoch": 1.3988844664663038, "grad_norm": 7.721078872680664, "learning_rate": 7.738851789971493e-06, "loss": 0.66781149, "memory(GiB)": 34.88, "step": 51665, "train_speed(iter/s)": 0.410913 }, { "acc": 0.88884859, "epoch": 1.3990198467495194, "grad_norm": 9.525790214538574, "learning_rate": 7.738383609213821e-06, "loss": 0.6025856, "memory(GiB)": 34.88, "step": 51670, "train_speed(iter/s)": 0.410915 }, { "acc": 0.89497089, "epoch": 1.399155227032735, "grad_norm": 5.1827006340026855, "learning_rate": 7.737915394158778e-06, "loss": 0.49588547, "memory(GiB)": 34.88, "step": 51675, "train_speed(iter/s)": 0.410917 }, { "acc": 0.90816498, "epoch": 1.3992906073159506, "grad_norm": 6.672701835632324, "learning_rate": 7.737447144812227e-06, "loss": 0.4661314, "memory(GiB)": 34.88, "step": 51680, "train_speed(iter/s)": 0.410919 }, { "acc": 0.90592995, "epoch": 1.399425987599166, "grad_norm": 9.710278511047363, "learning_rate": 7.736978861180036e-06, "loss": 0.52700515, "memory(GiB)": 34.88, "step": 51685, "train_speed(iter/s)": 0.410921 }, { "acc": 0.88531847, "epoch": 1.3995613678823817, "grad_norm": 7.2244720458984375, "learning_rate": 7.736510543268071e-06, "loss": 0.55001945, "memory(GiB)": 34.88, "step": 51690, "train_speed(iter/s)": 0.410923 }, { "acc": 0.88920918, "epoch": 1.399696748165597, "grad_norm": 12.326204299926758, "learning_rate": 7.736042191082196e-06, "loss": 0.55621295, "memory(GiB)": 34.88, "step": 51695, "train_speed(iter/s)": 0.410925 }, { "acc": 0.90017128, "epoch": 1.3998321284488127, "grad_norm": 8.84506893157959, "learning_rate": 7.73557380462828e-06, "loss": 0.47075806, "memory(GiB)": 34.88, "step": 51700, "train_speed(iter/s)": 0.410928 }, { "acc": 0.89200249, "epoch": 1.3999675087320282, "grad_norm": 6.120397567749023, "learning_rate": 7.73510538391219e-06, "loss": 0.63094316, "memory(GiB)": 34.88, "step": 51705, "train_speed(iter/s)": 0.41093 }, { "acc": 0.89814949, "epoch": 1.4001028890152438, "grad_norm": 7.076021671295166, "learning_rate": 7.734636928939795e-06, "loss": 0.5664475, "memory(GiB)": 34.88, "step": 51710, "train_speed(iter/s)": 0.410932 }, { "acc": 0.89534712, "epoch": 1.4002382692984594, "grad_norm": 14.779844284057617, "learning_rate": 7.734168439716963e-06, "loss": 0.58367443, "memory(GiB)": 34.88, "step": 51715, "train_speed(iter/s)": 0.410934 }, { "acc": 0.88834877, "epoch": 1.400373649581675, "grad_norm": 5.058659076690674, "learning_rate": 7.733699916249562e-06, "loss": 0.60471864, "memory(GiB)": 34.88, "step": 51720, "train_speed(iter/s)": 0.410936 }, { "acc": 0.90395145, "epoch": 1.4005090298648906, "grad_norm": 7.185991287231445, "learning_rate": 7.73323135854346e-06, "loss": 0.50664639, "memory(GiB)": 34.88, "step": 51725, "train_speed(iter/s)": 0.410938 }, { "acc": 0.90530243, "epoch": 1.400644410148106, "grad_norm": 16.792593002319336, "learning_rate": 7.732762766604527e-06, "loss": 0.49029846, "memory(GiB)": 34.88, "step": 51730, "train_speed(iter/s)": 0.41094 }, { "acc": 0.89453716, "epoch": 1.4007797904313215, "grad_norm": 7.669924736022949, "learning_rate": 7.732294140438636e-06, "loss": 0.61534519, "memory(GiB)": 34.88, "step": 51735, "train_speed(iter/s)": 0.410942 }, { "acc": 0.88448019, "epoch": 1.400915170714537, "grad_norm": 8.209709167480469, "learning_rate": 7.731825480051653e-06, "loss": 0.54572716, "memory(GiB)": 34.88, "step": 51740, "train_speed(iter/s)": 0.410944 }, { "acc": 0.90447388, "epoch": 1.4010505509977527, "grad_norm": 6.6028828620910645, "learning_rate": 7.731356785449454e-06, "loss": 0.40852389, "memory(GiB)": 34.88, "step": 51745, "train_speed(iter/s)": 0.410947 }, { "acc": 0.89097881, "epoch": 1.4011859312809682, "grad_norm": 7.651245594024658, "learning_rate": 7.730888056637908e-06, "loss": 0.45050192, "memory(GiB)": 34.88, "step": 51750, "train_speed(iter/s)": 0.410949 }, { "acc": 0.8935318, "epoch": 1.4013213115641838, "grad_norm": 8.881372451782227, "learning_rate": 7.730419293622885e-06, "loss": 0.57468414, "memory(GiB)": 34.88, "step": 51755, "train_speed(iter/s)": 0.410951 }, { "acc": 0.90800972, "epoch": 1.4014566918473994, "grad_norm": 5.837082386016846, "learning_rate": 7.72995049641026e-06, "loss": 0.48054218, "memory(GiB)": 34.88, "step": 51760, "train_speed(iter/s)": 0.410953 }, { "acc": 0.91068668, "epoch": 1.4015920721306148, "grad_norm": 8.581764221191406, "learning_rate": 7.729481665005901e-06, "loss": 0.52041616, "memory(GiB)": 34.88, "step": 51765, "train_speed(iter/s)": 0.410955 }, { "acc": 0.89762592, "epoch": 1.4017274524138306, "grad_norm": 4.79888391494751, "learning_rate": 7.729012799415685e-06, "loss": 0.51651816, "memory(GiB)": 34.88, "step": 51770, "train_speed(iter/s)": 0.410957 }, { "acc": 0.88761311, "epoch": 1.401862832697046, "grad_norm": 4.781508445739746, "learning_rate": 7.728543899645487e-06, "loss": 0.60885382, "memory(GiB)": 34.88, "step": 51775, "train_speed(iter/s)": 0.410959 }, { "acc": 0.89590769, "epoch": 1.4019982129802615, "grad_norm": 5.0992045402526855, "learning_rate": 7.728074965701178e-06, "loss": 0.62810664, "memory(GiB)": 34.88, "step": 51780, "train_speed(iter/s)": 0.410962 }, { "acc": 0.8963933, "epoch": 1.402133593263477, "grad_norm": 5.8177266120910645, "learning_rate": 7.727605997588631e-06, "loss": 0.51095018, "memory(GiB)": 34.88, "step": 51785, "train_speed(iter/s)": 0.410964 }, { "acc": 0.90818052, "epoch": 1.4022689735466927, "grad_norm": 7.0756402015686035, "learning_rate": 7.727136995313725e-06, "loss": 0.49611092, "memory(GiB)": 34.88, "step": 51790, "train_speed(iter/s)": 0.410966 }, { "acc": 0.90665703, "epoch": 1.4024043538299082, "grad_norm": 10.053913116455078, "learning_rate": 7.726667958882331e-06, "loss": 0.57722425, "memory(GiB)": 34.88, "step": 51795, "train_speed(iter/s)": 0.410968 }, { "acc": 0.91063805, "epoch": 1.4025397341131238, "grad_norm": 18.55946922302246, "learning_rate": 7.726198888300327e-06, "loss": 0.40885277, "memory(GiB)": 34.88, "step": 51800, "train_speed(iter/s)": 0.41097 }, { "acc": 0.89268169, "epoch": 1.4026751143963394, "grad_norm": 10.187834739685059, "learning_rate": 7.725729783573588e-06, "loss": 0.59719868, "memory(GiB)": 34.88, "step": 51805, "train_speed(iter/s)": 0.410972 }, { "acc": 0.89088039, "epoch": 1.4028104946795548, "grad_norm": 11.365107536315918, "learning_rate": 7.725260644707992e-06, "loss": 0.66231074, "memory(GiB)": 34.88, "step": 51810, "train_speed(iter/s)": 0.410975 }, { "acc": 0.89843006, "epoch": 1.4029458749627703, "grad_norm": 5.749467372894287, "learning_rate": 7.724791471709412e-06, "loss": 0.5071301, "memory(GiB)": 34.88, "step": 51815, "train_speed(iter/s)": 0.410977 }, { "acc": 0.91661558, "epoch": 1.403081255245986, "grad_norm": 6.483100414276123, "learning_rate": 7.724322264583731e-06, "loss": 0.48762999, "memory(GiB)": 34.88, "step": 51820, "train_speed(iter/s)": 0.410979 }, { "acc": 0.89257374, "epoch": 1.4032166355292015, "grad_norm": 13.406343460083008, "learning_rate": 7.723853023336824e-06, "loss": 0.61691513, "memory(GiB)": 34.88, "step": 51825, "train_speed(iter/s)": 0.410981 }, { "acc": 0.90663433, "epoch": 1.403352015812417, "grad_norm": 11.233695983886719, "learning_rate": 7.723383747974568e-06, "loss": 0.59413834, "memory(GiB)": 34.88, "step": 51830, "train_speed(iter/s)": 0.410984 }, { "acc": 0.88374405, "epoch": 1.4034873960956327, "grad_norm": 20.01531219482422, "learning_rate": 7.722914438502843e-06, "loss": 0.63505983, "memory(GiB)": 34.88, "step": 51835, "train_speed(iter/s)": 0.410985 }, { "acc": 0.91289825, "epoch": 1.4036227763788482, "grad_norm": 4.61305570602417, "learning_rate": 7.722445094927527e-06, "loss": 0.41999087, "memory(GiB)": 34.88, "step": 51840, "train_speed(iter/s)": 0.410988 }, { "acc": 0.88572111, "epoch": 1.4037581566620636, "grad_norm": 3.520962953567505, "learning_rate": 7.7219757172545e-06, "loss": 0.65019007, "memory(GiB)": 34.88, "step": 51845, "train_speed(iter/s)": 0.41099 }, { "acc": 0.89050722, "epoch": 1.4038935369452794, "grad_norm": 4.303628921508789, "learning_rate": 7.721506305489641e-06, "loss": 0.51600876, "memory(GiB)": 34.88, "step": 51850, "train_speed(iter/s)": 0.410992 }, { "acc": 0.91301041, "epoch": 1.4040289172284948, "grad_norm": 5.182365417480469, "learning_rate": 7.721036859638833e-06, "loss": 0.4356678, "memory(GiB)": 34.88, "step": 51855, "train_speed(iter/s)": 0.410994 }, { "acc": 0.9003664, "epoch": 1.4041642975117103, "grad_norm": 9.567720413208008, "learning_rate": 7.720567379707955e-06, "loss": 0.54636497, "memory(GiB)": 34.88, "step": 51860, "train_speed(iter/s)": 0.410996 }, { "acc": 0.87254829, "epoch": 1.404299677794926, "grad_norm": 12.411613464355469, "learning_rate": 7.720097865702888e-06, "loss": 0.65661736, "memory(GiB)": 34.88, "step": 51865, "train_speed(iter/s)": 0.410999 }, { "acc": 0.87989674, "epoch": 1.4044350580781415, "grad_norm": 22.00715446472168, "learning_rate": 7.719628317629513e-06, "loss": 0.64582472, "memory(GiB)": 34.88, "step": 51870, "train_speed(iter/s)": 0.411 }, { "acc": 0.8966651, "epoch": 1.404570438361357, "grad_norm": 6.63250732421875, "learning_rate": 7.719158735493713e-06, "loss": 0.58291245, "memory(GiB)": 34.88, "step": 51875, "train_speed(iter/s)": 0.411003 }, { "acc": 0.91061611, "epoch": 1.4047058186445727, "grad_norm": 6.670375823974609, "learning_rate": 7.71868911930137e-06, "loss": 0.51325684, "memory(GiB)": 34.88, "step": 51880, "train_speed(iter/s)": 0.411005 }, { "acc": 0.88206291, "epoch": 1.4048411989277882, "grad_norm": 14.519767761230469, "learning_rate": 7.718219469058368e-06, "loss": 0.62453575, "memory(GiB)": 34.88, "step": 51885, "train_speed(iter/s)": 0.411007 }, { "acc": 0.89379616, "epoch": 1.4049765792110036, "grad_norm": 5.509864807128906, "learning_rate": 7.717749784770589e-06, "loss": 0.6185854, "memory(GiB)": 34.88, "step": 51890, "train_speed(iter/s)": 0.411009 }, { "acc": 0.88477335, "epoch": 1.4051119594942192, "grad_norm": 12.398638725280762, "learning_rate": 7.717280066443916e-06, "loss": 0.62581205, "memory(GiB)": 34.88, "step": 51895, "train_speed(iter/s)": 0.411011 }, { "acc": 0.90954647, "epoch": 1.4052473397774348, "grad_norm": 15.565930366516113, "learning_rate": 7.716810314084234e-06, "loss": 0.47128477, "memory(GiB)": 34.88, "step": 51900, "train_speed(iter/s)": 0.411013 }, { "acc": 0.91116199, "epoch": 1.4053827200606503, "grad_norm": 10.614731788635254, "learning_rate": 7.716340527697429e-06, "loss": 0.55856619, "memory(GiB)": 34.88, "step": 51905, "train_speed(iter/s)": 0.411015 }, { "acc": 0.90005617, "epoch": 1.405518100343866, "grad_norm": 5.979791164398193, "learning_rate": 7.715870707289385e-06, "loss": 0.52679276, "memory(GiB)": 34.88, "step": 51910, "train_speed(iter/s)": 0.411017 }, { "acc": 0.89370174, "epoch": 1.4056534806270815, "grad_norm": 13.613645553588867, "learning_rate": 7.715400852865988e-06, "loss": 0.55600405, "memory(GiB)": 34.88, "step": 51915, "train_speed(iter/s)": 0.411019 }, { "acc": 0.91462622, "epoch": 1.405788860910297, "grad_norm": 31.988174438476562, "learning_rate": 7.71493096443312e-06, "loss": 0.426507, "memory(GiB)": 34.88, "step": 51920, "train_speed(iter/s)": 0.411021 }, { "acc": 0.8964241, "epoch": 1.4059242411935124, "grad_norm": 6.320105075836182, "learning_rate": 7.714461041996673e-06, "loss": 0.52652507, "memory(GiB)": 34.88, "step": 51925, "train_speed(iter/s)": 0.411023 }, { "acc": 0.91367989, "epoch": 1.4060596214767282, "grad_norm": 8.237348556518555, "learning_rate": 7.713991085562531e-06, "loss": 0.45819736, "memory(GiB)": 34.88, "step": 51930, "train_speed(iter/s)": 0.411026 }, { "acc": 0.89123554, "epoch": 1.4061950017599436, "grad_norm": 5.547684192657471, "learning_rate": 7.71352109513658e-06, "loss": 0.52902069, "memory(GiB)": 34.88, "step": 51935, "train_speed(iter/s)": 0.411027 }, { "acc": 0.90498943, "epoch": 1.4063303820431592, "grad_norm": 6.485222339630127, "learning_rate": 7.713051070724708e-06, "loss": 0.46506214, "memory(GiB)": 34.88, "step": 51940, "train_speed(iter/s)": 0.41103 }, { "acc": 0.87307415, "epoch": 1.4064657623263748, "grad_norm": 15.168708801269531, "learning_rate": 7.712581012332804e-06, "loss": 0.71260128, "memory(GiB)": 34.88, "step": 51945, "train_speed(iter/s)": 0.411031 }, { "acc": 0.9090065, "epoch": 1.4066011426095903, "grad_norm": 9.457884788513184, "learning_rate": 7.712110919966757e-06, "loss": 0.48982186, "memory(GiB)": 34.88, "step": 51950, "train_speed(iter/s)": 0.411034 }, { "acc": 0.91463699, "epoch": 1.406736522892806, "grad_norm": 3.1240222454071045, "learning_rate": 7.711640793632455e-06, "loss": 0.4449708, "memory(GiB)": 34.88, "step": 51955, "train_speed(iter/s)": 0.411036 }, { "acc": 0.90794706, "epoch": 1.4068719031760215, "grad_norm": 5.047486305236816, "learning_rate": 7.711170633335787e-06, "loss": 0.45376391, "memory(GiB)": 34.88, "step": 51960, "train_speed(iter/s)": 0.411038 }, { "acc": 0.90751476, "epoch": 1.407007283459237, "grad_norm": 9.748116493225098, "learning_rate": 7.710700439082643e-06, "loss": 0.54827719, "memory(GiB)": 34.88, "step": 51965, "train_speed(iter/s)": 0.41104 }, { "acc": 0.90702524, "epoch": 1.4071426637424524, "grad_norm": 4.269461631774902, "learning_rate": 7.710230210878913e-06, "loss": 0.51217685, "memory(GiB)": 34.88, "step": 51970, "train_speed(iter/s)": 0.411042 }, { "acc": 0.90309238, "epoch": 1.407278044025668, "grad_norm": 10.79925537109375, "learning_rate": 7.709759948730486e-06, "loss": 0.52581391, "memory(GiB)": 34.88, "step": 51975, "train_speed(iter/s)": 0.411044 }, { "acc": 0.88917046, "epoch": 1.4074134243088836, "grad_norm": 11.205716133117676, "learning_rate": 7.709289652643256e-06, "loss": 0.67232857, "memory(GiB)": 34.88, "step": 51980, "train_speed(iter/s)": 0.411047 }, { "acc": 0.90632048, "epoch": 1.4075488045920992, "grad_norm": 4.463366508483887, "learning_rate": 7.708819322623113e-06, "loss": 0.53738399, "memory(GiB)": 34.88, "step": 51985, "train_speed(iter/s)": 0.411049 }, { "acc": 0.89129181, "epoch": 1.4076841848753148, "grad_norm": 8.223159790039062, "learning_rate": 7.708348958675947e-06, "loss": 0.63385935, "memory(GiB)": 34.88, "step": 51990, "train_speed(iter/s)": 0.411051 }, { "acc": 0.89261627, "epoch": 1.4078195651585304, "grad_norm": 12.960916519165039, "learning_rate": 7.707878560807654e-06, "loss": 0.50381074, "memory(GiB)": 34.88, "step": 51995, "train_speed(iter/s)": 0.411053 }, { "acc": 0.90079021, "epoch": 1.407954945441746, "grad_norm": 5.940385818481445, "learning_rate": 7.707408129024125e-06, "loss": 0.540662, "memory(GiB)": 34.88, "step": 52000, "train_speed(iter/s)": 0.411055 }, { "acc": 0.89942856, "epoch": 1.4080903257249613, "grad_norm": 8.667491912841797, "learning_rate": 7.70693766333125e-06, "loss": 0.49416904, "memory(GiB)": 34.88, "step": 52005, "train_speed(iter/s)": 0.411057 }, { "acc": 0.89671068, "epoch": 1.408225706008177, "grad_norm": 8.362325668334961, "learning_rate": 7.706467163734926e-06, "loss": 0.57321091, "memory(GiB)": 34.88, "step": 52010, "train_speed(iter/s)": 0.411059 }, { "acc": 0.88413448, "epoch": 1.4083610862913924, "grad_norm": 4.2378058433532715, "learning_rate": 7.705996630241047e-06, "loss": 0.55281682, "memory(GiB)": 34.88, "step": 52015, "train_speed(iter/s)": 0.411061 }, { "acc": 0.87615795, "epoch": 1.408496466574608, "grad_norm": 10.376572608947754, "learning_rate": 7.705526062855507e-06, "loss": 0.66227064, "memory(GiB)": 34.88, "step": 52020, "train_speed(iter/s)": 0.411063 }, { "acc": 0.88785915, "epoch": 1.4086318468578236, "grad_norm": 6.6134772300720215, "learning_rate": 7.705055461584198e-06, "loss": 0.66180878, "memory(GiB)": 34.88, "step": 52025, "train_speed(iter/s)": 0.411065 }, { "acc": 0.87576838, "epoch": 1.4087672271410392, "grad_norm": 12.963223457336426, "learning_rate": 7.704584826433016e-06, "loss": 0.70912189, "memory(GiB)": 34.88, "step": 52030, "train_speed(iter/s)": 0.411067 }, { "acc": 0.89438887, "epoch": 1.4089026074242548, "grad_norm": 7.900145530700684, "learning_rate": 7.70411415740786e-06, "loss": 0.52382593, "memory(GiB)": 34.88, "step": 52035, "train_speed(iter/s)": 0.41107 }, { "acc": 0.88491344, "epoch": 1.4090379877074704, "grad_norm": 7.355475425720215, "learning_rate": 7.703643454514625e-06, "loss": 0.54769645, "memory(GiB)": 34.88, "step": 52040, "train_speed(iter/s)": 0.411072 }, { "acc": 0.90321941, "epoch": 1.409173367990686, "grad_norm": 5.077040672302246, "learning_rate": 7.703172717759207e-06, "loss": 0.52796564, "memory(GiB)": 34.88, "step": 52045, "train_speed(iter/s)": 0.411074 }, { "acc": 0.92004623, "epoch": 1.4093087482739013, "grad_norm": 8.311651229858398, "learning_rate": 7.702701947147501e-06, "loss": 0.46497421, "memory(GiB)": 34.88, "step": 52050, "train_speed(iter/s)": 0.411076 }, { "acc": 0.8910326, "epoch": 1.4094441285571169, "grad_norm": 7.22182559967041, "learning_rate": 7.702231142685407e-06, "loss": 0.56464105, "memory(GiB)": 34.88, "step": 52055, "train_speed(iter/s)": 0.411078 }, { "acc": 0.90574198, "epoch": 1.4095795088403325, "grad_norm": 17.229299545288086, "learning_rate": 7.701760304378822e-06, "loss": 0.47171383, "memory(GiB)": 34.88, "step": 52060, "train_speed(iter/s)": 0.411081 }, { "acc": 0.88939114, "epoch": 1.409714889123548, "grad_norm": 8.76478099822998, "learning_rate": 7.70128943223364e-06, "loss": 0.55609808, "memory(GiB)": 34.88, "step": 52065, "train_speed(iter/s)": 0.411083 }, { "acc": 0.90764885, "epoch": 1.4098502694067636, "grad_norm": 7.271213531494141, "learning_rate": 7.700818526255768e-06, "loss": 0.51297836, "memory(GiB)": 34.88, "step": 52070, "train_speed(iter/s)": 0.411085 }, { "acc": 0.89336338, "epoch": 1.4099856496899792, "grad_norm": 6.494156837463379, "learning_rate": 7.700347586451098e-06, "loss": 0.51654568, "memory(GiB)": 34.88, "step": 52075, "train_speed(iter/s)": 0.411087 }, { "acc": 0.89769497, "epoch": 1.4101210299731948, "grad_norm": 10.238155364990234, "learning_rate": 7.69987661282553e-06, "loss": 0.57818995, "memory(GiB)": 34.88, "step": 52080, "train_speed(iter/s)": 0.411089 }, { "acc": 0.89638195, "epoch": 1.4102564102564101, "grad_norm": 8.259994506835938, "learning_rate": 7.699405605384969e-06, "loss": 0.62642078, "memory(GiB)": 34.88, "step": 52085, "train_speed(iter/s)": 0.411092 }, { "acc": 0.90722485, "epoch": 1.410391790539626, "grad_norm": 3.9431958198547363, "learning_rate": 7.698934564135312e-06, "loss": 0.45865955, "memory(GiB)": 34.88, "step": 52090, "train_speed(iter/s)": 0.411094 }, { "acc": 0.88196335, "epoch": 1.4105271708228413, "grad_norm": 8.515331268310547, "learning_rate": 7.698463489082458e-06, "loss": 0.69098053, "memory(GiB)": 34.88, "step": 52095, "train_speed(iter/s)": 0.411096 }, { "acc": 0.88889523, "epoch": 1.4106625511060569, "grad_norm": 8.612836837768555, "learning_rate": 7.69799238023231e-06, "loss": 0.60622988, "memory(GiB)": 34.88, "step": 52100, "train_speed(iter/s)": 0.411098 }, { "acc": 0.90517321, "epoch": 1.4107979313892725, "grad_norm": 6.318556308746338, "learning_rate": 7.69752123759077e-06, "loss": 0.45275831, "memory(GiB)": 34.88, "step": 52105, "train_speed(iter/s)": 0.4111 }, { "acc": 0.87542124, "epoch": 1.410933311672488, "grad_norm": 6.247448444366455, "learning_rate": 7.697050061163737e-06, "loss": 0.61062994, "memory(GiB)": 34.88, "step": 52110, "train_speed(iter/s)": 0.411102 }, { "acc": 0.89938107, "epoch": 1.4110686919557036, "grad_norm": 6.399148941040039, "learning_rate": 7.696578850957119e-06, "loss": 0.5796711, "memory(GiB)": 34.88, "step": 52115, "train_speed(iter/s)": 0.411104 }, { "acc": 0.91064224, "epoch": 1.4112040722389192, "grad_norm": 11.690848350524902, "learning_rate": 7.696107606976813e-06, "loss": 0.51362872, "memory(GiB)": 34.88, "step": 52120, "train_speed(iter/s)": 0.411107 }, { "acc": 0.90350714, "epoch": 1.4113394525221348, "grad_norm": 9.65169906616211, "learning_rate": 7.695636329228726e-06, "loss": 0.50984268, "memory(GiB)": 34.88, "step": 52125, "train_speed(iter/s)": 0.411109 }, { "acc": 0.90048904, "epoch": 1.4114748328053501, "grad_norm": 5.417537689208984, "learning_rate": 7.69516501771876e-06, "loss": 0.50566459, "memory(GiB)": 34.88, "step": 52130, "train_speed(iter/s)": 0.411111 }, { "acc": 0.88718777, "epoch": 1.4116102130885657, "grad_norm": 8.723974227905273, "learning_rate": 7.694693672452821e-06, "loss": 0.66202488, "memory(GiB)": 34.88, "step": 52135, "train_speed(iter/s)": 0.411113 }, { "acc": 0.90381222, "epoch": 1.4117455933717813, "grad_norm": 7.492333889007568, "learning_rate": 7.694222293436813e-06, "loss": 0.49052362, "memory(GiB)": 34.88, "step": 52140, "train_speed(iter/s)": 0.411115 }, { "acc": 0.90418053, "epoch": 1.4118809736549969, "grad_norm": 3.9130661487579346, "learning_rate": 7.693750880676637e-06, "loss": 0.49334736, "memory(GiB)": 34.88, "step": 52145, "train_speed(iter/s)": 0.411117 }, { "acc": 0.89696312, "epoch": 1.4120163539382125, "grad_norm": 6.79409122467041, "learning_rate": 7.693279434178203e-06, "loss": 0.62847624, "memory(GiB)": 34.88, "step": 52150, "train_speed(iter/s)": 0.411119 }, { "acc": 0.90691185, "epoch": 1.412151734221428, "grad_norm": 7.244744300842285, "learning_rate": 7.692807953947415e-06, "loss": 0.52096844, "memory(GiB)": 34.88, "step": 52155, "train_speed(iter/s)": 0.411121 }, { "acc": 0.90124321, "epoch": 1.4122871145046436, "grad_norm": 4.218317031860352, "learning_rate": 7.692336439990181e-06, "loss": 0.48367691, "memory(GiB)": 34.88, "step": 52160, "train_speed(iter/s)": 0.411123 }, { "acc": 0.89818106, "epoch": 1.412422494787859, "grad_norm": 7.149960517883301, "learning_rate": 7.691864892312405e-06, "loss": 0.57077913, "memory(GiB)": 34.88, "step": 52165, "train_speed(iter/s)": 0.411126 }, { "acc": 0.88794975, "epoch": 1.4125578750710748, "grad_norm": 6.325035572052002, "learning_rate": 7.691393310919994e-06, "loss": 0.56730919, "memory(GiB)": 34.88, "step": 52170, "train_speed(iter/s)": 0.411128 }, { "acc": 0.89764156, "epoch": 1.4126932553542901, "grad_norm": 8.992047309875488, "learning_rate": 7.690921695818859e-06, "loss": 0.49867811, "memory(GiB)": 34.88, "step": 52175, "train_speed(iter/s)": 0.411129 }, { "acc": 0.90434475, "epoch": 1.4128286356375057, "grad_norm": 13.50882339477539, "learning_rate": 7.690450047014903e-06, "loss": 0.49203486, "memory(GiB)": 34.88, "step": 52180, "train_speed(iter/s)": 0.411132 }, { "acc": 0.88589554, "epoch": 1.4129640159207213, "grad_norm": 12.286563873291016, "learning_rate": 7.68997836451404e-06, "loss": 0.64604254, "memory(GiB)": 34.88, "step": 52185, "train_speed(iter/s)": 0.411134 }, { "acc": 0.89775467, "epoch": 1.4130993962039369, "grad_norm": 7.983034610748291, "learning_rate": 7.689506648322173e-06, "loss": 0.55684576, "memory(GiB)": 34.88, "step": 52190, "train_speed(iter/s)": 0.411136 }, { "acc": 0.89353275, "epoch": 1.4132347764871525, "grad_norm": 8.775540351867676, "learning_rate": 7.689034898445216e-06, "loss": 0.52140217, "memory(GiB)": 34.88, "step": 52195, "train_speed(iter/s)": 0.411138 }, { "acc": 0.90375557, "epoch": 1.413370156770368, "grad_norm": 11.621747016906738, "learning_rate": 7.688563114889074e-06, "loss": 0.51586332, "memory(GiB)": 34.88, "step": 52200, "train_speed(iter/s)": 0.41114 }, { "acc": 0.92090759, "epoch": 1.4135055370535836, "grad_norm": 3.77016544342041, "learning_rate": 7.688091297659662e-06, "loss": 0.34982898, "memory(GiB)": 34.88, "step": 52205, "train_speed(iter/s)": 0.411142 }, { "acc": 0.89895458, "epoch": 1.413640917336799, "grad_norm": 5.282154560089111, "learning_rate": 7.687619446762887e-06, "loss": 0.47462025, "memory(GiB)": 34.88, "step": 52210, "train_speed(iter/s)": 0.411144 }, { "acc": 0.89704533, "epoch": 1.4137762976200146, "grad_norm": 5.972297191619873, "learning_rate": 7.687147562204661e-06, "loss": 0.52466769, "memory(GiB)": 34.88, "step": 52215, "train_speed(iter/s)": 0.411146 }, { "acc": 0.90942736, "epoch": 1.4139116779032301, "grad_norm": 4.318449020385742, "learning_rate": 7.686675643990893e-06, "loss": 0.46220713, "memory(GiB)": 34.88, "step": 52220, "train_speed(iter/s)": 0.411148 }, { "acc": 0.88768177, "epoch": 1.4140470581864457, "grad_norm": 12.349370002746582, "learning_rate": 7.686203692127498e-06, "loss": 0.5595583, "memory(GiB)": 34.88, "step": 52225, "train_speed(iter/s)": 0.41115 }, { "acc": 0.92046833, "epoch": 1.4141824384696613, "grad_norm": 6.162243366241455, "learning_rate": 7.685731706620387e-06, "loss": 0.44562025, "memory(GiB)": 34.88, "step": 52230, "train_speed(iter/s)": 0.411152 }, { "acc": 0.8840313, "epoch": 1.4143178187528769, "grad_norm": 12.260746002197266, "learning_rate": 7.685259687475473e-06, "loss": 0.59094648, "memory(GiB)": 34.88, "step": 52235, "train_speed(iter/s)": 0.411154 }, { "acc": 0.88547611, "epoch": 1.4144531990360925, "grad_norm": 6.804000377655029, "learning_rate": 7.684787634698668e-06, "loss": 0.70436144, "memory(GiB)": 34.88, "step": 52240, "train_speed(iter/s)": 0.411156 }, { "acc": 0.91242714, "epoch": 1.4145885793193078, "grad_norm": 6.238768100738525, "learning_rate": 7.684315548295885e-06, "loss": 0.41400714, "memory(GiB)": 34.88, "step": 52245, "train_speed(iter/s)": 0.411158 }, { "acc": 0.89361467, "epoch": 1.4147239596025236, "grad_norm": 12.789400100708008, "learning_rate": 7.683843428273038e-06, "loss": 0.59007177, "memory(GiB)": 34.88, "step": 52250, "train_speed(iter/s)": 0.411161 }, { "acc": 0.9006485, "epoch": 1.414859339885739, "grad_norm": 10.887655258178711, "learning_rate": 7.683371274636044e-06, "loss": 0.56015358, "memory(GiB)": 34.88, "step": 52255, "train_speed(iter/s)": 0.411163 }, { "acc": 0.88804846, "epoch": 1.4149947201689546, "grad_norm": 9.76002025604248, "learning_rate": 7.682899087390815e-06, "loss": 0.63173223, "memory(GiB)": 34.88, "step": 52260, "train_speed(iter/s)": 0.411164 }, { "acc": 0.91437607, "epoch": 1.4151301004521701, "grad_norm": 7.822744846343994, "learning_rate": 7.682426866543266e-06, "loss": 0.44886727, "memory(GiB)": 34.88, "step": 52265, "train_speed(iter/s)": 0.411166 }, { "acc": 0.90500946, "epoch": 1.4152654807353857, "grad_norm": 6.568543910980225, "learning_rate": 7.68195461209931e-06, "loss": 0.47464395, "memory(GiB)": 34.88, "step": 52270, "train_speed(iter/s)": 0.411168 }, { "acc": 0.88975697, "epoch": 1.4154008610186013, "grad_norm": 8.358152389526367, "learning_rate": 7.681482324064872e-06, "loss": 0.58554573, "memory(GiB)": 34.88, "step": 52275, "train_speed(iter/s)": 0.41117 }, { "acc": 0.88408298, "epoch": 1.4155362413018169, "grad_norm": 6.941387176513672, "learning_rate": 7.681010002445857e-06, "loss": 0.59064727, "memory(GiB)": 34.88, "step": 52280, "train_speed(iter/s)": 0.411173 }, { "acc": 0.89579926, "epoch": 1.4156716215850325, "grad_norm": 6.028295993804932, "learning_rate": 7.68053764724819e-06, "loss": 0.56933088, "memory(GiB)": 34.88, "step": 52285, "train_speed(iter/s)": 0.411175 }, { "acc": 0.88484869, "epoch": 1.4158070018682478, "grad_norm": 8.410956382751465, "learning_rate": 7.680065258477786e-06, "loss": 0.57804723, "memory(GiB)": 34.88, "step": 52290, "train_speed(iter/s)": 0.411176 }, { "acc": 0.88480997, "epoch": 1.4159423821514634, "grad_norm": 6.258101940155029, "learning_rate": 7.67959283614056e-06, "loss": 0.61044478, "memory(GiB)": 34.88, "step": 52295, "train_speed(iter/s)": 0.411178 }, { "acc": 0.90799961, "epoch": 1.416077762434679, "grad_norm": 12.791275978088379, "learning_rate": 7.679120380242432e-06, "loss": 0.47069941, "memory(GiB)": 34.88, "step": 52300, "train_speed(iter/s)": 0.41118 }, { "acc": 0.8885211, "epoch": 1.4162131427178946, "grad_norm": 8.994312286376953, "learning_rate": 7.67864789078932e-06, "loss": 0.54950466, "memory(GiB)": 34.88, "step": 52305, "train_speed(iter/s)": 0.411181 }, { "acc": 0.91975098, "epoch": 1.4163485230011101, "grad_norm": 11.830406188964844, "learning_rate": 7.678175367787144e-06, "loss": 0.3949096, "memory(GiB)": 34.88, "step": 52310, "train_speed(iter/s)": 0.411183 }, { "acc": 0.91902218, "epoch": 1.4164839032843257, "grad_norm": 19.519960403442383, "learning_rate": 7.67770281124182e-06, "loss": 0.43437095, "memory(GiB)": 34.88, "step": 52315, "train_speed(iter/s)": 0.411186 }, { "acc": 0.8776495, "epoch": 1.4166192835675413, "grad_norm": 7.113765716552734, "learning_rate": 7.677230221159273e-06, "loss": 0.64519043, "memory(GiB)": 34.88, "step": 52320, "train_speed(iter/s)": 0.411187 }, { "acc": 0.8911335, "epoch": 1.4167546638507567, "grad_norm": 7.936566352844238, "learning_rate": 7.676757597545418e-06, "loss": 0.5350266, "memory(GiB)": 34.88, "step": 52325, "train_speed(iter/s)": 0.411189 }, { "acc": 0.89974651, "epoch": 1.4168900441339725, "grad_norm": 4.487305641174316, "learning_rate": 7.67628494040618e-06, "loss": 0.5275671, "memory(GiB)": 34.88, "step": 52330, "train_speed(iter/s)": 0.411192 }, { "acc": 0.90526762, "epoch": 1.4170254244171878, "grad_norm": 11.299087524414062, "learning_rate": 7.675812249747478e-06, "loss": 0.48743639, "memory(GiB)": 34.88, "step": 52335, "train_speed(iter/s)": 0.411194 }, { "acc": 0.88746033, "epoch": 1.4171608047004034, "grad_norm": 9.776142120361328, "learning_rate": 7.67533952557523e-06, "loss": 0.61082296, "memory(GiB)": 34.88, "step": 52340, "train_speed(iter/s)": 0.411195 }, { "acc": 0.88327703, "epoch": 1.417296184983619, "grad_norm": 8.543047904968262, "learning_rate": 7.674866767895364e-06, "loss": 0.68795385, "memory(GiB)": 34.88, "step": 52345, "train_speed(iter/s)": 0.411197 }, { "acc": 0.89833889, "epoch": 1.4174315652668346, "grad_norm": 8.05139446258545, "learning_rate": 7.674393976713795e-06, "loss": 0.52585082, "memory(GiB)": 34.88, "step": 52350, "train_speed(iter/s)": 0.411198 }, { "acc": 0.90328941, "epoch": 1.4175669455500501, "grad_norm": 3.669250249862671, "learning_rate": 7.673921152036453e-06, "loss": 0.48857689, "memory(GiB)": 34.88, "step": 52355, "train_speed(iter/s)": 0.4112 }, { "acc": 0.89741821, "epoch": 1.4177023258332657, "grad_norm": 11.329679489135742, "learning_rate": 7.673448293869256e-06, "loss": 0.55937357, "memory(GiB)": 34.88, "step": 52360, "train_speed(iter/s)": 0.411202 }, { "acc": 0.87734375, "epoch": 1.4178377061164813, "grad_norm": 7.91434383392334, "learning_rate": 7.672975402218131e-06, "loss": 0.65098882, "memory(GiB)": 34.88, "step": 52365, "train_speed(iter/s)": 0.411204 }, { "acc": 0.90612068, "epoch": 1.4179730863996967, "grad_norm": 7.0481696128845215, "learning_rate": 7.672502477089e-06, "loss": 0.44255829, "memory(GiB)": 34.88, "step": 52370, "train_speed(iter/s)": 0.411206 }, { "acc": 0.90655622, "epoch": 1.4181084666829122, "grad_norm": 8.611824035644531, "learning_rate": 7.672029518487786e-06, "loss": 0.45507812, "memory(GiB)": 34.88, "step": 52375, "train_speed(iter/s)": 0.411209 }, { "acc": 0.89560814, "epoch": 1.4182438469661278, "grad_norm": 23.336894989013672, "learning_rate": 7.67155652642042e-06, "loss": 0.56135168, "memory(GiB)": 34.88, "step": 52380, "train_speed(iter/s)": 0.41121 }, { "acc": 0.90453014, "epoch": 1.4183792272493434, "grad_norm": 5.686201572418213, "learning_rate": 7.671083500892817e-06, "loss": 0.50497088, "memory(GiB)": 34.88, "step": 52385, "train_speed(iter/s)": 0.411212 }, { "acc": 0.89381981, "epoch": 1.418514607532559, "grad_norm": 10.861124992370605, "learning_rate": 7.67061044191091e-06, "loss": 0.56225057, "memory(GiB)": 34.88, "step": 52390, "train_speed(iter/s)": 0.411213 }, { "acc": 0.87175274, "epoch": 1.4186499878157746, "grad_norm": 6.706625938415527, "learning_rate": 7.670137349480623e-06, "loss": 0.76654439, "memory(GiB)": 34.88, "step": 52395, "train_speed(iter/s)": 0.411215 }, { "acc": 0.91131058, "epoch": 1.4187853680989901, "grad_norm": 29.620567321777344, "learning_rate": 7.669664223607882e-06, "loss": 0.45979772, "memory(GiB)": 34.88, "step": 52400, "train_speed(iter/s)": 0.411217 }, { "acc": 0.89633198, "epoch": 1.4189207483822055, "grad_norm": 3.687288284301758, "learning_rate": 7.669191064298615e-06, "loss": 0.62642708, "memory(GiB)": 34.88, "step": 52405, "train_speed(iter/s)": 0.411219 }, { "acc": 0.89132671, "epoch": 1.4190561286654213, "grad_norm": 7.559551239013672, "learning_rate": 7.668717871558747e-06, "loss": 0.54883776, "memory(GiB)": 34.88, "step": 52410, "train_speed(iter/s)": 0.41122 }, { "acc": 0.8965229, "epoch": 1.4191915089486367, "grad_norm": 7.977550983428955, "learning_rate": 7.668244645394207e-06, "loss": 0.66449766, "memory(GiB)": 34.88, "step": 52415, "train_speed(iter/s)": 0.411222 }, { "acc": 0.88702946, "epoch": 1.4193268892318522, "grad_norm": 7.045322895050049, "learning_rate": 7.667771385810926e-06, "loss": 0.61352038, "memory(GiB)": 34.88, "step": 52420, "train_speed(iter/s)": 0.411223 }, { "acc": 0.90365219, "epoch": 1.4194622695150678, "grad_norm": 7.156736850738525, "learning_rate": 7.667298092814826e-06, "loss": 0.48918972, "memory(GiB)": 34.88, "step": 52425, "train_speed(iter/s)": 0.411226 }, { "acc": 0.90719433, "epoch": 1.4195976497982834, "grad_norm": 7.8794169425964355, "learning_rate": 7.666824766411843e-06, "loss": 0.49529104, "memory(GiB)": 34.88, "step": 52430, "train_speed(iter/s)": 0.411227 }, { "acc": 0.89287357, "epoch": 1.419733030081499, "grad_norm": 16.966386795043945, "learning_rate": 7.6663514066079e-06, "loss": 0.64246383, "memory(GiB)": 34.88, "step": 52435, "train_speed(iter/s)": 0.411229 }, { "acc": 0.88283854, "epoch": 1.4198684103647146, "grad_norm": 6.370001316070557, "learning_rate": 7.665878013408932e-06, "loss": 0.57466698, "memory(GiB)": 34.88, "step": 52440, "train_speed(iter/s)": 0.411231 }, { "acc": 0.88005676, "epoch": 1.4200037906479301, "grad_norm": 13.630304336547852, "learning_rate": 7.665404586820865e-06, "loss": 0.69025292, "memory(GiB)": 34.88, "step": 52445, "train_speed(iter/s)": 0.411233 }, { "acc": 0.887784, "epoch": 1.4201391709311455, "grad_norm": 10.22910213470459, "learning_rate": 7.664931126849633e-06, "loss": 0.56433678, "memory(GiB)": 34.88, "step": 52450, "train_speed(iter/s)": 0.411236 }, { "acc": 0.89539194, "epoch": 1.420274551214361, "grad_norm": 8.378802299499512, "learning_rate": 7.664457633501166e-06, "loss": 0.64076118, "memory(GiB)": 34.88, "step": 52455, "train_speed(iter/s)": 0.411238 }, { "acc": 0.89830132, "epoch": 1.4204099314975767, "grad_norm": 8.035073280334473, "learning_rate": 7.663984106781394e-06, "loss": 0.53244042, "memory(GiB)": 34.88, "step": 52460, "train_speed(iter/s)": 0.41124 }, { "acc": 0.88757782, "epoch": 1.4205453117807922, "grad_norm": 15.616789817810059, "learning_rate": 7.66351054669625e-06, "loss": 0.60080876, "memory(GiB)": 34.88, "step": 52465, "train_speed(iter/s)": 0.411242 }, { "acc": 0.88748045, "epoch": 1.4206806920640078, "grad_norm": 8.173062324523926, "learning_rate": 7.663036953251664e-06, "loss": 0.57781625, "memory(GiB)": 34.88, "step": 52470, "train_speed(iter/s)": 0.411244 }, { "acc": 0.90740204, "epoch": 1.4208160723472234, "grad_norm": 7.04318380355835, "learning_rate": 7.662563326453575e-06, "loss": 0.44616857, "memory(GiB)": 34.88, "step": 52475, "train_speed(iter/s)": 0.411246 }, { "acc": 0.88861866, "epoch": 1.420951452630439, "grad_norm": 9.20583438873291, "learning_rate": 7.66208966630791e-06, "loss": 0.50777035, "memory(GiB)": 34.88, "step": 52480, "train_speed(iter/s)": 0.411248 }, { "acc": 0.88902788, "epoch": 1.4210868329136543, "grad_norm": 18.493772506713867, "learning_rate": 7.661615972820603e-06, "loss": 0.5745965, "memory(GiB)": 34.88, "step": 52485, "train_speed(iter/s)": 0.411251 }, { "acc": 0.90839558, "epoch": 1.4212222131968701, "grad_norm": 7.329838752746582, "learning_rate": 7.661142245997592e-06, "loss": 0.4970376, "memory(GiB)": 34.88, "step": 52490, "train_speed(iter/s)": 0.411253 }, { "acc": 0.88983097, "epoch": 1.4213575934800855, "grad_norm": 11.007323265075684, "learning_rate": 7.660668485844805e-06, "loss": 0.61702876, "memory(GiB)": 34.88, "step": 52495, "train_speed(iter/s)": 0.411254 }, { "acc": 0.89781084, "epoch": 1.421492973763301, "grad_norm": 6.222098350524902, "learning_rate": 7.660194692368184e-06, "loss": 0.60098057, "memory(GiB)": 34.88, "step": 52500, "train_speed(iter/s)": 0.411257 }, { "acc": 0.90438976, "epoch": 1.4216283540465167, "grad_norm": 4.606318950653076, "learning_rate": 7.65972086557366e-06, "loss": 0.49514227, "memory(GiB)": 34.88, "step": 52505, "train_speed(iter/s)": 0.411258 }, { "acc": 0.89237232, "epoch": 1.4217637343297322, "grad_norm": 31.545825958251953, "learning_rate": 7.659247005467167e-06, "loss": 0.58997035, "memory(GiB)": 34.88, "step": 52510, "train_speed(iter/s)": 0.41126 }, { "acc": 0.8827281, "epoch": 1.4218991146129478, "grad_norm": 9.543421745300293, "learning_rate": 7.658773112054646e-06, "loss": 0.71982503, "memory(GiB)": 34.88, "step": 52515, "train_speed(iter/s)": 0.411262 }, { "acc": 0.91387253, "epoch": 1.4220344948961634, "grad_norm": 7.256378173828125, "learning_rate": 7.65829918534203e-06, "loss": 0.42487388, "memory(GiB)": 34.88, "step": 52520, "train_speed(iter/s)": 0.411264 }, { "acc": 0.88825607, "epoch": 1.422169875179379, "grad_norm": 6.536588668823242, "learning_rate": 7.657825225335257e-06, "loss": 0.54320378, "memory(GiB)": 34.88, "step": 52525, "train_speed(iter/s)": 0.411266 }, { "acc": 0.90254765, "epoch": 1.4223052554625943, "grad_norm": 9.462533950805664, "learning_rate": 7.657351232040263e-06, "loss": 0.45273237, "memory(GiB)": 34.88, "step": 52530, "train_speed(iter/s)": 0.411268 }, { "acc": 0.90145292, "epoch": 1.42244063574581, "grad_norm": 11.928350448608398, "learning_rate": 7.656877205462986e-06, "loss": 0.45498505, "memory(GiB)": 34.88, "step": 52535, "train_speed(iter/s)": 0.41127 }, { "acc": 0.91756411, "epoch": 1.4225760160290255, "grad_norm": 5.241387367248535, "learning_rate": 7.656403145609366e-06, "loss": 0.41406217, "memory(GiB)": 34.88, "step": 52540, "train_speed(iter/s)": 0.411273 }, { "acc": 0.88522644, "epoch": 1.422711396312241, "grad_norm": 8.706828117370605, "learning_rate": 7.655929052485341e-06, "loss": 0.67807007, "memory(GiB)": 34.88, "step": 52545, "train_speed(iter/s)": 0.411275 }, { "acc": 0.90408154, "epoch": 1.4228467765954567, "grad_norm": 15.229354858398438, "learning_rate": 7.655454926096846e-06, "loss": 0.55888052, "memory(GiB)": 34.88, "step": 52550, "train_speed(iter/s)": 0.411277 }, { "acc": 0.92004776, "epoch": 1.4229821568786722, "grad_norm": 6.337368011474609, "learning_rate": 7.654980766449828e-06, "loss": 0.41493406, "memory(GiB)": 34.88, "step": 52555, "train_speed(iter/s)": 0.411279 }, { "acc": 0.88340101, "epoch": 1.4231175371618878, "grad_norm": 14.17165470123291, "learning_rate": 7.65450657355022e-06, "loss": 0.64354486, "memory(GiB)": 34.88, "step": 52560, "train_speed(iter/s)": 0.411282 }, { "acc": 0.87872066, "epoch": 1.4232529174451032, "grad_norm": 8.330180168151855, "learning_rate": 7.654032347403964e-06, "loss": 0.64595785, "memory(GiB)": 34.88, "step": 52565, "train_speed(iter/s)": 0.411284 }, { "acc": 0.88553963, "epoch": 1.423388297728319, "grad_norm": 8.946969032287598, "learning_rate": 7.653558088017002e-06, "loss": 0.61435323, "memory(GiB)": 34.88, "step": 52570, "train_speed(iter/s)": 0.411286 }, { "acc": 0.88503876, "epoch": 1.4235236780115343, "grad_norm": 12.710442543029785, "learning_rate": 7.653083795395272e-06, "loss": 0.67665987, "memory(GiB)": 34.88, "step": 52575, "train_speed(iter/s)": 0.411288 }, { "acc": 0.91086054, "epoch": 1.42365905829475, "grad_norm": 13.163735389709473, "learning_rate": 7.65260946954472e-06, "loss": 0.51725197, "memory(GiB)": 34.88, "step": 52580, "train_speed(iter/s)": 0.41129 }, { "acc": 0.89672661, "epoch": 1.4237944385779655, "grad_norm": 11.415141105651855, "learning_rate": 7.652135110471285e-06, "loss": 0.5922173, "memory(GiB)": 34.88, "step": 52585, "train_speed(iter/s)": 0.411292 }, { "acc": 0.89618959, "epoch": 1.423929818861181, "grad_norm": 7.793549060821533, "learning_rate": 7.651660718180912e-06, "loss": 0.62342215, "memory(GiB)": 34.88, "step": 52590, "train_speed(iter/s)": 0.411294 }, { "acc": 0.90717087, "epoch": 1.4240651991443967, "grad_norm": 10.368772506713867, "learning_rate": 7.651186292679538e-06, "loss": 0.52166586, "memory(GiB)": 34.88, "step": 52595, "train_speed(iter/s)": 0.411296 }, { "acc": 0.90184669, "epoch": 1.4242005794276122, "grad_norm": 12.771710395812988, "learning_rate": 7.650711833973112e-06, "loss": 0.50849223, "memory(GiB)": 34.88, "step": 52600, "train_speed(iter/s)": 0.411299 }, { "acc": 0.90456123, "epoch": 1.4243359597108278, "grad_norm": 10.693395614624023, "learning_rate": 7.650237342067574e-06, "loss": 0.54371748, "memory(GiB)": 34.88, "step": 52605, "train_speed(iter/s)": 0.411301 }, { "acc": 0.89674149, "epoch": 1.4244713399940432, "grad_norm": 5.352342128753662, "learning_rate": 7.649762816968868e-06, "loss": 0.58857517, "memory(GiB)": 34.88, "step": 52610, "train_speed(iter/s)": 0.411303 }, { "acc": 0.88558111, "epoch": 1.4246067202772588, "grad_norm": 6.806844234466553, "learning_rate": 7.649288258682942e-06, "loss": 0.71313171, "memory(GiB)": 34.88, "step": 52615, "train_speed(iter/s)": 0.411305 }, { "acc": 0.88468122, "epoch": 1.4247421005604743, "grad_norm": 7.425887107849121, "learning_rate": 7.648813667215736e-06, "loss": 0.53898168, "memory(GiB)": 34.88, "step": 52620, "train_speed(iter/s)": 0.411307 }, { "acc": 0.90137968, "epoch": 1.42487748084369, "grad_norm": 8.585500717163086, "learning_rate": 7.6483390425732e-06, "loss": 0.52922344, "memory(GiB)": 34.88, "step": 52625, "train_speed(iter/s)": 0.411309 }, { "acc": 0.91482735, "epoch": 1.4250128611269055, "grad_norm": 8.281062126159668, "learning_rate": 7.647864384761274e-06, "loss": 0.51082101, "memory(GiB)": 34.88, "step": 52630, "train_speed(iter/s)": 0.411311 }, { "acc": 0.90564861, "epoch": 1.425148241410121, "grad_norm": 5.80373477935791, "learning_rate": 7.647389693785909e-06, "loss": 0.50576315, "memory(GiB)": 34.88, "step": 52635, "train_speed(iter/s)": 0.411314 }, { "acc": 0.89300928, "epoch": 1.4252836216933367, "grad_norm": 7.580427646636963, "learning_rate": 7.646914969653048e-06, "loss": 0.61344461, "memory(GiB)": 34.88, "step": 52640, "train_speed(iter/s)": 0.411316 }, { "acc": 0.91156654, "epoch": 1.425419001976552, "grad_norm": 7.091749668121338, "learning_rate": 7.64644021236864e-06, "loss": 0.49383359, "memory(GiB)": 34.88, "step": 52645, "train_speed(iter/s)": 0.411318 }, { "acc": 0.90998516, "epoch": 1.4255543822597676, "grad_norm": 4.861525535583496, "learning_rate": 7.645965421938631e-06, "loss": 0.45320005, "memory(GiB)": 34.88, "step": 52650, "train_speed(iter/s)": 0.41132 }, { "acc": 0.8893568, "epoch": 1.4256897625429832, "grad_norm": 11.066205024719238, "learning_rate": 7.645490598368969e-06, "loss": 0.53496294, "memory(GiB)": 34.88, "step": 52655, "train_speed(iter/s)": 0.411322 }, { "acc": 0.89244442, "epoch": 1.4258251428261988, "grad_norm": 14.9028959274292, "learning_rate": 7.645015741665604e-06, "loss": 0.64392819, "memory(GiB)": 34.88, "step": 52660, "train_speed(iter/s)": 0.411324 }, { "acc": 0.88951874, "epoch": 1.4259605231094143, "grad_norm": 13.326237678527832, "learning_rate": 7.644540851834483e-06, "loss": 0.57557364, "memory(GiB)": 34.88, "step": 52665, "train_speed(iter/s)": 0.411326 }, { "acc": 0.89715538, "epoch": 1.42609590339263, "grad_norm": 15.979976654052734, "learning_rate": 7.644065928881554e-06, "loss": 0.53817768, "memory(GiB)": 34.88, "step": 52670, "train_speed(iter/s)": 0.411328 }, { "acc": 0.88289471, "epoch": 1.4262312836758455, "grad_norm": 16.120071411132812, "learning_rate": 7.643590972812766e-06, "loss": 0.56496143, "memory(GiB)": 34.88, "step": 52675, "train_speed(iter/s)": 0.41133 }, { "acc": 0.89422207, "epoch": 1.4263666639590609, "grad_norm": 11.870243072509766, "learning_rate": 7.643115983634072e-06, "loss": 0.51654224, "memory(GiB)": 34.88, "step": 52680, "train_speed(iter/s)": 0.411332 }, { "acc": 0.91355171, "epoch": 1.4265020442422767, "grad_norm": 7.380921840667725, "learning_rate": 7.642640961351419e-06, "loss": 0.52289152, "memory(GiB)": 34.88, "step": 52685, "train_speed(iter/s)": 0.411334 }, { "acc": 0.89788857, "epoch": 1.426637424525492, "grad_norm": 7.200401306152344, "learning_rate": 7.642165905970757e-06, "loss": 0.47606335, "memory(GiB)": 34.88, "step": 52690, "train_speed(iter/s)": 0.411336 }, { "acc": 0.90381393, "epoch": 1.4267728048087076, "grad_norm": 3.7456369400024414, "learning_rate": 7.641690817498043e-06, "loss": 0.45283265, "memory(GiB)": 34.88, "step": 52695, "train_speed(iter/s)": 0.411338 }, { "acc": 0.91042547, "epoch": 1.4269081850919232, "grad_norm": 9.32111930847168, "learning_rate": 7.641215695939219e-06, "loss": 0.45785737, "memory(GiB)": 34.88, "step": 52700, "train_speed(iter/s)": 0.41134 }, { "acc": 0.91076345, "epoch": 1.4270435653751388, "grad_norm": 5.486959457397461, "learning_rate": 7.640740541300246e-06, "loss": 0.45886698, "memory(GiB)": 34.88, "step": 52705, "train_speed(iter/s)": 0.411342 }, { "acc": 0.88827553, "epoch": 1.4271789456583543, "grad_norm": 10.493369102478027, "learning_rate": 7.640265353587068e-06, "loss": 0.56778164, "memory(GiB)": 34.88, "step": 52710, "train_speed(iter/s)": 0.411344 }, { "acc": 0.91487389, "epoch": 1.42731432594157, "grad_norm": 3.8192622661590576, "learning_rate": 7.639790132805647e-06, "loss": 0.43438649, "memory(GiB)": 34.88, "step": 52715, "train_speed(iter/s)": 0.411346 }, { "acc": 0.91280603, "epoch": 1.4274497062247855, "grad_norm": 19.42246437072754, "learning_rate": 7.639314878961927e-06, "loss": 0.46731625, "memory(GiB)": 34.88, "step": 52720, "train_speed(iter/s)": 0.411348 }, { "acc": 0.90519676, "epoch": 1.4275850865080009, "grad_norm": 6.299910545349121, "learning_rate": 7.638839592061867e-06, "loss": 0.54186187, "memory(GiB)": 34.88, "step": 52725, "train_speed(iter/s)": 0.41135 }, { "acc": 0.90393925, "epoch": 1.4277204667912164, "grad_norm": 13.07510757446289, "learning_rate": 7.638364272111419e-06, "loss": 0.52778387, "memory(GiB)": 34.88, "step": 52730, "train_speed(iter/s)": 0.411352 }, { "acc": 0.89839745, "epoch": 1.427855847074432, "grad_norm": 5.676155090332031, "learning_rate": 7.637888919116536e-06, "loss": 0.52755127, "memory(GiB)": 34.88, "step": 52735, "train_speed(iter/s)": 0.411354 }, { "acc": 0.91248226, "epoch": 1.4279912273576476, "grad_norm": 6.744517803192139, "learning_rate": 7.637413533083177e-06, "loss": 0.43468518, "memory(GiB)": 34.88, "step": 52740, "train_speed(iter/s)": 0.411356 }, { "acc": 0.91018372, "epoch": 1.4281266076408632, "grad_norm": 40.202022552490234, "learning_rate": 7.636938114017293e-06, "loss": 0.49149609, "memory(GiB)": 34.88, "step": 52745, "train_speed(iter/s)": 0.411358 }, { "acc": 0.89674549, "epoch": 1.4282619879240788, "grad_norm": 9.08800983428955, "learning_rate": 7.63646266192484e-06, "loss": 0.53893929, "memory(GiB)": 34.88, "step": 52750, "train_speed(iter/s)": 0.41136 }, { "acc": 0.88666439, "epoch": 1.4283973682072943, "grad_norm": 11.784866333007812, "learning_rate": 7.635987176811777e-06, "loss": 0.56937432, "memory(GiB)": 34.88, "step": 52755, "train_speed(iter/s)": 0.411362 }, { "acc": 0.91184521, "epoch": 1.4285327484905097, "grad_norm": 12.909767150878906, "learning_rate": 7.635511658684057e-06, "loss": 0.49257088, "memory(GiB)": 34.88, "step": 52760, "train_speed(iter/s)": 0.411364 }, { "acc": 0.91711826, "epoch": 1.4286681287737255, "grad_norm": 10.763382911682129, "learning_rate": 7.63503610754764e-06, "loss": 0.40873523, "memory(GiB)": 34.88, "step": 52765, "train_speed(iter/s)": 0.411366 }, { "acc": 0.88644619, "epoch": 1.4288035090569409, "grad_norm": 4.800394535064697, "learning_rate": 7.634560523408479e-06, "loss": 0.60382733, "memory(GiB)": 34.88, "step": 52770, "train_speed(iter/s)": 0.411368 }, { "acc": 0.90248203, "epoch": 1.4289388893401564, "grad_norm": 6.305108070373535, "learning_rate": 7.634084906272536e-06, "loss": 0.50473576, "memory(GiB)": 34.88, "step": 52775, "train_speed(iter/s)": 0.41137 }, { "acc": 0.89405937, "epoch": 1.429074269623372, "grad_norm": 5.93568229675293, "learning_rate": 7.633609256145766e-06, "loss": 0.51359696, "memory(GiB)": 34.88, "step": 52780, "train_speed(iter/s)": 0.411372 }, { "acc": 0.90651016, "epoch": 1.4292096499065876, "grad_norm": 6.215620517730713, "learning_rate": 7.63313357303413e-06, "loss": 0.49735985, "memory(GiB)": 34.88, "step": 52785, "train_speed(iter/s)": 0.411374 }, { "acc": 0.90546303, "epoch": 1.4293450301898032, "grad_norm": 6.22039270401001, "learning_rate": 7.632657856943583e-06, "loss": 0.48535666, "memory(GiB)": 34.88, "step": 52790, "train_speed(iter/s)": 0.411377 }, { "acc": 0.89262218, "epoch": 1.4294804104730188, "grad_norm": 7.979440689086914, "learning_rate": 7.632182107880088e-06, "loss": 0.63975897, "memory(GiB)": 34.88, "step": 52795, "train_speed(iter/s)": 0.411379 }, { "acc": 0.91018381, "epoch": 1.4296157907562344, "grad_norm": 8.803876876831055, "learning_rate": 7.631706325849605e-06, "loss": 0.53510504, "memory(GiB)": 34.88, "step": 52800, "train_speed(iter/s)": 0.411381 }, { "acc": 0.91153851, "epoch": 1.4297511710394497, "grad_norm": 6.969977855682373, "learning_rate": 7.631230510858089e-06, "loss": 0.46058679, "memory(GiB)": 34.88, "step": 52805, "train_speed(iter/s)": 0.411383 }, { "acc": 0.89745045, "epoch": 1.4298865513226653, "grad_norm": 37.350379943847656, "learning_rate": 7.630754662911506e-06, "loss": 0.5638689, "memory(GiB)": 34.88, "step": 52810, "train_speed(iter/s)": 0.411385 }, { "acc": 0.90931721, "epoch": 1.4300219316058809, "grad_norm": 6.2676239013671875, "learning_rate": 7.630278782015815e-06, "loss": 0.52177052, "memory(GiB)": 34.88, "step": 52815, "train_speed(iter/s)": 0.411387 }, { "acc": 0.90818787, "epoch": 1.4301573118890964, "grad_norm": 6.847929954528809, "learning_rate": 7.629802868176977e-06, "loss": 0.54252567, "memory(GiB)": 34.88, "step": 52820, "train_speed(iter/s)": 0.411389 }, { "acc": 0.91349688, "epoch": 1.430292692172312, "grad_norm": 7.31986141204834, "learning_rate": 7.629326921400952e-06, "loss": 0.37984776, "memory(GiB)": 34.88, "step": 52825, "train_speed(iter/s)": 0.411391 }, { "acc": 0.91500177, "epoch": 1.4304280724555276, "grad_norm": 7.876500606536865, "learning_rate": 7.628850941693709e-06, "loss": 0.46707168, "memory(GiB)": 34.88, "step": 52830, "train_speed(iter/s)": 0.411393 }, { "acc": 0.88829012, "epoch": 1.4305634527387432, "grad_norm": 8.843531608581543, "learning_rate": 7.628374929061201e-06, "loss": 0.55328712, "memory(GiB)": 34.88, "step": 52835, "train_speed(iter/s)": 0.411395 }, { "acc": 0.90549355, "epoch": 1.4306988330219585, "grad_norm": 6.522580623626709, "learning_rate": 7.627898883509397e-06, "loss": 0.42235665, "memory(GiB)": 34.88, "step": 52840, "train_speed(iter/s)": 0.411397 }, { "acc": 0.91271172, "epoch": 1.4308342133051744, "grad_norm": 8.522747039794922, "learning_rate": 7.627422805044261e-06, "loss": 0.45550404, "memory(GiB)": 34.88, "step": 52845, "train_speed(iter/s)": 0.411399 }, { "acc": 0.90661554, "epoch": 1.4309695935883897, "grad_norm": 7.142147064208984, "learning_rate": 7.626946693671753e-06, "loss": 0.42741461, "memory(GiB)": 34.88, "step": 52850, "train_speed(iter/s)": 0.411401 }, { "acc": 0.89075031, "epoch": 1.4311049738716053, "grad_norm": 11.049223899841309, "learning_rate": 7.62647054939784e-06, "loss": 0.61038513, "memory(GiB)": 34.88, "step": 52855, "train_speed(iter/s)": 0.411403 }, { "acc": 0.88226624, "epoch": 1.4312403541548209, "grad_norm": 6.717602252960205, "learning_rate": 7.6259943722284844e-06, "loss": 0.54831743, "memory(GiB)": 34.88, "step": 52860, "train_speed(iter/s)": 0.411405 }, { "acc": 0.88878765, "epoch": 1.4313757344380365, "grad_norm": 9.3413667678833, "learning_rate": 7.625518162169655e-06, "loss": 0.5639307, "memory(GiB)": 34.88, "step": 52865, "train_speed(iter/s)": 0.411407 }, { "acc": 0.89447145, "epoch": 1.431511114721252, "grad_norm": 12.640351295471191, "learning_rate": 7.625041919227314e-06, "loss": 0.55072932, "memory(GiB)": 34.88, "step": 52870, "train_speed(iter/s)": 0.411409 }, { "acc": 0.92190342, "epoch": 1.4316464950044676, "grad_norm": 13.038555145263672, "learning_rate": 7.624565643407429e-06, "loss": 0.37895894, "memory(GiB)": 34.88, "step": 52875, "train_speed(iter/s)": 0.411411 }, { "acc": 0.91213007, "epoch": 1.4317818752876832, "grad_norm": 5.54014778137207, "learning_rate": 7.624089334715965e-06, "loss": 0.49524841, "memory(GiB)": 34.88, "step": 52880, "train_speed(iter/s)": 0.411413 }, { "acc": 0.89960861, "epoch": 1.4319172555708986, "grad_norm": 8.525252342224121, "learning_rate": 7.623612993158888e-06, "loss": 0.46431222, "memory(GiB)": 34.88, "step": 52885, "train_speed(iter/s)": 0.411415 }, { "acc": 0.89771004, "epoch": 1.4320526358541141, "grad_norm": 9.943594932556152, "learning_rate": 7.623136618742168e-06, "loss": 0.50919628, "memory(GiB)": 34.88, "step": 52890, "train_speed(iter/s)": 0.411417 }, { "acc": 0.90003967, "epoch": 1.4321880161373297, "grad_norm": 17.57719612121582, "learning_rate": 7.622660211471771e-06, "loss": 0.57238874, "memory(GiB)": 34.88, "step": 52895, "train_speed(iter/s)": 0.411419 }, { "acc": 0.89524288, "epoch": 1.4323233964205453, "grad_norm": 12.379956245422363, "learning_rate": 7.622183771353663e-06, "loss": 0.61342688, "memory(GiB)": 34.88, "step": 52900, "train_speed(iter/s)": 0.411421 }, { "acc": 0.89857817, "epoch": 1.4324587767037609, "grad_norm": 8.475090026855469, "learning_rate": 7.621707298393815e-06, "loss": 0.63361306, "memory(GiB)": 34.88, "step": 52905, "train_speed(iter/s)": 0.411423 }, { "acc": 0.92159357, "epoch": 1.4325941569869765, "grad_norm": 3.7918779850006104, "learning_rate": 7.621230792598196e-06, "loss": 0.38813863, "memory(GiB)": 34.88, "step": 52910, "train_speed(iter/s)": 0.411426 }, { "acc": 0.9145298, "epoch": 1.432729537270192, "grad_norm": 5.0919342041015625, "learning_rate": 7.6207542539727715e-06, "loss": 0.39350877, "memory(GiB)": 34.88, "step": 52915, "train_speed(iter/s)": 0.411428 }, { "acc": 0.88038864, "epoch": 1.4328649175534074, "grad_norm": 6.061524391174316, "learning_rate": 7.6202776825235145e-06, "loss": 0.68828945, "memory(GiB)": 34.88, "step": 52920, "train_speed(iter/s)": 0.41143 }, { "acc": 0.89296761, "epoch": 1.4330002978366232, "grad_norm": 23.803712844848633, "learning_rate": 7.619801078256398e-06, "loss": 0.53020306, "memory(GiB)": 34.88, "step": 52925, "train_speed(iter/s)": 0.411432 }, { "acc": 0.87727165, "epoch": 1.4331356781198386, "grad_norm": 15.111849784851074, "learning_rate": 7.619324441177385e-06, "loss": 0.77303977, "memory(GiB)": 34.88, "step": 52930, "train_speed(iter/s)": 0.411434 }, { "acc": 0.87439384, "epoch": 1.4332710584030541, "grad_norm": 6.354092121124268, "learning_rate": 7.61884777129245e-06, "loss": 0.72704263, "memory(GiB)": 34.88, "step": 52935, "train_speed(iter/s)": 0.411436 }, { "acc": 0.89975529, "epoch": 1.4334064386862697, "grad_norm": 7.328013896942139, "learning_rate": 7.618371068607565e-06, "loss": 0.57785425, "memory(GiB)": 34.88, "step": 52940, "train_speed(iter/s)": 0.411438 }, { "acc": 0.90018959, "epoch": 1.4335418189694853, "grad_norm": 8.161482810974121, "learning_rate": 7.617894333128702e-06, "loss": 0.61553702, "memory(GiB)": 34.88, "step": 52945, "train_speed(iter/s)": 0.41144 }, { "acc": 0.89293585, "epoch": 1.4336771992527009, "grad_norm": 65.3283462524414, "learning_rate": 7.617417564861829e-06, "loss": 0.59903893, "memory(GiB)": 34.88, "step": 52950, "train_speed(iter/s)": 0.411443 }, { "acc": 0.88685379, "epoch": 1.4338125795359165, "grad_norm": 9.932060241699219, "learning_rate": 7.616940763812921e-06, "loss": 0.72304449, "memory(GiB)": 34.88, "step": 52955, "train_speed(iter/s)": 0.411445 }, { "acc": 0.88013306, "epoch": 1.433947959819132, "grad_norm": 7.843704700469971, "learning_rate": 7.6164639299879536e-06, "loss": 0.64616828, "memory(GiB)": 34.88, "step": 52960, "train_speed(iter/s)": 0.411447 }, { "acc": 0.87598705, "epoch": 1.4340833401023474, "grad_norm": 37.4412956237793, "learning_rate": 7.615987063392897e-06, "loss": 0.63100567, "memory(GiB)": 34.88, "step": 52965, "train_speed(iter/s)": 0.411449 }, { "acc": 0.87961617, "epoch": 1.434218720385563, "grad_norm": 11.406383514404297, "learning_rate": 7.615510164033726e-06, "loss": 0.66452308, "memory(GiB)": 34.88, "step": 52970, "train_speed(iter/s)": 0.41145 }, { "acc": 0.90207138, "epoch": 1.4343541006687786, "grad_norm": 16.677148818969727, "learning_rate": 7.615033231916413e-06, "loss": 0.53212209, "memory(GiB)": 34.88, "step": 52975, "train_speed(iter/s)": 0.411452 }, { "acc": 0.90340118, "epoch": 1.4344894809519941, "grad_norm": 5.752546787261963, "learning_rate": 7.614556267046934e-06, "loss": 0.5094902, "memory(GiB)": 34.88, "step": 52980, "train_speed(iter/s)": 0.411454 }, { "acc": 0.91559401, "epoch": 1.4346248612352097, "grad_norm": 8.987025260925293, "learning_rate": 7.614079269431264e-06, "loss": 0.53544159, "memory(GiB)": 34.88, "step": 52985, "train_speed(iter/s)": 0.411456 }, { "acc": 0.89924784, "epoch": 1.4347602415184253, "grad_norm": 7.0038228034973145, "learning_rate": 7.613602239075376e-06, "loss": 0.54593039, "memory(GiB)": 34.88, "step": 52990, "train_speed(iter/s)": 0.411458 }, { "acc": 0.89633236, "epoch": 1.4348956218016409, "grad_norm": 17.413223266601562, "learning_rate": 7.61312517598525e-06, "loss": 0.57554598, "memory(GiB)": 34.88, "step": 52995, "train_speed(iter/s)": 0.41146 }, { "acc": 0.90329361, "epoch": 1.4350310020848562, "grad_norm": 3.8676135540008545, "learning_rate": 7.612648080166859e-06, "loss": 0.53412585, "memory(GiB)": 34.88, "step": 53000, "train_speed(iter/s)": 0.411462 }, { "acc": 0.90554657, "epoch": 1.435166382368072, "grad_norm": 21.170074462890625, "learning_rate": 7.612170951626182e-06, "loss": 0.49131694, "memory(GiB)": 34.88, "step": 53005, "train_speed(iter/s)": 0.411464 }, { "acc": 0.91128912, "epoch": 1.4353017626512874, "grad_norm": 5.218272686004639, "learning_rate": 7.611693790369192e-06, "loss": 0.43787022, "memory(GiB)": 34.88, "step": 53010, "train_speed(iter/s)": 0.411466 }, { "acc": 0.89907026, "epoch": 1.435437142934503, "grad_norm": 6.772834300994873, "learning_rate": 7.611216596401871e-06, "loss": 0.46510062, "memory(GiB)": 34.88, "step": 53015, "train_speed(iter/s)": 0.411468 }, { "acc": 0.88338032, "epoch": 1.4355725232177186, "grad_norm": 6.422058582305908, "learning_rate": 7.610739369730193e-06, "loss": 0.6338336, "memory(GiB)": 34.88, "step": 53020, "train_speed(iter/s)": 0.41147 }, { "acc": 0.87676306, "epoch": 1.4357079035009341, "grad_norm": 8.685070991516113, "learning_rate": 7.610262110360138e-06, "loss": 0.65702229, "memory(GiB)": 34.88, "step": 53025, "train_speed(iter/s)": 0.411472 }, { "acc": 0.90540047, "epoch": 1.4358432837841497, "grad_norm": 20.75360870361328, "learning_rate": 7.6097848182976865e-06, "loss": 0.46362371, "memory(GiB)": 34.88, "step": 53030, "train_speed(iter/s)": 0.411475 }, { "acc": 0.91461058, "epoch": 1.4359786640673653, "grad_norm": 8.859132766723633, "learning_rate": 7.609307493548812e-06, "loss": 0.44324732, "memory(GiB)": 34.88, "step": 53035, "train_speed(iter/s)": 0.411477 }, { "acc": 0.89222431, "epoch": 1.4361140443505809, "grad_norm": 5.679901599884033, "learning_rate": 7.6088301361194985e-06, "loss": 0.49878397, "memory(GiB)": 34.88, "step": 53040, "train_speed(iter/s)": 0.411479 }, { "acc": 0.8817625, "epoch": 1.4362494246337962, "grad_norm": 9.619501113891602, "learning_rate": 7.6083527460157245e-06, "loss": 0.69744873, "memory(GiB)": 34.88, "step": 53045, "train_speed(iter/s)": 0.411481 }, { "acc": 0.88596439, "epoch": 1.4363848049170118, "grad_norm": 13.905268669128418, "learning_rate": 7.607875323243473e-06, "loss": 0.57880726, "memory(GiB)": 34.88, "step": 53050, "train_speed(iter/s)": 0.411483 }, { "acc": 0.89552708, "epoch": 1.4365201852002274, "grad_norm": 6.534646511077881, "learning_rate": 7.60739786780872e-06, "loss": 0.61771102, "memory(GiB)": 34.88, "step": 53055, "train_speed(iter/s)": 0.411485 }, { "acc": 0.90879536, "epoch": 1.436655565483443, "grad_norm": 6.566226005554199, "learning_rate": 7.606920379717448e-06, "loss": 0.40413275, "memory(GiB)": 34.88, "step": 53060, "train_speed(iter/s)": 0.411487 }, { "acc": 0.90698814, "epoch": 1.4367909457666586, "grad_norm": 6.426933288574219, "learning_rate": 7.6064428589756405e-06, "loss": 0.47155933, "memory(GiB)": 34.88, "step": 53065, "train_speed(iter/s)": 0.411489 }, { "acc": 0.8879324, "epoch": 1.4369263260498741, "grad_norm": 5.6902618408203125, "learning_rate": 7.605965305589277e-06, "loss": 0.63143282, "memory(GiB)": 34.88, "step": 53070, "train_speed(iter/s)": 0.411492 }, { "acc": 0.89268284, "epoch": 1.4370617063330897, "grad_norm": 4.1767778396606445, "learning_rate": 7.605487719564342e-06, "loss": 0.54726276, "memory(GiB)": 34.88, "step": 53075, "train_speed(iter/s)": 0.411494 }, { "acc": 0.90388451, "epoch": 1.437197086616305, "grad_norm": 12.590446472167969, "learning_rate": 7.6050101009068155e-06, "loss": 0.47872047, "memory(GiB)": 34.88, "step": 53080, "train_speed(iter/s)": 0.411495 }, { "acc": 0.9041563, "epoch": 1.4373324668995209, "grad_norm": 5.636552810668945, "learning_rate": 7.604532449622684e-06, "loss": 0.48828769, "memory(GiB)": 34.88, "step": 53085, "train_speed(iter/s)": 0.411497 }, { "acc": 0.91372423, "epoch": 1.4374678471827362, "grad_norm": 17.66002082824707, "learning_rate": 7.604054765717929e-06, "loss": 0.50507483, "memory(GiB)": 34.88, "step": 53090, "train_speed(iter/s)": 0.411499 }, { "acc": 0.88697996, "epoch": 1.4376032274659518, "grad_norm": 7.450891971588135, "learning_rate": 7.603577049198533e-06, "loss": 0.66613531, "memory(GiB)": 34.88, "step": 53095, "train_speed(iter/s)": 0.411501 }, { "acc": 0.88783779, "epoch": 1.4377386077491674, "grad_norm": 27.338640213012695, "learning_rate": 7.603099300070482e-06, "loss": 0.60638747, "memory(GiB)": 34.88, "step": 53100, "train_speed(iter/s)": 0.411503 }, { "acc": 0.91018238, "epoch": 1.437873988032383, "grad_norm": 6.534680366516113, "learning_rate": 7.602621518339761e-06, "loss": 0.49061651, "memory(GiB)": 34.88, "step": 53105, "train_speed(iter/s)": 0.411505 }, { "acc": 0.90703049, "epoch": 1.4380093683155986, "grad_norm": 3.6995162963867188, "learning_rate": 7.6021437040123546e-06, "loss": 0.50851088, "memory(GiB)": 34.88, "step": 53110, "train_speed(iter/s)": 0.411507 }, { "acc": 0.88595104, "epoch": 1.4381447485988141, "grad_norm": 8.764086723327637, "learning_rate": 7.601665857094249e-06, "loss": 0.64155197, "memory(GiB)": 34.88, "step": 53115, "train_speed(iter/s)": 0.411509 }, { "acc": 0.9022604, "epoch": 1.4382801288820297, "grad_norm": 5.311961650848389, "learning_rate": 7.601187977591431e-06, "loss": 0.56610847, "memory(GiB)": 34.88, "step": 53120, "train_speed(iter/s)": 0.411511 }, { "acc": 0.88899307, "epoch": 1.438415509165245, "grad_norm": 7.4194416999816895, "learning_rate": 7.6007100655098845e-06, "loss": 0.53609314, "memory(GiB)": 34.88, "step": 53125, "train_speed(iter/s)": 0.411513 }, { "acc": 0.89316654, "epoch": 1.4385508894484607, "grad_norm": 5.530922889709473, "learning_rate": 7.6002321208555974e-06, "loss": 0.54893532, "memory(GiB)": 34.88, "step": 53130, "train_speed(iter/s)": 0.411515 }, { "acc": 0.90935097, "epoch": 1.4386862697316762, "grad_norm": 8.030903816223145, "learning_rate": 7.599754143634558e-06, "loss": 0.46473589, "memory(GiB)": 34.88, "step": 53135, "train_speed(iter/s)": 0.411517 }, { "acc": 0.86085072, "epoch": 1.4388216500148918, "grad_norm": 7.118036270141602, "learning_rate": 7.599276133852752e-06, "loss": 0.77651033, "memory(GiB)": 34.88, "step": 53140, "train_speed(iter/s)": 0.411519 }, { "acc": 0.87336359, "epoch": 1.4389570302981074, "grad_norm": 8.065838813781738, "learning_rate": 7.598798091516168e-06, "loss": 0.65813479, "memory(GiB)": 34.88, "step": 53145, "train_speed(iter/s)": 0.411521 }, { "acc": 0.89982471, "epoch": 1.439092410581323, "grad_norm": 8.88725757598877, "learning_rate": 7.598320016630794e-06, "loss": 0.54043908, "memory(GiB)": 34.88, "step": 53150, "train_speed(iter/s)": 0.411524 }, { "acc": 0.89444237, "epoch": 1.4392277908645386, "grad_norm": 7.763824462890625, "learning_rate": 7.597841909202621e-06, "loss": 0.63836393, "memory(GiB)": 34.88, "step": 53155, "train_speed(iter/s)": 0.411525 }, { "acc": 0.89574327, "epoch": 1.439363171147754, "grad_norm": 13.6195650100708, "learning_rate": 7.5973637692376364e-06, "loss": 0.52275295, "memory(GiB)": 34.88, "step": 53160, "train_speed(iter/s)": 0.411527 }, { "acc": 0.90372028, "epoch": 1.4394985514309697, "grad_norm": 6.811896800994873, "learning_rate": 7.596885596741832e-06, "loss": 0.51108999, "memory(GiB)": 34.88, "step": 53165, "train_speed(iter/s)": 0.411529 }, { "acc": 0.88299227, "epoch": 1.439633931714185, "grad_norm": 10.745345115661621, "learning_rate": 7.596407391721194e-06, "loss": 0.72751317, "memory(GiB)": 34.88, "step": 53170, "train_speed(iter/s)": 0.411531 }, { "acc": 0.89304914, "epoch": 1.4397693119974007, "grad_norm": 8.818476676940918, "learning_rate": 7.5959291541817174e-06, "loss": 0.60347834, "memory(GiB)": 34.88, "step": 53175, "train_speed(iter/s)": 0.411534 }, { "acc": 0.92182236, "epoch": 1.4399046922806162, "grad_norm": 6.234923362731934, "learning_rate": 7.595450884129388e-06, "loss": 0.40943298, "memory(GiB)": 34.88, "step": 53180, "train_speed(iter/s)": 0.411536 }, { "acc": 0.90375423, "epoch": 1.4400400725638318, "grad_norm": 8.264444351196289, "learning_rate": 7.5949725815702e-06, "loss": 0.42112961, "memory(GiB)": 34.88, "step": 53185, "train_speed(iter/s)": 0.411538 }, { "acc": 0.90315704, "epoch": 1.4401754528470474, "grad_norm": 14.621318817138672, "learning_rate": 7.594494246510147e-06, "loss": 0.49780731, "memory(GiB)": 34.88, "step": 53190, "train_speed(iter/s)": 0.41154 }, { "acc": 0.90513649, "epoch": 1.440310833130263, "grad_norm": 9.263993263244629, "learning_rate": 7.594015878955217e-06, "loss": 0.51566062, "memory(GiB)": 34.88, "step": 53195, "train_speed(iter/s)": 0.411542 }, { "acc": 0.9067378, "epoch": 1.4404462134134786, "grad_norm": 13.342116355895996, "learning_rate": 7.593537478911403e-06, "loss": 0.50636449, "memory(GiB)": 34.88, "step": 53200, "train_speed(iter/s)": 0.411544 }, { "acc": 0.90033197, "epoch": 1.440581593696694, "grad_norm": 9.761874198913574, "learning_rate": 7.593059046384701e-06, "loss": 0.48042631, "memory(GiB)": 34.88, "step": 53205, "train_speed(iter/s)": 0.411546 }, { "acc": 0.91129951, "epoch": 1.4407169739799095, "grad_norm": 15.917963027954102, "learning_rate": 7.592580581381103e-06, "loss": 0.48262854, "memory(GiB)": 34.88, "step": 53210, "train_speed(iter/s)": 0.411548 }, { "acc": 0.89049339, "epoch": 1.440852354263125, "grad_norm": 27.7109317779541, "learning_rate": 7.592102083906602e-06, "loss": 0.61302471, "memory(GiB)": 34.88, "step": 53215, "train_speed(iter/s)": 0.411549 }, { "acc": 0.91106758, "epoch": 1.4409877345463407, "grad_norm": 7.701587200164795, "learning_rate": 7.591623553967191e-06, "loss": 0.50251436, "memory(GiB)": 34.88, "step": 53220, "train_speed(iter/s)": 0.411551 }, { "acc": 0.92327175, "epoch": 1.4411231148295562, "grad_norm": 5.836732864379883, "learning_rate": 7.5911449915688685e-06, "loss": 0.38030224, "memory(GiB)": 34.88, "step": 53225, "train_speed(iter/s)": 0.411553 }, { "acc": 0.89796801, "epoch": 1.4412584951127718, "grad_norm": 8.414419174194336, "learning_rate": 7.590666396717624e-06, "loss": 0.5227375, "memory(GiB)": 34.88, "step": 53230, "train_speed(iter/s)": 0.411555 }, { "acc": 0.9013361, "epoch": 1.4413938753959874, "grad_norm": 6.830255031585693, "learning_rate": 7.590187769419458e-06, "loss": 0.55169525, "memory(GiB)": 34.88, "step": 53235, "train_speed(iter/s)": 0.411557 }, { "acc": 0.88743649, "epoch": 1.4415292556792028, "grad_norm": 7.938295364379883, "learning_rate": 7.5897091096803625e-06, "loss": 0.66640763, "memory(GiB)": 34.88, "step": 53240, "train_speed(iter/s)": 0.411559 }, { "acc": 0.88119202, "epoch": 1.4416646359624186, "grad_norm": 5.719595909118652, "learning_rate": 7.589230417506337e-06, "loss": 0.64083185, "memory(GiB)": 34.88, "step": 53245, "train_speed(iter/s)": 0.411561 }, { "acc": 0.90463142, "epoch": 1.441800016245634, "grad_norm": 5.705687046051025, "learning_rate": 7.588751692903375e-06, "loss": 0.4521596, "memory(GiB)": 34.88, "step": 53250, "train_speed(iter/s)": 0.411563 }, { "acc": 0.91077366, "epoch": 1.4419353965288495, "grad_norm": 6.895359039306641, "learning_rate": 7.588272935877473e-06, "loss": 0.45739355, "memory(GiB)": 34.88, "step": 53255, "train_speed(iter/s)": 0.411565 }, { "acc": 0.89481106, "epoch": 1.442070776812065, "grad_norm": 8.016252517700195, "learning_rate": 7.587794146434633e-06, "loss": 0.56207256, "memory(GiB)": 34.88, "step": 53260, "train_speed(iter/s)": 0.411567 }, { "acc": 0.88665905, "epoch": 1.4422061570952807, "grad_norm": 8.843790054321289, "learning_rate": 7.587315324580847e-06, "loss": 0.59718103, "memory(GiB)": 34.88, "step": 53265, "train_speed(iter/s)": 0.411569 }, { "acc": 0.91756535, "epoch": 1.4423415373784962, "grad_norm": 3.411346912384033, "learning_rate": 7.586836470322118e-06, "loss": 0.39309697, "memory(GiB)": 34.88, "step": 53270, "train_speed(iter/s)": 0.411571 }, { "acc": 0.89920988, "epoch": 1.4424769176617118, "grad_norm": 18.72234535217285, "learning_rate": 7.586357583664442e-06, "loss": 0.5747705, "memory(GiB)": 34.88, "step": 53275, "train_speed(iter/s)": 0.411572 }, { "acc": 0.88245201, "epoch": 1.4426122979449274, "grad_norm": 18.970693588256836, "learning_rate": 7.585878664613818e-06, "loss": 0.64685793, "memory(GiB)": 34.88, "step": 53280, "train_speed(iter/s)": 0.411574 }, { "acc": 0.88880825, "epoch": 1.4427476782281428, "grad_norm": 11.93909740447998, "learning_rate": 7.585399713176246e-06, "loss": 0.64338312, "memory(GiB)": 34.88, "step": 53285, "train_speed(iter/s)": 0.411576 }, { "acc": 0.89268103, "epoch": 1.4428830585113583, "grad_norm": 11.175980567932129, "learning_rate": 7.584920729357725e-06, "loss": 0.59847145, "memory(GiB)": 34.88, "step": 53290, "train_speed(iter/s)": 0.411578 }, { "acc": 0.90696859, "epoch": 1.443018438794574, "grad_norm": 5.6080474853515625, "learning_rate": 7.584441713164258e-06, "loss": 0.45042377, "memory(GiB)": 34.88, "step": 53295, "train_speed(iter/s)": 0.41158 }, { "acc": 0.90254602, "epoch": 1.4431538190777895, "grad_norm": 14.532597541809082, "learning_rate": 7.583962664601843e-06, "loss": 0.53746576, "memory(GiB)": 34.88, "step": 53300, "train_speed(iter/s)": 0.411582 }, { "acc": 0.90346794, "epoch": 1.443289199361005, "grad_norm": 9.278915405273438, "learning_rate": 7.583483583676482e-06, "loss": 0.51758285, "memory(GiB)": 34.88, "step": 53305, "train_speed(iter/s)": 0.411584 }, { "acc": 0.88952656, "epoch": 1.4434245796442207, "grad_norm": 5.83211088180542, "learning_rate": 7.583004470394175e-06, "loss": 0.51657438, "memory(GiB)": 34.88, "step": 53310, "train_speed(iter/s)": 0.411586 }, { "acc": 0.88663101, "epoch": 1.4435599599274362, "grad_norm": 10.386276245117188, "learning_rate": 7.582525324760927e-06, "loss": 0.7450036, "memory(GiB)": 34.88, "step": 53315, "train_speed(iter/s)": 0.411589 }, { "acc": 0.89539738, "epoch": 1.4436953402106516, "grad_norm": 7.620725154876709, "learning_rate": 7.5820461467827346e-06, "loss": 0.54361124, "memory(GiB)": 34.88, "step": 53320, "train_speed(iter/s)": 0.41159 }, { "acc": 0.88392735, "epoch": 1.4438307204938674, "grad_norm": 6.774762153625488, "learning_rate": 7.5815669364656054e-06, "loss": 0.60199409, "memory(GiB)": 34.88, "step": 53325, "train_speed(iter/s)": 0.411593 }, { "acc": 0.90058985, "epoch": 1.4439661007770828, "grad_norm": 6.232203960418701, "learning_rate": 7.581087693815543e-06, "loss": 0.4943572, "memory(GiB)": 34.88, "step": 53330, "train_speed(iter/s)": 0.411594 }, { "acc": 0.89278755, "epoch": 1.4441014810602983, "grad_norm": 13.41272258758545, "learning_rate": 7.580608418838546e-06, "loss": 0.53470621, "memory(GiB)": 34.88, "step": 53335, "train_speed(iter/s)": 0.411596 }, { "acc": 0.87182322, "epoch": 1.444236861343514, "grad_norm": 9.32983684539795, "learning_rate": 7.580129111540624e-06, "loss": 0.63652916, "memory(GiB)": 34.88, "step": 53340, "train_speed(iter/s)": 0.411598 }, { "acc": 0.92460299, "epoch": 1.4443722416267295, "grad_norm": 10.364066123962402, "learning_rate": 7.579649771927778e-06, "loss": 0.40483341, "memory(GiB)": 34.88, "step": 53345, "train_speed(iter/s)": 0.4116 }, { "acc": 0.90326643, "epoch": 1.444507621909945, "grad_norm": 7.266246318817139, "learning_rate": 7.579170400006011e-06, "loss": 0.47968273, "memory(GiB)": 34.88, "step": 53350, "train_speed(iter/s)": 0.411602 }, { "acc": 0.8870451, "epoch": 1.4446430021931607, "grad_norm": 8.243502616882324, "learning_rate": 7.578690995781332e-06, "loss": 0.65339479, "memory(GiB)": 34.88, "step": 53355, "train_speed(iter/s)": 0.411605 }, { "acc": 0.90192442, "epoch": 1.4447783824763762, "grad_norm": 10.755391120910645, "learning_rate": 7.5782115592597425e-06, "loss": 0.60100551, "memory(GiB)": 34.88, "step": 53360, "train_speed(iter/s)": 0.411607 }, { "acc": 0.91059017, "epoch": 1.4449137627595916, "grad_norm": 5.823614597320557, "learning_rate": 7.5777320904472525e-06, "loss": 0.4390357, "memory(GiB)": 34.88, "step": 53365, "train_speed(iter/s)": 0.411609 }, { "acc": 0.87925644, "epoch": 1.4450491430428072, "grad_norm": 8.094022750854492, "learning_rate": 7.5772525893498646e-06, "loss": 0.64189367, "memory(GiB)": 34.88, "step": 53370, "train_speed(iter/s)": 0.411611 }, { "acc": 0.91647472, "epoch": 1.4451845233260228, "grad_norm": 6.735677719116211, "learning_rate": 7.57677305597359e-06, "loss": 0.40659022, "memory(GiB)": 34.88, "step": 53375, "train_speed(iter/s)": 0.411612 }, { "acc": 0.90389977, "epoch": 1.4453199036092383, "grad_norm": 5.347715377807617, "learning_rate": 7.57629349032443e-06, "loss": 0.54576535, "memory(GiB)": 34.88, "step": 53380, "train_speed(iter/s)": 0.411615 }, { "acc": 0.89576836, "epoch": 1.445455283892454, "grad_norm": 8.448747634887695, "learning_rate": 7.575813892408399e-06, "loss": 0.55626454, "memory(GiB)": 34.88, "step": 53385, "train_speed(iter/s)": 0.411617 }, { "acc": 0.90566845, "epoch": 1.4455906641756695, "grad_norm": 7.2416768074035645, "learning_rate": 7.575334262231498e-06, "loss": 0.48807216, "memory(GiB)": 34.88, "step": 53390, "train_speed(iter/s)": 0.411619 }, { "acc": 0.88441296, "epoch": 1.445726044458885, "grad_norm": 9.87362003326416, "learning_rate": 7.574854599799739e-06, "loss": 0.72027884, "memory(GiB)": 34.88, "step": 53395, "train_speed(iter/s)": 0.411621 }, { "acc": 0.89413929, "epoch": 1.4458614247421004, "grad_norm": 19.638809204101562, "learning_rate": 7.574374905119128e-06, "loss": 0.56314993, "memory(GiB)": 34.88, "step": 53400, "train_speed(iter/s)": 0.411623 }, { "acc": 0.91089687, "epoch": 1.4459968050253162, "grad_norm": 7.188549518585205, "learning_rate": 7.573895178195678e-06, "loss": 0.45824862, "memory(GiB)": 34.88, "step": 53405, "train_speed(iter/s)": 0.411625 }, { "acc": 0.88758869, "epoch": 1.4461321853085316, "grad_norm": 5.36644172668457, "learning_rate": 7.573415419035397e-06, "loss": 0.67278619, "memory(GiB)": 34.88, "step": 53410, "train_speed(iter/s)": 0.411627 }, { "acc": 0.90789547, "epoch": 1.4462675655917472, "grad_norm": 7.023416042327881, "learning_rate": 7.572935627644294e-06, "loss": 0.47049332, "memory(GiB)": 34.88, "step": 53415, "train_speed(iter/s)": 0.411629 }, { "acc": 0.87791996, "epoch": 1.4464029458749628, "grad_norm": 12.87610912322998, "learning_rate": 7.57245580402838e-06, "loss": 0.65836267, "memory(GiB)": 34.88, "step": 53420, "train_speed(iter/s)": 0.411631 }, { "acc": 0.89927502, "epoch": 1.4465383261581783, "grad_norm": 6.0234150886535645, "learning_rate": 7.571975948193667e-06, "loss": 0.56814394, "memory(GiB)": 34.88, "step": 53425, "train_speed(iter/s)": 0.411633 }, { "acc": 0.90349121, "epoch": 1.446673706441394, "grad_norm": 7.093152046203613, "learning_rate": 7.571496060146164e-06, "loss": 0.46870637, "memory(GiB)": 34.88, "step": 53430, "train_speed(iter/s)": 0.411635 }, { "acc": 0.89908371, "epoch": 1.4468090867246095, "grad_norm": 6.92300271987915, "learning_rate": 7.571016139891881e-06, "loss": 0.61300573, "memory(GiB)": 34.88, "step": 53435, "train_speed(iter/s)": 0.411637 }, { "acc": 0.8898428, "epoch": 1.446944467007825, "grad_norm": 6.9764838218688965, "learning_rate": 7.570536187436834e-06, "loss": 0.52814398, "memory(GiB)": 34.88, "step": 53440, "train_speed(iter/s)": 0.411639 }, { "acc": 0.89348202, "epoch": 1.4470798472910404, "grad_norm": 6.113006114959717, "learning_rate": 7.570056202787035e-06, "loss": 0.53640633, "memory(GiB)": 34.88, "step": 53445, "train_speed(iter/s)": 0.411641 }, { "acc": 0.88631926, "epoch": 1.447215227574256, "grad_norm": 8.158334732055664, "learning_rate": 7.569576185948494e-06, "loss": 0.68834362, "memory(GiB)": 34.88, "step": 53450, "train_speed(iter/s)": 0.411643 }, { "acc": 0.89444942, "epoch": 1.4473506078574716, "grad_norm": 9.367218017578125, "learning_rate": 7.569096136927226e-06, "loss": 0.61081924, "memory(GiB)": 34.88, "step": 53455, "train_speed(iter/s)": 0.411645 }, { "acc": 0.90758352, "epoch": 1.4474859881406872, "grad_norm": 14.564066886901855, "learning_rate": 7.568616055729243e-06, "loss": 0.54668365, "memory(GiB)": 34.88, "step": 53460, "train_speed(iter/s)": 0.411647 }, { "acc": 0.91607046, "epoch": 1.4476213684239028, "grad_norm": 8.634916305541992, "learning_rate": 7.568135942360563e-06, "loss": 0.42551813, "memory(GiB)": 34.88, "step": 53465, "train_speed(iter/s)": 0.411649 }, { "acc": 0.86976881, "epoch": 1.4477567487071183, "grad_norm": 13.446002960205078, "learning_rate": 7.567655796827196e-06, "loss": 0.65093598, "memory(GiB)": 34.88, "step": 53470, "train_speed(iter/s)": 0.411651 }, { "acc": 0.89574966, "epoch": 1.447892128990334, "grad_norm": 10.779041290283203, "learning_rate": 7.567175619135157e-06, "loss": 0.56394315, "memory(GiB)": 34.88, "step": 53475, "train_speed(iter/s)": 0.411653 }, { "acc": 0.89709549, "epoch": 1.4480275092735493, "grad_norm": 10.085943222045898, "learning_rate": 7.566695409290463e-06, "loss": 0.64259458, "memory(GiB)": 34.88, "step": 53480, "train_speed(iter/s)": 0.411655 }, { "acc": 0.89541512, "epoch": 1.448162889556765, "grad_norm": 6.012624740600586, "learning_rate": 7.566215167299129e-06, "loss": 0.49446316, "memory(GiB)": 34.88, "step": 53485, "train_speed(iter/s)": 0.411657 }, { "acc": 0.89077358, "epoch": 1.4482982698399804, "grad_norm": 11.527748107910156, "learning_rate": 7.565734893167172e-06, "loss": 0.58058643, "memory(GiB)": 34.88, "step": 53490, "train_speed(iter/s)": 0.411659 }, { "acc": 0.91602802, "epoch": 1.448433650123196, "grad_norm": 9.854727745056152, "learning_rate": 7.565254586900606e-06, "loss": 0.45221777, "memory(GiB)": 34.88, "step": 53495, "train_speed(iter/s)": 0.411661 }, { "acc": 0.90593157, "epoch": 1.4485690304064116, "grad_norm": 10.295578002929688, "learning_rate": 7.564774248505448e-06, "loss": 0.49946337, "memory(GiB)": 34.88, "step": 53500, "train_speed(iter/s)": 0.411663 }, { "acc": 0.90792007, "epoch": 1.4487044106896272, "grad_norm": 12.7854585647583, "learning_rate": 7.564293877987717e-06, "loss": 0.5196734, "memory(GiB)": 34.88, "step": 53505, "train_speed(iter/s)": 0.411665 }, { "acc": 0.91123924, "epoch": 1.4488397909728428, "grad_norm": 9.100054740905762, "learning_rate": 7.563813475353431e-06, "loss": 0.46247768, "memory(GiB)": 34.88, "step": 53510, "train_speed(iter/s)": 0.411667 }, { "acc": 0.90171299, "epoch": 1.4489751712560583, "grad_norm": 6.841635704040527, "learning_rate": 7.563333040608607e-06, "loss": 0.54912276, "memory(GiB)": 34.88, "step": 53515, "train_speed(iter/s)": 0.411669 }, { "acc": 0.90880833, "epoch": 1.449110551539274, "grad_norm": 7.153518199920654, "learning_rate": 7.5628525737592615e-06, "loss": 0.48739305, "memory(GiB)": 34.88, "step": 53520, "train_speed(iter/s)": 0.411672 }, { "acc": 0.90404606, "epoch": 1.4492459318224893, "grad_norm": 10.510140419006348, "learning_rate": 7.562372074811416e-06, "loss": 0.5172401, "memory(GiB)": 34.88, "step": 53525, "train_speed(iter/s)": 0.411674 }, { "acc": 0.88259602, "epoch": 1.4493813121057049, "grad_norm": 12.745750427246094, "learning_rate": 7.561891543771088e-06, "loss": 0.66551018, "memory(GiB)": 34.88, "step": 53530, "train_speed(iter/s)": 0.411676 }, { "acc": 0.86777544, "epoch": 1.4495166923889204, "grad_norm": 6.580905914306641, "learning_rate": 7.561410980644298e-06, "loss": 0.73259773, "memory(GiB)": 34.88, "step": 53535, "train_speed(iter/s)": 0.411678 }, { "acc": 0.88885984, "epoch": 1.449652072672136, "grad_norm": 7.346602916717529, "learning_rate": 7.560930385437068e-06, "loss": 0.60920582, "memory(GiB)": 34.88, "step": 53540, "train_speed(iter/s)": 0.41168 }, { "acc": 0.89507818, "epoch": 1.4497874529553516, "grad_norm": 11.429417610168457, "learning_rate": 7.5604497581554135e-06, "loss": 0.54578967, "memory(GiB)": 34.88, "step": 53545, "train_speed(iter/s)": 0.411682 }, { "acc": 0.90796432, "epoch": 1.4499228332385672, "grad_norm": 8.195379257202148, "learning_rate": 7.559969098805359e-06, "loss": 0.49346313, "memory(GiB)": 34.88, "step": 53550, "train_speed(iter/s)": 0.411684 }, { "acc": 0.86941252, "epoch": 1.4500582135217828, "grad_norm": 10.821879386901855, "learning_rate": 7.559488407392924e-06, "loss": 0.78947411, "memory(GiB)": 34.88, "step": 53555, "train_speed(iter/s)": 0.411686 }, { "acc": 0.88779182, "epoch": 1.4501935938049981, "grad_norm": 5.92238187789917, "learning_rate": 7.559007683924132e-06, "loss": 0.6505281, "memory(GiB)": 34.88, "step": 53560, "train_speed(iter/s)": 0.411688 }, { "acc": 0.89357395, "epoch": 1.450328974088214, "grad_norm": 5.908132076263428, "learning_rate": 7.558526928405002e-06, "loss": 0.54281063, "memory(GiB)": 34.88, "step": 53565, "train_speed(iter/s)": 0.411691 }, { "acc": 0.89047337, "epoch": 1.4504643543714293, "grad_norm": 9.905888557434082, "learning_rate": 7.55804614084156e-06, "loss": 0.5715806, "memory(GiB)": 34.88, "step": 53570, "train_speed(iter/s)": 0.411693 }, { "acc": 0.91178188, "epoch": 1.4505997346546449, "grad_norm": 2.0053277015686035, "learning_rate": 7.557565321239826e-06, "loss": 0.4069056, "memory(GiB)": 34.88, "step": 53575, "train_speed(iter/s)": 0.411695 }, { "acc": 0.88400497, "epoch": 1.4507351149378604, "grad_norm": 24.20726203918457, "learning_rate": 7.557084469605825e-06, "loss": 0.69727497, "memory(GiB)": 34.88, "step": 53580, "train_speed(iter/s)": 0.411696 }, { "acc": 0.86459208, "epoch": 1.450870495221076, "grad_norm": 11.670368194580078, "learning_rate": 7.556603585945579e-06, "loss": 0.89452763, "memory(GiB)": 34.88, "step": 53585, "train_speed(iter/s)": 0.411699 }, { "acc": 0.91324396, "epoch": 1.4510058755042916, "grad_norm": 16.872840881347656, "learning_rate": 7.556122670265111e-06, "loss": 0.48315005, "memory(GiB)": 34.88, "step": 53590, "train_speed(iter/s)": 0.411701 }, { "acc": 0.90830126, "epoch": 1.4511412557875072, "grad_norm": 4.836723804473877, "learning_rate": 7.555641722570453e-06, "loss": 0.4723361, "memory(GiB)": 34.88, "step": 53595, "train_speed(iter/s)": 0.411703 }, { "acc": 0.903022, "epoch": 1.4512766360707228, "grad_norm": 12.618559837341309, "learning_rate": 7.55516074286762e-06, "loss": 0.49423561, "memory(GiB)": 34.88, "step": 53600, "train_speed(iter/s)": 0.411705 }, { "acc": 0.91980629, "epoch": 1.4514120163539381, "grad_norm": 8.989669799804688, "learning_rate": 7.5546797311626415e-06, "loss": 0.38598022, "memory(GiB)": 34.88, "step": 53605, "train_speed(iter/s)": 0.411707 }, { "acc": 0.905443, "epoch": 1.4515473966371537, "grad_norm": 5.714386463165283, "learning_rate": 7.554198687461543e-06, "loss": 0.54060273, "memory(GiB)": 34.88, "step": 53610, "train_speed(iter/s)": 0.411708 }, { "acc": 0.89359779, "epoch": 1.4516827769203693, "grad_norm": 10.757648468017578, "learning_rate": 7.55371761177035e-06, "loss": 0.54156761, "memory(GiB)": 34.88, "step": 53615, "train_speed(iter/s)": 0.41171 }, { "acc": 0.9166213, "epoch": 1.4518181572035849, "grad_norm": 7.8280839920043945, "learning_rate": 7.553236504095091e-06, "loss": 0.42758374, "memory(GiB)": 34.88, "step": 53620, "train_speed(iter/s)": 0.411712 }, { "acc": 0.90013599, "epoch": 1.4519535374868004, "grad_norm": 8.172148704528809, "learning_rate": 7.5527553644417906e-06, "loss": 0.52128572, "memory(GiB)": 34.88, "step": 53625, "train_speed(iter/s)": 0.411714 }, { "acc": 0.89804115, "epoch": 1.452088917770016, "grad_norm": 6.600294589996338, "learning_rate": 7.5522741928164755e-06, "loss": 0.61038122, "memory(GiB)": 34.88, "step": 53630, "train_speed(iter/s)": 0.411716 }, { "acc": 0.9048275, "epoch": 1.4522242980532316, "grad_norm": 10.60817813873291, "learning_rate": 7.551792989225176e-06, "loss": 0.56288242, "memory(GiB)": 34.88, "step": 53635, "train_speed(iter/s)": 0.411718 }, { "acc": 0.88931761, "epoch": 1.452359678336447, "grad_norm": 3.9535653591156006, "learning_rate": 7.551311753673918e-06, "loss": 0.53791752, "memory(GiB)": 34.88, "step": 53640, "train_speed(iter/s)": 0.41172 }, { "acc": 0.89738302, "epoch": 1.4524950586196628, "grad_norm": 8.51976490020752, "learning_rate": 7.5508304861687286e-06, "loss": 0.53124657, "memory(GiB)": 34.88, "step": 53645, "train_speed(iter/s)": 0.411722 }, { "acc": 0.90544796, "epoch": 1.4526304389028781, "grad_norm": 6.335030555725098, "learning_rate": 7.550349186715642e-06, "loss": 0.48840909, "memory(GiB)": 34.88, "step": 53650, "train_speed(iter/s)": 0.411724 }, { "acc": 0.91393557, "epoch": 1.4527658191860937, "grad_norm": 5.592706203460693, "learning_rate": 7.549867855320679e-06, "loss": 0.51699791, "memory(GiB)": 34.88, "step": 53655, "train_speed(iter/s)": 0.411726 }, { "acc": 0.92378902, "epoch": 1.4529011994693093, "grad_norm": 6.363797187805176, "learning_rate": 7.549386491989879e-06, "loss": 0.32455778, "memory(GiB)": 34.88, "step": 53660, "train_speed(iter/s)": 0.411728 }, { "acc": 0.8824213, "epoch": 1.4530365797525249, "grad_norm": 26.003379821777344, "learning_rate": 7.548905096729265e-06, "loss": 0.62289076, "memory(GiB)": 34.88, "step": 53665, "train_speed(iter/s)": 0.41173 }, { "acc": 0.8947298, "epoch": 1.4531719600357405, "grad_norm": 12.717823028564453, "learning_rate": 7.548423669544867e-06, "loss": 0.52633672, "memory(GiB)": 34.88, "step": 53670, "train_speed(iter/s)": 0.411732 }, { "acc": 0.89266186, "epoch": 1.4533073403189558, "grad_norm": 8.728055000305176, "learning_rate": 7.5479422104427236e-06, "loss": 0.60220175, "memory(GiB)": 34.88, "step": 53675, "train_speed(iter/s)": 0.411734 }, { "acc": 0.90350885, "epoch": 1.4534427206021716, "grad_norm": 10.734338760375977, "learning_rate": 7.5474607194288575e-06, "loss": 0.59221077, "memory(GiB)": 34.88, "step": 53680, "train_speed(iter/s)": 0.411736 }, { "acc": 0.90620766, "epoch": 1.453578100885387, "grad_norm": 9.07615852355957, "learning_rate": 7.546979196509304e-06, "loss": 0.50915108, "memory(GiB)": 34.88, "step": 53685, "train_speed(iter/s)": 0.411738 }, { "acc": 0.90098419, "epoch": 1.4537134811686026, "grad_norm": 31.583141326904297, "learning_rate": 7.5464976416900946e-06, "loss": 0.49919796, "memory(GiB)": 34.88, "step": 53690, "train_speed(iter/s)": 0.41174 }, { "acc": 0.9183116, "epoch": 1.4538488614518181, "grad_norm": 6.699953556060791, "learning_rate": 7.546016054977263e-06, "loss": 0.39725409, "memory(GiB)": 34.88, "step": 53695, "train_speed(iter/s)": 0.411742 }, { "acc": 0.89232149, "epoch": 1.4539842417350337, "grad_norm": 10.161090850830078, "learning_rate": 7.54553443637684e-06, "loss": 0.54171886, "memory(GiB)": 34.88, "step": 53700, "train_speed(iter/s)": 0.411744 }, { "acc": 0.87746878, "epoch": 1.4541196220182493, "grad_norm": 20.707597732543945, "learning_rate": 7.54505278589486e-06, "loss": 0.68657389, "memory(GiB)": 34.88, "step": 53705, "train_speed(iter/s)": 0.411746 }, { "acc": 0.91347733, "epoch": 1.4542550023014649, "grad_norm": 6.930257320404053, "learning_rate": 7.544571103537357e-06, "loss": 0.47888122, "memory(GiB)": 34.88, "step": 53710, "train_speed(iter/s)": 0.411749 }, { "acc": 0.89221001, "epoch": 1.4543903825846805, "grad_norm": 12.260967254638672, "learning_rate": 7.544089389310363e-06, "loss": 0.68934169, "memory(GiB)": 34.88, "step": 53715, "train_speed(iter/s)": 0.411751 }, { "acc": 0.89332428, "epoch": 1.4545257628678958, "grad_norm": 8.189940452575684, "learning_rate": 7.543607643219917e-06, "loss": 0.60333815, "memory(GiB)": 34.88, "step": 53720, "train_speed(iter/s)": 0.411753 }, { "acc": 0.90194588, "epoch": 1.4546611431511114, "grad_norm": 11.963991165161133, "learning_rate": 7.543125865272047e-06, "loss": 0.54930682, "memory(GiB)": 34.88, "step": 53725, "train_speed(iter/s)": 0.411755 }, { "acc": 0.8930769, "epoch": 1.454796523434327, "grad_norm": 10.50968074798584, "learning_rate": 7.542644055472792e-06, "loss": 0.61054811, "memory(GiB)": 34.88, "step": 53730, "train_speed(iter/s)": 0.411757 }, { "acc": 0.89187937, "epoch": 1.4549319037175426, "grad_norm": 8.34238338470459, "learning_rate": 7.542162213828188e-06, "loss": 0.61502352, "memory(GiB)": 34.88, "step": 53735, "train_speed(iter/s)": 0.411759 }, { "acc": 0.92091732, "epoch": 1.4550672840007581, "grad_norm": 10.215394973754883, "learning_rate": 7.541680340344269e-06, "loss": 0.40581779, "memory(GiB)": 34.88, "step": 53740, "train_speed(iter/s)": 0.411761 }, { "acc": 0.88006964, "epoch": 1.4552026642839737, "grad_norm": 9.856813430786133, "learning_rate": 7.541198435027075e-06, "loss": 0.6854352, "memory(GiB)": 34.88, "step": 53745, "train_speed(iter/s)": 0.411763 }, { "acc": 0.90276937, "epoch": 1.4553380445671893, "grad_norm": 28.60955047607422, "learning_rate": 7.540716497882641e-06, "loss": 0.52548771, "memory(GiB)": 34.88, "step": 53750, "train_speed(iter/s)": 0.411765 }, { "acc": 0.88904133, "epoch": 1.4554734248504047, "grad_norm": 5.265392303466797, "learning_rate": 7.540234528917002e-06, "loss": 0.51914835, "memory(GiB)": 34.88, "step": 53755, "train_speed(iter/s)": 0.411767 }, { "acc": 0.90204811, "epoch": 1.4556088051336205, "grad_norm": 9.338815689086914, "learning_rate": 7.5397525281361995e-06, "loss": 0.53650584, "memory(GiB)": 34.88, "step": 53760, "train_speed(iter/s)": 0.411769 }, { "acc": 0.88534164, "epoch": 1.4557441854168358, "grad_norm": 10.239725112915039, "learning_rate": 7.5392704955462695e-06, "loss": 0.58407431, "memory(GiB)": 34.88, "step": 53765, "train_speed(iter/s)": 0.411771 }, { "acc": 0.89300604, "epoch": 1.4558795657000514, "grad_norm": 9.09695053100586, "learning_rate": 7.5387884311532475e-06, "loss": 0.51364121, "memory(GiB)": 34.88, "step": 53770, "train_speed(iter/s)": 0.411773 }, { "acc": 0.89498119, "epoch": 1.456014945983267, "grad_norm": 3.8890905380249023, "learning_rate": 7.538306334963177e-06, "loss": 0.56750383, "memory(GiB)": 34.88, "step": 53775, "train_speed(iter/s)": 0.411775 }, { "acc": 0.89801359, "epoch": 1.4561503262664826, "grad_norm": 7.033112525939941, "learning_rate": 7.537824206982095e-06, "loss": 0.51825123, "memory(GiB)": 34.88, "step": 53780, "train_speed(iter/s)": 0.411777 }, { "acc": 0.91103163, "epoch": 1.4562857065496981, "grad_norm": 7.257516384124756, "learning_rate": 7.53734204721604e-06, "loss": 0.4886086, "memory(GiB)": 34.88, "step": 53785, "train_speed(iter/s)": 0.411779 }, { "acc": 0.90405712, "epoch": 1.4564210868329137, "grad_norm": 9.475709915161133, "learning_rate": 7.536859855671057e-06, "loss": 0.48377991, "memory(GiB)": 34.88, "step": 53790, "train_speed(iter/s)": 0.411781 }, { "acc": 0.88944216, "epoch": 1.4565564671161293, "grad_norm": 7.9767584800720215, "learning_rate": 7.53637763235318e-06, "loss": 0.57867422, "memory(GiB)": 34.88, "step": 53795, "train_speed(iter/s)": 0.411783 }, { "acc": 0.90246143, "epoch": 1.4566918473993447, "grad_norm": 9.807459831237793, "learning_rate": 7.535895377268454e-06, "loss": 0.56095839, "memory(GiB)": 34.88, "step": 53800, "train_speed(iter/s)": 0.411785 }, { "acc": 0.89913235, "epoch": 1.4568272276825602, "grad_norm": 17.25568962097168, "learning_rate": 7.53541309042292e-06, "loss": 0.56283207, "memory(GiB)": 34.88, "step": 53805, "train_speed(iter/s)": 0.411787 }, { "acc": 0.90757294, "epoch": 1.4569626079657758, "grad_norm": 11.577836036682129, "learning_rate": 7.534930771822615e-06, "loss": 0.5927865, "memory(GiB)": 34.88, "step": 53810, "train_speed(iter/s)": 0.411789 }, { "acc": 0.88951015, "epoch": 1.4570979882489914, "grad_norm": 13.02920913696289, "learning_rate": 7.534448421473588e-06, "loss": 0.61138897, "memory(GiB)": 34.88, "step": 53815, "train_speed(iter/s)": 0.411791 }, { "acc": 0.88704042, "epoch": 1.457233368532207, "grad_norm": 7.938584327697754, "learning_rate": 7.533966039381876e-06, "loss": 0.56657591, "memory(GiB)": 34.88, "step": 53820, "train_speed(iter/s)": 0.411792 }, { "acc": 0.88603115, "epoch": 1.4573687488154226, "grad_norm": 12.650092124938965, "learning_rate": 7.533483625553523e-06, "loss": 0.68151608, "memory(GiB)": 34.88, "step": 53825, "train_speed(iter/s)": 0.411794 }, { "acc": 0.89645309, "epoch": 1.4575041290986381, "grad_norm": 10.318384170532227, "learning_rate": 7.533001179994573e-06, "loss": 0.53534913, "memory(GiB)": 34.88, "step": 53830, "train_speed(iter/s)": 0.411796 }, { "acc": 0.90539169, "epoch": 1.4576395093818535, "grad_norm": 15.200130462646484, "learning_rate": 7.532518702711071e-06, "loss": 0.500385, "memory(GiB)": 34.88, "step": 53835, "train_speed(iter/s)": 0.411798 }, { "acc": 0.90453072, "epoch": 1.4577748896650693, "grad_norm": 9.699248313903809, "learning_rate": 7.5320361937090584e-06, "loss": 0.56195464, "memory(GiB)": 34.88, "step": 53840, "train_speed(iter/s)": 0.4118 }, { "acc": 0.89816351, "epoch": 1.4579102699482847, "grad_norm": 7.742361545562744, "learning_rate": 7.531553652994582e-06, "loss": 0.44077692, "memory(GiB)": 34.88, "step": 53845, "train_speed(iter/s)": 0.411802 }, { "acc": 0.89507637, "epoch": 1.4580456502315002, "grad_norm": 9.647834777832031, "learning_rate": 7.531071080573682e-06, "loss": 0.54861155, "memory(GiB)": 34.88, "step": 53850, "train_speed(iter/s)": 0.411803 }, { "acc": 0.88806772, "epoch": 1.4581810305147158, "grad_norm": 11.333841323852539, "learning_rate": 7.530588476452408e-06, "loss": 0.67129021, "memory(GiB)": 34.88, "step": 53855, "train_speed(iter/s)": 0.411805 }, { "acc": 0.9052124, "epoch": 1.4583164107979314, "grad_norm": 7.959852695465088, "learning_rate": 7.530105840636805e-06, "loss": 0.47214928, "memory(GiB)": 34.88, "step": 53860, "train_speed(iter/s)": 0.411807 }, { "acc": 0.91267719, "epoch": 1.458451791081147, "grad_norm": 13.476289749145508, "learning_rate": 7.529623173132916e-06, "loss": 0.47974262, "memory(GiB)": 34.88, "step": 53865, "train_speed(iter/s)": 0.411809 }, { "acc": 0.88779945, "epoch": 1.4585871713643626, "grad_norm": 25.66582679748535, "learning_rate": 7.5291404739467925e-06, "loss": 0.72357731, "memory(GiB)": 34.88, "step": 53870, "train_speed(iter/s)": 0.411811 }, { "acc": 0.90170021, "epoch": 1.4587225516475781, "grad_norm": 7.070113182067871, "learning_rate": 7.5286577430844764e-06, "loss": 0.52264051, "memory(GiB)": 34.88, "step": 53875, "train_speed(iter/s)": 0.411813 }, { "acc": 0.90735264, "epoch": 1.4588579319307935, "grad_norm": 9.66012191772461, "learning_rate": 7.528174980552018e-06, "loss": 0.52882056, "memory(GiB)": 34.88, "step": 53880, "train_speed(iter/s)": 0.411815 }, { "acc": 0.89314232, "epoch": 1.458993312214009, "grad_norm": 13.952474594116211, "learning_rate": 7.527692186355462e-06, "loss": 0.47803812, "memory(GiB)": 34.88, "step": 53885, "train_speed(iter/s)": 0.411817 }, { "acc": 0.88322048, "epoch": 1.4591286924972247, "grad_norm": 12.400924682617188, "learning_rate": 7.52720936050086e-06, "loss": 0.68762493, "memory(GiB)": 34.88, "step": 53890, "train_speed(iter/s)": 0.411819 }, { "acc": 0.88896122, "epoch": 1.4592640727804402, "grad_norm": 6.743652820587158, "learning_rate": 7.526726502994256e-06, "loss": 0.69394898, "memory(GiB)": 34.88, "step": 53895, "train_speed(iter/s)": 0.411821 }, { "acc": 0.90239811, "epoch": 1.4593994530636558, "grad_norm": 9.978599548339844, "learning_rate": 7.526243613841703e-06, "loss": 0.44330163, "memory(GiB)": 34.88, "step": 53900, "train_speed(iter/s)": 0.411823 }, { "acc": 0.85905876, "epoch": 1.4595348333468714, "grad_norm": 10.45702075958252, "learning_rate": 7.525760693049247e-06, "loss": 0.70285211, "memory(GiB)": 34.88, "step": 53905, "train_speed(iter/s)": 0.411825 }, { "acc": 0.91547775, "epoch": 1.459670213630087, "grad_norm": 4.705005168914795, "learning_rate": 7.5252777406229384e-06, "loss": 0.39495926, "memory(GiB)": 34.88, "step": 53910, "train_speed(iter/s)": 0.411827 }, { "acc": 0.9127634, "epoch": 1.4598055939133023, "grad_norm": 6.254936695098877, "learning_rate": 7.524794756568829e-06, "loss": 0.44371686, "memory(GiB)": 34.88, "step": 53915, "train_speed(iter/s)": 0.411829 }, { "acc": 0.87642717, "epoch": 1.4599409741965181, "grad_norm": 9.066853523254395, "learning_rate": 7.524311740892967e-06, "loss": 0.67872758, "memory(GiB)": 34.88, "step": 53920, "train_speed(iter/s)": 0.411831 }, { "acc": 0.8832159, "epoch": 1.4600763544797335, "grad_norm": 19.63222312927246, "learning_rate": 7.5238286936014015e-06, "loss": 0.63180227, "memory(GiB)": 34.88, "step": 53925, "train_speed(iter/s)": 0.411833 }, { "acc": 0.89440498, "epoch": 1.460211734762949, "grad_norm": 5.984654426574707, "learning_rate": 7.523345614700188e-06, "loss": 0.49929233, "memory(GiB)": 34.88, "step": 53930, "train_speed(iter/s)": 0.411835 }, { "acc": 0.89996548, "epoch": 1.4603471150461647, "grad_norm": 7.99041223526001, "learning_rate": 7.522862504195375e-06, "loss": 0.51751003, "memory(GiB)": 34.88, "step": 53935, "train_speed(iter/s)": 0.411837 }, { "acc": 0.87940845, "epoch": 1.4604824953293802, "grad_norm": 15.196958541870117, "learning_rate": 7.5223793620930166e-06, "loss": 0.63188009, "memory(GiB)": 34.88, "step": 53940, "train_speed(iter/s)": 0.411839 }, { "acc": 0.89286976, "epoch": 1.4606178756125958, "grad_norm": 9.915671348571777, "learning_rate": 7.521896188399162e-06, "loss": 0.54632888, "memory(GiB)": 34.88, "step": 53945, "train_speed(iter/s)": 0.411841 }, { "acc": 0.89135838, "epoch": 1.4607532558958114, "grad_norm": 8.251693725585938, "learning_rate": 7.521412983119866e-06, "loss": 0.54156704, "memory(GiB)": 34.88, "step": 53950, "train_speed(iter/s)": 0.411843 }, { "acc": 0.90411491, "epoch": 1.460888636179027, "grad_norm": 7.123885631561279, "learning_rate": 7.5209297462611805e-06, "loss": 0.50007739, "memory(GiB)": 34.88, "step": 53955, "train_speed(iter/s)": 0.411845 }, { "acc": 0.88829632, "epoch": 1.4610240164622423, "grad_norm": 11.849091529846191, "learning_rate": 7.520446477829162e-06, "loss": 0.59822841, "memory(GiB)": 34.88, "step": 53960, "train_speed(iter/s)": 0.411847 }, { "acc": 0.88829126, "epoch": 1.461159396745458, "grad_norm": 15.281429290771484, "learning_rate": 7.519963177829861e-06, "loss": 0.60873833, "memory(GiB)": 34.88, "step": 53965, "train_speed(iter/s)": 0.411849 }, { "acc": 0.89266434, "epoch": 1.4612947770286735, "grad_norm": 6.867181777954102, "learning_rate": 7.519479846269333e-06, "loss": 0.57823353, "memory(GiB)": 34.88, "step": 53970, "train_speed(iter/s)": 0.411851 }, { "acc": 0.90376854, "epoch": 1.461430157311889, "grad_norm": 6.8622307777404785, "learning_rate": 7.518996483153632e-06, "loss": 0.51436682, "memory(GiB)": 34.88, "step": 53975, "train_speed(iter/s)": 0.411853 }, { "acc": 0.90540133, "epoch": 1.4615655375951047, "grad_norm": 4.41752815246582, "learning_rate": 7.518513088488814e-06, "loss": 0.47047796, "memory(GiB)": 34.88, "step": 53980, "train_speed(iter/s)": 0.411855 }, { "acc": 0.9048111, "epoch": 1.4617009178783202, "grad_norm": 8.773761749267578, "learning_rate": 7.5180296622809355e-06, "loss": 0.56272831, "memory(GiB)": 34.88, "step": 53985, "train_speed(iter/s)": 0.411857 }, { "acc": 0.92282314, "epoch": 1.4618362981615358, "grad_norm": 12.531309127807617, "learning_rate": 7.517546204536048e-06, "loss": 0.41224689, "memory(GiB)": 34.88, "step": 53990, "train_speed(iter/s)": 0.411859 }, { "acc": 0.90632973, "epoch": 1.4619716784447512, "grad_norm": 14.958921432495117, "learning_rate": 7.5170627152602136e-06, "loss": 0.49772921, "memory(GiB)": 34.88, "step": 53995, "train_speed(iter/s)": 0.411861 }, { "acc": 0.88188972, "epoch": 1.462107058727967, "grad_norm": 7.38330602645874, "learning_rate": 7.516579194459486e-06, "loss": 0.64678211, "memory(GiB)": 34.88, "step": 54000, "train_speed(iter/s)": 0.411862 }, { "acc": 0.90470695, "epoch": 1.4622424390111823, "grad_norm": 7.886199474334717, "learning_rate": 7.516095642139921e-06, "loss": 0.48104925, "memory(GiB)": 34.88, "step": 54005, "train_speed(iter/s)": 0.411864 }, { "acc": 0.89454508, "epoch": 1.462377819294398, "grad_norm": 7.946580410003662, "learning_rate": 7.515612058307577e-06, "loss": 0.55527844, "memory(GiB)": 34.88, "step": 54010, "train_speed(iter/s)": 0.411866 }, { "acc": 0.91439371, "epoch": 1.4625131995776135, "grad_norm": 4.729345321655273, "learning_rate": 7.515128442968516e-06, "loss": 0.4776022, "memory(GiB)": 34.88, "step": 54015, "train_speed(iter/s)": 0.411868 }, { "acc": 0.89898863, "epoch": 1.462648579860829, "grad_norm": 8.4142484664917, "learning_rate": 7.514644796128789e-06, "loss": 0.48444109, "memory(GiB)": 34.88, "step": 54020, "train_speed(iter/s)": 0.41187 }, { "acc": 0.91042023, "epoch": 1.4627839601440447, "grad_norm": 8.519964218139648, "learning_rate": 7.51416111779446e-06, "loss": 0.47936893, "memory(GiB)": 34.88, "step": 54025, "train_speed(iter/s)": 0.411872 }, { "acc": 0.87739706, "epoch": 1.4629193404272602, "grad_norm": 13.253511428833008, "learning_rate": 7.5136774079715845e-06, "loss": 0.671031, "memory(GiB)": 34.88, "step": 54030, "train_speed(iter/s)": 0.411874 }, { "acc": 0.89631748, "epoch": 1.4630547207104758, "grad_norm": 12.630995750427246, "learning_rate": 7.5131936666662245e-06, "loss": 0.58431869, "memory(GiB)": 34.88, "step": 54035, "train_speed(iter/s)": 0.411875 }, { "acc": 0.90601845, "epoch": 1.4631901009936912, "grad_norm": 6.683009624481201, "learning_rate": 7.512709893884438e-06, "loss": 0.48681288, "memory(GiB)": 34.88, "step": 54040, "train_speed(iter/s)": 0.411878 }, { "acc": 0.90627632, "epoch": 1.4633254812769068, "grad_norm": 10.61587142944336, "learning_rate": 7.512226089632287e-06, "loss": 0.55944948, "memory(GiB)": 34.88, "step": 54045, "train_speed(iter/s)": 0.41188 }, { "acc": 0.90327845, "epoch": 1.4634608615601223, "grad_norm": 7.278033256530762, "learning_rate": 7.51174225391583e-06, "loss": 0.47971539, "memory(GiB)": 34.88, "step": 54050, "train_speed(iter/s)": 0.411882 }, { "acc": 0.89862251, "epoch": 1.463596241843338, "grad_norm": 13.98647403717041, "learning_rate": 7.511258386741133e-06, "loss": 0.58241329, "memory(GiB)": 34.88, "step": 54055, "train_speed(iter/s)": 0.411884 }, { "acc": 0.91386271, "epoch": 1.4637316221265535, "grad_norm": 9.900151252746582, "learning_rate": 7.5107744881142494e-06, "loss": 0.45106173, "memory(GiB)": 34.88, "step": 54060, "train_speed(iter/s)": 0.411886 }, { "acc": 0.91180515, "epoch": 1.463867002409769, "grad_norm": 12.853486061096191, "learning_rate": 7.510290558041248e-06, "loss": 0.45926285, "memory(GiB)": 34.88, "step": 54065, "train_speed(iter/s)": 0.411888 }, { "acc": 0.89311695, "epoch": 1.4640023826929847, "grad_norm": 13.539688110351562, "learning_rate": 7.5098065965281855e-06, "loss": 0.48940697, "memory(GiB)": 34.88, "step": 54070, "train_speed(iter/s)": 0.41189 }, { "acc": 0.88258362, "epoch": 1.4641377629762, "grad_norm": 14.129003524780273, "learning_rate": 7.5093226035811275e-06, "loss": 0.63167024, "memory(GiB)": 34.88, "step": 54075, "train_speed(iter/s)": 0.411892 }, { "acc": 0.91876278, "epoch": 1.4642731432594158, "grad_norm": 10.26771068572998, "learning_rate": 7.508838579206138e-06, "loss": 0.47819386, "memory(GiB)": 34.88, "step": 54080, "train_speed(iter/s)": 0.411894 }, { "acc": 0.88364182, "epoch": 1.4644085235426312, "grad_norm": 11.878923416137695, "learning_rate": 7.508354523409278e-06, "loss": 0.61584454, "memory(GiB)": 34.88, "step": 54085, "train_speed(iter/s)": 0.411896 }, { "acc": 0.89828167, "epoch": 1.4645439038258468, "grad_norm": 7.113682270050049, "learning_rate": 7.507870436196614e-06, "loss": 0.54232159, "memory(GiB)": 34.88, "step": 54090, "train_speed(iter/s)": 0.411898 }, { "acc": 0.8909483, "epoch": 1.4646792841090623, "grad_norm": 13.403264999389648, "learning_rate": 7.507386317574206e-06, "loss": 0.60815411, "memory(GiB)": 34.88, "step": 54095, "train_speed(iter/s)": 0.4119 }, { "acc": 0.90226011, "epoch": 1.464814664392278, "grad_norm": 8.44493579864502, "learning_rate": 7.506902167548121e-06, "loss": 0.47242217, "memory(GiB)": 34.88, "step": 54100, "train_speed(iter/s)": 0.411902 }, { "acc": 0.89393988, "epoch": 1.4649500446754935, "grad_norm": 14.310639381408691, "learning_rate": 7.5064179861244245e-06, "loss": 0.66413517, "memory(GiB)": 34.88, "step": 54105, "train_speed(iter/s)": 0.411904 }, { "acc": 0.8891614, "epoch": 1.465085424958709, "grad_norm": 6.504634857177734, "learning_rate": 7.505933773309182e-06, "loss": 0.63664837, "memory(GiB)": 34.88, "step": 54110, "train_speed(iter/s)": 0.411906 }, { "acc": 0.88396616, "epoch": 1.4652208052419247, "grad_norm": 6.5133891105651855, "learning_rate": 7.505449529108457e-06, "loss": 0.59750066, "memory(GiB)": 34.88, "step": 54115, "train_speed(iter/s)": 0.411908 }, { "acc": 0.91313171, "epoch": 1.46535618552514, "grad_norm": 3.3831865787506104, "learning_rate": 7.504965253528316e-06, "loss": 0.41842833, "memory(GiB)": 34.88, "step": 54120, "train_speed(iter/s)": 0.41191 }, { "acc": 0.92068729, "epoch": 1.4654915658083556, "grad_norm": 5.4044413566589355, "learning_rate": 7.504480946574829e-06, "loss": 0.41752133, "memory(GiB)": 34.88, "step": 54125, "train_speed(iter/s)": 0.411912 }, { "acc": 0.89154577, "epoch": 1.4656269460915712, "grad_norm": 4.849388122558594, "learning_rate": 7.503996608254058e-06, "loss": 0.69324794, "memory(GiB)": 34.88, "step": 54130, "train_speed(iter/s)": 0.411914 }, { "acc": 0.90024223, "epoch": 1.4657623263747868, "grad_norm": 7.449036598205566, "learning_rate": 7.503512238572075e-06, "loss": 0.54793119, "memory(GiB)": 34.88, "step": 54135, "train_speed(iter/s)": 0.411915 }, { "acc": 0.89444599, "epoch": 1.4658977066580023, "grad_norm": 12.969612121582031, "learning_rate": 7.503027837534944e-06, "loss": 0.57779722, "memory(GiB)": 34.88, "step": 54140, "train_speed(iter/s)": 0.411917 }, { "acc": 0.90911884, "epoch": 1.466033086941218, "grad_norm": 8.340503692626953, "learning_rate": 7.502543405148734e-06, "loss": 0.45111847, "memory(GiB)": 34.88, "step": 54145, "train_speed(iter/s)": 0.411919 }, { "acc": 0.90359154, "epoch": 1.4661684672244335, "grad_norm": 6.977083683013916, "learning_rate": 7.502058941419513e-06, "loss": 0.46580076, "memory(GiB)": 34.88, "step": 54150, "train_speed(iter/s)": 0.411921 }, { "acc": 0.9040884, "epoch": 1.4663038475076489, "grad_norm": 6.616682052612305, "learning_rate": 7.501574446353353e-06, "loss": 0.54034548, "memory(GiB)": 34.88, "step": 54155, "train_speed(iter/s)": 0.411922 }, { "acc": 0.90591564, "epoch": 1.4664392277908647, "grad_norm": 5.52769136428833, "learning_rate": 7.501089919956319e-06, "loss": 0.51649094, "memory(GiB)": 34.88, "step": 54160, "train_speed(iter/s)": 0.411924 }, { "acc": 0.8973032, "epoch": 1.46657460807408, "grad_norm": 3.1585893630981445, "learning_rate": 7.5006053622344845e-06, "loss": 0.58067932, "memory(GiB)": 34.88, "step": 54165, "train_speed(iter/s)": 0.411926 }, { "acc": 0.90435085, "epoch": 1.4667099883572956, "grad_norm": 13.11892318725586, "learning_rate": 7.500120773193917e-06, "loss": 0.5104022, "memory(GiB)": 34.88, "step": 54170, "train_speed(iter/s)": 0.411928 }, { "acc": 0.88225622, "epoch": 1.4668453686405112, "grad_norm": 6.960718631744385, "learning_rate": 7.499636152840686e-06, "loss": 0.64866743, "memory(GiB)": 34.88, "step": 54175, "train_speed(iter/s)": 0.41193 }, { "acc": 0.90160923, "epoch": 1.4669807489237268, "grad_norm": 6.4755754470825195, "learning_rate": 7.4991515011808655e-06, "loss": 0.55099301, "memory(GiB)": 34.88, "step": 54180, "train_speed(iter/s)": 0.411932 }, { "acc": 0.91276684, "epoch": 1.4671161292069423, "grad_norm": 6.803967475891113, "learning_rate": 7.498666818220525e-06, "loss": 0.529776, "memory(GiB)": 34.88, "step": 54185, "train_speed(iter/s)": 0.411934 }, { "acc": 0.89622154, "epoch": 1.467251509490158, "grad_norm": 5.096598148345947, "learning_rate": 7.498182103965736e-06, "loss": 0.4835124, "memory(GiB)": 34.88, "step": 54190, "train_speed(iter/s)": 0.411936 }, { "acc": 0.89300327, "epoch": 1.4673868897733735, "grad_norm": 6.8892035484313965, "learning_rate": 7.49769735842257e-06, "loss": 0.53693037, "memory(GiB)": 34.88, "step": 54195, "train_speed(iter/s)": 0.411939 }, { "acc": 0.89465771, "epoch": 1.4675222700565889, "grad_norm": 5.885646343231201, "learning_rate": 7.497212581597101e-06, "loss": 0.54810152, "memory(GiB)": 34.88, "step": 54200, "train_speed(iter/s)": 0.41194 }, { "acc": 0.88966532, "epoch": 1.4676576503398044, "grad_norm": 10.152697563171387, "learning_rate": 7.496727773495401e-06, "loss": 0.60043564, "memory(GiB)": 34.88, "step": 54205, "train_speed(iter/s)": 0.411942 }, { "acc": 0.88344479, "epoch": 1.46779303062302, "grad_norm": 7.602305889129639, "learning_rate": 7.496242934123541e-06, "loss": 0.57351789, "memory(GiB)": 34.88, "step": 54210, "train_speed(iter/s)": 0.411944 }, { "acc": 0.89806938, "epoch": 1.4679284109062356, "grad_norm": 15.873212814331055, "learning_rate": 7.495758063487599e-06, "loss": 0.54866953, "memory(GiB)": 34.88, "step": 54215, "train_speed(iter/s)": 0.411946 }, { "acc": 0.90318298, "epoch": 1.4680637911894512, "grad_norm": 11.590346336364746, "learning_rate": 7.495273161593644e-06, "loss": 0.57479677, "memory(GiB)": 34.88, "step": 54220, "train_speed(iter/s)": 0.411948 }, { "acc": 0.9046072, "epoch": 1.4681991714726668, "grad_norm": 12.511625289916992, "learning_rate": 7.494788228447757e-06, "loss": 0.58435688, "memory(GiB)": 34.88, "step": 54225, "train_speed(iter/s)": 0.41195 }, { "acc": 0.9257164, "epoch": 1.4683345517558823, "grad_norm": 8.449325561523438, "learning_rate": 7.494303264056006e-06, "loss": 0.42944546, "memory(GiB)": 34.88, "step": 54230, "train_speed(iter/s)": 0.411952 }, { "acc": 0.90063534, "epoch": 1.4684699320390977, "grad_norm": 10.221778869628906, "learning_rate": 7.49381826842447e-06, "loss": 0.62309132, "memory(GiB)": 34.88, "step": 54235, "train_speed(iter/s)": 0.411954 }, { "acc": 0.85101175, "epoch": 1.4686053123223135, "grad_norm": 17.291269302368164, "learning_rate": 7.493333241559222e-06, "loss": 0.78395305, "memory(GiB)": 34.88, "step": 54240, "train_speed(iter/s)": 0.411955 }, { "acc": 0.91111908, "epoch": 1.4687406926055289, "grad_norm": 7.798882961273193, "learning_rate": 7.4928481834663395e-06, "loss": 0.51191235, "memory(GiB)": 34.88, "step": 54245, "train_speed(iter/s)": 0.411957 }, { "acc": 0.91393967, "epoch": 1.4688760728887444, "grad_norm": 6.448139667510986, "learning_rate": 7.492363094151899e-06, "loss": 0.52758589, "memory(GiB)": 34.88, "step": 54250, "train_speed(iter/s)": 0.411959 }, { "acc": 0.9078105, "epoch": 1.46901145317196, "grad_norm": 10.20681095123291, "learning_rate": 7.491877973621976e-06, "loss": 0.50684361, "memory(GiB)": 34.88, "step": 54255, "train_speed(iter/s)": 0.411961 }, { "acc": 0.9030386, "epoch": 1.4691468334551756, "grad_norm": 6.872005462646484, "learning_rate": 7.491392821882649e-06, "loss": 0.52425213, "memory(GiB)": 34.88, "step": 54260, "train_speed(iter/s)": 0.411963 }, { "acc": 0.90654411, "epoch": 1.4692822137383912, "grad_norm": 5.8144917488098145, "learning_rate": 7.490907638939996e-06, "loss": 0.42632427, "memory(GiB)": 34.88, "step": 54265, "train_speed(iter/s)": 0.411965 }, { "acc": 0.90200424, "epoch": 1.4694175940216068, "grad_norm": 12.159603118896484, "learning_rate": 7.4904224248000916e-06, "loss": 0.50908303, "memory(GiB)": 34.88, "step": 54270, "train_speed(iter/s)": 0.411967 }, { "acc": 0.89205971, "epoch": 1.4695529743048223, "grad_norm": 10.459715843200684, "learning_rate": 7.489937179469017e-06, "loss": 0.69468007, "memory(GiB)": 34.88, "step": 54275, "train_speed(iter/s)": 0.411969 }, { "acc": 0.89876003, "epoch": 1.4696883545880377, "grad_norm": 6.1434807777404785, "learning_rate": 7.4894519029528505e-06, "loss": 0.5371501, "memory(GiB)": 34.88, "step": 54280, "train_speed(iter/s)": 0.411971 }, { "acc": 0.91476221, "epoch": 1.4698237348712533, "grad_norm": 5.609464645385742, "learning_rate": 7.48896659525767e-06, "loss": 0.43663611, "memory(GiB)": 34.88, "step": 54285, "train_speed(iter/s)": 0.411973 }, { "acc": 0.87494764, "epoch": 1.4699591151544689, "grad_norm": 6.846791744232178, "learning_rate": 7.488481256389555e-06, "loss": 0.76206403, "memory(GiB)": 34.88, "step": 54290, "train_speed(iter/s)": 0.411975 }, { "acc": 0.90914955, "epoch": 1.4700944954376844, "grad_norm": 6.8666181564331055, "learning_rate": 7.4879958863545864e-06, "loss": 0.46704087, "memory(GiB)": 34.88, "step": 54295, "train_speed(iter/s)": 0.411977 }, { "acc": 0.90135279, "epoch": 1.4702298757209, "grad_norm": 8.752947807312012, "learning_rate": 7.487510485158843e-06, "loss": 0.59766932, "memory(GiB)": 34.88, "step": 54300, "train_speed(iter/s)": 0.411979 }, { "acc": 0.8982933, "epoch": 1.4703652560041156, "grad_norm": 7.428816795349121, "learning_rate": 7.4870250528084085e-06, "loss": 0.55981798, "memory(GiB)": 34.88, "step": 54305, "train_speed(iter/s)": 0.411981 }, { "acc": 0.90508366, "epoch": 1.4705006362873312, "grad_norm": 7.488013744354248, "learning_rate": 7.486539589309362e-06, "loss": 0.47641745, "memory(GiB)": 34.88, "step": 54310, "train_speed(iter/s)": 0.411983 }, { "acc": 0.88758049, "epoch": 1.4706360165705465, "grad_norm": 12.548041343688965, "learning_rate": 7.486054094667782e-06, "loss": 0.66394658, "memory(GiB)": 34.88, "step": 54315, "train_speed(iter/s)": 0.411985 }, { "acc": 0.89800892, "epoch": 1.4707713968537623, "grad_norm": 5.822653770446777, "learning_rate": 7.485568568889754e-06, "loss": 0.52342148, "memory(GiB)": 34.88, "step": 54320, "train_speed(iter/s)": 0.411987 }, { "acc": 0.90496168, "epoch": 1.4709067771369777, "grad_norm": 17.13275146484375, "learning_rate": 7.485083011981359e-06, "loss": 0.48421717, "memory(GiB)": 34.88, "step": 54325, "train_speed(iter/s)": 0.411989 }, { "acc": 0.89028845, "epoch": 1.4710421574201933, "grad_norm": 16.1600341796875, "learning_rate": 7.484597423948679e-06, "loss": 0.66388712, "memory(GiB)": 34.88, "step": 54330, "train_speed(iter/s)": 0.411991 }, { "acc": 0.9171751, "epoch": 1.4711775377034089, "grad_norm": 6.442766189575195, "learning_rate": 7.484111804797799e-06, "loss": 0.44364219, "memory(GiB)": 34.88, "step": 54335, "train_speed(iter/s)": 0.411993 }, { "acc": 0.91118851, "epoch": 1.4713129179866244, "grad_norm": 10.080487251281738, "learning_rate": 7.483626154534798e-06, "loss": 0.49692702, "memory(GiB)": 34.88, "step": 54340, "train_speed(iter/s)": 0.411995 }, { "acc": 0.88870211, "epoch": 1.47144829826984, "grad_norm": 12.632539749145508, "learning_rate": 7.483140473165765e-06, "loss": 0.6210393, "memory(GiB)": 34.88, "step": 54345, "train_speed(iter/s)": 0.411997 }, { "acc": 0.9062995, "epoch": 1.4715836785530556, "grad_norm": 5.905514240264893, "learning_rate": 7.482654760696783e-06, "loss": 0.54454422, "memory(GiB)": 34.88, "step": 54350, "train_speed(iter/s)": 0.411999 }, { "acc": 0.88790493, "epoch": 1.4717190588362712, "grad_norm": 6.957329750061035, "learning_rate": 7.4821690171339336e-06, "loss": 0.65038137, "memory(GiB)": 34.88, "step": 54355, "train_speed(iter/s)": 0.412 }, { "acc": 0.88993044, "epoch": 1.4718544391194865, "grad_norm": 9.353654861450195, "learning_rate": 7.481683242483302e-06, "loss": 0.59423399, "memory(GiB)": 34.88, "step": 54360, "train_speed(iter/s)": 0.412002 }, { "acc": 0.907687, "epoch": 1.4719898194027021, "grad_norm": 8.685391426086426, "learning_rate": 7.481197436750977e-06, "loss": 0.55437937, "memory(GiB)": 34.88, "step": 54365, "train_speed(iter/s)": 0.412004 }, { "acc": 0.90979919, "epoch": 1.4721251996859177, "grad_norm": 8.55323600769043, "learning_rate": 7.48071159994304e-06, "loss": 0.52970333, "memory(GiB)": 34.88, "step": 54370, "train_speed(iter/s)": 0.412006 }, { "acc": 0.9055006, "epoch": 1.4722605799691333, "grad_norm": 7.502859592437744, "learning_rate": 7.48022573206558e-06, "loss": 0.48228984, "memory(GiB)": 34.88, "step": 54375, "train_speed(iter/s)": 0.412007 }, { "acc": 0.91131105, "epoch": 1.4723959602523489, "grad_norm": 4.792603969573975, "learning_rate": 7.479739833124682e-06, "loss": 0.49680405, "memory(GiB)": 34.88, "step": 54380, "train_speed(iter/s)": 0.412009 }, { "acc": 0.8992754, "epoch": 1.4725313405355644, "grad_norm": 9.640052795410156, "learning_rate": 7.479253903126435e-06, "loss": 0.56800447, "memory(GiB)": 34.88, "step": 54385, "train_speed(iter/s)": 0.412011 }, { "acc": 0.89094467, "epoch": 1.47266672081878, "grad_norm": 13.182635307312012, "learning_rate": 7.4787679420769236e-06, "loss": 0.57375946, "memory(GiB)": 34.88, "step": 54390, "train_speed(iter/s)": 0.412013 }, { "acc": 0.91288948, "epoch": 1.4728021011019954, "grad_norm": 6.472226142883301, "learning_rate": 7.478281949982236e-06, "loss": 0.40921054, "memory(GiB)": 34.88, "step": 54395, "train_speed(iter/s)": 0.412015 }, { "acc": 0.92742901, "epoch": 1.4729374813852112, "grad_norm": 8.638107299804688, "learning_rate": 7.477795926848462e-06, "loss": 0.40999231, "memory(GiB)": 34.88, "step": 54400, "train_speed(iter/s)": 0.412016 }, { "acc": 0.88830585, "epoch": 1.4730728616684265, "grad_norm": 18.97922706604004, "learning_rate": 7.477309872681686e-06, "loss": 0.66429625, "memory(GiB)": 34.88, "step": 54405, "train_speed(iter/s)": 0.412018 }, { "acc": 0.90043335, "epoch": 1.4732082419516421, "grad_norm": 8.655277252197266, "learning_rate": 7.476823787488002e-06, "loss": 0.53659492, "memory(GiB)": 34.88, "step": 54410, "train_speed(iter/s)": 0.41202 }, { "acc": 0.92326908, "epoch": 1.4733436222348577, "grad_norm": 8.094853401184082, "learning_rate": 7.476337671273496e-06, "loss": 0.42064705, "memory(GiB)": 34.88, "step": 54415, "train_speed(iter/s)": 0.412022 }, { "acc": 0.90294504, "epoch": 1.4734790025180733, "grad_norm": 5.384448528289795, "learning_rate": 7.475851524044257e-06, "loss": 0.44873362, "memory(GiB)": 34.88, "step": 54420, "train_speed(iter/s)": 0.412024 }, { "acc": 0.88728428, "epoch": 1.4736143828012889, "grad_norm": 6.19639253616333, "learning_rate": 7.475365345806378e-06, "loss": 0.5951828, "memory(GiB)": 34.88, "step": 54425, "train_speed(iter/s)": 0.412026 }, { "acc": 0.90287924, "epoch": 1.4737497630845044, "grad_norm": 7.690219402313232, "learning_rate": 7.474879136565946e-06, "loss": 0.5227273, "memory(GiB)": 34.88, "step": 54430, "train_speed(iter/s)": 0.412028 }, { "acc": 0.87728367, "epoch": 1.47388514336772, "grad_norm": 9.589661598205566, "learning_rate": 7.474392896329055e-06, "loss": 0.68803921, "memory(GiB)": 34.88, "step": 54435, "train_speed(iter/s)": 0.41203 }, { "acc": 0.90657616, "epoch": 1.4740205236509354, "grad_norm": 10.934244155883789, "learning_rate": 7.4739066251017925e-06, "loss": 0.51581626, "memory(GiB)": 34.88, "step": 54440, "train_speed(iter/s)": 0.412032 }, { "acc": 0.88582172, "epoch": 1.474155903934151, "grad_norm": 5.230443477630615, "learning_rate": 7.473420322890252e-06, "loss": 0.58311467, "memory(GiB)": 34.88, "step": 54445, "train_speed(iter/s)": 0.412034 }, { "acc": 0.89898043, "epoch": 1.4742912842173665, "grad_norm": 4.986212730407715, "learning_rate": 7.472933989700525e-06, "loss": 0.51439133, "memory(GiB)": 34.88, "step": 54450, "train_speed(iter/s)": 0.412035 }, { "acc": 0.89351788, "epoch": 1.4744266645005821, "grad_norm": 36.67625045776367, "learning_rate": 7.472447625538704e-06, "loss": 0.58828468, "memory(GiB)": 34.88, "step": 54455, "train_speed(iter/s)": 0.412037 }, { "acc": 0.90213165, "epoch": 1.4745620447837977, "grad_norm": 7.774971961975098, "learning_rate": 7.471961230410882e-06, "loss": 0.57534394, "memory(GiB)": 34.88, "step": 54460, "train_speed(iter/s)": 0.412039 }, { "acc": 0.90297928, "epoch": 1.4746974250670133, "grad_norm": 23.956871032714844, "learning_rate": 7.471474804323151e-06, "loss": 0.52263489, "memory(GiB)": 34.88, "step": 54465, "train_speed(iter/s)": 0.412041 }, { "acc": 0.90145359, "epoch": 1.4748328053502289, "grad_norm": 5.770922660827637, "learning_rate": 7.470988347281607e-06, "loss": 0.52534704, "memory(GiB)": 34.88, "step": 54470, "train_speed(iter/s)": 0.412043 }, { "acc": 0.88557911, "epoch": 1.4749681856334442, "grad_norm": 15.168289184570312, "learning_rate": 7.470501859292341e-06, "loss": 0.69476523, "memory(GiB)": 34.88, "step": 54475, "train_speed(iter/s)": 0.412045 }, { "acc": 0.90849266, "epoch": 1.47510356591666, "grad_norm": 10.76606273651123, "learning_rate": 7.470015340361448e-06, "loss": 0.47621856, "memory(GiB)": 34.88, "step": 54480, "train_speed(iter/s)": 0.412047 }, { "acc": 0.89463377, "epoch": 1.4752389461998754, "grad_norm": 13.807389259338379, "learning_rate": 7.469528790495022e-06, "loss": 0.44494123, "memory(GiB)": 34.88, "step": 54485, "train_speed(iter/s)": 0.412049 }, { "acc": 0.91376734, "epoch": 1.475374326483091, "grad_norm": 5.599521636962891, "learning_rate": 7.469042209699159e-06, "loss": 0.42266259, "memory(GiB)": 34.88, "step": 54490, "train_speed(iter/s)": 0.41205 }, { "acc": 0.89919205, "epoch": 1.4755097067663066, "grad_norm": 10.0130615234375, "learning_rate": 7.468555597979954e-06, "loss": 0.54228354, "memory(GiB)": 34.88, "step": 54495, "train_speed(iter/s)": 0.412052 }, { "acc": 0.89145832, "epoch": 1.4756450870495221, "grad_norm": 4.993704795837402, "learning_rate": 7.468068955343504e-06, "loss": 0.60749655, "memory(GiB)": 34.88, "step": 54500, "train_speed(iter/s)": 0.412054 }, { "acc": 0.90715084, "epoch": 1.4757804673327377, "grad_norm": 3.7339465618133545, "learning_rate": 7.467582281795903e-06, "loss": 0.49381232, "memory(GiB)": 34.88, "step": 54505, "train_speed(iter/s)": 0.412056 }, { "acc": 0.87188025, "epoch": 1.4759158476159533, "grad_norm": 9.273160934448242, "learning_rate": 7.467095577343249e-06, "loss": 0.67971802, "memory(GiB)": 34.88, "step": 54510, "train_speed(iter/s)": 0.412058 }, { "acc": 0.90935783, "epoch": 1.4760512278991689, "grad_norm": 20.440279006958008, "learning_rate": 7.46660884199164e-06, "loss": 0.49611149, "memory(GiB)": 34.88, "step": 54515, "train_speed(iter/s)": 0.41206 }, { "acc": 0.8870574, "epoch": 1.4761866081823842, "grad_norm": 9.63711929321289, "learning_rate": 7.4661220757471696e-06, "loss": 0.60769773, "memory(GiB)": 34.88, "step": 54520, "train_speed(iter/s)": 0.412062 }, { "acc": 0.90279112, "epoch": 1.4763219884655998, "grad_norm": 7.858168601989746, "learning_rate": 7.465635278615939e-06, "loss": 0.55232525, "memory(GiB)": 34.88, "step": 54525, "train_speed(iter/s)": 0.412064 }, { "acc": 0.88707113, "epoch": 1.4764573687488154, "grad_norm": 11.880446434020996, "learning_rate": 7.465148450604045e-06, "loss": 0.6477931, "memory(GiB)": 34.88, "step": 54530, "train_speed(iter/s)": 0.412066 }, { "acc": 0.90943832, "epoch": 1.476592749032031, "grad_norm": 10.276809692382812, "learning_rate": 7.464661591717586e-06, "loss": 0.43747706, "memory(GiB)": 34.88, "step": 54535, "train_speed(iter/s)": 0.412068 }, { "acc": 0.90957918, "epoch": 1.4767281293152466, "grad_norm": 11.971752166748047, "learning_rate": 7.464174701962661e-06, "loss": 0.48983889, "memory(GiB)": 34.88, "step": 54540, "train_speed(iter/s)": 0.41207 }, { "acc": 0.88944817, "epoch": 1.4768635095984621, "grad_norm": 7.886600494384766, "learning_rate": 7.463687781345369e-06, "loss": 0.64559126, "memory(GiB)": 34.88, "step": 54545, "train_speed(iter/s)": 0.412072 }, { "acc": 0.90725784, "epoch": 1.4769988898816777, "grad_norm": 10.451970100402832, "learning_rate": 7.46320082987181e-06, "loss": 0.48910055, "memory(GiB)": 34.88, "step": 54550, "train_speed(iter/s)": 0.412074 }, { "acc": 0.89349785, "epoch": 1.477134270164893, "grad_norm": 8.341527938842773, "learning_rate": 7.462713847548083e-06, "loss": 0.58535786, "memory(GiB)": 34.88, "step": 54555, "train_speed(iter/s)": 0.412075 }, { "acc": 0.9041646, "epoch": 1.4772696504481089, "grad_norm": 5.840569019317627, "learning_rate": 7.462226834380291e-06, "loss": 0.44619598, "memory(GiB)": 34.88, "step": 54560, "train_speed(iter/s)": 0.412077 }, { "acc": 0.90007372, "epoch": 1.4774050307313242, "grad_norm": 5.956230640411377, "learning_rate": 7.461739790374532e-06, "loss": 0.53557415, "memory(GiB)": 34.88, "step": 54565, "train_speed(iter/s)": 0.412079 }, { "acc": 0.90165005, "epoch": 1.4775404110145398, "grad_norm": 6.228744029998779, "learning_rate": 7.4612527155369085e-06, "loss": 0.50212879, "memory(GiB)": 34.88, "step": 54570, "train_speed(iter/s)": 0.412081 }, { "acc": 0.89426727, "epoch": 1.4776757912977554, "grad_norm": 6.317912578582764, "learning_rate": 7.460765609873523e-06, "loss": 0.54714518, "memory(GiB)": 34.88, "step": 54575, "train_speed(iter/s)": 0.412083 }, { "acc": 0.90080776, "epoch": 1.477811171580971, "grad_norm": 8.111489295959473, "learning_rate": 7.460278473390475e-06, "loss": 0.50196471, "memory(GiB)": 34.88, "step": 54580, "train_speed(iter/s)": 0.412085 }, { "acc": 0.88661518, "epoch": 1.4779465518641866, "grad_norm": 11.304537773132324, "learning_rate": 7.459791306093869e-06, "loss": 0.70482817, "memory(GiB)": 34.88, "step": 54585, "train_speed(iter/s)": 0.412087 }, { "acc": 0.9156394, "epoch": 1.4780819321474021, "grad_norm": 5.0036234855651855, "learning_rate": 7.459304107989805e-06, "loss": 0.41477637, "memory(GiB)": 34.88, "step": 54590, "train_speed(iter/s)": 0.412089 }, { "acc": 0.89651432, "epoch": 1.4782173124306177, "grad_norm": 5.661874294281006, "learning_rate": 7.45881687908439e-06, "loss": 0.62082534, "memory(GiB)": 34.88, "step": 54595, "train_speed(iter/s)": 0.412091 }, { "acc": 0.9041564, "epoch": 1.478352692713833, "grad_norm": 7.9382195472717285, "learning_rate": 7.4583296193837265e-06, "loss": 0.49721241, "memory(GiB)": 34.88, "step": 54600, "train_speed(iter/s)": 0.412093 }, { "acc": 0.87790985, "epoch": 1.4784880729970487, "grad_norm": 12.114855766296387, "learning_rate": 7.457842328893915e-06, "loss": 0.63585653, "memory(GiB)": 34.88, "step": 54605, "train_speed(iter/s)": 0.412095 }, { "acc": 0.89598961, "epoch": 1.4786234532802642, "grad_norm": 6.410582542419434, "learning_rate": 7.457355007621063e-06, "loss": 0.49121304, "memory(GiB)": 34.88, "step": 54610, "train_speed(iter/s)": 0.412097 }, { "acc": 0.91236591, "epoch": 1.4787588335634798, "grad_norm": 19.265798568725586, "learning_rate": 7.4568676555712735e-06, "loss": 0.40824718, "memory(GiB)": 34.88, "step": 54615, "train_speed(iter/s)": 0.412099 }, { "acc": 0.88044233, "epoch": 1.4788942138466954, "grad_norm": 8.16810417175293, "learning_rate": 7.456380272750653e-06, "loss": 0.64210367, "memory(GiB)": 34.88, "step": 54620, "train_speed(iter/s)": 0.412101 }, { "acc": 0.88580065, "epoch": 1.479029594129911, "grad_norm": 5.971997261047363, "learning_rate": 7.455892859165307e-06, "loss": 0.52883692, "memory(GiB)": 34.88, "step": 54625, "train_speed(iter/s)": 0.412103 }, { "acc": 0.91226339, "epoch": 1.4791649744131266, "grad_norm": 5.936676979064941, "learning_rate": 7.455405414821341e-06, "loss": 0.46984615, "memory(GiB)": 34.88, "step": 54630, "train_speed(iter/s)": 0.412104 }, { "acc": 0.90278902, "epoch": 1.479300354696342, "grad_norm": 5.198092937469482, "learning_rate": 7.454917939724858e-06, "loss": 0.44232717, "memory(GiB)": 34.88, "step": 54635, "train_speed(iter/s)": 0.412106 }, { "acc": 0.9118453, "epoch": 1.4794357349795577, "grad_norm": 20.108890533447266, "learning_rate": 7.45443043388197e-06, "loss": 0.41709533, "memory(GiB)": 34.88, "step": 54640, "train_speed(iter/s)": 0.412108 }, { "acc": 0.91279888, "epoch": 1.479571115262773, "grad_norm": 24.638927459716797, "learning_rate": 7.4539428972987815e-06, "loss": 0.47771664, "memory(GiB)": 34.88, "step": 54645, "train_speed(iter/s)": 0.41211 }, { "acc": 0.90844727, "epoch": 1.4797064955459887, "grad_norm": 11.403433799743652, "learning_rate": 7.4534553299813986e-06, "loss": 0.50773978, "memory(GiB)": 34.88, "step": 54650, "train_speed(iter/s)": 0.412111 }, { "acc": 0.88835182, "epoch": 1.4798418758292042, "grad_norm": 16.5618839263916, "learning_rate": 7.45296773193593e-06, "loss": 0.64558935, "memory(GiB)": 34.88, "step": 54655, "train_speed(iter/s)": 0.412113 }, { "acc": 0.88714867, "epoch": 1.4799772561124198, "grad_norm": 4.4565911293029785, "learning_rate": 7.452480103168483e-06, "loss": 0.63684359, "memory(GiB)": 34.88, "step": 54660, "train_speed(iter/s)": 0.412115 }, { "acc": 0.89778404, "epoch": 1.4801126363956354, "grad_norm": 12.581438064575195, "learning_rate": 7.45199244368517e-06, "loss": 0.57638006, "memory(GiB)": 34.88, "step": 54665, "train_speed(iter/s)": 0.412117 }, { "acc": 0.90130196, "epoch": 1.480248016678851, "grad_norm": 7.703524589538574, "learning_rate": 7.451504753492095e-06, "loss": 0.5402842, "memory(GiB)": 34.88, "step": 54670, "train_speed(iter/s)": 0.412118 }, { "acc": 0.89251537, "epoch": 1.4803833969620666, "grad_norm": 7.0242791175842285, "learning_rate": 7.451017032595369e-06, "loss": 0.54999604, "memory(GiB)": 34.88, "step": 54675, "train_speed(iter/s)": 0.412119 }, { "acc": 0.90745335, "epoch": 1.480518777245282, "grad_norm": 3.8931639194488525, "learning_rate": 7.450529281001102e-06, "loss": 0.50433965, "memory(GiB)": 34.88, "step": 54680, "train_speed(iter/s)": 0.412121 }, { "acc": 0.90907078, "epoch": 1.4806541575284975, "grad_norm": 9.596575736999512, "learning_rate": 7.450041498715404e-06, "loss": 0.49723368, "memory(GiB)": 34.88, "step": 54685, "train_speed(iter/s)": 0.412123 }, { "acc": 0.8855298, "epoch": 1.480789537811713, "grad_norm": 13.209675788879395, "learning_rate": 7.449553685744387e-06, "loss": 0.6215344, "memory(GiB)": 34.88, "step": 54690, "train_speed(iter/s)": 0.412124 }, { "acc": 0.91321144, "epoch": 1.4809249180949287, "grad_norm": 5.86170768737793, "learning_rate": 7.449065842094159e-06, "loss": 0.44720516, "memory(GiB)": 34.88, "step": 54695, "train_speed(iter/s)": 0.412126 }, { "acc": 0.89685011, "epoch": 1.4810602983781442, "grad_norm": 6.355997085571289, "learning_rate": 7.4485779677708325e-06, "loss": 0.63459187, "memory(GiB)": 34.88, "step": 54700, "train_speed(iter/s)": 0.412128 }, { "acc": 0.89774055, "epoch": 1.4811956786613598, "grad_norm": 7.283566474914551, "learning_rate": 7.4480900627805185e-06, "loss": 0.56327152, "memory(GiB)": 34.88, "step": 54705, "train_speed(iter/s)": 0.412129 }, { "acc": 0.86453543, "epoch": 1.4813310589445754, "grad_norm": 13.863791465759277, "learning_rate": 7.447602127129331e-06, "loss": 0.73765488, "memory(GiB)": 34.88, "step": 54710, "train_speed(iter/s)": 0.412131 }, { "acc": 0.89344254, "epoch": 1.4814664392277908, "grad_norm": 4.454527378082275, "learning_rate": 7.447114160823379e-06, "loss": 0.52524157, "memory(GiB)": 34.88, "step": 54715, "train_speed(iter/s)": 0.412132 }, { "acc": 0.88001518, "epoch": 1.4816018195110063, "grad_norm": 8.539339065551758, "learning_rate": 7.446626163868777e-06, "loss": 0.63082695, "memory(GiB)": 34.88, "step": 54720, "train_speed(iter/s)": 0.412134 }, { "acc": 0.90969086, "epoch": 1.481737199794222, "grad_norm": 8.401387214660645, "learning_rate": 7.44613813627164e-06, "loss": 0.47732363, "memory(GiB)": 34.88, "step": 54725, "train_speed(iter/s)": 0.412136 }, { "acc": 0.89404125, "epoch": 1.4818725800774375, "grad_norm": 5.826543807983398, "learning_rate": 7.4456500780380785e-06, "loss": 0.56921296, "memory(GiB)": 34.88, "step": 54730, "train_speed(iter/s)": 0.412138 }, { "acc": 0.89405594, "epoch": 1.482007960360653, "grad_norm": 12.462808609008789, "learning_rate": 7.445161989174206e-06, "loss": 0.59997263, "memory(GiB)": 34.88, "step": 54735, "train_speed(iter/s)": 0.412139 }, { "acc": 0.91251163, "epoch": 1.4821433406438687, "grad_norm": 5.1832194328308105, "learning_rate": 7.444673869686141e-06, "loss": 0.41640606, "memory(GiB)": 34.88, "step": 54740, "train_speed(iter/s)": 0.41214 }, { "acc": 0.90292683, "epoch": 1.4822787209270842, "grad_norm": 7.011385440826416, "learning_rate": 7.444185719579995e-06, "loss": 0.55051103, "memory(GiB)": 34.88, "step": 54745, "train_speed(iter/s)": 0.412142 }, { "acc": 0.92731686, "epoch": 1.4824141012102996, "grad_norm": 11.579851150512695, "learning_rate": 7.443697538861883e-06, "loss": 0.38381324, "memory(GiB)": 34.88, "step": 54750, "train_speed(iter/s)": 0.412144 }, { "acc": 0.92250347, "epoch": 1.4825494814935154, "grad_norm": 5.833752632141113, "learning_rate": 7.443209327537919e-06, "loss": 0.42765779, "memory(GiB)": 34.88, "step": 54755, "train_speed(iter/s)": 0.412146 }, { "acc": 0.87774792, "epoch": 1.4826848617767308, "grad_norm": 11.432340621948242, "learning_rate": 7.442721085614225e-06, "loss": 0.71212955, "memory(GiB)": 34.88, "step": 54760, "train_speed(iter/s)": 0.412147 }, { "acc": 0.90562878, "epoch": 1.4828202420599463, "grad_norm": 8.713651657104492, "learning_rate": 7.442232813096909e-06, "loss": 0.5286459, "memory(GiB)": 34.88, "step": 54765, "train_speed(iter/s)": 0.412149 }, { "acc": 0.90948353, "epoch": 1.482955622343162, "grad_norm": 4.145506858825684, "learning_rate": 7.441744509992095e-06, "loss": 0.46708069, "memory(GiB)": 34.88, "step": 54770, "train_speed(iter/s)": 0.412151 }, { "acc": 0.89650211, "epoch": 1.4830910026263775, "grad_norm": 8.120141983032227, "learning_rate": 7.441256176305895e-06, "loss": 0.49804993, "memory(GiB)": 34.88, "step": 54775, "train_speed(iter/s)": 0.412153 }, { "acc": 0.89752655, "epoch": 1.483226382909593, "grad_norm": 9.001185417175293, "learning_rate": 7.440767812044427e-06, "loss": 0.58473277, "memory(GiB)": 34.88, "step": 54780, "train_speed(iter/s)": 0.412155 }, { "acc": 0.89979763, "epoch": 1.4833617631928087, "grad_norm": 7.728304862976074, "learning_rate": 7.44027941721381e-06, "loss": 0.48624058, "memory(GiB)": 34.88, "step": 54785, "train_speed(iter/s)": 0.412157 }, { "acc": 0.87572498, "epoch": 1.4834971434760242, "grad_norm": 9.04714584350586, "learning_rate": 7.43979099182016e-06, "loss": 0.57537808, "memory(GiB)": 34.88, "step": 54790, "train_speed(iter/s)": 0.412158 }, { "acc": 0.90175676, "epoch": 1.4836325237592396, "grad_norm": 5.328512191772461, "learning_rate": 7.439302535869599e-06, "loss": 0.52425356, "memory(GiB)": 34.88, "step": 54795, "train_speed(iter/s)": 0.41216 }, { "acc": 0.91682701, "epoch": 1.4837679040424552, "grad_norm": 9.332347869873047, "learning_rate": 7.438814049368244e-06, "loss": 0.44389668, "memory(GiB)": 34.88, "step": 54800, "train_speed(iter/s)": 0.412162 }, { "acc": 0.91383018, "epoch": 1.4839032843256708, "grad_norm": 31.600120544433594, "learning_rate": 7.438325532322213e-06, "loss": 0.47547464, "memory(GiB)": 34.88, "step": 54805, "train_speed(iter/s)": 0.412164 }, { "acc": 0.90355663, "epoch": 1.4840386646088863, "grad_norm": 7.107061386108398, "learning_rate": 7.43783698473763e-06, "loss": 0.53371744, "memory(GiB)": 34.88, "step": 54810, "train_speed(iter/s)": 0.412166 }, { "acc": 0.90680065, "epoch": 1.484174044892102, "grad_norm": 31.435087203979492, "learning_rate": 7.43734840662061e-06, "loss": 0.50220261, "memory(GiB)": 34.88, "step": 54815, "train_speed(iter/s)": 0.412167 }, { "acc": 0.91265669, "epoch": 1.4843094251753175, "grad_norm": 5.10302209854126, "learning_rate": 7.436859797977275e-06, "loss": 0.46827173, "memory(GiB)": 34.88, "step": 54820, "train_speed(iter/s)": 0.412169 }, { "acc": 0.90400982, "epoch": 1.484444805458533, "grad_norm": 9.92927074432373, "learning_rate": 7.436371158813746e-06, "loss": 0.45445442, "memory(GiB)": 34.88, "step": 54825, "train_speed(iter/s)": 0.412171 }, { "acc": 0.88601046, "epoch": 1.4845801857417484, "grad_norm": 9.85223388671875, "learning_rate": 7.435882489136143e-06, "loss": 0.57882905, "memory(GiB)": 34.88, "step": 54830, "train_speed(iter/s)": 0.412173 }, { "acc": 0.92221756, "epoch": 1.4847155660249642, "grad_norm": 6.2549028396606445, "learning_rate": 7.435393788950591e-06, "loss": 0.44113054, "memory(GiB)": 34.88, "step": 54835, "train_speed(iter/s)": 0.412174 }, { "acc": 0.8900444, "epoch": 1.4848509463081796, "grad_norm": 5.1117262840271, "learning_rate": 7.434905058263209e-06, "loss": 0.59518409, "memory(GiB)": 34.88, "step": 54840, "train_speed(iter/s)": 0.412176 }, { "acc": 0.91673746, "epoch": 1.4849863265913952, "grad_norm": 9.8868989944458, "learning_rate": 7.4344162970801205e-06, "loss": 0.38358335, "memory(GiB)": 34.88, "step": 54845, "train_speed(iter/s)": 0.412178 }, { "acc": 0.9189743, "epoch": 1.4851217068746108, "grad_norm": 5.412615776062012, "learning_rate": 7.4339275054074485e-06, "loss": 0.50110345, "memory(GiB)": 34.88, "step": 54850, "train_speed(iter/s)": 0.41218 }, { "acc": 0.91098518, "epoch": 1.4852570871578263, "grad_norm": 6.111374855041504, "learning_rate": 7.4334386832513135e-06, "loss": 0.41567163, "memory(GiB)": 34.88, "step": 54855, "train_speed(iter/s)": 0.412182 }, { "acc": 0.88616972, "epoch": 1.485392467441042, "grad_norm": 7.009057998657227, "learning_rate": 7.432949830617841e-06, "loss": 0.70481472, "memory(GiB)": 34.88, "step": 54860, "train_speed(iter/s)": 0.412184 }, { "acc": 0.89518232, "epoch": 1.4855278477242575, "grad_norm": 4.447657108306885, "learning_rate": 7.432460947513155e-06, "loss": 0.60027132, "memory(GiB)": 34.88, "step": 54865, "train_speed(iter/s)": 0.412186 }, { "acc": 0.89737587, "epoch": 1.485663228007473, "grad_norm": 7.53031587600708, "learning_rate": 7.4319720339433785e-06, "loss": 0.58998203, "memory(GiB)": 34.88, "step": 54870, "train_speed(iter/s)": 0.412187 }, { "acc": 0.89613266, "epoch": 1.4857986082906884, "grad_norm": 7.795638561248779, "learning_rate": 7.431483089914639e-06, "loss": 0.66113143, "memory(GiB)": 34.88, "step": 54875, "train_speed(iter/s)": 0.412189 }, { "acc": 0.86305914, "epoch": 1.485933988573904, "grad_norm": 34.437923431396484, "learning_rate": 7.430994115433058e-06, "loss": 0.69489431, "memory(GiB)": 34.88, "step": 54880, "train_speed(iter/s)": 0.412191 }, { "acc": 0.90412884, "epoch": 1.4860693688571196, "grad_norm": 7.02047872543335, "learning_rate": 7.430505110504762e-06, "loss": 0.58375397, "memory(GiB)": 34.88, "step": 54885, "train_speed(iter/s)": 0.412193 }, { "acc": 0.88638268, "epoch": 1.4862047491403352, "grad_norm": 21.06474494934082, "learning_rate": 7.430016075135878e-06, "loss": 0.63849068, "memory(GiB)": 34.88, "step": 54890, "train_speed(iter/s)": 0.412195 }, { "acc": 0.91623325, "epoch": 1.4863401294235508, "grad_norm": 4.879031658172607, "learning_rate": 7.429527009332532e-06, "loss": 0.41681099, "memory(GiB)": 34.88, "step": 54895, "train_speed(iter/s)": 0.412197 }, { "acc": 0.89852304, "epoch": 1.4864755097067663, "grad_norm": 6.234211444854736, "learning_rate": 7.429037913100849e-06, "loss": 0.62967548, "memory(GiB)": 34.88, "step": 54900, "train_speed(iter/s)": 0.412199 }, { "acc": 0.91014404, "epoch": 1.486610889989982, "grad_norm": 16.673988342285156, "learning_rate": 7.428548786446954e-06, "loss": 0.50983391, "memory(GiB)": 34.88, "step": 54905, "train_speed(iter/s)": 0.4122 }, { "acc": 0.91073227, "epoch": 1.4867462702731973, "grad_norm": 12.145359992980957, "learning_rate": 7.428059629376979e-06, "loss": 0.45531454, "memory(GiB)": 34.88, "step": 54910, "train_speed(iter/s)": 0.412202 }, { "acc": 0.91756296, "epoch": 1.486881650556413, "grad_norm": 10.56302261352539, "learning_rate": 7.427570441897048e-06, "loss": 0.37242503, "memory(GiB)": 34.88, "step": 54915, "train_speed(iter/s)": 0.412204 }, { "acc": 0.9172905, "epoch": 1.4870170308396284, "grad_norm": 3.6756997108459473, "learning_rate": 7.427081224013294e-06, "loss": 0.42377186, "memory(GiB)": 34.88, "step": 54920, "train_speed(iter/s)": 0.412205 }, { "acc": 0.91942453, "epoch": 1.487152411122844, "grad_norm": 5.462090969085693, "learning_rate": 7.4265919757318386e-06, "loss": 0.43429117, "memory(GiB)": 34.88, "step": 54925, "train_speed(iter/s)": 0.412207 }, { "acc": 0.90319719, "epoch": 1.4872877914060596, "grad_norm": 13.006916046142578, "learning_rate": 7.426102697058815e-06, "loss": 0.56221366, "memory(GiB)": 34.88, "step": 54930, "train_speed(iter/s)": 0.412209 }, { "acc": 0.89967022, "epoch": 1.4874231716892752, "grad_norm": 12.297213554382324, "learning_rate": 7.425613388000352e-06, "loss": 0.56767955, "memory(GiB)": 34.88, "step": 54935, "train_speed(iter/s)": 0.412211 }, { "acc": 0.89141502, "epoch": 1.4875585519724908, "grad_norm": 16.43657112121582, "learning_rate": 7.425124048562578e-06, "loss": 0.53170185, "memory(GiB)": 34.88, "step": 54940, "train_speed(iter/s)": 0.412213 }, { "acc": 0.90558195, "epoch": 1.4876939322557063, "grad_norm": 4.9491496086120605, "learning_rate": 7.424634678751625e-06, "loss": 0.45795212, "memory(GiB)": 34.88, "step": 54945, "train_speed(iter/s)": 0.412215 }, { "acc": 0.9095068, "epoch": 1.487829312538922, "grad_norm": 4.793663024902344, "learning_rate": 7.424145278573619e-06, "loss": 0.50988655, "memory(GiB)": 34.88, "step": 54950, "train_speed(iter/s)": 0.412218 }, { "acc": 0.87179127, "epoch": 1.4879646928221373, "grad_norm": 9.276561737060547, "learning_rate": 7.423655848034693e-06, "loss": 0.70036874, "memory(GiB)": 34.88, "step": 54955, "train_speed(iter/s)": 0.41222 }, { "acc": 0.88821926, "epoch": 1.4881000731053529, "grad_norm": 9.70654010772705, "learning_rate": 7.42316638714098e-06, "loss": 0.68362255, "memory(GiB)": 34.88, "step": 54960, "train_speed(iter/s)": 0.412222 }, { "acc": 0.89648647, "epoch": 1.4882354533885684, "grad_norm": 6.649321556091309, "learning_rate": 7.42267689589861e-06, "loss": 0.50964928, "memory(GiB)": 34.88, "step": 54965, "train_speed(iter/s)": 0.412223 }, { "acc": 0.90448866, "epoch": 1.488370833671784, "grad_norm": 6.708069801330566, "learning_rate": 7.422187374313715e-06, "loss": 0.47598343, "memory(GiB)": 34.88, "step": 54970, "train_speed(iter/s)": 0.412225 }, { "acc": 0.90359631, "epoch": 1.4885062139549996, "grad_norm": 7.074008464813232, "learning_rate": 7.421697822392427e-06, "loss": 0.48757, "memory(GiB)": 34.88, "step": 54975, "train_speed(iter/s)": 0.412227 }, { "acc": 0.90560532, "epoch": 1.4886415942382152, "grad_norm": 9.302184104919434, "learning_rate": 7.421208240140881e-06, "loss": 0.48745375, "memory(GiB)": 34.88, "step": 54980, "train_speed(iter/s)": 0.412229 }, { "acc": 0.88503952, "epoch": 1.4887769745214308, "grad_norm": 10.165034294128418, "learning_rate": 7.420718627565202e-06, "loss": 0.66235838, "memory(GiB)": 34.88, "step": 54985, "train_speed(iter/s)": 0.41223 }, { "acc": 0.87764587, "epoch": 1.4889123548046461, "grad_norm": 9.227996826171875, "learning_rate": 7.4202289846715335e-06, "loss": 0.59016323, "memory(GiB)": 34.88, "step": 54990, "train_speed(iter/s)": 0.412232 }, { "acc": 0.902458, "epoch": 1.489047735087862, "grad_norm": 5.853607177734375, "learning_rate": 7.419739311466002e-06, "loss": 0.48913269, "memory(GiB)": 34.88, "step": 54995, "train_speed(iter/s)": 0.412234 }, { "acc": 0.8963789, "epoch": 1.4891831153710773, "grad_norm": 16.459848403930664, "learning_rate": 7.419249607954747e-06, "loss": 0.61684952, "memory(GiB)": 34.88, "step": 55000, "train_speed(iter/s)": 0.412236 }, { "acc": 0.89341183, "epoch": 1.4893184956542929, "grad_norm": 9.029409408569336, "learning_rate": 7.418759874143898e-06, "loss": 0.54789228, "memory(GiB)": 34.88, "step": 55005, "train_speed(iter/s)": 0.412238 }, { "acc": 0.92087545, "epoch": 1.4894538759375084, "grad_norm": 7.979489803314209, "learning_rate": 7.418270110039594e-06, "loss": 0.51024928, "memory(GiB)": 34.88, "step": 55010, "train_speed(iter/s)": 0.41224 }, { "acc": 0.91669178, "epoch": 1.489589256220724, "grad_norm": 8.755181312561035, "learning_rate": 7.417780315647967e-06, "loss": 0.42717428, "memory(GiB)": 34.88, "step": 55015, "train_speed(iter/s)": 0.412242 }, { "acc": 0.9045002, "epoch": 1.4897246365039396, "grad_norm": 7.9196367263793945, "learning_rate": 7.417290490975155e-06, "loss": 0.51095462, "memory(GiB)": 34.88, "step": 55020, "train_speed(iter/s)": 0.412244 }, { "acc": 0.89467506, "epoch": 1.4898600167871552, "grad_norm": 6.591283321380615, "learning_rate": 7.416800636027294e-06, "loss": 0.57485738, "memory(GiB)": 34.88, "step": 55025, "train_speed(iter/s)": 0.412245 }, { "acc": 0.88900585, "epoch": 1.4899953970703708, "grad_norm": 9.7622709274292, "learning_rate": 7.416310750810517e-06, "loss": 0.65786638, "memory(GiB)": 34.88, "step": 55030, "train_speed(iter/s)": 0.412247 }, { "acc": 0.92224054, "epoch": 1.4901307773535861, "grad_norm": 8.4248046875, "learning_rate": 7.415820835330964e-06, "loss": 0.41286941, "memory(GiB)": 34.88, "step": 55035, "train_speed(iter/s)": 0.412249 }, { "acc": 0.91646767, "epoch": 1.4902661576368017, "grad_norm": 7.052905082702637, "learning_rate": 7.415330889594772e-06, "loss": 0.41620178, "memory(GiB)": 34.88, "step": 55040, "train_speed(iter/s)": 0.412251 }, { "acc": 0.90302258, "epoch": 1.4904015379200173, "grad_norm": 6.773543834686279, "learning_rate": 7.4148409136080775e-06, "loss": 0.54147401, "memory(GiB)": 34.88, "step": 55045, "train_speed(iter/s)": 0.412253 }, { "acc": 0.90751286, "epoch": 1.4905369182032329, "grad_norm": 15.336322784423828, "learning_rate": 7.414350907377021e-06, "loss": 0.49440107, "memory(GiB)": 34.88, "step": 55050, "train_speed(iter/s)": 0.412255 }, { "acc": 0.91778107, "epoch": 1.4906722984864484, "grad_norm": 7.774392604827881, "learning_rate": 7.4138608709077356e-06, "loss": 0.47066164, "memory(GiB)": 34.88, "step": 55055, "train_speed(iter/s)": 0.412257 }, { "acc": 0.89694729, "epoch": 1.490807678769664, "grad_norm": 11.131380081176758, "learning_rate": 7.413370804206366e-06, "loss": 0.50525131, "memory(GiB)": 34.88, "step": 55060, "train_speed(iter/s)": 0.412259 }, { "acc": 0.88530111, "epoch": 1.4909430590528796, "grad_norm": 33.961326599121094, "learning_rate": 7.412880707279047e-06, "loss": 0.59782228, "memory(GiB)": 34.88, "step": 55065, "train_speed(iter/s)": 0.412261 }, { "acc": 0.90600386, "epoch": 1.491078439336095, "grad_norm": 4.918825626373291, "learning_rate": 7.41239058013192e-06, "loss": 0.54117641, "memory(GiB)": 34.88, "step": 55070, "train_speed(iter/s)": 0.412263 }, { "acc": 0.8635561, "epoch": 1.4912138196193108, "grad_norm": 16.24897575378418, "learning_rate": 7.411900422771121e-06, "loss": 0.72666855, "memory(GiB)": 34.88, "step": 55075, "train_speed(iter/s)": 0.412265 }, { "acc": 0.89095192, "epoch": 1.4913491999025261, "grad_norm": 26.267297744750977, "learning_rate": 7.411410235202797e-06, "loss": 0.56311202, "memory(GiB)": 34.88, "step": 55080, "train_speed(iter/s)": 0.412267 }, { "acc": 0.87198458, "epoch": 1.4914845801857417, "grad_norm": 14.827574729919434, "learning_rate": 7.410920017433082e-06, "loss": 0.68339114, "memory(GiB)": 34.88, "step": 55085, "train_speed(iter/s)": 0.412269 }, { "acc": 0.89665051, "epoch": 1.4916199604689573, "grad_norm": 13.15946102142334, "learning_rate": 7.4104297694681205e-06, "loss": 0.5733912, "memory(GiB)": 34.88, "step": 55090, "train_speed(iter/s)": 0.41227 }, { "acc": 0.91697969, "epoch": 1.4917553407521729, "grad_norm": 7.162306785583496, "learning_rate": 7.4099394913140554e-06, "loss": 0.49325304, "memory(GiB)": 34.88, "step": 55095, "train_speed(iter/s)": 0.412272 }, { "acc": 0.89440336, "epoch": 1.4918907210353884, "grad_norm": 15.113750457763672, "learning_rate": 7.409449182977024e-06, "loss": 0.62652249, "memory(GiB)": 34.88, "step": 55100, "train_speed(iter/s)": 0.412274 }, { "acc": 0.91576843, "epoch": 1.492026101318604, "grad_norm": 4.672504425048828, "learning_rate": 7.408958844463172e-06, "loss": 0.49192834, "memory(GiB)": 34.88, "step": 55105, "train_speed(iter/s)": 0.412276 }, { "acc": 0.89215269, "epoch": 1.4921614816018196, "grad_norm": 10.620871543884277, "learning_rate": 7.408468475778639e-06, "loss": 0.59125385, "memory(GiB)": 34.88, "step": 55110, "train_speed(iter/s)": 0.412278 }, { "acc": 0.91272125, "epoch": 1.492296861885035, "grad_norm": 6.005087375640869, "learning_rate": 7.407978076929569e-06, "loss": 0.46404157, "memory(GiB)": 34.88, "step": 55115, "train_speed(iter/s)": 0.41228 }, { "acc": 0.87331514, "epoch": 1.4924322421682505, "grad_norm": 9.909419059753418, "learning_rate": 7.407487647922107e-06, "loss": 0.64517403, "memory(GiB)": 34.88, "step": 55120, "train_speed(iter/s)": 0.412282 }, { "acc": 0.91545506, "epoch": 1.4925676224514661, "grad_norm": 7.614212512969971, "learning_rate": 7.406997188762395e-06, "loss": 0.41928744, "memory(GiB)": 34.88, "step": 55125, "train_speed(iter/s)": 0.412284 }, { "acc": 0.90770178, "epoch": 1.4927030027346817, "grad_norm": 6.827564239501953, "learning_rate": 7.406506699456574e-06, "loss": 0.51504388, "memory(GiB)": 34.88, "step": 55130, "train_speed(iter/s)": 0.412286 }, { "acc": 0.91632223, "epoch": 1.4928383830178973, "grad_norm": 6.652910232543945, "learning_rate": 7.4060161800107935e-06, "loss": 0.44846954, "memory(GiB)": 34.88, "step": 55135, "train_speed(iter/s)": 0.412288 }, { "acc": 0.90854836, "epoch": 1.4929737633011129, "grad_norm": 8.116540908813477, "learning_rate": 7.405525630431196e-06, "loss": 0.52552533, "memory(GiB)": 34.88, "step": 55140, "train_speed(iter/s)": 0.41229 }, { "acc": 0.88863115, "epoch": 1.4931091435843284, "grad_norm": 7.598836421966553, "learning_rate": 7.405035050723928e-06, "loss": 0.59579592, "memory(GiB)": 34.88, "step": 55145, "train_speed(iter/s)": 0.412291 }, { "acc": 0.91575747, "epoch": 1.4932445238675438, "grad_norm": 3.9616453647613525, "learning_rate": 7.404544440895132e-06, "loss": 0.39073648, "memory(GiB)": 34.88, "step": 55150, "train_speed(iter/s)": 0.412294 }, { "acc": 0.90341377, "epoch": 1.4933799041507596, "grad_norm": 5.6708197593688965, "learning_rate": 7.404053800950955e-06, "loss": 0.54095869, "memory(GiB)": 34.88, "step": 55155, "train_speed(iter/s)": 0.412295 }, { "acc": 0.90994053, "epoch": 1.493515284433975, "grad_norm": 7.069772720336914, "learning_rate": 7.4035631308975445e-06, "loss": 0.43039985, "memory(GiB)": 34.88, "step": 55160, "train_speed(iter/s)": 0.412297 }, { "acc": 0.92718544, "epoch": 1.4936506647171905, "grad_norm": 6.186469554901123, "learning_rate": 7.403072430741045e-06, "loss": 0.44929428, "memory(GiB)": 34.88, "step": 55165, "train_speed(iter/s)": 0.412299 }, { "acc": 0.90900955, "epoch": 1.4937860450004061, "grad_norm": 19.791446685791016, "learning_rate": 7.402581700487606e-06, "loss": 0.48320169, "memory(GiB)": 34.88, "step": 55170, "train_speed(iter/s)": 0.412301 }, { "acc": 0.91983404, "epoch": 1.4939214252836217, "grad_norm": 3.461643934249878, "learning_rate": 7.402090940143373e-06, "loss": 0.40768681, "memory(GiB)": 34.88, "step": 55175, "train_speed(iter/s)": 0.412303 }, { "acc": 0.9125824, "epoch": 1.4940568055668373, "grad_norm": 7.956474304199219, "learning_rate": 7.401600149714494e-06, "loss": 0.4713768, "memory(GiB)": 34.88, "step": 55180, "train_speed(iter/s)": 0.412305 }, { "acc": 0.9028429, "epoch": 1.4941921858500529, "grad_norm": 7.1709370613098145, "learning_rate": 7.401109329207119e-06, "loss": 0.51510811, "memory(GiB)": 34.88, "step": 55185, "train_speed(iter/s)": 0.412308 }, { "acc": 0.89147224, "epoch": 1.4943275661332684, "grad_norm": 8.039693832397461, "learning_rate": 7.400618478627395e-06, "loss": 0.60289259, "memory(GiB)": 34.88, "step": 55190, "train_speed(iter/s)": 0.412309 }, { "acc": 0.91770248, "epoch": 1.4944629464164838, "grad_norm": 10.08466625213623, "learning_rate": 7.40012759798147e-06, "loss": 0.47563367, "memory(GiB)": 34.88, "step": 55195, "train_speed(iter/s)": 0.412311 }, { "acc": 0.90611916, "epoch": 1.4945983266996994, "grad_norm": 22.002052307128906, "learning_rate": 7.3996366872754924e-06, "loss": 0.51846418, "memory(GiB)": 34.88, "step": 55200, "train_speed(iter/s)": 0.412313 }, { "acc": 0.89757748, "epoch": 1.494733706982915, "grad_norm": 8.049994468688965, "learning_rate": 7.399145746515615e-06, "loss": 0.56354456, "memory(GiB)": 34.88, "step": 55205, "train_speed(iter/s)": 0.412315 }, { "acc": 0.89468975, "epoch": 1.4948690872661305, "grad_norm": 13.45915699005127, "learning_rate": 7.398654775707987e-06, "loss": 0.61278753, "memory(GiB)": 34.88, "step": 55210, "train_speed(iter/s)": 0.412317 }, { "acc": 0.89696903, "epoch": 1.4950044675493461, "grad_norm": 2.5100533962249756, "learning_rate": 7.398163774858757e-06, "loss": 0.53246689, "memory(GiB)": 34.88, "step": 55215, "train_speed(iter/s)": 0.412319 }, { "acc": 0.89984016, "epoch": 1.4951398478325617, "grad_norm": 12.390262603759766, "learning_rate": 7.397672743974077e-06, "loss": 0.52348099, "memory(GiB)": 34.88, "step": 55220, "train_speed(iter/s)": 0.412321 }, { "acc": 0.91548157, "epoch": 1.4952752281157773, "grad_norm": 5.607431411743164, "learning_rate": 7.397181683060098e-06, "loss": 0.44253211, "memory(GiB)": 34.88, "step": 55225, "train_speed(iter/s)": 0.412323 }, { "acc": 0.89604836, "epoch": 1.4954106083989926, "grad_norm": 18.912153244018555, "learning_rate": 7.3966905921229735e-06, "loss": 0.58154993, "memory(GiB)": 34.88, "step": 55230, "train_speed(iter/s)": 0.412324 }, { "acc": 0.90395803, "epoch": 1.4955459886822084, "grad_norm": 6.788051128387451, "learning_rate": 7.396199471168851e-06, "loss": 0.45317364, "memory(GiB)": 34.88, "step": 55235, "train_speed(iter/s)": 0.412326 }, { "acc": 0.8950634, "epoch": 1.4956813689654238, "grad_norm": 8.285906791687012, "learning_rate": 7.395708320203885e-06, "loss": 0.50021515, "memory(GiB)": 34.88, "step": 55240, "train_speed(iter/s)": 0.412328 }, { "acc": 0.86249752, "epoch": 1.4958167492486394, "grad_norm": 22.248165130615234, "learning_rate": 7.395217139234229e-06, "loss": 0.69538064, "memory(GiB)": 34.88, "step": 55245, "train_speed(iter/s)": 0.41233 }, { "acc": 0.88194094, "epoch": 1.495952129531855, "grad_norm": 7.702329635620117, "learning_rate": 7.394725928266034e-06, "loss": 0.64948893, "memory(GiB)": 34.88, "step": 55250, "train_speed(iter/s)": 0.412332 }, { "acc": 0.9070076, "epoch": 1.4960875098150705, "grad_norm": 13.333806037902832, "learning_rate": 7.394234687305456e-06, "loss": 0.44652748, "memory(GiB)": 34.88, "step": 55255, "train_speed(iter/s)": 0.412333 }, { "acc": 0.9024456, "epoch": 1.4962228900982861, "grad_norm": 16.11188507080078, "learning_rate": 7.3937434163586464e-06, "loss": 0.61676931, "memory(GiB)": 34.88, "step": 55260, "train_speed(iter/s)": 0.412335 }, { "acc": 0.89350109, "epoch": 1.4963582703815017, "grad_norm": 10.658421516418457, "learning_rate": 7.3932521154317615e-06, "loss": 0.5901844, "memory(GiB)": 34.88, "step": 55265, "train_speed(iter/s)": 0.412337 }, { "acc": 0.88842735, "epoch": 1.4964936506647173, "grad_norm": 10.377862930297852, "learning_rate": 7.392760784530955e-06, "loss": 0.59284601, "memory(GiB)": 34.88, "step": 55270, "train_speed(iter/s)": 0.412339 }, { "acc": 0.90881405, "epoch": 1.4966290309479326, "grad_norm": 13.560945510864258, "learning_rate": 7.392269423662379e-06, "loss": 0.55809269, "memory(GiB)": 34.88, "step": 55275, "train_speed(iter/s)": 0.412341 }, { "acc": 0.89944572, "epoch": 1.4967644112311482, "grad_norm": 7.66699743270874, "learning_rate": 7.391778032832193e-06, "loss": 0.5323801, "memory(GiB)": 34.88, "step": 55280, "train_speed(iter/s)": 0.412343 }, { "acc": 0.90974274, "epoch": 1.4968997915143638, "grad_norm": 9.572272300720215, "learning_rate": 7.3912866120465495e-06, "loss": 0.47065172, "memory(GiB)": 34.88, "step": 55285, "train_speed(iter/s)": 0.412344 }, { "acc": 0.90739985, "epoch": 1.4970351717975794, "grad_norm": 6.104925632476807, "learning_rate": 7.390795161311607e-06, "loss": 0.52059555, "memory(GiB)": 34.88, "step": 55290, "train_speed(iter/s)": 0.412346 }, { "acc": 0.90805912, "epoch": 1.497170552080795, "grad_norm": 5.502286434173584, "learning_rate": 7.3903036806335205e-06, "loss": 0.49323115, "memory(GiB)": 34.88, "step": 55295, "train_speed(iter/s)": 0.412348 }, { "acc": 0.88335552, "epoch": 1.4973059323640106, "grad_norm": 10.40938663482666, "learning_rate": 7.389812170018447e-06, "loss": 0.64074278, "memory(GiB)": 34.88, "step": 55300, "train_speed(iter/s)": 0.41235 }, { "acc": 0.88496132, "epoch": 1.4974413126472261, "grad_norm": 11.545011520385742, "learning_rate": 7.389320629472543e-06, "loss": 0.62968683, "memory(GiB)": 34.88, "step": 55305, "train_speed(iter/s)": 0.412352 }, { "acc": 0.90711746, "epoch": 1.4975766929304415, "grad_norm": 10.88318157196045, "learning_rate": 7.388829059001968e-06, "loss": 0.47317076, "memory(GiB)": 34.88, "step": 55310, "train_speed(iter/s)": 0.412354 }, { "acc": 0.90398579, "epoch": 1.4977120732136573, "grad_norm": 4.191962718963623, "learning_rate": 7.388337458612878e-06, "loss": 0.43265085, "memory(GiB)": 34.88, "step": 55315, "train_speed(iter/s)": 0.412356 }, { "acc": 0.90084143, "epoch": 1.4978474534968726, "grad_norm": 3.720189332962036, "learning_rate": 7.38784582831143e-06, "loss": 0.49320488, "memory(GiB)": 34.88, "step": 55320, "train_speed(iter/s)": 0.412358 }, { "acc": 0.88662319, "epoch": 1.4979828337800882, "grad_norm": 9.382481575012207, "learning_rate": 7.3873541681037865e-06, "loss": 0.63643198, "memory(GiB)": 34.88, "step": 55325, "train_speed(iter/s)": 0.412359 }, { "acc": 0.90073357, "epoch": 1.4981182140633038, "grad_norm": 6.043179988861084, "learning_rate": 7.386862477996102e-06, "loss": 0.53960934, "memory(GiB)": 34.88, "step": 55330, "train_speed(iter/s)": 0.412361 }, { "acc": 0.89740467, "epoch": 1.4982535943465194, "grad_norm": 13.447964668273926, "learning_rate": 7.38637075799454e-06, "loss": 0.56686964, "memory(GiB)": 34.88, "step": 55335, "train_speed(iter/s)": 0.412363 }, { "acc": 0.89174032, "epoch": 1.498388974629735, "grad_norm": 6.41029691696167, "learning_rate": 7.385879008105258e-06, "loss": 0.57577133, "memory(GiB)": 34.88, "step": 55340, "train_speed(iter/s)": 0.412365 }, { "acc": 0.89438839, "epoch": 1.4985243549129506, "grad_norm": 8.03681468963623, "learning_rate": 7.3853872283344165e-06, "loss": 0.52102919, "memory(GiB)": 34.88, "step": 55345, "train_speed(iter/s)": 0.412367 }, { "acc": 0.91718616, "epoch": 1.4986597351961661, "grad_norm": 13.98800277709961, "learning_rate": 7.384895418688176e-06, "loss": 0.47413158, "memory(GiB)": 34.88, "step": 55350, "train_speed(iter/s)": 0.412369 }, { "acc": 0.90786791, "epoch": 1.4987951154793815, "grad_norm": 13.903982162475586, "learning_rate": 7.3844035791726985e-06, "loss": 0.52201085, "memory(GiB)": 34.88, "step": 55355, "train_speed(iter/s)": 0.412371 }, { "acc": 0.9039897, "epoch": 1.498930495762597, "grad_norm": 11.360054969787598, "learning_rate": 7.383911709794145e-06, "loss": 0.59281402, "memory(GiB)": 34.88, "step": 55360, "train_speed(iter/s)": 0.412373 }, { "acc": 0.88183403, "epoch": 1.4990658760458127, "grad_norm": 26.396968841552734, "learning_rate": 7.383419810558674e-06, "loss": 0.68183722, "memory(GiB)": 34.88, "step": 55365, "train_speed(iter/s)": 0.412375 }, { "acc": 0.88759441, "epoch": 1.4992012563290282, "grad_norm": 5.203437805175781, "learning_rate": 7.382927881472453e-06, "loss": 0.60774345, "memory(GiB)": 34.88, "step": 55370, "train_speed(iter/s)": 0.412377 }, { "acc": 0.89538498, "epoch": 1.4993366366122438, "grad_norm": 7.991248607635498, "learning_rate": 7.382435922541637e-06, "loss": 0.4682858, "memory(GiB)": 34.88, "step": 55375, "train_speed(iter/s)": 0.412379 }, { "acc": 0.89731655, "epoch": 1.4994720168954594, "grad_norm": 7.518148422241211, "learning_rate": 7.381943933772397e-06, "loss": 0.48221073, "memory(GiB)": 34.88, "step": 55380, "train_speed(iter/s)": 0.412381 }, { "acc": 0.90544567, "epoch": 1.499607397178675, "grad_norm": 5.330241680145264, "learning_rate": 7.381451915170891e-06, "loss": 0.46413083, "memory(GiB)": 34.88, "step": 55385, "train_speed(iter/s)": 0.412383 }, { "acc": 0.90727119, "epoch": 1.4997427774618903, "grad_norm": 6.695896148681641, "learning_rate": 7.3809598667432825e-06, "loss": 0.54853592, "memory(GiB)": 34.88, "step": 55390, "train_speed(iter/s)": 0.412384 }, { "acc": 0.89068336, "epoch": 1.4998781577451061, "grad_norm": 9.308194160461426, "learning_rate": 7.38046778849574e-06, "loss": 0.55312862, "memory(GiB)": 34.88, "step": 55395, "train_speed(iter/s)": 0.412387 }, { "acc": 0.9018714, "epoch": 1.5000135380283215, "grad_norm": 6.109804153442383, "learning_rate": 7.37997568043442e-06, "loss": 0.53670492, "memory(GiB)": 34.88, "step": 55400, "train_speed(iter/s)": 0.412389 }, { "acc": 0.89961948, "epoch": 1.500148918311537, "grad_norm": 8.59152603149414, "learning_rate": 7.3794835425654945e-06, "loss": 0.51747026, "memory(GiB)": 34.88, "step": 55405, "train_speed(iter/s)": 0.412391 }, { "acc": 0.92399292, "epoch": 1.5002842985947527, "grad_norm": 6.869414806365967, "learning_rate": 7.3789913748951235e-06, "loss": 0.50938158, "memory(GiB)": 34.88, "step": 55410, "train_speed(iter/s)": 0.412393 }, { "acc": 0.8967721, "epoch": 1.5004196788779682, "grad_norm": 12.143669128417969, "learning_rate": 7.378499177429476e-06, "loss": 0.62521486, "memory(GiB)": 34.88, "step": 55415, "train_speed(iter/s)": 0.412394 }, { "acc": 0.89949036, "epoch": 1.5005550591611838, "grad_norm": 8.30566120147705, "learning_rate": 7.3780069501747145e-06, "loss": 0.46508398, "memory(GiB)": 34.88, "step": 55420, "train_speed(iter/s)": 0.412396 }, { "acc": 0.91182442, "epoch": 1.5006904394443992, "grad_norm": 8.624958992004395, "learning_rate": 7.377514693137007e-06, "loss": 0.50591869, "memory(GiB)": 34.88, "step": 55425, "train_speed(iter/s)": 0.412398 }, { "acc": 0.89058437, "epoch": 1.500825819727615, "grad_norm": 6.9295549392700195, "learning_rate": 7.37702240632252e-06, "loss": 0.52964334, "memory(GiB)": 34.88, "step": 55430, "train_speed(iter/s)": 0.4124 }, { "acc": 0.88788576, "epoch": 1.5009612000108303, "grad_norm": 11.282033920288086, "learning_rate": 7.376530089737419e-06, "loss": 0.63991814, "memory(GiB)": 34.88, "step": 55435, "train_speed(iter/s)": 0.412402 }, { "acc": 0.89277372, "epoch": 1.5010965802940461, "grad_norm": 7.61841344833374, "learning_rate": 7.376037743387875e-06, "loss": 0.57083573, "memory(GiB)": 34.88, "step": 55440, "train_speed(iter/s)": 0.412404 }, { "acc": 0.88752041, "epoch": 1.5012319605772615, "grad_norm": 8.749065399169922, "learning_rate": 7.37554536728005e-06, "loss": 0.64256306, "memory(GiB)": 34.88, "step": 55445, "train_speed(iter/s)": 0.412406 }, { "acc": 0.8603941, "epoch": 1.501367340860477, "grad_norm": 8.328352928161621, "learning_rate": 7.375052961420117e-06, "loss": 0.7284924, "memory(GiB)": 34.88, "step": 55450, "train_speed(iter/s)": 0.412408 }, { "acc": 0.89293795, "epoch": 1.5015027211436927, "grad_norm": 6.808588027954102, "learning_rate": 7.3745605258142404e-06, "loss": 0.56875539, "memory(GiB)": 34.88, "step": 55455, "train_speed(iter/s)": 0.412409 }, { "acc": 0.91753721, "epoch": 1.5016381014269082, "grad_norm": 4.912490367889404, "learning_rate": 7.374068060468592e-06, "loss": 0.42922511, "memory(GiB)": 34.88, "step": 55460, "train_speed(iter/s)": 0.412411 }, { "acc": 0.91173573, "epoch": 1.5017734817101238, "grad_norm": 8.276129722595215, "learning_rate": 7.373575565389338e-06, "loss": 0.46187897, "memory(GiB)": 34.88, "step": 55465, "train_speed(iter/s)": 0.412413 }, { "acc": 0.9143568, "epoch": 1.5019088619933392, "grad_norm": 10.009042739868164, "learning_rate": 7.3730830405826495e-06, "loss": 0.40441818, "memory(GiB)": 34.88, "step": 55470, "train_speed(iter/s)": 0.412415 }, { "acc": 0.90324583, "epoch": 1.502044242276555, "grad_norm": 5.006046295166016, "learning_rate": 7.372590486054698e-06, "loss": 0.5591814, "memory(GiB)": 34.88, "step": 55475, "train_speed(iter/s)": 0.412417 }, { "acc": 0.89889269, "epoch": 1.5021796225597703, "grad_norm": 6.181910514831543, "learning_rate": 7.3720979018116515e-06, "loss": 0.52790565, "memory(GiB)": 34.88, "step": 55480, "train_speed(iter/s)": 0.412419 }, { "acc": 0.90508118, "epoch": 1.502315002842986, "grad_norm": 10.076020240783691, "learning_rate": 7.371605287859681e-06, "loss": 0.55498786, "memory(GiB)": 34.88, "step": 55485, "train_speed(iter/s)": 0.412421 }, { "acc": 0.90367327, "epoch": 1.5024503831262015, "grad_norm": 8.485876083374023, "learning_rate": 7.371112644204957e-06, "loss": 0.5427866, "memory(GiB)": 34.88, "step": 55490, "train_speed(iter/s)": 0.412423 }, { "acc": 0.90307007, "epoch": 1.502585763409417, "grad_norm": 6.882416248321533, "learning_rate": 7.370619970853652e-06, "loss": 0.53152895, "memory(GiB)": 34.88, "step": 55495, "train_speed(iter/s)": 0.412425 }, { "acc": 0.88874741, "epoch": 1.5027211436926327, "grad_norm": 6.079939842224121, "learning_rate": 7.370127267811936e-06, "loss": 0.64309978, "memory(GiB)": 34.88, "step": 55500, "train_speed(iter/s)": 0.412427 }, { "acc": 0.90815411, "epoch": 1.502856523975848, "grad_norm": 9.727127075195312, "learning_rate": 7.369634535085983e-06, "loss": 0.47695742, "memory(GiB)": 34.88, "step": 55505, "train_speed(iter/s)": 0.412429 }, { "acc": 0.89607677, "epoch": 1.5029919042590638, "grad_norm": 5.240988731384277, "learning_rate": 7.369141772681965e-06, "loss": 0.51657476, "memory(GiB)": 34.88, "step": 55510, "train_speed(iter/s)": 0.41243 }, { "acc": 0.91727438, "epoch": 1.5031272845422792, "grad_norm": 11.16850757598877, "learning_rate": 7.368648980606053e-06, "loss": 0.44614868, "memory(GiB)": 34.88, "step": 55515, "train_speed(iter/s)": 0.412432 }, { "acc": 0.9027627, "epoch": 1.503262664825495, "grad_norm": 11.852058410644531, "learning_rate": 7.368156158864424e-06, "loss": 0.50693331, "memory(GiB)": 34.88, "step": 55520, "train_speed(iter/s)": 0.412434 }, { "acc": 0.91715775, "epoch": 1.5033980451087103, "grad_norm": 11.692705154418945, "learning_rate": 7.367663307463247e-06, "loss": 0.39435301, "memory(GiB)": 34.88, "step": 55525, "train_speed(iter/s)": 0.412436 }, { "acc": 0.9086731, "epoch": 1.503533425391926, "grad_norm": 5.626338958740234, "learning_rate": 7.3671704264087e-06, "loss": 0.49858847, "memory(GiB)": 34.88, "step": 55530, "train_speed(iter/s)": 0.412437 }, { "acc": 0.89031439, "epoch": 1.5036688056751415, "grad_norm": 10.278033256530762, "learning_rate": 7.366677515706955e-06, "loss": 0.62558351, "memory(GiB)": 34.88, "step": 55535, "train_speed(iter/s)": 0.412439 }, { "acc": 0.90886784, "epoch": 1.503804185958357, "grad_norm": 6.185492992401123, "learning_rate": 7.366184575364185e-06, "loss": 0.52292194, "memory(GiB)": 34.88, "step": 55540, "train_speed(iter/s)": 0.412441 }, { "acc": 0.88470764, "epoch": 1.5039395662415727, "grad_norm": 8.982863426208496, "learning_rate": 7.365691605386569e-06, "loss": 0.67364826, "memory(GiB)": 34.88, "step": 55545, "train_speed(iter/s)": 0.412443 }, { "acc": 0.86825848, "epoch": 1.504074946524788, "grad_norm": 17.273775100708008, "learning_rate": 7.36519860578028e-06, "loss": 0.7838131, "memory(GiB)": 34.88, "step": 55550, "train_speed(iter/s)": 0.412445 }, { "acc": 0.91597652, "epoch": 1.5042103268080038, "grad_norm": 14.53189754486084, "learning_rate": 7.364705576551494e-06, "loss": 0.40837641, "memory(GiB)": 34.88, "step": 55555, "train_speed(iter/s)": 0.412447 }, { "acc": 0.911549, "epoch": 1.5043457070912192, "grad_norm": 16.98555564880371, "learning_rate": 7.3642125177063875e-06, "loss": 0.46259947, "memory(GiB)": 34.88, "step": 55560, "train_speed(iter/s)": 0.412449 }, { "acc": 0.90397863, "epoch": 1.5044810873744348, "grad_norm": 5.651130199432373, "learning_rate": 7.36371942925114e-06, "loss": 0.4710113, "memory(GiB)": 34.88, "step": 55565, "train_speed(iter/s)": 0.412451 }, { "acc": 0.8947422, "epoch": 1.5046164676576503, "grad_norm": 10.080991744995117, "learning_rate": 7.363226311191921e-06, "loss": 0.61259232, "memory(GiB)": 34.88, "step": 55570, "train_speed(iter/s)": 0.412453 }, { "acc": 0.88420162, "epoch": 1.504751847940866, "grad_norm": 13.464764595031738, "learning_rate": 7.362733163534913e-06, "loss": 0.68094754, "memory(GiB)": 34.88, "step": 55575, "train_speed(iter/s)": 0.412454 }, { "acc": 0.89136496, "epoch": 1.5048872282240815, "grad_norm": 6.615973949432373, "learning_rate": 7.362239986286295e-06, "loss": 0.60527973, "memory(GiB)": 34.88, "step": 55580, "train_speed(iter/s)": 0.412456 }, { "acc": 0.89925766, "epoch": 1.5050226085072969, "grad_norm": 9.13796615600586, "learning_rate": 7.361746779452242e-06, "loss": 0.51349535, "memory(GiB)": 34.88, "step": 55585, "train_speed(iter/s)": 0.412458 }, { "acc": 0.89146156, "epoch": 1.5051579887905127, "grad_norm": 14.424151420593262, "learning_rate": 7.361253543038933e-06, "loss": 0.54837847, "memory(GiB)": 34.88, "step": 55590, "train_speed(iter/s)": 0.41246 }, { "acc": 0.8831707, "epoch": 1.505293369073728, "grad_norm": 31.408090591430664, "learning_rate": 7.360760277052546e-06, "loss": 0.78412514, "memory(GiB)": 34.88, "step": 55595, "train_speed(iter/s)": 0.412461 }, { "acc": 0.90575647, "epoch": 1.5054287493569438, "grad_norm": 3.5688085556030273, "learning_rate": 7.360266981499263e-06, "loss": 0.49363656, "memory(GiB)": 34.88, "step": 55600, "train_speed(iter/s)": 0.412463 }, { "acc": 0.92227516, "epoch": 1.5055641296401592, "grad_norm": 33.23234176635742, "learning_rate": 7.3597736563852615e-06, "loss": 0.45809155, "memory(GiB)": 34.88, "step": 55605, "train_speed(iter/s)": 0.412465 }, { "acc": 0.90361195, "epoch": 1.5056995099233748, "grad_norm": 3.775245428085327, "learning_rate": 7.359280301716722e-06, "loss": 0.50050597, "memory(GiB)": 34.88, "step": 55610, "train_speed(iter/s)": 0.412467 }, { "acc": 0.91422138, "epoch": 1.5058348902065903, "grad_norm": 9.25454044342041, "learning_rate": 7.358786917499822e-06, "loss": 0.44501529, "memory(GiB)": 34.88, "step": 55615, "train_speed(iter/s)": 0.412469 }, { "acc": 0.91565065, "epoch": 1.505970270489806, "grad_norm": 7.764523506164551, "learning_rate": 7.358293503740744e-06, "loss": 0.45244918, "memory(GiB)": 34.88, "step": 55620, "train_speed(iter/s)": 0.412471 }, { "acc": 0.91348228, "epoch": 1.5061056507730215, "grad_norm": 13.928197860717773, "learning_rate": 7.357800060445672e-06, "loss": 0.46986923, "memory(GiB)": 34.88, "step": 55625, "train_speed(iter/s)": 0.412473 }, { "acc": 0.90291405, "epoch": 1.5062410310562369, "grad_norm": 7.115919589996338, "learning_rate": 7.357306587620783e-06, "loss": 0.45230083, "memory(GiB)": 34.88, "step": 55630, "train_speed(iter/s)": 0.412475 }, { "acc": 0.90524673, "epoch": 1.5063764113394527, "grad_norm": 5.940528392791748, "learning_rate": 7.3568130852722605e-06, "loss": 0.54501276, "memory(GiB)": 34.88, "step": 55635, "train_speed(iter/s)": 0.412476 }, { "acc": 0.89825706, "epoch": 1.506511791622668, "grad_norm": 7.061626434326172, "learning_rate": 7.3563195534062856e-06, "loss": 0.4846756, "memory(GiB)": 34.88, "step": 55640, "train_speed(iter/s)": 0.412478 }, { "acc": 0.89837971, "epoch": 1.5066471719058836, "grad_norm": 7.513540267944336, "learning_rate": 7.355825992029043e-06, "loss": 0.6133914, "memory(GiB)": 34.88, "step": 55645, "train_speed(iter/s)": 0.41248 }, { "acc": 0.91359482, "epoch": 1.5067825521890992, "grad_norm": 12.06275749206543, "learning_rate": 7.355332401146715e-06, "loss": 0.47399178, "memory(GiB)": 34.88, "step": 55650, "train_speed(iter/s)": 0.412482 }, { "acc": 0.88525009, "epoch": 1.5069179324723148, "grad_norm": 11.485418319702148, "learning_rate": 7.354838780765482e-06, "loss": 0.63679361, "memory(GiB)": 34.88, "step": 55655, "train_speed(iter/s)": 0.412484 }, { "acc": 0.90644741, "epoch": 1.5070533127555303, "grad_norm": 7.822380542755127, "learning_rate": 7.35434513089153e-06, "loss": 0.42972884, "memory(GiB)": 34.88, "step": 55660, "train_speed(iter/s)": 0.412486 }, { "acc": 0.89370518, "epoch": 1.5071886930387457, "grad_norm": 9.89803695678711, "learning_rate": 7.353851451531042e-06, "loss": 0.57101908, "memory(GiB)": 34.88, "step": 55665, "train_speed(iter/s)": 0.412488 }, { "acc": 0.88144732, "epoch": 1.5073240733219615, "grad_norm": 12.04814338684082, "learning_rate": 7.353357742690204e-06, "loss": 0.72331123, "memory(GiB)": 34.88, "step": 55670, "train_speed(iter/s)": 0.412489 }, { "acc": 0.89792442, "epoch": 1.5074594536051769, "grad_norm": 6.815690040588379, "learning_rate": 7.3528640043752e-06, "loss": 0.52028503, "memory(GiB)": 34.88, "step": 55675, "train_speed(iter/s)": 0.412491 }, { "acc": 0.90786829, "epoch": 1.5075948338883927, "grad_norm": 4.618001461029053, "learning_rate": 7.3523702365922125e-06, "loss": 0.517838, "memory(GiB)": 34.88, "step": 55680, "train_speed(iter/s)": 0.412493 }, { "acc": 0.90207319, "epoch": 1.507730214171608, "grad_norm": 10.536725044250488, "learning_rate": 7.35187643934743e-06, "loss": 0.57229247, "memory(GiB)": 34.88, "step": 55685, "train_speed(iter/s)": 0.412494 }, { "acc": 0.88724766, "epoch": 1.5078655944548236, "grad_norm": 6.1253461837768555, "learning_rate": 7.351382612647036e-06, "loss": 0.64750223, "memory(GiB)": 34.88, "step": 55690, "train_speed(iter/s)": 0.412496 }, { "acc": 0.89777212, "epoch": 1.5080009747380392, "grad_norm": 12.646690368652344, "learning_rate": 7.35088875649722e-06, "loss": 0.52878752, "memory(GiB)": 34.88, "step": 55695, "train_speed(iter/s)": 0.412498 }, { "acc": 0.89928608, "epoch": 1.5081363550212548, "grad_norm": 16.19249725341797, "learning_rate": 7.350394870904165e-06, "loss": 0.46310978, "memory(GiB)": 34.88, "step": 55700, "train_speed(iter/s)": 0.4125 }, { "acc": 0.88610544, "epoch": 1.5082717353044703, "grad_norm": 11.672553062438965, "learning_rate": 7.349900955874058e-06, "loss": 0.62891026, "memory(GiB)": 34.88, "step": 55705, "train_speed(iter/s)": 0.412502 }, { "acc": 0.88320541, "epoch": 1.5084071155876857, "grad_norm": 12.805179595947266, "learning_rate": 7.349407011413089e-06, "loss": 0.69775586, "memory(GiB)": 34.88, "step": 55710, "train_speed(iter/s)": 0.412504 }, { "acc": 0.9113307, "epoch": 1.5085424958709015, "grad_norm": 16.991695404052734, "learning_rate": 7.348913037527444e-06, "loss": 0.49127812, "memory(GiB)": 34.88, "step": 55715, "train_speed(iter/s)": 0.412506 }, { "acc": 0.90449314, "epoch": 1.5086778761541169, "grad_norm": 9.028351783752441, "learning_rate": 7.34841903422331e-06, "loss": 0.53950624, "memory(GiB)": 34.88, "step": 55720, "train_speed(iter/s)": 0.412508 }, { "acc": 0.89324236, "epoch": 1.5088132564373324, "grad_norm": 5.096306800842285, "learning_rate": 7.347925001506877e-06, "loss": 0.53125081, "memory(GiB)": 34.88, "step": 55725, "train_speed(iter/s)": 0.41251 }, { "acc": 0.92728281, "epoch": 1.508948636720548, "grad_norm": 10.336575508117676, "learning_rate": 7.347430939384334e-06, "loss": 0.42501836, "memory(GiB)": 34.88, "step": 55730, "train_speed(iter/s)": 0.412512 }, { "acc": 0.90318489, "epoch": 1.5090840170037636, "grad_norm": 8.990777015686035, "learning_rate": 7.34693684786187e-06, "loss": 0.47816992, "memory(GiB)": 34.88, "step": 55735, "train_speed(iter/s)": 0.412513 }, { "acc": 0.89528999, "epoch": 1.5092193972869792, "grad_norm": 6.747007846832275, "learning_rate": 7.3464427269456715e-06, "loss": 0.59383802, "memory(GiB)": 34.88, "step": 55740, "train_speed(iter/s)": 0.412515 }, { "acc": 0.92055712, "epoch": 1.5093547775701945, "grad_norm": 7.207326889038086, "learning_rate": 7.345948576641931e-06, "loss": 0.452665, "memory(GiB)": 34.88, "step": 55745, "train_speed(iter/s)": 0.412517 }, { "acc": 0.88958006, "epoch": 1.5094901578534103, "grad_norm": 6.755659103393555, "learning_rate": 7.345454396956839e-06, "loss": 0.60962176, "memory(GiB)": 34.88, "step": 55750, "train_speed(iter/s)": 0.412519 }, { "acc": 0.89323387, "epoch": 1.5096255381366257, "grad_norm": 24.703596115112305, "learning_rate": 7.344960187896585e-06, "loss": 0.53461809, "memory(GiB)": 34.88, "step": 55755, "train_speed(iter/s)": 0.412521 }, { "acc": 0.87955799, "epoch": 1.5097609184198413, "grad_norm": 7.905874252319336, "learning_rate": 7.3444659494673595e-06, "loss": 0.63730054, "memory(GiB)": 34.88, "step": 55760, "train_speed(iter/s)": 0.412523 }, { "acc": 0.88480749, "epoch": 1.5098962987030569, "grad_norm": 7.235955238342285, "learning_rate": 7.343971681675356e-06, "loss": 0.59666586, "memory(GiB)": 34.88, "step": 55765, "train_speed(iter/s)": 0.412524 }, { "acc": 0.8875391, "epoch": 1.5100316789862724, "grad_norm": 10.694133758544922, "learning_rate": 7.343477384526762e-06, "loss": 0.53540497, "memory(GiB)": 34.88, "step": 55770, "train_speed(iter/s)": 0.412526 }, { "acc": 0.90075779, "epoch": 1.510167059269488, "grad_norm": 11.95961856842041, "learning_rate": 7.342983058027778e-06, "loss": 0.5652473, "memory(GiB)": 34.88, "step": 55775, "train_speed(iter/s)": 0.412528 }, { "acc": 0.88713942, "epoch": 1.5103024395527034, "grad_norm": 20.24267578125, "learning_rate": 7.342488702184586e-06, "loss": 0.59776187, "memory(GiB)": 34.88, "step": 55780, "train_speed(iter/s)": 0.41253 }, { "acc": 0.91273003, "epoch": 1.5104378198359192, "grad_norm": 6.596409320831299, "learning_rate": 7.341994317003386e-06, "loss": 0.48941164, "memory(GiB)": 34.88, "step": 55785, "train_speed(iter/s)": 0.412532 }, { "acc": 0.87596226, "epoch": 1.5105732001191345, "grad_norm": 13.103078842163086, "learning_rate": 7.341499902490367e-06, "loss": 0.55401115, "memory(GiB)": 34.88, "step": 55790, "train_speed(iter/s)": 0.412534 }, { "acc": 0.89658728, "epoch": 1.5107085804023503, "grad_norm": 8.826159477233887, "learning_rate": 7.3410054586517256e-06, "loss": 0.59611588, "memory(GiB)": 34.88, "step": 55795, "train_speed(iter/s)": 0.412535 }, { "acc": 0.90361328, "epoch": 1.5108439606855657, "grad_norm": 6.337291717529297, "learning_rate": 7.3405109854936514e-06, "loss": 0.52316141, "memory(GiB)": 34.88, "step": 55800, "train_speed(iter/s)": 0.412537 }, { "acc": 0.90100994, "epoch": 1.5109793409687813, "grad_norm": 9.733701705932617, "learning_rate": 7.340016483022343e-06, "loss": 0.52558994, "memory(GiB)": 34.88, "step": 55805, "train_speed(iter/s)": 0.412539 }, { "acc": 0.909056, "epoch": 1.5111147212519969, "grad_norm": 6.787483215332031, "learning_rate": 7.339521951243995e-06, "loss": 0.51253281, "memory(GiB)": 34.88, "step": 55810, "train_speed(iter/s)": 0.412541 }, { "acc": 0.9210083, "epoch": 1.5112501015352124, "grad_norm": 6.579009056091309, "learning_rate": 7.3390273901648e-06, "loss": 0.36521344, "memory(GiB)": 34.88, "step": 55815, "train_speed(iter/s)": 0.412543 }, { "acc": 0.89541702, "epoch": 1.511385481818428, "grad_norm": 7.808764457702637, "learning_rate": 7.338532799790954e-06, "loss": 0.5335113, "memory(GiB)": 34.88, "step": 55820, "train_speed(iter/s)": 0.412544 }, { "acc": 0.88295784, "epoch": 1.5115208621016434, "grad_norm": 7.832260608673096, "learning_rate": 7.338038180128652e-06, "loss": 0.62831678, "memory(GiB)": 34.88, "step": 55825, "train_speed(iter/s)": 0.412546 }, { "acc": 0.90603657, "epoch": 1.5116562423848592, "grad_norm": 4.905850887298584, "learning_rate": 7.337543531184093e-06, "loss": 0.4725009, "memory(GiB)": 34.88, "step": 55830, "train_speed(iter/s)": 0.412548 }, { "acc": 0.90019064, "epoch": 1.5117916226680745, "grad_norm": 9.261387825012207, "learning_rate": 7.3370488529634695e-06, "loss": 0.53201623, "memory(GiB)": 34.88, "step": 55835, "train_speed(iter/s)": 0.41255 }, { "acc": 0.9027668, "epoch": 1.5119270029512901, "grad_norm": 6.070114612579346, "learning_rate": 7.336554145472982e-06, "loss": 0.43898959, "memory(GiB)": 34.88, "step": 55840, "train_speed(iter/s)": 0.412552 }, { "acc": 0.88157215, "epoch": 1.5120623832345057, "grad_norm": 8.571976661682129, "learning_rate": 7.336059408718825e-06, "loss": 0.64956474, "memory(GiB)": 34.88, "step": 55845, "train_speed(iter/s)": 0.412554 }, { "acc": 0.89588928, "epoch": 1.5121977635177213, "grad_norm": 24.050512313842773, "learning_rate": 7.335564642707197e-06, "loss": 0.63924398, "memory(GiB)": 34.88, "step": 55850, "train_speed(iter/s)": 0.412556 }, { "acc": 0.90580263, "epoch": 1.5123331438009369, "grad_norm": 6.942253112792969, "learning_rate": 7.3350698474442974e-06, "loss": 0.54842863, "memory(GiB)": 34.88, "step": 55855, "train_speed(iter/s)": 0.412558 }, { "acc": 0.89347153, "epoch": 1.5124685240841522, "grad_norm": 9.309477806091309, "learning_rate": 7.334575022936322e-06, "loss": 0.54140496, "memory(GiB)": 34.88, "step": 55860, "train_speed(iter/s)": 0.412559 }, { "acc": 0.88118553, "epoch": 1.512603904367368, "grad_norm": 7.758697032928467, "learning_rate": 7.3340801691894704e-06, "loss": 0.77985821, "memory(GiB)": 34.88, "step": 55865, "train_speed(iter/s)": 0.412561 }, { "acc": 0.89495888, "epoch": 1.5127392846505834, "grad_norm": 68.14600372314453, "learning_rate": 7.333585286209941e-06, "loss": 0.51091676, "memory(GiB)": 34.88, "step": 55870, "train_speed(iter/s)": 0.412563 }, { "acc": 0.92715044, "epoch": 1.5128746649337992, "grad_norm": 3.4999020099639893, "learning_rate": 7.333090374003935e-06, "loss": 0.39198065, "memory(GiB)": 34.88, "step": 55875, "train_speed(iter/s)": 0.412565 }, { "acc": 0.89970455, "epoch": 1.5130100452170145, "grad_norm": 5.798074722290039, "learning_rate": 7.3325954325776505e-06, "loss": 0.58401132, "memory(GiB)": 34.88, "step": 55880, "train_speed(iter/s)": 0.412567 }, { "acc": 0.900914, "epoch": 1.5131454255002301, "grad_norm": 6.2794342041015625, "learning_rate": 7.332100461937288e-06, "loss": 0.51795607, "memory(GiB)": 34.88, "step": 55885, "train_speed(iter/s)": 0.412569 }, { "acc": 0.89743872, "epoch": 1.5132808057834457, "grad_norm": 11.169339179992676, "learning_rate": 7.3316054620890485e-06, "loss": 0.50794106, "memory(GiB)": 34.88, "step": 55890, "train_speed(iter/s)": 0.412571 }, { "acc": 0.88419094, "epoch": 1.5134161860666613, "grad_norm": 6.549639701843262, "learning_rate": 7.331110433039131e-06, "loss": 0.65432053, "memory(GiB)": 34.88, "step": 55895, "train_speed(iter/s)": 0.412573 }, { "acc": 0.90921125, "epoch": 1.5135515663498769, "grad_norm": 8.005942344665527, "learning_rate": 7.3306153747937425e-06, "loss": 0.42893381, "memory(GiB)": 34.88, "step": 55900, "train_speed(iter/s)": 0.412574 }, { "acc": 0.90147982, "epoch": 1.5136869466330922, "grad_norm": 5.4074249267578125, "learning_rate": 7.330120287359076e-06, "loss": 0.50099173, "memory(GiB)": 34.88, "step": 55905, "train_speed(iter/s)": 0.412576 }, { "acc": 0.91815701, "epoch": 1.513822326916308, "grad_norm": 9.66501522064209, "learning_rate": 7.32962517074134e-06, "loss": 0.46017108, "memory(GiB)": 34.88, "step": 55910, "train_speed(iter/s)": 0.412578 }, { "acc": 0.90622673, "epoch": 1.5139577071995234, "grad_norm": 4.907763481140137, "learning_rate": 7.329130024946733e-06, "loss": 0.50048423, "memory(GiB)": 34.88, "step": 55915, "train_speed(iter/s)": 0.41258 }, { "acc": 0.91179352, "epoch": 1.514093087482739, "grad_norm": 6.284844398498535, "learning_rate": 7.328634849981459e-06, "loss": 0.46457996, "memory(GiB)": 34.88, "step": 55920, "train_speed(iter/s)": 0.412582 }, { "acc": 0.89403057, "epoch": 1.5142284677659545, "grad_norm": 6.601426124572754, "learning_rate": 7.328139645851723e-06, "loss": 0.51992197, "memory(GiB)": 34.88, "step": 55925, "train_speed(iter/s)": 0.412584 }, { "acc": 0.91042747, "epoch": 1.5143638480491701, "grad_norm": 6.676933288574219, "learning_rate": 7.327644412563725e-06, "loss": 0.50910726, "memory(GiB)": 34.88, "step": 55930, "train_speed(iter/s)": 0.412586 }, { "acc": 0.90112362, "epoch": 1.5144992283323857, "grad_norm": 15.280068397521973, "learning_rate": 7.327149150123671e-06, "loss": 0.5104516, "memory(GiB)": 34.88, "step": 55935, "train_speed(iter/s)": 0.412588 }, { "acc": 0.91444025, "epoch": 1.514634608615601, "grad_norm": 7.691727638244629, "learning_rate": 7.326653858537766e-06, "loss": 0.43736281, "memory(GiB)": 34.88, "step": 55940, "train_speed(iter/s)": 0.412589 }, { "acc": 0.90096664, "epoch": 1.5147699888988169, "grad_norm": 9.57518196105957, "learning_rate": 7.326158537812212e-06, "loss": 0.54699717, "memory(GiB)": 34.88, "step": 55945, "train_speed(iter/s)": 0.412591 }, { "acc": 0.90549088, "epoch": 1.5149053691820322, "grad_norm": 8.004437446594238, "learning_rate": 7.325663187953214e-06, "loss": 0.49048176, "memory(GiB)": 34.88, "step": 55950, "train_speed(iter/s)": 0.412593 }, { "acc": 0.89843054, "epoch": 1.515040749465248, "grad_norm": 9.447625160217285, "learning_rate": 7.325167808966978e-06, "loss": 0.53862591, "memory(GiB)": 34.88, "step": 55955, "train_speed(iter/s)": 0.412595 }, { "acc": 0.90442095, "epoch": 1.5151761297484634, "grad_norm": 5.133631706237793, "learning_rate": 7.324672400859711e-06, "loss": 0.51020594, "memory(GiB)": 34.88, "step": 55960, "train_speed(iter/s)": 0.412597 }, { "acc": 0.89804363, "epoch": 1.515311510031679, "grad_norm": 5.892904758453369, "learning_rate": 7.3241769636376156e-06, "loss": 0.66208267, "memory(GiB)": 34.88, "step": 55965, "train_speed(iter/s)": 0.412598 }, { "acc": 0.90565472, "epoch": 1.5154468903148945, "grad_norm": 6.563878059387207, "learning_rate": 7.323681497306903e-06, "loss": 0.5256937, "memory(GiB)": 34.88, "step": 55970, "train_speed(iter/s)": 0.4126 }, { "acc": 0.90913353, "epoch": 1.5155822705981101, "grad_norm": 5.582046031951904, "learning_rate": 7.323186001873775e-06, "loss": 0.45829639, "memory(GiB)": 34.88, "step": 55975, "train_speed(iter/s)": 0.412602 }, { "acc": 0.9251358, "epoch": 1.5157176508813257, "grad_norm": 6.083691120147705, "learning_rate": 7.322690477344442e-06, "loss": 0.39809535, "memory(GiB)": 34.88, "step": 55980, "train_speed(iter/s)": 0.412603 }, { "acc": 0.90169277, "epoch": 1.515853031164541, "grad_norm": 10.075581550598145, "learning_rate": 7.32219492372511e-06, "loss": 0.56485462, "memory(GiB)": 34.88, "step": 55985, "train_speed(iter/s)": 0.412605 }, { "acc": 0.9073123, "epoch": 1.5159884114477569, "grad_norm": 9.256893157958984, "learning_rate": 7.321699341021986e-06, "loss": 0.56874981, "memory(GiB)": 34.88, "step": 55990, "train_speed(iter/s)": 0.412607 }, { "acc": 0.88878155, "epoch": 1.5161237917309722, "grad_norm": 8.17471981048584, "learning_rate": 7.3212037292412805e-06, "loss": 0.59324546, "memory(GiB)": 34.88, "step": 55995, "train_speed(iter/s)": 0.412609 }, { "acc": 0.9022831, "epoch": 1.5162591720141878, "grad_norm": 9.917804718017578, "learning_rate": 7.320708088389199e-06, "loss": 0.46066561, "memory(GiB)": 34.88, "step": 56000, "train_speed(iter/s)": 0.412611 }, { "acc": 0.87160254, "epoch": 1.5163945522974034, "grad_norm": 8.65061092376709, "learning_rate": 7.320212418471954e-06, "loss": 0.58873315, "memory(GiB)": 34.88, "step": 56005, "train_speed(iter/s)": 0.412613 }, { "acc": 0.89810896, "epoch": 1.516529932580619, "grad_norm": 8.228384971618652, "learning_rate": 7.3197167194957495e-06, "loss": 0.55713778, "memory(GiB)": 34.88, "step": 56010, "train_speed(iter/s)": 0.412614 }, { "acc": 0.90778008, "epoch": 1.5166653128638345, "grad_norm": 16.088430404663086, "learning_rate": 7.319220991466801e-06, "loss": 0.41663561, "memory(GiB)": 34.88, "step": 56015, "train_speed(iter/s)": 0.412616 }, { "acc": 0.89060221, "epoch": 1.51680069314705, "grad_norm": 8.82996940612793, "learning_rate": 7.318725234391316e-06, "loss": 0.58577242, "memory(GiB)": 34.88, "step": 56020, "train_speed(iter/s)": 0.412618 }, { "acc": 0.91699219, "epoch": 1.5169360734302657, "grad_norm": 7.39370059967041, "learning_rate": 7.318229448275503e-06, "loss": 0.52581949, "memory(GiB)": 34.88, "step": 56025, "train_speed(iter/s)": 0.41262 }, { "acc": 0.89162216, "epoch": 1.517071453713481, "grad_norm": 46.161376953125, "learning_rate": 7.317733633125577e-06, "loss": 0.64393511, "memory(GiB)": 34.88, "step": 56030, "train_speed(iter/s)": 0.412622 }, { "acc": 0.9017745, "epoch": 1.5172068339966969, "grad_norm": 18.629053115844727, "learning_rate": 7.317237788947743e-06, "loss": 0.4944787, "memory(GiB)": 34.88, "step": 56035, "train_speed(iter/s)": 0.412624 }, { "acc": 0.89842196, "epoch": 1.5173422142799122, "grad_norm": 7.942348003387451, "learning_rate": 7.316741915748219e-06, "loss": 0.5917923, "memory(GiB)": 34.88, "step": 56040, "train_speed(iter/s)": 0.412625 }, { "acc": 0.89655571, "epoch": 1.5174775945631278, "grad_norm": 9.281494140625, "learning_rate": 7.316246013533212e-06, "loss": 0.6269012, "memory(GiB)": 34.88, "step": 56045, "train_speed(iter/s)": 0.412627 }, { "acc": 0.90316782, "epoch": 1.5176129748463434, "grad_norm": 4.088438510894775, "learning_rate": 7.315750082308934e-06, "loss": 0.52641087, "memory(GiB)": 34.88, "step": 56050, "train_speed(iter/s)": 0.412629 }, { "acc": 0.89440432, "epoch": 1.517748355129559, "grad_norm": 10.595498085021973, "learning_rate": 7.315254122081602e-06, "loss": 0.58692312, "memory(GiB)": 34.88, "step": 56055, "train_speed(iter/s)": 0.412631 }, { "acc": 0.89983578, "epoch": 1.5178837354127745, "grad_norm": 7.54937744140625, "learning_rate": 7.314758132857426e-06, "loss": 0.53764815, "memory(GiB)": 34.88, "step": 56060, "train_speed(iter/s)": 0.412633 }, { "acc": 0.92158575, "epoch": 1.51801911569599, "grad_norm": 6.846201419830322, "learning_rate": 7.314262114642618e-06, "loss": 0.41853771, "memory(GiB)": 34.88, "step": 56065, "train_speed(iter/s)": 0.412634 }, { "acc": 0.89566927, "epoch": 1.5181544959792057, "grad_norm": 11.168771743774414, "learning_rate": 7.313766067443393e-06, "loss": 0.62459669, "memory(GiB)": 34.88, "step": 56070, "train_speed(iter/s)": 0.412636 }, { "acc": 0.90584087, "epoch": 1.518289876262421, "grad_norm": 8.33616828918457, "learning_rate": 7.313269991265966e-06, "loss": 0.54691334, "memory(GiB)": 34.88, "step": 56075, "train_speed(iter/s)": 0.412638 }, { "acc": 0.92760592, "epoch": 1.5184252565456366, "grad_norm": 9.254510879516602, "learning_rate": 7.31277388611655e-06, "loss": 0.36355634, "memory(GiB)": 34.88, "step": 56080, "train_speed(iter/s)": 0.41264 }, { "acc": 0.88035011, "epoch": 1.5185606368288522, "grad_norm": 11.590023040771484, "learning_rate": 7.312277752001359e-06, "loss": 0.73293271, "memory(GiB)": 34.88, "step": 56085, "train_speed(iter/s)": 0.412642 }, { "acc": 0.86524334, "epoch": 1.5186960171120678, "grad_norm": 18.576419830322266, "learning_rate": 7.311781588926609e-06, "loss": 0.80336266, "memory(GiB)": 34.88, "step": 56090, "train_speed(iter/s)": 0.412644 }, { "acc": 0.90699024, "epoch": 1.5188313973952834, "grad_norm": 18.119447708129883, "learning_rate": 7.311285396898518e-06, "loss": 0.4984941, "memory(GiB)": 34.88, "step": 56095, "train_speed(iter/s)": 0.412646 }, { "acc": 0.90804157, "epoch": 1.5189667776784987, "grad_norm": 9.127445220947266, "learning_rate": 7.310789175923296e-06, "loss": 0.53135004, "memory(GiB)": 34.88, "step": 56100, "train_speed(iter/s)": 0.412648 }, { "acc": 0.88854427, "epoch": 1.5191021579617145, "grad_norm": 8.466902732849121, "learning_rate": 7.3102929260071634e-06, "loss": 0.5903573, "memory(GiB)": 34.88, "step": 56105, "train_speed(iter/s)": 0.412649 }, { "acc": 0.9055212, "epoch": 1.51923753824493, "grad_norm": 5.899655342102051, "learning_rate": 7.309796647156336e-06, "loss": 0.51566925, "memory(GiB)": 34.88, "step": 56110, "train_speed(iter/s)": 0.412651 }, { "acc": 0.90424185, "epoch": 1.5193729185281457, "grad_norm": 13.568778991699219, "learning_rate": 7.309300339377031e-06, "loss": 0.53364272, "memory(GiB)": 34.88, "step": 56115, "train_speed(iter/s)": 0.412653 }, { "acc": 0.9134696, "epoch": 1.519508298811361, "grad_norm": 5.97783899307251, "learning_rate": 7.308804002675464e-06, "loss": 0.44839602, "memory(GiB)": 34.88, "step": 56120, "train_speed(iter/s)": 0.412655 }, { "acc": 0.89799023, "epoch": 1.5196436790945766, "grad_norm": 15.627833366394043, "learning_rate": 7.3083076370578545e-06, "loss": 0.55167551, "memory(GiB)": 34.88, "step": 56125, "train_speed(iter/s)": 0.412657 }, { "acc": 0.90223026, "epoch": 1.5197790593777922, "grad_norm": 6.567981719970703, "learning_rate": 7.307811242530419e-06, "loss": 0.45458012, "memory(GiB)": 34.88, "step": 56130, "train_speed(iter/s)": 0.412659 }, { "acc": 0.90982342, "epoch": 1.5199144396610078, "grad_norm": 8.841201782226562, "learning_rate": 7.3073148190993756e-06, "loss": 0.5382637, "memory(GiB)": 34.88, "step": 56135, "train_speed(iter/s)": 0.412661 }, { "acc": 0.90675125, "epoch": 1.5200498199442234, "grad_norm": 7.400546550750732, "learning_rate": 7.306818366770944e-06, "loss": 0.4592988, "memory(GiB)": 34.88, "step": 56140, "train_speed(iter/s)": 0.412662 }, { "acc": 0.88372288, "epoch": 1.5201852002274387, "grad_norm": 9.922282218933105, "learning_rate": 7.306321885551344e-06, "loss": 0.63996353, "memory(GiB)": 34.88, "step": 56145, "train_speed(iter/s)": 0.412664 }, { "acc": 0.898034, "epoch": 1.5203205805106546, "grad_norm": 4.009702205657959, "learning_rate": 7.305825375446794e-06, "loss": 0.56467056, "memory(GiB)": 34.88, "step": 56150, "train_speed(iter/s)": 0.412666 }, { "acc": 0.90256367, "epoch": 1.52045596079387, "grad_norm": 22.256103515625, "learning_rate": 7.305328836463512e-06, "loss": 0.46749878, "memory(GiB)": 34.88, "step": 56155, "train_speed(iter/s)": 0.412668 }, { "acc": 0.90301208, "epoch": 1.5205913410770855, "grad_norm": 7.108104705810547, "learning_rate": 7.304832268607721e-06, "loss": 0.57364807, "memory(GiB)": 34.88, "step": 56160, "train_speed(iter/s)": 0.41267 }, { "acc": 0.89251118, "epoch": 1.520726721360301, "grad_norm": 10.963027954101562, "learning_rate": 7.304335671885641e-06, "loss": 0.60565948, "memory(GiB)": 34.88, "step": 56165, "train_speed(iter/s)": 0.412672 }, { "acc": 0.89612999, "epoch": 1.5208621016435167, "grad_norm": 20.583162307739258, "learning_rate": 7.30383904630349e-06, "loss": 0.64151611, "memory(GiB)": 34.88, "step": 56170, "train_speed(iter/s)": 0.412674 }, { "acc": 0.88460827, "epoch": 1.5209974819267322, "grad_norm": 9.53537368774414, "learning_rate": 7.303342391867493e-06, "loss": 0.62321463, "memory(GiB)": 34.88, "step": 56175, "train_speed(iter/s)": 0.412675 }, { "acc": 0.91412315, "epoch": 1.5211328622099476, "grad_norm": 3.1789567470550537, "learning_rate": 7.302845708583869e-06, "loss": 0.43664141, "memory(GiB)": 34.88, "step": 56180, "train_speed(iter/s)": 0.412678 }, { "acc": 0.89147148, "epoch": 1.5212682424931634, "grad_norm": 7.434838771820068, "learning_rate": 7.302348996458841e-06, "loss": 0.53843503, "memory(GiB)": 34.88, "step": 56185, "train_speed(iter/s)": 0.412679 }, { "acc": 0.91228905, "epoch": 1.5214036227763788, "grad_norm": 5.243173122406006, "learning_rate": 7.301852255498632e-06, "loss": 0.45249219, "memory(GiB)": 34.88, "step": 56190, "train_speed(iter/s)": 0.412681 }, { "acc": 0.90367479, "epoch": 1.5215390030595946, "grad_norm": 12.809972763061523, "learning_rate": 7.301355485709464e-06, "loss": 0.60941758, "memory(GiB)": 34.88, "step": 56195, "train_speed(iter/s)": 0.412683 }, { "acc": 0.8952877, "epoch": 1.52167438334281, "grad_norm": 11.604904174804688, "learning_rate": 7.300858687097559e-06, "loss": 0.57537241, "memory(GiB)": 34.88, "step": 56200, "train_speed(iter/s)": 0.412685 }, { "acc": 0.90503178, "epoch": 1.5218097636260255, "grad_norm": 5.9198198318481445, "learning_rate": 7.300361859669143e-06, "loss": 0.4994657, "memory(GiB)": 34.88, "step": 56205, "train_speed(iter/s)": 0.412687 }, { "acc": 0.91186676, "epoch": 1.521945143909241, "grad_norm": 6.395482063293457, "learning_rate": 7.299865003430436e-06, "loss": 0.47259502, "memory(GiB)": 34.88, "step": 56210, "train_speed(iter/s)": 0.412689 }, { "acc": 0.87208681, "epoch": 1.5220805241924567, "grad_norm": 15.954543113708496, "learning_rate": 7.299368118387664e-06, "loss": 0.67375698, "memory(GiB)": 34.88, "step": 56215, "train_speed(iter/s)": 0.41269 }, { "acc": 0.89838743, "epoch": 1.5222159044756722, "grad_norm": 12.150354385375977, "learning_rate": 7.298871204547052e-06, "loss": 0.51123371, "memory(GiB)": 34.88, "step": 56220, "train_speed(iter/s)": 0.412692 }, { "acc": 0.91457167, "epoch": 1.5223512847588876, "grad_norm": 10.647974014282227, "learning_rate": 7.298374261914827e-06, "loss": 0.49493537, "memory(GiB)": 34.88, "step": 56225, "train_speed(iter/s)": 0.412694 }, { "acc": 0.90642481, "epoch": 1.5224866650421034, "grad_norm": 8.434065818786621, "learning_rate": 7.297877290497207e-06, "loss": 0.49559493, "memory(GiB)": 34.88, "step": 56230, "train_speed(iter/s)": 0.412696 }, { "acc": 0.92137699, "epoch": 1.5226220453253188, "grad_norm": 6.879419326782227, "learning_rate": 7.297380290300426e-06, "loss": 0.4739531, "memory(GiB)": 34.88, "step": 56235, "train_speed(iter/s)": 0.412698 }, { "acc": 0.8920845, "epoch": 1.5227574256085343, "grad_norm": 7.260431289672852, "learning_rate": 7.296883261330704e-06, "loss": 0.5540513, "memory(GiB)": 34.88, "step": 56240, "train_speed(iter/s)": 0.4127 }, { "acc": 0.89766521, "epoch": 1.52289280589175, "grad_norm": 10.03779125213623, "learning_rate": 7.296386203594271e-06, "loss": 0.56931458, "memory(GiB)": 34.88, "step": 56245, "train_speed(iter/s)": 0.412701 }, { "acc": 0.90047951, "epoch": 1.5230281861749655, "grad_norm": 3.4616165161132812, "learning_rate": 7.2958891170973505e-06, "loss": 0.55325356, "memory(GiB)": 34.88, "step": 56250, "train_speed(iter/s)": 0.412703 }, { "acc": 0.89821968, "epoch": 1.523163566458181, "grad_norm": 11.6753511428833, "learning_rate": 7.295392001846169e-06, "loss": 0.53730583, "memory(GiB)": 34.88, "step": 56255, "train_speed(iter/s)": 0.412705 }, { "acc": 0.88238316, "epoch": 1.5232989467413964, "grad_norm": 19.273597717285156, "learning_rate": 7.294894857846958e-06, "loss": 0.59605231, "memory(GiB)": 34.88, "step": 56260, "train_speed(iter/s)": 0.412706 }, { "acc": 0.88500996, "epoch": 1.5234343270246122, "grad_norm": 7.179394721984863, "learning_rate": 7.294397685105943e-06, "loss": 0.56846542, "memory(GiB)": 34.88, "step": 56265, "train_speed(iter/s)": 0.412708 }, { "acc": 0.90439253, "epoch": 1.5235697073078276, "grad_norm": 7.9925761222839355, "learning_rate": 7.2939004836293505e-06, "loss": 0.43005657, "memory(GiB)": 34.88, "step": 56270, "train_speed(iter/s)": 0.41271 }, { "acc": 0.91543312, "epoch": 1.5237050875910434, "grad_norm": 3.598881959915161, "learning_rate": 7.293403253423413e-06, "loss": 0.44481974, "memory(GiB)": 34.88, "step": 56275, "train_speed(iter/s)": 0.412712 }, { "acc": 0.9211504, "epoch": 1.5238404678742588, "grad_norm": 7.46062707901001, "learning_rate": 7.292905994494356e-06, "loss": 0.35415514, "memory(GiB)": 34.88, "step": 56280, "train_speed(iter/s)": 0.412713 }, { "acc": 0.89453049, "epoch": 1.5239758481574743, "grad_norm": 12.539807319641113, "learning_rate": 7.292408706848406e-06, "loss": 0.6125761, "memory(GiB)": 34.88, "step": 56285, "train_speed(iter/s)": 0.412715 }, { "acc": 0.89786606, "epoch": 1.52411122844069, "grad_norm": 5.083742618560791, "learning_rate": 7.2919113904918e-06, "loss": 0.57816262, "memory(GiB)": 34.88, "step": 56290, "train_speed(iter/s)": 0.412717 }, { "acc": 0.90952625, "epoch": 1.5242466087239055, "grad_norm": 15.85202693939209, "learning_rate": 7.291414045430761e-06, "loss": 0.40550923, "memory(GiB)": 34.88, "step": 56295, "train_speed(iter/s)": 0.412719 }, { "acc": 0.88517466, "epoch": 1.524381989007121, "grad_norm": 10.814865112304688, "learning_rate": 7.290916671671523e-06, "loss": 0.59980059, "memory(GiB)": 34.88, "step": 56300, "train_speed(iter/s)": 0.412721 }, { "acc": 0.89078312, "epoch": 1.5245173692903364, "grad_norm": 8.24317455291748, "learning_rate": 7.290419269220317e-06, "loss": 0.60400162, "memory(GiB)": 34.88, "step": 56305, "train_speed(iter/s)": 0.412723 }, { "acc": 0.91650009, "epoch": 1.5246527495735522, "grad_norm": 6.473984718322754, "learning_rate": 7.289921838083369e-06, "loss": 0.40260415, "memory(GiB)": 34.88, "step": 56310, "train_speed(iter/s)": 0.412725 }, { "acc": 0.9251708, "epoch": 1.5247881298567676, "grad_norm": 7.117663383483887, "learning_rate": 7.289424378266916e-06, "loss": 0.38167198, "memory(GiB)": 34.88, "step": 56315, "train_speed(iter/s)": 0.412726 }, { "acc": 0.88787556, "epoch": 1.5249235101399832, "grad_norm": 8.548184394836426, "learning_rate": 7.288926889777188e-06, "loss": 0.53425822, "memory(GiB)": 34.88, "step": 56320, "train_speed(iter/s)": 0.412728 }, { "acc": 0.90065937, "epoch": 1.5250588904231988, "grad_norm": 7.217508316040039, "learning_rate": 7.288429372620416e-06, "loss": 0.52516956, "memory(GiB)": 34.88, "step": 56325, "train_speed(iter/s)": 0.412729 }, { "acc": 0.87200203, "epoch": 1.5251942707064143, "grad_norm": 11.236515998840332, "learning_rate": 7.287931826802832e-06, "loss": 0.6689158, "memory(GiB)": 34.88, "step": 56330, "train_speed(iter/s)": 0.412731 }, { "acc": 0.88419704, "epoch": 1.52532965098963, "grad_norm": 4.2625508308410645, "learning_rate": 7.28743425233067e-06, "loss": 0.56374917, "memory(GiB)": 34.88, "step": 56335, "train_speed(iter/s)": 0.412733 }, { "acc": 0.9133419, "epoch": 1.5254650312728453, "grad_norm": 6.895529270172119, "learning_rate": 7.286936649210163e-06, "loss": 0.48262506, "memory(GiB)": 34.88, "step": 56340, "train_speed(iter/s)": 0.412735 }, { "acc": 0.91641464, "epoch": 1.525600411556061, "grad_norm": 8.238279342651367, "learning_rate": 7.286439017447543e-06, "loss": 0.39920225, "memory(GiB)": 34.88, "step": 56345, "train_speed(iter/s)": 0.412737 }, { "acc": 0.89991703, "epoch": 1.5257357918392764, "grad_norm": 5.730626583099365, "learning_rate": 7.2859413570490466e-06, "loss": 0.47411838, "memory(GiB)": 34.88, "step": 56350, "train_speed(iter/s)": 0.412739 }, { "acc": 0.89549866, "epoch": 1.5258711721224922, "grad_norm": 10.101082801818848, "learning_rate": 7.285443668020905e-06, "loss": 0.6126904, "memory(GiB)": 34.88, "step": 56355, "train_speed(iter/s)": 0.412741 }, { "acc": 0.89940319, "epoch": 1.5260065524057076, "grad_norm": 7.243188381195068, "learning_rate": 7.284945950369357e-06, "loss": 0.54321651, "memory(GiB)": 34.88, "step": 56360, "train_speed(iter/s)": 0.412743 }, { "acc": 0.90306292, "epoch": 1.5261419326889232, "grad_norm": 4.329080104827881, "learning_rate": 7.284448204100632e-06, "loss": 0.50721464, "memory(GiB)": 34.88, "step": 56365, "train_speed(iter/s)": 0.412744 }, { "acc": 0.90236559, "epoch": 1.5262773129721388, "grad_norm": 11.808873176574707, "learning_rate": 7.283950429220966e-06, "loss": 0.46302333, "memory(GiB)": 34.88, "step": 56370, "train_speed(iter/s)": 0.412746 }, { "acc": 0.90816021, "epoch": 1.5264126932553543, "grad_norm": 7.883615016937256, "learning_rate": 7.283452625736599e-06, "loss": 0.5163795, "memory(GiB)": 34.88, "step": 56375, "train_speed(iter/s)": 0.412748 }, { "acc": 0.89892235, "epoch": 1.52654807353857, "grad_norm": 5.756627559661865, "learning_rate": 7.282954793653762e-06, "loss": 0.5049036, "memory(GiB)": 34.88, "step": 56380, "train_speed(iter/s)": 0.41275 }, { "acc": 0.90369139, "epoch": 1.5266834538217853, "grad_norm": 8.305723190307617, "learning_rate": 7.282456932978698e-06, "loss": 0.57919717, "memory(GiB)": 34.88, "step": 56385, "train_speed(iter/s)": 0.412752 }, { "acc": 0.9113596, "epoch": 1.526818834105001, "grad_norm": 6.09092378616333, "learning_rate": 7.281959043717636e-06, "loss": 0.49642906, "memory(GiB)": 34.88, "step": 56390, "train_speed(iter/s)": 0.412753 }, { "acc": 0.90575371, "epoch": 1.5269542143882164, "grad_norm": 10.814096450805664, "learning_rate": 7.281461125876816e-06, "loss": 0.54676948, "memory(GiB)": 34.88, "step": 56395, "train_speed(iter/s)": 0.412755 }, { "acc": 0.90804653, "epoch": 1.527089594671432, "grad_norm": 6.472392559051514, "learning_rate": 7.280963179462477e-06, "loss": 0.45469851, "memory(GiB)": 34.88, "step": 56400, "train_speed(iter/s)": 0.412757 }, { "acc": 0.90242882, "epoch": 1.5272249749546476, "grad_norm": 7.975703239440918, "learning_rate": 7.280465204480857e-06, "loss": 0.53415885, "memory(GiB)": 34.88, "step": 56405, "train_speed(iter/s)": 0.412759 }, { "acc": 0.89269447, "epoch": 1.5273603552378632, "grad_norm": 15.702720642089844, "learning_rate": 7.279967200938191e-06, "loss": 0.60164394, "memory(GiB)": 34.88, "step": 56410, "train_speed(iter/s)": 0.412761 }, { "acc": 0.8996273, "epoch": 1.5274957355210788, "grad_norm": 5.335821628570557, "learning_rate": 7.2794691688407195e-06, "loss": 0.54926353, "memory(GiB)": 34.88, "step": 56415, "train_speed(iter/s)": 0.412763 }, { "acc": 0.89872465, "epoch": 1.5276311158042941, "grad_norm": 13.6969575881958, "learning_rate": 7.278971108194681e-06, "loss": 0.55882893, "memory(GiB)": 34.88, "step": 56420, "train_speed(iter/s)": 0.412765 }, { "acc": 0.8896698, "epoch": 1.52776649608751, "grad_norm": 21.302934646606445, "learning_rate": 7.278473019006315e-06, "loss": 0.5969461, "memory(GiB)": 34.88, "step": 56425, "train_speed(iter/s)": 0.412767 }, { "acc": 0.91186571, "epoch": 1.5279018763707253, "grad_norm": 4.318882942199707, "learning_rate": 7.27797490128186e-06, "loss": 0.41316128, "memory(GiB)": 34.88, "step": 56430, "train_speed(iter/s)": 0.412768 }, { "acc": 0.9008564, "epoch": 1.528037256653941, "grad_norm": 6.243368625640869, "learning_rate": 7.277476755027556e-06, "loss": 0.48389988, "memory(GiB)": 34.88, "step": 56435, "train_speed(iter/s)": 0.41277 }, { "acc": 0.89016094, "epoch": 1.5281726369371564, "grad_norm": 5.432116508483887, "learning_rate": 7.276978580249647e-06, "loss": 0.57352457, "memory(GiB)": 34.88, "step": 56440, "train_speed(iter/s)": 0.412772 }, { "acc": 0.9093358, "epoch": 1.528308017220372, "grad_norm": 6.243907928466797, "learning_rate": 7.276480376954368e-06, "loss": 0.45445113, "memory(GiB)": 34.88, "step": 56445, "train_speed(iter/s)": 0.412774 }, { "acc": 0.86606827, "epoch": 1.5284433975035876, "grad_norm": 28.499834060668945, "learning_rate": 7.2759821451479625e-06, "loss": 0.65978703, "memory(GiB)": 34.88, "step": 56450, "train_speed(iter/s)": 0.412776 }, { "acc": 0.89225426, "epoch": 1.5285787777868032, "grad_norm": 7.09921407699585, "learning_rate": 7.275483884836672e-06, "loss": 0.63537846, "memory(GiB)": 34.88, "step": 56455, "train_speed(iter/s)": 0.412778 }, { "acc": 0.90609121, "epoch": 1.5287141580700188, "grad_norm": 4.332963466644287, "learning_rate": 7.274985596026738e-06, "loss": 0.5694521, "memory(GiB)": 34.88, "step": 56460, "train_speed(iter/s)": 0.41278 }, { "acc": 0.92370529, "epoch": 1.5288495383532341, "grad_norm": 10.928812026977539, "learning_rate": 7.274487278724402e-06, "loss": 0.43162293, "memory(GiB)": 34.88, "step": 56465, "train_speed(iter/s)": 0.412781 }, { "acc": 0.8980505, "epoch": 1.52898491863645, "grad_norm": 9.554228782653809, "learning_rate": 7.273988932935908e-06, "loss": 0.54388509, "memory(GiB)": 34.88, "step": 56470, "train_speed(iter/s)": 0.412783 }, { "acc": 0.91833849, "epoch": 1.5291202989196653, "grad_norm": 5.946626663208008, "learning_rate": 7.273490558667497e-06, "loss": 0.50233068, "memory(GiB)": 34.88, "step": 56475, "train_speed(iter/s)": 0.412785 }, { "acc": 0.90914698, "epoch": 1.5292556792028809, "grad_norm": 8.788681030273438, "learning_rate": 7.272992155925413e-06, "loss": 0.46817303, "memory(GiB)": 34.88, "step": 56480, "train_speed(iter/s)": 0.412787 }, { "acc": 0.92338591, "epoch": 1.5293910594860964, "grad_norm": 5.6968817710876465, "learning_rate": 7.272493724715901e-06, "loss": 0.38540692, "memory(GiB)": 34.88, "step": 56485, "train_speed(iter/s)": 0.412789 }, { "acc": 0.90137911, "epoch": 1.529526439769312, "grad_norm": 12.962723731994629, "learning_rate": 7.271995265045202e-06, "loss": 0.59270492, "memory(GiB)": 34.88, "step": 56490, "train_speed(iter/s)": 0.412791 }, { "acc": 0.88659821, "epoch": 1.5296618200525276, "grad_norm": 4.760529518127441, "learning_rate": 7.271496776919561e-06, "loss": 0.58151979, "memory(GiB)": 34.88, "step": 56495, "train_speed(iter/s)": 0.412792 }, { "acc": 0.88682423, "epoch": 1.529797200335743, "grad_norm": 6.793002605438232, "learning_rate": 7.270998260345223e-06, "loss": 0.60635233, "memory(GiB)": 34.88, "step": 56500, "train_speed(iter/s)": 0.412794 }, { "acc": 0.92300758, "epoch": 1.5299325806189588, "grad_norm": 6.695400714874268, "learning_rate": 7.270499715328433e-06, "loss": 0.46278014, "memory(GiB)": 34.88, "step": 56505, "train_speed(iter/s)": 0.412796 }, { "acc": 0.90954208, "epoch": 1.5300679609021741, "grad_norm": 6.686457633972168, "learning_rate": 7.2700011418754345e-06, "loss": 0.50314641, "memory(GiB)": 34.88, "step": 56510, "train_speed(iter/s)": 0.412798 }, { "acc": 0.92168674, "epoch": 1.53020334118539, "grad_norm": 10.992501258850098, "learning_rate": 7.269502539992476e-06, "loss": 0.38494077, "memory(GiB)": 34.88, "step": 56515, "train_speed(iter/s)": 0.412799 }, { "acc": 0.89185925, "epoch": 1.5303387214686053, "grad_norm": 7.9481120109558105, "learning_rate": 7.269003909685801e-06, "loss": 0.55051622, "memory(GiB)": 34.88, "step": 56520, "train_speed(iter/s)": 0.412801 }, { "acc": 0.90892334, "epoch": 1.5304741017518209, "grad_norm": 14.89919376373291, "learning_rate": 7.2685052509616595e-06, "loss": 0.49591064, "memory(GiB)": 34.88, "step": 56525, "train_speed(iter/s)": 0.412802 }, { "acc": 0.91098843, "epoch": 1.5306094820350364, "grad_norm": 11.189504623413086, "learning_rate": 7.268006563826294e-06, "loss": 0.45163221, "memory(GiB)": 34.88, "step": 56530, "train_speed(iter/s)": 0.412804 }, { "acc": 0.89101219, "epoch": 1.530744862318252, "grad_norm": 10.282448768615723, "learning_rate": 7.267507848285953e-06, "loss": 0.67376938, "memory(GiB)": 34.88, "step": 56535, "train_speed(iter/s)": 0.412806 }, { "acc": 0.88884335, "epoch": 1.5308802426014676, "grad_norm": 5.190971374511719, "learning_rate": 7.267009104346884e-06, "loss": 0.64191933, "memory(GiB)": 34.88, "step": 56540, "train_speed(iter/s)": 0.412808 }, { "acc": 0.8900341, "epoch": 1.531015622884683, "grad_norm": 8.510412216186523, "learning_rate": 7.266510332015335e-06, "loss": 0.57824936, "memory(GiB)": 34.88, "step": 56545, "train_speed(iter/s)": 0.41281 }, { "acc": 0.88008728, "epoch": 1.5311510031678988, "grad_norm": 6.15342378616333, "learning_rate": 7.266011531297553e-06, "loss": 0.6154952, "memory(GiB)": 34.88, "step": 56550, "train_speed(iter/s)": 0.412812 }, { "acc": 0.8968235, "epoch": 1.5312863834511141, "grad_norm": 8.40419864654541, "learning_rate": 7.265512702199789e-06, "loss": 0.55288157, "memory(GiB)": 34.88, "step": 56555, "train_speed(iter/s)": 0.412814 }, { "acc": 0.90739326, "epoch": 1.5314217637343297, "grad_norm": 5.4901556968688965, "learning_rate": 7.265013844728289e-06, "loss": 0.44655318, "memory(GiB)": 34.88, "step": 56560, "train_speed(iter/s)": 0.412815 }, { "acc": 0.89851952, "epoch": 1.5315571440175453, "grad_norm": 9.178352355957031, "learning_rate": 7.264514958889302e-06, "loss": 0.5354445, "memory(GiB)": 34.88, "step": 56565, "train_speed(iter/s)": 0.412817 }, { "acc": 0.88922176, "epoch": 1.5316925243007609, "grad_norm": 7.299638748168945, "learning_rate": 7.264016044689084e-06, "loss": 0.5285964, "memory(GiB)": 34.88, "step": 56570, "train_speed(iter/s)": 0.412819 }, { "acc": 0.89655447, "epoch": 1.5318279045839764, "grad_norm": 7.482475757598877, "learning_rate": 7.263517102133875e-06, "loss": 0.57259908, "memory(GiB)": 34.88, "step": 56575, "train_speed(iter/s)": 0.412821 }, { "acc": 0.90354176, "epoch": 1.5319632848671918, "grad_norm": 12.232321739196777, "learning_rate": 7.263018131229932e-06, "loss": 0.47573338, "memory(GiB)": 34.88, "step": 56580, "train_speed(iter/s)": 0.412822 }, { "acc": 0.88142624, "epoch": 1.5320986651504076, "grad_norm": 7.138794422149658, "learning_rate": 7.262519131983502e-06, "loss": 0.65973358, "memory(GiB)": 34.88, "step": 56585, "train_speed(iter/s)": 0.412824 }, { "acc": 0.88015137, "epoch": 1.532234045433623, "grad_norm": 9.543557167053223, "learning_rate": 7.262020104400837e-06, "loss": 0.68875313, "memory(GiB)": 34.88, "step": 56590, "train_speed(iter/s)": 0.412826 }, { "acc": 0.92422962, "epoch": 1.5323694257168388, "grad_norm": 8.600159645080566, "learning_rate": 7.261521048488192e-06, "loss": 0.36860638, "memory(GiB)": 34.88, "step": 56595, "train_speed(iter/s)": 0.412827 }, { "acc": 0.892169, "epoch": 1.5325048060000541, "grad_norm": 53.434146881103516, "learning_rate": 7.261021964251813e-06, "loss": 0.6264493, "memory(GiB)": 34.88, "step": 56600, "train_speed(iter/s)": 0.412829 }, { "acc": 0.92069273, "epoch": 1.5326401862832697, "grad_norm": 8.284351348876953, "learning_rate": 7.260522851697955e-06, "loss": 0.45688839, "memory(GiB)": 34.88, "step": 56605, "train_speed(iter/s)": 0.412831 }, { "acc": 0.87158899, "epoch": 1.5327755665664853, "grad_norm": 14.187630653381348, "learning_rate": 7.2600237108328695e-06, "loss": 0.79399223, "memory(GiB)": 34.88, "step": 56610, "train_speed(iter/s)": 0.412833 }, { "acc": 0.90320721, "epoch": 1.5329109468497009, "grad_norm": 9.398885726928711, "learning_rate": 7.2595245416628114e-06, "loss": 0.49650784, "memory(GiB)": 34.88, "step": 56615, "train_speed(iter/s)": 0.412835 }, { "acc": 0.89460325, "epoch": 1.5330463271329164, "grad_norm": 27.13160514831543, "learning_rate": 7.25902534419403e-06, "loss": 0.63527355, "memory(GiB)": 34.88, "step": 56620, "train_speed(iter/s)": 0.412837 }, { "acc": 0.89982395, "epoch": 1.5331817074161318, "grad_norm": 10.531678199768066, "learning_rate": 7.258526118432781e-06, "loss": 0.5449338, "memory(GiB)": 34.88, "step": 56625, "train_speed(iter/s)": 0.412839 }, { "acc": 0.91912756, "epoch": 1.5333170876993476, "grad_norm": 4.996700286865234, "learning_rate": 7.258026864385318e-06, "loss": 0.4831924, "memory(GiB)": 34.88, "step": 56630, "train_speed(iter/s)": 0.412841 }, { "acc": 0.91031628, "epoch": 1.533452467982563, "grad_norm": 8.284915924072266, "learning_rate": 7.2575275820578946e-06, "loss": 0.45523372, "memory(GiB)": 34.88, "step": 56635, "train_speed(iter/s)": 0.412843 }, { "acc": 0.88764172, "epoch": 1.5335878482657785, "grad_norm": 5.400578498840332, "learning_rate": 7.257028271456767e-06, "loss": 0.56224608, "memory(GiB)": 34.88, "step": 56640, "train_speed(iter/s)": 0.412845 }, { "acc": 0.89755564, "epoch": 1.5337232285489941, "grad_norm": 10.169197082519531, "learning_rate": 7.2565289325881875e-06, "loss": 0.57128987, "memory(GiB)": 34.88, "step": 56645, "train_speed(iter/s)": 0.412846 }, { "acc": 0.88723507, "epoch": 1.5338586088322097, "grad_norm": 21.07572364807129, "learning_rate": 7.2560295654584134e-06, "loss": 0.6264801, "memory(GiB)": 34.88, "step": 56650, "train_speed(iter/s)": 0.412848 }, { "acc": 0.91940813, "epoch": 1.5339939891154253, "grad_norm": 6.770440578460693, "learning_rate": 7.255530170073699e-06, "loss": 0.41684513, "memory(GiB)": 34.88, "step": 56655, "train_speed(iter/s)": 0.41285 }, { "acc": 0.90210571, "epoch": 1.5341293693986406, "grad_norm": 13.307828903198242, "learning_rate": 7.2550307464403e-06, "loss": 0.54733782, "memory(GiB)": 34.88, "step": 56660, "train_speed(iter/s)": 0.412852 }, { "acc": 0.89988613, "epoch": 1.5342647496818564, "grad_norm": 3.892113208770752, "learning_rate": 7.254531294564474e-06, "loss": 0.5346211, "memory(GiB)": 34.88, "step": 56665, "train_speed(iter/s)": 0.412853 }, { "acc": 0.91588097, "epoch": 1.5344001299650718, "grad_norm": 8.176233291625977, "learning_rate": 7.254031814452476e-06, "loss": 0.41380281, "memory(GiB)": 34.88, "step": 56670, "train_speed(iter/s)": 0.412855 }, { "acc": 0.90244522, "epoch": 1.5345355102482876, "grad_norm": 12.084936141967773, "learning_rate": 7.253532306110564e-06, "loss": 0.48775568, "memory(GiB)": 34.88, "step": 56675, "train_speed(iter/s)": 0.412857 }, { "acc": 0.8755724, "epoch": 1.534670890531503, "grad_norm": 14.23246955871582, "learning_rate": 7.253032769544995e-06, "loss": 0.73053808, "memory(GiB)": 34.88, "step": 56680, "train_speed(iter/s)": 0.412859 }, { "acc": 0.88526783, "epoch": 1.5348062708147185, "grad_norm": 10.47166633605957, "learning_rate": 7.252533204762029e-06, "loss": 0.62802992, "memory(GiB)": 34.88, "step": 56685, "train_speed(iter/s)": 0.412861 }, { "acc": 0.89701099, "epoch": 1.5349416510979341, "grad_norm": 11.959125518798828, "learning_rate": 7.25203361176792e-06, "loss": 0.5410923, "memory(GiB)": 34.88, "step": 56690, "train_speed(iter/s)": 0.412862 }, { "acc": 0.90588799, "epoch": 1.5350770313811497, "grad_norm": 6.524888038635254, "learning_rate": 7.25153399056893e-06, "loss": 0.43780794, "memory(GiB)": 34.88, "step": 56695, "train_speed(iter/s)": 0.412864 }, { "acc": 0.92503757, "epoch": 1.5352124116643653, "grad_norm": 8.495766639709473, "learning_rate": 7.251034341171314e-06, "loss": 0.35372868, "memory(GiB)": 34.88, "step": 56700, "train_speed(iter/s)": 0.412866 }, { "acc": 0.90184307, "epoch": 1.5353477919475806, "grad_norm": 12.97059154510498, "learning_rate": 7.250534663581333e-06, "loss": 0.54183016, "memory(GiB)": 34.88, "step": 56705, "train_speed(iter/s)": 0.412868 }, { "acc": 0.90499859, "epoch": 1.5354831722307964, "grad_norm": 5.140671730041504, "learning_rate": 7.250034957805248e-06, "loss": 0.5046979, "memory(GiB)": 34.88, "step": 56710, "train_speed(iter/s)": 0.412869 }, { "acc": 0.90268307, "epoch": 1.5356185525140118, "grad_norm": 8.828278541564941, "learning_rate": 7.249535223849316e-06, "loss": 0.50295038, "memory(GiB)": 34.88, "step": 56715, "train_speed(iter/s)": 0.412871 }, { "acc": 0.88956347, "epoch": 1.5357539327972274, "grad_norm": 6.638428688049316, "learning_rate": 7.249035461719799e-06, "loss": 0.61117487, "memory(GiB)": 34.88, "step": 56720, "train_speed(iter/s)": 0.412873 }, { "acc": 0.90619888, "epoch": 1.535889313080443, "grad_norm": 4.139581680297852, "learning_rate": 7.248535671422956e-06, "loss": 0.49371967, "memory(GiB)": 34.88, "step": 56725, "train_speed(iter/s)": 0.412875 }, { "acc": 0.90138941, "epoch": 1.5360246933636585, "grad_norm": 5.426465034484863, "learning_rate": 7.248035852965051e-06, "loss": 0.51338725, "memory(GiB)": 34.88, "step": 56730, "train_speed(iter/s)": 0.412876 }, { "acc": 0.90763044, "epoch": 1.5361600736468741, "grad_norm": 7.088599681854248, "learning_rate": 7.247536006352341e-06, "loss": 0.47371135, "memory(GiB)": 34.88, "step": 56735, "train_speed(iter/s)": 0.412878 }, { "acc": 0.88925629, "epoch": 1.5362954539300895, "grad_norm": 8.763079643249512, "learning_rate": 7.247036131591091e-06, "loss": 0.62062044, "memory(GiB)": 34.88, "step": 56740, "train_speed(iter/s)": 0.41288 }, { "acc": 0.90489006, "epoch": 1.5364308342133053, "grad_norm": 8.327946662902832, "learning_rate": 7.246536228687562e-06, "loss": 0.52634273, "memory(GiB)": 34.88, "step": 56745, "train_speed(iter/s)": 0.412881 }, { "acc": 0.89614849, "epoch": 1.5365662144965206, "grad_norm": 11.3707275390625, "learning_rate": 7.2460362976480145e-06, "loss": 0.5888607, "memory(GiB)": 34.88, "step": 56750, "train_speed(iter/s)": 0.412883 }, { "acc": 0.91103859, "epoch": 1.5367015947797362, "grad_norm": 21.562313079833984, "learning_rate": 7.245536338478712e-06, "loss": 0.47290931, "memory(GiB)": 34.88, "step": 56755, "train_speed(iter/s)": 0.412885 }, { "acc": 0.90089178, "epoch": 1.5368369750629518, "grad_norm": 5.531270980834961, "learning_rate": 7.245036351185919e-06, "loss": 0.52297888, "memory(GiB)": 34.88, "step": 56760, "train_speed(iter/s)": 0.412887 }, { "acc": 0.91151772, "epoch": 1.5369723553461674, "grad_norm": 10.249833106994629, "learning_rate": 7.244536335775897e-06, "loss": 0.43019753, "memory(GiB)": 34.88, "step": 56765, "train_speed(iter/s)": 0.412888 }, { "acc": 0.91177292, "epoch": 1.537107735629383, "grad_norm": 9.685525894165039, "learning_rate": 7.24403629225491e-06, "loss": 0.53900766, "memory(GiB)": 34.88, "step": 56770, "train_speed(iter/s)": 0.41289 }, { "acc": 0.90488529, "epoch": 1.5372431159125983, "grad_norm": 8.879405975341797, "learning_rate": 7.243536220629223e-06, "loss": 0.46985788, "memory(GiB)": 34.88, "step": 56775, "train_speed(iter/s)": 0.412892 }, { "acc": 0.90326366, "epoch": 1.5373784961958141, "grad_norm": 6.721384048461914, "learning_rate": 7.243036120905102e-06, "loss": 0.51047087, "memory(GiB)": 34.88, "step": 56780, "train_speed(iter/s)": 0.412894 }, { "acc": 0.90486736, "epoch": 1.5375138764790295, "grad_norm": 9.145027160644531, "learning_rate": 7.2425359930888065e-06, "loss": 0.50674152, "memory(GiB)": 34.88, "step": 56785, "train_speed(iter/s)": 0.412896 }, { "acc": 0.89833994, "epoch": 1.5376492567622453, "grad_norm": 7.566407203674316, "learning_rate": 7.242035837186605e-06, "loss": 0.56179647, "memory(GiB)": 34.88, "step": 56790, "train_speed(iter/s)": 0.412898 }, { "acc": 0.89797659, "epoch": 1.5377846370454606, "grad_norm": 13.415283203125, "learning_rate": 7.241535653204763e-06, "loss": 0.46500216, "memory(GiB)": 34.88, "step": 56795, "train_speed(iter/s)": 0.412899 }, { "acc": 0.91858778, "epoch": 1.5379200173286762, "grad_norm": 4.7788166999816895, "learning_rate": 7.241035441149545e-06, "loss": 0.39211817, "memory(GiB)": 34.88, "step": 56800, "train_speed(iter/s)": 0.412901 }, { "acc": 0.90191612, "epoch": 1.5380553976118918, "grad_norm": 9.318192481994629, "learning_rate": 7.240535201027219e-06, "loss": 0.50702038, "memory(GiB)": 34.88, "step": 56805, "train_speed(iter/s)": 0.412903 }, { "acc": 0.91119995, "epoch": 1.5381907778951074, "grad_norm": 6.004639625549316, "learning_rate": 7.24003493284405e-06, "loss": 0.49148583, "memory(GiB)": 34.88, "step": 56810, "train_speed(iter/s)": 0.412905 }, { "acc": 0.92622318, "epoch": 1.538326158178323, "grad_norm": 11.233186721801758, "learning_rate": 7.239534636606304e-06, "loss": 0.35595989, "memory(GiB)": 34.88, "step": 56815, "train_speed(iter/s)": 0.412907 }, { "acc": 0.90300741, "epoch": 1.5384615384615383, "grad_norm": 5.876098155975342, "learning_rate": 7.2390343123202534e-06, "loss": 0.46049185, "memory(GiB)": 34.88, "step": 56820, "train_speed(iter/s)": 0.412909 }, { "acc": 0.8853653, "epoch": 1.5385969187447541, "grad_norm": 9.217636108398438, "learning_rate": 7.238533959992159e-06, "loss": 0.55535488, "memory(GiB)": 34.88, "step": 56825, "train_speed(iter/s)": 0.412911 }, { "acc": 0.88770571, "epoch": 1.5387322990279695, "grad_norm": 11.66314697265625, "learning_rate": 7.238033579628292e-06, "loss": 0.64603968, "memory(GiB)": 34.88, "step": 56830, "train_speed(iter/s)": 0.412912 }, { "acc": 0.91584206, "epoch": 1.538867679311185, "grad_norm": 5.555936813354492, "learning_rate": 7.237533171234919e-06, "loss": 0.43988037, "memory(GiB)": 34.88, "step": 56835, "train_speed(iter/s)": 0.412914 }, { "acc": 0.90093069, "epoch": 1.5390030595944006, "grad_norm": 8.653082847595215, "learning_rate": 7.237032734818311e-06, "loss": 0.52173824, "memory(GiB)": 34.88, "step": 56840, "train_speed(iter/s)": 0.412916 }, { "acc": 0.88445435, "epoch": 1.5391384398776162, "grad_norm": 7.821249485015869, "learning_rate": 7.236532270384736e-06, "loss": 0.63369675, "memory(GiB)": 34.88, "step": 56845, "train_speed(iter/s)": 0.412918 }, { "acc": 0.89645309, "epoch": 1.5392738201608318, "grad_norm": 5.743566513061523, "learning_rate": 7.236031777940462e-06, "loss": 0.57673178, "memory(GiB)": 34.88, "step": 56850, "train_speed(iter/s)": 0.412919 }, { "acc": 0.9110096, "epoch": 1.5394092004440472, "grad_norm": 7.234043121337891, "learning_rate": 7.235531257491759e-06, "loss": 0.50658607, "memory(GiB)": 34.88, "step": 56855, "train_speed(iter/s)": 0.412921 }, { "acc": 0.89388962, "epoch": 1.539544580727263, "grad_norm": 7.6040120124816895, "learning_rate": 7.2350307090449e-06, "loss": 0.54825211, "memory(GiB)": 34.88, "step": 56860, "train_speed(iter/s)": 0.412923 }, { "acc": 0.90790548, "epoch": 1.5396799610104783, "grad_norm": 5.739504337310791, "learning_rate": 7.2345301326061516e-06, "loss": 0.40194378, "memory(GiB)": 34.88, "step": 56865, "train_speed(iter/s)": 0.412924 }, { "acc": 0.89872866, "epoch": 1.5398153412936941, "grad_norm": 9.990870475769043, "learning_rate": 7.234029528181788e-06, "loss": 0.55002017, "memory(GiB)": 34.88, "step": 56870, "train_speed(iter/s)": 0.412926 }, { "acc": 0.91574841, "epoch": 1.5399507215769095, "grad_norm": 8.515495300292969, "learning_rate": 7.233528895778076e-06, "loss": 0.41810541, "memory(GiB)": 34.88, "step": 56875, "train_speed(iter/s)": 0.412928 }, { "acc": 0.87846146, "epoch": 1.540086101860125, "grad_norm": 12.303954124450684, "learning_rate": 7.2330282354012896e-06, "loss": 0.63692694, "memory(GiB)": 34.88, "step": 56880, "train_speed(iter/s)": 0.41293 }, { "acc": 0.90886421, "epoch": 1.5402214821433406, "grad_norm": 4.1343536376953125, "learning_rate": 7.232527547057698e-06, "loss": 0.41875935, "memory(GiB)": 34.88, "step": 56885, "train_speed(iter/s)": 0.412932 }, { "acc": 0.89935646, "epoch": 1.5403568624265562, "grad_norm": 6.978561878204346, "learning_rate": 7.232026830753578e-06, "loss": 0.57777152, "memory(GiB)": 34.88, "step": 56890, "train_speed(iter/s)": 0.412933 }, { "acc": 0.91298027, "epoch": 1.5404922427097718, "grad_norm": 4.907942771911621, "learning_rate": 7.2315260864952e-06, "loss": 0.39200745, "memory(GiB)": 34.88, "step": 56895, "train_speed(iter/s)": 0.412935 }, { "acc": 0.90820465, "epoch": 1.5406276229929872, "grad_norm": 3.0771429538726807, "learning_rate": 7.231025314288834e-06, "loss": 0.5296402, "memory(GiB)": 34.88, "step": 56900, "train_speed(iter/s)": 0.412937 }, { "acc": 0.91225758, "epoch": 1.540763003276203, "grad_norm": 7.282678604125977, "learning_rate": 7.2305245141407585e-06, "loss": 0.45059161, "memory(GiB)": 34.88, "step": 56905, "train_speed(iter/s)": 0.412938 }, { "acc": 0.91019802, "epoch": 1.5408983835594183, "grad_norm": 7.657385349273682, "learning_rate": 7.2300236860572425e-06, "loss": 0.46155996, "memory(GiB)": 34.88, "step": 56910, "train_speed(iter/s)": 0.41294 }, { "acc": 0.90178728, "epoch": 1.541033763842634, "grad_norm": 16.83406639099121, "learning_rate": 7.2295228300445616e-06, "loss": 0.52835255, "memory(GiB)": 34.88, "step": 56915, "train_speed(iter/s)": 0.412942 }, { "acc": 0.91802692, "epoch": 1.5411691441258495, "grad_norm": 3.9614675045013428, "learning_rate": 7.229021946108988e-06, "loss": 0.42649536, "memory(GiB)": 34.88, "step": 56920, "train_speed(iter/s)": 0.412944 }, { "acc": 0.88960438, "epoch": 1.541304524409065, "grad_norm": 8.100581169128418, "learning_rate": 7.2285210342568e-06, "loss": 0.56416364, "memory(GiB)": 34.88, "step": 56925, "train_speed(iter/s)": 0.412946 }, { "acc": 0.89036331, "epoch": 1.5414399046922806, "grad_norm": 13.498937606811523, "learning_rate": 7.2280200944942695e-06, "loss": 0.64710937, "memory(GiB)": 34.88, "step": 56930, "train_speed(iter/s)": 0.412947 }, { "acc": 0.91715946, "epoch": 1.541575284975496, "grad_norm": 4.032426834106445, "learning_rate": 7.227519126827672e-06, "loss": 0.43792624, "memory(GiB)": 34.88, "step": 56935, "train_speed(iter/s)": 0.412949 }, { "acc": 0.88642673, "epoch": 1.5417106652587118, "grad_norm": 7.920762538909912, "learning_rate": 7.227018131263285e-06, "loss": 0.66720033, "memory(GiB)": 34.88, "step": 56940, "train_speed(iter/s)": 0.412951 }, { "acc": 0.90041332, "epoch": 1.5418460455419272, "grad_norm": 33.28386688232422, "learning_rate": 7.226517107807383e-06, "loss": 0.56037145, "memory(GiB)": 34.88, "step": 56945, "train_speed(iter/s)": 0.412953 }, { "acc": 0.89666224, "epoch": 1.541981425825143, "grad_norm": 11.3126220703125, "learning_rate": 7.226016056466245e-06, "loss": 0.59610038, "memory(GiB)": 34.88, "step": 56950, "train_speed(iter/s)": 0.412954 }, { "acc": 0.9174139, "epoch": 1.5421168061083583, "grad_norm": 9.402911186218262, "learning_rate": 7.225514977246142e-06, "loss": 0.44917588, "memory(GiB)": 34.88, "step": 56955, "train_speed(iter/s)": 0.412956 }, { "acc": 0.8981389, "epoch": 1.542252186391574, "grad_norm": 15.751250267028809, "learning_rate": 7.225013870153355e-06, "loss": 0.58134565, "memory(GiB)": 34.88, "step": 56960, "train_speed(iter/s)": 0.412958 }, { "acc": 0.90960474, "epoch": 1.5423875666747895, "grad_norm": 6.750145435333252, "learning_rate": 7.224512735194161e-06, "loss": 0.44958014, "memory(GiB)": 34.88, "step": 56965, "train_speed(iter/s)": 0.41296 }, { "acc": 0.89255114, "epoch": 1.542522946958005, "grad_norm": 6.996134281158447, "learning_rate": 7.2240115723748365e-06, "loss": 0.46374283, "memory(GiB)": 34.88, "step": 56970, "train_speed(iter/s)": 0.412961 }, { "acc": 0.91587086, "epoch": 1.5426583272412207, "grad_norm": 8.408929824829102, "learning_rate": 7.223510381701662e-06, "loss": 0.46614571, "memory(GiB)": 34.88, "step": 56975, "train_speed(iter/s)": 0.412963 }, { "acc": 0.8783843, "epoch": 1.542793707524436, "grad_norm": 7.7032790184021, "learning_rate": 7.223009163180913e-06, "loss": 0.6297121, "memory(GiB)": 34.88, "step": 56980, "train_speed(iter/s)": 0.412965 }, { "acc": 0.90494328, "epoch": 1.5429290878076518, "grad_norm": 6.866147518157959, "learning_rate": 7.222507916818869e-06, "loss": 0.4153563, "memory(GiB)": 34.88, "step": 56985, "train_speed(iter/s)": 0.412967 }, { "acc": 0.92183399, "epoch": 1.5430644680908672, "grad_norm": 12.629018783569336, "learning_rate": 7.222006642621812e-06, "loss": 0.39696629, "memory(GiB)": 34.88, "step": 56990, "train_speed(iter/s)": 0.412969 }, { "acc": 0.90374908, "epoch": 1.5431998483740827, "grad_norm": 7.376100540161133, "learning_rate": 7.221505340596018e-06, "loss": 0.52261038, "memory(GiB)": 34.88, "step": 56995, "train_speed(iter/s)": 0.41297 }, { "acc": 0.89867764, "epoch": 1.5433352286572983, "grad_norm": 8.286026000976562, "learning_rate": 7.2210040107477665e-06, "loss": 0.4665494, "memory(GiB)": 34.88, "step": 57000, "train_speed(iter/s)": 0.412972 }, { "acc": 0.88313951, "epoch": 1.543470608940514, "grad_norm": 20.145368576049805, "learning_rate": 7.220502653083339e-06, "loss": 0.74024429, "memory(GiB)": 34.88, "step": 57005, "train_speed(iter/s)": 0.412974 }, { "acc": 0.88976393, "epoch": 1.5436059892237295, "grad_norm": 27.430185317993164, "learning_rate": 7.220001267609017e-06, "loss": 0.55764799, "memory(GiB)": 34.88, "step": 57010, "train_speed(iter/s)": 0.412976 }, { "acc": 0.91898479, "epoch": 1.5437413695069448, "grad_norm": 5.132026195526123, "learning_rate": 7.21949985433108e-06, "loss": 0.4352706, "memory(GiB)": 34.88, "step": 57015, "train_speed(iter/s)": 0.412977 }, { "acc": 0.89472408, "epoch": 1.5438767497901607, "grad_norm": 11.5182523727417, "learning_rate": 7.21899841325581e-06, "loss": 0.55273209, "memory(GiB)": 34.88, "step": 57020, "train_speed(iter/s)": 0.412979 }, { "acc": 0.91016264, "epoch": 1.544012130073376, "grad_norm": 6.48014497756958, "learning_rate": 7.218496944389487e-06, "loss": 0.56039371, "memory(GiB)": 34.88, "step": 57025, "train_speed(iter/s)": 0.412981 }, { "acc": 0.88094349, "epoch": 1.5441475103565918, "grad_norm": 16.386444091796875, "learning_rate": 7.217995447738397e-06, "loss": 0.65433888, "memory(GiB)": 34.88, "step": 57030, "train_speed(iter/s)": 0.412982 }, { "acc": 0.8894309, "epoch": 1.5442828906398072, "grad_norm": 8.562623023986816, "learning_rate": 7.2174939233088155e-06, "loss": 0.55535889, "memory(GiB)": 34.88, "step": 57035, "train_speed(iter/s)": 0.412984 }, { "acc": 0.89546623, "epoch": 1.5444182709230228, "grad_norm": 6.719033718109131, "learning_rate": 7.21699237110703e-06, "loss": 0.56324682, "memory(GiB)": 34.88, "step": 57040, "train_speed(iter/s)": 0.412986 }, { "acc": 0.89546032, "epoch": 1.5445536512062383, "grad_norm": 10.076852798461914, "learning_rate": 7.216490791139322e-06, "loss": 0.51438236, "memory(GiB)": 34.88, "step": 57045, "train_speed(iter/s)": 0.412988 }, { "acc": 0.9090023, "epoch": 1.544689031489454, "grad_norm": 9.970632553100586, "learning_rate": 7.215989183411976e-06, "loss": 0.53974867, "memory(GiB)": 34.88, "step": 57050, "train_speed(iter/s)": 0.412989 }, { "acc": 0.89967957, "epoch": 1.5448244117726695, "grad_norm": 9.405842781066895, "learning_rate": 7.2154875479312725e-06, "loss": 0.57496786, "memory(GiB)": 34.88, "step": 57055, "train_speed(iter/s)": 0.412991 }, { "acc": 0.93680916, "epoch": 1.5449597920558849, "grad_norm": 2.955733299255371, "learning_rate": 7.2149858847034985e-06, "loss": 0.29347701, "memory(GiB)": 34.88, "step": 57060, "train_speed(iter/s)": 0.412992 }, { "acc": 0.90677366, "epoch": 1.5450951723391007, "grad_norm": 8.428196907043457, "learning_rate": 7.214484193734937e-06, "loss": 0.51306314, "memory(GiB)": 34.88, "step": 57065, "train_speed(iter/s)": 0.412993 }, { "acc": 0.93362637, "epoch": 1.545230552622316, "grad_norm": 15.244654655456543, "learning_rate": 7.213982475031875e-06, "loss": 0.32827864, "memory(GiB)": 34.88, "step": 57070, "train_speed(iter/s)": 0.412995 }, { "acc": 0.89680138, "epoch": 1.5453659329055316, "grad_norm": 11.262097358703613, "learning_rate": 7.213480728600593e-06, "loss": 0.58684435, "memory(GiB)": 34.88, "step": 57075, "train_speed(iter/s)": 0.412997 }, { "acc": 0.91532707, "epoch": 1.5455013131887472, "grad_norm": 6.975793838500977, "learning_rate": 7.212978954447381e-06, "loss": 0.48185773, "memory(GiB)": 34.88, "step": 57080, "train_speed(iter/s)": 0.412999 }, { "acc": 0.90657043, "epoch": 1.5456366934719628, "grad_norm": 7.081601619720459, "learning_rate": 7.212477152578519e-06, "loss": 0.51530113, "memory(GiB)": 34.88, "step": 57085, "train_speed(iter/s)": 0.413001 }, { "acc": 0.91806335, "epoch": 1.5457720737551783, "grad_norm": 15.030491828918457, "learning_rate": 7.211975323000301e-06, "loss": 0.42317009, "memory(GiB)": 34.88, "step": 57090, "train_speed(iter/s)": 0.413002 }, { "acc": 0.90098782, "epoch": 1.5459074540383937, "grad_norm": 11.19014835357666, "learning_rate": 7.211473465719006e-06, "loss": 0.53874407, "memory(GiB)": 34.88, "step": 57095, "train_speed(iter/s)": 0.413004 }, { "acc": 0.89581375, "epoch": 1.5460428343216095, "grad_norm": 3.8721041679382324, "learning_rate": 7.210971580740925e-06, "loss": 0.55721846, "memory(GiB)": 34.88, "step": 57100, "train_speed(iter/s)": 0.413005 }, { "acc": 0.88592157, "epoch": 1.5461782146048249, "grad_norm": 26.200363159179688, "learning_rate": 7.210469668072343e-06, "loss": 0.54788642, "memory(GiB)": 34.88, "step": 57105, "train_speed(iter/s)": 0.413007 }, { "acc": 0.93060284, "epoch": 1.5463135948880407, "grad_norm": 5.8995561599731445, "learning_rate": 7.20996772771955e-06, "loss": 0.37999775, "memory(GiB)": 34.88, "step": 57110, "train_speed(iter/s)": 0.413009 }, { "acc": 0.86863184, "epoch": 1.546448975171256, "grad_norm": 9.267926216125488, "learning_rate": 7.209465759688832e-06, "loss": 0.7831213, "memory(GiB)": 34.88, "step": 57115, "train_speed(iter/s)": 0.41301 }, { "acc": 0.90851326, "epoch": 1.5465843554544716, "grad_norm": 7.253322124481201, "learning_rate": 7.208963763986475e-06, "loss": 0.4272604, "memory(GiB)": 34.88, "step": 57120, "train_speed(iter/s)": 0.413012 }, { "acc": 0.90383797, "epoch": 1.5467197357376872, "grad_norm": 4.3897576332092285, "learning_rate": 7.208461740618773e-06, "loss": 0.49163594, "memory(GiB)": 34.88, "step": 57125, "train_speed(iter/s)": 0.413013 }, { "acc": 0.9132225, "epoch": 1.5468551160209028, "grad_norm": 9.550243377685547, "learning_rate": 7.207959689592009e-06, "loss": 0.43944993, "memory(GiB)": 34.88, "step": 57130, "train_speed(iter/s)": 0.413015 }, { "acc": 0.89036274, "epoch": 1.5469904963041183, "grad_norm": 10.590821266174316, "learning_rate": 7.207457610912475e-06, "loss": 0.66183186, "memory(GiB)": 34.88, "step": 57135, "train_speed(iter/s)": 0.413016 }, { "acc": 0.87852459, "epoch": 1.5471258765873337, "grad_norm": 7.187110424041748, "learning_rate": 7.206955504586461e-06, "loss": 0.65015879, "memory(GiB)": 34.88, "step": 57140, "train_speed(iter/s)": 0.413018 }, { "acc": 0.9135623, "epoch": 1.5472612568705495, "grad_norm": 10.42941951751709, "learning_rate": 7.206453370620256e-06, "loss": 0.46932092, "memory(GiB)": 34.88, "step": 57145, "train_speed(iter/s)": 0.413019 }, { "acc": 0.90409851, "epoch": 1.5473966371537649, "grad_norm": 4.878691673278809, "learning_rate": 7.205951209020149e-06, "loss": 0.55463243, "memory(GiB)": 34.88, "step": 57150, "train_speed(iter/s)": 0.413021 }, { "acc": 0.89208136, "epoch": 1.5475320174369804, "grad_norm": 14.436690330505371, "learning_rate": 7.205449019792433e-06, "loss": 0.60914841, "memory(GiB)": 34.88, "step": 57155, "train_speed(iter/s)": 0.413023 }, { "acc": 0.90634613, "epoch": 1.547667397720196, "grad_norm": 5.31828498840332, "learning_rate": 7.2049468029434e-06, "loss": 0.56104698, "memory(GiB)": 34.88, "step": 57160, "train_speed(iter/s)": 0.413025 }, { "acc": 0.91995878, "epoch": 1.5478027780034116, "grad_norm": 7.939366817474365, "learning_rate": 7.204444558479337e-06, "loss": 0.48695564, "memory(GiB)": 34.88, "step": 57165, "train_speed(iter/s)": 0.413027 }, { "acc": 0.89855499, "epoch": 1.5479381582866272, "grad_norm": 8.662514686584473, "learning_rate": 7.203942286406538e-06, "loss": 0.55894356, "memory(GiB)": 34.88, "step": 57170, "train_speed(iter/s)": 0.413028 }, { "acc": 0.9077652, "epoch": 1.5480735385698425, "grad_norm": 9.0045804977417, "learning_rate": 7.203439986731293e-06, "loss": 0.52799911, "memory(GiB)": 34.88, "step": 57175, "train_speed(iter/s)": 0.413029 }, { "acc": 0.90542822, "epoch": 1.5482089188530583, "grad_norm": 5.973686695098877, "learning_rate": 7.202937659459898e-06, "loss": 0.46190934, "memory(GiB)": 34.88, "step": 57180, "train_speed(iter/s)": 0.413031 }, { "acc": 0.91826305, "epoch": 1.5483442991362737, "grad_norm": 18.109512329101562, "learning_rate": 7.202435304598641e-06, "loss": 0.37194357, "memory(GiB)": 34.88, "step": 57185, "train_speed(iter/s)": 0.413032 }, { "acc": 0.90360909, "epoch": 1.5484796794194895, "grad_norm": 6.001242637634277, "learning_rate": 7.2019329221538206e-06, "loss": 0.4857213, "memory(GiB)": 34.88, "step": 57190, "train_speed(iter/s)": 0.413034 }, { "acc": 0.88771076, "epoch": 1.5486150597027049, "grad_norm": 14.262462615966797, "learning_rate": 7.201430512131725e-06, "loss": 0.73261337, "memory(GiB)": 34.88, "step": 57195, "train_speed(iter/s)": 0.413036 }, { "acc": 0.90225182, "epoch": 1.5487504399859204, "grad_norm": 6.45422887802124, "learning_rate": 7.200928074538651e-06, "loss": 0.5870748, "memory(GiB)": 34.88, "step": 57200, "train_speed(iter/s)": 0.413038 }, { "acc": 0.90099554, "epoch": 1.548885820269136, "grad_norm": 10.673246383666992, "learning_rate": 7.200425609380891e-06, "loss": 0.57071061, "memory(GiB)": 34.88, "step": 57205, "train_speed(iter/s)": 0.413039 }, { "acc": 0.91532211, "epoch": 1.5490212005523516, "grad_norm": 3.2514610290527344, "learning_rate": 7.1999231166647395e-06, "loss": 0.52641296, "memory(GiB)": 34.88, "step": 57210, "train_speed(iter/s)": 0.413041 }, { "acc": 0.90198622, "epoch": 1.5491565808355672, "grad_norm": 7.837442398071289, "learning_rate": 7.1994205963964925e-06, "loss": 0.48624182, "memory(GiB)": 34.88, "step": 57215, "train_speed(iter/s)": 0.413043 }, { "acc": 0.91137686, "epoch": 1.5492919611187825, "grad_norm": 7.74784517288208, "learning_rate": 7.198918048582443e-06, "loss": 0.42751551, "memory(GiB)": 34.88, "step": 57220, "train_speed(iter/s)": 0.413044 }, { "acc": 0.90779896, "epoch": 1.5494273414019983, "grad_norm": 9.80688190460205, "learning_rate": 7.198415473228889e-06, "loss": 0.50414777, "memory(GiB)": 34.88, "step": 57225, "train_speed(iter/s)": 0.413046 }, { "acc": 0.89494696, "epoch": 1.5495627216852137, "grad_norm": 7.448768138885498, "learning_rate": 7.197912870342124e-06, "loss": 0.58279166, "memory(GiB)": 34.88, "step": 57230, "train_speed(iter/s)": 0.413048 }, { "acc": 0.89083481, "epoch": 1.5496981019684293, "grad_norm": 17.2481746673584, "learning_rate": 7.197410239928444e-06, "loss": 0.53448057, "memory(GiB)": 34.88, "step": 57235, "train_speed(iter/s)": 0.413049 }, { "acc": 0.9034153, "epoch": 1.5498334822516449, "grad_norm": 10.432525634765625, "learning_rate": 7.196907581994149e-06, "loss": 0.55788436, "memory(GiB)": 34.88, "step": 57240, "train_speed(iter/s)": 0.413051 }, { "acc": 0.89501944, "epoch": 1.5499688625348604, "grad_norm": 22.93940544128418, "learning_rate": 7.196404896545531e-06, "loss": 0.65815396, "memory(GiB)": 34.88, "step": 57245, "train_speed(iter/s)": 0.413053 }, { "acc": 0.90593262, "epoch": 1.550104242818076, "grad_norm": 7.234249114990234, "learning_rate": 7.1959021835888895e-06, "loss": 0.50245914, "memory(GiB)": 34.88, "step": 57250, "train_speed(iter/s)": 0.413055 }, { "acc": 0.91129704, "epoch": 1.5502396231012914, "grad_norm": 7.455187797546387, "learning_rate": 7.1953994431305205e-06, "loss": 0.5189229, "memory(GiB)": 34.88, "step": 57255, "train_speed(iter/s)": 0.413057 }, { "acc": 0.88981495, "epoch": 1.5503750033845072, "grad_norm": 5.795306205749512, "learning_rate": 7.194896675176726e-06, "loss": 0.5542675, "memory(GiB)": 34.88, "step": 57260, "train_speed(iter/s)": 0.413058 }, { "acc": 0.90892477, "epoch": 1.5505103836677225, "grad_norm": 13.537699699401855, "learning_rate": 7.194393879733799e-06, "loss": 0.49215097, "memory(GiB)": 34.88, "step": 57265, "train_speed(iter/s)": 0.41306 }, { "acc": 0.89812889, "epoch": 1.5506457639509383, "grad_norm": 38.825565338134766, "learning_rate": 7.193891056808041e-06, "loss": 0.59025631, "memory(GiB)": 34.88, "step": 57270, "train_speed(iter/s)": 0.41306 }, { "acc": 0.89379444, "epoch": 1.5507811442341537, "grad_norm": 6.298569202423096, "learning_rate": 7.193388206405749e-06, "loss": 0.55923996, "memory(GiB)": 34.88, "step": 57275, "train_speed(iter/s)": 0.413062 }, { "acc": 0.90342178, "epoch": 1.5509165245173693, "grad_norm": 7.928016185760498, "learning_rate": 7.192885328533224e-06, "loss": 0.54491224, "memory(GiB)": 34.88, "step": 57280, "train_speed(iter/s)": 0.413063 }, { "acc": 0.89078293, "epoch": 1.5510519048005849, "grad_norm": 9.998025894165039, "learning_rate": 7.192382423196765e-06, "loss": 0.60559382, "memory(GiB)": 34.88, "step": 57285, "train_speed(iter/s)": 0.413065 }, { "acc": 0.88447514, "epoch": 1.5511872850838004, "grad_norm": 5.607230186462402, "learning_rate": 7.1918794904026715e-06, "loss": 0.65960474, "memory(GiB)": 34.88, "step": 57290, "train_speed(iter/s)": 0.413067 }, { "acc": 0.90793982, "epoch": 1.551322665367016, "grad_norm": 6.299668312072754, "learning_rate": 7.191376530157244e-06, "loss": 0.45346351, "memory(GiB)": 34.88, "step": 57295, "train_speed(iter/s)": 0.413069 }, { "acc": 0.89889278, "epoch": 1.5514580456502314, "grad_norm": 9.373363494873047, "learning_rate": 7.190873542466782e-06, "loss": 0.57535563, "memory(GiB)": 34.88, "step": 57300, "train_speed(iter/s)": 0.413071 }, { "acc": 0.90163183, "epoch": 1.5515934259334472, "grad_norm": 6.962933540344238, "learning_rate": 7.190370527337587e-06, "loss": 0.53539019, "memory(GiB)": 34.88, "step": 57305, "train_speed(iter/s)": 0.413072 }, { "acc": 0.91173601, "epoch": 1.5517288062166625, "grad_norm": 4.3597025871276855, "learning_rate": 7.189867484775961e-06, "loss": 0.48202114, "memory(GiB)": 34.88, "step": 57310, "train_speed(iter/s)": 0.413074 }, { "acc": 0.9079422, "epoch": 1.5518641864998781, "grad_norm": 15.557469367980957, "learning_rate": 7.189364414788206e-06, "loss": 0.52641773, "memory(GiB)": 34.88, "step": 57315, "train_speed(iter/s)": 0.413076 }, { "acc": 0.90667133, "epoch": 1.5519995667830937, "grad_norm": 18.191043853759766, "learning_rate": 7.188861317380622e-06, "loss": 0.52057624, "memory(GiB)": 34.88, "step": 57320, "train_speed(iter/s)": 0.413078 }, { "acc": 0.91573744, "epoch": 1.5521349470663093, "grad_norm": 47.631317138671875, "learning_rate": 7.188358192559514e-06, "loss": 0.45771327, "memory(GiB)": 34.88, "step": 57325, "train_speed(iter/s)": 0.413079 }, { "acc": 0.8892931, "epoch": 1.5522703273495249, "grad_norm": 22.221172332763672, "learning_rate": 7.187855040331183e-06, "loss": 0.65149059, "memory(GiB)": 34.88, "step": 57330, "train_speed(iter/s)": 0.413081 }, { "acc": 0.90257254, "epoch": 1.5524057076327402, "grad_norm": 8.707337379455566, "learning_rate": 7.187351860701929e-06, "loss": 0.48370628, "memory(GiB)": 34.88, "step": 57335, "train_speed(iter/s)": 0.413083 }, { "acc": 0.90252533, "epoch": 1.552541087915956, "grad_norm": 12.796046257019043, "learning_rate": 7.18684865367806e-06, "loss": 0.6155663, "memory(GiB)": 34.88, "step": 57340, "train_speed(iter/s)": 0.413084 }, { "acc": 0.90358458, "epoch": 1.5526764681991714, "grad_norm": 43.898338317871094, "learning_rate": 7.186345419265879e-06, "loss": 0.52859168, "memory(GiB)": 34.88, "step": 57345, "train_speed(iter/s)": 0.413086 }, { "acc": 0.92054195, "epoch": 1.5528118484823872, "grad_norm": 5.906818866729736, "learning_rate": 7.1858421574716875e-06, "loss": 0.448312, "memory(GiB)": 34.88, "step": 57350, "train_speed(iter/s)": 0.413088 }, { "acc": 0.90142422, "epoch": 1.5529472287656025, "grad_norm": 8.76392936706543, "learning_rate": 7.185338868301792e-06, "loss": 0.53551254, "memory(GiB)": 34.88, "step": 57355, "train_speed(iter/s)": 0.41309 }, { "acc": 0.91456928, "epoch": 1.5530826090488181, "grad_norm": 10.554360389709473, "learning_rate": 7.184835551762496e-06, "loss": 0.42898836, "memory(GiB)": 34.88, "step": 57360, "train_speed(iter/s)": 0.413092 }, { "acc": 0.91603527, "epoch": 1.5532179893320337, "grad_norm": 10.262704849243164, "learning_rate": 7.184332207860107e-06, "loss": 0.44583721, "memory(GiB)": 34.88, "step": 57365, "train_speed(iter/s)": 0.413093 }, { "acc": 0.89968185, "epoch": 1.5533533696152493, "grad_norm": 6.503796100616455, "learning_rate": 7.183828836600926e-06, "loss": 0.58720622, "memory(GiB)": 34.88, "step": 57370, "train_speed(iter/s)": 0.413095 }, { "acc": 0.90038986, "epoch": 1.5534887498984649, "grad_norm": 11.45262622833252, "learning_rate": 7.183325437991262e-06, "loss": 0.59271336, "memory(GiB)": 34.88, "step": 57375, "train_speed(iter/s)": 0.413097 }, { "acc": 0.90273819, "epoch": 1.5536241301816802, "grad_norm": 8.588770866394043, "learning_rate": 7.18282201203742e-06, "loss": 0.54784293, "memory(GiB)": 34.88, "step": 57380, "train_speed(iter/s)": 0.413099 }, { "acc": 0.92338562, "epoch": 1.553759510464896, "grad_norm": 15.896239280700684, "learning_rate": 7.182318558745705e-06, "loss": 0.38863225, "memory(GiB)": 34.88, "step": 57385, "train_speed(iter/s)": 0.413101 }, { "acc": 0.90368729, "epoch": 1.5538948907481114, "grad_norm": 46.82925033569336, "learning_rate": 7.181815078122427e-06, "loss": 0.60242939, "memory(GiB)": 34.88, "step": 57390, "train_speed(iter/s)": 0.413103 }, { "acc": 0.89197025, "epoch": 1.554030271031327, "grad_norm": 6.444890975952148, "learning_rate": 7.181311570173891e-06, "loss": 0.55797076, "memory(GiB)": 34.88, "step": 57395, "train_speed(iter/s)": 0.413104 }, { "acc": 0.89521046, "epoch": 1.5541656513145425, "grad_norm": 31.85554313659668, "learning_rate": 7.180808034906406e-06, "loss": 0.63794389, "memory(GiB)": 34.88, "step": 57400, "train_speed(iter/s)": 0.413106 }, { "acc": 0.8939682, "epoch": 1.5543010315977581, "grad_norm": 17.40407943725586, "learning_rate": 7.180304472326277e-06, "loss": 0.60957232, "memory(GiB)": 34.88, "step": 57405, "train_speed(iter/s)": 0.413108 }, { "acc": 0.91299028, "epoch": 1.5544364118809737, "grad_norm": 11.816381454467773, "learning_rate": 7.179800882439816e-06, "loss": 0.45404649, "memory(GiB)": 34.88, "step": 57410, "train_speed(iter/s)": 0.41311 }, { "acc": 0.91369476, "epoch": 1.554571792164189, "grad_norm": 17.330917358398438, "learning_rate": 7.179297265253327e-06, "loss": 0.51270704, "memory(GiB)": 34.88, "step": 57415, "train_speed(iter/s)": 0.413112 }, { "acc": 0.92091122, "epoch": 1.5547071724474049, "grad_norm": 5.823095798492432, "learning_rate": 7.17879362077312e-06, "loss": 0.41940184, "memory(GiB)": 34.88, "step": 57420, "train_speed(iter/s)": 0.413113 }, { "acc": 0.88926802, "epoch": 1.5548425527306202, "grad_norm": 5.73289680480957, "learning_rate": 7.178289949005507e-06, "loss": 0.65099249, "memory(GiB)": 34.88, "step": 57425, "train_speed(iter/s)": 0.413115 }, { "acc": 0.913346, "epoch": 1.554977933013836, "grad_norm": 14.534534454345703, "learning_rate": 7.177786249956794e-06, "loss": 0.44377251, "memory(GiB)": 34.88, "step": 57430, "train_speed(iter/s)": 0.413117 }, { "acc": 0.92868185, "epoch": 1.5551133132970514, "grad_norm": 8.624299049377441, "learning_rate": 7.177282523633294e-06, "loss": 0.45575361, "memory(GiB)": 34.88, "step": 57435, "train_speed(iter/s)": 0.413119 }, { "acc": 0.90833035, "epoch": 1.555248693580267, "grad_norm": 11.546051025390625, "learning_rate": 7.1767787700413135e-06, "loss": 0.48856192, "memory(GiB)": 34.88, "step": 57440, "train_speed(iter/s)": 0.413121 }, { "acc": 0.88465395, "epoch": 1.5553840738634825, "grad_norm": 9.266082763671875, "learning_rate": 7.176274989187167e-06, "loss": 0.62906389, "memory(GiB)": 34.88, "step": 57445, "train_speed(iter/s)": 0.413122 }, { "acc": 0.8769474, "epoch": 1.5555194541466981, "grad_norm": 13.04196834564209, "learning_rate": 7.175771181077163e-06, "loss": 0.68036642, "memory(GiB)": 34.88, "step": 57450, "train_speed(iter/s)": 0.413124 }, { "acc": 0.90950222, "epoch": 1.5556548344299137, "grad_norm": 18.4252872467041, "learning_rate": 7.175267345717612e-06, "loss": 0.46485782, "memory(GiB)": 34.88, "step": 57455, "train_speed(iter/s)": 0.413126 }, { "acc": 0.90559053, "epoch": 1.555790214713129, "grad_norm": 3.5236735343933105, "learning_rate": 7.174763483114828e-06, "loss": 0.5195425, "memory(GiB)": 34.88, "step": 57460, "train_speed(iter/s)": 0.413128 }, { "acc": 0.91396236, "epoch": 1.5559255949963449, "grad_norm": 7.625459671020508, "learning_rate": 7.174259593275122e-06, "loss": 0.41906877, "memory(GiB)": 34.88, "step": 57465, "train_speed(iter/s)": 0.41313 }, { "acc": 0.906353, "epoch": 1.5560609752795602, "grad_norm": 9.462817192077637, "learning_rate": 7.173755676204804e-06, "loss": 0.54472218, "memory(GiB)": 34.88, "step": 57470, "train_speed(iter/s)": 0.413132 }, { "acc": 0.88918867, "epoch": 1.5561963555627758, "grad_norm": 4.714013576507568, "learning_rate": 7.173251731910187e-06, "loss": 0.59308424, "memory(GiB)": 34.88, "step": 57475, "train_speed(iter/s)": 0.413133 }, { "acc": 0.89621134, "epoch": 1.5563317358459914, "grad_norm": 9.111339569091797, "learning_rate": 7.172747760397587e-06, "loss": 0.54941216, "memory(GiB)": 34.88, "step": 57480, "train_speed(iter/s)": 0.413135 }, { "acc": 0.89346485, "epoch": 1.556467116129207, "grad_norm": 8.591394424438477, "learning_rate": 7.172243761673315e-06, "loss": 0.55000267, "memory(GiB)": 34.88, "step": 57485, "train_speed(iter/s)": 0.413136 }, { "acc": 0.8726903, "epoch": 1.5566024964124225, "grad_norm": 14.901713371276855, "learning_rate": 7.171739735743686e-06, "loss": 0.76872091, "memory(GiB)": 34.88, "step": 57490, "train_speed(iter/s)": 0.413138 }, { "acc": 0.8868762, "epoch": 1.556737876695638, "grad_norm": 34.540313720703125, "learning_rate": 7.171235682615012e-06, "loss": 0.62641702, "memory(GiB)": 34.88, "step": 57495, "train_speed(iter/s)": 0.41314 }, { "acc": 0.87474566, "epoch": 1.5568732569788537, "grad_norm": 5.492094039916992, "learning_rate": 7.170731602293608e-06, "loss": 0.76270447, "memory(GiB)": 34.88, "step": 57500, "train_speed(iter/s)": 0.413141 }, { "acc": 0.89622173, "epoch": 1.557008637262069, "grad_norm": 13.601269721984863, "learning_rate": 7.170227494785788e-06, "loss": 0.52944479, "memory(GiB)": 34.88, "step": 57505, "train_speed(iter/s)": 0.413143 }, { "acc": 0.89273739, "epoch": 1.5571440175452849, "grad_norm": 8.709426879882812, "learning_rate": 7.169723360097869e-06, "loss": 0.51584387, "memory(GiB)": 34.88, "step": 57510, "train_speed(iter/s)": 0.413145 }, { "acc": 0.89811249, "epoch": 1.5572793978285002, "grad_norm": 15.144782066345215, "learning_rate": 7.169219198236163e-06, "loss": 0.55330043, "memory(GiB)": 34.88, "step": 57515, "train_speed(iter/s)": 0.413147 }, { "acc": 0.92871151, "epoch": 1.5574147781117158, "grad_norm": 6.356062889099121, "learning_rate": 7.168715009206989e-06, "loss": 0.3659811, "memory(GiB)": 34.88, "step": 57520, "train_speed(iter/s)": 0.413148 }, { "acc": 0.91499109, "epoch": 1.5575501583949314, "grad_norm": 13.559823036193848, "learning_rate": 7.168210793016661e-06, "loss": 0.47637887, "memory(GiB)": 34.88, "step": 57525, "train_speed(iter/s)": 0.41315 }, { "acc": 0.91217575, "epoch": 1.557685538678147, "grad_norm": 7.1792402267456055, "learning_rate": 7.167706549671495e-06, "loss": 0.42451792, "memory(GiB)": 34.88, "step": 57530, "train_speed(iter/s)": 0.413152 }, { "acc": 0.89501514, "epoch": 1.5578209189613625, "grad_norm": 4.814488410949707, "learning_rate": 7.167202279177811e-06, "loss": 0.5247921, "memory(GiB)": 34.88, "step": 57535, "train_speed(iter/s)": 0.413154 }, { "acc": 0.90789471, "epoch": 1.557956299244578, "grad_norm": 4.14923620223999, "learning_rate": 7.16669798154192e-06, "loss": 0.46668386, "memory(GiB)": 34.88, "step": 57540, "train_speed(iter/s)": 0.413155 }, { "acc": 0.90575762, "epoch": 1.5580916795277937, "grad_norm": 13.909543991088867, "learning_rate": 7.166193656770145e-06, "loss": 0.50430202, "memory(GiB)": 34.88, "step": 57545, "train_speed(iter/s)": 0.413157 }, { "acc": 0.88420315, "epoch": 1.558227059811009, "grad_norm": 8.823944091796875, "learning_rate": 7.1656893048688e-06, "loss": 0.60085683, "memory(GiB)": 34.88, "step": 57550, "train_speed(iter/s)": 0.413159 }, { "acc": 0.90117149, "epoch": 1.5583624400942246, "grad_norm": 8.942461967468262, "learning_rate": 7.165184925844205e-06, "loss": 0.59867401, "memory(GiB)": 34.88, "step": 57555, "train_speed(iter/s)": 0.41316 }, { "acc": 0.91032743, "epoch": 1.5584978203774402, "grad_norm": 4.590283393859863, "learning_rate": 7.164680519702678e-06, "loss": 0.46281581, "memory(GiB)": 34.88, "step": 57560, "train_speed(iter/s)": 0.413162 }, { "acc": 0.91291637, "epoch": 1.5586332006606558, "grad_norm": 11.016143798828125, "learning_rate": 7.164176086450538e-06, "loss": 0.49701405, "memory(GiB)": 34.88, "step": 57565, "train_speed(iter/s)": 0.413163 }, { "acc": 0.88746319, "epoch": 1.5587685809438714, "grad_norm": 21.25272560119629, "learning_rate": 7.163671626094102e-06, "loss": 0.60642767, "memory(GiB)": 34.88, "step": 57570, "train_speed(iter/s)": 0.413165 }, { "acc": 0.91122322, "epoch": 1.5589039612270867, "grad_norm": 6.014666557312012, "learning_rate": 7.163167138639693e-06, "loss": 0.38304999, "memory(GiB)": 34.88, "step": 57575, "train_speed(iter/s)": 0.413167 }, { "acc": 0.92094755, "epoch": 1.5590393415103025, "grad_norm": 3.430433750152588, "learning_rate": 7.162662624093626e-06, "loss": 0.39969161, "memory(GiB)": 34.88, "step": 57580, "train_speed(iter/s)": 0.413169 }, { "acc": 0.9009572, "epoch": 1.559174721793518, "grad_norm": 13.231608390808105, "learning_rate": 7.162158082462224e-06, "loss": 0.56412582, "memory(GiB)": 34.88, "step": 57585, "train_speed(iter/s)": 0.413171 }, { "acc": 0.90521221, "epoch": 1.5593101020767337, "grad_norm": 25.53447914123535, "learning_rate": 7.161653513751806e-06, "loss": 0.45439758, "memory(GiB)": 34.88, "step": 57590, "train_speed(iter/s)": 0.413172 }, { "acc": 0.89961462, "epoch": 1.559445482359949, "grad_norm": 9.384530067443848, "learning_rate": 7.161148917968697e-06, "loss": 0.52271881, "memory(GiB)": 34.88, "step": 57595, "train_speed(iter/s)": 0.413174 }, { "acc": 0.9041647, "epoch": 1.5595808626431646, "grad_norm": 13.248918533325195, "learning_rate": 7.160644295119211e-06, "loss": 0.55643272, "memory(GiB)": 34.88, "step": 57600, "train_speed(iter/s)": 0.413176 }, { "acc": 0.91962318, "epoch": 1.5597162429263802, "grad_norm": 11.765326499938965, "learning_rate": 7.160139645209675e-06, "loss": 0.39296103, "memory(GiB)": 34.88, "step": 57605, "train_speed(iter/s)": 0.413177 }, { "acc": 0.90485401, "epoch": 1.5598516232095958, "grad_norm": 6.667721748352051, "learning_rate": 7.159634968246408e-06, "loss": 0.53483348, "memory(GiB)": 34.88, "step": 57610, "train_speed(iter/s)": 0.413179 }, { "acc": 0.90055609, "epoch": 1.5599870034928114, "grad_norm": 17.76048469543457, "learning_rate": 7.159130264235733e-06, "loss": 0.56004691, "memory(GiB)": 34.88, "step": 57615, "train_speed(iter/s)": 0.413181 }, { "acc": 0.9091115, "epoch": 1.5601223837760267, "grad_norm": 6.392755031585693, "learning_rate": 7.158625533183973e-06, "loss": 0.48766556, "memory(GiB)": 34.88, "step": 57620, "train_speed(iter/s)": 0.413182 }, { "acc": 0.88924999, "epoch": 1.5602577640592425, "grad_norm": 8.607362747192383, "learning_rate": 7.158120775097448e-06, "loss": 0.67777691, "memory(GiB)": 34.88, "step": 57625, "train_speed(iter/s)": 0.413184 }, { "acc": 0.89615374, "epoch": 1.560393144342458, "grad_norm": 12.73415470123291, "learning_rate": 7.1576159899824855e-06, "loss": 0.65251098, "memory(GiB)": 34.88, "step": 57630, "train_speed(iter/s)": 0.413186 }, { "acc": 0.88546543, "epoch": 1.5605285246256735, "grad_norm": 13.778055191040039, "learning_rate": 7.157111177845405e-06, "loss": 0.65588055, "memory(GiB)": 34.88, "step": 57635, "train_speed(iter/s)": 0.413188 }, { "acc": 0.88957214, "epoch": 1.560663904908889, "grad_norm": 11.138750076293945, "learning_rate": 7.156606338692531e-06, "loss": 0.65404172, "memory(GiB)": 34.88, "step": 57640, "train_speed(iter/s)": 0.413189 }, { "acc": 0.8880743, "epoch": 1.5607992851921046, "grad_norm": 10.270062446594238, "learning_rate": 7.15610147253019e-06, "loss": 0.6460166, "memory(GiB)": 34.88, "step": 57645, "train_speed(iter/s)": 0.413191 }, { "acc": 0.89324236, "epoch": 1.5609346654753202, "grad_norm": 7.846590518951416, "learning_rate": 7.1555965793647046e-06, "loss": 0.51464081, "memory(GiB)": 34.88, "step": 57650, "train_speed(iter/s)": 0.413193 }, { "acc": 0.89796658, "epoch": 1.5610700457585356, "grad_norm": 4.831684112548828, "learning_rate": 7.155091659202399e-06, "loss": 0.5496829, "memory(GiB)": 34.88, "step": 57655, "train_speed(iter/s)": 0.413195 }, { "acc": 0.90826855, "epoch": 1.5612054260417514, "grad_norm": 16.424640655517578, "learning_rate": 7.154586712049601e-06, "loss": 0.44018273, "memory(GiB)": 34.88, "step": 57660, "train_speed(iter/s)": 0.413197 }, { "acc": 0.90966721, "epoch": 1.5613408063249667, "grad_norm": 4.682897567749023, "learning_rate": 7.154081737912632e-06, "loss": 0.48137369, "memory(GiB)": 34.88, "step": 57665, "train_speed(iter/s)": 0.413198 }, { "acc": 0.89049301, "epoch": 1.5614761866081825, "grad_norm": 6.357680320739746, "learning_rate": 7.153576736797819e-06, "loss": 0.64412031, "memory(GiB)": 34.88, "step": 57670, "train_speed(iter/s)": 0.4132 }, { "acc": 0.90268631, "epoch": 1.561611566891398, "grad_norm": 10.304882049560547, "learning_rate": 7.153071708711493e-06, "loss": 0.48636303, "memory(GiB)": 34.88, "step": 57675, "train_speed(iter/s)": 0.413202 }, { "acc": 0.91217966, "epoch": 1.5617469471746135, "grad_norm": 5.977757453918457, "learning_rate": 7.1525666536599725e-06, "loss": 0.45388145, "memory(GiB)": 34.88, "step": 57680, "train_speed(iter/s)": 0.413204 }, { "acc": 0.8815237, "epoch": 1.561882327457829, "grad_norm": 10.862159729003906, "learning_rate": 7.15206157164959e-06, "loss": 0.65695477, "memory(GiB)": 34.88, "step": 57685, "train_speed(iter/s)": 0.413205 }, { "acc": 0.90558052, "epoch": 1.5620177077410446, "grad_norm": 5.211399555206299, "learning_rate": 7.151556462686672e-06, "loss": 0.4957696, "memory(GiB)": 34.88, "step": 57690, "train_speed(iter/s)": 0.413207 }, { "acc": 0.8940999, "epoch": 1.5621530880242602, "grad_norm": 7.919876575469971, "learning_rate": 7.151051326777544e-06, "loss": 0.60451679, "memory(GiB)": 34.88, "step": 57695, "train_speed(iter/s)": 0.413209 }, { "acc": 0.90480442, "epoch": 1.5622884683074756, "grad_norm": 6.651314735412598, "learning_rate": 7.1505461639285356e-06, "loss": 0.46370869, "memory(GiB)": 34.88, "step": 57700, "train_speed(iter/s)": 0.41321 }, { "acc": 0.88685741, "epoch": 1.5624238485906914, "grad_norm": 10.990777969360352, "learning_rate": 7.150040974145973e-06, "loss": 0.63529634, "memory(GiB)": 34.88, "step": 57705, "train_speed(iter/s)": 0.413212 }, { "acc": 0.90468178, "epoch": 1.5625592288739067, "grad_norm": 8.085482597351074, "learning_rate": 7.149535757436188e-06, "loss": 0.49730186, "memory(GiB)": 34.88, "step": 57710, "train_speed(iter/s)": 0.413214 }, { "acc": 0.91050816, "epoch": 1.5626946091571223, "grad_norm": 7.900872707366943, "learning_rate": 7.149030513805505e-06, "loss": 0.46531553, "memory(GiB)": 34.88, "step": 57715, "train_speed(iter/s)": 0.413215 }, { "acc": 0.91668797, "epoch": 1.562829989440338, "grad_norm": 6.887323379516602, "learning_rate": 7.148525243260255e-06, "loss": 0.39675558, "memory(GiB)": 34.88, "step": 57720, "train_speed(iter/s)": 0.413217 }, { "acc": 0.89422474, "epoch": 1.5629653697235535, "grad_norm": 5.19309139251709, "learning_rate": 7.148019945806769e-06, "loss": 0.54249935, "memory(GiB)": 34.88, "step": 57725, "train_speed(iter/s)": 0.413219 }, { "acc": 0.90260792, "epoch": 1.563100750006769, "grad_norm": 10.371949195861816, "learning_rate": 7.1475146214513756e-06, "loss": 0.42446947, "memory(GiB)": 34.88, "step": 57730, "train_speed(iter/s)": 0.41322 }, { "acc": 0.90921803, "epoch": 1.5632361302899844, "grad_norm": 13.509252548217773, "learning_rate": 7.147009270200406e-06, "loss": 0.51030951, "memory(GiB)": 34.88, "step": 57735, "train_speed(iter/s)": 0.413222 }, { "acc": 0.89021463, "epoch": 1.5633715105732002, "grad_norm": 15.347392082214355, "learning_rate": 7.1465038920601896e-06, "loss": 0.6607502, "memory(GiB)": 34.88, "step": 57740, "train_speed(iter/s)": 0.413224 }, { "acc": 0.92118797, "epoch": 1.5635068908564156, "grad_norm": 20.24434471130371, "learning_rate": 7.145998487037058e-06, "loss": 0.49991217, "memory(GiB)": 34.88, "step": 57745, "train_speed(iter/s)": 0.413226 }, { "acc": 0.89666977, "epoch": 1.5636422711396314, "grad_norm": 10.172550201416016, "learning_rate": 7.14549305513734e-06, "loss": 0.53127947, "memory(GiB)": 34.88, "step": 57750, "train_speed(iter/s)": 0.413227 }, { "acc": 0.88365879, "epoch": 1.5637776514228467, "grad_norm": 10.33640193939209, "learning_rate": 7.1449875963673706e-06, "loss": 0.66733971, "memory(GiB)": 34.88, "step": 57755, "train_speed(iter/s)": 0.413229 }, { "acc": 0.89856081, "epoch": 1.5639130317060623, "grad_norm": 9.42485237121582, "learning_rate": 7.14448211073348e-06, "loss": 0.49038687, "memory(GiB)": 34.88, "step": 57760, "train_speed(iter/s)": 0.413231 }, { "acc": 0.88267365, "epoch": 1.564048411989278, "grad_norm": 22.264047622680664, "learning_rate": 7.1439765982420004e-06, "loss": 0.66648016, "memory(GiB)": 34.88, "step": 57765, "train_speed(iter/s)": 0.413232 }, { "acc": 0.92054253, "epoch": 1.5641837922724935, "grad_norm": 5.955776214599609, "learning_rate": 7.143471058899265e-06, "loss": 0.36538916, "memory(GiB)": 34.88, "step": 57770, "train_speed(iter/s)": 0.413234 }, { "acc": 0.90053158, "epoch": 1.564319172555709, "grad_norm": 11.411624908447266, "learning_rate": 7.142965492711606e-06, "loss": 0.49999619, "memory(GiB)": 34.88, "step": 57775, "train_speed(iter/s)": 0.413236 }, { "acc": 0.90375299, "epoch": 1.5644545528389244, "grad_norm": 7.006369113922119, "learning_rate": 7.1424598996853565e-06, "loss": 0.50133462, "memory(GiB)": 34.88, "step": 57780, "train_speed(iter/s)": 0.413238 }, { "acc": 0.90276279, "epoch": 1.5645899331221402, "grad_norm": 8.787202835083008, "learning_rate": 7.14195427982685e-06, "loss": 0.5163311, "memory(GiB)": 34.88, "step": 57785, "train_speed(iter/s)": 0.41324 }, { "acc": 0.89469929, "epoch": 1.5647253134053556, "grad_norm": 10.59131145477295, "learning_rate": 7.1414486331424214e-06, "loss": 0.52453089, "memory(GiB)": 34.88, "step": 57790, "train_speed(iter/s)": 0.413241 }, { "acc": 0.90354033, "epoch": 1.5648606936885712, "grad_norm": 9.932279586791992, "learning_rate": 7.140942959638404e-06, "loss": 0.5246057, "memory(GiB)": 34.88, "step": 57795, "train_speed(iter/s)": 0.413243 }, { "acc": 0.90477514, "epoch": 1.5649960739717867, "grad_norm": 10.198776245117188, "learning_rate": 7.140437259321132e-06, "loss": 0.47555485, "memory(GiB)": 34.88, "step": 57800, "train_speed(iter/s)": 0.413244 }, { "acc": 0.90195627, "epoch": 1.5651314542550023, "grad_norm": 31.55680274963379, "learning_rate": 7.139931532196942e-06, "loss": 0.52552137, "memory(GiB)": 34.88, "step": 57805, "train_speed(iter/s)": 0.413246 }, { "acc": 0.89107027, "epoch": 1.565266834538218, "grad_norm": 12.69495677947998, "learning_rate": 7.139425778272167e-06, "loss": 0.59259949, "memory(GiB)": 34.88, "step": 57810, "train_speed(iter/s)": 0.413248 }, { "acc": 0.90216341, "epoch": 1.5654022148214333, "grad_norm": 7.928159713745117, "learning_rate": 7.138919997553143e-06, "loss": 0.52770677, "memory(GiB)": 34.88, "step": 57815, "train_speed(iter/s)": 0.41325 }, { "acc": 0.89050941, "epoch": 1.565537595104649, "grad_norm": 12.835667610168457, "learning_rate": 7.138414190046207e-06, "loss": 0.6051671, "memory(GiB)": 34.88, "step": 57820, "train_speed(iter/s)": 0.413251 }, { "acc": 0.88361311, "epoch": 1.5656729753878644, "grad_norm": 18.560548782348633, "learning_rate": 7.137908355757697e-06, "loss": 0.67107964, "memory(GiB)": 34.88, "step": 57825, "train_speed(iter/s)": 0.413253 }, { "acc": 0.91041155, "epoch": 1.56580835567108, "grad_norm": 7.998758792877197, "learning_rate": 7.137402494693946e-06, "loss": 0.50290146, "memory(GiB)": 34.88, "step": 57830, "train_speed(iter/s)": 0.413254 }, { "acc": 0.90723953, "epoch": 1.5659437359542956, "grad_norm": 11.506051063537598, "learning_rate": 7.136896606861291e-06, "loss": 0.53413763, "memory(GiB)": 34.88, "step": 57835, "train_speed(iter/s)": 0.413256 }, { "acc": 0.9100318, "epoch": 1.5660791162375112, "grad_norm": 11.733346939086914, "learning_rate": 7.1363906922660705e-06, "loss": 0.59169993, "memory(GiB)": 34.88, "step": 57840, "train_speed(iter/s)": 0.413258 }, { "acc": 0.88180523, "epoch": 1.5662144965207268, "grad_norm": 46.36742401123047, "learning_rate": 7.135884750914623e-06, "loss": 0.73753433, "memory(GiB)": 34.88, "step": 57845, "train_speed(iter/s)": 0.413259 }, { "acc": 0.91500912, "epoch": 1.566349876803942, "grad_norm": 13.083791732788086, "learning_rate": 7.135378782813286e-06, "loss": 0.44152417, "memory(GiB)": 34.88, "step": 57850, "train_speed(iter/s)": 0.413261 }, { "acc": 0.91277018, "epoch": 1.566485257087158, "grad_norm": 6.780362129211426, "learning_rate": 7.134872787968396e-06, "loss": 0.49598818, "memory(GiB)": 34.88, "step": 57855, "train_speed(iter/s)": 0.413263 }, { "acc": 0.90252132, "epoch": 1.5666206373703733, "grad_norm": 17.069181442260742, "learning_rate": 7.134366766386295e-06, "loss": 0.47476168, "memory(GiB)": 34.88, "step": 57860, "train_speed(iter/s)": 0.413265 }, { "acc": 0.90515394, "epoch": 1.566756017653589, "grad_norm": 7.189350128173828, "learning_rate": 7.133860718073316e-06, "loss": 0.45552192, "memory(GiB)": 34.88, "step": 57865, "train_speed(iter/s)": 0.413266 }, { "acc": 0.88868179, "epoch": 1.5668913979368044, "grad_norm": 8.820920944213867, "learning_rate": 7.133354643035806e-06, "loss": 0.65053315, "memory(GiB)": 34.88, "step": 57870, "train_speed(iter/s)": 0.413268 }, { "acc": 0.88261547, "epoch": 1.56702677822002, "grad_norm": 9.661173820495605, "learning_rate": 7.132848541280099e-06, "loss": 0.65201311, "memory(GiB)": 34.88, "step": 57875, "train_speed(iter/s)": 0.41327 }, { "acc": 0.88660793, "epoch": 1.5671621585032356, "grad_norm": 7.528846740722656, "learning_rate": 7.132342412812535e-06, "loss": 0.6284627, "memory(GiB)": 34.88, "step": 57880, "train_speed(iter/s)": 0.413271 }, { "acc": 0.88846607, "epoch": 1.5672975387864512, "grad_norm": 8.452458381652832, "learning_rate": 7.131836257639457e-06, "loss": 0.63476553, "memory(GiB)": 34.88, "step": 57885, "train_speed(iter/s)": 0.413273 }, { "acc": 0.90041504, "epoch": 1.5674329190696668, "grad_norm": 14.513092041015625, "learning_rate": 7.131330075767203e-06, "loss": 0.57792563, "memory(GiB)": 34.88, "step": 57890, "train_speed(iter/s)": 0.413275 }, { "acc": 0.90158253, "epoch": 1.567568299352882, "grad_norm": 25.27572250366211, "learning_rate": 7.130823867202117e-06, "loss": 0.62286062, "memory(GiB)": 34.88, "step": 57895, "train_speed(iter/s)": 0.413277 }, { "acc": 0.91074028, "epoch": 1.567703679636098, "grad_norm": 8.248613357543945, "learning_rate": 7.130317631950537e-06, "loss": 0.44049573, "memory(GiB)": 34.88, "step": 57900, "train_speed(iter/s)": 0.413279 }, { "acc": 0.92178764, "epoch": 1.5678390599193133, "grad_norm": 5.820849895477295, "learning_rate": 7.129811370018808e-06, "loss": 0.41790581, "memory(GiB)": 34.88, "step": 57905, "train_speed(iter/s)": 0.41328 }, { "acc": 0.90762482, "epoch": 1.5679744402025289, "grad_norm": 5.497132301330566, "learning_rate": 7.129305081413269e-06, "loss": 0.51957464, "memory(GiB)": 34.88, "step": 57910, "train_speed(iter/s)": 0.413282 }, { "acc": 0.89621277, "epoch": 1.5681098204857444, "grad_norm": 9.439311981201172, "learning_rate": 7.128798766140262e-06, "loss": 0.55295234, "memory(GiB)": 34.88, "step": 57915, "train_speed(iter/s)": 0.413283 }, { "acc": 0.88420324, "epoch": 1.56824520076896, "grad_norm": 18.784894943237305, "learning_rate": 7.1282924242061336e-06, "loss": 0.62323623, "memory(GiB)": 34.88, "step": 57920, "train_speed(iter/s)": 0.413285 }, { "acc": 0.90746403, "epoch": 1.5683805810521756, "grad_norm": 9.197654724121094, "learning_rate": 7.127786055617223e-06, "loss": 0.51298213, "memory(GiB)": 34.88, "step": 57925, "train_speed(iter/s)": 0.413287 }, { "acc": 0.90325069, "epoch": 1.568515961335391, "grad_norm": 5.525975704193115, "learning_rate": 7.127279660379873e-06, "loss": 0.50079393, "memory(GiB)": 34.88, "step": 57930, "train_speed(iter/s)": 0.413288 }, { "acc": 0.89038754, "epoch": 1.5686513416186068, "grad_norm": 19.8284854888916, "learning_rate": 7.126773238500429e-06, "loss": 0.62403946, "memory(GiB)": 34.88, "step": 57935, "train_speed(iter/s)": 0.41329 }, { "acc": 0.92281303, "epoch": 1.5687867219018221, "grad_norm": 20.805524826049805, "learning_rate": 7.126266789985235e-06, "loss": 0.42338486, "memory(GiB)": 34.88, "step": 57940, "train_speed(iter/s)": 0.413292 }, { "acc": 0.89740324, "epoch": 1.568922102185038, "grad_norm": 6.108547687530518, "learning_rate": 7.125760314840635e-06, "loss": 0.51616392, "memory(GiB)": 34.88, "step": 57945, "train_speed(iter/s)": 0.413293 }, { "acc": 0.91988792, "epoch": 1.5690574824682533, "grad_norm": 4.579138278961182, "learning_rate": 7.125253813072973e-06, "loss": 0.41946888, "memory(GiB)": 34.88, "step": 57950, "train_speed(iter/s)": 0.413295 }, { "acc": 0.90235119, "epoch": 1.5691928627514689, "grad_norm": 13.330536842346191, "learning_rate": 7.124747284688597e-06, "loss": 0.57123666, "memory(GiB)": 34.88, "step": 57955, "train_speed(iter/s)": 0.413297 }, { "acc": 0.90769386, "epoch": 1.5693282430346844, "grad_norm": 10.57170295715332, "learning_rate": 7.1242407296938455e-06, "loss": 0.56875677, "memory(GiB)": 34.88, "step": 57960, "train_speed(iter/s)": 0.413299 }, { "acc": 0.89953403, "epoch": 1.5694636233179, "grad_norm": 13.932615280151367, "learning_rate": 7.123734148095071e-06, "loss": 0.58647099, "memory(GiB)": 34.88, "step": 57965, "train_speed(iter/s)": 0.4133 }, { "acc": 0.89577847, "epoch": 1.5695990036011156, "grad_norm": 7.904679775238037, "learning_rate": 7.1232275398986154e-06, "loss": 0.52863598, "memory(GiB)": 34.88, "step": 57970, "train_speed(iter/s)": 0.413301 }, { "acc": 0.91945114, "epoch": 1.569734383884331, "grad_norm": 8.972397804260254, "learning_rate": 7.122720905110827e-06, "loss": 0.46235895, "memory(GiB)": 34.88, "step": 57975, "train_speed(iter/s)": 0.413303 }, { "acc": 0.88957653, "epoch": 1.5698697641675468, "grad_norm": 7.092719554901123, "learning_rate": 7.122214243738049e-06, "loss": 0.56526904, "memory(GiB)": 34.88, "step": 57980, "train_speed(iter/s)": 0.413305 }, { "acc": 0.9038023, "epoch": 1.5700051444507621, "grad_norm": 8.165520668029785, "learning_rate": 7.121707555786633e-06, "loss": 0.56514349, "memory(GiB)": 34.88, "step": 57985, "train_speed(iter/s)": 0.413307 }, { "acc": 0.88895807, "epoch": 1.5701405247339777, "grad_norm": 15.534858703613281, "learning_rate": 7.1212008412629255e-06, "loss": 0.62482953, "memory(GiB)": 34.88, "step": 57990, "train_speed(iter/s)": 0.413308 }, { "acc": 0.90824099, "epoch": 1.5702759050171933, "grad_norm": 6.9460954666137695, "learning_rate": 7.120694100173272e-06, "loss": 0.4725245, "memory(GiB)": 34.88, "step": 57995, "train_speed(iter/s)": 0.41331 }, { "acc": 0.90798864, "epoch": 1.5704112853004089, "grad_norm": 7.429706573486328, "learning_rate": 7.12018733252402e-06, "loss": 0.49816165, "memory(GiB)": 34.88, "step": 58000, "train_speed(iter/s)": 0.413312 }, { "acc": 0.90180864, "epoch": 1.5705466655836244, "grad_norm": 7.024697780609131, "learning_rate": 7.1196805383215194e-06, "loss": 0.51289907, "memory(GiB)": 34.88, "step": 58005, "train_speed(iter/s)": 0.413313 }, { "acc": 0.9120532, "epoch": 1.5706820458668398, "grad_norm": 4.6622209548950195, "learning_rate": 7.11917371757212e-06, "loss": 0.4747529, "memory(GiB)": 34.88, "step": 58010, "train_speed(iter/s)": 0.413315 }, { "acc": 0.88556232, "epoch": 1.5708174261500556, "grad_norm": 8.18095874786377, "learning_rate": 7.1186668702821674e-06, "loss": 0.65696435, "memory(GiB)": 34.88, "step": 58015, "train_speed(iter/s)": 0.413316 }, { "acc": 0.90532341, "epoch": 1.570952806433271, "grad_norm": 8.370077133178711, "learning_rate": 7.118159996458012e-06, "loss": 0.45915833, "memory(GiB)": 34.88, "step": 58020, "train_speed(iter/s)": 0.413318 }, { "acc": 0.91560621, "epoch": 1.5710881867164868, "grad_norm": 5.953598976135254, "learning_rate": 7.117653096106005e-06, "loss": 0.4990315, "memory(GiB)": 34.88, "step": 58025, "train_speed(iter/s)": 0.41332 }, { "acc": 0.8954937, "epoch": 1.5712235669997021, "grad_norm": 8.222143173217773, "learning_rate": 7.117146169232495e-06, "loss": 0.62629414, "memory(GiB)": 34.88, "step": 58030, "train_speed(iter/s)": 0.413321 }, { "acc": 0.89558964, "epoch": 1.5713589472829177, "grad_norm": 6.284947395324707, "learning_rate": 7.116639215843832e-06, "loss": 0.54836502, "memory(GiB)": 34.88, "step": 58035, "train_speed(iter/s)": 0.413323 }, { "acc": 0.91053619, "epoch": 1.5714943275661333, "grad_norm": 4.803878307342529, "learning_rate": 7.116132235946369e-06, "loss": 0.41939344, "memory(GiB)": 34.88, "step": 58040, "train_speed(iter/s)": 0.413325 }, { "acc": 0.90934334, "epoch": 1.5716297078493489, "grad_norm": 7.269163608551025, "learning_rate": 7.115625229546455e-06, "loss": 0.40517645, "memory(GiB)": 34.88, "step": 58045, "train_speed(iter/s)": 0.413327 }, { "acc": 0.88874302, "epoch": 1.5717650881325644, "grad_norm": 8.415183067321777, "learning_rate": 7.115118196650438e-06, "loss": 0.64772539, "memory(GiB)": 34.88, "step": 58050, "train_speed(iter/s)": 0.413328 }, { "acc": 0.88828878, "epoch": 1.5719004684157798, "grad_norm": 8.37650203704834, "learning_rate": 7.1146111372646754e-06, "loss": 0.63720431, "memory(GiB)": 34.88, "step": 58055, "train_speed(iter/s)": 0.41333 }, { "acc": 0.89798126, "epoch": 1.5720358486989956, "grad_norm": 5.504094123840332, "learning_rate": 7.1141040513955176e-06, "loss": 0.57552037, "memory(GiB)": 34.88, "step": 58060, "train_speed(iter/s)": 0.413332 }, { "acc": 0.90323811, "epoch": 1.572171228982211, "grad_norm": 5.902419567108154, "learning_rate": 7.113596939049312e-06, "loss": 0.52166739, "memory(GiB)": 34.88, "step": 58065, "train_speed(iter/s)": 0.413334 }, { "acc": 0.89715614, "epoch": 1.5723066092654265, "grad_norm": 6.678633689880371, "learning_rate": 7.113089800232418e-06, "loss": 0.50086164, "memory(GiB)": 34.88, "step": 58070, "train_speed(iter/s)": 0.413336 }, { "acc": 0.89212742, "epoch": 1.5724419895486421, "grad_norm": 17.39975929260254, "learning_rate": 7.112582634951185e-06, "loss": 0.56796885, "memory(GiB)": 34.88, "step": 58075, "train_speed(iter/s)": 0.413337 }, { "acc": 0.88346329, "epoch": 1.5725773698318577, "grad_norm": 7.166642665863037, "learning_rate": 7.112075443211968e-06, "loss": 0.65447965, "memory(GiB)": 34.88, "step": 58080, "train_speed(iter/s)": 0.413339 }, { "acc": 0.91737995, "epoch": 1.5727127501150733, "grad_norm": 6.044747352600098, "learning_rate": 7.111568225021118e-06, "loss": 0.43845825, "memory(GiB)": 34.88, "step": 58085, "train_speed(iter/s)": 0.41334 }, { "acc": 0.88766117, "epoch": 1.5728481303982886, "grad_norm": 12.696832656860352, "learning_rate": 7.111060980384989e-06, "loss": 0.58450184, "memory(GiB)": 34.88, "step": 58090, "train_speed(iter/s)": 0.413341 }, { "acc": 0.88250599, "epoch": 1.5729835106815044, "grad_norm": 5.769753932952881, "learning_rate": 7.1105537093099385e-06, "loss": 0.60454674, "memory(GiB)": 34.88, "step": 58095, "train_speed(iter/s)": 0.413343 }, { "acc": 0.9223917, "epoch": 1.5731188909647198, "grad_norm": 5.643186569213867, "learning_rate": 7.1100464118023185e-06, "loss": 0.43393974, "memory(GiB)": 34.88, "step": 58100, "train_speed(iter/s)": 0.413345 }, { "acc": 0.9126009, "epoch": 1.5732542712479356, "grad_norm": 6.740299224853516, "learning_rate": 7.109539087868484e-06, "loss": 0.46339149, "memory(GiB)": 34.88, "step": 58105, "train_speed(iter/s)": 0.413347 }, { "acc": 0.90977173, "epoch": 1.573389651531151, "grad_norm": 14.217309951782227, "learning_rate": 7.109031737514791e-06, "loss": 0.53552914, "memory(GiB)": 34.88, "step": 58110, "train_speed(iter/s)": 0.413348 }, { "acc": 0.91131067, "epoch": 1.5735250318143665, "grad_norm": 13.770796775817871, "learning_rate": 7.108524360747595e-06, "loss": 0.49184036, "memory(GiB)": 34.88, "step": 58115, "train_speed(iter/s)": 0.41335 }, { "acc": 0.90144911, "epoch": 1.5736604120975821, "grad_norm": 9.742607116699219, "learning_rate": 7.108016957573251e-06, "loss": 0.48774614, "memory(GiB)": 34.88, "step": 58120, "train_speed(iter/s)": 0.413352 }, { "acc": 0.90685883, "epoch": 1.5737957923807977, "grad_norm": 7.963084697723389, "learning_rate": 7.107509527998115e-06, "loss": 0.52009549, "memory(GiB)": 34.88, "step": 58125, "train_speed(iter/s)": 0.413354 }, { "acc": 0.88868179, "epoch": 1.5739311726640133, "grad_norm": 13.53056812286377, "learning_rate": 7.107002072028547e-06, "loss": 0.69379845, "memory(GiB)": 34.88, "step": 58130, "train_speed(iter/s)": 0.413355 }, { "acc": 0.89248524, "epoch": 1.5740665529472286, "grad_norm": 8.68727970123291, "learning_rate": 7.1064945896708995e-06, "loss": 0.49832258, "memory(GiB)": 34.88, "step": 58135, "train_speed(iter/s)": 0.413357 }, { "acc": 0.89667244, "epoch": 1.5742019332304444, "grad_norm": 9.693385124206543, "learning_rate": 7.105987080931531e-06, "loss": 0.51249671, "memory(GiB)": 34.88, "step": 58140, "train_speed(iter/s)": 0.413359 }, { "acc": 0.9179801, "epoch": 1.5743373135136598, "grad_norm": 19.85952377319336, "learning_rate": 7.1054795458168004e-06, "loss": 0.45715752, "memory(GiB)": 34.88, "step": 58145, "train_speed(iter/s)": 0.413361 }, { "acc": 0.9057373, "epoch": 1.5744726937968754, "grad_norm": 10.648171424865723, "learning_rate": 7.104971984333066e-06, "loss": 0.43732634, "memory(GiB)": 34.88, "step": 58150, "train_speed(iter/s)": 0.413362 }, { "acc": 0.90635395, "epoch": 1.574608074080091, "grad_norm": 25.536693572998047, "learning_rate": 7.104464396486682e-06, "loss": 0.48034015, "memory(GiB)": 34.88, "step": 58155, "train_speed(iter/s)": 0.413364 }, { "acc": 0.89683609, "epoch": 1.5747434543633065, "grad_norm": 10.151410102844238, "learning_rate": 7.103956782284012e-06, "loss": 0.58581924, "memory(GiB)": 34.88, "step": 58160, "train_speed(iter/s)": 0.413366 }, { "acc": 0.89146538, "epoch": 1.5748788346465221, "grad_norm": 11.109755516052246, "learning_rate": 7.103449141731412e-06, "loss": 0.60760665, "memory(GiB)": 34.88, "step": 58165, "train_speed(iter/s)": 0.413367 }, { "acc": 0.90562782, "epoch": 1.5750142149297375, "grad_norm": 7.485194206237793, "learning_rate": 7.10294147483524e-06, "loss": 0.5748929, "memory(GiB)": 34.88, "step": 58170, "train_speed(iter/s)": 0.413369 }, { "acc": 0.91661606, "epoch": 1.5751495952129533, "grad_norm": 8.422056198120117, "learning_rate": 7.10243378160186e-06, "loss": 0.46747894, "memory(GiB)": 34.88, "step": 58175, "train_speed(iter/s)": 0.413371 }, { "acc": 0.91424131, "epoch": 1.5752849754961686, "grad_norm": 6.460788726806641, "learning_rate": 7.1019260620376265e-06, "loss": 0.45368915, "memory(GiB)": 34.88, "step": 58180, "train_speed(iter/s)": 0.413373 }, { "acc": 0.89993906, "epoch": 1.5754203557793844, "grad_norm": 9.566651344299316, "learning_rate": 7.1014183161489036e-06, "loss": 0.56855006, "memory(GiB)": 34.88, "step": 58185, "train_speed(iter/s)": 0.413374 }, { "acc": 0.91069164, "epoch": 1.5755557360625998, "grad_norm": 17.44353485107422, "learning_rate": 7.1009105439420504e-06, "loss": 0.50393391, "memory(GiB)": 34.88, "step": 58190, "train_speed(iter/s)": 0.413376 }, { "acc": 0.86722946, "epoch": 1.5756911163458154, "grad_norm": 9.636297225952148, "learning_rate": 7.100402745423428e-06, "loss": 0.80691242, "memory(GiB)": 34.88, "step": 58195, "train_speed(iter/s)": 0.413378 }, { "acc": 0.90454655, "epoch": 1.575826496629031, "grad_norm": 4.693437576293945, "learning_rate": 7.099894920599396e-06, "loss": 0.52130418, "memory(GiB)": 34.88, "step": 58200, "train_speed(iter/s)": 0.413379 }, { "acc": 0.8985754, "epoch": 1.5759618769122465, "grad_norm": 11.256709098815918, "learning_rate": 7.0993870694763186e-06, "loss": 0.54464817, "memory(GiB)": 34.88, "step": 58205, "train_speed(iter/s)": 0.413381 }, { "acc": 0.88804483, "epoch": 1.5760972571954621, "grad_norm": 6.380012512207031, "learning_rate": 7.098879192060556e-06, "loss": 0.64912672, "memory(GiB)": 34.88, "step": 58210, "train_speed(iter/s)": 0.413382 }, { "acc": 0.93098373, "epoch": 1.5762326374786775, "grad_norm": 7.638372421264648, "learning_rate": 7.098371288358471e-06, "loss": 0.31913519, "memory(GiB)": 34.88, "step": 58215, "train_speed(iter/s)": 0.413384 }, { "acc": 0.90641603, "epoch": 1.5763680177618933, "grad_norm": 18.424924850463867, "learning_rate": 7.097863358376425e-06, "loss": 0.6037158, "memory(GiB)": 34.88, "step": 58220, "train_speed(iter/s)": 0.413385 }, { "acc": 0.88399734, "epoch": 1.5765033980451086, "grad_norm": 24.841249465942383, "learning_rate": 7.097355402120783e-06, "loss": 0.68039932, "memory(GiB)": 34.88, "step": 58225, "train_speed(iter/s)": 0.413387 }, { "acc": 0.89302006, "epoch": 1.5766387783283242, "grad_norm": 11.35782527923584, "learning_rate": 7.096847419597907e-06, "loss": 0.55703163, "memory(GiB)": 34.88, "step": 58230, "train_speed(iter/s)": 0.413389 }, { "acc": 0.90153084, "epoch": 1.5767741586115398, "grad_norm": 6.3278422355651855, "learning_rate": 7.096339410814157e-06, "loss": 0.585956, "memory(GiB)": 34.88, "step": 58235, "train_speed(iter/s)": 0.41339 }, { "acc": 0.88129826, "epoch": 1.5769095388947554, "grad_norm": 7.66481876373291, "learning_rate": 7.095831375775905e-06, "loss": 0.67482195, "memory(GiB)": 34.88, "step": 58240, "train_speed(iter/s)": 0.413392 }, { "acc": 0.89659243, "epoch": 1.577044919177971, "grad_norm": 11.404061317443848, "learning_rate": 7.095323314489506e-06, "loss": 0.6204792, "memory(GiB)": 34.88, "step": 58245, "train_speed(iter/s)": 0.413394 }, { "acc": 0.92088013, "epoch": 1.5771802994611863, "grad_norm": 8.6876802444458, "learning_rate": 7.09481522696133e-06, "loss": 0.44459453, "memory(GiB)": 34.88, "step": 58250, "train_speed(iter/s)": 0.413395 }, { "acc": 0.90223579, "epoch": 1.5773156797444021, "grad_norm": 22.443771362304688, "learning_rate": 7.094307113197741e-06, "loss": 0.5006752, "memory(GiB)": 34.88, "step": 58255, "train_speed(iter/s)": 0.413397 }, { "acc": 0.9239748, "epoch": 1.5774510600276175, "grad_norm": 10.89460563659668, "learning_rate": 7.093798973205104e-06, "loss": 0.43695445, "memory(GiB)": 34.88, "step": 58260, "train_speed(iter/s)": 0.413399 }, { "acc": 0.89046955, "epoch": 1.5775864403108333, "grad_norm": 36.52721405029297, "learning_rate": 7.0932908069897845e-06, "loss": 0.56651211, "memory(GiB)": 34.88, "step": 58265, "train_speed(iter/s)": 0.413401 }, { "acc": 0.90558023, "epoch": 1.5777218205940486, "grad_norm": 5.655633926391602, "learning_rate": 7.092782614558145e-06, "loss": 0.49337978, "memory(GiB)": 34.88, "step": 58270, "train_speed(iter/s)": 0.413402 }, { "acc": 0.89825974, "epoch": 1.5778572008772642, "grad_norm": 9.098846435546875, "learning_rate": 7.092274395916556e-06, "loss": 0.571945, "memory(GiB)": 34.88, "step": 58275, "train_speed(iter/s)": 0.413404 }, { "acc": 0.89840927, "epoch": 1.5779925811604798, "grad_norm": 39.556968688964844, "learning_rate": 7.091766151071382e-06, "loss": 0.56067448, "memory(GiB)": 34.88, "step": 58280, "train_speed(iter/s)": 0.413405 }, { "acc": 0.90956764, "epoch": 1.5781279614436954, "grad_norm": 16.75107765197754, "learning_rate": 7.0912578800289896e-06, "loss": 0.4985539, "memory(GiB)": 34.88, "step": 58285, "train_speed(iter/s)": 0.413407 }, { "acc": 0.89463444, "epoch": 1.578263341726911, "grad_norm": 10.028639793395996, "learning_rate": 7.0907495827957496e-06, "loss": 0.59373102, "memory(GiB)": 34.88, "step": 58290, "train_speed(iter/s)": 0.413409 }, { "acc": 0.87485218, "epoch": 1.5783987220101263, "grad_norm": 7.93367338180542, "learning_rate": 7.090241259378023e-06, "loss": 0.64723015, "memory(GiB)": 34.88, "step": 58295, "train_speed(iter/s)": 0.413411 }, { "acc": 0.91960411, "epoch": 1.5785341022933421, "grad_norm": 17.29037857055664, "learning_rate": 7.089732909782183e-06, "loss": 0.46996574, "memory(GiB)": 34.88, "step": 58300, "train_speed(iter/s)": 0.413412 }, { "acc": 0.91988277, "epoch": 1.5786694825765575, "grad_norm": 3.467787027359009, "learning_rate": 7.089224534014594e-06, "loss": 0.37889056, "memory(GiB)": 34.88, "step": 58305, "train_speed(iter/s)": 0.413414 }, { "acc": 0.91096745, "epoch": 1.578804862859773, "grad_norm": 6.662145614624023, "learning_rate": 7.088716132081627e-06, "loss": 0.46832705, "memory(GiB)": 34.88, "step": 58310, "train_speed(iter/s)": 0.413415 }, { "acc": 0.90560961, "epoch": 1.5789402431429886, "grad_norm": 7.944462299346924, "learning_rate": 7.088207703989647e-06, "loss": 0.5972681, "memory(GiB)": 34.88, "step": 58315, "train_speed(iter/s)": 0.413417 }, { "acc": 0.90417118, "epoch": 1.5790756234262042, "grad_norm": 4.981600284576416, "learning_rate": 7.087699249745027e-06, "loss": 0.49280481, "memory(GiB)": 34.88, "step": 58320, "train_speed(iter/s)": 0.413419 }, { "acc": 0.91685762, "epoch": 1.5792110037094198, "grad_norm": 7.53586483001709, "learning_rate": 7.087190769354136e-06, "loss": 0.43125243, "memory(GiB)": 34.88, "step": 58325, "train_speed(iter/s)": 0.41342 }, { "acc": 0.90089951, "epoch": 1.5793463839926352, "grad_norm": 13.476239204406738, "learning_rate": 7.086682262823344e-06, "loss": 0.56440563, "memory(GiB)": 34.88, "step": 58330, "train_speed(iter/s)": 0.413422 }, { "acc": 0.90512276, "epoch": 1.579481764275851, "grad_norm": 6.142688274383545, "learning_rate": 7.086173730159017e-06, "loss": 0.51982126, "memory(GiB)": 34.88, "step": 58335, "train_speed(iter/s)": 0.413424 }, { "acc": 0.91070881, "epoch": 1.5796171445590663, "grad_norm": 7.615496635437012, "learning_rate": 7.08566517136753e-06, "loss": 0.51683698, "memory(GiB)": 34.88, "step": 58340, "train_speed(iter/s)": 0.413425 }, { "acc": 0.90013475, "epoch": 1.5797525248422821, "grad_norm": 10.247288703918457, "learning_rate": 7.085156586455251e-06, "loss": 0.55165281, "memory(GiB)": 34.88, "step": 58345, "train_speed(iter/s)": 0.413427 }, { "acc": 0.88520813, "epoch": 1.5798879051254975, "grad_norm": 7.330904960632324, "learning_rate": 7.084647975428551e-06, "loss": 0.59204354, "memory(GiB)": 34.88, "step": 58350, "train_speed(iter/s)": 0.413429 }, { "acc": 0.88946342, "epoch": 1.580023285408713, "grad_norm": 9.916772842407227, "learning_rate": 7.0841393382938025e-06, "loss": 0.63999171, "memory(GiB)": 34.88, "step": 58355, "train_speed(iter/s)": 0.41343 }, { "acc": 0.90616188, "epoch": 1.5801586656919286, "grad_norm": 13.280252456665039, "learning_rate": 7.083630675057378e-06, "loss": 0.50258799, "memory(GiB)": 34.88, "step": 58360, "train_speed(iter/s)": 0.413432 }, { "acc": 0.92480106, "epoch": 1.5802940459751442, "grad_norm": 4.033143520355225, "learning_rate": 7.0831219857256485e-06, "loss": 0.38255372, "memory(GiB)": 34.88, "step": 58365, "train_speed(iter/s)": 0.413433 }, { "acc": 0.91499615, "epoch": 1.5804294262583598, "grad_norm": 5.900285243988037, "learning_rate": 7.082613270304987e-06, "loss": 0.42416372, "memory(GiB)": 34.88, "step": 58370, "train_speed(iter/s)": 0.413435 }, { "acc": 0.9222765, "epoch": 1.5805648065415752, "grad_norm": 7.493152141571045, "learning_rate": 7.082104528801764e-06, "loss": 0.46087351, "memory(GiB)": 34.88, "step": 58375, "train_speed(iter/s)": 0.413437 }, { "acc": 0.89034328, "epoch": 1.580700186824791, "grad_norm": 11.42161750793457, "learning_rate": 7.081595761222355e-06, "loss": 0.6156949, "memory(GiB)": 34.88, "step": 58380, "train_speed(iter/s)": 0.413439 }, { "acc": 0.90147314, "epoch": 1.5808355671080063, "grad_norm": 6.428150177001953, "learning_rate": 7.081086967573132e-06, "loss": 0.54208536, "memory(GiB)": 34.88, "step": 58385, "train_speed(iter/s)": 0.41344 }, { "acc": 0.91330414, "epoch": 1.580970947391222, "grad_norm": 3.871117115020752, "learning_rate": 7.080578147860468e-06, "loss": 0.40357141, "memory(GiB)": 34.88, "step": 58390, "train_speed(iter/s)": 0.413442 }, { "acc": 0.92441044, "epoch": 1.5811063276744375, "grad_norm": 8.179709434509277, "learning_rate": 7.080069302090738e-06, "loss": 0.42318435, "memory(GiB)": 34.88, "step": 58395, "train_speed(iter/s)": 0.413444 }, { "acc": 0.90023251, "epoch": 1.581241707957653, "grad_norm": 5.593101501464844, "learning_rate": 7.079560430270316e-06, "loss": 0.57092404, "memory(GiB)": 34.88, "step": 58400, "train_speed(iter/s)": 0.413445 }, { "acc": 0.89936485, "epoch": 1.5813770882408686, "grad_norm": 7.735348224639893, "learning_rate": 7.0790515324055785e-06, "loss": 0.57605572, "memory(GiB)": 34.88, "step": 58405, "train_speed(iter/s)": 0.413447 }, { "acc": 0.9125927, "epoch": 1.581512468524084, "grad_norm": 8.449860572814941, "learning_rate": 7.078542608502897e-06, "loss": 0.51282024, "memory(GiB)": 34.88, "step": 58410, "train_speed(iter/s)": 0.413449 }, { "acc": 0.85569534, "epoch": 1.5816478488072998, "grad_norm": 10.470731735229492, "learning_rate": 7.078033658568649e-06, "loss": 0.78178825, "memory(GiB)": 34.88, "step": 58415, "train_speed(iter/s)": 0.413451 }, { "acc": 0.89602299, "epoch": 1.5817832290905152, "grad_norm": 5.0199408531188965, "learning_rate": 7.077524682609209e-06, "loss": 0.54905138, "memory(GiB)": 34.88, "step": 58420, "train_speed(iter/s)": 0.413453 }, { "acc": 0.9199707, "epoch": 1.581918609373731, "grad_norm": 8.031291007995605, "learning_rate": 7.077015680630955e-06, "loss": 0.36820793, "memory(GiB)": 34.88, "step": 58425, "train_speed(iter/s)": 0.413454 }, { "acc": 0.90847368, "epoch": 1.5820539896569463, "grad_norm": 9.578864097595215, "learning_rate": 7.07650665264026e-06, "loss": 0.47748919, "memory(GiB)": 34.88, "step": 58430, "train_speed(iter/s)": 0.413456 }, { "acc": 0.89448128, "epoch": 1.582189369940162, "grad_norm": 10.87799072265625, "learning_rate": 7.075997598643503e-06, "loss": 0.48539596, "memory(GiB)": 34.88, "step": 58435, "train_speed(iter/s)": 0.413458 }, { "acc": 0.90578403, "epoch": 1.5823247502233775, "grad_norm": 8.19953441619873, "learning_rate": 7.0754885186470615e-06, "loss": 0.50190787, "memory(GiB)": 34.88, "step": 58440, "train_speed(iter/s)": 0.413459 }, { "acc": 0.89801006, "epoch": 1.582460130506593, "grad_norm": 16.053041458129883, "learning_rate": 7.074979412657309e-06, "loss": 0.58558102, "memory(GiB)": 34.88, "step": 58445, "train_speed(iter/s)": 0.413461 }, { "acc": 0.90255909, "epoch": 1.5825955107898086, "grad_norm": 8.139864921569824, "learning_rate": 7.074470280680628e-06, "loss": 0.61726089, "memory(GiB)": 34.88, "step": 58450, "train_speed(iter/s)": 0.413463 }, { "acc": 0.89583511, "epoch": 1.582730891073024, "grad_norm": 9.98554515838623, "learning_rate": 7.073961122723392e-06, "loss": 0.59721742, "memory(GiB)": 34.88, "step": 58455, "train_speed(iter/s)": 0.413465 }, { "acc": 0.8985899, "epoch": 1.5828662713562398, "grad_norm": 4.999712944030762, "learning_rate": 7.0734519387919835e-06, "loss": 0.55473428, "memory(GiB)": 34.88, "step": 58460, "train_speed(iter/s)": 0.413466 }, { "acc": 0.91203403, "epoch": 1.5830016516394552, "grad_norm": 6.35076379776001, "learning_rate": 7.072942728892777e-06, "loss": 0.50026274, "memory(GiB)": 34.88, "step": 58465, "train_speed(iter/s)": 0.413468 }, { "acc": 0.91095037, "epoch": 1.5831370319226707, "grad_norm": 4.848783493041992, "learning_rate": 7.072433493032151e-06, "loss": 0.40573702, "memory(GiB)": 34.88, "step": 58470, "train_speed(iter/s)": 0.41347 }, { "acc": 0.90152187, "epoch": 1.5832724122058863, "grad_norm": 73.48008728027344, "learning_rate": 7.071924231216489e-06, "loss": 0.54700298, "memory(GiB)": 34.88, "step": 58475, "train_speed(iter/s)": 0.413471 }, { "acc": 0.91529961, "epoch": 1.583407792489102, "grad_norm": 8.618577003479004, "learning_rate": 7.071414943452168e-06, "loss": 0.42834029, "memory(GiB)": 34.88, "step": 58480, "train_speed(iter/s)": 0.413473 }, { "acc": 0.92865763, "epoch": 1.5835431727723175, "grad_norm": 6.628276824951172, "learning_rate": 7.070905629745567e-06, "loss": 0.40252175, "memory(GiB)": 34.88, "step": 58485, "train_speed(iter/s)": 0.413475 }, { "acc": 0.91381025, "epoch": 1.5836785530555328, "grad_norm": 9.35848617553711, "learning_rate": 7.070396290103068e-06, "loss": 0.41927547, "memory(GiB)": 34.88, "step": 58490, "train_speed(iter/s)": 0.413476 }, { "acc": 0.91825581, "epoch": 1.5838139333387486, "grad_norm": 4.838957786560059, "learning_rate": 7.069886924531051e-06, "loss": 0.44793825, "memory(GiB)": 34.88, "step": 58495, "train_speed(iter/s)": 0.413478 }, { "acc": 0.91056976, "epoch": 1.583949313621964, "grad_norm": 9.56308364868164, "learning_rate": 7.069377533035896e-06, "loss": 0.53480644, "memory(GiB)": 34.88, "step": 58500, "train_speed(iter/s)": 0.41348 }, { "acc": 0.90714779, "epoch": 1.5840846939051798, "grad_norm": 8.461941719055176, "learning_rate": 7.068868115623984e-06, "loss": 0.55588078, "memory(GiB)": 34.88, "step": 58505, "train_speed(iter/s)": 0.413481 }, { "acc": 0.92557011, "epoch": 1.5842200741883952, "grad_norm": 4.7471232414245605, "learning_rate": 7.0683586723016995e-06, "loss": 0.40816522, "memory(GiB)": 34.88, "step": 58510, "train_speed(iter/s)": 0.413483 }, { "acc": 0.91396179, "epoch": 1.5843554544716107, "grad_norm": 23.681795120239258, "learning_rate": 7.067849203075418e-06, "loss": 0.51426067, "memory(GiB)": 34.88, "step": 58515, "train_speed(iter/s)": 0.413485 }, { "acc": 0.90566454, "epoch": 1.5844908347548263, "grad_norm": 8.574084281921387, "learning_rate": 7.067339707951528e-06, "loss": 0.51666584, "memory(GiB)": 34.88, "step": 58520, "train_speed(iter/s)": 0.413486 }, { "acc": 0.89082069, "epoch": 1.584626215038042, "grad_norm": 6.690738201141357, "learning_rate": 7.066830186936408e-06, "loss": 0.59484491, "memory(GiB)": 34.88, "step": 58525, "train_speed(iter/s)": 0.413488 }, { "acc": 0.90632582, "epoch": 1.5847615953212575, "grad_norm": 10.508533477783203, "learning_rate": 7.066320640036443e-06, "loss": 0.53455105, "memory(GiB)": 34.88, "step": 58530, "train_speed(iter/s)": 0.41349 }, { "acc": 0.90510788, "epoch": 1.5848969756044728, "grad_norm": 7.934365272521973, "learning_rate": 7.065811067258014e-06, "loss": 0.4828124, "memory(GiB)": 34.88, "step": 58535, "train_speed(iter/s)": 0.413491 }, { "acc": 0.89512634, "epoch": 1.5850323558876886, "grad_norm": 7.265182018280029, "learning_rate": 7.065301468607506e-06, "loss": 0.49758549, "memory(GiB)": 34.88, "step": 58540, "train_speed(iter/s)": 0.413493 }, { "acc": 0.89643116, "epoch": 1.585167736170904, "grad_norm": 6.112176895141602, "learning_rate": 7.064791844091304e-06, "loss": 0.60681443, "memory(GiB)": 34.88, "step": 58545, "train_speed(iter/s)": 0.413495 }, { "acc": 0.89720774, "epoch": 1.5853031164541196, "grad_norm": 6.960626125335693, "learning_rate": 7.064282193715789e-06, "loss": 0.53844538, "memory(GiB)": 34.88, "step": 58550, "train_speed(iter/s)": 0.413496 }, { "acc": 0.90517769, "epoch": 1.5854384967373352, "grad_norm": 6.704861164093018, "learning_rate": 7.0637725174873465e-06, "loss": 0.47582674, "memory(GiB)": 34.88, "step": 58555, "train_speed(iter/s)": 0.413498 }, { "acc": 0.91520309, "epoch": 1.5855738770205507, "grad_norm": 10.013155937194824, "learning_rate": 7.06326281541236e-06, "loss": 0.55916519, "memory(GiB)": 34.88, "step": 58560, "train_speed(iter/s)": 0.4135 }, { "acc": 0.91259689, "epoch": 1.5857092573037663, "grad_norm": 5.023841857910156, "learning_rate": 7.0627530874972195e-06, "loss": 0.49984179, "memory(GiB)": 34.88, "step": 58565, "train_speed(iter/s)": 0.413501 }, { "acc": 0.91790848, "epoch": 1.5858446375869817, "grad_norm": 3.7880821228027344, "learning_rate": 7.062243333748304e-06, "loss": 0.39780006, "memory(GiB)": 34.88, "step": 58570, "train_speed(iter/s)": 0.413503 }, { "acc": 0.88925152, "epoch": 1.5859800178701975, "grad_norm": 6.936983108520508, "learning_rate": 7.061733554172002e-06, "loss": 0.59776993, "memory(GiB)": 34.88, "step": 58575, "train_speed(iter/s)": 0.413504 }, { "acc": 0.89971275, "epoch": 1.5861153981534128, "grad_norm": 13.669027328491211, "learning_rate": 7.0612237487747e-06, "loss": 0.49000583, "memory(GiB)": 34.88, "step": 58580, "train_speed(iter/s)": 0.413506 }, { "acc": 0.90528879, "epoch": 1.5862507784366287, "grad_norm": 6.64530611038208, "learning_rate": 7.0607139175627835e-06, "loss": 0.54198093, "memory(GiB)": 34.88, "step": 58585, "train_speed(iter/s)": 0.413508 }, { "acc": 0.90402527, "epoch": 1.586386158719844, "grad_norm": 5.84839391708374, "learning_rate": 7.060204060542641e-06, "loss": 0.49189739, "memory(GiB)": 34.88, "step": 58590, "train_speed(iter/s)": 0.413509 }, { "acc": 0.9106308, "epoch": 1.5865215390030596, "grad_norm": 5.282186031341553, "learning_rate": 7.0596941777206555e-06, "loss": 0.42072849, "memory(GiB)": 34.88, "step": 58595, "train_speed(iter/s)": 0.413511 }, { "acc": 0.91265478, "epoch": 1.5866569192862752, "grad_norm": 10.757914543151855, "learning_rate": 7.059184269103218e-06, "loss": 0.47914734, "memory(GiB)": 34.88, "step": 58600, "train_speed(iter/s)": 0.413513 }, { "acc": 0.88665266, "epoch": 1.5867922995694907, "grad_norm": 9.59129810333252, "learning_rate": 7.058674334696714e-06, "loss": 0.65744958, "memory(GiB)": 34.88, "step": 58605, "train_speed(iter/s)": 0.413514 }, { "acc": 0.88348885, "epoch": 1.5869276798527063, "grad_norm": 7.992222785949707, "learning_rate": 7.058164374507534e-06, "loss": 0.66351004, "memory(GiB)": 34.88, "step": 58610, "train_speed(iter/s)": 0.413516 }, { "acc": 0.89838905, "epoch": 1.5870630601359217, "grad_norm": 16.829326629638672, "learning_rate": 7.0576543885420625e-06, "loss": 0.56009932, "memory(GiB)": 34.88, "step": 58615, "train_speed(iter/s)": 0.413518 }, { "acc": 0.91040115, "epoch": 1.5871984404191375, "grad_norm": 7.991108417510986, "learning_rate": 7.0571443768066885e-06, "loss": 0.4423666, "memory(GiB)": 34.88, "step": 58620, "train_speed(iter/s)": 0.413519 }, { "acc": 0.90484295, "epoch": 1.5873338207023528, "grad_norm": 7.852595329284668, "learning_rate": 7.056634339307806e-06, "loss": 0.38906708, "memory(GiB)": 34.88, "step": 58625, "train_speed(iter/s)": 0.413521 }, { "acc": 0.88547316, "epoch": 1.5874692009855684, "grad_norm": 6.807123184204102, "learning_rate": 7.056124276051798e-06, "loss": 0.61113167, "memory(GiB)": 34.88, "step": 58630, "train_speed(iter/s)": 0.413523 }, { "acc": 0.91029758, "epoch": 1.587604581268784, "grad_norm": 12.541543960571289, "learning_rate": 7.055614187045058e-06, "loss": 0.48650479, "memory(GiB)": 34.88, "step": 58635, "train_speed(iter/s)": 0.413525 }, { "acc": 0.90383549, "epoch": 1.5877399615519996, "grad_norm": 3.6148860454559326, "learning_rate": 7.055104072293974e-06, "loss": 0.59757843, "memory(GiB)": 34.88, "step": 58640, "train_speed(iter/s)": 0.413526 }, { "acc": 0.90709457, "epoch": 1.5878753418352152, "grad_norm": 16.373779296875, "learning_rate": 7.054593931804937e-06, "loss": 0.49410834, "memory(GiB)": 34.88, "step": 58645, "train_speed(iter/s)": 0.413528 }, { "acc": 0.90615969, "epoch": 1.5880107221184305, "grad_norm": 6.040183067321777, "learning_rate": 7.054083765584336e-06, "loss": 0.45295339, "memory(GiB)": 34.88, "step": 58650, "train_speed(iter/s)": 0.41353 }, { "acc": 0.89490414, "epoch": 1.5881461024016463, "grad_norm": 9.812825202941895, "learning_rate": 7.053573573638565e-06, "loss": 0.53278303, "memory(GiB)": 34.88, "step": 58655, "train_speed(iter/s)": 0.413531 }, { "acc": 0.91588726, "epoch": 1.5882814826848617, "grad_norm": 4.495517730712891, "learning_rate": 7.053063355974011e-06, "loss": 0.48810673, "memory(GiB)": 34.88, "step": 58660, "train_speed(iter/s)": 0.413533 }, { "acc": 0.91368322, "epoch": 1.5884168629680775, "grad_norm": 4.368695259094238, "learning_rate": 7.05255311259707e-06, "loss": 0.44671669, "memory(GiB)": 34.88, "step": 58665, "train_speed(iter/s)": 0.413535 }, { "acc": 0.90566473, "epoch": 1.5885522432512929, "grad_norm": 5.965291500091553, "learning_rate": 7.0520428435141304e-06, "loss": 0.46700926, "memory(GiB)": 34.88, "step": 58670, "train_speed(iter/s)": 0.413537 }, { "acc": 0.89234915, "epoch": 1.5886876235345084, "grad_norm": 4.735486030578613, "learning_rate": 7.051532548731586e-06, "loss": 0.67968969, "memory(GiB)": 34.88, "step": 58675, "train_speed(iter/s)": 0.413539 }, { "acc": 0.90455999, "epoch": 1.588823003817724, "grad_norm": 5.799690246582031, "learning_rate": 7.051022228255828e-06, "loss": 0.50644698, "memory(GiB)": 34.88, "step": 58680, "train_speed(iter/s)": 0.41354 }, { "acc": 0.90518503, "epoch": 1.5889583841009396, "grad_norm": 8.293547630310059, "learning_rate": 7.050511882093249e-06, "loss": 0.53626261, "memory(GiB)": 34.88, "step": 58685, "train_speed(iter/s)": 0.413542 }, { "acc": 0.89584846, "epoch": 1.5890937643841552, "grad_norm": 10.820125579833984, "learning_rate": 7.050001510250243e-06, "loss": 0.66569376, "memory(GiB)": 34.88, "step": 58690, "train_speed(iter/s)": 0.413543 }, { "acc": 0.90566206, "epoch": 1.5892291446673705, "grad_norm": 5.026533126831055, "learning_rate": 7.049491112733204e-06, "loss": 0.51628766, "memory(GiB)": 34.88, "step": 58695, "train_speed(iter/s)": 0.413545 }, { "acc": 0.91191902, "epoch": 1.5893645249505863, "grad_norm": 6.154839515686035, "learning_rate": 7.0489806895485245e-06, "loss": 0.49003987, "memory(GiB)": 34.88, "step": 58700, "train_speed(iter/s)": 0.413546 }, { "acc": 0.90456066, "epoch": 1.5894999052338017, "grad_norm": 18.945083618164062, "learning_rate": 7.0484702407026e-06, "loss": 0.44142027, "memory(GiB)": 34.88, "step": 58705, "train_speed(iter/s)": 0.413548 }, { "acc": 0.90424662, "epoch": 1.5896352855170173, "grad_norm": 5.556673526763916, "learning_rate": 7.047959766201825e-06, "loss": 0.47963362, "memory(GiB)": 34.88, "step": 58710, "train_speed(iter/s)": 0.41355 }, { "acc": 0.8995326, "epoch": 1.5897706658002329, "grad_norm": 7.5565032958984375, "learning_rate": 7.047449266052592e-06, "loss": 0.48317475, "memory(GiB)": 34.88, "step": 58715, "train_speed(iter/s)": 0.413552 }, { "acc": 0.91422129, "epoch": 1.5899060460834484, "grad_norm": 7.4159932136535645, "learning_rate": 7.0469387402612975e-06, "loss": 0.46994815, "memory(GiB)": 34.88, "step": 58720, "train_speed(iter/s)": 0.413553 }, { "acc": 0.90746117, "epoch": 1.590041426366664, "grad_norm": 3.5885298252105713, "learning_rate": 7.046428188834335e-06, "loss": 0.4610703, "memory(GiB)": 34.88, "step": 58725, "train_speed(iter/s)": 0.413555 }, { "acc": 0.87486382, "epoch": 1.5901768066498794, "grad_norm": 14.510520935058594, "learning_rate": 7.045917611778103e-06, "loss": 0.75835834, "memory(GiB)": 34.88, "step": 58730, "train_speed(iter/s)": 0.413556 }, { "acc": 0.90142097, "epoch": 1.5903121869330952, "grad_norm": 5.1592535972595215, "learning_rate": 7.045407009098996e-06, "loss": 0.54651155, "memory(GiB)": 34.88, "step": 58735, "train_speed(iter/s)": 0.413558 }, { "acc": 0.9045496, "epoch": 1.5904475672163105, "grad_norm": 21.79969024658203, "learning_rate": 7.044896380803411e-06, "loss": 0.54294615, "memory(GiB)": 34.88, "step": 58740, "train_speed(iter/s)": 0.413559 }, { "acc": 0.89667931, "epoch": 1.5905829474995263, "grad_norm": 8.778179168701172, "learning_rate": 7.044385726897743e-06, "loss": 0.52175469, "memory(GiB)": 34.88, "step": 58745, "train_speed(iter/s)": 0.413561 }, { "acc": 0.89877739, "epoch": 1.5907183277827417, "grad_norm": 11.22522258758545, "learning_rate": 7.043875047388392e-06, "loss": 0.53077497, "memory(GiB)": 34.88, "step": 58750, "train_speed(iter/s)": 0.413563 }, { "acc": 0.87961712, "epoch": 1.5908537080659573, "grad_norm": 6.078620433807373, "learning_rate": 7.043364342281751e-06, "loss": 0.62752361, "memory(GiB)": 34.88, "step": 58755, "train_speed(iter/s)": 0.413565 }, { "acc": 0.87721424, "epoch": 1.5909890883491729, "grad_norm": 34.9892463684082, "learning_rate": 7.042853611584222e-06, "loss": 0.67064519, "memory(GiB)": 34.88, "step": 58760, "train_speed(iter/s)": 0.413566 }, { "acc": 0.91182995, "epoch": 1.5911244686323884, "grad_norm": 8.903947830200195, "learning_rate": 7.042342855302199e-06, "loss": 0.56121807, "memory(GiB)": 34.88, "step": 58765, "train_speed(iter/s)": 0.413568 }, { "acc": 0.88520985, "epoch": 1.591259848915604, "grad_norm": 5.535277843475342, "learning_rate": 7.041832073442082e-06, "loss": 0.62915583, "memory(GiB)": 34.88, "step": 58770, "train_speed(iter/s)": 0.41357 }, { "acc": 0.90405025, "epoch": 1.5913952291988194, "grad_norm": 7.5036492347717285, "learning_rate": 7.04132126601027e-06, "loss": 0.58709188, "memory(GiB)": 34.88, "step": 58775, "train_speed(iter/s)": 0.413571 }, { "acc": 0.90395575, "epoch": 1.5915306094820352, "grad_norm": 10.233588218688965, "learning_rate": 7.0408104330131625e-06, "loss": 0.61531496, "memory(GiB)": 34.88, "step": 58780, "train_speed(iter/s)": 0.413573 }, { "acc": 0.89956684, "epoch": 1.5916659897652505, "grad_norm": 11.76573657989502, "learning_rate": 7.040299574457157e-06, "loss": 0.5762886, "memory(GiB)": 34.88, "step": 58785, "train_speed(iter/s)": 0.413575 }, { "acc": 0.89607286, "epoch": 1.5918013700484661, "grad_norm": 52.744876861572266, "learning_rate": 7.039788690348651e-06, "loss": 0.53314648, "memory(GiB)": 34.88, "step": 58790, "train_speed(iter/s)": 0.413576 }, { "acc": 0.88981647, "epoch": 1.5919367503316817, "grad_norm": 13.031185150146484, "learning_rate": 7.039277780694051e-06, "loss": 0.6662499, "memory(GiB)": 34.88, "step": 58795, "train_speed(iter/s)": 0.413578 }, { "acc": 0.90469875, "epoch": 1.5920721306148973, "grad_norm": 6.7801289558410645, "learning_rate": 7.038766845499751e-06, "loss": 0.55269966, "memory(GiB)": 34.88, "step": 58800, "train_speed(iter/s)": 0.41358 }, { "acc": 0.8910717, "epoch": 1.5922075108981129, "grad_norm": 16.199993133544922, "learning_rate": 7.038255884772154e-06, "loss": 0.6700943, "memory(GiB)": 34.88, "step": 58805, "train_speed(iter/s)": 0.413582 }, { "acc": 0.91553822, "epoch": 1.5923428911813282, "grad_norm": 16.933427810668945, "learning_rate": 7.037744898517661e-06, "loss": 0.4196454, "memory(GiB)": 34.88, "step": 58810, "train_speed(iter/s)": 0.413583 }, { "acc": 0.90689402, "epoch": 1.592478271464544, "grad_norm": 31.377246856689453, "learning_rate": 7.037233886742673e-06, "loss": 0.47387114, "memory(GiB)": 34.88, "step": 58815, "train_speed(iter/s)": 0.413585 }, { "acc": 0.88332348, "epoch": 1.5926136517477594, "grad_norm": 14.332533836364746, "learning_rate": 7.036722849453589e-06, "loss": 0.57917624, "memory(GiB)": 34.88, "step": 58820, "train_speed(iter/s)": 0.413587 }, { "acc": 0.89718685, "epoch": 1.592749032030975, "grad_norm": 8.917679786682129, "learning_rate": 7.036211786656813e-06, "loss": 0.42041645, "memory(GiB)": 34.88, "step": 58825, "train_speed(iter/s)": 0.413589 }, { "acc": 0.90211143, "epoch": 1.5928844123141905, "grad_norm": 11.433667182922363, "learning_rate": 7.035700698358747e-06, "loss": 0.5169663, "memory(GiB)": 34.88, "step": 58830, "train_speed(iter/s)": 0.41359 }, { "acc": 0.90698462, "epoch": 1.5930197925974061, "grad_norm": 7.2579755783081055, "learning_rate": 7.035189584565794e-06, "loss": 0.46391096, "memory(GiB)": 34.88, "step": 58835, "train_speed(iter/s)": 0.413592 }, { "acc": 0.918577, "epoch": 1.5931551728806217, "grad_norm": 9.340530395507812, "learning_rate": 7.034678445284354e-06, "loss": 0.39158556, "memory(GiB)": 34.88, "step": 58840, "train_speed(iter/s)": 0.413594 }, { "acc": 0.89651108, "epoch": 1.593290553163837, "grad_norm": 8.515239715576172, "learning_rate": 7.034167280520834e-06, "loss": 0.56868787, "memory(GiB)": 34.88, "step": 58845, "train_speed(iter/s)": 0.413595 }, { "acc": 0.8841773, "epoch": 1.5934259334470529, "grad_norm": 12.785091400146484, "learning_rate": 7.033656090281633e-06, "loss": 0.60416365, "memory(GiB)": 34.88, "step": 58850, "train_speed(iter/s)": 0.413597 }, { "acc": 0.89841423, "epoch": 1.5935613137302682, "grad_norm": 8.482842445373535, "learning_rate": 7.033144874573159e-06, "loss": 0.52861834, "memory(GiB)": 34.88, "step": 58855, "train_speed(iter/s)": 0.413599 }, { "acc": 0.91652327, "epoch": 1.593696694013484, "grad_norm": 7.429754734039307, "learning_rate": 7.032633633401812e-06, "loss": 0.41066308, "memory(GiB)": 34.88, "step": 58860, "train_speed(iter/s)": 0.4136 }, { "acc": 0.86055479, "epoch": 1.5938320742966994, "grad_norm": 11.701476097106934, "learning_rate": 7.0321223667739975e-06, "loss": 0.7698216, "memory(GiB)": 34.88, "step": 58865, "train_speed(iter/s)": 0.413602 }, { "acc": 0.90040054, "epoch": 1.593967454579915, "grad_norm": 11.570414543151855, "learning_rate": 7.031611074696122e-06, "loss": 0.51237221, "memory(GiB)": 34.88, "step": 58870, "train_speed(iter/s)": 0.413604 }, { "acc": 0.91595764, "epoch": 1.5941028348631305, "grad_norm": 10.313613891601562, "learning_rate": 7.03109975717459e-06, "loss": 0.45297017, "memory(GiB)": 34.88, "step": 58875, "train_speed(iter/s)": 0.413605 }, { "acc": 0.90115891, "epoch": 1.5942382151463461, "grad_norm": 20.186439514160156, "learning_rate": 7.030588414215805e-06, "loss": 0.5525125, "memory(GiB)": 34.88, "step": 58880, "train_speed(iter/s)": 0.413607 }, { "acc": 0.90219212, "epoch": 1.5943735954295617, "grad_norm": 8.387106895446777, "learning_rate": 7.030077045826173e-06, "loss": 0.50164075, "memory(GiB)": 34.88, "step": 58885, "train_speed(iter/s)": 0.413608 }, { "acc": 0.91426563, "epoch": 1.594508975712777, "grad_norm": 7.89110803604126, "learning_rate": 7.0295656520121015e-06, "loss": 0.36607771, "memory(GiB)": 34.88, "step": 58890, "train_speed(iter/s)": 0.41361 }, { "acc": 0.91239491, "epoch": 1.5946443559959929, "grad_norm": 6.918483257293701, "learning_rate": 7.029054232779995e-06, "loss": 0.45994773, "memory(GiB)": 34.88, "step": 58895, "train_speed(iter/s)": 0.413612 }, { "acc": 0.89520931, "epoch": 1.5947797362792082, "grad_norm": 9.012516021728516, "learning_rate": 7.028542788136261e-06, "loss": 0.54619064, "memory(GiB)": 34.88, "step": 58900, "train_speed(iter/s)": 0.413613 }, { "acc": 0.92461376, "epoch": 1.5949151165624238, "grad_norm": 4.487972259521484, "learning_rate": 7.028031318087305e-06, "loss": 0.42193451, "memory(GiB)": 34.88, "step": 58905, "train_speed(iter/s)": 0.413615 }, { "acc": 0.89876556, "epoch": 1.5950504968456394, "grad_norm": 4.349414348602295, "learning_rate": 7.0275198226395355e-06, "loss": 0.49626613, "memory(GiB)": 34.88, "step": 58910, "train_speed(iter/s)": 0.413617 }, { "acc": 0.88775272, "epoch": 1.595185877128855, "grad_norm": 7.166146755218506, "learning_rate": 7.02700830179936e-06, "loss": 0.5562974, "memory(GiB)": 34.88, "step": 58915, "train_speed(iter/s)": 0.413618 }, { "acc": 0.87877235, "epoch": 1.5953212574120705, "grad_norm": 10.434663772583008, "learning_rate": 7.026496755573185e-06, "loss": 0.63774085, "memory(GiB)": 34.88, "step": 58920, "train_speed(iter/s)": 0.41362 }, { "acc": 0.91077271, "epoch": 1.595456637695286, "grad_norm": 6.809765338897705, "learning_rate": 7.025985183967422e-06, "loss": 0.49376731, "memory(GiB)": 34.88, "step": 58925, "train_speed(iter/s)": 0.413622 }, { "acc": 0.897791, "epoch": 1.5955920179785017, "grad_norm": 12.29487419128418, "learning_rate": 7.0254735869884735e-06, "loss": 0.59855976, "memory(GiB)": 34.88, "step": 58930, "train_speed(iter/s)": 0.413623 }, { "acc": 0.91655207, "epoch": 1.595727398261717, "grad_norm": 5.494542598724365, "learning_rate": 7.024961964642755e-06, "loss": 0.43002253, "memory(GiB)": 34.88, "step": 58935, "train_speed(iter/s)": 0.413625 }, { "acc": 0.90903234, "epoch": 1.5958627785449329, "grad_norm": 6.4689741134643555, "learning_rate": 7.024450316936672e-06, "loss": 0.47712288, "memory(GiB)": 34.88, "step": 58940, "train_speed(iter/s)": 0.413627 }, { "acc": 0.899821, "epoch": 1.5959981588281482, "grad_norm": 10.29967212677002, "learning_rate": 7.023938643876634e-06, "loss": 0.57070599, "memory(GiB)": 34.88, "step": 58945, "train_speed(iter/s)": 0.413628 }, { "acc": 0.90786152, "epoch": 1.5961335391113638, "grad_norm": 8.425931930541992, "learning_rate": 7.023426945469047e-06, "loss": 0.52849441, "memory(GiB)": 34.88, "step": 58950, "train_speed(iter/s)": 0.413629 }, { "acc": 0.89702158, "epoch": 1.5962689193945794, "grad_norm": 18.047887802124023, "learning_rate": 7.022915221720329e-06, "loss": 0.52714558, "memory(GiB)": 34.88, "step": 58955, "train_speed(iter/s)": 0.413631 }, { "acc": 0.88882284, "epoch": 1.596404299677795, "grad_norm": 12.550436019897461, "learning_rate": 7.022403472636886e-06, "loss": 0.62349944, "memory(GiB)": 34.88, "step": 58960, "train_speed(iter/s)": 0.413633 }, { "acc": 0.91781721, "epoch": 1.5965396799610105, "grad_norm": 6.409839630126953, "learning_rate": 7.0218916982251285e-06, "loss": 0.4763238, "memory(GiB)": 34.88, "step": 58965, "train_speed(iter/s)": 0.413634 }, { "acc": 0.89758186, "epoch": 1.596675060244226, "grad_norm": 7.714928150177002, "learning_rate": 7.021379898491468e-06, "loss": 0.5095726, "memory(GiB)": 34.88, "step": 58970, "train_speed(iter/s)": 0.413636 }, { "acc": 0.90071888, "epoch": 1.5968104405274417, "grad_norm": 5.6246724128723145, "learning_rate": 7.020868073442315e-06, "loss": 0.52885876, "memory(GiB)": 34.88, "step": 58975, "train_speed(iter/s)": 0.413637 }, { "acc": 0.91296101, "epoch": 1.596945820810657, "grad_norm": 10.17098331451416, "learning_rate": 7.020356223084084e-06, "loss": 0.45490818, "memory(GiB)": 34.88, "step": 58980, "train_speed(iter/s)": 0.413639 }, { "acc": 0.89484396, "epoch": 1.5970812010938726, "grad_norm": 12.329522132873535, "learning_rate": 7.019844347423181e-06, "loss": 0.55376244, "memory(GiB)": 34.88, "step": 58985, "train_speed(iter/s)": 0.413641 }, { "acc": 0.88578129, "epoch": 1.5972165813770882, "grad_norm": 7.759327411651611, "learning_rate": 7.0193324464660266e-06, "loss": 0.60445299, "memory(GiB)": 34.88, "step": 58990, "train_speed(iter/s)": 0.413642 }, { "acc": 0.91574211, "epoch": 1.5973519616603038, "grad_norm": 6.425750255584717, "learning_rate": 7.018820520219025e-06, "loss": 0.42007179, "memory(GiB)": 34.88, "step": 58995, "train_speed(iter/s)": 0.413644 }, { "acc": 0.89539289, "epoch": 1.5974873419435194, "grad_norm": 8.919963836669922, "learning_rate": 7.018308568688595e-06, "loss": 0.55606761, "memory(GiB)": 34.88, "step": 59000, "train_speed(iter/s)": 0.413645 }, { "acc": 0.90565462, "epoch": 1.5976227222267347, "grad_norm": 6.419107437133789, "learning_rate": 7.017796591881147e-06, "loss": 0.54231081, "memory(GiB)": 34.88, "step": 59005, "train_speed(iter/s)": 0.413647 }, { "acc": 0.91309805, "epoch": 1.5977581025099505, "grad_norm": 13.088754653930664, "learning_rate": 7.017284589803095e-06, "loss": 0.42831297, "memory(GiB)": 34.88, "step": 59010, "train_speed(iter/s)": 0.413649 }, { "acc": 0.89436665, "epoch": 1.597893482793166, "grad_norm": 7.94718074798584, "learning_rate": 7.016772562460854e-06, "loss": 0.56623468, "memory(GiB)": 34.88, "step": 59015, "train_speed(iter/s)": 0.41365 }, { "acc": 0.88983259, "epoch": 1.5980288630763817, "grad_norm": 8.993005752563477, "learning_rate": 7.016260509860836e-06, "loss": 0.60448322, "memory(GiB)": 34.88, "step": 59020, "train_speed(iter/s)": 0.413652 }, { "acc": 0.89827585, "epoch": 1.598164243359597, "grad_norm": 9.651643753051758, "learning_rate": 7.015748432009457e-06, "loss": 0.53993244, "memory(GiB)": 34.88, "step": 59025, "train_speed(iter/s)": 0.413653 }, { "acc": 0.89941692, "epoch": 1.5982996236428126, "grad_norm": 7.745540618896484, "learning_rate": 7.015236328913132e-06, "loss": 0.5689249, "memory(GiB)": 34.88, "step": 59030, "train_speed(iter/s)": 0.413655 }, { "acc": 0.93027258, "epoch": 1.5984350039260282, "grad_norm": 6.90828800201416, "learning_rate": 7.014724200578274e-06, "loss": 0.32210274, "memory(GiB)": 34.88, "step": 59035, "train_speed(iter/s)": 0.413657 }, { "acc": 0.90629454, "epoch": 1.5985703842092438, "grad_norm": 14.039422035217285, "learning_rate": 7.014212047011302e-06, "loss": 0.41577225, "memory(GiB)": 34.88, "step": 59040, "train_speed(iter/s)": 0.413659 }, { "acc": 0.90275135, "epoch": 1.5987057644924594, "grad_norm": 8.439313888549805, "learning_rate": 7.01369986821863e-06, "loss": 0.49698668, "memory(GiB)": 34.88, "step": 59045, "train_speed(iter/s)": 0.41366 }, { "acc": 0.89334955, "epoch": 1.5988411447756747, "grad_norm": 9.079419136047363, "learning_rate": 7.013187664206672e-06, "loss": 0.5875041, "memory(GiB)": 34.88, "step": 59050, "train_speed(iter/s)": 0.413662 }, { "acc": 0.91797781, "epoch": 1.5989765250588905, "grad_norm": 6.264579772949219, "learning_rate": 7.012675434981847e-06, "loss": 0.38251553, "memory(GiB)": 34.88, "step": 59055, "train_speed(iter/s)": 0.413663 }, { "acc": 0.92070923, "epoch": 1.599111905342106, "grad_norm": 22.125682830810547, "learning_rate": 7.01216318055057e-06, "loss": 0.38528028, "memory(GiB)": 34.88, "step": 59060, "train_speed(iter/s)": 0.413665 }, { "acc": 0.91127596, "epoch": 1.5992472856253215, "grad_norm": 9.459127426147461, "learning_rate": 7.011650900919259e-06, "loss": 0.4622613, "memory(GiB)": 34.88, "step": 59065, "train_speed(iter/s)": 0.413666 }, { "acc": 0.92129936, "epoch": 1.599382665908537, "grad_norm": 15.576851844787598, "learning_rate": 7.01113859609433e-06, "loss": 0.35902128, "memory(GiB)": 34.88, "step": 59070, "train_speed(iter/s)": 0.413668 }, { "acc": 0.89583397, "epoch": 1.5995180461917526, "grad_norm": 13.017657279968262, "learning_rate": 7.010626266082204e-06, "loss": 0.57617769, "memory(GiB)": 34.88, "step": 59075, "train_speed(iter/s)": 0.413669 }, { "acc": 0.91356859, "epoch": 1.5996534264749682, "grad_norm": 9.424967765808105, "learning_rate": 7.010113910889295e-06, "loss": 0.42365913, "memory(GiB)": 34.88, "step": 59080, "train_speed(iter/s)": 0.413671 }, { "acc": 0.89379425, "epoch": 1.5997888067581836, "grad_norm": 4.149187088012695, "learning_rate": 7.009601530522025e-06, "loss": 0.53415189, "memory(GiB)": 34.88, "step": 59085, "train_speed(iter/s)": 0.413673 }, { "acc": 0.91272697, "epoch": 1.5999241870413994, "grad_norm": 7.386567115783691, "learning_rate": 7.009089124986807e-06, "loss": 0.4452291, "memory(GiB)": 34.88, "step": 59090, "train_speed(iter/s)": 0.413675 }, { "acc": 0.90587187, "epoch": 1.6000595673246147, "grad_norm": 4.9959540367126465, "learning_rate": 7.008576694290066e-06, "loss": 0.52757506, "memory(GiB)": 34.88, "step": 59095, "train_speed(iter/s)": 0.413676 }, { "acc": 0.89276733, "epoch": 1.6001949476078305, "grad_norm": 24.578189849853516, "learning_rate": 7.008064238438217e-06, "loss": 0.55115166, "memory(GiB)": 34.88, "step": 59100, "train_speed(iter/s)": 0.413678 }, { "acc": 0.89541645, "epoch": 1.600330327891046, "grad_norm": 17.979736328125, "learning_rate": 7.007551757437682e-06, "loss": 0.54935875, "memory(GiB)": 34.88, "step": 59105, "train_speed(iter/s)": 0.41368 }, { "acc": 0.88171978, "epoch": 1.6004657081742615, "grad_norm": 17.601680755615234, "learning_rate": 7.0070392512948795e-06, "loss": 0.69107561, "memory(GiB)": 34.88, "step": 59110, "train_speed(iter/s)": 0.413681 }, { "acc": 0.89876919, "epoch": 1.600601088457477, "grad_norm": 10.995159149169922, "learning_rate": 7.006526720016229e-06, "loss": 0.62290764, "memory(GiB)": 34.88, "step": 59115, "train_speed(iter/s)": 0.413683 }, { "acc": 0.91001129, "epoch": 1.6007364687406926, "grad_norm": 23.672151565551758, "learning_rate": 7.0060141636081546e-06, "loss": 0.47390895, "memory(GiB)": 34.88, "step": 59120, "train_speed(iter/s)": 0.413685 }, { "acc": 0.90802822, "epoch": 1.6008718490239082, "grad_norm": 7.039827346801758, "learning_rate": 7.0055015820770725e-06, "loss": 0.57906594, "memory(GiB)": 34.88, "step": 59125, "train_speed(iter/s)": 0.413686 }, { "acc": 0.91418953, "epoch": 1.6010072293071236, "grad_norm": 8.231051445007324, "learning_rate": 7.004988975429408e-06, "loss": 0.49694018, "memory(GiB)": 34.88, "step": 59130, "train_speed(iter/s)": 0.413688 }, { "acc": 0.89623051, "epoch": 1.6011426095903394, "grad_norm": 7.703020095825195, "learning_rate": 7.004476343671577e-06, "loss": 0.62506447, "memory(GiB)": 34.88, "step": 59135, "train_speed(iter/s)": 0.41369 }, { "acc": 0.91021614, "epoch": 1.6012779898735547, "grad_norm": 9.893865585327148, "learning_rate": 7.003963686810005e-06, "loss": 0.51400762, "memory(GiB)": 34.88, "step": 59140, "train_speed(iter/s)": 0.413692 }, { "acc": 0.89073162, "epoch": 1.6014133701567703, "grad_norm": 10.979522705078125, "learning_rate": 7.003451004851116e-06, "loss": 0.6041759, "memory(GiB)": 34.88, "step": 59145, "train_speed(iter/s)": 0.413693 }, { "acc": 0.90360632, "epoch": 1.601548750439986, "grad_norm": 11.502339363098145, "learning_rate": 7.002938297801327e-06, "loss": 0.52267985, "memory(GiB)": 34.88, "step": 59150, "train_speed(iter/s)": 0.413695 }, { "acc": 0.90813246, "epoch": 1.6016841307232015, "grad_norm": 7.643923282623291, "learning_rate": 7.002425565667066e-06, "loss": 0.60586128, "memory(GiB)": 34.88, "step": 59155, "train_speed(iter/s)": 0.413697 }, { "acc": 0.88329391, "epoch": 1.601819511006417, "grad_norm": 10.697564125061035, "learning_rate": 7.001912808454753e-06, "loss": 0.60618024, "memory(GiB)": 34.88, "step": 59160, "train_speed(iter/s)": 0.413698 }, { "acc": 0.91862288, "epoch": 1.6019548912896324, "grad_norm": 13.426020622253418, "learning_rate": 7.0014000261708104e-06, "loss": 0.48436356, "memory(GiB)": 34.88, "step": 59165, "train_speed(iter/s)": 0.4137 }, { "acc": 0.89419155, "epoch": 1.6020902715728482, "grad_norm": 39.33853530883789, "learning_rate": 7.0008872188216645e-06, "loss": 0.58906455, "memory(GiB)": 34.88, "step": 59170, "train_speed(iter/s)": 0.413702 }, { "acc": 0.92190237, "epoch": 1.6022256518560636, "grad_norm": 10.421263694763184, "learning_rate": 7.000374386413738e-06, "loss": 0.40615587, "memory(GiB)": 34.88, "step": 59175, "train_speed(iter/s)": 0.413704 }, { "acc": 0.90589314, "epoch": 1.6023610321392794, "grad_norm": 19.11923599243164, "learning_rate": 6.999861528953454e-06, "loss": 0.51529007, "memory(GiB)": 34.88, "step": 59180, "train_speed(iter/s)": 0.413705 }, { "acc": 0.89787998, "epoch": 1.6024964124224947, "grad_norm": 10.602523803710938, "learning_rate": 6.999348646447238e-06, "loss": 0.5458807, "memory(GiB)": 34.88, "step": 59185, "train_speed(iter/s)": 0.413707 }, { "acc": 0.90530224, "epoch": 1.6026317927057103, "grad_norm": 18.60491371154785, "learning_rate": 6.998835738901516e-06, "loss": 0.49039822, "memory(GiB)": 34.88, "step": 59190, "train_speed(iter/s)": 0.413709 }, { "acc": 0.88882179, "epoch": 1.602767172988926, "grad_norm": 10.722511291503906, "learning_rate": 6.998322806322711e-06, "loss": 0.66148877, "memory(GiB)": 34.88, "step": 59195, "train_speed(iter/s)": 0.413711 }, { "acc": 0.9070776, "epoch": 1.6029025532721415, "grad_norm": 6.206470012664795, "learning_rate": 6.997809848717252e-06, "loss": 0.47800736, "memory(GiB)": 34.88, "step": 59200, "train_speed(iter/s)": 0.413712 }, { "acc": 0.9058445, "epoch": 1.603037933555357, "grad_norm": 7.641424655914307, "learning_rate": 6.997296866091561e-06, "loss": 0.47082033, "memory(GiB)": 34.88, "step": 59205, "train_speed(iter/s)": 0.413714 }, { "acc": 0.89468069, "epoch": 1.6031733138385724, "grad_norm": 9.201637268066406, "learning_rate": 6.996783858452067e-06, "loss": 0.53053312, "memory(GiB)": 34.88, "step": 59210, "train_speed(iter/s)": 0.413715 }, { "acc": 0.90812635, "epoch": 1.6033086941217882, "grad_norm": 16.709230422973633, "learning_rate": 6.996270825805192e-06, "loss": 0.46915617, "memory(GiB)": 34.88, "step": 59215, "train_speed(iter/s)": 0.413717 }, { "acc": 0.88957815, "epoch": 1.6034440744050036, "grad_norm": 14.130838394165039, "learning_rate": 6.995757768157367e-06, "loss": 0.52629175, "memory(GiB)": 34.88, "step": 59220, "train_speed(iter/s)": 0.413719 }, { "acc": 0.91214628, "epoch": 1.6035794546882192, "grad_norm": 7.2697649002075195, "learning_rate": 6.9952446855150195e-06, "loss": 0.48156595, "memory(GiB)": 34.88, "step": 59225, "train_speed(iter/s)": 0.41372 }, { "acc": 0.88634605, "epoch": 1.6037148349714347, "grad_norm": 8.56924057006836, "learning_rate": 6.9947315778845716e-06, "loss": 0.54290814, "memory(GiB)": 34.88, "step": 59230, "train_speed(iter/s)": 0.413722 }, { "acc": 0.91291189, "epoch": 1.6038502152546503, "grad_norm": 6.375998020172119, "learning_rate": 6.994218445272458e-06, "loss": 0.40833516, "memory(GiB)": 34.88, "step": 59235, "train_speed(iter/s)": 0.413724 }, { "acc": 0.90594006, "epoch": 1.603985595537866, "grad_norm": 9.879542350769043, "learning_rate": 6.9937052876851004e-06, "loss": 0.5700016, "memory(GiB)": 34.88, "step": 59240, "train_speed(iter/s)": 0.413725 }, { "acc": 0.89698811, "epoch": 1.6041209758210813, "grad_norm": 10.288311004638672, "learning_rate": 6.993192105128932e-06, "loss": 0.49986887, "memory(GiB)": 34.88, "step": 59245, "train_speed(iter/s)": 0.413727 }, { "acc": 0.89218712, "epoch": 1.604256356104297, "grad_norm": 10.997246742248535, "learning_rate": 6.992678897610378e-06, "loss": 0.57373219, "memory(GiB)": 34.88, "step": 59250, "train_speed(iter/s)": 0.413729 }, { "acc": 0.90892038, "epoch": 1.6043917363875124, "grad_norm": 3.32177734375, "learning_rate": 6.992165665135869e-06, "loss": 0.46848259, "memory(GiB)": 34.88, "step": 59255, "train_speed(iter/s)": 0.41373 }, { "acc": 0.8976408, "epoch": 1.6045271166707282, "grad_norm": 9.874151229858398, "learning_rate": 6.991652407711836e-06, "loss": 0.50756865, "memory(GiB)": 34.88, "step": 59260, "train_speed(iter/s)": 0.413731 }, { "acc": 0.9066246, "epoch": 1.6046624969539436, "grad_norm": 4.9042768478393555, "learning_rate": 6.991139125344704e-06, "loss": 0.51011, "memory(GiB)": 34.88, "step": 59265, "train_speed(iter/s)": 0.413733 }, { "acc": 0.90172081, "epoch": 1.6047978772371592, "grad_norm": 4.673561096191406, "learning_rate": 6.990625818040906e-06, "loss": 0.61674123, "memory(GiB)": 34.88, "step": 59270, "train_speed(iter/s)": 0.413735 }, { "acc": 0.90758572, "epoch": 1.6049332575203747, "grad_norm": 9.973684310913086, "learning_rate": 6.990112485806871e-06, "loss": 0.41862369, "memory(GiB)": 34.88, "step": 59275, "train_speed(iter/s)": 0.413736 }, { "acc": 0.89699659, "epoch": 1.6050686378035903, "grad_norm": 20.420724868774414, "learning_rate": 6.989599128649031e-06, "loss": 0.48455343, "memory(GiB)": 34.88, "step": 59280, "train_speed(iter/s)": 0.413738 }, { "acc": 0.9063242, "epoch": 1.605204018086806, "grad_norm": 5.31009578704834, "learning_rate": 6.989085746573815e-06, "loss": 0.50556946, "memory(GiB)": 34.88, "step": 59285, "train_speed(iter/s)": 0.413739 }, { "acc": 0.90675526, "epoch": 1.6053393983700213, "grad_norm": 6.720024585723877, "learning_rate": 6.9885723395876545e-06, "loss": 0.51220388, "memory(GiB)": 34.88, "step": 59290, "train_speed(iter/s)": 0.413741 }, { "acc": 0.92317038, "epoch": 1.605474778653237, "grad_norm": 6.981245040893555, "learning_rate": 6.988058907696984e-06, "loss": 0.42835355, "memory(GiB)": 34.88, "step": 59295, "train_speed(iter/s)": 0.413743 }, { "acc": 0.91669922, "epoch": 1.6056101589364524, "grad_norm": 5.463068008422852, "learning_rate": 6.987545450908231e-06, "loss": 0.37180595, "memory(GiB)": 34.88, "step": 59300, "train_speed(iter/s)": 0.413745 }, { "acc": 0.89256916, "epoch": 1.605745539219668, "grad_norm": 12.721035957336426, "learning_rate": 6.98703196922783e-06, "loss": 0.64664903, "memory(GiB)": 34.88, "step": 59305, "train_speed(iter/s)": 0.413746 }, { "acc": 0.90000143, "epoch": 1.6058809195028836, "grad_norm": 18.80791664123535, "learning_rate": 6.9865184626622106e-06, "loss": 0.50704613, "memory(GiB)": 34.88, "step": 59310, "train_speed(iter/s)": 0.413748 }, { "acc": 0.89971352, "epoch": 1.6060162997860992, "grad_norm": 12.299405097961426, "learning_rate": 6.986004931217809e-06, "loss": 0.59499383, "memory(GiB)": 34.88, "step": 59315, "train_speed(iter/s)": 0.413749 }, { "acc": 0.91405649, "epoch": 1.6061516800693147, "grad_norm": 10.937298774719238, "learning_rate": 6.9854913749010556e-06, "loss": 0.47872052, "memory(GiB)": 34.88, "step": 59320, "train_speed(iter/s)": 0.413751 }, { "acc": 0.89915762, "epoch": 1.60628706035253, "grad_norm": 6.226603031158447, "learning_rate": 6.984977793718386e-06, "loss": 0.53053908, "memory(GiB)": 34.88, "step": 59325, "train_speed(iter/s)": 0.413753 }, { "acc": 0.91536903, "epoch": 1.606422440635746, "grad_norm": 8.836462020874023, "learning_rate": 6.984464187676231e-06, "loss": 0.38929551, "memory(GiB)": 34.88, "step": 59330, "train_speed(iter/s)": 0.413754 }, { "acc": 0.9244628, "epoch": 1.6065578209189613, "grad_norm": 6.592881202697754, "learning_rate": 6.983950556781026e-06, "loss": 0.39946387, "memory(GiB)": 34.88, "step": 59335, "train_speed(iter/s)": 0.413756 }, { "acc": 0.9061039, "epoch": 1.606693201202177, "grad_norm": 10.042341232299805, "learning_rate": 6.983436901039209e-06, "loss": 0.54110031, "memory(GiB)": 34.88, "step": 59340, "train_speed(iter/s)": 0.413758 }, { "acc": 0.92292595, "epoch": 1.6068285814853924, "grad_norm": 7.005505561828613, "learning_rate": 6.982923220457206e-06, "loss": 0.34185166, "memory(GiB)": 34.88, "step": 59345, "train_speed(iter/s)": 0.413759 }, { "acc": 0.90344296, "epoch": 1.606963961768608, "grad_norm": 9.625245094299316, "learning_rate": 6.98240951504146e-06, "loss": 0.51420813, "memory(GiB)": 34.88, "step": 59350, "train_speed(iter/s)": 0.413761 }, { "acc": 0.89905729, "epoch": 1.6070993420518236, "grad_norm": 8.480374336242676, "learning_rate": 6.981895784798401e-06, "loss": 0.52261114, "memory(GiB)": 34.88, "step": 59355, "train_speed(iter/s)": 0.413763 }, { "acc": 0.89840794, "epoch": 1.6072347223350392, "grad_norm": 6.076653003692627, "learning_rate": 6.981382029734467e-06, "loss": 0.54417791, "memory(GiB)": 34.88, "step": 59360, "train_speed(iter/s)": 0.413764 }, { "acc": 0.91031265, "epoch": 1.6073701026182547, "grad_norm": 5.2077813148498535, "learning_rate": 6.9808682498560935e-06, "loss": 0.43283806, "memory(GiB)": 34.88, "step": 59365, "train_speed(iter/s)": 0.413766 }, { "acc": 0.92506638, "epoch": 1.60750548290147, "grad_norm": 4.996214866638184, "learning_rate": 6.980354445169715e-06, "loss": 0.31456883, "memory(GiB)": 34.88, "step": 59370, "train_speed(iter/s)": 0.413767 }, { "acc": 0.90489597, "epoch": 1.607640863184686, "grad_norm": 13.882050514221191, "learning_rate": 6.97984061568177e-06, "loss": 0.52799625, "memory(GiB)": 34.88, "step": 59375, "train_speed(iter/s)": 0.413769 }, { "acc": 0.90973759, "epoch": 1.6077762434679013, "grad_norm": 12.995816230773926, "learning_rate": 6.979326761398694e-06, "loss": 0.55713434, "memory(GiB)": 34.88, "step": 59380, "train_speed(iter/s)": 0.413771 }, { "acc": 0.89043169, "epoch": 1.6079116237511168, "grad_norm": 7.5333452224731445, "learning_rate": 6.978812882326927e-06, "loss": 0.6334672, "memory(GiB)": 34.88, "step": 59385, "train_speed(iter/s)": 0.413772 }, { "acc": 0.90983753, "epoch": 1.6080470040343324, "grad_norm": 10.305445671081543, "learning_rate": 6.978298978472902e-06, "loss": 0.48176932, "memory(GiB)": 34.88, "step": 59390, "train_speed(iter/s)": 0.413774 }, { "acc": 0.90524025, "epoch": 1.608182384317548, "grad_norm": 6.219632625579834, "learning_rate": 6.977785049843058e-06, "loss": 0.48543301, "memory(GiB)": 34.88, "step": 59395, "train_speed(iter/s)": 0.413775 }, { "acc": 0.90559978, "epoch": 1.6083177646007636, "grad_norm": 8.177785873413086, "learning_rate": 6.977271096443832e-06, "loss": 0.55812454, "memory(GiB)": 34.88, "step": 59400, "train_speed(iter/s)": 0.413776 }, { "acc": 0.89414539, "epoch": 1.608453144883979, "grad_norm": 8.79130744934082, "learning_rate": 6.976757118281665e-06, "loss": 0.61923361, "memory(GiB)": 34.88, "step": 59405, "train_speed(iter/s)": 0.413778 }, { "acc": 0.91739206, "epoch": 1.6085885251671947, "grad_norm": 6.656496524810791, "learning_rate": 6.976243115362995e-06, "loss": 0.46443019, "memory(GiB)": 34.88, "step": 59410, "train_speed(iter/s)": 0.413779 }, { "acc": 0.9341032, "epoch": 1.60872390545041, "grad_norm": 4.375906944274902, "learning_rate": 6.97572908769426e-06, "loss": 0.33670254, "memory(GiB)": 34.88, "step": 59415, "train_speed(iter/s)": 0.413781 }, { "acc": 0.91427364, "epoch": 1.608859285733626, "grad_norm": 5.215552806854248, "learning_rate": 6.9752150352818995e-06, "loss": 0.40716877, "memory(GiB)": 34.88, "step": 59420, "train_speed(iter/s)": 0.413783 }, { "acc": 0.92067423, "epoch": 1.6089946660168413, "grad_norm": 11.473467826843262, "learning_rate": 6.974700958132351e-06, "loss": 0.43589692, "memory(GiB)": 34.88, "step": 59425, "train_speed(iter/s)": 0.413784 }, { "acc": 0.91359425, "epoch": 1.6091300463000568, "grad_norm": 18.246402740478516, "learning_rate": 6.974186856252059e-06, "loss": 0.48212681, "memory(GiB)": 34.88, "step": 59430, "train_speed(iter/s)": 0.413786 }, { "acc": 0.89097805, "epoch": 1.6092654265832724, "grad_norm": 23.29464340209961, "learning_rate": 6.973672729647458e-06, "loss": 0.64325228, "memory(GiB)": 34.88, "step": 59435, "train_speed(iter/s)": 0.413787 }, { "acc": 0.89955845, "epoch": 1.609400806866488, "grad_norm": 8.295920372009277, "learning_rate": 6.973158578324994e-06, "loss": 0.58890467, "memory(GiB)": 34.88, "step": 59440, "train_speed(iter/s)": 0.413789 }, { "acc": 0.91219501, "epoch": 1.6095361871497036, "grad_norm": 13.475090026855469, "learning_rate": 6.972644402291102e-06, "loss": 0.49120483, "memory(GiB)": 34.88, "step": 59445, "train_speed(iter/s)": 0.41379 }, { "acc": 0.89782724, "epoch": 1.609671567432919, "grad_norm": 6.752827167510986, "learning_rate": 6.972130201552228e-06, "loss": 0.58743114, "memory(GiB)": 34.88, "step": 59450, "train_speed(iter/s)": 0.413791 }, { "acc": 0.89347515, "epoch": 1.6098069477161348, "grad_norm": 6.233994007110596, "learning_rate": 6.971615976114812e-06, "loss": 0.52699695, "memory(GiB)": 34.88, "step": 59455, "train_speed(iter/s)": 0.413792 }, { "acc": 0.88726711, "epoch": 1.60994232799935, "grad_norm": 17.357284545898438, "learning_rate": 6.971101725985294e-06, "loss": 0.65679765, "memory(GiB)": 34.88, "step": 59460, "train_speed(iter/s)": 0.413794 }, { "acc": 0.88327856, "epoch": 1.6100777082825657, "grad_norm": 8.310340881347656, "learning_rate": 6.97058745117012e-06, "loss": 0.64051409, "memory(GiB)": 34.88, "step": 59465, "train_speed(iter/s)": 0.413796 }, { "acc": 0.91377392, "epoch": 1.6102130885657813, "grad_norm": 11.110613822937012, "learning_rate": 6.970073151675726e-06, "loss": 0.51508102, "memory(GiB)": 34.88, "step": 59470, "train_speed(iter/s)": 0.413798 }, { "acc": 0.9063221, "epoch": 1.6103484688489969, "grad_norm": 14.154126167297363, "learning_rate": 6.969558827508559e-06, "loss": 0.58429151, "memory(GiB)": 34.88, "step": 59475, "train_speed(iter/s)": 0.413799 }, { "acc": 0.91159182, "epoch": 1.6104838491322124, "grad_norm": 6.1329450607299805, "learning_rate": 6.969044478675061e-06, "loss": 0.44417038, "memory(GiB)": 34.88, "step": 59480, "train_speed(iter/s)": 0.4138 }, { "acc": 0.91590166, "epoch": 1.6106192294154278, "grad_norm": 10.55517578125, "learning_rate": 6.968530105181675e-06, "loss": 0.45512848, "memory(GiB)": 34.88, "step": 59485, "train_speed(iter/s)": 0.413802 }, { "acc": 0.90296898, "epoch": 1.6107546096986436, "grad_norm": 7.785950660705566, "learning_rate": 6.968015707034846e-06, "loss": 0.55738525, "memory(GiB)": 34.88, "step": 59490, "train_speed(iter/s)": 0.413803 }, { "acc": 0.90675564, "epoch": 1.610889989981859, "grad_norm": 9.077981948852539, "learning_rate": 6.967501284241014e-06, "loss": 0.42005644, "memory(GiB)": 34.88, "step": 59495, "train_speed(iter/s)": 0.413805 }, { "acc": 0.9003953, "epoch": 1.6110253702650748, "grad_norm": 11.873445510864258, "learning_rate": 6.966986836806627e-06, "loss": 0.53606873, "memory(GiB)": 34.88, "step": 59500, "train_speed(iter/s)": 0.413806 }, { "acc": 0.90082273, "epoch": 1.61116075054829, "grad_norm": 15.440508842468262, "learning_rate": 6.96647236473813e-06, "loss": 0.58024955, "memory(GiB)": 34.88, "step": 59505, "train_speed(iter/s)": 0.413808 }, { "acc": 0.92761078, "epoch": 1.6112961308315057, "grad_norm": 5.66847562789917, "learning_rate": 6.9659578680419645e-06, "loss": 0.33790159, "memory(GiB)": 34.88, "step": 59510, "train_speed(iter/s)": 0.413809 }, { "acc": 0.92498407, "epoch": 1.6114315111147213, "grad_norm": 8.751765251159668, "learning_rate": 6.965443346724576e-06, "loss": 0.49049206, "memory(GiB)": 34.88, "step": 59515, "train_speed(iter/s)": 0.413811 }, { "acc": 0.89053335, "epoch": 1.6115668913979369, "grad_norm": 13.23525619506836, "learning_rate": 6.964928800792413e-06, "loss": 0.65951014, "memory(GiB)": 34.88, "step": 59520, "train_speed(iter/s)": 0.413812 }, { "acc": 0.88349867, "epoch": 1.6117022716811524, "grad_norm": 10.527595520019531, "learning_rate": 6.964414230251919e-06, "loss": 0.65185742, "memory(GiB)": 34.88, "step": 59525, "train_speed(iter/s)": 0.413813 }, { "acc": 0.91964283, "epoch": 1.6118376519643678, "grad_norm": 16.572235107421875, "learning_rate": 6.963899635109539e-06, "loss": 0.39390028, "memory(GiB)": 34.88, "step": 59530, "train_speed(iter/s)": 0.413814 }, { "acc": 0.92967997, "epoch": 1.6119730322475836, "grad_norm": 6.458101749420166, "learning_rate": 6.963385015371722e-06, "loss": 0.36353068, "memory(GiB)": 34.88, "step": 59535, "train_speed(iter/s)": 0.413816 }, { "acc": 0.89971428, "epoch": 1.612108412530799, "grad_norm": 8.74918270111084, "learning_rate": 6.962870371044911e-06, "loss": 0.5883059, "memory(GiB)": 34.88, "step": 59540, "train_speed(iter/s)": 0.413817 }, { "acc": 0.91874771, "epoch": 1.6122437928140145, "grad_norm": 10.439605712890625, "learning_rate": 6.962355702135558e-06, "loss": 0.47219806, "memory(GiB)": 34.88, "step": 59545, "train_speed(iter/s)": 0.413819 }, { "acc": 0.91390915, "epoch": 1.6123791730972301, "grad_norm": 7.417722702026367, "learning_rate": 6.961841008650105e-06, "loss": 0.40129099, "memory(GiB)": 34.88, "step": 59550, "train_speed(iter/s)": 0.41382 }, { "acc": 0.90172491, "epoch": 1.6125145533804457, "grad_norm": 12.523900032043457, "learning_rate": 6.961326290595002e-06, "loss": 0.47509136, "memory(GiB)": 34.88, "step": 59555, "train_speed(iter/s)": 0.413822 }, { "acc": 0.89504318, "epoch": 1.6126499336636613, "grad_norm": 5.469950199127197, "learning_rate": 6.960811547976697e-06, "loss": 0.62606668, "memory(GiB)": 34.88, "step": 59560, "train_speed(iter/s)": 0.413824 }, { "acc": 0.91090841, "epoch": 1.6127853139468766, "grad_norm": 7.278200626373291, "learning_rate": 6.960296780801639e-06, "loss": 0.46737871, "memory(GiB)": 34.88, "step": 59565, "train_speed(iter/s)": 0.413825 }, { "acc": 0.9001565, "epoch": 1.6129206942300924, "grad_norm": 9.60355281829834, "learning_rate": 6.959781989076273e-06, "loss": 0.51259708, "memory(GiB)": 34.88, "step": 59570, "train_speed(iter/s)": 0.413826 }, { "acc": 0.93341274, "epoch": 1.6130560745133078, "grad_norm": 6.303142547607422, "learning_rate": 6.959267172807052e-06, "loss": 0.31946268, "memory(GiB)": 34.88, "step": 59575, "train_speed(iter/s)": 0.413828 }, { "acc": 0.91317635, "epoch": 1.6131914547965236, "grad_norm": 8.003106117248535, "learning_rate": 6.958752332000422e-06, "loss": 0.36447475, "memory(GiB)": 34.88, "step": 59580, "train_speed(iter/s)": 0.413829 }, { "acc": 0.91823015, "epoch": 1.613326835079739, "grad_norm": 16.783414840698242, "learning_rate": 6.958237466662835e-06, "loss": 0.42406893, "memory(GiB)": 34.88, "step": 59585, "train_speed(iter/s)": 0.41383 }, { "acc": 0.91112404, "epoch": 1.6134622153629545, "grad_norm": 6.350612163543701, "learning_rate": 6.957722576800738e-06, "loss": 0.45491924, "memory(GiB)": 34.88, "step": 59590, "train_speed(iter/s)": 0.413832 }, { "acc": 0.89522266, "epoch": 1.6135975956461701, "grad_norm": 7.708820819854736, "learning_rate": 6.957207662420584e-06, "loss": 0.52902246, "memory(GiB)": 34.88, "step": 59595, "train_speed(iter/s)": 0.413833 }, { "acc": 0.91601086, "epoch": 1.6137329759293857, "grad_norm": 7.737095355987549, "learning_rate": 6.956692723528819e-06, "loss": 0.4059691, "memory(GiB)": 34.88, "step": 59600, "train_speed(iter/s)": 0.413835 }, { "acc": 0.90033178, "epoch": 1.6138683562126013, "grad_norm": 5.698310852050781, "learning_rate": 6.956177760131898e-06, "loss": 0.55557427, "memory(GiB)": 34.88, "step": 59605, "train_speed(iter/s)": 0.413836 }, { "acc": 0.90347595, "epoch": 1.6140037364958166, "grad_norm": 7.753463268280029, "learning_rate": 6.955662772236269e-06, "loss": 0.47794805, "memory(GiB)": 34.88, "step": 59610, "train_speed(iter/s)": 0.413837 }, { "acc": 0.90614891, "epoch": 1.6141391167790324, "grad_norm": 10.46123218536377, "learning_rate": 6.955147759848386e-06, "loss": 0.4163065, "memory(GiB)": 34.88, "step": 59615, "train_speed(iter/s)": 0.413839 }, { "acc": 0.90528479, "epoch": 1.6142744970622478, "grad_norm": 6.193877220153809, "learning_rate": 6.954632722974697e-06, "loss": 0.51040773, "memory(GiB)": 34.88, "step": 59620, "train_speed(iter/s)": 0.413841 }, { "acc": 0.92066097, "epoch": 1.6144098773454634, "grad_norm": 6.148013591766357, "learning_rate": 6.9541176616216574e-06, "loss": 0.38026178, "memory(GiB)": 34.88, "step": 59625, "train_speed(iter/s)": 0.413843 }, { "acc": 0.89507971, "epoch": 1.614545257628679, "grad_norm": 20.84650421142578, "learning_rate": 6.953602575795718e-06, "loss": 0.64977055, "memory(GiB)": 34.88, "step": 59630, "train_speed(iter/s)": 0.413844 }, { "acc": 0.90708828, "epoch": 1.6146806379118945, "grad_norm": 8.062495231628418, "learning_rate": 6.95308746550333e-06, "loss": 0.5251132, "memory(GiB)": 34.88, "step": 59635, "train_speed(iter/s)": 0.413846 }, { "acc": 0.88086262, "epoch": 1.6148160181951101, "grad_norm": 3.832822322845459, "learning_rate": 6.952572330750948e-06, "loss": 0.62607288, "memory(GiB)": 34.88, "step": 59640, "train_speed(iter/s)": 0.413847 }, { "acc": 0.88328085, "epoch": 1.6149513984783255, "grad_norm": 8.140619277954102, "learning_rate": 6.952057171545022e-06, "loss": 0.6676887, "memory(GiB)": 34.88, "step": 59645, "train_speed(iter/s)": 0.413849 }, { "acc": 0.88660507, "epoch": 1.6150867787615413, "grad_norm": 9.228126525878906, "learning_rate": 6.95154198789201e-06, "loss": 0.59004698, "memory(GiB)": 34.88, "step": 59650, "train_speed(iter/s)": 0.413851 }, { "acc": 0.88472519, "epoch": 1.6152221590447566, "grad_norm": 12.88849925994873, "learning_rate": 6.951026779798362e-06, "loss": 0.71789784, "memory(GiB)": 34.88, "step": 59655, "train_speed(iter/s)": 0.413852 }, { "acc": 0.88743095, "epoch": 1.6153575393279724, "grad_norm": 11.251456260681152, "learning_rate": 6.950511547270534e-06, "loss": 0.68423944, "memory(GiB)": 34.88, "step": 59660, "train_speed(iter/s)": 0.413854 }, { "acc": 0.89564476, "epoch": 1.6154929196111878, "grad_norm": 14.16630744934082, "learning_rate": 6.94999629031498e-06, "loss": 0.59224968, "memory(GiB)": 34.88, "step": 59665, "train_speed(iter/s)": 0.413855 }, { "acc": 0.89803295, "epoch": 1.6156282998944034, "grad_norm": 7.617825984954834, "learning_rate": 6.949481008938152e-06, "loss": 0.53099394, "memory(GiB)": 34.88, "step": 59670, "train_speed(iter/s)": 0.413857 }, { "acc": 0.90415649, "epoch": 1.615763680177619, "grad_norm": 7.579922676086426, "learning_rate": 6.948965703146511e-06, "loss": 0.50106907, "memory(GiB)": 34.88, "step": 59675, "train_speed(iter/s)": 0.413858 }, { "acc": 0.89050579, "epoch": 1.6158990604608345, "grad_norm": 8.047173500061035, "learning_rate": 6.948450372946505e-06, "loss": 0.65524669, "memory(GiB)": 34.88, "step": 59680, "train_speed(iter/s)": 0.41386 }, { "acc": 0.90629625, "epoch": 1.6160344407440501, "grad_norm": 5.988617420196533, "learning_rate": 6.947935018344595e-06, "loss": 0.4775032, "memory(GiB)": 34.88, "step": 59685, "train_speed(iter/s)": 0.413861 }, { "acc": 0.9060029, "epoch": 1.6161698210272655, "grad_norm": 5.226457595825195, "learning_rate": 6.947419639347232e-06, "loss": 0.40401011, "memory(GiB)": 34.88, "step": 59690, "train_speed(iter/s)": 0.413863 }, { "acc": 0.91927481, "epoch": 1.6163052013104813, "grad_norm": 6.876227378845215, "learning_rate": 6.946904235960877e-06, "loss": 0.41956825, "memory(GiB)": 34.88, "step": 59695, "train_speed(iter/s)": 0.413864 }, { "acc": 0.92088432, "epoch": 1.6164405815936966, "grad_norm": 10.145123481750488, "learning_rate": 6.9463888081919836e-06, "loss": 0.40637631, "memory(GiB)": 34.88, "step": 59700, "train_speed(iter/s)": 0.413866 }, { "acc": 0.90601521, "epoch": 1.6165759618769122, "grad_norm": 7.331818580627441, "learning_rate": 6.94587335604701e-06, "loss": 0.46997819, "memory(GiB)": 34.88, "step": 59705, "train_speed(iter/s)": 0.413867 }, { "acc": 0.89894915, "epoch": 1.6167113421601278, "grad_norm": 8.676451683044434, "learning_rate": 6.945357879532412e-06, "loss": 0.56336217, "memory(GiB)": 34.88, "step": 59710, "train_speed(iter/s)": 0.413868 }, { "acc": 0.90864563, "epoch": 1.6168467224433434, "grad_norm": 6.193761825561523, "learning_rate": 6.944842378654647e-06, "loss": 0.42446871, "memory(GiB)": 34.88, "step": 59715, "train_speed(iter/s)": 0.41387 }, { "acc": 0.90527668, "epoch": 1.616982102726559, "grad_norm": 5.854153156280518, "learning_rate": 6.944326853420174e-06, "loss": 0.49686255, "memory(GiB)": 34.88, "step": 59720, "train_speed(iter/s)": 0.413872 }, { "acc": 0.90936813, "epoch": 1.6171174830097743, "grad_norm": 12.610391616821289, "learning_rate": 6.9438113038354484e-06, "loss": 0.51810279, "memory(GiB)": 34.88, "step": 59725, "train_speed(iter/s)": 0.413873 }, { "acc": 0.90757837, "epoch": 1.6172528632929901, "grad_norm": 5.039105415344238, "learning_rate": 6.943295729906931e-06, "loss": 0.47156663, "memory(GiB)": 34.88, "step": 59730, "train_speed(iter/s)": 0.413875 }, { "acc": 0.92028255, "epoch": 1.6173882435762055, "grad_norm": 12.848898887634277, "learning_rate": 6.942780131641079e-06, "loss": 0.43943768, "memory(GiB)": 34.88, "step": 59735, "train_speed(iter/s)": 0.413876 }, { "acc": 0.90026407, "epoch": 1.6175236238594213, "grad_norm": 9.248729705810547, "learning_rate": 6.942264509044354e-06, "loss": 0.50341997, "memory(GiB)": 34.88, "step": 59740, "train_speed(iter/s)": 0.413878 }, { "acc": 0.89310932, "epoch": 1.6176590041426366, "grad_norm": 6.781955242156982, "learning_rate": 6.94174886212321e-06, "loss": 0.55532045, "memory(GiB)": 34.88, "step": 59745, "train_speed(iter/s)": 0.41388 }, { "acc": 0.89571505, "epoch": 1.6177943844258522, "grad_norm": 8.767623901367188, "learning_rate": 6.94123319088411e-06, "loss": 0.57280498, "memory(GiB)": 34.88, "step": 59750, "train_speed(iter/s)": 0.413881 }, { "acc": 0.91031656, "epoch": 1.6179297647090678, "grad_norm": 3.7272584438323975, "learning_rate": 6.940717495333517e-06, "loss": 0.43884029, "memory(GiB)": 34.88, "step": 59755, "train_speed(iter/s)": 0.413883 }, { "acc": 0.93022509, "epoch": 1.6180651449922834, "grad_norm": 13.179953575134277, "learning_rate": 6.940201775477883e-06, "loss": 0.45596509, "memory(GiB)": 34.88, "step": 59760, "train_speed(iter/s)": 0.413885 }, { "acc": 0.90272055, "epoch": 1.618200525275499, "grad_norm": 6.924374580383301, "learning_rate": 6.939686031323675e-06, "loss": 0.52094979, "memory(GiB)": 34.88, "step": 59765, "train_speed(iter/s)": 0.413886 }, { "acc": 0.90577917, "epoch": 1.6183359055587143, "grad_norm": 8.288949012756348, "learning_rate": 6.93917026287735e-06, "loss": 0.45947084, "memory(GiB)": 34.88, "step": 59770, "train_speed(iter/s)": 0.413888 }, { "acc": 0.90135937, "epoch": 1.6184712858419301, "grad_norm": 5.666102886199951, "learning_rate": 6.938654470145371e-06, "loss": 0.4684176, "memory(GiB)": 34.88, "step": 59775, "train_speed(iter/s)": 0.413889 }, { "acc": 0.89666729, "epoch": 1.6186066661251455, "grad_norm": 11.213583946228027, "learning_rate": 6.938138653134198e-06, "loss": 0.52171121, "memory(GiB)": 34.88, "step": 59780, "train_speed(iter/s)": 0.413891 }, { "acc": 0.89919062, "epoch": 1.618742046408361, "grad_norm": 11.554569244384766, "learning_rate": 6.937622811850294e-06, "loss": 0.56587148, "memory(GiB)": 34.88, "step": 59785, "train_speed(iter/s)": 0.413892 }, { "acc": 0.91118889, "epoch": 1.6188774266915766, "grad_norm": 11.856963157653809, "learning_rate": 6.93710694630012e-06, "loss": 0.43853879, "memory(GiB)": 34.88, "step": 59790, "train_speed(iter/s)": 0.413894 }, { "acc": 0.90068226, "epoch": 1.6190128069747922, "grad_norm": 8.24000358581543, "learning_rate": 6.93659105649014e-06, "loss": 0.50079298, "memory(GiB)": 34.88, "step": 59795, "train_speed(iter/s)": 0.413895 }, { "acc": 0.91861401, "epoch": 1.6191481872580078, "grad_norm": 4.873650550842285, "learning_rate": 6.936075142426816e-06, "loss": 0.41624699, "memory(GiB)": 34.88, "step": 59800, "train_speed(iter/s)": 0.413897 }, { "acc": 0.90739059, "epoch": 1.6192835675412232, "grad_norm": 9.841073036193848, "learning_rate": 6.935559204116608e-06, "loss": 0.49378672, "memory(GiB)": 34.88, "step": 59805, "train_speed(iter/s)": 0.413899 }, { "acc": 0.90786066, "epoch": 1.619418947824439, "grad_norm": 7.1238579750061035, "learning_rate": 6.935043241565982e-06, "loss": 0.51996641, "memory(GiB)": 34.88, "step": 59810, "train_speed(iter/s)": 0.4139 }, { "acc": 0.89215031, "epoch": 1.6195543281076543, "grad_norm": 7.357663631439209, "learning_rate": 6.934527254781399e-06, "loss": 0.58391809, "memory(GiB)": 34.88, "step": 59815, "train_speed(iter/s)": 0.413902 }, { "acc": 0.917628, "epoch": 1.6196897083908701, "grad_norm": 4.254947662353516, "learning_rate": 6.934011243769324e-06, "loss": 0.39748335, "memory(GiB)": 34.88, "step": 59820, "train_speed(iter/s)": 0.413903 }, { "acc": 0.87330742, "epoch": 1.6198250886740855, "grad_norm": 7.538050174713135, "learning_rate": 6.933495208536223e-06, "loss": 0.68253593, "memory(GiB)": 34.88, "step": 59825, "train_speed(iter/s)": 0.413905 }, { "acc": 0.89916878, "epoch": 1.619960468957301, "grad_norm": 8.259013175964355, "learning_rate": 6.932979149088558e-06, "loss": 0.50398636, "memory(GiB)": 34.88, "step": 59830, "train_speed(iter/s)": 0.413906 }, { "acc": 0.90244312, "epoch": 1.6200958492405166, "grad_norm": 6.1058573722839355, "learning_rate": 6.932463065432795e-06, "loss": 0.55639715, "memory(GiB)": 34.88, "step": 59835, "train_speed(iter/s)": 0.413908 }, { "acc": 0.90305347, "epoch": 1.620231229523732, "grad_norm": 8.22940731048584, "learning_rate": 6.9319469575753984e-06, "loss": 0.5374135, "memory(GiB)": 34.88, "step": 59840, "train_speed(iter/s)": 0.41391 }, { "acc": 0.91063499, "epoch": 1.6203666098069478, "grad_norm": 13.17936897277832, "learning_rate": 6.9314308255228325e-06, "loss": 0.53490524, "memory(GiB)": 34.88, "step": 59845, "train_speed(iter/s)": 0.413911 }, { "acc": 0.91040163, "epoch": 1.6205019900901632, "grad_norm": 10.291829109191895, "learning_rate": 6.9309146692815645e-06, "loss": 0.55063667, "memory(GiB)": 34.88, "step": 59850, "train_speed(iter/s)": 0.413913 }, { "acc": 0.91160688, "epoch": 1.620637370373379, "grad_norm": 10.359845161437988, "learning_rate": 6.930398488858058e-06, "loss": 0.53953075, "memory(GiB)": 34.88, "step": 59855, "train_speed(iter/s)": 0.413915 }, { "acc": 0.91075821, "epoch": 1.6207727506565943, "grad_norm": 5.2712721824646, "learning_rate": 6.929882284258781e-06, "loss": 0.52718811, "memory(GiB)": 34.88, "step": 59860, "train_speed(iter/s)": 0.413916 }, { "acc": 0.89904737, "epoch": 1.62090813093981, "grad_norm": 7.499403953552246, "learning_rate": 6.929366055490201e-06, "loss": 0.46599369, "memory(GiB)": 34.88, "step": 59865, "train_speed(iter/s)": 0.413918 }, { "acc": 0.92470484, "epoch": 1.6210435112230255, "grad_norm": 5.462992191314697, "learning_rate": 6.928849802558784e-06, "loss": 0.32930889, "memory(GiB)": 34.88, "step": 59870, "train_speed(iter/s)": 0.41392 }, { "acc": 0.89680099, "epoch": 1.621178891506241, "grad_norm": 8.3377046585083, "learning_rate": 6.928333525470994e-06, "loss": 0.53687782, "memory(GiB)": 34.88, "step": 59875, "train_speed(iter/s)": 0.413922 }, { "acc": 0.90635147, "epoch": 1.6213142717894566, "grad_norm": 6.259366989135742, "learning_rate": 6.927817224233303e-06, "loss": 0.48240433, "memory(GiB)": 34.88, "step": 59880, "train_speed(iter/s)": 0.413923 }, { "acc": 0.90880575, "epoch": 1.621449652072672, "grad_norm": 6.970514297485352, "learning_rate": 6.927300898852176e-06, "loss": 0.45027356, "memory(GiB)": 34.88, "step": 59885, "train_speed(iter/s)": 0.413925 }, { "acc": 0.9174962, "epoch": 1.6215850323558878, "grad_norm": 10.40872859954834, "learning_rate": 6.92678454933408e-06, "loss": 0.4165411, "memory(GiB)": 34.88, "step": 59890, "train_speed(iter/s)": 0.413927 }, { "acc": 0.8726408, "epoch": 1.6217204126391032, "grad_norm": 8.292948722839355, "learning_rate": 6.926268175685488e-06, "loss": 0.68490572, "memory(GiB)": 34.88, "step": 59895, "train_speed(iter/s)": 0.413928 }, { "acc": 0.91237268, "epoch": 1.6218557929223187, "grad_norm": 9.535290718078613, "learning_rate": 6.925751777912862e-06, "loss": 0.48991709, "memory(GiB)": 34.88, "step": 59900, "train_speed(iter/s)": 0.41393 }, { "acc": 0.8955061, "epoch": 1.6219911732055343, "grad_norm": 10.167829513549805, "learning_rate": 6.925235356022678e-06, "loss": 0.52897935, "memory(GiB)": 34.88, "step": 59905, "train_speed(iter/s)": 0.413932 }, { "acc": 0.89497976, "epoch": 1.62212655348875, "grad_norm": 9.451228141784668, "learning_rate": 6.924718910021399e-06, "loss": 0.50638323, "memory(GiB)": 34.88, "step": 59910, "train_speed(iter/s)": 0.413933 }, { "acc": 0.91666279, "epoch": 1.6222619337719655, "grad_norm": 6.329900741577148, "learning_rate": 6.9242024399154975e-06, "loss": 0.48750925, "memory(GiB)": 34.88, "step": 59915, "train_speed(iter/s)": 0.413935 }, { "acc": 0.90557737, "epoch": 1.6223973140551808, "grad_norm": 8.9048490524292, "learning_rate": 6.923685945711442e-06, "loss": 0.50919847, "memory(GiB)": 34.88, "step": 59920, "train_speed(iter/s)": 0.413936 }, { "acc": 0.88279867, "epoch": 1.6225326943383966, "grad_norm": 13.889673233032227, "learning_rate": 6.923169427415707e-06, "loss": 0.66258278, "memory(GiB)": 34.88, "step": 59925, "train_speed(iter/s)": 0.413937 }, { "acc": 0.91233406, "epoch": 1.622668074621612, "grad_norm": 4.634616851806641, "learning_rate": 6.922652885034757e-06, "loss": 0.51217599, "memory(GiB)": 34.88, "step": 59930, "train_speed(iter/s)": 0.413939 }, { "acc": 0.90265322, "epoch": 1.6228034549048278, "grad_norm": 9.882951736450195, "learning_rate": 6.922136318575063e-06, "loss": 0.50080009, "memory(GiB)": 34.88, "step": 59935, "train_speed(iter/s)": 0.413941 }, { "acc": 0.90667515, "epoch": 1.6229388351880432, "grad_norm": 6.822294235229492, "learning_rate": 6.921619728043102e-06, "loss": 0.49151087, "memory(GiB)": 34.88, "step": 59940, "train_speed(iter/s)": 0.413942 }, { "acc": 0.90386324, "epoch": 1.6230742154712587, "grad_norm": 9.381192207336426, "learning_rate": 6.921103113445339e-06, "loss": 0.4445632, "memory(GiB)": 34.88, "step": 59945, "train_speed(iter/s)": 0.413944 }, { "acc": 0.91018124, "epoch": 1.6232095957544743, "grad_norm": 8.278020858764648, "learning_rate": 6.920586474788249e-06, "loss": 0.42981949, "memory(GiB)": 34.88, "step": 59950, "train_speed(iter/s)": 0.413945 }, { "acc": 0.89785748, "epoch": 1.62334497603769, "grad_norm": 7.823278903961182, "learning_rate": 6.920069812078302e-06, "loss": 0.53246441, "memory(GiB)": 34.88, "step": 59955, "train_speed(iter/s)": 0.413947 }, { "acc": 0.90842171, "epoch": 1.6234803563209055, "grad_norm": 10.527894020080566, "learning_rate": 6.9195531253219715e-06, "loss": 0.51207304, "memory(GiB)": 34.88, "step": 59960, "train_speed(iter/s)": 0.413948 }, { "acc": 0.91116858, "epoch": 1.6236157366041208, "grad_norm": 17.630035400390625, "learning_rate": 6.919036414525731e-06, "loss": 0.43784323, "memory(GiB)": 34.88, "step": 59965, "train_speed(iter/s)": 0.41395 }, { "acc": 0.9119648, "epoch": 1.6237511168873366, "grad_norm": 13.016213417053223, "learning_rate": 6.91851967969605e-06, "loss": 0.43039408, "memory(GiB)": 34.88, "step": 59970, "train_speed(iter/s)": 0.413951 }, { "acc": 0.90834818, "epoch": 1.623886497170552, "grad_norm": 6.597060680389404, "learning_rate": 6.918002920839405e-06, "loss": 0.49246492, "memory(GiB)": 34.88, "step": 59975, "train_speed(iter/s)": 0.413953 }, { "acc": 0.90642815, "epoch": 1.6240218774537676, "grad_norm": 7.163657188415527, "learning_rate": 6.917486137962267e-06, "loss": 0.59413152, "memory(GiB)": 34.88, "step": 59980, "train_speed(iter/s)": 0.413955 }, { "acc": 0.90123329, "epoch": 1.6241572577369832, "grad_norm": 5.937587738037109, "learning_rate": 6.9169693310711125e-06, "loss": 0.50704188, "memory(GiB)": 34.88, "step": 59985, "train_speed(iter/s)": 0.413956 }, { "acc": 0.89580555, "epoch": 1.6242926380201987, "grad_norm": 14.400847434997559, "learning_rate": 6.916452500172412e-06, "loss": 0.49957552, "memory(GiB)": 34.88, "step": 59990, "train_speed(iter/s)": 0.413958 }, { "acc": 0.91700506, "epoch": 1.6244280183034143, "grad_norm": 9.082808494567871, "learning_rate": 6.915935645272642e-06, "loss": 0.51363487, "memory(GiB)": 34.88, "step": 59995, "train_speed(iter/s)": 0.413959 }, { "acc": 0.92170591, "epoch": 1.6245633985866297, "grad_norm": 7.501194477081299, "learning_rate": 6.915418766378277e-06, "loss": 0.38196635, "memory(GiB)": 34.88, "step": 60000, "train_speed(iter/s)": 0.413961 }, { "epoch": 1.6245633985866297, "eval_acc": 0.6077771189162384, "eval_loss": 1.1209900379180908, "eval_runtime": 1296.6335, "eval_samples_per_second": 66.561, "eval_steps_per_second": 2.081, "step": 60000 }, { "acc": 0.90535488, "epoch": 1.6246987788698455, "grad_norm": 10.001628875732422, "learning_rate": 6.9149018634957906e-06, "loss": 0.50016232, "memory(GiB)": 34.88, "step": 60005, "train_speed(iter/s)": 0.410224 }, { "acc": 0.92520409, "epoch": 1.6248341591530608, "grad_norm": 7.341752052307129, "learning_rate": 6.914384936631661e-06, "loss": 0.42262106, "memory(GiB)": 34.88, "step": 60010, "train_speed(iter/s)": 0.410225 }, { "acc": 0.91225529, "epoch": 1.6249695394362766, "grad_norm": 10.171430587768555, "learning_rate": 6.9138679857923606e-06, "loss": 0.43755121, "memory(GiB)": 34.88, "step": 60015, "train_speed(iter/s)": 0.410227 }, { "acc": 0.92226543, "epoch": 1.625104919719492, "grad_norm": 13.915035247802734, "learning_rate": 6.913351010984367e-06, "loss": 0.48562727, "memory(GiB)": 34.88, "step": 60020, "train_speed(iter/s)": 0.410229 }, { "acc": 0.90690384, "epoch": 1.6252403000027076, "grad_norm": 74.67654418945312, "learning_rate": 6.912834012214155e-06, "loss": 0.53860159, "memory(GiB)": 34.88, "step": 60025, "train_speed(iter/s)": 0.410231 }, { "acc": 0.89925365, "epoch": 1.6253756802859232, "grad_norm": 6.777012348175049, "learning_rate": 6.912316989488202e-06, "loss": 0.58397331, "memory(GiB)": 34.88, "step": 60030, "train_speed(iter/s)": 0.410233 }, { "acc": 0.92130985, "epoch": 1.6255110605691387, "grad_norm": 7.719175815582275, "learning_rate": 6.911799942812984e-06, "loss": 0.42661943, "memory(GiB)": 34.88, "step": 60035, "train_speed(iter/s)": 0.410235 }, { "acc": 0.88722153, "epoch": 1.6256464408523543, "grad_norm": 8.986190795898438, "learning_rate": 6.91128287219498e-06, "loss": 0.53272114, "memory(GiB)": 34.88, "step": 60040, "train_speed(iter/s)": 0.410237 }, { "acc": 0.89788189, "epoch": 1.6257818211355697, "grad_norm": 11.471609115600586, "learning_rate": 6.910765777640666e-06, "loss": 0.52006168, "memory(GiB)": 34.88, "step": 60045, "train_speed(iter/s)": 0.410239 }, { "acc": 0.90130501, "epoch": 1.6259172014187855, "grad_norm": 6.521834850311279, "learning_rate": 6.910248659156518e-06, "loss": 0.49727249, "memory(GiB)": 34.88, "step": 60050, "train_speed(iter/s)": 0.410241 }, { "acc": 0.90504255, "epoch": 1.6260525817020008, "grad_norm": 10.491360664367676, "learning_rate": 6.9097315167490165e-06, "loss": 0.53371449, "memory(GiB)": 34.88, "step": 60055, "train_speed(iter/s)": 0.410243 }, { "acc": 0.92090244, "epoch": 1.6261879619852164, "grad_norm": 5.6739702224731445, "learning_rate": 6.909214350424637e-06, "loss": 0.42087507, "memory(GiB)": 34.88, "step": 60060, "train_speed(iter/s)": 0.410245 }, { "acc": 0.90342312, "epoch": 1.626323342268432, "grad_norm": 10.647592544555664, "learning_rate": 6.90869716018986e-06, "loss": 0.56606407, "memory(GiB)": 34.88, "step": 60065, "train_speed(iter/s)": 0.410246 }, { "acc": 0.89552603, "epoch": 1.6264587225516476, "grad_norm": 13.638941764831543, "learning_rate": 6.908179946051166e-06, "loss": 0.61181593, "memory(GiB)": 34.88, "step": 60070, "train_speed(iter/s)": 0.410248 }, { "acc": 0.91587353, "epoch": 1.6265941028348632, "grad_norm": 6.540677547454834, "learning_rate": 6.9076627080150295e-06, "loss": 0.43665915, "memory(GiB)": 34.88, "step": 60075, "train_speed(iter/s)": 0.41025 }, { "acc": 0.9069767, "epoch": 1.6267294831180785, "grad_norm": 11.901504516601562, "learning_rate": 6.907145446087932e-06, "loss": 0.48251381, "memory(GiB)": 34.88, "step": 60080, "train_speed(iter/s)": 0.410252 }, { "acc": 0.88139238, "epoch": 1.6268648634012943, "grad_norm": 20.650867462158203, "learning_rate": 6.906628160276356e-06, "loss": 0.66397138, "memory(GiB)": 34.88, "step": 60085, "train_speed(iter/s)": 0.410254 }, { "acc": 0.90461502, "epoch": 1.6270002436845097, "grad_norm": 5.810044765472412, "learning_rate": 6.906110850586779e-06, "loss": 0.51088018, "memory(GiB)": 34.88, "step": 60090, "train_speed(iter/s)": 0.410256 }, { "acc": 0.90227747, "epoch": 1.6271356239677255, "grad_norm": 5.805494785308838, "learning_rate": 6.905593517025682e-06, "loss": 0.47346277, "memory(GiB)": 34.88, "step": 60095, "train_speed(iter/s)": 0.410258 }, { "acc": 0.90282688, "epoch": 1.6272710042509408, "grad_norm": 11.808001518249512, "learning_rate": 6.905076159599545e-06, "loss": 0.6493392, "memory(GiB)": 34.88, "step": 60100, "train_speed(iter/s)": 0.410259 }, { "acc": 0.91580381, "epoch": 1.6274063845341564, "grad_norm": 10.255870819091797, "learning_rate": 6.904558778314848e-06, "loss": 0.44705524, "memory(GiB)": 34.88, "step": 60105, "train_speed(iter/s)": 0.410261 }, { "acc": 0.89380589, "epoch": 1.627541764817372, "grad_norm": 8.318032264709473, "learning_rate": 6.904041373178075e-06, "loss": 0.49748039, "memory(GiB)": 34.88, "step": 60110, "train_speed(iter/s)": 0.410263 }, { "acc": 0.91152668, "epoch": 1.6276771451005876, "grad_norm": 7.4870476722717285, "learning_rate": 6.903523944195703e-06, "loss": 0.44602723, "memory(GiB)": 34.88, "step": 60115, "train_speed(iter/s)": 0.410265 }, { "acc": 0.91975861, "epoch": 1.6278125253838032, "grad_norm": 6.81455135345459, "learning_rate": 6.903006491374219e-06, "loss": 0.41151943, "memory(GiB)": 34.88, "step": 60120, "train_speed(iter/s)": 0.410267 }, { "acc": 0.91114788, "epoch": 1.6279479056670185, "grad_norm": 6.057526588439941, "learning_rate": 6.902489014720103e-06, "loss": 0.50872068, "memory(GiB)": 34.88, "step": 60125, "train_speed(iter/s)": 0.410269 }, { "acc": 0.89365158, "epoch": 1.6280832859502343, "grad_norm": 8.134467124938965, "learning_rate": 6.901971514239836e-06, "loss": 0.52243614, "memory(GiB)": 34.88, "step": 60130, "train_speed(iter/s)": 0.410271 }, { "acc": 0.89628897, "epoch": 1.6282186662334497, "grad_norm": 12.29712200164795, "learning_rate": 6.901453989939901e-06, "loss": 0.5700716, "memory(GiB)": 34.88, "step": 60135, "train_speed(iter/s)": 0.410273 }, { "acc": 0.8838665, "epoch": 1.6283540465166653, "grad_norm": 31.851333618164062, "learning_rate": 6.9009364418267844e-06, "loss": 0.61587172, "memory(GiB)": 34.88, "step": 60140, "train_speed(iter/s)": 0.410274 }, { "acc": 0.89910555, "epoch": 1.6284894267998808, "grad_norm": 4.652007102966309, "learning_rate": 6.900418869906966e-06, "loss": 0.4762702, "memory(GiB)": 34.88, "step": 60145, "train_speed(iter/s)": 0.410276 }, { "acc": 0.90177059, "epoch": 1.6286248070830964, "grad_norm": 8.84779167175293, "learning_rate": 6.89990127418693e-06, "loss": 0.50554042, "memory(GiB)": 34.88, "step": 60150, "train_speed(iter/s)": 0.410278 }, { "acc": 0.88710365, "epoch": 1.628760187366312, "grad_norm": 14.380447387695312, "learning_rate": 6.8993836546731605e-06, "loss": 0.64782639, "memory(GiB)": 34.88, "step": 60155, "train_speed(iter/s)": 0.41028 }, { "acc": 0.92687674, "epoch": 1.6288955676495274, "grad_norm": 6.214993000030518, "learning_rate": 6.898866011372142e-06, "loss": 0.4086956, "memory(GiB)": 34.88, "step": 60160, "train_speed(iter/s)": 0.410282 }, { "acc": 0.89138966, "epoch": 1.6290309479327432, "grad_norm": 9.259202003479004, "learning_rate": 6.898348344290358e-06, "loss": 0.53954868, "memory(GiB)": 34.88, "step": 60165, "train_speed(iter/s)": 0.410284 }, { "acc": 0.91059608, "epoch": 1.6291663282159585, "grad_norm": 6.158595085144043, "learning_rate": 6.897830653434298e-06, "loss": 0.44484501, "memory(GiB)": 34.88, "step": 60170, "train_speed(iter/s)": 0.410286 }, { "acc": 0.91655922, "epoch": 1.6293017084991743, "grad_norm": 11.028454780578613, "learning_rate": 6.897312938810441e-06, "loss": 0.42704992, "memory(GiB)": 34.88, "step": 60175, "train_speed(iter/s)": 0.410288 }, { "acc": 0.91273098, "epoch": 1.6294370887823897, "grad_norm": 7.513357162475586, "learning_rate": 6.896795200425275e-06, "loss": 0.50089822, "memory(GiB)": 34.88, "step": 60180, "train_speed(iter/s)": 0.41029 }, { "acc": 0.90603342, "epoch": 1.6295724690656053, "grad_norm": 3.804443597793579, "learning_rate": 6.8962774382852824e-06, "loss": 0.53865323, "memory(GiB)": 34.88, "step": 60185, "train_speed(iter/s)": 0.410292 }, { "acc": 0.90606794, "epoch": 1.6297078493488208, "grad_norm": 7.047616481781006, "learning_rate": 6.895759652396955e-06, "loss": 0.54522777, "memory(GiB)": 34.88, "step": 60190, "train_speed(iter/s)": 0.410293 }, { "acc": 0.89668293, "epoch": 1.6298432296320364, "grad_norm": 14.45377254486084, "learning_rate": 6.895241842766777e-06, "loss": 0.58421936, "memory(GiB)": 34.88, "step": 60195, "train_speed(iter/s)": 0.410295 }, { "acc": 0.91978168, "epoch": 1.629978609915252, "grad_norm": 3.2977426052093506, "learning_rate": 6.894724009401232e-06, "loss": 0.39116049, "memory(GiB)": 34.88, "step": 60200, "train_speed(iter/s)": 0.410297 }, { "acc": 0.92459011, "epoch": 1.6301139901984674, "grad_norm": 8.39516544342041, "learning_rate": 6.894206152306812e-06, "loss": 0.4614584, "memory(GiB)": 34.88, "step": 60205, "train_speed(iter/s)": 0.410299 }, { "acc": 0.91577768, "epoch": 1.6302493704816832, "grad_norm": 10.888025283813477, "learning_rate": 6.8936882714899985e-06, "loss": 0.4180378, "memory(GiB)": 34.88, "step": 60210, "train_speed(iter/s)": 0.410301 }, { "acc": 0.9021616, "epoch": 1.6303847507648985, "grad_norm": 5.637205123901367, "learning_rate": 6.893170366957283e-06, "loss": 0.5417326, "memory(GiB)": 34.88, "step": 60215, "train_speed(iter/s)": 0.410303 }, { "acc": 0.89119816, "epoch": 1.630520131048114, "grad_norm": 6.647952556610107, "learning_rate": 6.892652438715153e-06, "loss": 0.52692595, "memory(GiB)": 34.88, "step": 60220, "train_speed(iter/s)": 0.410305 }, { "acc": 0.90953579, "epoch": 1.6306555113313297, "grad_norm": 11.149791717529297, "learning_rate": 6.892134486770095e-06, "loss": 0.49358582, "memory(GiB)": 34.88, "step": 60225, "train_speed(iter/s)": 0.410307 }, { "acc": 0.91188402, "epoch": 1.6307908916145453, "grad_norm": 62.91691970825195, "learning_rate": 6.891616511128598e-06, "loss": 0.46740208, "memory(GiB)": 34.88, "step": 60230, "train_speed(iter/s)": 0.410309 }, { "acc": 0.89563665, "epoch": 1.6309262718977608, "grad_norm": 10.612643241882324, "learning_rate": 6.89109851179715e-06, "loss": 0.62261438, "memory(GiB)": 34.88, "step": 60235, "train_speed(iter/s)": 0.410311 }, { "acc": 0.89057426, "epoch": 1.6310616521809762, "grad_norm": 7.566264629364014, "learning_rate": 6.89058048878224e-06, "loss": 0.56924376, "memory(GiB)": 34.88, "step": 60240, "train_speed(iter/s)": 0.410312 }, { "acc": 0.90304155, "epoch": 1.631197032464192, "grad_norm": 10.722650527954102, "learning_rate": 6.890062442090359e-06, "loss": 0.50261083, "memory(GiB)": 34.88, "step": 60245, "train_speed(iter/s)": 0.410314 }, { "acc": 0.90369244, "epoch": 1.6313324127474074, "grad_norm": 12.878071784973145, "learning_rate": 6.889544371727995e-06, "loss": 0.62223053, "memory(GiB)": 34.88, "step": 60250, "train_speed(iter/s)": 0.410316 }, { "acc": 0.91248312, "epoch": 1.6314677930306232, "grad_norm": 10.207037925720215, "learning_rate": 6.889026277701639e-06, "loss": 0.45063109, "memory(GiB)": 34.88, "step": 60255, "train_speed(iter/s)": 0.410318 }, { "acc": 0.92570505, "epoch": 1.6316031733138385, "grad_norm": 10.296503067016602, "learning_rate": 6.888508160017782e-06, "loss": 0.41134386, "memory(GiB)": 34.88, "step": 60260, "train_speed(iter/s)": 0.41032 }, { "acc": 0.90629654, "epoch": 1.631738553597054, "grad_norm": 7.0108747482299805, "learning_rate": 6.887990018682909e-06, "loss": 0.5015162, "memory(GiB)": 34.88, "step": 60265, "train_speed(iter/s)": 0.410322 }, { "acc": 0.9091548, "epoch": 1.6318739338802697, "grad_norm": 21.549362182617188, "learning_rate": 6.887471853703517e-06, "loss": 0.51285005, "memory(GiB)": 34.88, "step": 60270, "train_speed(iter/s)": 0.410324 }, { "acc": 0.90942926, "epoch": 1.6320093141634853, "grad_norm": 26.617944717407227, "learning_rate": 6.886953665086094e-06, "loss": 0.49359112, "memory(GiB)": 34.88, "step": 60275, "train_speed(iter/s)": 0.410325 }, { "acc": 0.91492062, "epoch": 1.6321446944467008, "grad_norm": 5.855575084686279, "learning_rate": 6.886435452837131e-06, "loss": 0.42196736, "memory(GiB)": 34.88, "step": 60280, "train_speed(iter/s)": 0.410327 }, { "acc": 0.90957775, "epoch": 1.6322800747299162, "grad_norm": 7.705112934112549, "learning_rate": 6.885917216963122e-06, "loss": 0.47000513, "memory(GiB)": 34.88, "step": 60285, "train_speed(iter/s)": 0.410329 }, { "acc": 0.91434841, "epoch": 1.632415455013132, "grad_norm": 8.110546112060547, "learning_rate": 6.885398957470558e-06, "loss": 0.45978093, "memory(GiB)": 34.88, "step": 60290, "train_speed(iter/s)": 0.410331 }, { "acc": 0.90223598, "epoch": 1.6325508352963474, "grad_norm": 19.1197452545166, "learning_rate": 6.884880674365929e-06, "loss": 0.61755476, "memory(GiB)": 34.88, "step": 60295, "train_speed(iter/s)": 0.410333 }, { "acc": 0.92090683, "epoch": 1.632686215579563, "grad_norm": 7.610928058624268, "learning_rate": 6.884362367655731e-06, "loss": 0.38182542, "memory(GiB)": 34.88, "step": 60300, "train_speed(iter/s)": 0.410335 }, { "acc": 0.90225344, "epoch": 1.6328215958627785, "grad_norm": 10.313623428344727, "learning_rate": 6.883844037346453e-06, "loss": 0.56146879, "memory(GiB)": 34.88, "step": 60305, "train_speed(iter/s)": 0.410336 }, { "acc": 0.90711794, "epoch": 1.632956976145994, "grad_norm": 3.85782790184021, "learning_rate": 6.883325683444593e-06, "loss": 0.47480803, "memory(GiB)": 34.88, "step": 60310, "train_speed(iter/s)": 0.410338 }, { "acc": 0.87909241, "epoch": 1.6330923564292097, "grad_norm": 19.960777282714844, "learning_rate": 6.882807305956639e-06, "loss": 0.67123365, "memory(GiB)": 34.88, "step": 60315, "train_speed(iter/s)": 0.41034 }, { "acc": 0.90223503, "epoch": 1.633227736712425, "grad_norm": 15.40147876739502, "learning_rate": 6.882288904889089e-06, "loss": 0.52489662, "memory(GiB)": 34.88, "step": 60320, "train_speed(iter/s)": 0.410342 }, { "acc": 0.91278162, "epoch": 1.6333631169956409, "grad_norm": 15.927323341369629, "learning_rate": 6.881770480248434e-06, "loss": 0.50831032, "memory(GiB)": 34.88, "step": 60325, "train_speed(iter/s)": 0.410344 }, { "acc": 0.89331245, "epoch": 1.6334984972788562, "grad_norm": 15.246443748474121, "learning_rate": 6.88125203204117e-06, "loss": 0.59044447, "memory(GiB)": 34.88, "step": 60330, "train_speed(iter/s)": 0.410346 }, { "acc": 0.89714508, "epoch": 1.633633877562072, "grad_norm": 13.327298164367676, "learning_rate": 6.880733560273791e-06, "loss": 0.57907972, "memory(GiB)": 34.88, "step": 60335, "train_speed(iter/s)": 0.410348 }, { "acc": 0.90749187, "epoch": 1.6337692578452874, "grad_norm": 5.46074914932251, "learning_rate": 6.880215064952793e-06, "loss": 0.4314868, "memory(GiB)": 34.88, "step": 60340, "train_speed(iter/s)": 0.41035 }, { "acc": 0.89398232, "epoch": 1.633904638128503, "grad_norm": 15.974238395690918, "learning_rate": 6.879696546084669e-06, "loss": 0.63351431, "memory(GiB)": 34.88, "step": 60345, "train_speed(iter/s)": 0.410351 }, { "acc": 0.92244968, "epoch": 1.6340400184117185, "grad_norm": 5.2551798820495605, "learning_rate": 6.879178003675916e-06, "loss": 0.4148942, "memory(GiB)": 34.88, "step": 60350, "train_speed(iter/s)": 0.410353 }, { "acc": 0.89812775, "epoch": 1.6341753986949341, "grad_norm": 10.561792373657227, "learning_rate": 6.878659437733031e-06, "loss": 0.54945273, "memory(GiB)": 34.88, "step": 60355, "train_speed(iter/s)": 0.410355 }, { "acc": 0.89054737, "epoch": 1.6343107789781497, "grad_norm": 11.726066589355469, "learning_rate": 6.878140848262506e-06, "loss": 0.61314526, "memory(GiB)": 34.88, "step": 60360, "train_speed(iter/s)": 0.410357 }, { "acc": 0.8991375, "epoch": 1.634446159261365, "grad_norm": 8.831682205200195, "learning_rate": 6.87762223527084e-06, "loss": 0.51156211, "memory(GiB)": 34.88, "step": 60365, "train_speed(iter/s)": 0.410359 }, { "acc": 0.90289469, "epoch": 1.6345815395445809, "grad_norm": 12.009833335876465, "learning_rate": 6.8771035987645306e-06, "loss": 0.51820865, "memory(GiB)": 34.88, "step": 60370, "train_speed(iter/s)": 0.410361 }, { "acc": 0.89616108, "epoch": 1.6347169198277962, "grad_norm": 4.370454788208008, "learning_rate": 6.876584938750074e-06, "loss": 0.58011618, "memory(GiB)": 34.88, "step": 60375, "train_speed(iter/s)": 0.410363 }, { "acc": 0.9118782, "epoch": 1.6348523001110118, "grad_norm": 21.883577346801758, "learning_rate": 6.876066255233965e-06, "loss": 0.49684811, "memory(GiB)": 34.88, "step": 60380, "train_speed(iter/s)": 0.410365 }, { "acc": 0.90193977, "epoch": 1.6349876803942274, "grad_norm": 9.709729194641113, "learning_rate": 6.875547548222705e-06, "loss": 0.44677334, "memory(GiB)": 34.88, "step": 60385, "train_speed(iter/s)": 0.410367 }, { "acc": 0.90699902, "epoch": 1.635123060677443, "grad_norm": 6.720669269561768, "learning_rate": 6.875028817722789e-06, "loss": 0.45690031, "memory(GiB)": 34.88, "step": 60390, "train_speed(iter/s)": 0.410369 }, { "acc": 0.92159567, "epoch": 1.6352584409606585, "grad_norm": 8.10458755493164, "learning_rate": 6.874510063740717e-06, "loss": 0.44195189, "memory(GiB)": 34.88, "step": 60395, "train_speed(iter/s)": 0.410371 }, { "acc": 0.91240635, "epoch": 1.635393821243874, "grad_norm": 4.981758117675781, "learning_rate": 6.873991286282986e-06, "loss": 0.38396759, "memory(GiB)": 34.88, "step": 60400, "train_speed(iter/s)": 0.410373 }, { "acc": 0.91807623, "epoch": 1.6355292015270897, "grad_norm": 19.583303451538086, "learning_rate": 6.8734724853560956e-06, "loss": 0.41069717, "memory(GiB)": 34.88, "step": 60405, "train_speed(iter/s)": 0.410375 }, { "acc": 0.90637083, "epoch": 1.635664581810305, "grad_norm": 11.902350425720215, "learning_rate": 6.8729536609665435e-06, "loss": 0.57844949, "memory(GiB)": 34.88, "step": 60410, "train_speed(iter/s)": 0.410376 }, { "acc": 0.89970694, "epoch": 1.6357999620935209, "grad_norm": 7.560004234313965, "learning_rate": 6.87243481312083e-06, "loss": 0.57550592, "memory(GiB)": 34.88, "step": 60415, "train_speed(iter/s)": 0.410378 }, { "acc": 0.90526457, "epoch": 1.6359353423767362, "grad_norm": 15.869623184204102, "learning_rate": 6.871915941825455e-06, "loss": 0.48798819, "memory(GiB)": 34.88, "step": 60420, "train_speed(iter/s)": 0.41038 }, { "acc": 0.91059189, "epoch": 1.6360707226599518, "grad_norm": 8.263236045837402, "learning_rate": 6.871397047086918e-06, "loss": 0.44857731, "memory(GiB)": 34.88, "step": 60425, "train_speed(iter/s)": 0.410382 }, { "acc": 0.9012331, "epoch": 1.6362061029431674, "grad_norm": 10.587532997131348, "learning_rate": 6.870878128911719e-06, "loss": 0.61274357, "memory(GiB)": 34.88, "step": 60430, "train_speed(iter/s)": 0.410384 }, { "acc": 0.89071703, "epoch": 1.636341483226383, "grad_norm": 12.809976577758789, "learning_rate": 6.870359187306361e-06, "loss": 0.56333251, "memory(GiB)": 34.88, "step": 60435, "train_speed(iter/s)": 0.410386 }, { "acc": 0.89536953, "epoch": 1.6364768635095985, "grad_norm": 13.071863174438477, "learning_rate": 6.86984022227734e-06, "loss": 0.56890726, "memory(GiB)": 34.88, "step": 60440, "train_speed(iter/s)": 0.410388 }, { "acc": 0.87950077, "epoch": 1.636612243792814, "grad_norm": 8.266682624816895, "learning_rate": 6.8693212338311586e-06, "loss": 0.74515538, "memory(GiB)": 34.88, "step": 60445, "train_speed(iter/s)": 0.41039 }, { "acc": 0.926511, "epoch": 1.6367476240760297, "grad_norm": 6.2789201736450195, "learning_rate": 6.868802221974321e-06, "loss": 0.33603311, "memory(GiB)": 34.88, "step": 60450, "train_speed(iter/s)": 0.410392 }, { "acc": 0.90949535, "epoch": 1.636883004359245, "grad_norm": 8.121908187866211, "learning_rate": 6.868283186713325e-06, "loss": 0.48663721, "memory(GiB)": 34.88, "step": 60455, "train_speed(iter/s)": 0.410394 }, { "acc": 0.90800447, "epoch": 1.6370183846424606, "grad_norm": 7.326038837432861, "learning_rate": 6.867764128054677e-06, "loss": 0.53650346, "memory(GiB)": 34.88, "step": 60460, "train_speed(iter/s)": 0.410396 }, { "acc": 0.89445868, "epoch": 1.6371537649256762, "grad_norm": 24.21152687072754, "learning_rate": 6.867245046004873e-06, "loss": 0.60271006, "memory(GiB)": 34.88, "step": 60465, "train_speed(iter/s)": 0.410397 }, { "acc": 0.91005316, "epoch": 1.6372891452088918, "grad_norm": 4.884410858154297, "learning_rate": 6.866725940570424e-06, "loss": 0.52714272, "memory(GiB)": 34.88, "step": 60470, "train_speed(iter/s)": 0.410399 }, { "acc": 0.89404411, "epoch": 1.6374245254921074, "grad_norm": 13.497784614562988, "learning_rate": 6.866206811757824e-06, "loss": 0.58837433, "memory(GiB)": 34.88, "step": 60475, "train_speed(iter/s)": 0.410401 }, { "acc": 0.8946579, "epoch": 1.6375599057753227, "grad_norm": 15.208377838134766, "learning_rate": 6.865687659573582e-06, "loss": 0.58099928, "memory(GiB)": 34.88, "step": 60480, "train_speed(iter/s)": 0.410403 }, { "acc": 0.88830214, "epoch": 1.6376952860585385, "grad_norm": 6.1859025955200195, "learning_rate": 6.865168484024198e-06, "loss": 0.59521065, "memory(GiB)": 34.88, "step": 60485, "train_speed(iter/s)": 0.410405 }, { "acc": 0.89935226, "epoch": 1.637830666341754, "grad_norm": 7.4452643394470215, "learning_rate": 6.8646492851161785e-06, "loss": 0.5412003, "memory(GiB)": 34.88, "step": 60490, "train_speed(iter/s)": 0.410407 }, { "acc": 0.90153055, "epoch": 1.6379660466249697, "grad_norm": 15.531543731689453, "learning_rate": 6.864130062856025e-06, "loss": 0.56445727, "memory(GiB)": 34.88, "step": 60495, "train_speed(iter/s)": 0.410409 }, { "acc": 0.90661325, "epoch": 1.638101426908185, "grad_norm": 6.561826229095459, "learning_rate": 6.863610817250244e-06, "loss": 0.44605436, "memory(GiB)": 34.88, "step": 60500, "train_speed(iter/s)": 0.410411 }, { "acc": 0.90220566, "epoch": 1.6382368071914006, "grad_norm": 6.724559783935547, "learning_rate": 6.863091548305338e-06, "loss": 0.55437393, "memory(GiB)": 34.88, "step": 60505, "train_speed(iter/s)": 0.410412 }, { "acc": 0.92546291, "epoch": 1.6383721874746162, "grad_norm": 4.489383697509766, "learning_rate": 6.862572256027815e-06, "loss": 0.34130599, "memory(GiB)": 34.88, "step": 60510, "train_speed(iter/s)": 0.410414 }, { "acc": 0.905966, "epoch": 1.6385075677578318, "grad_norm": 14.48523235321045, "learning_rate": 6.862052940424176e-06, "loss": 0.44746962, "memory(GiB)": 34.88, "step": 60515, "train_speed(iter/s)": 0.410416 }, { "acc": 0.89220829, "epoch": 1.6386429480410474, "grad_norm": 4.113193988800049, "learning_rate": 6.861533601500929e-06, "loss": 0.50001435, "memory(GiB)": 34.88, "step": 60520, "train_speed(iter/s)": 0.410418 }, { "acc": 0.8820672, "epoch": 1.6387783283242627, "grad_norm": 15.113134384155273, "learning_rate": 6.86101423926458e-06, "loss": 0.71254649, "memory(GiB)": 34.88, "step": 60525, "train_speed(iter/s)": 0.41042 }, { "acc": 0.91134377, "epoch": 1.6389137086074785, "grad_norm": 6.953556060791016, "learning_rate": 6.860494853721633e-06, "loss": 0.49881639, "memory(GiB)": 34.88, "step": 60530, "train_speed(iter/s)": 0.410421 }, { "acc": 0.91426182, "epoch": 1.639049088890694, "grad_norm": 8.281476974487305, "learning_rate": 6.859975444878596e-06, "loss": 0.4594337, "memory(GiB)": 34.88, "step": 60535, "train_speed(iter/s)": 0.410423 }, { "acc": 0.90778599, "epoch": 1.6391844691739095, "grad_norm": 6.082209587097168, "learning_rate": 6.859456012741975e-06, "loss": 0.45946474, "memory(GiB)": 34.88, "step": 60540, "train_speed(iter/s)": 0.410425 }, { "acc": 0.88600464, "epoch": 1.639319849457125, "grad_norm": 22.6986083984375, "learning_rate": 6.858936557318276e-06, "loss": 0.64189262, "memory(GiB)": 34.88, "step": 60545, "train_speed(iter/s)": 0.410427 }, { "acc": 0.90042181, "epoch": 1.6394552297403406, "grad_norm": 16.260902404785156, "learning_rate": 6.858417078614009e-06, "loss": 0.49163465, "memory(GiB)": 34.88, "step": 60550, "train_speed(iter/s)": 0.410429 }, { "acc": 0.90407505, "epoch": 1.6395906100235562, "grad_norm": 6.992373943328857, "learning_rate": 6.85789757663568e-06, "loss": 0.45296559, "memory(GiB)": 34.88, "step": 60555, "train_speed(iter/s)": 0.410431 }, { "acc": 0.91365118, "epoch": 1.6397259903067716, "grad_norm": 14.122987747192383, "learning_rate": 6.857378051389797e-06, "loss": 0.43334765, "memory(GiB)": 34.88, "step": 60560, "train_speed(iter/s)": 0.410432 }, { "acc": 0.88594828, "epoch": 1.6398613705899874, "grad_norm": 7.726941108703613, "learning_rate": 6.856858502882866e-06, "loss": 0.61920338, "memory(GiB)": 34.88, "step": 60565, "train_speed(iter/s)": 0.410434 }, { "acc": 0.89594946, "epoch": 1.6399967508732027, "grad_norm": 5.943454265594482, "learning_rate": 6.8563389311213965e-06, "loss": 0.58324423, "memory(GiB)": 34.88, "step": 60570, "train_speed(iter/s)": 0.410436 }, { "acc": 0.91420221, "epoch": 1.6401321311564185, "grad_norm": 5.138392925262451, "learning_rate": 6.855819336111899e-06, "loss": 0.47460852, "memory(GiB)": 34.88, "step": 60575, "train_speed(iter/s)": 0.410438 }, { "acc": 0.90581369, "epoch": 1.640267511439634, "grad_norm": 9.02076530456543, "learning_rate": 6.85529971786088e-06, "loss": 0.48196635, "memory(GiB)": 34.88, "step": 60580, "train_speed(iter/s)": 0.41044 }, { "acc": 0.89383049, "epoch": 1.6404028917228495, "grad_norm": 13.897178649902344, "learning_rate": 6.854780076374852e-06, "loss": 0.56960554, "memory(GiB)": 34.88, "step": 60585, "train_speed(iter/s)": 0.410442 }, { "acc": 0.89672241, "epoch": 1.640538272006065, "grad_norm": 11.708484649658203, "learning_rate": 6.854260411660322e-06, "loss": 0.58632183, "memory(GiB)": 34.88, "step": 60590, "train_speed(iter/s)": 0.410443 }, { "acc": 0.90135078, "epoch": 1.6406736522892806, "grad_norm": 9.146005630493164, "learning_rate": 6.8537407237237995e-06, "loss": 0.50115256, "memory(GiB)": 34.88, "step": 60595, "train_speed(iter/s)": 0.410445 }, { "acc": 0.91253338, "epoch": 1.6408090325724962, "grad_norm": 17.6271915435791, "learning_rate": 6.853221012571796e-06, "loss": 0.41993222, "memory(GiB)": 34.88, "step": 60600, "train_speed(iter/s)": 0.410446 }, { "acc": 0.90860653, "epoch": 1.6409444128557116, "grad_norm": 12.267622947692871, "learning_rate": 6.852701278210819e-06, "loss": 0.57396469, "memory(GiB)": 34.88, "step": 60605, "train_speed(iter/s)": 0.410448 }, { "acc": 0.90571098, "epoch": 1.6410797931389274, "grad_norm": 7.498353004455566, "learning_rate": 6.852181520647384e-06, "loss": 0.54850903, "memory(GiB)": 34.88, "step": 60610, "train_speed(iter/s)": 0.41045 }, { "acc": 0.90724239, "epoch": 1.6412151734221427, "grad_norm": 16.567445755004883, "learning_rate": 6.851661739887998e-06, "loss": 0.49264355, "memory(GiB)": 34.88, "step": 60615, "train_speed(iter/s)": 0.410452 }, { "acc": 0.90911818, "epoch": 1.6413505537053583, "grad_norm": 5.671987533569336, "learning_rate": 6.851141935939173e-06, "loss": 0.51981363, "memory(GiB)": 34.88, "step": 60620, "train_speed(iter/s)": 0.410454 }, { "acc": 0.88910999, "epoch": 1.641485933988574, "grad_norm": 9.29094409942627, "learning_rate": 6.850622108807423e-06, "loss": 0.58226857, "memory(GiB)": 34.88, "step": 60625, "train_speed(iter/s)": 0.410455 }, { "acc": 0.88336878, "epoch": 1.6416213142717895, "grad_norm": 19.431612014770508, "learning_rate": 6.850102258499258e-06, "loss": 0.70337505, "memory(GiB)": 34.88, "step": 60630, "train_speed(iter/s)": 0.410457 }, { "acc": 0.92591925, "epoch": 1.641756694555005, "grad_norm": 6.7819318771362305, "learning_rate": 6.849582385021188e-06, "loss": 0.41308951, "memory(GiB)": 34.88, "step": 60635, "train_speed(iter/s)": 0.410459 }, { "acc": 0.89929695, "epoch": 1.6418920748382204, "grad_norm": 9.35933780670166, "learning_rate": 6.84906248837973e-06, "loss": 0.63062782, "memory(GiB)": 34.88, "step": 60640, "train_speed(iter/s)": 0.410461 }, { "acc": 0.90872383, "epoch": 1.6420274551214362, "grad_norm": 3.8613622188568115, "learning_rate": 6.848542568581393e-06, "loss": 0.41613522, "memory(GiB)": 34.88, "step": 60645, "train_speed(iter/s)": 0.410463 }, { "acc": 0.90207481, "epoch": 1.6421628354046516, "grad_norm": 21.705326080322266, "learning_rate": 6.848022625632692e-06, "loss": 0.55397391, "memory(GiB)": 34.88, "step": 60650, "train_speed(iter/s)": 0.410465 }, { "acc": 0.91314449, "epoch": 1.6422982156878674, "grad_norm": 5.156265735626221, "learning_rate": 6.847502659540141e-06, "loss": 0.41128836, "memory(GiB)": 34.88, "step": 60655, "train_speed(iter/s)": 0.410467 }, { "acc": 0.89683933, "epoch": 1.6424335959710827, "grad_norm": 13.950739860534668, "learning_rate": 6.84698267031025e-06, "loss": 0.56482902, "memory(GiB)": 34.88, "step": 60660, "train_speed(iter/s)": 0.410469 }, { "acc": 0.90513878, "epoch": 1.6425689762542983, "grad_norm": 21.176136016845703, "learning_rate": 6.8464626579495375e-06, "loss": 0.50412354, "memory(GiB)": 34.88, "step": 60665, "train_speed(iter/s)": 0.410471 }, { "acc": 0.9097106, "epoch": 1.642704356537514, "grad_norm": 7.987428188323975, "learning_rate": 6.845942622464514e-06, "loss": 0.48091927, "memory(GiB)": 34.88, "step": 60670, "train_speed(iter/s)": 0.410473 }, { "acc": 0.88899422, "epoch": 1.6428397368207295, "grad_norm": 15.662886619567871, "learning_rate": 6.8454225638616954e-06, "loss": 0.54397449, "memory(GiB)": 34.88, "step": 60675, "train_speed(iter/s)": 0.410475 }, { "acc": 0.90194073, "epoch": 1.642975117103945, "grad_norm": 13.490462303161621, "learning_rate": 6.844902482147597e-06, "loss": 0.55155869, "memory(GiB)": 34.88, "step": 60680, "train_speed(iter/s)": 0.410476 }, { "acc": 0.90346384, "epoch": 1.6431104973871604, "grad_norm": 6.671805381774902, "learning_rate": 6.8443823773287335e-06, "loss": 0.51531553, "memory(GiB)": 34.88, "step": 60685, "train_speed(iter/s)": 0.410478 }, { "acc": 0.8980072, "epoch": 1.6432458776703762, "grad_norm": 8.574164390563965, "learning_rate": 6.8438622494116195e-06, "loss": 0.55698175, "memory(GiB)": 34.88, "step": 60690, "train_speed(iter/s)": 0.41048 }, { "acc": 0.90070686, "epoch": 1.6433812579535916, "grad_norm": 8.854048728942871, "learning_rate": 6.843342098402771e-06, "loss": 0.4641263, "memory(GiB)": 34.88, "step": 60695, "train_speed(iter/s)": 0.410482 }, { "acc": 0.91719961, "epoch": 1.6435166382368072, "grad_norm": 9.01219654083252, "learning_rate": 6.842821924308705e-06, "loss": 0.46002741, "memory(GiB)": 34.88, "step": 60700, "train_speed(iter/s)": 0.410484 }, { "acc": 0.90563984, "epoch": 1.6436520185200227, "grad_norm": 7.231470108032227, "learning_rate": 6.842301727135934e-06, "loss": 0.49955406, "memory(GiB)": 34.88, "step": 60705, "train_speed(iter/s)": 0.410486 }, { "acc": 0.91797829, "epoch": 1.6437873988032383, "grad_norm": 10.145276069641113, "learning_rate": 6.8417815068909795e-06, "loss": 0.37602575, "memory(GiB)": 34.88, "step": 60710, "train_speed(iter/s)": 0.410488 }, { "acc": 0.88958092, "epoch": 1.643922779086454, "grad_norm": 9.467900276184082, "learning_rate": 6.8412612635803554e-06, "loss": 0.61737309, "memory(GiB)": 34.88, "step": 60715, "train_speed(iter/s)": 0.410489 }, { "acc": 0.8979579, "epoch": 1.6440581593696693, "grad_norm": 9.051855087280273, "learning_rate": 6.84074099721058e-06, "loss": 0.57277241, "memory(GiB)": 34.88, "step": 60720, "train_speed(iter/s)": 0.410491 }, { "acc": 0.91088905, "epoch": 1.644193539652885, "grad_norm": 8.78229808807373, "learning_rate": 6.84022070778817e-06, "loss": 0.42566729, "memory(GiB)": 34.88, "step": 60725, "train_speed(iter/s)": 0.410493 }, { "acc": 0.91003628, "epoch": 1.6443289199361004, "grad_norm": 17.193248748779297, "learning_rate": 6.839700395319642e-06, "loss": 0.46557398, "memory(GiB)": 34.88, "step": 60730, "train_speed(iter/s)": 0.410495 }, { "acc": 0.91910152, "epoch": 1.6444643002193162, "grad_norm": 4.091202735900879, "learning_rate": 6.839180059811516e-06, "loss": 0.44746475, "memory(GiB)": 34.88, "step": 60735, "train_speed(iter/s)": 0.410497 }, { "acc": 0.89611425, "epoch": 1.6445996805025316, "grad_norm": 18.709280014038086, "learning_rate": 6.838659701270309e-06, "loss": 0.61801949, "memory(GiB)": 34.88, "step": 60740, "train_speed(iter/s)": 0.410499 }, { "acc": 0.88494425, "epoch": 1.6447350607857472, "grad_norm": 8.768576622009277, "learning_rate": 6.83813931970254e-06, "loss": 0.68871937, "memory(GiB)": 34.88, "step": 60745, "train_speed(iter/s)": 0.410501 }, { "acc": 0.91418705, "epoch": 1.6448704410689627, "grad_norm": 8.49372386932373, "learning_rate": 6.837618915114727e-06, "loss": 0.4143012, "memory(GiB)": 34.88, "step": 60750, "train_speed(iter/s)": 0.410502 }, { "acc": 0.89153633, "epoch": 1.6450058213521783, "grad_norm": 4.219911098480225, "learning_rate": 6.8370984875133895e-06, "loss": 0.55996914, "memory(GiB)": 34.88, "step": 60755, "train_speed(iter/s)": 0.410504 }, { "acc": 0.88103819, "epoch": 1.645141201635394, "grad_norm": 14.481912612915039, "learning_rate": 6.836578036905048e-06, "loss": 0.69302607, "memory(GiB)": 34.88, "step": 60760, "train_speed(iter/s)": 0.410506 }, { "acc": 0.90093746, "epoch": 1.6452765819186093, "grad_norm": 8.361845970153809, "learning_rate": 6.83605756329622e-06, "loss": 0.46878929, "memory(GiB)": 34.88, "step": 60765, "train_speed(iter/s)": 0.410508 }, { "acc": 0.92037287, "epoch": 1.645411962201825, "grad_norm": 7.690628528594971, "learning_rate": 6.835537066693428e-06, "loss": 0.51589661, "memory(GiB)": 34.88, "step": 60770, "train_speed(iter/s)": 0.41051 }, { "acc": 0.90385714, "epoch": 1.6455473424850404, "grad_norm": 9.493302345275879, "learning_rate": 6.83501654710319e-06, "loss": 0.51197686, "memory(GiB)": 34.88, "step": 60775, "train_speed(iter/s)": 0.410512 }, { "acc": 0.91118078, "epoch": 1.645682722768256, "grad_norm": 7.5922627449035645, "learning_rate": 6.834496004532029e-06, "loss": 0.55988398, "memory(GiB)": 34.88, "step": 60780, "train_speed(iter/s)": 0.410514 }, { "acc": 0.90388041, "epoch": 1.6458181030514716, "grad_norm": 8.89531135559082, "learning_rate": 6.833975438986463e-06, "loss": 0.54869337, "memory(GiB)": 34.88, "step": 60785, "train_speed(iter/s)": 0.410516 }, { "acc": 0.89694405, "epoch": 1.6459534833346872, "grad_norm": 16.142431259155273, "learning_rate": 6.833454850473015e-06, "loss": 0.56911216, "memory(GiB)": 34.88, "step": 60790, "train_speed(iter/s)": 0.410517 }, { "acc": 0.91983261, "epoch": 1.6460888636179027, "grad_norm": 14.278633117675781, "learning_rate": 6.832934238998202e-06, "loss": 0.45467854, "memory(GiB)": 34.88, "step": 60795, "train_speed(iter/s)": 0.410519 }, { "acc": 0.91470928, "epoch": 1.646224243901118, "grad_norm": 13.054408073425293, "learning_rate": 6.832413604568553e-06, "loss": 0.43103752, "memory(GiB)": 34.88, "step": 60800, "train_speed(iter/s)": 0.410521 }, { "acc": 0.91711273, "epoch": 1.646359624184334, "grad_norm": 6.549034595489502, "learning_rate": 6.831892947190587e-06, "loss": 0.5029058, "memory(GiB)": 34.88, "step": 60805, "train_speed(iter/s)": 0.410523 }, { "acc": 0.91635361, "epoch": 1.6464950044675493, "grad_norm": 11.835489273071289, "learning_rate": 6.831372266870825e-06, "loss": 0.43653932, "memory(GiB)": 34.88, "step": 60810, "train_speed(iter/s)": 0.410525 }, { "acc": 0.9328619, "epoch": 1.646630384750765, "grad_norm": 7.361481666564941, "learning_rate": 6.83085156361579e-06, "loss": 0.42442579, "memory(GiB)": 34.88, "step": 60815, "train_speed(iter/s)": 0.410527 }, { "acc": 0.90143795, "epoch": 1.6467657650339804, "grad_norm": 12.250160217285156, "learning_rate": 6.830330837432004e-06, "loss": 0.58384361, "memory(GiB)": 34.88, "step": 60820, "train_speed(iter/s)": 0.410529 }, { "acc": 0.91196175, "epoch": 1.646901145317196, "grad_norm": 8.235419273376465, "learning_rate": 6.829810088325992e-06, "loss": 0.44518795, "memory(GiB)": 34.88, "step": 60825, "train_speed(iter/s)": 0.410531 }, { "acc": 0.90542517, "epoch": 1.6470365256004116, "grad_norm": 5.320618629455566, "learning_rate": 6.829289316304276e-06, "loss": 0.51150031, "memory(GiB)": 34.88, "step": 60830, "train_speed(iter/s)": 0.410533 }, { "acc": 0.88805971, "epoch": 1.6471719058836272, "grad_norm": 8.982945442199707, "learning_rate": 6.82876852137338e-06, "loss": 0.61452131, "memory(GiB)": 34.88, "step": 60835, "train_speed(iter/s)": 0.410535 }, { "acc": 0.91983166, "epoch": 1.6473072861668427, "grad_norm": 6.775248050689697, "learning_rate": 6.828247703539829e-06, "loss": 0.41283827, "memory(GiB)": 34.88, "step": 60840, "train_speed(iter/s)": 0.410537 }, { "acc": 0.91276598, "epoch": 1.647442666450058, "grad_norm": 9.765926361083984, "learning_rate": 6.827726862810148e-06, "loss": 0.4617568, "memory(GiB)": 34.88, "step": 60845, "train_speed(iter/s)": 0.410539 }, { "acc": 0.91775208, "epoch": 1.647578046733274, "grad_norm": 6.125330448150635, "learning_rate": 6.827205999190859e-06, "loss": 0.47328329, "memory(GiB)": 34.88, "step": 60850, "train_speed(iter/s)": 0.41054 }, { "acc": 0.90380402, "epoch": 1.6477134270164893, "grad_norm": 22.376461029052734, "learning_rate": 6.826685112688487e-06, "loss": 0.57509613, "memory(GiB)": 34.88, "step": 60855, "train_speed(iter/s)": 0.410542 }, { "acc": 0.8981986, "epoch": 1.6478488072997048, "grad_norm": 9.304166793823242, "learning_rate": 6.826164203309559e-06, "loss": 0.57805204, "memory(GiB)": 34.88, "step": 60860, "train_speed(iter/s)": 0.410544 }, { "acc": 0.90086994, "epoch": 1.6479841875829204, "grad_norm": 7.295652389526367, "learning_rate": 6.825643271060598e-06, "loss": 0.49095368, "memory(GiB)": 34.88, "step": 60865, "train_speed(iter/s)": 0.410546 }, { "acc": 0.88636456, "epoch": 1.648119567866136, "grad_norm": 17.146732330322266, "learning_rate": 6.825122315948132e-06, "loss": 0.66243839, "memory(GiB)": 34.88, "step": 60870, "train_speed(iter/s)": 0.410548 }, { "acc": 0.91135597, "epoch": 1.6482549481493516, "grad_norm": 7.411468505859375, "learning_rate": 6.824601337978687e-06, "loss": 0.50522556, "memory(GiB)": 34.88, "step": 60875, "train_speed(iter/s)": 0.41055 }, { "acc": 0.90130405, "epoch": 1.648390328432567, "grad_norm": 9.256326675415039, "learning_rate": 6.824080337158786e-06, "loss": 0.50122762, "memory(GiB)": 34.88, "step": 60880, "train_speed(iter/s)": 0.410552 }, { "acc": 0.90293694, "epoch": 1.6485257087157827, "grad_norm": 16.7636661529541, "learning_rate": 6.82355931349496e-06, "loss": 0.62210274, "memory(GiB)": 34.88, "step": 60885, "train_speed(iter/s)": 0.410553 }, { "acc": 0.9075181, "epoch": 1.648661088998998, "grad_norm": 8.115897178649902, "learning_rate": 6.823038266993733e-06, "loss": 0.51885948, "memory(GiB)": 34.88, "step": 60890, "train_speed(iter/s)": 0.410555 }, { "acc": 0.89826498, "epoch": 1.6487964692822137, "grad_norm": 7.258447170257568, "learning_rate": 6.822517197661634e-06, "loss": 0.53482757, "memory(GiB)": 34.88, "step": 60895, "train_speed(iter/s)": 0.410557 }, { "acc": 0.91692381, "epoch": 1.6489318495654293, "grad_norm": 5.185377597808838, "learning_rate": 6.821996105505189e-06, "loss": 0.47414837, "memory(GiB)": 34.88, "step": 60900, "train_speed(iter/s)": 0.410559 }, { "acc": 0.906884, "epoch": 1.6490672298486448, "grad_norm": 16.226627349853516, "learning_rate": 6.821474990530923e-06, "loss": 0.47406702, "memory(GiB)": 34.88, "step": 60905, "train_speed(iter/s)": 0.410561 }, { "acc": 0.90633698, "epoch": 1.6492026101318604, "grad_norm": 5.441330909729004, "learning_rate": 6.82095385274537e-06, "loss": 0.50988045, "memory(GiB)": 34.88, "step": 60910, "train_speed(iter/s)": 0.410563 }, { "acc": 0.9042408, "epoch": 1.6493379904150758, "grad_norm": 22.7995548248291, "learning_rate": 6.820432692155056e-06, "loss": 0.49594202, "memory(GiB)": 34.88, "step": 60915, "train_speed(iter/s)": 0.410565 }, { "acc": 0.91800642, "epoch": 1.6494733706982916, "grad_norm": 9.478994369506836, "learning_rate": 6.819911508766508e-06, "loss": 0.4676578, "memory(GiB)": 34.88, "step": 60920, "train_speed(iter/s)": 0.410566 }, { "acc": 0.9126358, "epoch": 1.649608750981507, "grad_norm": 4.1009979248046875, "learning_rate": 6.819390302586254e-06, "loss": 0.45762558, "memory(GiB)": 34.88, "step": 60925, "train_speed(iter/s)": 0.410568 }, { "acc": 0.91203899, "epoch": 1.6497441312647227, "grad_norm": 10.217589378356934, "learning_rate": 6.818869073620828e-06, "loss": 0.49126835, "memory(GiB)": 34.88, "step": 60930, "train_speed(iter/s)": 0.41057 }, { "acc": 0.90704632, "epoch": 1.649879511547938, "grad_norm": 8.921000480651855, "learning_rate": 6.818347821876754e-06, "loss": 0.56490741, "memory(GiB)": 34.88, "step": 60935, "train_speed(iter/s)": 0.410572 }, { "acc": 0.90230417, "epoch": 1.6500148918311537, "grad_norm": 8.270824432373047, "learning_rate": 6.817826547360564e-06, "loss": 0.55011244, "memory(GiB)": 34.88, "step": 60940, "train_speed(iter/s)": 0.410574 }, { "acc": 0.91268272, "epoch": 1.6501502721143693, "grad_norm": 6.947545051574707, "learning_rate": 6.817305250078789e-06, "loss": 0.481496, "memory(GiB)": 34.88, "step": 60945, "train_speed(iter/s)": 0.410576 }, { "acc": 0.91365242, "epoch": 1.6502856523975848, "grad_norm": 37.91145706176758, "learning_rate": 6.816783930037957e-06, "loss": 0.47315187, "memory(GiB)": 34.88, "step": 60950, "train_speed(iter/s)": 0.410578 }, { "acc": 0.9016655, "epoch": 1.6504210326808004, "grad_norm": 7.804558277130127, "learning_rate": 6.816262587244601e-06, "loss": 0.48376489, "memory(GiB)": 34.88, "step": 60955, "train_speed(iter/s)": 0.41058 }, { "acc": 0.90705633, "epoch": 1.6505564129640158, "grad_norm": 13.204763412475586, "learning_rate": 6.815741221705249e-06, "loss": 0.54969244, "memory(GiB)": 34.88, "step": 60960, "train_speed(iter/s)": 0.410582 }, { "acc": 0.93461571, "epoch": 1.6506917932472316, "grad_norm": 5.869895935058594, "learning_rate": 6.815219833426435e-06, "loss": 0.3473268, "memory(GiB)": 34.88, "step": 60965, "train_speed(iter/s)": 0.410583 }, { "acc": 0.91527424, "epoch": 1.650827173530447, "grad_norm": 9.055176734924316, "learning_rate": 6.814698422414689e-06, "loss": 0.48009148, "memory(GiB)": 34.88, "step": 60970, "train_speed(iter/s)": 0.410585 }, { "acc": 0.89462337, "epoch": 1.6509625538136625, "grad_norm": 8.59460735321045, "learning_rate": 6.814176988676545e-06, "loss": 0.65611534, "memory(GiB)": 34.88, "step": 60975, "train_speed(iter/s)": 0.410587 }, { "acc": 0.87615204, "epoch": 1.651097934096878, "grad_norm": 13.789579391479492, "learning_rate": 6.813655532218529e-06, "loss": 0.6898838, "memory(GiB)": 34.88, "step": 60980, "train_speed(iter/s)": 0.410589 }, { "acc": 0.88440266, "epoch": 1.6512333143800937, "grad_norm": 15.170166015625, "learning_rate": 6.81313405304718e-06, "loss": 0.59612803, "memory(GiB)": 34.88, "step": 60985, "train_speed(iter/s)": 0.410591 }, { "acc": 0.91897821, "epoch": 1.6513686946633093, "grad_norm": 6.744087219238281, "learning_rate": 6.812612551169026e-06, "loss": 0.42589002, "memory(GiB)": 34.88, "step": 60990, "train_speed(iter/s)": 0.410592 }, { "acc": 0.93173094, "epoch": 1.6515040749465246, "grad_norm": 12.260334014892578, "learning_rate": 6.812091026590601e-06, "loss": 0.29527271, "memory(GiB)": 34.88, "step": 60995, "train_speed(iter/s)": 0.410594 }, { "acc": 0.91315403, "epoch": 1.6516394552297404, "grad_norm": 10.17760944366455, "learning_rate": 6.81156947931844e-06, "loss": 0.53265848, "memory(GiB)": 34.88, "step": 61000, "train_speed(iter/s)": 0.410595 }, { "acc": 0.91358595, "epoch": 1.6517748355129558, "grad_norm": 4.213904857635498, "learning_rate": 6.811047909359076e-06, "loss": 0.5386867, "memory(GiB)": 34.88, "step": 61005, "train_speed(iter/s)": 0.410597 }, { "acc": 0.89346275, "epoch": 1.6519102157961716, "grad_norm": 9.4705810546875, "learning_rate": 6.810526316719041e-06, "loss": 0.59461646, "memory(GiB)": 34.88, "step": 61010, "train_speed(iter/s)": 0.410599 }, { "acc": 0.88533783, "epoch": 1.652045596079387, "grad_norm": 9.321205139160156, "learning_rate": 6.810004701404869e-06, "loss": 0.67527475, "memory(GiB)": 34.88, "step": 61015, "train_speed(iter/s)": 0.410601 }, { "acc": 0.90375271, "epoch": 1.6521809763626025, "grad_norm": 4.325841426849365, "learning_rate": 6.8094830634230945e-06, "loss": 0.49753456, "memory(GiB)": 34.88, "step": 61020, "train_speed(iter/s)": 0.410603 }, { "acc": 0.91458187, "epoch": 1.652316356645818, "grad_norm": 5.144272327423096, "learning_rate": 6.808961402780253e-06, "loss": 0.39594028, "memory(GiB)": 34.88, "step": 61025, "train_speed(iter/s)": 0.410605 }, { "acc": 0.90163746, "epoch": 1.6524517369290337, "grad_norm": 6.2840800285339355, "learning_rate": 6.808439719482879e-06, "loss": 0.5035697, "memory(GiB)": 34.88, "step": 61030, "train_speed(iter/s)": 0.410607 }, { "acc": 0.90135937, "epoch": 1.6525871172122493, "grad_norm": 10.50594711303711, "learning_rate": 6.8079180135375064e-06, "loss": 0.55686278, "memory(GiB)": 34.88, "step": 61035, "train_speed(iter/s)": 0.410608 }, { "acc": 0.9013382, "epoch": 1.6527224974954646, "grad_norm": 4.546528339385986, "learning_rate": 6.8073962849506716e-06, "loss": 0.52075524, "memory(GiB)": 34.88, "step": 61040, "train_speed(iter/s)": 0.41061 }, { "acc": 0.918291, "epoch": 1.6528578777786804, "grad_norm": 8.39249324798584, "learning_rate": 6.806874533728913e-06, "loss": 0.43797879, "memory(GiB)": 34.88, "step": 61045, "train_speed(iter/s)": 0.410612 }, { "acc": 0.90335045, "epoch": 1.6529932580618958, "grad_norm": 11.57749080657959, "learning_rate": 6.80635275987876e-06, "loss": 0.48589077, "memory(GiB)": 34.88, "step": 61050, "train_speed(iter/s)": 0.410614 }, { "acc": 0.89504776, "epoch": 1.6531286383451114, "grad_norm": 12.038860321044922, "learning_rate": 6.805830963406755e-06, "loss": 0.4890471, "memory(GiB)": 34.88, "step": 61055, "train_speed(iter/s)": 0.410616 }, { "acc": 0.88827667, "epoch": 1.653264018628327, "grad_norm": 9.420005798339844, "learning_rate": 6.805309144319433e-06, "loss": 0.58538103, "memory(GiB)": 34.88, "step": 61060, "train_speed(iter/s)": 0.410618 }, { "acc": 0.92788248, "epoch": 1.6533993989115425, "grad_norm": 5.0868916511535645, "learning_rate": 6.804787302623329e-06, "loss": 0.37640214, "memory(GiB)": 34.88, "step": 61065, "train_speed(iter/s)": 0.410619 }, { "acc": 0.87997189, "epoch": 1.653534779194758, "grad_norm": 26.776945114135742, "learning_rate": 6.804265438324981e-06, "loss": 0.84596558, "memory(GiB)": 34.88, "step": 61070, "train_speed(iter/s)": 0.410621 }, { "acc": 0.89935656, "epoch": 1.6536701594779735, "grad_norm": 10.183895111083984, "learning_rate": 6.803743551430927e-06, "loss": 0.64619579, "memory(GiB)": 34.88, "step": 61075, "train_speed(iter/s)": 0.410623 }, { "acc": 0.92849236, "epoch": 1.6538055397611893, "grad_norm": 6.2426676750183105, "learning_rate": 6.803221641947704e-06, "loss": 0.41309648, "memory(GiB)": 34.88, "step": 61080, "train_speed(iter/s)": 0.410625 }, { "acc": 0.90319366, "epoch": 1.6539409200444046, "grad_norm": 10.74247932434082, "learning_rate": 6.802699709881848e-06, "loss": 0.56509371, "memory(GiB)": 34.88, "step": 61085, "train_speed(iter/s)": 0.410626 }, { "acc": 0.91677856, "epoch": 1.6540763003276204, "grad_norm": 18.21269416809082, "learning_rate": 6.802177755239902e-06, "loss": 0.48657103, "memory(GiB)": 34.88, "step": 61090, "train_speed(iter/s)": 0.410628 }, { "acc": 0.90676975, "epoch": 1.6542116806108358, "grad_norm": 7.426088809967041, "learning_rate": 6.8016557780284e-06, "loss": 0.45402069, "memory(GiB)": 34.88, "step": 61095, "train_speed(iter/s)": 0.41063 }, { "acc": 0.91879854, "epoch": 1.6543470608940514, "grad_norm": 25.958040237426758, "learning_rate": 6.801133778253884e-06, "loss": 0.45496464, "memory(GiB)": 34.88, "step": 61100, "train_speed(iter/s)": 0.410632 }, { "acc": 0.92672968, "epoch": 1.654482441177267, "grad_norm": 10.47899341583252, "learning_rate": 6.8006117559228925e-06, "loss": 0.38172612, "memory(GiB)": 34.88, "step": 61105, "train_speed(iter/s)": 0.410634 }, { "acc": 0.90405235, "epoch": 1.6546178214604825, "grad_norm": 9.280464172363281, "learning_rate": 6.800089711041962e-06, "loss": 0.47718573, "memory(GiB)": 34.88, "step": 61110, "train_speed(iter/s)": 0.410636 }, { "acc": 0.89885712, "epoch": 1.654753201743698, "grad_norm": 11.951006889343262, "learning_rate": 6.799567643617635e-06, "loss": 0.58851509, "memory(GiB)": 34.88, "step": 61115, "train_speed(iter/s)": 0.410637 }, { "acc": 0.91623316, "epoch": 1.6548885820269135, "grad_norm": 8.43106460571289, "learning_rate": 6.799045553656451e-06, "loss": 0.4506875, "memory(GiB)": 34.88, "step": 61120, "train_speed(iter/s)": 0.410639 }, { "acc": 0.92208576, "epoch": 1.6550239623101293, "grad_norm": 7.0086445808410645, "learning_rate": 6.7985234411649495e-06, "loss": 0.470929, "memory(GiB)": 34.88, "step": 61125, "train_speed(iter/s)": 0.410641 }, { "acc": 0.90897655, "epoch": 1.6551593425933446, "grad_norm": 12.21480941772461, "learning_rate": 6.798001306149671e-06, "loss": 0.48129787, "memory(GiB)": 34.88, "step": 61130, "train_speed(iter/s)": 0.410643 }, { "acc": 0.90753078, "epoch": 1.6552947228765602, "grad_norm": 6.265902996063232, "learning_rate": 6.797479148617156e-06, "loss": 0.50108733, "memory(GiB)": 34.88, "step": 61135, "train_speed(iter/s)": 0.410645 }, { "acc": 0.89648457, "epoch": 1.6554301031597758, "grad_norm": 13.882926940917969, "learning_rate": 6.796956968573948e-06, "loss": 0.56923389, "memory(GiB)": 34.88, "step": 61140, "train_speed(iter/s)": 0.410647 }, { "acc": 0.91719007, "epoch": 1.6555654834429914, "grad_norm": 6.921004772186279, "learning_rate": 6.796434766026585e-06, "loss": 0.45008745, "memory(GiB)": 34.88, "step": 61145, "train_speed(iter/s)": 0.410648 }, { "acc": 0.90229244, "epoch": 1.655700863726207, "grad_norm": 7.777665138244629, "learning_rate": 6.795912540981609e-06, "loss": 0.56740508, "memory(GiB)": 34.88, "step": 61150, "train_speed(iter/s)": 0.41065 }, { "acc": 0.92156239, "epoch": 1.6558362440094223, "grad_norm": 6.962104797363281, "learning_rate": 6.795390293445565e-06, "loss": 0.4454741, "memory(GiB)": 34.88, "step": 61155, "train_speed(iter/s)": 0.410652 }, { "acc": 0.9181983, "epoch": 1.6559716242926381, "grad_norm": 4.67216157913208, "learning_rate": 6.794868023424993e-06, "loss": 0.35761395, "memory(GiB)": 34.88, "step": 61160, "train_speed(iter/s)": 0.410654 }, { "acc": 0.89655972, "epoch": 1.6561070045758535, "grad_norm": 5.721891403198242, "learning_rate": 6.794345730926434e-06, "loss": 0.51551113, "memory(GiB)": 34.88, "step": 61165, "train_speed(iter/s)": 0.410656 }, { "acc": 0.93455372, "epoch": 1.6562423848590693, "grad_norm": 9.75677490234375, "learning_rate": 6.793823415956433e-06, "loss": 0.34591963, "memory(GiB)": 34.88, "step": 61170, "train_speed(iter/s)": 0.410657 }, { "acc": 0.90292034, "epoch": 1.6563777651422846, "grad_norm": 7.733463287353516, "learning_rate": 6.793301078521532e-06, "loss": 0.61307201, "memory(GiB)": 34.88, "step": 61175, "train_speed(iter/s)": 0.410659 }, { "acc": 0.90646935, "epoch": 1.6565131454255002, "grad_norm": 22.775150299072266, "learning_rate": 6.792778718628275e-06, "loss": 0.50887213, "memory(GiB)": 34.88, "step": 61180, "train_speed(iter/s)": 0.410661 }, { "acc": 0.91957226, "epoch": 1.6566485257087158, "grad_norm": 9.163036346435547, "learning_rate": 6.792256336283207e-06, "loss": 0.4091176, "memory(GiB)": 34.88, "step": 61185, "train_speed(iter/s)": 0.410663 }, { "acc": 0.9066102, "epoch": 1.6567839059919314, "grad_norm": 15.584436416625977, "learning_rate": 6.791733931492867e-06, "loss": 0.57080498, "memory(GiB)": 34.88, "step": 61190, "train_speed(iter/s)": 0.410665 }, { "acc": 0.90980492, "epoch": 1.656919286275147, "grad_norm": 12.805706977844238, "learning_rate": 6.791211504263806e-06, "loss": 0.42191381, "memory(GiB)": 34.88, "step": 61195, "train_speed(iter/s)": 0.410667 }, { "acc": 0.89810238, "epoch": 1.6570546665583623, "grad_norm": 7.012566089630127, "learning_rate": 6.790689054602562e-06, "loss": 0.51955557, "memory(GiB)": 34.88, "step": 61200, "train_speed(iter/s)": 0.410668 }, { "acc": 0.92121181, "epoch": 1.6571900468415781, "grad_norm": 6.3244218826293945, "learning_rate": 6.790166582515683e-06, "loss": 0.37989831, "memory(GiB)": 34.88, "step": 61205, "train_speed(iter/s)": 0.41067 }, { "acc": 0.88194199, "epoch": 1.6573254271247935, "grad_norm": 10.419766426086426, "learning_rate": 6.789644088009714e-06, "loss": 0.68076487, "memory(GiB)": 34.88, "step": 61210, "train_speed(iter/s)": 0.410672 }, { "acc": 0.89302731, "epoch": 1.657460807408009, "grad_norm": 8.537718772888184, "learning_rate": 6.789121571091196e-06, "loss": 0.59152145, "memory(GiB)": 34.88, "step": 61215, "train_speed(iter/s)": 0.410674 }, { "acc": 0.9081459, "epoch": 1.6575961876912246, "grad_norm": 9.712528228759766, "learning_rate": 6.788599031766683e-06, "loss": 0.47132015, "memory(GiB)": 34.88, "step": 61220, "train_speed(iter/s)": 0.410676 }, { "acc": 0.90451031, "epoch": 1.6577315679744402, "grad_norm": 8.999637603759766, "learning_rate": 6.788076470042715e-06, "loss": 0.57765999, "memory(GiB)": 34.88, "step": 61225, "train_speed(iter/s)": 0.410677 }, { "acc": 0.91372509, "epoch": 1.6578669482576558, "grad_norm": 21.872156143188477, "learning_rate": 6.787553885925839e-06, "loss": 0.50718689, "memory(GiB)": 34.88, "step": 61230, "train_speed(iter/s)": 0.410679 }, { "acc": 0.90674648, "epoch": 1.6580023285408712, "grad_norm": 6.787623882293701, "learning_rate": 6.787031279422599e-06, "loss": 0.42629242, "memory(GiB)": 34.88, "step": 61235, "train_speed(iter/s)": 0.410681 }, { "acc": 0.90905476, "epoch": 1.658137708824087, "grad_norm": 7.834770202636719, "learning_rate": 6.786508650539546e-06, "loss": 0.46572733, "memory(GiB)": 34.88, "step": 61240, "train_speed(iter/s)": 0.410683 }, { "acc": 0.89949417, "epoch": 1.6582730891073023, "grad_norm": 5.794246196746826, "learning_rate": 6.785985999283225e-06, "loss": 0.51600218, "memory(GiB)": 34.88, "step": 61245, "train_speed(iter/s)": 0.410685 }, { "acc": 0.90093994, "epoch": 1.6584084693905181, "grad_norm": 5.323230266571045, "learning_rate": 6.785463325660183e-06, "loss": 0.48061266, "memory(GiB)": 34.88, "step": 61250, "train_speed(iter/s)": 0.410687 }, { "acc": 0.89901867, "epoch": 1.6585438496737335, "grad_norm": 7.924891948699951, "learning_rate": 6.784940629676969e-06, "loss": 0.5854785, "memory(GiB)": 34.88, "step": 61255, "train_speed(iter/s)": 0.410689 }, { "acc": 0.91057997, "epoch": 1.658679229956949, "grad_norm": 5.99644660949707, "learning_rate": 6.784417911340129e-06, "loss": 0.53352785, "memory(GiB)": 34.88, "step": 61260, "train_speed(iter/s)": 0.410691 }, { "acc": 0.88340101, "epoch": 1.6588146102401646, "grad_norm": 9.830948829650879, "learning_rate": 6.783895170656212e-06, "loss": 0.60207109, "memory(GiB)": 34.88, "step": 61265, "train_speed(iter/s)": 0.410692 }, { "acc": 0.90390129, "epoch": 1.6589499905233802, "grad_norm": 17.182249069213867, "learning_rate": 6.783372407631767e-06, "loss": 0.50061674, "memory(GiB)": 34.88, "step": 61270, "train_speed(iter/s)": 0.410694 }, { "acc": 0.91193905, "epoch": 1.6590853708065958, "grad_norm": 36.95476150512695, "learning_rate": 6.7828496222733395e-06, "loss": 0.45977411, "memory(GiB)": 34.88, "step": 61275, "train_speed(iter/s)": 0.410695 }, { "acc": 0.90546894, "epoch": 1.6592207510898112, "grad_norm": 4.089441299438477, "learning_rate": 6.782326814587481e-06, "loss": 0.49779768, "memory(GiB)": 34.88, "step": 61280, "train_speed(iter/s)": 0.410697 }, { "acc": 0.8850668, "epoch": 1.659356131373027, "grad_norm": 38.36265182495117, "learning_rate": 6.781803984580741e-06, "loss": 0.58322783, "memory(GiB)": 34.88, "step": 61285, "train_speed(iter/s)": 0.410699 }, { "acc": 0.89579487, "epoch": 1.6594915116562423, "grad_norm": 9.907413482666016, "learning_rate": 6.781281132259669e-06, "loss": 0.66289477, "memory(GiB)": 34.88, "step": 61290, "train_speed(iter/s)": 0.410701 }, { "acc": 0.89567165, "epoch": 1.659626891939458, "grad_norm": 8.610319137573242, "learning_rate": 6.780758257630814e-06, "loss": 0.60849361, "memory(GiB)": 34.88, "step": 61295, "train_speed(iter/s)": 0.410702 }, { "acc": 0.91428165, "epoch": 1.6597622722226735, "grad_norm": 10.787909507751465, "learning_rate": 6.780235360700725e-06, "loss": 0.49931879, "memory(GiB)": 34.88, "step": 61300, "train_speed(iter/s)": 0.410704 }, { "acc": 0.91600904, "epoch": 1.659897652505889, "grad_norm": 6.905974388122559, "learning_rate": 6.779712441475954e-06, "loss": 0.4874033, "memory(GiB)": 34.88, "step": 61305, "train_speed(iter/s)": 0.410706 }, { "acc": 0.92945499, "epoch": 1.6600330327891046, "grad_norm": 4.815916538238525, "learning_rate": 6.779189499963054e-06, "loss": 0.30167928, "memory(GiB)": 34.88, "step": 61310, "train_speed(iter/s)": 0.410708 }, { "acc": 0.91568241, "epoch": 1.66016841307232, "grad_norm": 22.771255493164062, "learning_rate": 6.778666536168569e-06, "loss": 0.43394489, "memory(GiB)": 34.88, "step": 61315, "train_speed(iter/s)": 0.410709 }, { "acc": 0.91456776, "epoch": 1.6603037933555358, "grad_norm": 5.9903130531311035, "learning_rate": 6.778143550099055e-06, "loss": 0.45982418, "memory(GiB)": 34.88, "step": 61320, "train_speed(iter/s)": 0.410711 }, { "acc": 0.90000496, "epoch": 1.6604391736387512, "grad_norm": 7.492209434509277, "learning_rate": 6.777620541761063e-06, "loss": 0.52651658, "memory(GiB)": 34.88, "step": 61325, "train_speed(iter/s)": 0.410712 }, { "acc": 0.91887531, "epoch": 1.660574553921967, "grad_norm": 6.75922155380249, "learning_rate": 6.777097511161143e-06, "loss": 0.46058192, "memory(GiB)": 34.88, "step": 61330, "train_speed(iter/s)": 0.410714 }, { "acc": 0.93369179, "epoch": 1.6607099342051823, "grad_norm": 6.69456148147583, "learning_rate": 6.776574458305849e-06, "loss": 0.40413518, "memory(GiB)": 34.88, "step": 61335, "train_speed(iter/s)": 0.410716 }, { "acc": 0.89530907, "epoch": 1.660845314488398, "grad_norm": 7.518722057342529, "learning_rate": 6.776051383201732e-06, "loss": 0.55263224, "memory(GiB)": 34.88, "step": 61340, "train_speed(iter/s)": 0.410717 }, { "acc": 0.89627476, "epoch": 1.6609806947716135, "grad_norm": 16.35428237915039, "learning_rate": 6.775528285855346e-06, "loss": 0.63077574, "memory(GiB)": 34.88, "step": 61345, "train_speed(iter/s)": 0.410719 }, { "acc": 0.89629364, "epoch": 1.661116075054829, "grad_norm": 17.554574966430664, "learning_rate": 6.775005166273243e-06, "loss": 0.53992434, "memory(GiB)": 34.88, "step": 61350, "train_speed(iter/s)": 0.410721 }, { "acc": 0.90894928, "epoch": 1.6612514553380446, "grad_norm": 7.347192287445068, "learning_rate": 6.7744820244619744e-06, "loss": 0.56648116, "memory(GiB)": 34.88, "step": 61355, "train_speed(iter/s)": 0.410722 }, { "acc": 0.90266619, "epoch": 1.66138683562126, "grad_norm": 7.276185512542725, "learning_rate": 6.773958860428097e-06, "loss": 0.45903502, "memory(GiB)": 34.88, "step": 61360, "train_speed(iter/s)": 0.410723 }, { "acc": 0.92096605, "epoch": 1.6615222159044758, "grad_norm": 10.927440643310547, "learning_rate": 6.77343567417816e-06, "loss": 0.42860527, "memory(GiB)": 34.88, "step": 61365, "train_speed(iter/s)": 0.410725 }, { "acc": 0.90151348, "epoch": 1.6616575961876912, "grad_norm": 12.321293830871582, "learning_rate": 6.772912465718721e-06, "loss": 0.56748581, "memory(GiB)": 34.88, "step": 61370, "train_speed(iter/s)": 0.410727 }, { "acc": 0.90647717, "epoch": 1.6617929764709067, "grad_norm": 9.507311820983887, "learning_rate": 6.772389235056333e-06, "loss": 0.55948257, "memory(GiB)": 34.88, "step": 61375, "train_speed(iter/s)": 0.410729 }, { "acc": 0.9141264, "epoch": 1.6619283567541223, "grad_norm": 7.877858638763428, "learning_rate": 6.771865982197552e-06, "loss": 0.45842361, "memory(GiB)": 34.88, "step": 61380, "train_speed(iter/s)": 0.410731 }, { "acc": 0.92229347, "epoch": 1.662063737037338, "grad_norm": 5.687107563018799, "learning_rate": 6.7713427071489295e-06, "loss": 0.40774708, "memory(GiB)": 34.88, "step": 61385, "train_speed(iter/s)": 0.410732 }, { "acc": 0.90313435, "epoch": 1.6621991173205535, "grad_norm": 11.5697021484375, "learning_rate": 6.770819409917023e-06, "loss": 0.65263062, "memory(GiB)": 34.88, "step": 61390, "train_speed(iter/s)": 0.410734 }, { "acc": 0.90279512, "epoch": 1.6623344976037688, "grad_norm": 9.200998306274414, "learning_rate": 6.770296090508388e-06, "loss": 0.48600168, "memory(GiB)": 34.88, "step": 61395, "train_speed(iter/s)": 0.410736 }, { "acc": 0.93124685, "epoch": 1.6624698778869846, "grad_norm": 4.834563732147217, "learning_rate": 6.769772748929577e-06, "loss": 0.35512676, "memory(GiB)": 34.88, "step": 61400, "train_speed(iter/s)": 0.410737 }, { "acc": 0.89624109, "epoch": 1.6626052581702, "grad_norm": 5.732303619384766, "learning_rate": 6.769249385187151e-06, "loss": 0.5482439, "memory(GiB)": 34.88, "step": 61405, "train_speed(iter/s)": 0.410739 }, { "acc": 0.91122751, "epoch": 1.6627406384534158, "grad_norm": 12.151949882507324, "learning_rate": 6.768725999287662e-06, "loss": 0.40842981, "memory(GiB)": 34.88, "step": 61410, "train_speed(iter/s)": 0.41074 }, { "acc": 0.91851692, "epoch": 1.6628760187366312, "grad_norm": 5.41679573059082, "learning_rate": 6.768202591237668e-06, "loss": 0.42220449, "memory(GiB)": 34.88, "step": 61415, "train_speed(iter/s)": 0.410742 }, { "acc": 0.92920446, "epoch": 1.6630113990198467, "grad_norm": 10.895806312561035, "learning_rate": 6.767679161043725e-06, "loss": 0.33541665, "memory(GiB)": 34.88, "step": 61420, "train_speed(iter/s)": 0.410744 }, { "acc": 0.90966473, "epoch": 1.6631467793030623, "grad_norm": 4.341799736022949, "learning_rate": 6.767155708712392e-06, "loss": 0.40735602, "memory(GiB)": 34.88, "step": 61425, "train_speed(iter/s)": 0.410745 }, { "acc": 0.89968567, "epoch": 1.663282159586278, "grad_norm": 9.605929374694824, "learning_rate": 6.766632234250223e-06, "loss": 0.58952823, "memory(GiB)": 34.88, "step": 61430, "train_speed(iter/s)": 0.410747 }, { "acc": 0.91913414, "epoch": 1.6634175398694935, "grad_norm": 5.197184085845947, "learning_rate": 6.766108737663778e-06, "loss": 0.4047986, "memory(GiB)": 34.88, "step": 61435, "train_speed(iter/s)": 0.410749 }, { "acc": 0.9046464, "epoch": 1.6635529201527088, "grad_norm": 6.7851715087890625, "learning_rate": 6.765585218959615e-06, "loss": 0.51599007, "memory(GiB)": 34.88, "step": 61440, "train_speed(iter/s)": 0.41075 }, { "acc": 0.89135036, "epoch": 1.6636883004359246, "grad_norm": 7.483938217163086, "learning_rate": 6.76506167814429e-06, "loss": 0.56792316, "memory(GiB)": 34.88, "step": 61445, "train_speed(iter/s)": 0.410752 }, { "acc": 0.89685726, "epoch": 1.66382368071914, "grad_norm": 9.218372344970703, "learning_rate": 6.764538115224363e-06, "loss": 0.53918571, "memory(GiB)": 34.88, "step": 61450, "train_speed(iter/s)": 0.410754 }, { "acc": 0.89418583, "epoch": 1.6639590610023556, "grad_norm": 13.64326286315918, "learning_rate": 6.764014530206393e-06, "loss": 0.5586369, "memory(GiB)": 34.88, "step": 61455, "train_speed(iter/s)": 0.410755 }, { "acc": 0.89400215, "epoch": 1.6640944412855712, "grad_norm": 10.486749649047852, "learning_rate": 6.763490923096937e-06, "loss": 0.52520404, "memory(GiB)": 34.88, "step": 61460, "train_speed(iter/s)": 0.410757 }, { "acc": 0.8981432, "epoch": 1.6642298215687867, "grad_norm": 9.180330276489258, "learning_rate": 6.762967293902556e-06, "loss": 0.59700279, "memory(GiB)": 34.88, "step": 61465, "train_speed(iter/s)": 0.410758 }, { "acc": 0.88475971, "epoch": 1.6643652018520023, "grad_norm": 15.458856582641602, "learning_rate": 6.7624436426298075e-06, "loss": 0.69984145, "memory(GiB)": 34.88, "step": 61470, "train_speed(iter/s)": 0.41076 }, { "acc": 0.90993738, "epoch": 1.6645005821352177, "grad_norm": 7.1807074546813965, "learning_rate": 6.761919969285256e-06, "loss": 0.41851625, "memory(GiB)": 34.88, "step": 61475, "train_speed(iter/s)": 0.410762 }, { "acc": 0.91154242, "epoch": 1.6646359624184335, "grad_norm": 6.547055721282959, "learning_rate": 6.761396273875456e-06, "loss": 0.52126589, "memory(GiB)": 34.88, "step": 61480, "train_speed(iter/s)": 0.410763 }, { "acc": 0.92957249, "epoch": 1.6647713427016488, "grad_norm": 7.714767932891846, "learning_rate": 6.760872556406971e-06, "loss": 0.37320678, "memory(GiB)": 34.88, "step": 61485, "train_speed(iter/s)": 0.410765 }, { "acc": 0.90302172, "epoch": 1.6649067229848646, "grad_norm": 15.862594604492188, "learning_rate": 6.760348816886359e-06, "loss": 0.52337022, "memory(GiB)": 34.88, "step": 61490, "train_speed(iter/s)": 0.410767 }, { "acc": 0.90499401, "epoch": 1.66504210326808, "grad_norm": 6.824835300445557, "learning_rate": 6.7598250553201835e-06, "loss": 0.53650875, "memory(GiB)": 34.88, "step": 61495, "train_speed(iter/s)": 0.410768 }, { "acc": 0.89079247, "epoch": 1.6651774835512956, "grad_norm": 8.341711044311523, "learning_rate": 6.759301271715004e-06, "loss": 0.55853076, "memory(GiB)": 34.88, "step": 61500, "train_speed(iter/s)": 0.41077 }, { "acc": 0.91730194, "epoch": 1.6653128638345112, "grad_norm": 7.451966285705566, "learning_rate": 6.758777466077383e-06, "loss": 0.37583156, "memory(GiB)": 34.88, "step": 61505, "train_speed(iter/s)": 0.410772 }, { "acc": 0.89504852, "epoch": 1.6654482441177267, "grad_norm": 20.677541732788086, "learning_rate": 6.758253638413882e-06, "loss": 0.55585999, "memory(GiB)": 34.88, "step": 61510, "train_speed(iter/s)": 0.410774 }, { "acc": 0.9029438, "epoch": 1.6655836244009423, "grad_norm": 9.835769653320312, "learning_rate": 6.757729788731062e-06, "loss": 0.53625569, "memory(GiB)": 34.88, "step": 61515, "train_speed(iter/s)": 0.410776 }, { "acc": 0.90482454, "epoch": 1.6657190046841577, "grad_norm": 11.016695022583008, "learning_rate": 6.757205917035488e-06, "loss": 0.51405315, "memory(GiB)": 34.88, "step": 61520, "train_speed(iter/s)": 0.410778 }, { "acc": 0.91734514, "epoch": 1.6658543849673735, "grad_norm": 10.444145202636719, "learning_rate": 6.756682023333718e-06, "loss": 0.44362154, "memory(GiB)": 34.88, "step": 61525, "train_speed(iter/s)": 0.41078 }, { "acc": 0.91764183, "epoch": 1.6659897652505888, "grad_norm": 10.962345123291016, "learning_rate": 6.756158107632319e-06, "loss": 0.43393631, "memory(GiB)": 34.88, "step": 61530, "train_speed(iter/s)": 0.410782 }, { "acc": 0.93335629, "epoch": 1.6661251455338044, "grad_norm": 6.841923236846924, "learning_rate": 6.75563416993785e-06, "loss": 0.34040935, "memory(GiB)": 34.88, "step": 61535, "train_speed(iter/s)": 0.410783 }, { "acc": 0.87865868, "epoch": 1.66626052581702, "grad_norm": 19.837568283081055, "learning_rate": 6.75511021025688e-06, "loss": 0.73067102, "memory(GiB)": 34.88, "step": 61540, "train_speed(iter/s)": 0.410785 }, { "acc": 0.89495487, "epoch": 1.6663959061002356, "grad_norm": 12.979840278625488, "learning_rate": 6.7545862285959666e-06, "loss": 0.59855099, "memory(GiB)": 34.88, "step": 61545, "train_speed(iter/s)": 0.410787 }, { "acc": 0.89216652, "epoch": 1.6665312863834512, "grad_norm": 9.970786094665527, "learning_rate": 6.754062224961676e-06, "loss": 0.59173374, "memory(GiB)": 34.88, "step": 61550, "train_speed(iter/s)": 0.410789 }, { "acc": 0.9214797, "epoch": 1.6666666666666665, "grad_norm": 4.194538593292236, "learning_rate": 6.753538199360574e-06, "loss": 0.42684584, "memory(GiB)": 34.88, "step": 61555, "train_speed(iter/s)": 0.410791 }, { "acc": 0.90974903, "epoch": 1.6668020469498823, "grad_norm": 3.6549201011657715, "learning_rate": 6.753014151799225e-06, "loss": 0.50753994, "memory(GiB)": 34.88, "step": 61560, "train_speed(iter/s)": 0.410792 }, { "acc": 0.89365082, "epoch": 1.6669374272330977, "grad_norm": 9.192048072814941, "learning_rate": 6.752490082284192e-06, "loss": 0.68979015, "memory(GiB)": 34.88, "step": 61565, "train_speed(iter/s)": 0.410794 }, { "acc": 0.89878817, "epoch": 1.6670728075163135, "grad_norm": 11.882598876953125, "learning_rate": 6.75196599082204e-06, "loss": 0.5437613, "memory(GiB)": 34.88, "step": 61570, "train_speed(iter/s)": 0.410795 }, { "acc": 0.9014472, "epoch": 1.6672081877995288, "grad_norm": 5.69965124130249, "learning_rate": 6.751441877419336e-06, "loss": 0.54705906, "memory(GiB)": 34.88, "step": 61575, "train_speed(iter/s)": 0.410797 }, { "acc": 0.92945156, "epoch": 1.6673435680827444, "grad_norm": 13.103318214416504, "learning_rate": 6.750917742082642e-06, "loss": 0.40825138, "memory(GiB)": 34.88, "step": 61580, "train_speed(iter/s)": 0.410799 }, { "acc": 0.92135143, "epoch": 1.66747894836596, "grad_norm": 8.704785346984863, "learning_rate": 6.750393584818526e-06, "loss": 0.43205357, "memory(GiB)": 34.88, "step": 61585, "train_speed(iter/s)": 0.410801 }, { "acc": 0.92769585, "epoch": 1.6676143286491756, "grad_norm": 8.508248329162598, "learning_rate": 6.749869405633556e-06, "loss": 0.34561229, "memory(GiB)": 34.88, "step": 61590, "train_speed(iter/s)": 0.410802 }, { "acc": 0.89394884, "epoch": 1.6677497089323912, "grad_norm": 8.149598121643066, "learning_rate": 6.7493452045342965e-06, "loss": 0.59155684, "memory(GiB)": 34.88, "step": 61595, "train_speed(iter/s)": 0.410804 }, { "acc": 0.90325336, "epoch": 1.6678850892156065, "grad_norm": 7.839402675628662, "learning_rate": 6.748820981527316e-06, "loss": 0.52172918, "memory(GiB)": 34.88, "step": 61600, "train_speed(iter/s)": 0.410806 }, { "acc": 0.90407839, "epoch": 1.6680204694988223, "grad_norm": 5.281653881072998, "learning_rate": 6.748296736619178e-06, "loss": 0.49359231, "memory(GiB)": 34.88, "step": 61605, "train_speed(iter/s)": 0.410807 }, { "acc": 0.90972481, "epoch": 1.6681558497820377, "grad_norm": 5.25417423248291, "learning_rate": 6.747772469816452e-06, "loss": 0.51799517, "memory(GiB)": 34.88, "step": 61610, "train_speed(iter/s)": 0.410809 }, { "acc": 0.90578966, "epoch": 1.6682912300652533, "grad_norm": 9.987808227539062, "learning_rate": 6.747248181125704e-06, "loss": 0.51521268, "memory(GiB)": 34.88, "step": 61615, "train_speed(iter/s)": 0.410811 }, { "acc": 0.90418034, "epoch": 1.6684266103484688, "grad_norm": 4.201086521148682, "learning_rate": 6.746723870553502e-06, "loss": 0.48446312, "memory(GiB)": 34.88, "step": 61620, "train_speed(iter/s)": 0.410812 }, { "acc": 0.91558161, "epoch": 1.6685619906316844, "grad_norm": 4.568602085113525, "learning_rate": 6.7461995381064175e-06, "loss": 0.38594289, "memory(GiB)": 34.88, "step": 61625, "train_speed(iter/s)": 0.410814 }, { "acc": 0.89368935, "epoch": 1.6686973709149, "grad_norm": 7.679210186004639, "learning_rate": 6.745675183791013e-06, "loss": 0.55015903, "memory(GiB)": 34.88, "step": 61630, "train_speed(iter/s)": 0.410816 }, { "acc": 0.89195728, "epoch": 1.6688327511981154, "grad_norm": 8.573631286621094, "learning_rate": 6.745150807613862e-06, "loss": 0.636308, "memory(GiB)": 34.88, "step": 61635, "train_speed(iter/s)": 0.410818 }, { "acc": 0.90404797, "epoch": 1.6689681314813312, "grad_norm": 7.738760471343994, "learning_rate": 6.74462640958153e-06, "loss": 0.54714584, "memory(GiB)": 34.88, "step": 61640, "train_speed(iter/s)": 0.41082 }, { "acc": 0.90756798, "epoch": 1.6691035117645465, "grad_norm": 5.921601295471191, "learning_rate": 6.74410198970059e-06, "loss": 0.43247375, "memory(GiB)": 34.88, "step": 61645, "train_speed(iter/s)": 0.410821 }, { "acc": 0.90023994, "epoch": 1.6692388920477623, "grad_norm": 7.156922817230225, "learning_rate": 6.7435775479776066e-06, "loss": 0.55968351, "memory(GiB)": 34.88, "step": 61650, "train_speed(iter/s)": 0.410823 }, { "acc": 0.91792259, "epoch": 1.6693742723309777, "grad_norm": 6.344970703125, "learning_rate": 6.743053084419152e-06, "loss": 0.3804595, "memory(GiB)": 34.88, "step": 61655, "train_speed(iter/s)": 0.410825 }, { "acc": 0.89685745, "epoch": 1.6695096526141933, "grad_norm": 18.212631225585938, "learning_rate": 6.742528599031797e-06, "loss": 0.55408392, "memory(GiB)": 34.88, "step": 61660, "train_speed(iter/s)": 0.410827 }, { "acc": 0.90768442, "epoch": 1.6696450328974088, "grad_norm": 12.05884075164795, "learning_rate": 6.7420040918221095e-06, "loss": 0.53186674, "memory(GiB)": 34.88, "step": 61665, "train_speed(iter/s)": 0.410828 }, { "acc": 0.91109209, "epoch": 1.6697804131806244, "grad_norm": 4.388837814331055, "learning_rate": 6.7414795627966615e-06, "loss": 0.52502565, "memory(GiB)": 34.88, "step": 61670, "train_speed(iter/s)": 0.41083 }, { "acc": 0.92105083, "epoch": 1.66991579346384, "grad_norm": 9.916513442993164, "learning_rate": 6.7409550119620235e-06, "loss": 0.43521471, "memory(GiB)": 34.88, "step": 61675, "train_speed(iter/s)": 0.410832 }, { "acc": 0.90037909, "epoch": 1.6700511737470554, "grad_norm": 4.713771343231201, "learning_rate": 6.7404304393247675e-06, "loss": 0.50743327, "memory(GiB)": 34.88, "step": 61680, "train_speed(iter/s)": 0.410834 }, { "acc": 0.89473906, "epoch": 1.6701865540302712, "grad_norm": 6.9639201164245605, "learning_rate": 6.739905844891463e-06, "loss": 0.58583937, "memory(GiB)": 34.88, "step": 61685, "train_speed(iter/s)": 0.410836 }, { "acc": 0.91760216, "epoch": 1.6703219343134865, "grad_norm": 6.167989253997803, "learning_rate": 6.739381228668681e-06, "loss": 0.38202865, "memory(GiB)": 34.88, "step": 61690, "train_speed(iter/s)": 0.410838 }, { "acc": 0.91299744, "epoch": 1.670457314596702, "grad_norm": 5.385618686676025, "learning_rate": 6.7388565906629975e-06, "loss": 0.4326396, "memory(GiB)": 34.88, "step": 61695, "train_speed(iter/s)": 0.41084 }, { "acc": 0.91431007, "epoch": 1.6705926948799177, "grad_norm": 8.369454383850098, "learning_rate": 6.738331930880979e-06, "loss": 0.43308396, "memory(GiB)": 34.88, "step": 61700, "train_speed(iter/s)": 0.410842 }, { "acc": 0.91437473, "epoch": 1.6707280751631333, "grad_norm": 16.101354598999023, "learning_rate": 6.737807249329203e-06, "loss": 0.52547445, "memory(GiB)": 34.88, "step": 61705, "train_speed(iter/s)": 0.410844 }, { "acc": 0.89563923, "epoch": 1.6708634554463488, "grad_norm": 9.626032829284668, "learning_rate": 6.7372825460142385e-06, "loss": 0.51222401, "memory(GiB)": 34.88, "step": 61710, "train_speed(iter/s)": 0.410846 }, { "acc": 0.91183281, "epoch": 1.6709988357295642, "grad_norm": 3.362877130508423, "learning_rate": 6.736757820942661e-06, "loss": 0.52088499, "memory(GiB)": 34.88, "step": 61715, "train_speed(iter/s)": 0.410848 }, { "acc": 0.90816269, "epoch": 1.67113421601278, "grad_norm": 10.084380149841309, "learning_rate": 6.736233074121041e-06, "loss": 0.46948528, "memory(GiB)": 34.88, "step": 61720, "train_speed(iter/s)": 0.410849 }, { "acc": 0.91379375, "epoch": 1.6712695962959954, "grad_norm": 7.428598880767822, "learning_rate": 6.735708305555956e-06, "loss": 0.50824103, "memory(GiB)": 34.88, "step": 61725, "train_speed(iter/s)": 0.410851 }, { "acc": 0.90579166, "epoch": 1.6714049765792112, "grad_norm": 8.974690437316895, "learning_rate": 6.735183515253976e-06, "loss": 0.53433967, "memory(GiB)": 34.88, "step": 61730, "train_speed(iter/s)": 0.410852 }, { "acc": 0.90982323, "epoch": 1.6715403568624265, "grad_norm": 3.866711139678955, "learning_rate": 6.734658703221675e-06, "loss": 0.42211037, "memory(GiB)": 34.88, "step": 61735, "train_speed(iter/s)": 0.410854 }, { "acc": 0.92015038, "epoch": 1.671675737145642, "grad_norm": 5.819150447845459, "learning_rate": 6.73413386946563e-06, "loss": 0.3713418, "memory(GiB)": 34.88, "step": 61740, "train_speed(iter/s)": 0.410856 }, { "acc": 0.91383104, "epoch": 1.6718111174288577, "grad_norm": 8.351369857788086, "learning_rate": 6.733609013992414e-06, "loss": 0.52016835, "memory(GiB)": 34.88, "step": 61745, "train_speed(iter/s)": 0.410858 }, { "acc": 0.91840715, "epoch": 1.6719464977120733, "grad_norm": 4.1810150146484375, "learning_rate": 6.733084136808602e-06, "loss": 0.41601477, "memory(GiB)": 34.88, "step": 61750, "train_speed(iter/s)": 0.41086 }, { "acc": 0.90037518, "epoch": 1.6720818779952888, "grad_norm": 13.01550579071045, "learning_rate": 6.732559237920768e-06, "loss": 0.46776843, "memory(GiB)": 34.88, "step": 61755, "train_speed(iter/s)": 0.410861 }, { "acc": 0.91634998, "epoch": 1.6722172582785042, "grad_norm": 24.695253372192383, "learning_rate": 6.732034317335491e-06, "loss": 0.49300604, "memory(GiB)": 34.88, "step": 61760, "train_speed(iter/s)": 0.410863 }, { "acc": 0.90773182, "epoch": 1.67235263856172, "grad_norm": 8.077762603759766, "learning_rate": 6.731509375059343e-06, "loss": 0.55244055, "memory(GiB)": 34.88, "step": 61765, "train_speed(iter/s)": 0.410865 }, { "acc": 0.90644855, "epoch": 1.6724880188449354, "grad_norm": 11.05158519744873, "learning_rate": 6.7309844110988995e-06, "loss": 0.54596834, "memory(GiB)": 34.88, "step": 61770, "train_speed(iter/s)": 0.410867 }, { "acc": 0.91331024, "epoch": 1.672623399128151, "grad_norm": 11.143959999084473, "learning_rate": 6.730459425460742e-06, "loss": 0.46836309, "memory(GiB)": 34.88, "step": 61775, "train_speed(iter/s)": 0.410869 }, { "acc": 0.91706867, "epoch": 1.6727587794113665, "grad_norm": 6.612564563751221, "learning_rate": 6.72993441815144e-06, "loss": 0.46297803, "memory(GiB)": 34.88, "step": 61780, "train_speed(iter/s)": 0.410871 }, { "acc": 0.90717545, "epoch": 1.672894159694582, "grad_norm": 21.205297470092773, "learning_rate": 6.729409389177576e-06, "loss": 0.49872904, "memory(GiB)": 34.88, "step": 61785, "train_speed(iter/s)": 0.410872 }, { "acc": 0.90514708, "epoch": 1.6730295399777977, "grad_norm": 6.2027506828308105, "learning_rate": 6.728884338545721e-06, "loss": 0.43865976, "memory(GiB)": 34.88, "step": 61790, "train_speed(iter/s)": 0.410874 }, { "acc": 0.88511238, "epoch": 1.673164920261013, "grad_norm": 11.3414888381958, "learning_rate": 6.72835926626246e-06, "loss": 0.69111114, "memory(GiB)": 34.88, "step": 61795, "train_speed(iter/s)": 0.410875 }, { "acc": 0.91006384, "epoch": 1.6733003005442288, "grad_norm": 3.4156861305236816, "learning_rate": 6.7278341723343655e-06, "loss": 0.4270443, "memory(GiB)": 34.88, "step": 61800, "train_speed(iter/s)": 0.410877 }, { "acc": 0.91426744, "epoch": 1.6734356808274442, "grad_norm": 10.462882041931152, "learning_rate": 6.727309056768015e-06, "loss": 0.49942322, "memory(GiB)": 34.88, "step": 61805, "train_speed(iter/s)": 0.410879 }, { "acc": 0.91139736, "epoch": 1.67357106111066, "grad_norm": 4.622138500213623, "learning_rate": 6.726783919569989e-06, "loss": 0.38415475, "memory(GiB)": 34.88, "step": 61810, "train_speed(iter/s)": 0.410881 }, { "acc": 0.90027218, "epoch": 1.6737064413938754, "grad_norm": 7.620891094207764, "learning_rate": 6.7262587607468635e-06, "loss": 0.44334717, "memory(GiB)": 34.88, "step": 61815, "train_speed(iter/s)": 0.410883 }, { "acc": 0.90361881, "epoch": 1.673841821677091, "grad_norm": 4.221364498138428, "learning_rate": 6.725733580305219e-06, "loss": 0.53383646, "memory(GiB)": 34.88, "step": 61820, "train_speed(iter/s)": 0.410885 }, { "acc": 0.91013002, "epoch": 1.6739772019603065, "grad_norm": 6.087482929229736, "learning_rate": 6.725208378251634e-06, "loss": 0.46424489, "memory(GiB)": 34.88, "step": 61825, "train_speed(iter/s)": 0.410887 }, { "acc": 0.9109972, "epoch": 1.674112582243522, "grad_norm": 9.308506965637207, "learning_rate": 6.724683154592688e-06, "loss": 0.54042778, "memory(GiB)": 34.88, "step": 61830, "train_speed(iter/s)": 0.410888 }, { "acc": 0.92592468, "epoch": 1.6742479625267377, "grad_norm": 4.7555975914001465, "learning_rate": 6.724157909334959e-06, "loss": 0.34832439, "memory(GiB)": 34.88, "step": 61835, "train_speed(iter/s)": 0.41089 }, { "acc": 0.91190891, "epoch": 1.674383342809953, "grad_norm": 7.409592628479004, "learning_rate": 6.723632642485029e-06, "loss": 0.46342211, "memory(GiB)": 34.88, "step": 61840, "train_speed(iter/s)": 0.410892 }, { "acc": 0.89609528, "epoch": 1.6745187230931688, "grad_norm": 7.096837043762207, "learning_rate": 6.7231073540494755e-06, "loss": 0.52336769, "memory(GiB)": 34.88, "step": 61845, "train_speed(iter/s)": 0.410893 }, { "acc": 0.9142664, "epoch": 1.6746541033763842, "grad_norm": 6.661782264709473, "learning_rate": 6.722582044034879e-06, "loss": 0.46581368, "memory(GiB)": 34.88, "step": 61850, "train_speed(iter/s)": 0.410895 }, { "acc": 0.91065025, "epoch": 1.6747894836595998, "grad_norm": 15.229289054870605, "learning_rate": 6.722056712447823e-06, "loss": 0.48900309, "memory(GiB)": 34.88, "step": 61855, "train_speed(iter/s)": 0.410897 }, { "acc": 0.91641998, "epoch": 1.6749248639428154, "grad_norm": 6.482406139373779, "learning_rate": 6.721531359294886e-06, "loss": 0.44771619, "memory(GiB)": 34.88, "step": 61860, "train_speed(iter/s)": 0.410899 }, { "acc": 0.91107302, "epoch": 1.675060244226031, "grad_norm": 5.1837358474731445, "learning_rate": 6.721005984582649e-06, "loss": 0.51369619, "memory(GiB)": 34.88, "step": 61865, "train_speed(iter/s)": 0.4109 }, { "acc": 0.91100721, "epoch": 1.6751956245092465, "grad_norm": 9.818205833435059, "learning_rate": 6.720480588317694e-06, "loss": 0.44228468, "memory(GiB)": 34.88, "step": 61870, "train_speed(iter/s)": 0.410902 }, { "acc": 0.90113258, "epoch": 1.6753310047924619, "grad_norm": 9.765593528747559, "learning_rate": 6.719955170506601e-06, "loss": 0.4851697, "memory(GiB)": 34.88, "step": 61875, "train_speed(iter/s)": 0.410904 }, { "acc": 0.89589233, "epoch": 1.6754663850756777, "grad_norm": 21.3605899810791, "learning_rate": 6.719429731155956e-06, "loss": 0.52612348, "memory(GiB)": 34.88, "step": 61880, "train_speed(iter/s)": 0.410906 }, { "acc": 0.92107658, "epoch": 1.675601765358893, "grad_norm": 8.066987037658691, "learning_rate": 6.718904270272335e-06, "loss": 0.4246232, "memory(GiB)": 34.88, "step": 61885, "train_speed(iter/s)": 0.410907 }, { "acc": 0.92211113, "epoch": 1.6757371456421088, "grad_norm": 8.78195571899414, "learning_rate": 6.718378787862326e-06, "loss": 0.46252146, "memory(GiB)": 34.88, "step": 61890, "train_speed(iter/s)": 0.410909 }, { "acc": 0.91196442, "epoch": 1.6758725259253242, "grad_norm": 7.205535888671875, "learning_rate": 6.717853283932511e-06, "loss": 0.49641609, "memory(GiB)": 34.88, "step": 61895, "train_speed(iter/s)": 0.410911 }, { "acc": 0.93487024, "epoch": 1.6760079062085398, "grad_norm": 4.281613826751709, "learning_rate": 6.717327758489469e-06, "loss": 0.32870991, "memory(GiB)": 34.88, "step": 61900, "train_speed(iter/s)": 0.410913 }, { "acc": 0.92311983, "epoch": 1.6761432864917554, "grad_norm": 27.17538833618164, "learning_rate": 6.7168022115397864e-06, "loss": 0.44046683, "memory(GiB)": 34.88, "step": 61905, "train_speed(iter/s)": 0.410915 }, { "acc": 0.91320124, "epoch": 1.6762786667749707, "grad_norm": 19.538814544677734, "learning_rate": 6.716276643090047e-06, "loss": 0.47514071, "memory(GiB)": 34.88, "step": 61910, "train_speed(iter/s)": 0.410916 }, { "acc": 0.92378187, "epoch": 1.6764140470581865, "grad_norm": 18.479198455810547, "learning_rate": 6.715751053146833e-06, "loss": 0.39026809, "memory(GiB)": 34.88, "step": 61915, "train_speed(iter/s)": 0.410917 }, { "acc": 0.88863907, "epoch": 1.6765494273414019, "grad_norm": 9.528035163879395, "learning_rate": 6.715225441716727e-06, "loss": 0.64674397, "memory(GiB)": 34.88, "step": 61920, "train_speed(iter/s)": 0.410919 }, { "acc": 0.9047987, "epoch": 1.6766848076246177, "grad_norm": 10.402928352355957, "learning_rate": 6.714699808806319e-06, "loss": 0.53148751, "memory(GiB)": 34.88, "step": 61925, "train_speed(iter/s)": 0.410921 }, { "acc": 0.90520058, "epoch": 1.676820187907833, "grad_norm": 6.619893550872803, "learning_rate": 6.7141741544221876e-06, "loss": 0.51348753, "memory(GiB)": 34.88, "step": 61930, "train_speed(iter/s)": 0.410923 }, { "acc": 0.89503632, "epoch": 1.6769555681910486, "grad_norm": 9.200736999511719, "learning_rate": 6.7136484785709225e-06, "loss": 0.54727988, "memory(GiB)": 34.88, "step": 61935, "train_speed(iter/s)": 0.410925 }, { "acc": 0.91861076, "epoch": 1.6770909484742642, "grad_norm": 8.46275520324707, "learning_rate": 6.7131227812591045e-06, "loss": 0.39155908, "memory(GiB)": 34.88, "step": 61940, "train_speed(iter/s)": 0.410926 }, { "acc": 0.89948969, "epoch": 1.6772263287574798, "grad_norm": 8.698445320129395, "learning_rate": 6.71259706249332e-06, "loss": 0.57335739, "memory(GiB)": 34.88, "step": 61945, "train_speed(iter/s)": 0.410928 }, { "acc": 0.90731525, "epoch": 1.6773617090406954, "grad_norm": 8.085103034973145, "learning_rate": 6.712071322280156e-06, "loss": 0.47604384, "memory(GiB)": 34.88, "step": 61950, "train_speed(iter/s)": 0.41093 }, { "acc": 0.91007624, "epoch": 1.6774970893239107, "grad_norm": 12.58225154876709, "learning_rate": 6.711545560626198e-06, "loss": 0.43613796, "memory(GiB)": 34.88, "step": 61955, "train_speed(iter/s)": 0.410932 }, { "acc": 0.91208429, "epoch": 1.6776324696071265, "grad_norm": 10.534451484680176, "learning_rate": 6.7110197775380335e-06, "loss": 0.53227439, "memory(GiB)": 34.88, "step": 61960, "train_speed(iter/s)": 0.410934 }, { "acc": 0.92108126, "epoch": 1.677767849890342, "grad_norm": 5.259311676025391, "learning_rate": 6.710493973022248e-06, "loss": 0.37649324, "memory(GiB)": 34.88, "step": 61965, "train_speed(iter/s)": 0.410936 }, { "acc": 0.90630827, "epoch": 1.6779032301735575, "grad_norm": 6.717191219329834, "learning_rate": 6.709968147085427e-06, "loss": 0.49063444, "memory(GiB)": 34.88, "step": 61970, "train_speed(iter/s)": 0.410938 }, { "acc": 0.90312519, "epoch": 1.678038610456773, "grad_norm": 6.18538761138916, "learning_rate": 6.709442299734157e-06, "loss": 0.57359056, "memory(GiB)": 34.88, "step": 61975, "train_speed(iter/s)": 0.410939 }, { "acc": 0.91555138, "epoch": 1.6781739907399886, "grad_norm": 6.9631524085998535, "learning_rate": 6.708916430975029e-06, "loss": 0.48795447, "memory(GiB)": 34.88, "step": 61980, "train_speed(iter/s)": 0.410941 }, { "acc": 0.92990704, "epoch": 1.6783093710232042, "grad_norm": 5.335236549377441, "learning_rate": 6.708390540814626e-06, "loss": 0.38530841, "memory(GiB)": 34.88, "step": 61985, "train_speed(iter/s)": 0.410943 }, { "acc": 0.91082172, "epoch": 1.6784447513064196, "grad_norm": 25.87760353088379, "learning_rate": 6.7078646292595395e-06, "loss": 0.50362148, "memory(GiB)": 34.88, "step": 61990, "train_speed(iter/s)": 0.410945 }, { "acc": 0.90443363, "epoch": 1.6785801315896354, "grad_norm": 12.278308868408203, "learning_rate": 6.707338696316355e-06, "loss": 0.5384078, "memory(GiB)": 34.88, "step": 61995, "train_speed(iter/s)": 0.410947 }, { "acc": 0.89643269, "epoch": 1.6787155118728507, "grad_norm": 10.488439559936523, "learning_rate": 6.7068127419916615e-06, "loss": 0.58537674, "memory(GiB)": 34.88, "step": 62000, "train_speed(iter/s)": 0.410949 }, { "acc": 0.90608749, "epoch": 1.6788508921560665, "grad_norm": 14.93879508972168, "learning_rate": 6.7062867662920495e-06, "loss": 0.53099017, "memory(GiB)": 34.88, "step": 62005, "train_speed(iter/s)": 0.41095 }, { "acc": 0.9019248, "epoch": 1.678986272439282, "grad_norm": 8.409646987915039, "learning_rate": 6.705760769224104e-06, "loss": 0.59271226, "memory(GiB)": 34.88, "step": 62010, "train_speed(iter/s)": 0.410952 }, { "acc": 0.89654255, "epoch": 1.6791216527224975, "grad_norm": 13.215341567993164, "learning_rate": 6.705234750794419e-06, "loss": 0.49322701, "memory(GiB)": 34.88, "step": 62015, "train_speed(iter/s)": 0.410954 }, { "acc": 0.91856222, "epoch": 1.679257033005713, "grad_norm": 8.117104530334473, "learning_rate": 6.704708711009582e-06, "loss": 0.45440845, "memory(GiB)": 34.88, "step": 62020, "train_speed(iter/s)": 0.410956 }, { "acc": 0.9089077, "epoch": 1.6793924132889286, "grad_norm": 3.860626220703125, "learning_rate": 6.704182649876181e-06, "loss": 0.50659165, "memory(GiB)": 34.88, "step": 62025, "train_speed(iter/s)": 0.410957 }, { "acc": 0.91690502, "epoch": 1.6795277935721442, "grad_norm": 14.595905303955078, "learning_rate": 6.703656567400806e-06, "loss": 0.43626652, "memory(GiB)": 34.88, "step": 62030, "train_speed(iter/s)": 0.410959 }, { "acc": 0.89596958, "epoch": 1.6796631738553596, "grad_norm": 16.356067657470703, "learning_rate": 6.7031304635900486e-06, "loss": 0.63774652, "memory(GiB)": 34.88, "step": 62035, "train_speed(iter/s)": 0.410961 }, { "acc": 0.89665642, "epoch": 1.6797985541385754, "grad_norm": 11.732548713684082, "learning_rate": 6.7026043384505e-06, "loss": 0.49686637, "memory(GiB)": 34.88, "step": 62040, "train_speed(iter/s)": 0.410962 }, { "acc": 0.92303095, "epoch": 1.6799339344217907, "grad_norm": 8.08838176727295, "learning_rate": 6.70207819198875e-06, "loss": 0.44300432, "memory(GiB)": 34.88, "step": 62045, "train_speed(iter/s)": 0.410964 }, { "acc": 0.90403595, "epoch": 1.6800693147050063, "grad_norm": 47.04771423339844, "learning_rate": 6.70155202421139e-06, "loss": 0.57425547, "memory(GiB)": 34.88, "step": 62050, "train_speed(iter/s)": 0.410966 }, { "acc": 0.91883011, "epoch": 1.680204694988222, "grad_norm": 10.761983871459961, "learning_rate": 6.701025835125008e-06, "loss": 0.38362117, "memory(GiB)": 34.88, "step": 62055, "train_speed(iter/s)": 0.410968 }, { "acc": 0.90414648, "epoch": 1.6803400752714375, "grad_norm": 13.631343841552734, "learning_rate": 6.700499624736201e-06, "loss": 0.46716046, "memory(GiB)": 34.88, "step": 62060, "train_speed(iter/s)": 0.41097 }, { "acc": 0.90480442, "epoch": 1.680475455554653, "grad_norm": 8.813623428344727, "learning_rate": 6.699973393051558e-06, "loss": 0.50126982, "memory(GiB)": 34.88, "step": 62065, "train_speed(iter/s)": 0.410971 }, { "acc": 0.89589424, "epoch": 1.6806108358378684, "grad_norm": 10.829224586486816, "learning_rate": 6.6994471400776705e-06, "loss": 0.61144247, "memory(GiB)": 34.88, "step": 62070, "train_speed(iter/s)": 0.410973 }, { "acc": 0.92342567, "epoch": 1.6807462161210842, "grad_norm": 7.811389923095703, "learning_rate": 6.698920865821132e-06, "loss": 0.44612198, "memory(GiB)": 34.88, "step": 62075, "train_speed(iter/s)": 0.410975 }, { "acc": 0.9233078, "epoch": 1.6808815964042996, "grad_norm": 5.185702323913574, "learning_rate": 6.698394570288535e-06, "loss": 0.39419241, "memory(GiB)": 34.88, "step": 62080, "train_speed(iter/s)": 0.410977 }, { "acc": 0.90435314, "epoch": 1.6810169766875154, "grad_norm": 10.041894912719727, "learning_rate": 6.697868253486473e-06, "loss": 0.50683818, "memory(GiB)": 34.88, "step": 62085, "train_speed(iter/s)": 0.410979 }, { "acc": 0.89816551, "epoch": 1.6811523569707307, "grad_norm": 13.451794624328613, "learning_rate": 6.697341915421534e-06, "loss": 0.53378401, "memory(GiB)": 34.88, "step": 62090, "train_speed(iter/s)": 0.410981 }, { "acc": 0.89963541, "epoch": 1.6812877372539463, "grad_norm": 6.762233734130859, "learning_rate": 6.696815556100321e-06, "loss": 0.51637325, "memory(GiB)": 34.88, "step": 62095, "train_speed(iter/s)": 0.410982 }, { "acc": 0.89842587, "epoch": 1.681423117537162, "grad_norm": 10.10997486114502, "learning_rate": 6.696289175529419e-06, "loss": 0.50671282, "memory(GiB)": 34.88, "step": 62100, "train_speed(iter/s)": 0.410984 }, { "acc": 0.90698624, "epoch": 1.6815584978203775, "grad_norm": 6.259591579437256, "learning_rate": 6.695762773715426e-06, "loss": 0.52865329, "memory(GiB)": 34.88, "step": 62105, "train_speed(iter/s)": 0.410986 }, { "acc": 0.92425003, "epoch": 1.681693878103593, "grad_norm": 3.0780298709869385, "learning_rate": 6.695236350664938e-06, "loss": 0.43649588, "memory(GiB)": 34.88, "step": 62110, "train_speed(iter/s)": 0.410988 }, { "acc": 0.9129425, "epoch": 1.6818292583868084, "grad_norm": 8.147035598754883, "learning_rate": 6.694709906384544e-06, "loss": 0.48874345, "memory(GiB)": 34.88, "step": 62115, "train_speed(iter/s)": 0.41099 }, { "acc": 0.90981255, "epoch": 1.6819646386700242, "grad_norm": 7.291962146759033, "learning_rate": 6.694183440880843e-06, "loss": 0.43324738, "memory(GiB)": 34.88, "step": 62120, "train_speed(iter/s)": 0.410991 }, { "acc": 0.90632582, "epoch": 1.6821000189532396, "grad_norm": 29.68791389465332, "learning_rate": 6.693656954160429e-06, "loss": 0.4525095, "memory(GiB)": 34.88, "step": 62125, "train_speed(iter/s)": 0.410993 }, { "acc": 0.91433983, "epoch": 1.6822353992364552, "grad_norm": 2.8024206161499023, "learning_rate": 6.6931304462298975e-06, "loss": 0.39823537, "memory(GiB)": 34.88, "step": 62130, "train_speed(iter/s)": 0.410995 }, { "acc": 0.92264853, "epoch": 1.6823707795196707, "grad_norm": 6.762220859527588, "learning_rate": 6.692603917095843e-06, "loss": 0.40273471, "memory(GiB)": 34.88, "step": 62135, "train_speed(iter/s)": 0.410997 }, { "acc": 0.91151657, "epoch": 1.6825061598028863, "grad_norm": 7.797889232635498, "learning_rate": 6.6920773667648615e-06, "loss": 0.49857483, "memory(GiB)": 34.88, "step": 62140, "train_speed(iter/s)": 0.410998 }, { "acc": 0.88455191, "epoch": 1.682641540086102, "grad_norm": 13.629630088806152, "learning_rate": 6.691550795243552e-06, "loss": 0.64052596, "memory(GiB)": 34.88, "step": 62145, "train_speed(iter/s)": 0.411 }, { "acc": 0.90455523, "epoch": 1.6827769203693173, "grad_norm": 9.854389190673828, "learning_rate": 6.691024202538507e-06, "loss": 0.53339481, "memory(GiB)": 34.88, "step": 62150, "train_speed(iter/s)": 0.411002 }, { "acc": 0.88370199, "epoch": 1.682912300652533, "grad_norm": 6.09822940826416, "learning_rate": 6.6904975886563225e-06, "loss": 0.65882816, "memory(GiB)": 34.88, "step": 62155, "train_speed(iter/s)": 0.411004 }, { "acc": 0.90879135, "epoch": 1.6830476809357484, "grad_norm": 5.9061279296875, "learning_rate": 6.689970953603599e-06, "loss": 0.49103332, "memory(GiB)": 34.88, "step": 62160, "train_speed(iter/s)": 0.411005 }, { "acc": 0.89420195, "epoch": 1.6831830612189642, "grad_norm": 13.79716968536377, "learning_rate": 6.689444297386932e-06, "loss": 0.57460742, "memory(GiB)": 34.88, "step": 62165, "train_speed(iter/s)": 0.411007 }, { "acc": 0.9084053, "epoch": 1.6833184415021796, "grad_norm": 10.144791603088379, "learning_rate": 6.688917620012918e-06, "loss": 0.45109253, "memory(GiB)": 34.88, "step": 62170, "train_speed(iter/s)": 0.411009 }, { "acc": 0.91363497, "epoch": 1.6834538217853952, "grad_norm": 24.356687545776367, "learning_rate": 6.688390921488158e-06, "loss": 0.52658238, "memory(GiB)": 34.88, "step": 62175, "train_speed(iter/s)": 0.411011 }, { "acc": 0.89191389, "epoch": 1.6835892020686107, "grad_norm": 8.587064743041992, "learning_rate": 6.687864201819244e-06, "loss": 0.65586162, "memory(GiB)": 34.88, "step": 62180, "train_speed(iter/s)": 0.411013 }, { "acc": 0.9083457, "epoch": 1.6837245823518263, "grad_norm": 8.495963096618652, "learning_rate": 6.68733746101278e-06, "loss": 0.51227846, "memory(GiB)": 34.88, "step": 62185, "train_speed(iter/s)": 0.411014 }, { "acc": 0.90952225, "epoch": 1.683859962635042, "grad_norm": 11.857782363891602, "learning_rate": 6.686810699075363e-06, "loss": 0.46807323, "memory(GiB)": 34.88, "step": 62190, "train_speed(iter/s)": 0.411016 }, { "acc": 0.91480446, "epoch": 1.6839953429182573, "grad_norm": 5.264892578125, "learning_rate": 6.6862839160135885e-06, "loss": 0.49450283, "memory(GiB)": 34.88, "step": 62195, "train_speed(iter/s)": 0.411018 }, { "acc": 0.90058908, "epoch": 1.684130723201473, "grad_norm": 4.87408971786499, "learning_rate": 6.68575711183406e-06, "loss": 0.54173527, "memory(GiB)": 34.88, "step": 62200, "train_speed(iter/s)": 0.41102 }, { "acc": 0.89284945, "epoch": 1.6842661034846884, "grad_norm": 13.148021697998047, "learning_rate": 6.685230286543372e-06, "loss": 0.6098762, "memory(GiB)": 34.88, "step": 62205, "train_speed(iter/s)": 0.411022 }, { "acc": 0.90870409, "epoch": 1.684401483767904, "grad_norm": 61.69749069213867, "learning_rate": 6.68470344014813e-06, "loss": 0.48706036, "memory(GiB)": 34.88, "step": 62210, "train_speed(iter/s)": 0.411024 }, { "acc": 0.89921961, "epoch": 1.6845368640511196, "grad_norm": 7.644430160522461, "learning_rate": 6.684176572654928e-06, "loss": 0.56530423, "memory(GiB)": 34.88, "step": 62215, "train_speed(iter/s)": 0.411026 }, { "acc": 0.90255203, "epoch": 1.6846722443343352, "grad_norm": 8.570282936096191, "learning_rate": 6.683649684070368e-06, "loss": 0.51495872, "memory(GiB)": 34.88, "step": 62220, "train_speed(iter/s)": 0.411027 }, { "acc": 0.91015244, "epoch": 1.6848076246175507, "grad_norm": 6.609251022338867, "learning_rate": 6.683122774401054e-06, "loss": 0.50477195, "memory(GiB)": 34.88, "step": 62225, "train_speed(iter/s)": 0.411029 }, { "acc": 0.90594139, "epoch": 1.684943004900766, "grad_norm": 12.197054862976074, "learning_rate": 6.682595843653582e-06, "loss": 0.55524006, "memory(GiB)": 34.88, "step": 62230, "train_speed(iter/s)": 0.411031 }, { "acc": 0.89962807, "epoch": 1.685078385183982, "grad_norm": 10.917491912841797, "learning_rate": 6.6820688918345535e-06, "loss": 0.56069403, "memory(GiB)": 34.88, "step": 62235, "train_speed(iter/s)": 0.411032 }, { "acc": 0.89891205, "epoch": 1.6852137654671973, "grad_norm": 8.25035285949707, "learning_rate": 6.681541918950572e-06, "loss": 0.6250474, "memory(GiB)": 34.88, "step": 62240, "train_speed(iter/s)": 0.411034 }, { "acc": 0.912148, "epoch": 1.685349145750413, "grad_norm": 10.020350456237793, "learning_rate": 6.681014925008235e-06, "loss": 0.51000719, "memory(GiB)": 34.88, "step": 62245, "train_speed(iter/s)": 0.411036 }, { "acc": 0.89000626, "epoch": 1.6854845260336284, "grad_norm": 45.72592544555664, "learning_rate": 6.6804879100141465e-06, "loss": 0.60349517, "memory(GiB)": 34.88, "step": 62250, "train_speed(iter/s)": 0.411038 }, { "acc": 0.89964361, "epoch": 1.685619906316844, "grad_norm": 6.198527812957764, "learning_rate": 6.679960873974908e-06, "loss": 0.6207294, "memory(GiB)": 34.88, "step": 62255, "train_speed(iter/s)": 0.41104 }, { "acc": 0.90887699, "epoch": 1.6857552866000596, "grad_norm": 4.142719268798828, "learning_rate": 6.679433816897124e-06, "loss": 0.4441195, "memory(GiB)": 34.88, "step": 62260, "train_speed(iter/s)": 0.411041 }, { "acc": 0.91215267, "epoch": 1.6858906668832752, "grad_norm": 14.019684791564941, "learning_rate": 6.678906738787393e-06, "loss": 0.45568986, "memory(GiB)": 34.88, "step": 62265, "train_speed(iter/s)": 0.411043 }, { "acc": 0.91697426, "epoch": 1.6860260471664907, "grad_norm": 12.926122665405273, "learning_rate": 6.67837963965232e-06, "loss": 0.4448144, "memory(GiB)": 34.88, "step": 62270, "train_speed(iter/s)": 0.411045 }, { "acc": 0.90449209, "epoch": 1.686161427449706, "grad_norm": 6.086298942565918, "learning_rate": 6.677852519498509e-06, "loss": 0.4751822, "memory(GiB)": 34.88, "step": 62275, "train_speed(iter/s)": 0.411047 }, { "acc": 0.90747662, "epoch": 1.686296807732922, "grad_norm": 9.579275131225586, "learning_rate": 6.677325378332561e-06, "loss": 0.46278038, "memory(GiB)": 34.88, "step": 62280, "train_speed(iter/s)": 0.411048 }, { "acc": 0.91830025, "epoch": 1.6864321880161373, "grad_norm": 5.186293125152588, "learning_rate": 6.676798216161079e-06, "loss": 0.39169173, "memory(GiB)": 34.88, "step": 62285, "train_speed(iter/s)": 0.41105 }, { "acc": 0.90444965, "epoch": 1.6865675682993528, "grad_norm": 6.756953239440918, "learning_rate": 6.6762710329906684e-06, "loss": 0.5123662, "memory(GiB)": 34.88, "step": 62290, "train_speed(iter/s)": 0.411052 }, { "acc": 0.91860209, "epoch": 1.6867029485825684, "grad_norm": 6.198143482208252, "learning_rate": 6.675743828827933e-06, "loss": 0.50836458, "memory(GiB)": 34.88, "step": 62295, "train_speed(iter/s)": 0.411054 }, { "acc": 0.8895813, "epoch": 1.686838328865784, "grad_norm": 11.017181396484375, "learning_rate": 6.6752166036794765e-06, "loss": 0.52542305, "memory(GiB)": 34.88, "step": 62300, "train_speed(iter/s)": 0.411056 }, { "acc": 0.91287727, "epoch": 1.6869737091489996, "grad_norm": 18.974504470825195, "learning_rate": 6.674689357551903e-06, "loss": 0.42360411, "memory(GiB)": 34.88, "step": 62305, "train_speed(iter/s)": 0.411058 }, { "acc": 0.90709772, "epoch": 1.687109089432215, "grad_norm": 8.968430519104004, "learning_rate": 6.674162090451819e-06, "loss": 0.55343256, "memory(GiB)": 34.88, "step": 62310, "train_speed(iter/s)": 0.411059 }, { "acc": 0.91815701, "epoch": 1.6872444697154307, "grad_norm": 4.7753472328186035, "learning_rate": 6.67363480238583e-06, "loss": 0.47534971, "memory(GiB)": 34.88, "step": 62315, "train_speed(iter/s)": 0.411061 }, { "acc": 0.89990396, "epoch": 1.687379849998646, "grad_norm": 7.754997253417969, "learning_rate": 6.673107493360539e-06, "loss": 0.53461771, "memory(GiB)": 34.88, "step": 62320, "train_speed(iter/s)": 0.411063 }, { "acc": 0.90885048, "epoch": 1.687515230281862, "grad_norm": 3.8484556674957275, "learning_rate": 6.672580163382552e-06, "loss": 0.46115294, "memory(GiB)": 34.88, "step": 62325, "train_speed(iter/s)": 0.411065 }, { "acc": 0.9261569, "epoch": 1.6876506105650773, "grad_norm": 4.957632064819336, "learning_rate": 6.672052812458474e-06, "loss": 0.41235371, "memory(GiB)": 34.88, "step": 62330, "train_speed(iter/s)": 0.411067 }, { "acc": 0.88981514, "epoch": 1.6877859908482928, "grad_norm": 9.766782760620117, "learning_rate": 6.671525440594914e-06, "loss": 0.58243384, "memory(GiB)": 34.88, "step": 62335, "train_speed(iter/s)": 0.411069 }, { "acc": 0.90201349, "epoch": 1.6879213711315084, "grad_norm": 12.065448760986328, "learning_rate": 6.670998047798476e-06, "loss": 0.55609894, "memory(GiB)": 34.88, "step": 62340, "train_speed(iter/s)": 0.41107 }, { "acc": 0.91510162, "epoch": 1.688056751414724, "grad_norm": 7.796881198883057, "learning_rate": 6.670470634075768e-06, "loss": 0.47561064, "memory(GiB)": 34.88, "step": 62345, "train_speed(iter/s)": 0.411072 }, { "acc": 0.91120262, "epoch": 1.6881921316979396, "grad_norm": 6.701786994934082, "learning_rate": 6.669943199433396e-06, "loss": 0.42643847, "memory(GiB)": 34.88, "step": 62350, "train_speed(iter/s)": 0.411074 }, { "acc": 0.88813896, "epoch": 1.688327511981155, "grad_norm": 13.3583345413208, "learning_rate": 6.669415743877968e-06, "loss": 0.58348436, "memory(GiB)": 34.88, "step": 62355, "train_speed(iter/s)": 0.411076 }, { "acc": 0.89676991, "epoch": 1.6884628922643707, "grad_norm": 11.715170860290527, "learning_rate": 6.668888267416092e-06, "loss": 0.6106751, "memory(GiB)": 34.88, "step": 62360, "train_speed(iter/s)": 0.411078 }, { "acc": 0.91529331, "epoch": 1.688598272547586, "grad_norm": 9.674494743347168, "learning_rate": 6.668360770054372e-06, "loss": 0.49114199, "memory(GiB)": 34.88, "step": 62365, "train_speed(iter/s)": 0.411079 }, { "acc": 0.91684647, "epoch": 1.6887336528308017, "grad_norm": 5.256096839904785, "learning_rate": 6.667833251799418e-06, "loss": 0.44652233, "memory(GiB)": 34.88, "step": 62370, "train_speed(iter/s)": 0.411081 }, { "acc": 0.91304874, "epoch": 1.6888690331140173, "grad_norm": 20.64736557006836, "learning_rate": 6.667305712657842e-06, "loss": 0.44034209, "memory(GiB)": 34.88, "step": 62375, "train_speed(iter/s)": 0.411083 }, { "acc": 0.90852203, "epoch": 1.6890044133972328, "grad_norm": 12.819653511047363, "learning_rate": 6.666778152636248e-06, "loss": 0.5268681, "memory(GiB)": 34.88, "step": 62380, "train_speed(iter/s)": 0.411085 }, { "acc": 0.91042347, "epoch": 1.6891397936804484, "grad_norm": 6.64156436920166, "learning_rate": 6.666250571741244e-06, "loss": 0.52178507, "memory(GiB)": 34.88, "step": 62385, "train_speed(iter/s)": 0.411086 }, { "acc": 0.9124649, "epoch": 1.6892751739636638, "grad_norm": 5.836840629577637, "learning_rate": 6.665722969979442e-06, "loss": 0.48289394, "memory(GiB)": 34.88, "step": 62390, "train_speed(iter/s)": 0.411088 }, { "acc": 0.90314236, "epoch": 1.6894105542468796, "grad_norm": 9.0068998336792, "learning_rate": 6.665195347357452e-06, "loss": 0.5824635, "memory(GiB)": 34.88, "step": 62395, "train_speed(iter/s)": 0.41109 }, { "acc": 0.90574255, "epoch": 1.689545934530095, "grad_norm": 8.426191329956055, "learning_rate": 6.66466770388188e-06, "loss": 0.52393603, "memory(GiB)": 34.88, "step": 62400, "train_speed(iter/s)": 0.411091 }, { "acc": 0.92546864, "epoch": 1.6896813148133107, "grad_norm": 10.432770729064941, "learning_rate": 6.664140039559337e-06, "loss": 0.41628122, "memory(GiB)": 34.88, "step": 62405, "train_speed(iter/s)": 0.411093 }, { "acc": 0.90101757, "epoch": 1.689816695096526, "grad_norm": 7.668822765350342, "learning_rate": 6.663612354396435e-06, "loss": 0.57987375, "memory(GiB)": 34.88, "step": 62410, "train_speed(iter/s)": 0.411095 }, { "acc": 0.91417007, "epoch": 1.6899520753797417, "grad_norm": 10.149991989135742, "learning_rate": 6.66308464839978e-06, "loss": 0.48771396, "memory(GiB)": 34.88, "step": 62415, "train_speed(iter/s)": 0.411097 }, { "acc": 0.91972179, "epoch": 1.6900874556629573, "grad_norm": 3.9170305728912354, "learning_rate": 6.662556921575988e-06, "loss": 0.40801592, "memory(GiB)": 34.88, "step": 62420, "train_speed(iter/s)": 0.411099 }, { "acc": 0.92102394, "epoch": 1.6902228359461728, "grad_norm": 9.014772415161133, "learning_rate": 6.662029173931666e-06, "loss": 0.44717264, "memory(GiB)": 34.88, "step": 62425, "train_speed(iter/s)": 0.4111 }, { "acc": 0.89733782, "epoch": 1.6903582162293884, "grad_norm": 6.788616180419922, "learning_rate": 6.661501405473426e-06, "loss": 0.52673965, "memory(GiB)": 34.88, "step": 62430, "train_speed(iter/s)": 0.411102 }, { "acc": 0.9286293, "epoch": 1.6904935965126038, "grad_norm": 8.49749755859375, "learning_rate": 6.66097361620788e-06, "loss": 0.4223846, "memory(GiB)": 34.88, "step": 62435, "train_speed(iter/s)": 0.411104 }, { "acc": 0.9194025, "epoch": 1.6906289767958196, "grad_norm": 5.980205059051514, "learning_rate": 6.66044580614164e-06, "loss": 0.3878027, "memory(GiB)": 34.88, "step": 62440, "train_speed(iter/s)": 0.411105 }, { "acc": 0.88752499, "epoch": 1.690764357079035, "grad_norm": 7.694263458251953, "learning_rate": 6.659917975281316e-06, "loss": 0.51167569, "memory(GiB)": 34.88, "step": 62445, "train_speed(iter/s)": 0.411107 }, { "acc": 0.8964592, "epoch": 1.6908997373622505, "grad_norm": 15.90766716003418, "learning_rate": 6.659390123633521e-06, "loss": 0.49100175, "memory(GiB)": 34.88, "step": 62450, "train_speed(iter/s)": 0.411109 }, { "acc": 0.91805077, "epoch": 1.691035117645466, "grad_norm": 12.537638664245605, "learning_rate": 6.658862251204867e-06, "loss": 0.49238186, "memory(GiB)": 34.88, "step": 62455, "train_speed(iter/s)": 0.411111 }, { "acc": 0.91919165, "epoch": 1.6911704979286817, "grad_norm": 5.785762310028076, "learning_rate": 6.658334358001967e-06, "loss": 0.46269326, "memory(GiB)": 34.88, "step": 62460, "train_speed(iter/s)": 0.411113 }, { "acc": 0.89050846, "epoch": 1.6913058782118973, "grad_norm": 21.748939514160156, "learning_rate": 6.657806444031437e-06, "loss": 0.62998142, "memory(GiB)": 34.88, "step": 62465, "train_speed(iter/s)": 0.411115 }, { "acc": 0.91408424, "epoch": 1.6914412584951126, "grad_norm": 25.511838912963867, "learning_rate": 6.657278509299884e-06, "loss": 0.45045128, "memory(GiB)": 34.88, "step": 62470, "train_speed(iter/s)": 0.411116 }, { "acc": 0.91129694, "epoch": 1.6915766387783284, "grad_norm": 4.313765525817871, "learning_rate": 6.656750553813927e-06, "loss": 0.45826936, "memory(GiB)": 34.88, "step": 62475, "train_speed(iter/s)": 0.411118 }, { "acc": 0.90692902, "epoch": 1.6917120190615438, "grad_norm": 6.224250316619873, "learning_rate": 6.656222577580176e-06, "loss": 0.49632707, "memory(GiB)": 34.88, "step": 62480, "train_speed(iter/s)": 0.41112 }, { "acc": 0.91983509, "epoch": 1.6918473993447596, "grad_norm": 19.020421981811523, "learning_rate": 6.655694580605246e-06, "loss": 0.45300236, "memory(GiB)": 34.88, "step": 62485, "train_speed(iter/s)": 0.411122 }, { "acc": 0.91445208, "epoch": 1.691982779627975, "grad_norm": 3.5426907539367676, "learning_rate": 6.655166562895751e-06, "loss": 0.41590419, "memory(GiB)": 34.88, "step": 62490, "train_speed(iter/s)": 0.411124 }, { "acc": 0.89268799, "epoch": 1.6921181599111905, "grad_norm": 10.203503608703613, "learning_rate": 6.654638524458306e-06, "loss": 0.57795887, "memory(GiB)": 34.88, "step": 62495, "train_speed(iter/s)": 0.411125 }, { "acc": 0.91783915, "epoch": 1.692253540194406, "grad_norm": 5.922614574432373, "learning_rate": 6.654110465299527e-06, "loss": 0.41177006, "memory(GiB)": 34.88, "step": 62500, "train_speed(iter/s)": 0.411127 }, { "acc": 0.91979027, "epoch": 1.6923889204776217, "grad_norm": 3.9926095008850098, "learning_rate": 6.653582385426026e-06, "loss": 0.49588699, "memory(GiB)": 34.88, "step": 62505, "train_speed(iter/s)": 0.411129 }, { "acc": 0.92099857, "epoch": 1.6925243007608373, "grad_norm": 6.590603828430176, "learning_rate": 6.65305428484442e-06, "loss": 0.37931809, "memory(GiB)": 34.88, "step": 62510, "train_speed(iter/s)": 0.41113 }, { "acc": 0.91433372, "epoch": 1.6926596810440526, "grad_norm": 8.455998420715332, "learning_rate": 6.652526163561326e-06, "loss": 0.47909336, "memory(GiB)": 34.88, "step": 62515, "train_speed(iter/s)": 0.411132 }, { "acc": 0.91314545, "epoch": 1.6927950613272684, "grad_norm": 7.905567169189453, "learning_rate": 6.6519980215833545e-06, "loss": 0.45582857, "memory(GiB)": 34.88, "step": 62520, "train_speed(iter/s)": 0.411134 }, { "acc": 0.90352449, "epoch": 1.6929304416104838, "grad_norm": 5.489475250244141, "learning_rate": 6.651469858917129e-06, "loss": 0.5393012, "memory(GiB)": 34.88, "step": 62525, "train_speed(iter/s)": 0.411136 }, { "acc": 0.9070961, "epoch": 1.6930658218936994, "grad_norm": 7.441926956176758, "learning_rate": 6.65094167556926e-06, "loss": 0.48384018, "memory(GiB)": 34.88, "step": 62530, "train_speed(iter/s)": 0.411138 }, { "acc": 0.90586338, "epoch": 1.693201202176915, "grad_norm": 5.838936805725098, "learning_rate": 6.650413471546366e-06, "loss": 0.50226097, "memory(GiB)": 34.88, "step": 62535, "train_speed(iter/s)": 0.411139 }, { "acc": 0.88821564, "epoch": 1.6933365824601305, "grad_norm": 14.21687126159668, "learning_rate": 6.649885246855064e-06, "loss": 0.66997299, "memory(GiB)": 34.88, "step": 62540, "train_speed(iter/s)": 0.411141 }, { "acc": 0.90362549, "epoch": 1.693471962743346, "grad_norm": 8.39618968963623, "learning_rate": 6.649357001501972e-06, "loss": 0.54182343, "memory(GiB)": 34.88, "step": 62545, "train_speed(iter/s)": 0.411142 }, { "acc": 0.9014102, "epoch": 1.6936073430265615, "grad_norm": 6.7048115730285645, "learning_rate": 6.6488287354937045e-06, "loss": 0.51720657, "memory(GiB)": 34.88, "step": 62550, "train_speed(iter/s)": 0.411144 }, { "acc": 0.90565758, "epoch": 1.6937427233097773, "grad_norm": 4.947570323944092, "learning_rate": 6.64830044883688e-06, "loss": 0.55926819, "memory(GiB)": 34.88, "step": 62555, "train_speed(iter/s)": 0.411146 }, { "acc": 0.91507874, "epoch": 1.6938781035929926, "grad_norm": 7.234161376953125, "learning_rate": 6.647772141538118e-06, "loss": 0.37711501, "memory(GiB)": 34.88, "step": 62560, "train_speed(iter/s)": 0.411147 }, { "acc": 0.90200863, "epoch": 1.6940134838762084, "grad_norm": 8.65027141571045, "learning_rate": 6.647243813604036e-06, "loss": 0.49013729, "memory(GiB)": 34.88, "step": 62565, "train_speed(iter/s)": 0.411149 }, { "acc": 0.90726328, "epoch": 1.6941488641594238, "grad_norm": 6.285654544830322, "learning_rate": 6.646715465041251e-06, "loss": 0.495999, "memory(GiB)": 34.88, "step": 62570, "train_speed(iter/s)": 0.41115 }, { "acc": 0.89617805, "epoch": 1.6942842444426394, "grad_norm": 3.2895655632019043, "learning_rate": 6.646187095856382e-06, "loss": 0.57738848, "memory(GiB)": 34.88, "step": 62575, "train_speed(iter/s)": 0.411152 }, { "acc": 0.92220049, "epoch": 1.694419624725855, "grad_norm": 8.983014106750488, "learning_rate": 6.64565870605605e-06, "loss": 0.39469507, "memory(GiB)": 34.88, "step": 62580, "train_speed(iter/s)": 0.411154 }, { "acc": 0.91505032, "epoch": 1.6945550050090705, "grad_norm": 5.2800750732421875, "learning_rate": 6.645130295646871e-06, "loss": 0.40978298, "memory(GiB)": 34.88, "step": 62585, "train_speed(iter/s)": 0.411156 }, { "acc": 0.90568781, "epoch": 1.694690385292286, "grad_norm": 6.931899070739746, "learning_rate": 6.644601864635467e-06, "loss": 0.49502592, "memory(GiB)": 34.88, "step": 62590, "train_speed(iter/s)": 0.411158 }, { "acc": 0.91807575, "epoch": 1.6948257655755015, "grad_norm": 14.121809005737305, "learning_rate": 6.644073413028456e-06, "loss": 0.43997269, "memory(GiB)": 34.88, "step": 62595, "train_speed(iter/s)": 0.411159 }, { "acc": 0.90294991, "epoch": 1.6949611458587173, "grad_norm": 5.3721208572387695, "learning_rate": 6.643544940832457e-06, "loss": 0.54896927, "memory(GiB)": 34.88, "step": 62600, "train_speed(iter/s)": 0.411161 }, { "acc": 0.91837196, "epoch": 1.6950965261419326, "grad_norm": 8.25149917602539, "learning_rate": 6.6430164480540925e-06, "loss": 0.49381933, "memory(GiB)": 34.88, "step": 62605, "train_speed(iter/s)": 0.411163 }, { "acc": 0.90650311, "epoch": 1.6952319064251482, "grad_norm": 10.955426216125488, "learning_rate": 6.642487934699984e-06, "loss": 0.56977282, "memory(GiB)": 34.88, "step": 62610, "train_speed(iter/s)": 0.411164 }, { "acc": 0.92174244, "epoch": 1.6953672867083638, "grad_norm": 6.288532733917236, "learning_rate": 6.641959400776749e-06, "loss": 0.42208738, "memory(GiB)": 34.88, "step": 62615, "train_speed(iter/s)": 0.411166 }, { "acc": 0.89601822, "epoch": 1.6955026669915794, "grad_norm": 9.206807136535645, "learning_rate": 6.641430846291008e-06, "loss": 0.58450403, "memory(GiB)": 34.88, "step": 62620, "train_speed(iter/s)": 0.411168 }, { "acc": 0.89495831, "epoch": 1.695638047274795, "grad_norm": 6.68696928024292, "learning_rate": 6.640902271249384e-06, "loss": 0.58313417, "memory(GiB)": 34.88, "step": 62625, "train_speed(iter/s)": 0.41117 }, { "acc": 0.90908117, "epoch": 1.6957734275580103, "grad_norm": 35.79864501953125, "learning_rate": 6.640373675658498e-06, "loss": 0.51245832, "memory(GiB)": 34.88, "step": 62630, "train_speed(iter/s)": 0.411171 }, { "acc": 0.90553856, "epoch": 1.695908807841226, "grad_norm": 11.227152824401855, "learning_rate": 6.639845059524973e-06, "loss": 0.55574827, "memory(GiB)": 34.88, "step": 62635, "train_speed(iter/s)": 0.411173 }, { "acc": 0.91520872, "epoch": 1.6960441881244415, "grad_norm": 10.355463981628418, "learning_rate": 6.639316422855431e-06, "loss": 0.45027008, "memory(GiB)": 34.88, "step": 62640, "train_speed(iter/s)": 0.411175 }, { "acc": 0.88835859, "epoch": 1.6961795684076573, "grad_norm": 7.798751354217529, "learning_rate": 6.638787765656489e-06, "loss": 0.57031102, "memory(GiB)": 34.88, "step": 62645, "train_speed(iter/s)": 0.411177 }, { "acc": 0.89668636, "epoch": 1.6963149486908726, "grad_norm": 5.21006441116333, "learning_rate": 6.638259087934779e-06, "loss": 0.60745339, "memory(GiB)": 34.88, "step": 62650, "train_speed(iter/s)": 0.411179 }, { "acc": 0.90034018, "epoch": 1.6964503289740882, "grad_norm": 8.691633224487305, "learning_rate": 6.637730389696915e-06, "loss": 0.53328257, "memory(GiB)": 34.88, "step": 62655, "train_speed(iter/s)": 0.411181 }, { "acc": 0.8756424, "epoch": 1.6965857092573038, "grad_norm": 11.04359245300293, "learning_rate": 6.637201670949522e-06, "loss": 0.68101139, "memory(GiB)": 34.88, "step": 62660, "train_speed(iter/s)": 0.411183 }, { "acc": 0.90398712, "epoch": 1.6967210895405194, "grad_norm": 5.238134860992432, "learning_rate": 6.636672931699226e-06, "loss": 0.53055587, "memory(GiB)": 34.88, "step": 62665, "train_speed(iter/s)": 0.411184 }, { "acc": 0.89416685, "epoch": 1.696856469823735, "grad_norm": 15.005334854125977, "learning_rate": 6.636144171952649e-06, "loss": 0.58549271, "memory(GiB)": 34.88, "step": 62670, "train_speed(iter/s)": 0.411186 }, { "acc": 0.91367702, "epoch": 1.6969918501069503, "grad_norm": 11.275562286376953, "learning_rate": 6.6356153917164155e-06, "loss": 0.46565008, "memory(GiB)": 34.88, "step": 62675, "train_speed(iter/s)": 0.411188 }, { "acc": 0.91759186, "epoch": 1.697127230390166, "grad_norm": 6.731690406799316, "learning_rate": 6.635086590997148e-06, "loss": 0.43112783, "memory(GiB)": 34.88, "step": 62680, "train_speed(iter/s)": 0.411189 }, { "acc": 0.89343872, "epoch": 1.6972626106733815, "grad_norm": 41.982295989990234, "learning_rate": 6.634557769801471e-06, "loss": 0.50324717, "memory(GiB)": 34.88, "step": 62685, "train_speed(iter/s)": 0.411191 }, { "acc": 0.89174213, "epoch": 1.697397990956597, "grad_norm": 8.230695724487305, "learning_rate": 6.63402892813601e-06, "loss": 0.6487464, "memory(GiB)": 34.88, "step": 62690, "train_speed(iter/s)": 0.411193 }, { "acc": 0.90597563, "epoch": 1.6975333712398126, "grad_norm": 7.372643947601318, "learning_rate": 6.63350006600739e-06, "loss": 0.50715327, "memory(GiB)": 34.88, "step": 62695, "train_speed(iter/s)": 0.411195 }, { "acc": 0.9059906, "epoch": 1.6976687515230282, "grad_norm": 13.642468452453613, "learning_rate": 6.632971183422233e-06, "loss": 0.53486438, "memory(GiB)": 34.88, "step": 62700, "train_speed(iter/s)": 0.411197 }, { "acc": 0.91307125, "epoch": 1.6978041318062438, "grad_norm": 11.424934387207031, "learning_rate": 6.632442280387169e-06, "loss": 0.5139348, "memory(GiB)": 34.88, "step": 62705, "train_speed(iter/s)": 0.411198 }, { "acc": 0.92146273, "epoch": 1.6979395120894591, "grad_norm": 5.957557201385498, "learning_rate": 6.63191335690882e-06, "loss": 0.4714541, "memory(GiB)": 34.88, "step": 62710, "train_speed(iter/s)": 0.4112 }, { "acc": 0.92019129, "epoch": 1.698074892372675, "grad_norm": 8.91788101196289, "learning_rate": 6.631384412993813e-06, "loss": 0.39861088, "memory(GiB)": 34.88, "step": 62715, "train_speed(iter/s)": 0.411202 }, { "acc": 0.89686155, "epoch": 1.6982102726558903, "grad_norm": 22.56040382385254, "learning_rate": 6.630855448648775e-06, "loss": 0.61107988, "memory(GiB)": 34.88, "step": 62720, "train_speed(iter/s)": 0.411203 }, { "acc": 0.90948639, "epoch": 1.698345652939106, "grad_norm": 8.441640853881836, "learning_rate": 6.63032646388033e-06, "loss": 0.46807332, "memory(GiB)": 34.88, "step": 62725, "train_speed(iter/s)": 0.411205 }, { "acc": 0.91311083, "epoch": 1.6984810332223215, "grad_norm": 10.045611381530762, "learning_rate": 6.629797458695106e-06, "loss": 0.4085866, "memory(GiB)": 34.88, "step": 62730, "train_speed(iter/s)": 0.411207 }, { "acc": 0.90739202, "epoch": 1.698616413505537, "grad_norm": 7.039401531219482, "learning_rate": 6.629268433099732e-06, "loss": 0.44046035, "memory(GiB)": 34.88, "step": 62735, "train_speed(iter/s)": 0.411209 }, { "acc": 0.89959974, "epoch": 1.6987517937887526, "grad_norm": 12.801570892333984, "learning_rate": 6.628739387100831e-06, "loss": 0.61520824, "memory(GiB)": 34.88, "step": 62740, "train_speed(iter/s)": 0.411211 }, { "acc": 0.92542219, "epoch": 1.6988871740719682, "grad_norm": 4.63014554977417, "learning_rate": 6.628210320705033e-06, "loss": 0.35711679, "memory(GiB)": 34.88, "step": 62745, "train_speed(iter/s)": 0.411213 }, { "acc": 0.88906622, "epoch": 1.6990225543551838, "grad_norm": 5.61111307144165, "learning_rate": 6.627681233918963e-06, "loss": 0.54635973, "memory(GiB)": 34.88, "step": 62750, "train_speed(iter/s)": 0.411214 }, { "acc": 0.909095, "epoch": 1.6991579346383991, "grad_norm": 9.406709671020508, "learning_rate": 6.627152126749252e-06, "loss": 0.51422024, "memory(GiB)": 34.88, "step": 62755, "train_speed(iter/s)": 0.411216 }, { "acc": 0.91669188, "epoch": 1.699293314921615, "grad_norm": 5.116699695587158, "learning_rate": 6.626622999202527e-06, "loss": 0.46572123, "memory(GiB)": 34.88, "step": 62760, "train_speed(iter/s)": 0.411218 }, { "acc": 0.89951744, "epoch": 1.6994286952048303, "grad_norm": 6.091009140014648, "learning_rate": 6.626093851285416e-06, "loss": 0.52253389, "memory(GiB)": 34.88, "step": 62765, "train_speed(iter/s)": 0.41122 }, { "acc": 0.91179695, "epoch": 1.6995640754880459, "grad_norm": 9.075654983520508, "learning_rate": 6.6255646830045465e-06, "loss": 0.49171906, "memory(GiB)": 34.88, "step": 62770, "train_speed(iter/s)": 0.411221 }, { "acc": 0.90449371, "epoch": 1.6996994557712615, "grad_norm": 5.513859748840332, "learning_rate": 6.6250354943665504e-06, "loss": 0.52686157, "memory(GiB)": 34.88, "step": 62775, "train_speed(iter/s)": 0.411223 }, { "acc": 0.93112698, "epoch": 1.699834836054477, "grad_norm": 5.041333198547363, "learning_rate": 6.624506285378054e-06, "loss": 0.37222314, "memory(GiB)": 34.88, "step": 62780, "train_speed(iter/s)": 0.411225 }, { "acc": 0.89522085, "epoch": 1.6999702163376926, "grad_norm": 7.872111797332764, "learning_rate": 6.6239770560456875e-06, "loss": 0.57631855, "memory(GiB)": 34.88, "step": 62785, "train_speed(iter/s)": 0.411226 }, { "acc": 0.9070281, "epoch": 1.700105596620908, "grad_norm": 5.813760757446289, "learning_rate": 6.623447806376082e-06, "loss": 0.45543613, "memory(GiB)": 34.88, "step": 62790, "train_speed(iter/s)": 0.411228 }, { "acc": 0.8792861, "epoch": 1.7002409769041238, "grad_norm": 11.189983367919922, "learning_rate": 6.622918536375864e-06, "loss": 0.6669569, "memory(GiB)": 34.88, "step": 62795, "train_speed(iter/s)": 0.411229 }, { "acc": 0.90718861, "epoch": 1.7003763571873391, "grad_norm": 6.697086334228516, "learning_rate": 6.622389246051667e-06, "loss": 0.5107738, "memory(GiB)": 34.88, "step": 62800, "train_speed(iter/s)": 0.411231 }, { "acc": 0.91077271, "epoch": 1.700511737470555, "grad_norm": 10.446001052856445, "learning_rate": 6.621859935410121e-06, "loss": 0.4319787, "memory(GiB)": 34.88, "step": 62805, "train_speed(iter/s)": 0.411233 }, { "acc": 0.90718527, "epoch": 1.7006471177537703, "grad_norm": 14.706591606140137, "learning_rate": 6.621330604457855e-06, "loss": 0.50572729, "memory(GiB)": 34.88, "step": 62810, "train_speed(iter/s)": 0.411234 }, { "acc": 0.90065413, "epoch": 1.700782498036986, "grad_norm": 8.897445678710938, "learning_rate": 6.620801253201499e-06, "loss": 0.57438431, "memory(GiB)": 34.88, "step": 62815, "train_speed(iter/s)": 0.411236 }, { "acc": 0.91215267, "epoch": 1.7009178783202015, "grad_norm": 7.2391533851623535, "learning_rate": 6.62027188164769e-06, "loss": 0.48191929, "memory(GiB)": 34.88, "step": 62820, "train_speed(iter/s)": 0.411238 }, { "acc": 0.88473682, "epoch": 1.701053258603417, "grad_norm": 10.293386459350586, "learning_rate": 6.619742489803054e-06, "loss": 0.72875719, "memory(GiB)": 34.88, "step": 62825, "train_speed(iter/s)": 0.41124 }, { "acc": 0.89892292, "epoch": 1.7011886388866326, "grad_norm": 7.0668230056762695, "learning_rate": 6.619213077674221e-06, "loss": 0.55655022, "memory(GiB)": 34.88, "step": 62830, "train_speed(iter/s)": 0.411241 }, { "acc": 0.91121807, "epoch": 1.701324019169848, "grad_norm": 7.701427936553955, "learning_rate": 6.618683645267829e-06, "loss": 0.48785, "memory(GiB)": 34.88, "step": 62835, "train_speed(iter/s)": 0.411243 }, { "acc": 0.90043707, "epoch": 1.7014593994530638, "grad_norm": 5.400707721710205, "learning_rate": 6.618154192590506e-06, "loss": 0.49846859, "memory(GiB)": 34.88, "step": 62840, "train_speed(iter/s)": 0.411245 }, { "acc": 0.90006027, "epoch": 1.7015947797362792, "grad_norm": 3.8706612586975098, "learning_rate": 6.617624719648885e-06, "loss": 0.52005668, "memory(GiB)": 34.88, "step": 62845, "train_speed(iter/s)": 0.411247 }, { "acc": 0.92653475, "epoch": 1.7017301600194947, "grad_norm": 7.426080226898193, "learning_rate": 6.6170952264496e-06, "loss": 0.43520732, "memory(GiB)": 34.88, "step": 62850, "train_speed(iter/s)": 0.411248 }, { "acc": 0.8799984, "epoch": 1.7018655403027103, "grad_norm": 16.84572410583496, "learning_rate": 6.616565712999283e-06, "loss": 0.64019308, "memory(GiB)": 34.88, "step": 62855, "train_speed(iter/s)": 0.41125 }, { "acc": 0.91190224, "epoch": 1.702000920585926, "grad_norm": 8.8615083694458, "learning_rate": 6.616036179304569e-06, "loss": 0.49413896, "memory(GiB)": 34.88, "step": 62860, "train_speed(iter/s)": 0.411252 }, { "acc": 0.91188374, "epoch": 1.7021363008691415, "grad_norm": 5.593262195587158, "learning_rate": 6.6155066253720866e-06, "loss": 0.50261917, "memory(GiB)": 34.88, "step": 62865, "train_speed(iter/s)": 0.411254 }, { "acc": 0.9033699, "epoch": 1.7022716811523568, "grad_norm": 9.268601417541504, "learning_rate": 6.614977051208477e-06, "loss": 0.50012302, "memory(GiB)": 34.88, "step": 62870, "train_speed(iter/s)": 0.411256 }, { "acc": 0.89250851, "epoch": 1.7024070614355726, "grad_norm": 4.293179512023926, "learning_rate": 6.614447456820366e-06, "loss": 0.62505846, "memory(GiB)": 34.88, "step": 62875, "train_speed(iter/s)": 0.411257 }, { "acc": 0.91178017, "epoch": 1.702542441718788, "grad_norm": 4.333273887634277, "learning_rate": 6.613917842214394e-06, "loss": 0.45773444, "memory(GiB)": 34.88, "step": 62880, "train_speed(iter/s)": 0.411259 }, { "acc": 0.902318, "epoch": 1.7026778220020038, "grad_norm": 9.961050987243652, "learning_rate": 6.613388207397191e-06, "loss": 0.56290836, "memory(GiB)": 34.88, "step": 62885, "train_speed(iter/s)": 0.411261 }, { "acc": 0.91267843, "epoch": 1.7028132022852192, "grad_norm": 8.549590110778809, "learning_rate": 6.612858552375396e-06, "loss": 0.50694761, "memory(GiB)": 34.88, "step": 62890, "train_speed(iter/s)": 0.411262 }, { "acc": 0.91624975, "epoch": 1.7029485825684347, "grad_norm": 7.45953369140625, "learning_rate": 6.61232887715564e-06, "loss": 0.42628756, "memory(GiB)": 34.88, "step": 62895, "train_speed(iter/s)": 0.411264 }, { "acc": 0.91769886, "epoch": 1.7030839628516503, "grad_norm": 6.453602313995361, "learning_rate": 6.611799181744562e-06, "loss": 0.4518805, "memory(GiB)": 34.88, "step": 62900, "train_speed(iter/s)": 0.411266 }, { "acc": 0.89934349, "epoch": 1.703219343134866, "grad_norm": 10.952247619628906, "learning_rate": 6.6112694661487954e-06, "loss": 0.4607307, "memory(GiB)": 34.88, "step": 62905, "train_speed(iter/s)": 0.411268 }, { "acc": 0.90443411, "epoch": 1.7033547234180815, "grad_norm": 6.477667808532715, "learning_rate": 6.610739730374975e-06, "loss": 0.51144714, "memory(GiB)": 34.88, "step": 62910, "train_speed(iter/s)": 0.41127 }, { "acc": 0.91083956, "epoch": 1.7034901037012968, "grad_norm": 11.721158027648926, "learning_rate": 6.610209974429738e-06, "loss": 0.52100039, "memory(GiB)": 34.88, "step": 62915, "train_speed(iter/s)": 0.411272 }, { "acc": 0.91441584, "epoch": 1.7036254839845126, "grad_norm": 13.917424201965332, "learning_rate": 6.609680198319719e-06, "loss": 0.49972501, "memory(GiB)": 34.88, "step": 62920, "train_speed(iter/s)": 0.411273 }, { "acc": 0.9090354, "epoch": 1.703760864267728, "grad_norm": 6.368042469024658, "learning_rate": 6.6091504020515585e-06, "loss": 0.46338472, "memory(GiB)": 34.88, "step": 62925, "train_speed(iter/s)": 0.411275 }, { "acc": 0.91734037, "epoch": 1.7038962445509436, "grad_norm": 9.34216594696045, "learning_rate": 6.608620585631889e-06, "loss": 0.41734295, "memory(GiB)": 34.88, "step": 62930, "train_speed(iter/s)": 0.411277 }, { "acc": 0.91884985, "epoch": 1.7040316248341592, "grad_norm": 4.867424011230469, "learning_rate": 6.60809074906735e-06, "loss": 0.44289193, "memory(GiB)": 34.88, "step": 62935, "train_speed(iter/s)": 0.411278 }, { "acc": 0.91407347, "epoch": 1.7041670051173747, "grad_norm": 6.669612884521484, "learning_rate": 6.607560892364578e-06, "loss": 0.46346064, "memory(GiB)": 34.88, "step": 62940, "train_speed(iter/s)": 0.41128 }, { "acc": 0.91580944, "epoch": 1.7043023854005903, "grad_norm": 14.563705444335938, "learning_rate": 6.607031015530212e-06, "loss": 0.46916261, "memory(GiB)": 34.88, "step": 62945, "train_speed(iter/s)": 0.411282 }, { "acc": 0.92894049, "epoch": 1.7044377656838057, "grad_norm": 5.030964374542236, "learning_rate": 6.606501118570886e-06, "loss": 0.4161438, "memory(GiB)": 34.88, "step": 62950, "train_speed(iter/s)": 0.411284 }, { "acc": 0.92707033, "epoch": 1.7045731459670215, "grad_norm": 5.482761859893799, "learning_rate": 6.605971201493241e-06, "loss": 0.34429283, "memory(GiB)": 34.88, "step": 62955, "train_speed(iter/s)": 0.411285 }, { "acc": 0.89586468, "epoch": 1.7047085262502368, "grad_norm": 22.11054039001465, "learning_rate": 6.605441264303915e-06, "loss": 0.57219496, "memory(GiB)": 34.88, "step": 62960, "train_speed(iter/s)": 0.411287 }, { "acc": 0.88930426, "epoch": 1.7048439065334524, "grad_norm": 13.939987182617188, "learning_rate": 6.604911307009544e-06, "loss": 0.71118441, "memory(GiB)": 34.88, "step": 62965, "train_speed(iter/s)": 0.411289 }, { "acc": 0.88949575, "epoch": 1.704979286816668, "grad_norm": 12.465912818908691, "learning_rate": 6.604381329616771e-06, "loss": 0.68869677, "memory(GiB)": 34.88, "step": 62970, "train_speed(iter/s)": 0.41129 }, { "acc": 0.90493031, "epoch": 1.7051146670998836, "grad_norm": 9.349174499511719, "learning_rate": 6.603851332132231e-06, "loss": 0.44027858, "memory(GiB)": 34.88, "step": 62975, "train_speed(iter/s)": 0.411292 }, { "acc": 0.91696949, "epoch": 1.7052500473830992, "grad_norm": 16.5039119720459, "learning_rate": 6.603321314562565e-06, "loss": 0.4074337, "memory(GiB)": 34.88, "step": 62980, "train_speed(iter/s)": 0.411294 }, { "acc": 0.91479931, "epoch": 1.7053854276663145, "grad_norm": 9.167305946350098, "learning_rate": 6.602791276914415e-06, "loss": 0.56441045, "memory(GiB)": 34.88, "step": 62985, "train_speed(iter/s)": 0.411295 }, { "acc": 0.90937996, "epoch": 1.7055208079495303, "grad_norm": 16.46933937072754, "learning_rate": 6.602261219194417e-06, "loss": 0.46132851, "memory(GiB)": 34.88, "step": 62990, "train_speed(iter/s)": 0.411297 }, { "acc": 0.89264431, "epoch": 1.7056561882327457, "grad_norm": 8.738138198852539, "learning_rate": 6.601731141409211e-06, "loss": 0.62887192, "memory(GiB)": 34.88, "step": 62995, "train_speed(iter/s)": 0.411299 }, { "acc": 0.92656155, "epoch": 1.7057915685159615, "grad_norm": 7.131628513336182, "learning_rate": 6.601201043565439e-06, "loss": 0.37939572, "memory(GiB)": 34.88, "step": 63000, "train_speed(iter/s)": 0.4113 }, { "acc": 0.89064827, "epoch": 1.7059269487991768, "grad_norm": 9.256454467773438, "learning_rate": 6.600670925669741e-06, "loss": 0.60565882, "memory(GiB)": 34.88, "step": 63005, "train_speed(iter/s)": 0.411302 }, { "acc": 0.91073475, "epoch": 1.7060623290823924, "grad_norm": 17.972864151000977, "learning_rate": 6.600140787728758e-06, "loss": 0.44144325, "memory(GiB)": 34.88, "step": 63010, "train_speed(iter/s)": 0.411304 }, { "acc": 0.89639053, "epoch": 1.706197709365608, "grad_norm": 7.471711158752441, "learning_rate": 6.59961062974913e-06, "loss": 0.54173317, "memory(GiB)": 34.88, "step": 63015, "train_speed(iter/s)": 0.411305 }, { "acc": 0.91088562, "epoch": 1.7063330896488236, "grad_norm": 6.384994983673096, "learning_rate": 6.599080451737502e-06, "loss": 0.47749519, "memory(GiB)": 34.88, "step": 63020, "train_speed(iter/s)": 0.411307 }, { "acc": 0.88150864, "epoch": 1.7064684699320392, "grad_norm": 8.27392292022705, "learning_rate": 6.598550253700511e-06, "loss": 0.70120211, "memory(GiB)": 34.88, "step": 63025, "train_speed(iter/s)": 0.411309 }, { "acc": 0.90097361, "epoch": 1.7066038502152545, "grad_norm": 14.500138282775879, "learning_rate": 6.5980200356448006e-06, "loss": 0.58622165, "memory(GiB)": 34.88, "step": 63030, "train_speed(iter/s)": 0.411311 }, { "acc": 0.9289032, "epoch": 1.7067392304984703, "grad_norm": 4.852242946624756, "learning_rate": 6.597489797577012e-06, "loss": 0.38208947, "memory(GiB)": 34.88, "step": 63035, "train_speed(iter/s)": 0.411312 }, { "acc": 0.87693615, "epoch": 1.7068746107816857, "grad_norm": 10.3074312210083, "learning_rate": 6.596959539503788e-06, "loss": 0.70548124, "memory(GiB)": 34.88, "step": 63040, "train_speed(iter/s)": 0.411314 }, { "acc": 0.90621128, "epoch": 1.7070099910649013, "grad_norm": 8.099294662475586, "learning_rate": 6.596429261431772e-06, "loss": 0.49186783, "memory(GiB)": 34.88, "step": 63045, "train_speed(iter/s)": 0.411316 }, { "acc": 0.91920528, "epoch": 1.7071453713481168, "grad_norm": 7.489249229431152, "learning_rate": 6.595898963367604e-06, "loss": 0.43707156, "memory(GiB)": 34.88, "step": 63050, "train_speed(iter/s)": 0.411317 }, { "acc": 0.90248241, "epoch": 1.7072807516313324, "grad_norm": 32.09066390991211, "learning_rate": 6.595368645317931e-06, "loss": 0.47925234, "memory(GiB)": 34.88, "step": 63055, "train_speed(iter/s)": 0.411319 }, { "acc": 0.91887693, "epoch": 1.707416131914548, "grad_norm": 11.18882942199707, "learning_rate": 6.594838307289394e-06, "loss": 0.49546885, "memory(GiB)": 34.88, "step": 63060, "train_speed(iter/s)": 0.411321 }, { "acc": 0.90211821, "epoch": 1.7075515121977634, "grad_norm": 6.6354851722717285, "learning_rate": 6.594307949288636e-06, "loss": 0.56701684, "memory(GiB)": 34.88, "step": 63065, "train_speed(iter/s)": 0.411322 }, { "acc": 0.8998167, "epoch": 1.7076868924809792, "grad_norm": 10.004252433776855, "learning_rate": 6.593777571322304e-06, "loss": 0.56033754, "memory(GiB)": 34.88, "step": 63070, "train_speed(iter/s)": 0.411324 }, { "acc": 0.92470932, "epoch": 1.7078222727641945, "grad_norm": 5.27583646774292, "learning_rate": 6.593247173397038e-06, "loss": 0.40222673, "memory(GiB)": 34.88, "step": 63075, "train_speed(iter/s)": 0.411326 }, { "acc": 0.91411037, "epoch": 1.7079576530474103, "grad_norm": 30.83067512512207, "learning_rate": 6.592716755519484e-06, "loss": 0.47905746, "memory(GiB)": 34.88, "step": 63080, "train_speed(iter/s)": 0.411328 }, { "acc": 0.89738131, "epoch": 1.7080930333306257, "grad_norm": 10.743496894836426, "learning_rate": 6.592186317696285e-06, "loss": 0.53636618, "memory(GiB)": 34.88, "step": 63085, "train_speed(iter/s)": 0.411329 }, { "acc": 0.90997953, "epoch": 1.7082284136138413, "grad_norm": 10.539388656616211, "learning_rate": 6.591655859934089e-06, "loss": 0.48747711, "memory(GiB)": 34.88, "step": 63090, "train_speed(iter/s)": 0.411331 }, { "acc": 0.90631981, "epoch": 1.7083637938970568, "grad_norm": 13.708256721496582, "learning_rate": 6.591125382239539e-06, "loss": 0.57262387, "memory(GiB)": 34.88, "step": 63095, "train_speed(iter/s)": 0.411333 }, { "acc": 0.89390335, "epoch": 1.7084991741802724, "grad_norm": 8.784601211547852, "learning_rate": 6.59059488461928e-06, "loss": 0.59486036, "memory(GiB)": 34.88, "step": 63100, "train_speed(iter/s)": 0.411335 }, { "acc": 0.90602713, "epoch": 1.708634554463488, "grad_norm": 8.719568252563477, "learning_rate": 6.590064367079958e-06, "loss": 0.49619837, "memory(GiB)": 34.88, "step": 63105, "train_speed(iter/s)": 0.411336 }, { "acc": 0.89819689, "epoch": 1.7087699347467034, "grad_norm": 34.302555084228516, "learning_rate": 6.58953382962822e-06, "loss": 0.65327234, "memory(GiB)": 34.88, "step": 63110, "train_speed(iter/s)": 0.411337 }, { "acc": 0.91153622, "epoch": 1.7089053150299192, "grad_norm": 62.24215316772461, "learning_rate": 6.58900327227071e-06, "loss": 0.53398294, "memory(GiB)": 34.88, "step": 63115, "train_speed(iter/s)": 0.411339 }, { "acc": 0.92391071, "epoch": 1.7090406953131345, "grad_norm": 4.260321140289307, "learning_rate": 6.5884726950140746e-06, "loss": 0.33896279, "memory(GiB)": 34.88, "step": 63120, "train_speed(iter/s)": 0.411341 }, { "acc": 0.89118977, "epoch": 1.70917607559635, "grad_norm": 6.596445560455322, "learning_rate": 6.587942097864961e-06, "loss": 0.62551723, "memory(GiB)": 34.88, "step": 63125, "train_speed(iter/s)": 0.411343 }, { "acc": 0.92096148, "epoch": 1.7093114558795657, "grad_norm": 6.0277791023254395, "learning_rate": 6.587411480830016e-06, "loss": 0.37628617, "memory(GiB)": 34.88, "step": 63130, "train_speed(iter/s)": 0.411344 }, { "acc": 0.91864624, "epoch": 1.7094468361627813, "grad_norm": 38.69584274291992, "learning_rate": 6.586880843915887e-06, "loss": 0.43164034, "memory(GiB)": 34.88, "step": 63135, "train_speed(iter/s)": 0.411346 }, { "acc": 0.89857559, "epoch": 1.7095822164459968, "grad_norm": 6.9774065017700195, "learning_rate": 6.586350187129219e-06, "loss": 0.5006258, "memory(GiB)": 34.88, "step": 63140, "train_speed(iter/s)": 0.411348 }, { "acc": 0.89868736, "epoch": 1.7097175967292122, "grad_norm": 9.57373046875, "learning_rate": 6.585819510476664e-06, "loss": 0.53751364, "memory(GiB)": 34.88, "step": 63145, "train_speed(iter/s)": 0.41135 }, { "acc": 0.91740494, "epoch": 1.709852977012428, "grad_norm": 9.141509056091309, "learning_rate": 6.585288813964865e-06, "loss": 0.43006268, "memory(GiB)": 34.88, "step": 63150, "train_speed(iter/s)": 0.411351 }, { "acc": 0.90722618, "epoch": 1.7099883572956434, "grad_norm": 8.54839038848877, "learning_rate": 6.5847580976004735e-06, "loss": 0.46915398, "memory(GiB)": 34.88, "step": 63155, "train_speed(iter/s)": 0.411353 }, { "acc": 0.90321198, "epoch": 1.7101237375788592, "grad_norm": 5.920734882354736, "learning_rate": 6.584227361390134e-06, "loss": 0.48173094, "memory(GiB)": 34.88, "step": 63160, "train_speed(iter/s)": 0.411355 }, { "acc": 0.90007915, "epoch": 1.7102591178620745, "grad_norm": 9.976922988891602, "learning_rate": 6.583696605340499e-06, "loss": 0.53089123, "memory(GiB)": 34.88, "step": 63165, "train_speed(iter/s)": 0.411356 }, { "acc": 0.90411739, "epoch": 1.71039449814529, "grad_norm": 6.857450008392334, "learning_rate": 6.5831658294582156e-06, "loss": 0.48436623, "memory(GiB)": 34.88, "step": 63170, "train_speed(iter/s)": 0.411357 }, { "acc": 0.91334171, "epoch": 1.7105298784285057, "grad_norm": 3.3224122524261475, "learning_rate": 6.5826350337499314e-06, "loss": 0.42236443, "memory(GiB)": 34.88, "step": 63175, "train_speed(iter/s)": 0.411359 }, { "acc": 0.91159706, "epoch": 1.7106652587117213, "grad_norm": 9.232336044311523, "learning_rate": 6.582104218222299e-06, "loss": 0.5164916, "memory(GiB)": 34.88, "step": 63180, "train_speed(iter/s)": 0.411361 }, { "acc": 0.91558189, "epoch": 1.7108006389949368, "grad_norm": 8.71662425994873, "learning_rate": 6.581573382881964e-06, "loss": 0.3902848, "memory(GiB)": 34.88, "step": 63185, "train_speed(iter/s)": 0.411362 }, { "acc": 0.88868532, "epoch": 1.7109360192781522, "grad_norm": 5.995715618133545, "learning_rate": 6.5810425277355795e-06, "loss": 0.6305757, "memory(GiB)": 34.88, "step": 63190, "train_speed(iter/s)": 0.411364 }, { "acc": 0.9168745, "epoch": 1.711071399561368, "grad_norm": 5.6526360511779785, "learning_rate": 6.5805116527897924e-06, "loss": 0.41673121, "memory(GiB)": 34.88, "step": 63195, "train_speed(iter/s)": 0.411366 }, { "acc": 0.88968496, "epoch": 1.7112067798445834, "grad_norm": 14.588496208190918, "learning_rate": 6.579980758051255e-06, "loss": 0.65763049, "memory(GiB)": 34.88, "step": 63200, "train_speed(iter/s)": 0.411367 }, { "acc": 0.92259712, "epoch": 1.711342160127799, "grad_norm": 9.040260314941406, "learning_rate": 6.5794498435266196e-06, "loss": 0.47030802, "memory(GiB)": 34.88, "step": 63205, "train_speed(iter/s)": 0.411369 }, { "acc": 0.91327667, "epoch": 1.7114775404110145, "grad_norm": 6.90669059753418, "learning_rate": 6.578918909222532e-06, "loss": 0.5225831, "memory(GiB)": 34.88, "step": 63210, "train_speed(iter/s)": 0.411371 }, { "acc": 0.88923397, "epoch": 1.71161292069423, "grad_norm": 7.66080379486084, "learning_rate": 6.578387955145649e-06, "loss": 0.67893076, "memory(GiB)": 34.88, "step": 63215, "train_speed(iter/s)": 0.411373 }, { "acc": 0.90591879, "epoch": 1.7117483009774457, "grad_norm": 9.738832473754883, "learning_rate": 6.577856981302616e-06, "loss": 0.51372728, "memory(GiB)": 34.88, "step": 63220, "train_speed(iter/s)": 0.411375 }, { "acc": 0.91325932, "epoch": 1.711883681260661, "grad_norm": 5.65955114364624, "learning_rate": 6.577325987700088e-06, "loss": 0.50687423, "memory(GiB)": 34.88, "step": 63225, "train_speed(iter/s)": 0.411376 }, { "acc": 0.89702559, "epoch": 1.7120190615438768, "grad_norm": 8.648099899291992, "learning_rate": 6.576794974344715e-06, "loss": 0.59267693, "memory(GiB)": 34.88, "step": 63230, "train_speed(iter/s)": 0.411378 }, { "acc": 0.90325556, "epoch": 1.7121544418270922, "grad_norm": 11.223542213439941, "learning_rate": 6.576263941243154e-06, "loss": 0.57538214, "memory(GiB)": 34.88, "step": 63235, "train_speed(iter/s)": 0.41138 }, { "acc": 0.91858025, "epoch": 1.712289822110308, "grad_norm": 14.783529281616211, "learning_rate": 6.5757328884020496e-06, "loss": 0.48138037, "memory(GiB)": 34.88, "step": 63240, "train_speed(iter/s)": 0.411381 }, { "acc": 0.91698399, "epoch": 1.7124252023935234, "grad_norm": 6.662282943725586, "learning_rate": 6.57520181582806e-06, "loss": 0.4084991, "memory(GiB)": 34.88, "step": 63245, "train_speed(iter/s)": 0.411383 }, { "acc": 0.91908808, "epoch": 1.712560582676739, "grad_norm": 8.709102630615234, "learning_rate": 6.574670723527835e-06, "loss": 0.47621841, "memory(GiB)": 34.88, "step": 63250, "train_speed(iter/s)": 0.411385 }, { "acc": 0.90296345, "epoch": 1.7126959629599545, "grad_norm": 22.042207717895508, "learning_rate": 6.5741396115080284e-06, "loss": 0.58411388, "memory(GiB)": 34.88, "step": 63255, "train_speed(iter/s)": 0.411386 }, { "acc": 0.91288786, "epoch": 1.71283134324317, "grad_norm": 10.322834014892578, "learning_rate": 6.573608479775293e-06, "loss": 0.40730333, "memory(GiB)": 34.88, "step": 63260, "train_speed(iter/s)": 0.411388 }, { "acc": 0.90879936, "epoch": 1.7129667235263857, "grad_norm": 5.542201042175293, "learning_rate": 6.573077328336283e-06, "loss": 0.44572239, "memory(GiB)": 34.88, "step": 63265, "train_speed(iter/s)": 0.41139 }, { "acc": 0.90549908, "epoch": 1.713102103809601, "grad_norm": 9.765480041503906, "learning_rate": 6.5725461571976515e-06, "loss": 0.55177212, "memory(GiB)": 34.88, "step": 63270, "train_speed(iter/s)": 0.411392 }, { "acc": 0.90874319, "epoch": 1.7132374840928168, "grad_norm": 22.001426696777344, "learning_rate": 6.572014966366052e-06, "loss": 0.48564544, "memory(GiB)": 34.88, "step": 63275, "train_speed(iter/s)": 0.411393 }, { "acc": 0.92730217, "epoch": 1.7133728643760322, "grad_norm": 5.280323028564453, "learning_rate": 6.571483755848143e-06, "loss": 0.38295541, "memory(GiB)": 34.88, "step": 63280, "train_speed(iter/s)": 0.411395 }, { "acc": 0.91341629, "epoch": 1.7135082446592478, "grad_norm": 10.546401023864746, "learning_rate": 6.570952525650574e-06, "loss": 0.49242849, "memory(GiB)": 34.88, "step": 63285, "train_speed(iter/s)": 0.411397 }, { "acc": 0.90460539, "epoch": 1.7136436249424634, "grad_norm": 9.979297637939453, "learning_rate": 6.570421275779999e-06, "loss": 0.54804749, "memory(GiB)": 34.88, "step": 63290, "train_speed(iter/s)": 0.411399 }, { "acc": 0.91340675, "epoch": 1.713779005225679, "grad_norm": 9.128448486328125, "learning_rate": 6.569890006243077e-06, "loss": 0.5428216, "memory(GiB)": 34.88, "step": 63295, "train_speed(iter/s)": 0.4114 }, { "acc": 0.91286955, "epoch": 1.7139143855088945, "grad_norm": 4.578867435455322, "learning_rate": 6.569358717046461e-06, "loss": 0.44330192, "memory(GiB)": 34.88, "step": 63300, "train_speed(iter/s)": 0.411401 }, { "acc": 0.89861736, "epoch": 1.7140497657921099, "grad_norm": 8.302469253540039, "learning_rate": 6.568827408196807e-06, "loss": 0.58961821, "memory(GiB)": 34.88, "step": 63305, "train_speed(iter/s)": 0.411403 }, { "acc": 0.91050625, "epoch": 1.7141851460753257, "grad_norm": 7.308863162994385, "learning_rate": 6.568296079700769e-06, "loss": 0.51800222, "memory(GiB)": 34.88, "step": 63310, "train_speed(iter/s)": 0.411405 }, { "acc": 0.91755505, "epoch": 1.714320526358541, "grad_norm": 9.306780815124512, "learning_rate": 6.567764731565005e-06, "loss": 0.38304362, "memory(GiB)": 34.88, "step": 63315, "train_speed(iter/s)": 0.411407 }, { "acc": 0.90527525, "epoch": 1.7144559066417568, "grad_norm": 9.34753704071045, "learning_rate": 6.5672333637961724e-06, "loss": 0.52818289, "memory(GiB)": 34.88, "step": 63320, "train_speed(iter/s)": 0.411408 }, { "acc": 0.90347672, "epoch": 1.7145912869249722, "grad_norm": 10.03959846496582, "learning_rate": 6.566701976400926e-06, "loss": 0.57169385, "memory(GiB)": 34.88, "step": 63325, "train_speed(iter/s)": 0.41141 }, { "acc": 0.9108427, "epoch": 1.7147266672081878, "grad_norm": 5.383254528045654, "learning_rate": 6.566170569385919e-06, "loss": 0.42924976, "memory(GiB)": 34.88, "step": 63330, "train_speed(iter/s)": 0.411412 }, { "acc": 0.9170908, "epoch": 1.7148620474914034, "grad_norm": 8.276398658752441, "learning_rate": 6.565639142757815e-06, "loss": 0.37189529, "memory(GiB)": 34.88, "step": 63335, "train_speed(iter/s)": 0.411414 }, { "acc": 0.91127396, "epoch": 1.714997427774619, "grad_norm": 5.7849225997924805, "learning_rate": 6.565107696523265e-06, "loss": 0.42240305, "memory(GiB)": 34.88, "step": 63340, "train_speed(iter/s)": 0.411415 }, { "acc": 0.91837969, "epoch": 1.7151328080578345, "grad_norm": 6.171535491943359, "learning_rate": 6.56457623068893e-06, "loss": 0.37374926, "memory(GiB)": 34.88, "step": 63345, "train_speed(iter/s)": 0.411417 }, { "acc": 0.88868799, "epoch": 1.7152681883410499, "grad_norm": 7.864618301391602, "learning_rate": 6.564044745261467e-06, "loss": 0.65610037, "memory(GiB)": 34.88, "step": 63350, "train_speed(iter/s)": 0.411419 }, { "acc": 0.93290958, "epoch": 1.7154035686242657, "grad_norm": 11.913559913635254, "learning_rate": 6.563513240247534e-06, "loss": 0.35395412, "memory(GiB)": 34.88, "step": 63355, "train_speed(iter/s)": 0.41142 }, { "acc": 0.91599646, "epoch": 1.715538948907481, "grad_norm": 6.830780982971191, "learning_rate": 6.562981715653789e-06, "loss": 0.45511436, "memory(GiB)": 34.88, "step": 63360, "train_speed(iter/s)": 0.411422 }, { "acc": 0.88318138, "epoch": 1.7156743291906966, "grad_norm": 14.27831745147705, "learning_rate": 6.562450171486891e-06, "loss": 0.60947428, "memory(GiB)": 34.88, "step": 63365, "train_speed(iter/s)": 0.411423 }, { "acc": 0.90674858, "epoch": 1.7158097094739122, "grad_norm": 8.244744300842285, "learning_rate": 6.561918607753496e-06, "loss": 0.44281926, "memory(GiB)": 34.88, "step": 63370, "train_speed(iter/s)": 0.411425 }, { "acc": 0.93130426, "epoch": 1.7159450897571278, "grad_norm": 5.695082664489746, "learning_rate": 6.561387024460267e-06, "loss": 0.37842457, "memory(GiB)": 34.88, "step": 63375, "train_speed(iter/s)": 0.411427 }, { "acc": 0.89796743, "epoch": 1.7160804700403434, "grad_norm": 11.3713960647583, "learning_rate": 6.560855421613858e-06, "loss": 0.5178503, "memory(GiB)": 34.88, "step": 63380, "train_speed(iter/s)": 0.411429 }, { "acc": 0.9122015, "epoch": 1.7162158503235587, "grad_norm": 6.365060329437256, "learning_rate": 6.560323799220931e-06, "loss": 0.50989132, "memory(GiB)": 34.88, "step": 63385, "train_speed(iter/s)": 0.41143 }, { "acc": 0.90478134, "epoch": 1.7163512306067745, "grad_norm": 4.809212684631348, "learning_rate": 6.5597921572881484e-06, "loss": 0.4099236, "memory(GiB)": 34.88, "step": 63390, "train_speed(iter/s)": 0.411432 }, { "acc": 0.90301332, "epoch": 1.7164866108899899, "grad_norm": 7.289055824279785, "learning_rate": 6.559260495822166e-06, "loss": 0.4633687, "memory(GiB)": 34.88, "step": 63395, "train_speed(iter/s)": 0.411434 }, { "acc": 0.89711809, "epoch": 1.7166219911732057, "grad_norm": 4.579399585723877, "learning_rate": 6.558728814829647e-06, "loss": 0.55036926, "memory(GiB)": 34.88, "step": 63400, "train_speed(iter/s)": 0.411435 }, { "acc": 0.89406195, "epoch": 1.716757371456421, "grad_norm": 6.428951740264893, "learning_rate": 6.558197114317251e-06, "loss": 0.61667433, "memory(GiB)": 34.88, "step": 63405, "train_speed(iter/s)": 0.411437 }, { "acc": 0.91117115, "epoch": 1.7168927517396366, "grad_norm": 6.204862117767334, "learning_rate": 6.5576653942916355e-06, "loss": 0.49690704, "memory(GiB)": 34.88, "step": 63410, "train_speed(iter/s)": 0.411439 }, { "acc": 0.89705791, "epoch": 1.7170281320228522, "grad_norm": 10.815855026245117, "learning_rate": 6.557133654759463e-06, "loss": 0.58054419, "memory(GiB)": 34.88, "step": 63415, "train_speed(iter/s)": 0.411441 }, { "acc": 0.91246042, "epoch": 1.7171635123060678, "grad_norm": 6.199950218200684, "learning_rate": 6.556601895727396e-06, "loss": 0.40517778, "memory(GiB)": 34.88, "step": 63420, "train_speed(iter/s)": 0.411442 }, { "acc": 0.91316786, "epoch": 1.7172988925892834, "grad_norm": 5.707417011260986, "learning_rate": 6.556070117202096e-06, "loss": 0.44090385, "memory(GiB)": 34.88, "step": 63425, "train_speed(iter/s)": 0.411444 }, { "acc": 0.91491823, "epoch": 1.7174342728724987, "grad_norm": 14.378579139709473, "learning_rate": 6.555538319190222e-06, "loss": 0.45805321, "memory(GiB)": 34.88, "step": 63430, "train_speed(iter/s)": 0.411446 }, { "acc": 0.91527729, "epoch": 1.7175696531557145, "grad_norm": 4.677145481109619, "learning_rate": 6.5550065016984385e-06, "loss": 0.5351531, "memory(GiB)": 34.88, "step": 63435, "train_speed(iter/s)": 0.411448 }, { "acc": 0.90720425, "epoch": 1.7177050334389299, "grad_norm": 16.18180274963379, "learning_rate": 6.554474664733407e-06, "loss": 0.52898965, "memory(GiB)": 34.88, "step": 63440, "train_speed(iter/s)": 0.41145 }, { "acc": 0.90003395, "epoch": 1.7178404137221455, "grad_norm": 7.217530727386475, "learning_rate": 6.55394280830179e-06, "loss": 0.50045767, "memory(GiB)": 34.88, "step": 63445, "train_speed(iter/s)": 0.411451 }, { "acc": 0.90332594, "epoch": 1.717975794005361, "grad_norm": 7.582179546356201, "learning_rate": 6.553410932410247e-06, "loss": 0.51392379, "memory(GiB)": 34.88, "step": 63450, "train_speed(iter/s)": 0.411453 }, { "acc": 0.91716261, "epoch": 1.7181111742885766, "grad_norm": 13.04664421081543, "learning_rate": 6.552879037065443e-06, "loss": 0.43540978, "memory(GiB)": 34.88, "step": 63455, "train_speed(iter/s)": 0.411455 }, { "acc": 0.90228386, "epoch": 1.7182465545717922, "grad_norm": 22.100202560424805, "learning_rate": 6.5523471222740435e-06, "loss": 0.53759956, "memory(GiB)": 34.88, "step": 63460, "train_speed(iter/s)": 0.411457 }, { "acc": 0.9150547, "epoch": 1.7183819348550076, "grad_norm": 4.696138858795166, "learning_rate": 6.5518151880427075e-06, "loss": 0.39730856, "memory(GiB)": 34.88, "step": 63465, "train_speed(iter/s)": 0.411458 }, { "acc": 0.91420593, "epoch": 1.7185173151382234, "grad_norm": 5.385622978210449, "learning_rate": 6.5512832343781005e-06, "loss": 0.45332623, "memory(GiB)": 34.88, "step": 63470, "train_speed(iter/s)": 0.41146 }, { "acc": 0.90638351, "epoch": 1.7186526954214387, "grad_norm": 12.904703140258789, "learning_rate": 6.550751261286886e-06, "loss": 0.53124981, "memory(GiB)": 34.88, "step": 63475, "train_speed(iter/s)": 0.411462 }, { "acc": 0.91458073, "epoch": 1.7187880757046545, "grad_norm": 13.146851539611816, "learning_rate": 6.550219268775729e-06, "loss": 0.51703663, "memory(GiB)": 34.88, "step": 63480, "train_speed(iter/s)": 0.411464 }, { "acc": 0.91489449, "epoch": 1.7189234559878699, "grad_norm": 12.738364219665527, "learning_rate": 6.549687256851294e-06, "loss": 0.36413274, "memory(GiB)": 34.88, "step": 63485, "train_speed(iter/s)": 0.411465 }, { "acc": 0.92934904, "epoch": 1.7190588362710855, "grad_norm": 8.531712532043457, "learning_rate": 6.549155225520244e-06, "loss": 0.48992286, "memory(GiB)": 34.88, "step": 63490, "train_speed(iter/s)": 0.411467 }, { "acc": 0.90177822, "epoch": 1.719194216554301, "grad_norm": 7.4194183349609375, "learning_rate": 6.548623174789242e-06, "loss": 0.52829351, "memory(GiB)": 34.88, "step": 63495, "train_speed(iter/s)": 0.411469 }, { "acc": 0.91345692, "epoch": 1.7193295968375166, "grad_norm": 8.086159706115723, "learning_rate": 6.548091104664957e-06, "loss": 0.43976135, "memory(GiB)": 34.88, "step": 63500, "train_speed(iter/s)": 0.411471 }, { "acc": 0.90992956, "epoch": 1.7194649771207322, "grad_norm": 12.97950553894043, "learning_rate": 6.547559015154052e-06, "loss": 0.46903481, "memory(GiB)": 34.88, "step": 63505, "train_speed(iter/s)": 0.411472 }, { "acc": 0.91042814, "epoch": 1.7196003574039476, "grad_norm": 7.060988903045654, "learning_rate": 6.5470269062631945e-06, "loss": 0.54167986, "memory(GiB)": 34.88, "step": 63510, "train_speed(iter/s)": 0.411474 }, { "acc": 0.90641918, "epoch": 1.7197357376871634, "grad_norm": 6.564538955688477, "learning_rate": 6.5464947779990465e-06, "loss": 0.47272396, "memory(GiB)": 34.88, "step": 63515, "train_speed(iter/s)": 0.411476 }, { "acc": 0.89258432, "epoch": 1.7198711179703787, "grad_norm": 10.739302635192871, "learning_rate": 6.545962630368275e-06, "loss": 0.57928667, "memory(GiB)": 34.88, "step": 63520, "train_speed(iter/s)": 0.411477 }, { "acc": 0.91423035, "epoch": 1.7200064982535943, "grad_norm": 16.75820541381836, "learning_rate": 6.54543046337755e-06, "loss": 0.46841879, "memory(GiB)": 34.88, "step": 63525, "train_speed(iter/s)": 0.411479 }, { "acc": 0.90790367, "epoch": 1.7201418785368099, "grad_norm": 7.3338704109191895, "learning_rate": 6.544898277033536e-06, "loss": 0.44866757, "memory(GiB)": 34.88, "step": 63530, "train_speed(iter/s)": 0.411481 }, { "acc": 0.8876812, "epoch": 1.7202772588200255, "grad_norm": 6.799678325653076, "learning_rate": 6.544366071342896e-06, "loss": 0.56632395, "memory(GiB)": 34.88, "step": 63535, "train_speed(iter/s)": 0.411482 }, { "acc": 0.91559286, "epoch": 1.720412639103241, "grad_norm": 5.671276569366455, "learning_rate": 6.5438338463122994e-06, "loss": 0.43551426, "memory(GiB)": 34.88, "step": 63540, "train_speed(iter/s)": 0.411484 }, { "acc": 0.89619522, "epoch": 1.7205480193864564, "grad_norm": 4.8866496086120605, "learning_rate": 6.543301601948415e-06, "loss": 0.51512814, "memory(GiB)": 34.88, "step": 63545, "train_speed(iter/s)": 0.411485 }, { "acc": 0.900527, "epoch": 1.7206833996696722, "grad_norm": 9.0798978805542, "learning_rate": 6.5427693382579095e-06, "loss": 0.49877052, "memory(GiB)": 34.88, "step": 63550, "train_speed(iter/s)": 0.411487 }, { "acc": 0.92233887, "epoch": 1.7208187799528876, "grad_norm": 6.751298427581787, "learning_rate": 6.542237055247449e-06, "loss": 0.44474106, "memory(GiB)": 34.88, "step": 63555, "train_speed(iter/s)": 0.411489 }, { "acc": 0.89393463, "epoch": 1.7209541602361034, "grad_norm": 5.00101375579834, "learning_rate": 6.541704752923703e-06, "loss": 0.577527, "memory(GiB)": 34.88, "step": 63560, "train_speed(iter/s)": 0.41149 }, { "acc": 0.88154488, "epoch": 1.7210895405193187, "grad_norm": 6.171504020690918, "learning_rate": 6.5411724312933375e-06, "loss": 0.72831726, "memory(GiB)": 34.88, "step": 63565, "train_speed(iter/s)": 0.411492 }, { "acc": 0.9175333, "epoch": 1.7212249208025343, "grad_norm": 6.8535332679748535, "learning_rate": 6.540640090363025e-06, "loss": 0.49348412, "memory(GiB)": 34.88, "step": 63570, "train_speed(iter/s)": 0.411494 }, { "acc": 0.90320702, "epoch": 1.7213603010857499, "grad_norm": 6.140537261962891, "learning_rate": 6.54010773013943e-06, "loss": 0.49894857, "memory(GiB)": 34.88, "step": 63575, "train_speed(iter/s)": 0.411495 }, { "acc": 0.90829144, "epoch": 1.7214956813689655, "grad_norm": 5.894057273864746, "learning_rate": 6.5395753506292224e-06, "loss": 0.41513996, "memory(GiB)": 34.88, "step": 63580, "train_speed(iter/s)": 0.411497 }, { "acc": 0.90774899, "epoch": 1.721631061652181, "grad_norm": 11.439172744750977, "learning_rate": 6.5390429518390734e-06, "loss": 0.45574698, "memory(GiB)": 34.88, "step": 63585, "train_speed(iter/s)": 0.411499 }, { "acc": 0.91384583, "epoch": 1.7217664419353964, "grad_norm": 9.715681076049805, "learning_rate": 6.538510533775649e-06, "loss": 0.57376847, "memory(GiB)": 34.88, "step": 63590, "train_speed(iter/s)": 0.4115 }, { "acc": 0.91450176, "epoch": 1.7219018222186122, "grad_norm": 7.284318447113037, "learning_rate": 6.5379780964456216e-06, "loss": 0.43517618, "memory(GiB)": 34.88, "step": 63595, "train_speed(iter/s)": 0.411502 }, { "acc": 0.90129499, "epoch": 1.7220372025018276, "grad_norm": 5.727386474609375, "learning_rate": 6.5374456398556595e-06, "loss": 0.52197905, "memory(GiB)": 34.88, "step": 63600, "train_speed(iter/s)": 0.411503 }, { "acc": 0.89979296, "epoch": 1.7221725827850431, "grad_norm": 13.14034366607666, "learning_rate": 6.5369131640124335e-06, "loss": 0.60904264, "memory(GiB)": 34.88, "step": 63605, "train_speed(iter/s)": 0.411505 }, { "acc": 0.92603598, "epoch": 1.7223079630682587, "grad_norm": 3.6719110012054443, "learning_rate": 6.5363806689226116e-06, "loss": 0.37476828, "memory(GiB)": 34.88, "step": 63610, "train_speed(iter/s)": 0.411507 }, { "acc": 0.92490053, "epoch": 1.7224433433514743, "grad_norm": 6.500538349151611, "learning_rate": 6.535848154592869e-06, "loss": 0.38973436, "memory(GiB)": 34.88, "step": 63615, "train_speed(iter/s)": 0.411508 }, { "acc": 0.90438213, "epoch": 1.72257872363469, "grad_norm": 7.100159645080566, "learning_rate": 6.535315621029873e-06, "loss": 0.53344231, "memory(GiB)": 34.88, "step": 63620, "train_speed(iter/s)": 0.41151 }, { "acc": 0.9205122, "epoch": 1.7227141039179052, "grad_norm": 6.3970441818237305, "learning_rate": 6.5347830682402944e-06, "loss": 0.41141086, "memory(GiB)": 34.88, "step": 63625, "train_speed(iter/s)": 0.411512 }, { "acc": 0.89877815, "epoch": 1.722849484201121, "grad_norm": 2.3049042224884033, "learning_rate": 6.5342504962308074e-06, "loss": 0.48325872, "memory(GiB)": 34.88, "step": 63630, "train_speed(iter/s)": 0.411514 }, { "acc": 0.9039752, "epoch": 1.7229848644843364, "grad_norm": 13.490371704101562, "learning_rate": 6.533717905008079e-06, "loss": 0.49704237, "memory(GiB)": 34.88, "step": 63635, "train_speed(iter/s)": 0.411516 }, { "acc": 0.91355104, "epoch": 1.7231202447675522, "grad_norm": 6.084644317626953, "learning_rate": 6.533185294578787e-06, "loss": 0.45152321, "memory(GiB)": 34.88, "step": 63640, "train_speed(iter/s)": 0.411517 }, { "acc": 0.90821352, "epoch": 1.7232556250507676, "grad_norm": 13.724797248840332, "learning_rate": 6.532652664949597e-06, "loss": 0.46693974, "memory(GiB)": 34.88, "step": 63645, "train_speed(iter/s)": 0.411519 }, { "acc": 0.90724545, "epoch": 1.7233910053339832, "grad_norm": 8.094003677368164, "learning_rate": 6.532120016127185e-06, "loss": 0.50378704, "memory(GiB)": 34.88, "step": 63650, "train_speed(iter/s)": 0.411521 }, { "acc": 0.88961945, "epoch": 1.7235263856171987, "grad_norm": 25.580541610717773, "learning_rate": 6.5315873481182235e-06, "loss": 0.60657678, "memory(GiB)": 34.88, "step": 63655, "train_speed(iter/s)": 0.411523 }, { "acc": 0.9034565, "epoch": 1.7236617659004143, "grad_norm": 5.266337871551514, "learning_rate": 6.5310546609293835e-06, "loss": 0.45251637, "memory(GiB)": 34.88, "step": 63660, "train_speed(iter/s)": 0.411525 }, { "acc": 0.91812124, "epoch": 1.72379714618363, "grad_norm": 9.540536880493164, "learning_rate": 6.5305219545673395e-06, "loss": 0.4655302, "memory(GiB)": 34.88, "step": 63665, "train_speed(iter/s)": 0.411526 }, { "acc": 0.90799179, "epoch": 1.7239325264668452, "grad_norm": 7.5256876945495605, "learning_rate": 6.5299892290387636e-06, "loss": 0.50097818, "memory(GiB)": 34.88, "step": 63670, "train_speed(iter/s)": 0.411528 }, { "acc": 0.92260418, "epoch": 1.724067906750061, "grad_norm": 9.222594261169434, "learning_rate": 6.5294564843503295e-06, "loss": 0.41274147, "memory(GiB)": 34.88, "step": 63675, "train_speed(iter/s)": 0.411529 }, { "acc": 0.90264282, "epoch": 1.7242032870332764, "grad_norm": 9.919942855834961, "learning_rate": 6.528923720508711e-06, "loss": 0.55470943, "memory(GiB)": 34.88, "step": 63680, "train_speed(iter/s)": 0.411531 }, { "acc": 0.91866322, "epoch": 1.724338667316492, "grad_norm": 6.226984024047852, "learning_rate": 6.528390937520581e-06, "loss": 0.46490831, "memory(GiB)": 34.88, "step": 63685, "train_speed(iter/s)": 0.411532 }, { "acc": 0.91606197, "epoch": 1.7244740475997076, "grad_norm": 9.189996719360352, "learning_rate": 6.527858135392618e-06, "loss": 0.43818846, "memory(GiB)": 34.88, "step": 63690, "train_speed(iter/s)": 0.411534 }, { "acc": 0.90586529, "epoch": 1.7246094278829232, "grad_norm": 8.202911376953125, "learning_rate": 6.5273253141314895e-06, "loss": 0.60787697, "memory(GiB)": 34.88, "step": 63695, "train_speed(iter/s)": 0.411536 }, { "acc": 0.90789099, "epoch": 1.7247448081661387, "grad_norm": 8.696595191955566, "learning_rate": 6.526792473743876e-06, "loss": 0.49802823, "memory(GiB)": 34.88, "step": 63700, "train_speed(iter/s)": 0.411537 }, { "acc": 0.92013702, "epoch": 1.724880188449354, "grad_norm": 4.588885307312012, "learning_rate": 6.52625961423645e-06, "loss": 0.45786467, "memory(GiB)": 34.88, "step": 63705, "train_speed(iter/s)": 0.411539 }, { "acc": 0.91773052, "epoch": 1.72501556873257, "grad_norm": 7.038128852844238, "learning_rate": 6.525726735615886e-06, "loss": 0.45887623, "memory(GiB)": 34.88, "step": 63710, "train_speed(iter/s)": 0.411541 }, { "acc": 0.9042182, "epoch": 1.7251509490157853, "grad_norm": 5.05478572845459, "learning_rate": 6.5251938378888605e-06, "loss": 0.47470045, "memory(GiB)": 34.88, "step": 63715, "train_speed(iter/s)": 0.411542 }, { "acc": 0.90558071, "epoch": 1.725286329299001, "grad_norm": 19.714757919311523, "learning_rate": 6.524660921062047e-06, "loss": 0.56476736, "memory(GiB)": 34.88, "step": 63720, "train_speed(iter/s)": 0.411544 }, { "acc": 0.89661274, "epoch": 1.7254217095822164, "grad_norm": 10.901595115661621, "learning_rate": 6.524127985142125e-06, "loss": 0.55393496, "memory(GiB)": 34.88, "step": 63725, "train_speed(iter/s)": 0.411546 }, { "acc": 0.88178263, "epoch": 1.725557089865432, "grad_norm": 25.476224899291992, "learning_rate": 6.523595030135767e-06, "loss": 0.68247681, "memory(GiB)": 34.88, "step": 63730, "train_speed(iter/s)": 0.411547 }, { "acc": 0.92198009, "epoch": 1.7256924701486476, "grad_norm": 7.637290954589844, "learning_rate": 6.523062056049653e-06, "loss": 0.37743552, "memory(GiB)": 34.88, "step": 63735, "train_speed(iter/s)": 0.411549 }, { "acc": 0.90196438, "epoch": 1.7258278504318632, "grad_norm": 11.599968910217285, "learning_rate": 6.522529062890456e-06, "loss": 0.53311396, "memory(GiB)": 34.88, "step": 63740, "train_speed(iter/s)": 0.41155 }, { "acc": 0.90461378, "epoch": 1.7259632307150787, "grad_norm": 7.679587364196777, "learning_rate": 6.521996050664856e-06, "loss": 0.48538871, "memory(GiB)": 34.88, "step": 63745, "train_speed(iter/s)": 0.411551 }, { "acc": 0.89478741, "epoch": 1.726098610998294, "grad_norm": 24.007911682128906, "learning_rate": 6.521463019379526e-06, "loss": 0.66606331, "memory(GiB)": 34.88, "step": 63750, "train_speed(iter/s)": 0.411552 }, { "acc": 0.90478401, "epoch": 1.72623399128151, "grad_norm": 22.66307258605957, "learning_rate": 6.520929969041144e-06, "loss": 0.5569397, "memory(GiB)": 34.88, "step": 63755, "train_speed(iter/s)": 0.411554 }, { "acc": 0.93410702, "epoch": 1.7263693715647253, "grad_norm": 3.694164991378784, "learning_rate": 6.520396899656392e-06, "loss": 0.27430878, "memory(GiB)": 34.88, "step": 63760, "train_speed(iter/s)": 0.411556 }, { "acc": 0.90774879, "epoch": 1.7265047518479408, "grad_norm": 8.348313331604004, "learning_rate": 6.519863811231945e-06, "loss": 0.52167587, "memory(GiB)": 34.88, "step": 63765, "train_speed(iter/s)": 0.411558 }, { "acc": 0.90551071, "epoch": 1.7266401321311564, "grad_norm": 5.362394332885742, "learning_rate": 6.519330703774479e-06, "loss": 0.56107807, "memory(GiB)": 34.88, "step": 63770, "train_speed(iter/s)": 0.411559 }, { "acc": 0.91144314, "epoch": 1.726775512414372, "grad_norm": 9.719728469848633, "learning_rate": 6.518797577290674e-06, "loss": 0.47555218, "memory(GiB)": 34.88, "step": 63775, "train_speed(iter/s)": 0.411561 }, { "acc": 0.90060472, "epoch": 1.7269108926975876, "grad_norm": 6.112280368804932, "learning_rate": 6.51826443178721e-06, "loss": 0.5871707, "memory(GiB)": 34.88, "step": 63780, "train_speed(iter/s)": 0.411562 }, { "acc": 0.905966, "epoch": 1.727046272980803, "grad_norm": 5.9657416343688965, "learning_rate": 6.517731267270764e-06, "loss": 0.47906537, "memory(GiB)": 34.88, "step": 63785, "train_speed(iter/s)": 0.411564 }, { "acc": 0.91977339, "epoch": 1.7271816532640187, "grad_norm": 4.38523006439209, "learning_rate": 6.517198083748014e-06, "loss": 0.45058517, "memory(GiB)": 34.88, "step": 63790, "train_speed(iter/s)": 0.411566 }, { "acc": 0.92396326, "epoch": 1.727317033547234, "grad_norm": 17.977710723876953, "learning_rate": 6.516664881225639e-06, "loss": 0.44522505, "memory(GiB)": 34.88, "step": 63795, "train_speed(iter/s)": 0.411567 }, { "acc": 0.92207909, "epoch": 1.72745241383045, "grad_norm": 8.406941413879395, "learning_rate": 6.516131659710321e-06, "loss": 0.43391352, "memory(GiB)": 34.88, "step": 63800, "train_speed(iter/s)": 0.411568 }, { "acc": 0.89309654, "epoch": 1.7275877941136653, "grad_norm": 8.55527114868164, "learning_rate": 6.515598419208739e-06, "loss": 0.59080744, "memory(GiB)": 34.88, "step": 63805, "train_speed(iter/s)": 0.411569 }, { "acc": 0.90978088, "epoch": 1.7277231743968808, "grad_norm": 8.41733455657959, "learning_rate": 6.515065159727571e-06, "loss": 0.47121367, "memory(GiB)": 34.88, "step": 63810, "train_speed(iter/s)": 0.411571 }, { "acc": 0.91718864, "epoch": 1.7278585546800964, "grad_norm": 27.099817276000977, "learning_rate": 6.5145318812734984e-06, "loss": 0.38074007, "memory(GiB)": 34.88, "step": 63815, "train_speed(iter/s)": 0.411572 }, { "acc": 0.90208502, "epoch": 1.727993934963312, "grad_norm": 9.48688793182373, "learning_rate": 6.5139985838532005e-06, "loss": 0.48524594, "memory(GiB)": 34.88, "step": 63820, "train_speed(iter/s)": 0.411574 }, { "acc": 0.89179478, "epoch": 1.7281293152465276, "grad_norm": 8.802595138549805, "learning_rate": 6.513465267473361e-06, "loss": 0.62859488, "memory(GiB)": 34.88, "step": 63825, "train_speed(iter/s)": 0.411575 }, { "acc": 0.90948009, "epoch": 1.728264695529743, "grad_norm": 5.9180755615234375, "learning_rate": 6.5129319321406584e-06, "loss": 0.46531982, "memory(GiB)": 34.88, "step": 63830, "train_speed(iter/s)": 0.411576 }, { "acc": 0.89416599, "epoch": 1.7284000758129587, "grad_norm": 7.603086471557617, "learning_rate": 6.512398577861771e-06, "loss": 0.56910262, "memory(GiB)": 34.88, "step": 63835, "train_speed(iter/s)": 0.411578 }, { "acc": 0.93177681, "epoch": 1.728535456096174, "grad_norm": 6.367757320404053, "learning_rate": 6.511865204643386e-06, "loss": 0.39998872, "memory(GiB)": 34.88, "step": 63840, "train_speed(iter/s)": 0.411579 }, { "acc": 0.89800291, "epoch": 1.7286708363793897, "grad_norm": 16.16742706298828, "learning_rate": 6.511331812492181e-06, "loss": 0.51033902, "memory(GiB)": 34.88, "step": 63845, "train_speed(iter/s)": 0.411581 }, { "acc": 0.90992708, "epoch": 1.7288062166626053, "grad_norm": 4.0577473640441895, "learning_rate": 6.510798401414839e-06, "loss": 0.49929247, "memory(GiB)": 34.88, "step": 63850, "train_speed(iter/s)": 0.411583 }, { "acc": 0.90396233, "epoch": 1.7289415969458208, "grad_norm": 10.261152267456055, "learning_rate": 6.51026497141804e-06, "loss": 0.53439398, "memory(GiB)": 34.88, "step": 63855, "train_speed(iter/s)": 0.411584 }, { "acc": 0.90394936, "epoch": 1.7290769772290364, "grad_norm": 12.808692932128906, "learning_rate": 6.50973152250847e-06, "loss": 0.51755047, "memory(GiB)": 34.88, "step": 63860, "train_speed(iter/s)": 0.411586 }, { "acc": 0.91822634, "epoch": 1.7292123575122518, "grad_norm": 5.197683811187744, "learning_rate": 6.5091980546928105e-06, "loss": 0.43038845, "memory(GiB)": 34.88, "step": 63865, "train_speed(iter/s)": 0.411588 }, { "acc": 0.89532623, "epoch": 1.7293477377954676, "grad_norm": 6.927616596221924, "learning_rate": 6.508664567977742e-06, "loss": 0.66167679, "memory(GiB)": 34.88, "step": 63870, "train_speed(iter/s)": 0.411589 }, { "acc": 0.91748247, "epoch": 1.729483118078683, "grad_norm": 7.623842716217041, "learning_rate": 6.508131062369949e-06, "loss": 0.39572244, "memory(GiB)": 34.88, "step": 63875, "train_speed(iter/s)": 0.411591 }, { "acc": 0.91501522, "epoch": 1.7296184983618987, "grad_norm": 7.388539791107178, "learning_rate": 6.5075975378761125e-06, "loss": 0.50090542, "memory(GiB)": 34.88, "step": 63880, "train_speed(iter/s)": 0.411593 }, { "acc": 0.90684919, "epoch": 1.729753878645114, "grad_norm": 9.244478225708008, "learning_rate": 6.50706399450292e-06, "loss": 0.50579433, "memory(GiB)": 34.88, "step": 63885, "train_speed(iter/s)": 0.411595 }, { "acc": 0.9096385, "epoch": 1.7298892589283297, "grad_norm": 38.03224182128906, "learning_rate": 6.506530432257052e-06, "loss": 0.50945115, "memory(GiB)": 34.88, "step": 63890, "train_speed(iter/s)": 0.411596 }, { "acc": 0.90816269, "epoch": 1.7300246392115453, "grad_norm": 5.803237438201904, "learning_rate": 6.505996851145194e-06, "loss": 0.50446396, "memory(GiB)": 34.88, "step": 63895, "train_speed(iter/s)": 0.411598 }, { "acc": 0.91065388, "epoch": 1.7301600194947608, "grad_norm": 6.712589740753174, "learning_rate": 6.50546325117403e-06, "loss": 0.48481293, "memory(GiB)": 34.88, "step": 63900, "train_speed(iter/s)": 0.4116 }, { "acc": 0.8829052, "epoch": 1.7302953997779764, "grad_norm": 16.179428100585938, "learning_rate": 6.504929632350244e-06, "loss": 0.74173956, "memory(GiB)": 34.88, "step": 63905, "train_speed(iter/s)": 0.411601 }, { "acc": 0.90643711, "epoch": 1.7304307800611918, "grad_norm": 7.588306903839111, "learning_rate": 6.50439599468052e-06, "loss": 0.5553555, "memory(GiB)": 34.88, "step": 63910, "train_speed(iter/s)": 0.411603 }, { "acc": 0.89587965, "epoch": 1.7305661603444076, "grad_norm": 21.79926300048828, "learning_rate": 6.503862338171544e-06, "loss": 0.63354759, "memory(GiB)": 34.88, "step": 63915, "train_speed(iter/s)": 0.411605 }, { "acc": 0.90090027, "epoch": 1.730701540627623, "grad_norm": 2.728142738342285, "learning_rate": 6.503328662830002e-06, "loss": 0.51425056, "memory(GiB)": 34.88, "step": 63920, "train_speed(iter/s)": 0.411606 }, { "acc": 0.90284309, "epoch": 1.7308369209108385, "grad_norm": 22.129533767700195, "learning_rate": 6.502794968662575e-06, "loss": 0.53038578, "memory(GiB)": 34.88, "step": 63925, "train_speed(iter/s)": 0.411608 }, { "acc": 0.89553757, "epoch": 1.730972301194054, "grad_norm": 10.53104305267334, "learning_rate": 6.502261255675954e-06, "loss": 0.54742856, "memory(GiB)": 34.88, "step": 63930, "train_speed(iter/s)": 0.411609 }, { "acc": 0.91379061, "epoch": 1.7311076814772697, "grad_norm": 6.47485876083374, "learning_rate": 6.5017275238768205e-06, "loss": 0.44801049, "memory(GiB)": 34.88, "step": 63935, "train_speed(iter/s)": 0.411611 }, { "acc": 0.91679058, "epoch": 1.7312430617604853, "grad_norm": 8.185644149780273, "learning_rate": 6.5011937732718636e-06, "loss": 0.39853985, "memory(GiB)": 34.88, "step": 63940, "train_speed(iter/s)": 0.411613 }, { "acc": 0.93344698, "epoch": 1.7313784420437006, "grad_norm": 10.817100524902344, "learning_rate": 6.500660003867767e-06, "loss": 0.35259061, "memory(GiB)": 34.88, "step": 63945, "train_speed(iter/s)": 0.411614 }, { "acc": 0.89447327, "epoch": 1.7315138223269164, "grad_norm": 10.036471366882324, "learning_rate": 6.50012621567122e-06, "loss": 0.66363807, "memory(GiB)": 34.88, "step": 63950, "train_speed(iter/s)": 0.411616 }, { "acc": 0.90485287, "epoch": 1.7316492026101318, "grad_norm": 11.836843490600586, "learning_rate": 6.499592408688907e-06, "loss": 0.49450703, "memory(GiB)": 34.88, "step": 63955, "train_speed(iter/s)": 0.411617 }, { "acc": 0.90294857, "epoch": 1.7317845828933476, "grad_norm": 18.0636043548584, "learning_rate": 6.499058582927516e-06, "loss": 0.58573775, "memory(GiB)": 34.88, "step": 63960, "train_speed(iter/s)": 0.411619 }, { "acc": 0.91308403, "epoch": 1.731919963176563, "grad_norm": 11.024439811706543, "learning_rate": 6.498524738393736e-06, "loss": 0.48960056, "memory(GiB)": 34.88, "step": 63965, "train_speed(iter/s)": 0.41162 }, { "acc": 0.9180521, "epoch": 1.7320553434597785, "grad_norm": 8.136380195617676, "learning_rate": 6.497990875094249e-06, "loss": 0.50231791, "memory(GiB)": 34.88, "step": 63970, "train_speed(iter/s)": 0.411621 }, { "acc": 0.90673313, "epoch": 1.732190723742994, "grad_norm": 6.596191883087158, "learning_rate": 6.497456993035749e-06, "loss": 0.45155988, "memory(GiB)": 34.88, "step": 63975, "train_speed(iter/s)": 0.411623 }, { "acc": 0.90961933, "epoch": 1.7323261040262095, "grad_norm": 8.753241539001465, "learning_rate": 6.496923092224919e-06, "loss": 0.5031188, "memory(GiB)": 34.88, "step": 63980, "train_speed(iter/s)": 0.411625 }, { "acc": 0.93142309, "epoch": 1.7324614843094253, "grad_norm": 4.99131441116333, "learning_rate": 6.49638917266845e-06, "loss": 0.34678488, "memory(GiB)": 34.88, "step": 63985, "train_speed(iter/s)": 0.411626 }, { "acc": 0.90328979, "epoch": 1.7325968645926406, "grad_norm": 5.763925075531006, "learning_rate": 6.4958552343730316e-06, "loss": 0.52650132, "memory(GiB)": 34.88, "step": 63990, "train_speed(iter/s)": 0.411628 }, { "acc": 0.90518265, "epoch": 1.7327322448758564, "grad_norm": 6.479598522186279, "learning_rate": 6.49532127734535e-06, "loss": 0.48493943, "memory(GiB)": 34.88, "step": 63995, "train_speed(iter/s)": 0.411629 }, { "acc": 0.91428022, "epoch": 1.7328676251590718, "grad_norm": 7.258960247039795, "learning_rate": 6.4947873015920934e-06, "loss": 0.46212711, "memory(GiB)": 34.88, "step": 64000, "train_speed(iter/s)": 0.411631 }, { "acc": 0.89663458, "epoch": 1.7330030054422874, "grad_norm": 7.794873237609863, "learning_rate": 6.494253307119952e-06, "loss": 0.55459642, "memory(GiB)": 34.88, "step": 64005, "train_speed(iter/s)": 0.411633 }, { "acc": 0.88347445, "epoch": 1.733138385725503, "grad_norm": 8.479194641113281, "learning_rate": 6.493719293935615e-06, "loss": 0.7088872, "memory(GiB)": 34.88, "step": 64010, "train_speed(iter/s)": 0.411634 }, { "acc": 0.92024155, "epoch": 1.7332737660087185, "grad_norm": 6.889636516571045, "learning_rate": 6.493185262045773e-06, "loss": 0.47262535, "memory(GiB)": 34.88, "step": 64015, "train_speed(iter/s)": 0.411636 }, { "acc": 0.91590805, "epoch": 1.733409146291934, "grad_norm": 12.654853820800781, "learning_rate": 6.492651211457115e-06, "loss": 0.4826499, "memory(GiB)": 34.88, "step": 64020, "train_speed(iter/s)": 0.411638 }, { "acc": 0.91544037, "epoch": 1.7335445265751495, "grad_norm": 4.848562717437744, "learning_rate": 6.492117142176331e-06, "loss": 0.48374519, "memory(GiB)": 34.88, "step": 64025, "train_speed(iter/s)": 0.41164 }, { "acc": 0.91776075, "epoch": 1.7336799068583653, "grad_norm": 5.960440635681152, "learning_rate": 6.49158305421011e-06, "loss": 0.43950853, "memory(GiB)": 34.88, "step": 64030, "train_speed(iter/s)": 0.411641 }, { "acc": 0.9032177, "epoch": 1.7338152871415806, "grad_norm": 7.250924110412598, "learning_rate": 6.491048947565146e-06, "loss": 0.55854449, "memory(GiB)": 34.88, "step": 64035, "train_speed(iter/s)": 0.411643 }, { "acc": 0.90589085, "epoch": 1.7339506674247962, "grad_norm": 9.433087348937988, "learning_rate": 6.490514822248125e-06, "loss": 0.60264502, "memory(GiB)": 34.88, "step": 64040, "train_speed(iter/s)": 0.411645 }, { "acc": 0.89033384, "epoch": 1.7340860477080118, "grad_norm": 8.173585891723633, "learning_rate": 6.489980678265744e-06, "loss": 0.58339033, "memory(GiB)": 34.88, "step": 64045, "train_speed(iter/s)": 0.411647 }, { "acc": 0.91232586, "epoch": 1.7342214279912274, "grad_norm": 37.49336242675781, "learning_rate": 6.489446515624686e-06, "loss": 0.43565049, "memory(GiB)": 34.88, "step": 64050, "train_speed(iter/s)": 0.411648 }, { "acc": 0.93473396, "epoch": 1.734356808274443, "grad_norm": 7.950851917266846, "learning_rate": 6.4889123343316506e-06, "loss": 0.39119964, "memory(GiB)": 34.88, "step": 64055, "train_speed(iter/s)": 0.41165 }, { "acc": 0.93495541, "epoch": 1.7344921885576583, "grad_norm": 5.90886116027832, "learning_rate": 6.488378134393324e-06, "loss": 0.34570961, "memory(GiB)": 34.88, "step": 64060, "train_speed(iter/s)": 0.411652 }, { "acc": 0.90535126, "epoch": 1.734627568840874, "grad_norm": 8.773853302001953, "learning_rate": 6.487843915816401e-06, "loss": 0.5102376, "memory(GiB)": 34.88, "step": 64065, "train_speed(iter/s)": 0.411654 }, { "acc": 0.9008028, "epoch": 1.7347629491240895, "grad_norm": 5.55953311920166, "learning_rate": 6.487309678607574e-06, "loss": 0.42365203, "memory(GiB)": 34.88, "step": 64070, "train_speed(iter/s)": 0.411655 }, { "acc": 0.91467323, "epoch": 1.7348983294073053, "grad_norm": 5.562064170837402, "learning_rate": 6.486775422773534e-06, "loss": 0.39248519, "memory(GiB)": 34.88, "step": 64075, "train_speed(iter/s)": 0.411657 }, { "acc": 0.89499359, "epoch": 1.7350337096905206, "grad_norm": 11.39901351928711, "learning_rate": 6.486241148320972e-06, "loss": 0.56767468, "memory(GiB)": 34.88, "step": 64080, "train_speed(iter/s)": 0.411659 }, { "acc": 0.90428543, "epoch": 1.7351690899737362, "grad_norm": 7.218194007873535, "learning_rate": 6.485706855256583e-06, "loss": 0.54679866, "memory(GiB)": 34.88, "step": 64085, "train_speed(iter/s)": 0.41166 }, { "acc": 0.89590549, "epoch": 1.7353044702569518, "grad_norm": 10.298510551452637, "learning_rate": 6.485172543587062e-06, "loss": 0.53838787, "memory(GiB)": 34.88, "step": 64090, "train_speed(iter/s)": 0.411662 }, { "acc": 0.90864849, "epoch": 1.7354398505401674, "grad_norm": 3.0873124599456787, "learning_rate": 6.484638213319097e-06, "loss": 0.56165628, "memory(GiB)": 34.88, "step": 64095, "train_speed(iter/s)": 0.411664 }, { "acc": 0.90383377, "epoch": 1.735575230823383, "grad_norm": 6.802583694458008, "learning_rate": 6.484103864459385e-06, "loss": 0.47379227, "memory(GiB)": 34.88, "step": 64100, "train_speed(iter/s)": 0.411665 }, { "acc": 0.90751753, "epoch": 1.7357106111065983, "grad_norm": 6.700179100036621, "learning_rate": 6.4835694970146205e-06, "loss": 0.4896493, "memory(GiB)": 34.88, "step": 64105, "train_speed(iter/s)": 0.411667 }, { "acc": 0.90637856, "epoch": 1.735845991389814, "grad_norm": 11.62559986114502, "learning_rate": 6.483035110991495e-06, "loss": 0.57710648, "memory(GiB)": 34.88, "step": 64110, "train_speed(iter/s)": 0.411668 }, { "acc": 0.92343864, "epoch": 1.7359813716730295, "grad_norm": 4.978997707366943, "learning_rate": 6.482500706396707e-06, "loss": 0.35681229, "memory(GiB)": 34.88, "step": 64115, "train_speed(iter/s)": 0.41167 }, { "acc": 0.91710472, "epoch": 1.736116751956245, "grad_norm": 26.394399642944336, "learning_rate": 6.481966283236945e-06, "loss": 0.43773403, "memory(GiB)": 34.88, "step": 64120, "train_speed(iter/s)": 0.411672 }, { "acc": 0.91547356, "epoch": 1.7362521322394606, "grad_norm": 10.683049201965332, "learning_rate": 6.481431841518908e-06, "loss": 0.43428755, "memory(GiB)": 34.88, "step": 64125, "train_speed(iter/s)": 0.411674 }, { "acc": 0.90683403, "epoch": 1.7363875125226762, "grad_norm": 3.3180012702941895, "learning_rate": 6.480897381249289e-06, "loss": 0.47490368, "memory(GiB)": 34.88, "step": 64130, "train_speed(iter/s)": 0.411675 }, { "acc": 0.93369808, "epoch": 1.7365228928058918, "grad_norm": 9.58346176147461, "learning_rate": 6.480362902434785e-06, "loss": 0.34952099, "memory(GiB)": 34.88, "step": 64135, "train_speed(iter/s)": 0.411677 }, { "acc": 0.90939102, "epoch": 1.7366582730891071, "grad_norm": 5.172794342041016, "learning_rate": 6.479828405082089e-06, "loss": 0.39765909, "memory(GiB)": 34.88, "step": 64140, "train_speed(iter/s)": 0.411679 }, { "acc": 0.89485741, "epoch": 1.736793653372323, "grad_norm": 7.473167896270752, "learning_rate": 6.479293889197898e-06, "loss": 0.55453777, "memory(GiB)": 34.88, "step": 64145, "train_speed(iter/s)": 0.41168 }, { "acc": 0.89705076, "epoch": 1.7369290336555383, "grad_norm": 21.19635009765625, "learning_rate": 6.478759354788908e-06, "loss": 0.62102604, "memory(GiB)": 34.88, "step": 64150, "train_speed(iter/s)": 0.411682 }, { "acc": 0.90918608, "epoch": 1.737064413938754, "grad_norm": 5.275172233581543, "learning_rate": 6.478224801861814e-06, "loss": 0.4893054, "memory(GiB)": 34.88, "step": 64155, "train_speed(iter/s)": 0.411684 }, { "acc": 0.90038528, "epoch": 1.7371997942219695, "grad_norm": 11.33171558380127, "learning_rate": 6.477690230423317e-06, "loss": 0.50435114, "memory(GiB)": 34.88, "step": 64160, "train_speed(iter/s)": 0.411686 }, { "acc": 0.91075745, "epoch": 1.737335174505185, "grad_norm": 11.394111633300781, "learning_rate": 6.477155640480105e-06, "loss": 0.49850569, "memory(GiB)": 34.88, "step": 64165, "train_speed(iter/s)": 0.411687 }, { "acc": 0.87630014, "epoch": 1.7374705547884006, "grad_norm": 10.099441528320312, "learning_rate": 6.476621032038881e-06, "loss": 0.73129702, "memory(GiB)": 34.88, "step": 64170, "train_speed(iter/s)": 0.411689 }, { "acc": 0.89714661, "epoch": 1.7376059350716162, "grad_norm": 12.565461158752441, "learning_rate": 6.476086405106341e-06, "loss": 0.6284903, "memory(GiB)": 34.88, "step": 64175, "train_speed(iter/s)": 0.411691 }, { "acc": 0.91491995, "epoch": 1.7377413153548318, "grad_norm": 7.957925319671631, "learning_rate": 6.47555175968918e-06, "loss": 0.37590151, "memory(GiB)": 34.88, "step": 64180, "train_speed(iter/s)": 0.411693 }, { "acc": 0.9034996, "epoch": 1.7378766956380471, "grad_norm": 8.102652549743652, "learning_rate": 6.475017095794099e-06, "loss": 0.47623997, "memory(GiB)": 34.88, "step": 64185, "train_speed(iter/s)": 0.411695 }, { "acc": 0.90718737, "epoch": 1.738012075921263, "grad_norm": 9.646933555603027, "learning_rate": 6.474482413427794e-06, "loss": 0.52949061, "memory(GiB)": 34.88, "step": 64190, "train_speed(iter/s)": 0.411696 }, { "acc": 0.88772602, "epoch": 1.7381474562044783, "grad_norm": 11.200685501098633, "learning_rate": 6.473947712596963e-06, "loss": 0.55479341, "memory(GiB)": 34.88, "step": 64195, "train_speed(iter/s)": 0.411698 }, { "acc": 0.89272003, "epoch": 1.7382828364876939, "grad_norm": 6.524208068847656, "learning_rate": 6.473412993308304e-06, "loss": 0.58706512, "memory(GiB)": 34.88, "step": 64200, "train_speed(iter/s)": 0.411699 }, { "acc": 0.91521597, "epoch": 1.7384182167709095, "grad_norm": 11.02741813659668, "learning_rate": 6.472878255568514e-06, "loss": 0.52569008, "memory(GiB)": 34.88, "step": 64205, "train_speed(iter/s)": 0.411701 }, { "acc": 0.9263258, "epoch": 1.738553597054125, "grad_norm": 3.4198646545410156, "learning_rate": 6.472343499384295e-06, "loss": 0.36136751, "memory(GiB)": 34.88, "step": 64210, "train_speed(iter/s)": 0.411702 }, { "acc": 0.90761604, "epoch": 1.7386889773373406, "grad_norm": 5.477459907531738, "learning_rate": 6.471808724762342e-06, "loss": 0.53040404, "memory(GiB)": 34.88, "step": 64215, "train_speed(iter/s)": 0.411704 }, { "acc": 0.90116348, "epoch": 1.738824357620556, "grad_norm": 11.190383911132812, "learning_rate": 6.471273931709358e-06, "loss": 0.50011373, "memory(GiB)": 34.88, "step": 64220, "train_speed(iter/s)": 0.411706 }, { "acc": 0.90584211, "epoch": 1.7389597379037718, "grad_norm": 8.67333984375, "learning_rate": 6.470739120232039e-06, "loss": 0.45148382, "memory(GiB)": 34.88, "step": 64225, "train_speed(iter/s)": 0.411707 }, { "acc": 0.89869556, "epoch": 1.7390951181869871, "grad_norm": 9.305569648742676, "learning_rate": 6.470204290337087e-06, "loss": 0.53316498, "memory(GiB)": 34.88, "step": 64230, "train_speed(iter/s)": 0.411709 }, { "acc": 0.9150176, "epoch": 1.739230498470203, "grad_norm": 4.107235908508301, "learning_rate": 6.4696694420312004e-06, "loss": 0.33019443, "memory(GiB)": 34.88, "step": 64235, "train_speed(iter/s)": 0.411711 }, { "acc": 0.90054855, "epoch": 1.7393658787534183, "grad_norm": 6.418861389160156, "learning_rate": 6.469134575321081e-06, "loss": 0.60355988, "memory(GiB)": 34.88, "step": 64240, "train_speed(iter/s)": 0.411712 }, { "acc": 0.91496735, "epoch": 1.7395012590366339, "grad_norm": 5.8715949058532715, "learning_rate": 6.468599690213427e-06, "loss": 0.42303047, "memory(GiB)": 34.88, "step": 64245, "train_speed(iter/s)": 0.411714 }, { "acc": 0.9116169, "epoch": 1.7396366393198495, "grad_norm": 8.302652359008789, "learning_rate": 6.468064786714938e-06, "loss": 0.50967455, "memory(GiB)": 34.88, "step": 64250, "train_speed(iter/s)": 0.411715 }, { "acc": 0.92063532, "epoch": 1.739772019603065, "grad_norm": 15.931939125061035, "learning_rate": 6.467529864832317e-06, "loss": 0.47572951, "memory(GiB)": 34.88, "step": 64255, "train_speed(iter/s)": 0.411717 }, { "acc": 0.91711464, "epoch": 1.7399073998862806, "grad_norm": 9.6737642288208, "learning_rate": 6.466994924572264e-06, "loss": 0.46707087, "memory(GiB)": 34.88, "step": 64260, "train_speed(iter/s)": 0.411719 }, { "acc": 0.8837471, "epoch": 1.740042780169496, "grad_norm": 7.254148483276367, "learning_rate": 6.466459965941483e-06, "loss": 0.57133942, "memory(GiB)": 34.88, "step": 64265, "train_speed(iter/s)": 0.41172 }, { "acc": 0.90358953, "epoch": 1.7401781604527118, "grad_norm": 10.146495819091797, "learning_rate": 6.465924988946669e-06, "loss": 0.50048504, "memory(GiB)": 34.88, "step": 64270, "train_speed(iter/s)": 0.411722 }, { "acc": 0.9006259, "epoch": 1.7403135407359271, "grad_norm": 8.063065528869629, "learning_rate": 6.465389993594531e-06, "loss": 0.51331291, "memory(GiB)": 34.88, "step": 64275, "train_speed(iter/s)": 0.411724 }, { "acc": 0.90588188, "epoch": 1.7404489210191427, "grad_norm": 11.990523338317871, "learning_rate": 6.464854979891764e-06, "loss": 0.57738256, "memory(GiB)": 34.88, "step": 64280, "train_speed(iter/s)": 0.411726 }, { "acc": 0.89884949, "epoch": 1.7405843013023583, "grad_norm": 9.672014236450195, "learning_rate": 6.464319947845074e-06, "loss": 0.58792748, "memory(GiB)": 34.88, "step": 64285, "train_speed(iter/s)": 0.411727 }, { "acc": 0.91390209, "epoch": 1.7407196815855739, "grad_norm": 7.396196365356445, "learning_rate": 6.463784897461165e-06, "loss": 0.53845458, "memory(GiB)": 34.88, "step": 64290, "train_speed(iter/s)": 0.411729 }, { "acc": 0.88241243, "epoch": 1.7408550618687895, "grad_norm": 8.231208801269531, "learning_rate": 6.463249828746737e-06, "loss": 0.67515302, "memory(GiB)": 34.88, "step": 64295, "train_speed(iter/s)": 0.411731 }, { "acc": 0.89711752, "epoch": 1.7409904421520048, "grad_norm": 8.463919639587402, "learning_rate": 6.462714741708491e-06, "loss": 0.6427743, "memory(GiB)": 34.88, "step": 64300, "train_speed(iter/s)": 0.411732 }, { "acc": 0.91494694, "epoch": 1.7411258224352206, "grad_norm": 9.452296257019043, "learning_rate": 6.462179636353133e-06, "loss": 0.52622652, "memory(GiB)": 34.88, "step": 64305, "train_speed(iter/s)": 0.411734 }, { "acc": 0.91213684, "epoch": 1.741261202718436, "grad_norm": 6.594911575317383, "learning_rate": 6.461644512687367e-06, "loss": 0.53570929, "memory(GiB)": 34.88, "step": 64310, "train_speed(iter/s)": 0.411735 }, { "acc": 0.88731976, "epoch": 1.7413965830016518, "grad_norm": 16.49452781677246, "learning_rate": 6.461109370717893e-06, "loss": 0.65874124, "memory(GiB)": 34.88, "step": 64315, "train_speed(iter/s)": 0.411737 }, { "acc": 0.90989847, "epoch": 1.7415319632848671, "grad_norm": 13.603205680847168, "learning_rate": 6.460574210451416e-06, "loss": 0.48389425, "memory(GiB)": 34.88, "step": 64320, "train_speed(iter/s)": 0.411739 }, { "acc": 0.92193699, "epoch": 1.7416673435680827, "grad_norm": 6.739342212677002, "learning_rate": 6.460039031894643e-06, "loss": 0.44969816, "memory(GiB)": 34.88, "step": 64325, "train_speed(iter/s)": 0.41174 }, { "acc": 0.92327232, "epoch": 1.7418027238512983, "grad_norm": 4.1595892906188965, "learning_rate": 6.459503835054274e-06, "loss": 0.37864609, "memory(GiB)": 34.88, "step": 64330, "train_speed(iter/s)": 0.411742 }, { "acc": 0.9136673, "epoch": 1.7419381041345139, "grad_norm": 12.066861152648926, "learning_rate": 6.458968619937015e-06, "loss": 0.46646566, "memory(GiB)": 34.88, "step": 64335, "train_speed(iter/s)": 0.411744 }, { "acc": 0.88795414, "epoch": 1.7420734844177295, "grad_norm": 29.273265838623047, "learning_rate": 6.45843338654957e-06, "loss": 0.6092236, "memory(GiB)": 34.88, "step": 64340, "train_speed(iter/s)": 0.411745 }, { "acc": 0.91527176, "epoch": 1.7422088647009448, "grad_norm": 10.326462745666504, "learning_rate": 6.457898134898646e-06, "loss": 0.44558897, "memory(GiB)": 34.88, "step": 64345, "train_speed(iter/s)": 0.411747 }, { "acc": 0.90817442, "epoch": 1.7423442449841606, "grad_norm": 12.08212947845459, "learning_rate": 6.457362864990945e-06, "loss": 0.52764149, "memory(GiB)": 34.88, "step": 64350, "train_speed(iter/s)": 0.411748 }, { "acc": 0.92438879, "epoch": 1.742479625267376, "grad_norm": 8.64794921875, "learning_rate": 6.4568275768331745e-06, "loss": 0.45349383, "memory(GiB)": 34.88, "step": 64355, "train_speed(iter/s)": 0.41175 }, { "acc": 0.91213732, "epoch": 1.7426150055505916, "grad_norm": 6.208333969116211, "learning_rate": 6.456292270432041e-06, "loss": 0.44570518, "memory(GiB)": 34.88, "step": 64360, "train_speed(iter/s)": 0.411751 }, { "acc": 0.92204742, "epoch": 1.7427503858338071, "grad_norm": 4.920225143432617, "learning_rate": 6.455756945794246e-06, "loss": 0.40706563, "memory(GiB)": 34.88, "step": 64365, "train_speed(iter/s)": 0.411753 }, { "acc": 0.91961422, "epoch": 1.7428857661170227, "grad_norm": 28.101831436157227, "learning_rate": 6.4552216029265006e-06, "loss": 0.48165073, "memory(GiB)": 34.88, "step": 64370, "train_speed(iter/s)": 0.411755 }, { "acc": 0.90129623, "epoch": 1.7430211464002383, "grad_norm": 9.972333908081055, "learning_rate": 6.454686241835507e-06, "loss": 0.57000885, "memory(GiB)": 34.88, "step": 64375, "train_speed(iter/s)": 0.411756 }, { "acc": 0.91903687, "epoch": 1.7431565266834537, "grad_norm": 18.203645706176758, "learning_rate": 6.454150862527973e-06, "loss": 0.45401468, "memory(GiB)": 34.88, "step": 64380, "train_speed(iter/s)": 0.411758 }, { "acc": 0.91422634, "epoch": 1.7432919069666695, "grad_norm": 4.853491306304932, "learning_rate": 6.453615465010607e-06, "loss": 0.46402197, "memory(GiB)": 34.88, "step": 64385, "train_speed(iter/s)": 0.41176 }, { "acc": 0.91517773, "epoch": 1.7434272872498848, "grad_norm": 7.222736358642578, "learning_rate": 6.453080049290114e-06, "loss": 0.44185266, "memory(GiB)": 34.88, "step": 64390, "train_speed(iter/s)": 0.411761 }, { "acc": 0.90686331, "epoch": 1.7435626675331006, "grad_norm": 10.031990051269531, "learning_rate": 6.4525446153732e-06, "loss": 0.51871495, "memory(GiB)": 34.88, "step": 64395, "train_speed(iter/s)": 0.411763 }, { "acc": 0.91756067, "epoch": 1.743698047816316, "grad_norm": 3.9373512268066406, "learning_rate": 6.452009163266576e-06, "loss": 0.40979681, "memory(GiB)": 34.88, "step": 64400, "train_speed(iter/s)": 0.411765 }, { "acc": 0.91389675, "epoch": 1.7438334280995316, "grad_norm": 4.390132427215576, "learning_rate": 6.4514736929769465e-06, "loss": 0.45232906, "memory(GiB)": 34.88, "step": 64405, "train_speed(iter/s)": 0.411766 }, { "acc": 0.88014698, "epoch": 1.7439688083827471, "grad_norm": 10.675315856933594, "learning_rate": 6.450938204511021e-06, "loss": 0.69194221, "memory(GiB)": 34.88, "step": 64410, "train_speed(iter/s)": 0.411768 }, { "acc": 0.91543465, "epoch": 1.7441041886659627, "grad_norm": 23.76213264465332, "learning_rate": 6.450402697875508e-06, "loss": 0.40209951, "memory(GiB)": 34.88, "step": 64415, "train_speed(iter/s)": 0.41177 }, { "acc": 0.91545525, "epoch": 1.7442395689491783, "grad_norm": 5.658894062042236, "learning_rate": 6.449867173077112e-06, "loss": 0.38114195, "memory(GiB)": 34.88, "step": 64420, "train_speed(iter/s)": 0.411772 }, { "acc": 0.91163435, "epoch": 1.7443749492323937, "grad_norm": 5.227316379547119, "learning_rate": 6.449331630122546e-06, "loss": 0.41255708, "memory(GiB)": 34.88, "step": 64425, "train_speed(iter/s)": 0.411773 }, { "acc": 0.90590534, "epoch": 1.7445103295156095, "grad_norm": 9.985917091369629, "learning_rate": 6.4487960690185145e-06, "loss": 0.5317543, "memory(GiB)": 34.88, "step": 64430, "train_speed(iter/s)": 0.411775 }, { "acc": 0.92160187, "epoch": 1.7446457097988248, "grad_norm": 6.198678016662598, "learning_rate": 6.448260489771731e-06, "loss": 0.46501746, "memory(GiB)": 34.88, "step": 64435, "train_speed(iter/s)": 0.411777 }, { "acc": 0.90517387, "epoch": 1.7447810900820404, "grad_norm": 12.844754219055176, "learning_rate": 6.447724892388903e-06, "loss": 0.48423042, "memory(GiB)": 34.88, "step": 64440, "train_speed(iter/s)": 0.411778 }, { "acc": 0.90810585, "epoch": 1.744916470365256, "grad_norm": 6.52907133102417, "learning_rate": 6.447189276876737e-06, "loss": 0.54589596, "memory(GiB)": 34.88, "step": 64445, "train_speed(iter/s)": 0.411779 }, { "acc": 0.91054897, "epoch": 1.7450518506484716, "grad_norm": 9.133142471313477, "learning_rate": 6.446653643241947e-06, "loss": 0.45669866, "memory(GiB)": 34.88, "step": 64450, "train_speed(iter/s)": 0.411781 }, { "acc": 0.92598305, "epoch": 1.7451872309316872, "grad_norm": 12.276809692382812, "learning_rate": 6.446117991491241e-06, "loss": 0.43574896, "memory(GiB)": 34.88, "step": 64455, "train_speed(iter/s)": 0.411783 }, { "acc": 0.90509739, "epoch": 1.7453226112149025, "grad_norm": 6.505612373352051, "learning_rate": 6.445582321631329e-06, "loss": 0.53201342, "memory(GiB)": 34.88, "step": 64460, "train_speed(iter/s)": 0.411785 }, { "acc": 0.90626202, "epoch": 1.7454579914981183, "grad_norm": 9.359009742736816, "learning_rate": 6.44504663366892e-06, "loss": 0.45581975, "memory(GiB)": 34.88, "step": 64465, "train_speed(iter/s)": 0.411786 }, { "acc": 0.90888309, "epoch": 1.7455933717813337, "grad_norm": 12.374969482421875, "learning_rate": 6.444510927610726e-06, "loss": 0.49226189, "memory(GiB)": 34.88, "step": 64470, "train_speed(iter/s)": 0.411788 }, { "acc": 0.91831532, "epoch": 1.7457287520645495, "grad_norm": 8.078639030456543, "learning_rate": 6.443975203463459e-06, "loss": 0.42590351, "memory(GiB)": 34.88, "step": 64475, "train_speed(iter/s)": 0.411789 }, { "acc": 0.89895725, "epoch": 1.7458641323477648, "grad_norm": 13.360374450683594, "learning_rate": 6.4434394612338284e-06, "loss": 0.61259623, "memory(GiB)": 34.88, "step": 64480, "train_speed(iter/s)": 0.411791 }, { "acc": 0.91370983, "epoch": 1.7459995126309804, "grad_norm": 7.301697731018066, "learning_rate": 6.4429037009285455e-06, "loss": 0.43158112, "memory(GiB)": 34.88, "step": 64485, "train_speed(iter/s)": 0.411793 }, { "acc": 0.91886311, "epoch": 1.746134892914196, "grad_norm": 5.504431247711182, "learning_rate": 6.442367922554321e-06, "loss": 0.44241066, "memory(GiB)": 34.88, "step": 64490, "train_speed(iter/s)": 0.411794 }, { "acc": 0.92747936, "epoch": 1.7462702731974116, "grad_norm": 10.110799789428711, "learning_rate": 6.4418321261178695e-06, "loss": 0.39147747, "memory(GiB)": 34.88, "step": 64495, "train_speed(iter/s)": 0.411796 }, { "acc": 0.90802898, "epoch": 1.7464056534806272, "grad_norm": 9.130515098571777, "learning_rate": 6.4412963116259e-06, "loss": 0.48751044, "memory(GiB)": 34.88, "step": 64500, "train_speed(iter/s)": 0.411798 }, { "acc": 0.91023254, "epoch": 1.7465410337638425, "grad_norm": 6.865330219268799, "learning_rate": 6.440760479085124e-06, "loss": 0.47047281, "memory(GiB)": 34.88, "step": 64505, "train_speed(iter/s)": 0.4118 }, { "acc": 0.90912743, "epoch": 1.7466764140470583, "grad_norm": 11.472615242004395, "learning_rate": 6.440224628502257e-06, "loss": 0.55900407, "memory(GiB)": 34.88, "step": 64510, "train_speed(iter/s)": 0.411801 }, { "acc": 0.88693104, "epoch": 1.7468117943302737, "grad_norm": 14.957466125488281, "learning_rate": 6.439688759884008e-06, "loss": 0.5511446, "memory(GiB)": 34.88, "step": 64515, "train_speed(iter/s)": 0.411802 }, { "acc": 0.90132627, "epoch": 1.7469471746134893, "grad_norm": 12.890380859375, "learning_rate": 6.439152873237094e-06, "loss": 0.53598719, "memory(GiB)": 34.88, "step": 64520, "train_speed(iter/s)": 0.411804 }, { "acc": 0.92088442, "epoch": 1.7470825548967048, "grad_norm": 10.375981330871582, "learning_rate": 6.4386169685682255e-06, "loss": 0.37488737, "memory(GiB)": 34.88, "step": 64525, "train_speed(iter/s)": 0.411806 }, { "acc": 0.91402817, "epoch": 1.7472179351799204, "grad_norm": 6.4106526374816895, "learning_rate": 6.438081045884116e-06, "loss": 0.4087009, "memory(GiB)": 34.88, "step": 64530, "train_speed(iter/s)": 0.411808 }, { "acc": 0.91278801, "epoch": 1.747353315463136, "grad_norm": 7.306076526641846, "learning_rate": 6.43754510519148e-06, "loss": 0.44511805, "memory(GiB)": 34.88, "step": 64535, "train_speed(iter/s)": 0.411809 }, { "acc": 0.91965513, "epoch": 1.7474886957463514, "grad_norm": 6.144583225250244, "learning_rate": 6.437009146497028e-06, "loss": 0.41030788, "memory(GiB)": 34.88, "step": 64540, "train_speed(iter/s)": 0.411811 }, { "acc": 0.92267895, "epoch": 1.7476240760295672, "grad_norm": 11.086174964904785, "learning_rate": 6.436473169807478e-06, "loss": 0.39769032, "memory(GiB)": 34.88, "step": 64545, "train_speed(iter/s)": 0.411813 }, { "acc": 0.91303673, "epoch": 1.7477594563127825, "grad_norm": 28.596342086791992, "learning_rate": 6.4359371751295405e-06, "loss": 0.41174917, "memory(GiB)": 34.88, "step": 64550, "train_speed(iter/s)": 0.411814 }, { "acc": 0.90453682, "epoch": 1.7478948365959983, "grad_norm": 14.995087623596191, "learning_rate": 6.435401162469933e-06, "loss": 0.52375593, "memory(GiB)": 34.88, "step": 64555, "train_speed(iter/s)": 0.411816 }, { "acc": 0.89986839, "epoch": 1.7480302168792137, "grad_norm": 7.963647365570068, "learning_rate": 6.4348651318353674e-06, "loss": 0.59675016, "memory(GiB)": 34.88, "step": 64560, "train_speed(iter/s)": 0.411817 }, { "acc": 0.9251133, "epoch": 1.7481655971624293, "grad_norm": 5.95093297958374, "learning_rate": 6.434329083232562e-06, "loss": 0.35698874, "memory(GiB)": 34.88, "step": 64565, "train_speed(iter/s)": 0.411819 }, { "acc": 0.9127244, "epoch": 1.7483009774456448, "grad_norm": 10.839103698730469, "learning_rate": 6.43379301666823e-06, "loss": 0.44642439, "memory(GiB)": 34.88, "step": 64570, "train_speed(iter/s)": 0.411821 }, { "acc": 0.92537308, "epoch": 1.7484363577288604, "grad_norm": 8.779783248901367, "learning_rate": 6.433256932149085e-06, "loss": 0.40599275, "memory(GiB)": 34.88, "step": 64575, "train_speed(iter/s)": 0.411823 }, { "acc": 0.9138032, "epoch": 1.748571738012076, "grad_norm": 4.809426784515381, "learning_rate": 6.432720829681846e-06, "loss": 0.48041191, "memory(GiB)": 34.88, "step": 64580, "train_speed(iter/s)": 0.411824 }, { "acc": 0.90168171, "epoch": 1.7487071182952914, "grad_norm": 11.842273712158203, "learning_rate": 6.432184709273225e-06, "loss": 0.54200974, "memory(GiB)": 34.88, "step": 64585, "train_speed(iter/s)": 0.411826 }, { "acc": 0.92500496, "epoch": 1.7488424985785072, "grad_norm": 6.768985271453857, "learning_rate": 6.431648570929941e-06, "loss": 0.44202285, "memory(GiB)": 34.88, "step": 64590, "train_speed(iter/s)": 0.411828 }, { "acc": 0.92624378, "epoch": 1.7489778788617225, "grad_norm": 6.208236217498779, "learning_rate": 6.4311124146587075e-06, "loss": 0.39123516, "memory(GiB)": 34.88, "step": 64595, "train_speed(iter/s)": 0.41183 }, { "acc": 0.90851202, "epoch": 1.749113259144938, "grad_norm": 13.379477500915527, "learning_rate": 6.430576240466244e-06, "loss": 0.52779679, "memory(GiB)": 34.88, "step": 64600, "train_speed(iter/s)": 0.411831 }, { "acc": 0.90292435, "epoch": 1.7492486394281537, "grad_norm": 10.124029159545898, "learning_rate": 6.430040048359264e-06, "loss": 0.54782228, "memory(GiB)": 34.88, "step": 64605, "train_speed(iter/s)": 0.411833 }, { "acc": 0.90218163, "epoch": 1.7493840197113693, "grad_norm": 17.597688674926758, "learning_rate": 6.429503838344486e-06, "loss": 0.59444327, "memory(GiB)": 34.88, "step": 64610, "train_speed(iter/s)": 0.411835 }, { "acc": 0.92080765, "epoch": 1.7495193999945848, "grad_norm": 8.461650848388672, "learning_rate": 6.428967610428628e-06, "loss": 0.47448597, "memory(GiB)": 34.88, "step": 64615, "train_speed(iter/s)": 0.411836 }, { "acc": 0.91074963, "epoch": 1.7496547802778002, "grad_norm": 4.844122886657715, "learning_rate": 6.428431364618406e-06, "loss": 0.53955879, "memory(GiB)": 34.88, "step": 64620, "train_speed(iter/s)": 0.411838 }, { "acc": 0.91167622, "epoch": 1.749790160561016, "grad_norm": 7.68276834487915, "learning_rate": 6.427895100920538e-06, "loss": 0.43057179, "memory(GiB)": 34.88, "step": 64625, "train_speed(iter/s)": 0.411839 }, { "acc": 0.90242529, "epoch": 1.7499255408442314, "grad_norm": 12.141873359680176, "learning_rate": 6.42735881934174e-06, "loss": 0.59024973, "memory(GiB)": 34.88, "step": 64630, "train_speed(iter/s)": 0.411841 }, { "acc": 0.90752668, "epoch": 1.7500609211274472, "grad_norm": 20.02667236328125, "learning_rate": 6.4268225198887325e-06, "loss": 0.53648272, "memory(GiB)": 34.88, "step": 64635, "train_speed(iter/s)": 0.411843 }, { "acc": 0.89659004, "epoch": 1.7501963014106625, "grad_norm": 7.764914512634277, "learning_rate": 6.426286202568232e-06, "loss": 0.53570142, "memory(GiB)": 34.88, "step": 64640, "train_speed(iter/s)": 0.411844 }, { "acc": 0.9003397, "epoch": 1.750331681693878, "grad_norm": 7.082955360412598, "learning_rate": 6.425749867386959e-06, "loss": 0.58526602, "memory(GiB)": 34.88, "step": 64645, "train_speed(iter/s)": 0.411846 }, { "acc": 0.89654083, "epoch": 1.7504670619770937, "grad_norm": 11.395158767700195, "learning_rate": 6.42521351435163e-06, "loss": 0.5362535, "memory(GiB)": 34.88, "step": 64650, "train_speed(iter/s)": 0.411848 }, { "acc": 0.90528107, "epoch": 1.7506024422603093, "grad_norm": 11.225257873535156, "learning_rate": 6.424677143468966e-06, "loss": 0.5815805, "memory(GiB)": 34.88, "step": 64655, "train_speed(iter/s)": 0.411849 }, { "acc": 0.89322491, "epoch": 1.7507378225435248, "grad_norm": 9.43542766571045, "learning_rate": 6.424140754745683e-06, "loss": 0.56572404, "memory(GiB)": 34.88, "step": 64660, "train_speed(iter/s)": 0.411851 }, { "acc": 0.90131245, "epoch": 1.7508732028267402, "grad_norm": 7.165745258331299, "learning_rate": 6.423604348188503e-06, "loss": 0.48478985, "memory(GiB)": 34.88, "step": 64665, "train_speed(iter/s)": 0.411852 }, { "acc": 0.9131773, "epoch": 1.751008583109956, "grad_norm": 6.780421257019043, "learning_rate": 6.423067923804145e-06, "loss": 0.45965667, "memory(GiB)": 34.88, "step": 64670, "train_speed(iter/s)": 0.411854 }, { "acc": 0.9008316, "epoch": 1.7511439633931714, "grad_norm": 12.097514152526855, "learning_rate": 6.422531481599327e-06, "loss": 0.54076614, "memory(GiB)": 34.88, "step": 64675, "train_speed(iter/s)": 0.411855 }, { "acc": 0.90561705, "epoch": 1.751279343676387, "grad_norm": 12.783254623413086, "learning_rate": 6.4219950215807725e-06, "loss": 0.57888861, "memory(GiB)": 34.88, "step": 64680, "train_speed(iter/s)": 0.411857 }, { "acc": 0.89877071, "epoch": 1.7514147239596025, "grad_norm": 5.563540935516357, "learning_rate": 6.421458543755197e-06, "loss": 0.53373556, "memory(GiB)": 34.88, "step": 64685, "train_speed(iter/s)": 0.411858 }, { "acc": 0.90113382, "epoch": 1.751550104242818, "grad_norm": 6.301764965057373, "learning_rate": 6.420922048129325e-06, "loss": 0.54954786, "memory(GiB)": 34.88, "step": 64690, "train_speed(iter/s)": 0.41186 }, { "acc": 0.91057987, "epoch": 1.7516854845260337, "grad_norm": 7.692788600921631, "learning_rate": 6.420385534709875e-06, "loss": 0.49124317, "memory(GiB)": 34.88, "step": 64695, "train_speed(iter/s)": 0.411862 }, { "acc": 0.90134315, "epoch": 1.751820864809249, "grad_norm": 7.465754985809326, "learning_rate": 6.419849003503569e-06, "loss": 0.46111565, "memory(GiB)": 34.88, "step": 64700, "train_speed(iter/s)": 0.411863 }, { "acc": 0.90995026, "epoch": 1.7519562450924648, "grad_norm": 8.982037544250488, "learning_rate": 6.419312454517129e-06, "loss": 0.51479645, "memory(GiB)": 34.88, "step": 64705, "train_speed(iter/s)": 0.411865 }, { "acc": 0.92207108, "epoch": 1.7520916253756802, "grad_norm": 6.771363258361816, "learning_rate": 6.4187758877572734e-06, "loss": 0.36824775, "memory(GiB)": 34.88, "step": 64710, "train_speed(iter/s)": 0.411867 }, { "acc": 0.89587317, "epoch": 1.752227005658896, "grad_norm": 7.542675495147705, "learning_rate": 6.418239303230728e-06, "loss": 0.55094357, "memory(GiB)": 34.88, "step": 64715, "train_speed(iter/s)": 0.411869 }, { "acc": 0.89776592, "epoch": 1.7523623859421114, "grad_norm": 9.31776237487793, "learning_rate": 6.417702700944208e-06, "loss": 0.55764542, "memory(GiB)": 34.88, "step": 64720, "train_speed(iter/s)": 0.41187 }, { "acc": 0.91014671, "epoch": 1.752497766225327, "grad_norm": 4.561777114868164, "learning_rate": 6.417166080904442e-06, "loss": 0.42612844, "memory(GiB)": 34.88, "step": 64725, "train_speed(iter/s)": 0.411872 }, { "acc": 0.9037034, "epoch": 1.7526331465085425, "grad_norm": 5.536306858062744, "learning_rate": 6.416629443118148e-06, "loss": 0.48927751, "memory(GiB)": 34.88, "step": 64730, "train_speed(iter/s)": 0.411873 }, { "acc": 0.91174173, "epoch": 1.752768526791758, "grad_norm": 16.10060691833496, "learning_rate": 6.416092787592051e-06, "loss": 0.49246187, "memory(GiB)": 34.88, "step": 64735, "train_speed(iter/s)": 0.411875 }, { "acc": 0.91703587, "epoch": 1.7529039070749737, "grad_norm": 9.70429515838623, "learning_rate": 6.415556114332874e-06, "loss": 0.43777938, "memory(GiB)": 34.88, "step": 64740, "train_speed(iter/s)": 0.411876 }, { "acc": 0.90677567, "epoch": 1.753039287358189, "grad_norm": 4.617502212524414, "learning_rate": 6.415019423347337e-06, "loss": 0.4339469, "memory(GiB)": 34.88, "step": 64745, "train_speed(iter/s)": 0.411878 }, { "acc": 0.92746124, "epoch": 1.7531746676414048, "grad_norm": 7.919836521148682, "learning_rate": 6.414482714642165e-06, "loss": 0.39346938, "memory(GiB)": 34.88, "step": 64750, "train_speed(iter/s)": 0.41188 }, { "acc": 0.90010433, "epoch": 1.7533100479246202, "grad_norm": 12.015487670898438, "learning_rate": 6.413945988224081e-06, "loss": 0.55435457, "memory(GiB)": 34.88, "step": 64755, "train_speed(iter/s)": 0.411881 }, { "acc": 0.92083693, "epoch": 1.7534454282078358, "grad_norm": 13.824785232543945, "learning_rate": 6.413409244099809e-06, "loss": 0.35513372, "memory(GiB)": 34.88, "step": 64760, "train_speed(iter/s)": 0.411883 }, { "acc": 0.91856537, "epoch": 1.7535808084910514, "grad_norm": 5.817317962646484, "learning_rate": 6.412872482276071e-06, "loss": 0.4627296, "memory(GiB)": 34.88, "step": 64765, "train_speed(iter/s)": 0.411885 }, { "acc": 0.90883293, "epoch": 1.753716188774267, "grad_norm": 11.138781547546387, "learning_rate": 6.4123357027595934e-06, "loss": 0.48145685, "memory(GiB)": 34.88, "step": 64770, "train_speed(iter/s)": 0.411886 }, { "acc": 0.91632862, "epoch": 1.7538515690574825, "grad_norm": 50.23666763305664, "learning_rate": 6.411798905557099e-06, "loss": 0.42780762, "memory(GiB)": 34.88, "step": 64775, "train_speed(iter/s)": 0.411888 }, { "acc": 0.90696297, "epoch": 1.7539869493406979, "grad_norm": 19.198705673217773, "learning_rate": 6.411262090675312e-06, "loss": 0.49213729, "memory(GiB)": 34.88, "step": 64780, "train_speed(iter/s)": 0.41189 }, { "acc": 0.89692965, "epoch": 1.7541223296239137, "grad_norm": 6.857916831970215, "learning_rate": 6.410725258120959e-06, "loss": 0.65235257, "memory(GiB)": 34.88, "step": 64785, "train_speed(iter/s)": 0.411891 }, { "acc": 0.89832993, "epoch": 1.754257709907129, "grad_norm": 17.77370262145996, "learning_rate": 6.410188407900764e-06, "loss": 0.61693459, "memory(GiB)": 34.88, "step": 64790, "train_speed(iter/s)": 0.411893 }, { "acc": 0.89574213, "epoch": 1.7543930901903448, "grad_norm": 8.980489730834961, "learning_rate": 6.409651540021451e-06, "loss": 0.52431464, "memory(GiB)": 34.88, "step": 64795, "train_speed(iter/s)": 0.411895 }, { "acc": 0.90942631, "epoch": 1.7545284704735602, "grad_norm": 4.308132648468018, "learning_rate": 6.409114654489745e-06, "loss": 0.52583208, "memory(GiB)": 34.88, "step": 64800, "train_speed(iter/s)": 0.411897 }, { "acc": 0.91057081, "epoch": 1.7546638507567758, "grad_norm": 7.215863227844238, "learning_rate": 6.408577751312374e-06, "loss": 0.3816607, "memory(GiB)": 34.88, "step": 64805, "train_speed(iter/s)": 0.411898 }, { "acc": 0.90364332, "epoch": 1.7547992310399914, "grad_norm": 32.27444076538086, "learning_rate": 6.408040830496061e-06, "loss": 0.42434397, "memory(GiB)": 34.88, "step": 64810, "train_speed(iter/s)": 0.4119 }, { "acc": 0.89814262, "epoch": 1.754934611323207, "grad_norm": 16.130870819091797, "learning_rate": 6.407503892047533e-06, "loss": 0.58557215, "memory(GiB)": 34.88, "step": 64815, "train_speed(iter/s)": 0.411902 }, { "acc": 0.91594658, "epoch": 1.7550699916064225, "grad_norm": 17.30261993408203, "learning_rate": 6.4069669359735185e-06, "loss": 0.47100725, "memory(GiB)": 34.88, "step": 64820, "train_speed(iter/s)": 0.411904 }, { "acc": 0.90371532, "epoch": 1.7552053718896379, "grad_norm": 10.055100440979004, "learning_rate": 6.406429962280739e-06, "loss": 0.58748674, "memory(GiB)": 34.88, "step": 64825, "train_speed(iter/s)": 0.411905 }, { "acc": 0.9063735, "epoch": 1.7553407521728537, "grad_norm": 5.953189849853516, "learning_rate": 6.405892970975929e-06, "loss": 0.44322977, "memory(GiB)": 34.88, "step": 64830, "train_speed(iter/s)": 0.411907 }, { "acc": 0.9231432, "epoch": 1.755476132456069, "grad_norm": 5.257235050201416, "learning_rate": 6.405355962065806e-06, "loss": 0.36438403, "memory(GiB)": 34.88, "step": 64835, "train_speed(iter/s)": 0.411909 }, { "acc": 0.91058922, "epoch": 1.7556115127392846, "grad_norm": 9.093733787536621, "learning_rate": 6.4048189355571036e-06, "loss": 0.48713036, "memory(GiB)": 34.88, "step": 64840, "train_speed(iter/s)": 0.41191 }, { "acc": 0.92448578, "epoch": 1.7557468930225002, "grad_norm": 5.754497051239014, "learning_rate": 6.404281891456545e-06, "loss": 0.38037076, "memory(GiB)": 34.88, "step": 64845, "train_speed(iter/s)": 0.411912 }, { "acc": 0.91091938, "epoch": 1.7558822733057158, "grad_norm": 8.57657241821289, "learning_rate": 6.40374482977086e-06, "loss": 0.52578983, "memory(GiB)": 34.88, "step": 64850, "train_speed(iter/s)": 0.411913 }, { "acc": 0.93748379, "epoch": 1.7560176535889314, "grad_norm": 16.083681106567383, "learning_rate": 6.403207750506778e-06, "loss": 0.36432128, "memory(GiB)": 34.88, "step": 64855, "train_speed(iter/s)": 0.411915 }, { "acc": 0.91793156, "epoch": 1.7561530338721467, "grad_norm": 5.970615386962891, "learning_rate": 6.402670653671024e-06, "loss": 0.40640717, "memory(GiB)": 34.88, "step": 64860, "train_speed(iter/s)": 0.411916 }, { "acc": 0.89205809, "epoch": 1.7562884141553625, "grad_norm": 11.656224250793457, "learning_rate": 6.402133539270328e-06, "loss": 0.62670403, "memory(GiB)": 34.88, "step": 64865, "train_speed(iter/s)": 0.411918 }, { "acc": 0.90229454, "epoch": 1.7564237944385779, "grad_norm": 8.246770858764648, "learning_rate": 6.401596407311418e-06, "loss": 0.50816908, "memory(GiB)": 34.88, "step": 64870, "train_speed(iter/s)": 0.41192 }, { "acc": 0.91625748, "epoch": 1.7565591747217937, "grad_norm": 5.039968967437744, "learning_rate": 6.401059257801021e-06, "loss": 0.37520814, "memory(GiB)": 34.88, "step": 64875, "train_speed(iter/s)": 0.411921 }, { "acc": 0.90738726, "epoch": 1.756694555005009, "grad_norm": 7.8632683753967285, "learning_rate": 6.400522090745869e-06, "loss": 0.52064815, "memory(GiB)": 34.88, "step": 64880, "train_speed(iter/s)": 0.411923 }, { "acc": 0.91867104, "epoch": 1.7568299352882246, "grad_norm": 14.286227226257324, "learning_rate": 6.399984906152688e-06, "loss": 0.41919503, "memory(GiB)": 34.88, "step": 64885, "train_speed(iter/s)": 0.411925 }, { "acc": 0.91503601, "epoch": 1.7569653155714402, "grad_norm": 19.17280387878418, "learning_rate": 6.399447704028209e-06, "loss": 0.52336273, "memory(GiB)": 34.88, "step": 64890, "train_speed(iter/s)": 0.411926 }, { "acc": 0.89556961, "epoch": 1.7571006958546558, "grad_norm": 8.001901626586914, "learning_rate": 6.398910484379159e-06, "loss": 0.6134294, "memory(GiB)": 34.88, "step": 64895, "train_speed(iter/s)": 0.411928 }, { "acc": 0.90842342, "epoch": 1.7572360761378714, "grad_norm": 10.186614036560059, "learning_rate": 6.398373247212273e-06, "loss": 0.52303457, "memory(GiB)": 34.88, "step": 64900, "train_speed(iter/s)": 0.41193 }, { "acc": 0.89412489, "epoch": 1.7573714564210867, "grad_norm": 6.928090572357178, "learning_rate": 6.397835992534275e-06, "loss": 0.53755932, "memory(GiB)": 34.88, "step": 64905, "train_speed(iter/s)": 0.411931 }, { "acc": 0.90465336, "epoch": 1.7575068367043025, "grad_norm": 10.39950942993164, "learning_rate": 6.3972987203519e-06, "loss": 0.49166965, "memory(GiB)": 34.88, "step": 64910, "train_speed(iter/s)": 0.411933 }, { "acc": 0.89041739, "epoch": 1.7576422169875179, "grad_norm": 14.776330947875977, "learning_rate": 6.396761430671876e-06, "loss": 0.65722847, "memory(GiB)": 34.88, "step": 64915, "train_speed(iter/s)": 0.411935 }, { "acc": 0.88014641, "epoch": 1.7577775972707335, "grad_norm": 12.137866973876953, "learning_rate": 6.396224123500933e-06, "loss": 0.71794405, "memory(GiB)": 34.88, "step": 64920, "train_speed(iter/s)": 0.411936 }, { "acc": 0.91493187, "epoch": 1.757912977553949, "grad_norm": 15.279716491699219, "learning_rate": 6.395686798845804e-06, "loss": 0.47616634, "memory(GiB)": 34.88, "step": 64925, "train_speed(iter/s)": 0.411938 }, { "acc": 0.90678263, "epoch": 1.7580483578371646, "grad_norm": 10.94863510131836, "learning_rate": 6.395149456713217e-06, "loss": 0.51402235, "memory(GiB)": 34.88, "step": 64930, "train_speed(iter/s)": 0.41194 }, { "acc": 0.90521336, "epoch": 1.7581837381203802, "grad_norm": 6.453639507293701, "learning_rate": 6.394612097109907e-06, "loss": 0.55311475, "memory(GiB)": 34.88, "step": 64935, "train_speed(iter/s)": 0.411941 }, { "acc": 0.91068134, "epoch": 1.7583191184035956, "grad_norm": 14.218395233154297, "learning_rate": 6.3940747200426025e-06, "loss": 0.47170725, "memory(GiB)": 34.88, "step": 64940, "train_speed(iter/s)": 0.411943 }, { "acc": 0.90421581, "epoch": 1.7584544986868114, "grad_norm": 24.451622009277344, "learning_rate": 6.3935373255180365e-06, "loss": 0.46593885, "memory(GiB)": 34.88, "step": 64945, "train_speed(iter/s)": 0.411944 }, { "acc": 0.89254026, "epoch": 1.7585898789700267, "grad_norm": 6.939149856567383, "learning_rate": 6.392999913542941e-06, "loss": 0.54631472, "memory(GiB)": 34.88, "step": 64950, "train_speed(iter/s)": 0.411946 }, { "acc": 0.90749073, "epoch": 1.7587252592532425, "grad_norm": 13.845427513122559, "learning_rate": 6.392462484124047e-06, "loss": 0.54175329, "memory(GiB)": 34.88, "step": 64955, "train_speed(iter/s)": 0.411947 }, { "acc": 0.92192078, "epoch": 1.7588606395364579, "grad_norm": 5.746909141540527, "learning_rate": 6.391925037268088e-06, "loss": 0.3832062, "memory(GiB)": 34.88, "step": 64960, "train_speed(iter/s)": 0.411949 }, { "acc": 0.90533447, "epoch": 1.7589960198196735, "grad_norm": 8.883408546447754, "learning_rate": 6.3913875729817965e-06, "loss": 0.54685526, "memory(GiB)": 34.88, "step": 64965, "train_speed(iter/s)": 0.411951 }, { "acc": 0.91095905, "epoch": 1.759131400102889, "grad_norm": 7.237437725067139, "learning_rate": 6.3908500912719065e-06, "loss": 0.46815181, "memory(GiB)": 34.88, "step": 64970, "train_speed(iter/s)": 0.411952 }, { "acc": 0.89914951, "epoch": 1.7592667803861046, "grad_norm": 8.845827102661133, "learning_rate": 6.390312592145147e-06, "loss": 0.72584128, "memory(GiB)": 34.88, "step": 64975, "train_speed(iter/s)": 0.411954 }, { "acc": 0.91254826, "epoch": 1.7594021606693202, "grad_norm": 10.78870964050293, "learning_rate": 6.389775075608256e-06, "loss": 0.46723213, "memory(GiB)": 34.88, "step": 64980, "train_speed(iter/s)": 0.411956 }, { "acc": 0.905867, "epoch": 1.7595375409525356, "grad_norm": 10.069976806640625, "learning_rate": 6.389237541667962e-06, "loss": 0.48771515, "memory(GiB)": 34.88, "step": 64985, "train_speed(iter/s)": 0.411957 }, { "acc": 0.91138878, "epoch": 1.7596729212357514, "grad_norm": 3.5240914821624756, "learning_rate": 6.388699990331003e-06, "loss": 0.55307727, "memory(GiB)": 34.88, "step": 64990, "train_speed(iter/s)": 0.411959 }, { "acc": 0.91003551, "epoch": 1.7598083015189667, "grad_norm": 5.331108093261719, "learning_rate": 6.3881624216041116e-06, "loss": 0.54266958, "memory(GiB)": 34.88, "step": 64995, "train_speed(iter/s)": 0.411961 }, { "acc": 0.90419788, "epoch": 1.7599436818021823, "grad_norm": 5.585247993469238, "learning_rate": 6.387624835494022e-06, "loss": 0.52118464, "memory(GiB)": 34.88, "step": 65000, "train_speed(iter/s)": 0.411963 }, { "acc": 0.89550762, "epoch": 1.7600790620853979, "grad_norm": 25.604759216308594, "learning_rate": 6.387087232007469e-06, "loss": 0.53381987, "memory(GiB)": 34.88, "step": 65005, "train_speed(iter/s)": 0.411964 }, { "acc": 0.90554237, "epoch": 1.7602144423686135, "grad_norm": 4.027730941772461, "learning_rate": 6.386549611151183e-06, "loss": 0.4772995, "memory(GiB)": 34.88, "step": 65010, "train_speed(iter/s)": 0.411966 }, { "acc": 0.92591543, "epoch": 1.760349822651829, "grad_norm": 8.712421417236328, "learning_rate": 6.386011972931903e-06, "loss": 0.38063221, "memory(GiB)": 34.88, "step": 65015, "train_speed(iter/s)": 0.411968 }, { "acc": 0.89878025, "epoch": 1.7604852029350444, "grad_norm": 6.323354244232178, "learning_rate": 6.385474317356364e-06, "loss": 0.54349656, "memory(GiB)": 34.88, "step": 65020, "train_speed(iter/s)": 0.411969 }, { "acc": 0.92853661, "epoch": 1.7606205832182602, "grad_norm": 5.63271427154541, "learning_rate": 6.384936644431301e-06, "loss": 0.31403606, "memory(GiB)": 34.88, "step": 65025, "train_speed(iter/s)": 0.411971 }, { "acc": 0.91902189, "epoch": 1.7607559635014756, "grad_norm": 10.70704174041748, "learning_rate": 6.384398954163447e-06, "loss": 0.42244339, "memory(GiB)": 34.88, "step": 65030, "train_speed(iter/s)": 0.411972 }, { "acc": 0.91275978, "epoch": 1.7608913437846911, "grad_norm": 7.645967960357666, "learning_rate": 6.383861246559539e-06, "loss": 0.46882019, "memory(GiB)": 34.88, "step": 65035, "train_speed(iter/s)": 0.411974 }, { "acc": 0.91371756, "epoch": 1.7610267240679067, "grad_norm": 8.965410232543945, "learning_rate": 6.383323521626317e-06, "loss": 0.40523262, "memory(GiB)": 34.88, "step": 65040, "train_speed(iter/s)": 0.411976 }, { "acc": 0.91342411, "epoch": 1.7611621043511223, "grad_norm": 4.3355712890625, "learning_rate": 6.3827857793705095e-06, "loss": 0.37520428, "memory(GiB)": 34.88, "step": 65045, "train_speed(iter/s)": 0.411977 }, { "acc": 0.89767656, "epoch": 1.7612974846343379, "grad_norm": 14.0785551071167, "learning_rate": 6.3822480197988566e-06, "loss": 0.6299119, "memory(GiB)": 34.88, "step": 65050, "train_speed(iter/s)": 0.411979 }, { "acc": 0.91426859, "epoch": 1.7614328649175532, "grad_norm": 10.31248950958252, "learning_rate": 6.381710242918095e-06, "loss": 0.45656381, "memory(GiB)": 34.88, "step": 65055, "train_speed(iter/s)": 0.411981 }, { "acc": 0.91473608, "epoch": 1.761568245200769, "grad_norm": 14.938349723815918, "learning_rate": 6.381172448734963e-06, "loss": 0.44926729, "memory(GiB)": 34.88, "step": 65060, "train_speed(iter/s)": 0.411982 }, { "acc": 0.91587639, "epoch": 1.7617036254839844, "grad_norm": 6.0823798179626465, "learning_rate": 6.3806346372561924e-06, "loss": 0.39217539, "memory(GiB)": 34.88, "step": 65065, "train_speed(iter/s)": 0.411984 }, { "acc": 0.89927549, "epoch": 1.7618390057672002, "grad_norm": 14.57414436340332, "learning_rate": 6.380096808488523e-06, "loss": 0.56134834, "memory(GiB)": 34.88, "step": 65070, "train_speed(iter/s)": 0.411985 }, { "acc": 0.90091667, "epoch": 1.7619743860504156, "grad_norm": 9.188592910766602, "learning_rate": 6.379558962438695e-06, "loss": 0.50512161, "memory(GiB)": 34.88, "step": 65075, "train_speed(iter/s)": 0.411987 }, { "acc": 0.91724806, "epoch": 1.7621097663336311, "grad_norm": 8.156005859375, "learning_rate": 6.379021099113444e-06, "loss": 0.46274681, "memory(GiB)": 34.88, "step": 65080, "train_speed(iter/s)": 0.411989 }, { "acc": 0.91478186, "epoch": 1.7622451466168467, "grad_norm": 8.142130851745605, "learning_rate": 6.378483218519506e-06, "loss": 0.45352736, "memory(GiB)": 34.88, "step": 65085, "train_speed(iter/s)": 0.411991 }, { "acc": 0.91934423, "epoch": 1.7623805269000623, "grad_norm": 11.794678688049316, "learning_rate": 6.377945320663621e-06, "loss": 0.5211256, "memory(GiB)": 34.88, "step": 65090, "train_speed(iter/s)": 0.411992 }, { "acc": 0.90613327, "epoch": 1.7625159071832779, "grad_norm": 8.21091365814209, "learning_rate": 6.377407405552528e-06, "loss": 0.58018885, "memory(GiB)": 34.88, "step": 65095, "train_speed(iter/s)": 0.411994 }, { "acc": 0.92120419, "epoch": 1.7626512874664932, "grad_norm": 13.705713272094727, "learning_rate": 6.376869473192962e-06, "loss": 0.41093364, "memory(GiB)": 34.88, "step": 65100, "train_speed(iter/s)": 0.411995 }, { "acc": 0.91377058, "epoch": 1.762786667749709, "grad_norm": 6.964674949645996, "learning_rate": 6.376331523591666e-06, "loss": 0.51331625, "memory(GiB)": 34.88, "step": 65105, "train_speed(iter/s)": 0.411997 }, { "acc": 0.89858704, "epoch": 1.7629220480329244, "grad_norm": 13.924844741821289, "learning_rate": 6.375793556755374e-06, "loss": 0.520574, "memory(GiB)": 34.88, "step": 65110, "train_speed(iter/s)": 0.411999 }, { "acc": 0.90598459, "epoch": 1.76305742831614, "grad_norm": 8.288764953613281, "learning_rate": 6.375255572690828e-06, "loss": 0.45848465, "memory(GiB)": 34.88, "step": 65115, "train_speed(iter/s)": 0.412001 }, { "acc": 0.92832108, "epoch": 1.7631928085993556, "grad_norm": 5.660318851470947, "learning_rate": 6.3747175714047695e-06, "loss": 0.39646826, "memory(GiB)": 34.88, "step": 65120, "train_speed(iter/s)": 0.412002 }, { "acc": 0.8824501, "epoch": 1.7633281888825711, "grad_norm": 34.7630615234375, "learning_rate": 6.374179552903934e-06, "loss": 0.78281088, "memory(GiB)": 34.88, "step": 65125, "train_speed(iter/s)": 0.412004 }, { "acc": 0.92360458, "epoch": 1.7634635691657867, "grad_norm": 7.596765518188477, "learning_rate": 6.373641517195064e-06, "loss": 0.41595345, "memory(GiB)": 34.88, "step": 65130, "train_speed(iter/s)": 0.412005 }, { "acc": 0.91695385, "epoch": 1.763598949449002, "grad_norm": 7.90966796875, "learning_rate": 6.373103464284897e-06, "loss": 0.47278533, "memory(GiB)": 34.88, "step": 65135, "train_speed(iter/s)": 0.412007 }, { "acc": 0.91470509, "epoch": 1.7637343297322179, "grad_norm": 7.650386333465576, "learning_rate": 6.372565394180174e-06, "loss": 0.50067358, "memory(GiB)": 34.88, "step": 65140, "train_speed(iter/s)": 0.412009 }, { "acc": 0.90991879, "epoch": 1.7638697100154332, "grad_norm": 7.3782429695129395, "learning_rate": 6.372027306887637e-06, "loss": 0.48516707, "memory(GiB)": 34.88, "step": 65145, "train_speed(iter/s)": 0.41201 }, { "acc": 0.91427841, "epoch": 1.764005090298649, "grad_norm": 4.449976921081543, "learning_rate": 6.371489202414025e-06, "loss": 0.44197726, "memory(GiB)": 34.88, "step": 65150, "train_speed(iter/s)": 0.412012 }, { "acc": 0.91087275, "epoch": 1.7641404705818644, "grad_norm": 12.062087059020996, "learning_rate": 6.370951080766079e-06, "loss": 0.50214481, "memory(GiB)": 34.88, "step": 65155, "train_speed(iter/s)": 0.412013 }, { "acc": 0.91008596, "epoch": 1.76427585086508, "grad_norm": 5.914727210998535, "learning_rate": 6.37041294195054e-06, "loss": 0.48520565, "memory(GiB)": 34.88, "step": 65160, "train_speed(iter/s)": 0.412015 }, { "acc": 0.8989583, "epoch": 1.7644112311482956, "grad_norm": 14.51724624633789, "learning_rate": 6.3698747859741515e-06, "loss": 0.54937286, "memory(GiB)": 34.88, "step": 65165, "train_speed(iter/s)": 0.412017 }, { "acc": 0.89581614, "epoch": 1.7645466114315111, "grad_norm": 8.001190185546875, "learning_rate": 6.369336612843652e-06, "loss": 0.59818668, "memory(GiB)": 34.88, "step": 65170, "train_speed(iter/s)": 0.412018 }, { "acc": 0.91503506, "epoch": 1.7646819917147267, "grad_norm": 7.226108074188232, "learning_rate": 6.368798422565784e-06, "loss": 0.44108768, "memory(GiB)": 34.88, "step": 65175, "train_speed(iter/s)": 0.41202 }, { "acc": 0.915308, "epoch": 1.764817371997942, "grad_norm": 6.259718894958496, "learning_rate": 6.3682602151472896e-06, "loss": 0.43843908, "memory(GiB)": 34.88, "step": 65180, "train_speed(iter/s)": 0.412021 }, { "acc": 0.92065811, "epoch": 1.7649527522811579, "grad_norm": 8.733979225158691, "learning_rate": 6.36772199059491e-06, "loss": 0.39727912, "memory(GiB)": 34.88, "step": 65185, "train_speed(iter/s)": 0.412023 }, { "acc": 0.90882854, "epoch": 1.7650881325643732, "grad_norm": 6.44539737701416, "learning_rate": 6.367183748915389e-06, "loss": 0.46607223, "memory(GiB)": 34.88, "step": 65190, "train_speed(iter/s)": 0.412025 }, { "acc": 0.921591, "epoch": 1.7652235128475888, "grad_norm": 11.651228904724121, "learning_rate": 6.366645490115467e-06, "loss": 0.51893578, "memory(GiB)": 34.88, "step": 65195, "train_speed(iter/s)": 0.412026 }, { "acc": 0.91751547, "epoch": 1.7653588931308044, "grad_norm": 6.191586971282959, "learning_rate": 6.3661072142018895e-06, "loss": 0.47512379, "memory(GiB)": 34.88, "step": 65200, "train_speed(iter/s)": 0.412028 }, { "acc": 0.8882597, "epoch": 1.76549427341402, "grad_norm": 8.160491943359375, "learning_rate": 6.3655689211814e-06, "loss": 0.51512842, "memory(GiB)": 34.88, "step": 65205, "train_speed(iter/s)": 0.41203 }, { "acc": 0.91167088, "epoch": 1.7656296536972356, "grad_norm": 8.555785179138184, "learning_rate": 6.365030611060739e-06, "loss": 0.47038584, "memory(GiB)": 34.88, "step": 65210, "train_speed(iter/s)": 0.412031 }, { "acc": 0.91932545, "epoch": 1.765765033980451, "grad_norm": 12.896744728088379, "learning_rate": 6.364492283846649e-06, "loss": 0.43563929, "memory(GiB)": 34.88, "step": 65215, "train_speed(iter/s)": 0.412033 }, { "acc": 0.90769062, "epoch": 1.7659004142636667, "grad_norm": 9.439313888549805, "learning_rate": 6.3639539395458774e-06, "loss": 0.4292778, "memory(GiB)": 34.88, "step": 65220, "train_speed(iter/s)": 0.412035 }, { "acc": 0.91790504, "epoch": 1.766035794546882, "grad_norm": 22.877389907836914, "learning_rate": 6.363415578165165e-06, "loss": 0.40271597, "memory(GiB)": 34.88, "step": 65225, "train_speed(iter/s)": 0.412037 }, { "acc": 0.90319481, "epoch": 1.766171174830098, "grad_norm": 6.204105854034424, "learning_rate": 6.3628771997112575e-06, "loss": 0.45666771, "memory(GiB)": 34.88, "step": 65230, "train_speed(iter/s)": 0.412038 }, { "acc": 0.9164854, "epoch": 1.7663065551133132, "grad_norm": 6.146779537200928, "learning_rate": 6.362338804190897e-06, "loss": 0.36053529, "memory(GiB)": 34.88, "step": 65235, "train_speed(iter/s)": 0.41204 }, { "acc": 0.91156693, "epoch": 1.7664419353965288, "grad_norm": 7.382913112640381, "learning_rate": 6.36180039161083e-06, "loss": 0.48735752, "memory(GiB)": 34.88, "step": 65240, "train_speed(iter/s)": 0.412041 }, { "acc": 0.92193499, "epoch": 1.7665773156797444, "grad_norm": 6.940350532531738, "learning_rate": 6.361261961977801e-06, "loss": 0.50427761, "memory(GiB)": 34.88, "step": 65245, "train_speed(iter/s)": 0.412043 }, { "acc": 0.91797705, "epoch": 1.76671269596296, "grad_norm": 11.582119941711426, "learning_rate": 6.360723515298557e-06, "loss": 0.49956074, "memory(GiB)": 34.88, "step": 65250, "train_speed(iter/s)": 0.412045 }, { "acc": 0.89689474, "epoch": 1.7668480762461756, "grad_norm": 7.237959861755371, "learning_rate": 6.360185051579837e-06, "loss": 0.59648695, "memory(GiB)": 34.88, "step": 65255, "train_speed(iter/s)": 0.412046 }, { "acc": 0.9044199, "epoch": 1.766983456529391, "grad_norm": 21.976089477539062, "learning_rate": 6.359646570828392e-06, "loss": 0.561971, "memory(GiB)": 34.88, "step": 65260, "train_speed(iter/s)": 0.412048 }, { "acc": 0.8929944, "epoch": 1.7671188368126067, "grad_norm": 13.717655181884766, "learning_rate": 6.359108073050965e-06, "loss": 0.62966866, "memory(GiB)": 34.88, "step": 65265, "train_speed(iter/s)": 0.412049 }, { "acc": 0.92887735, "epoch": 1.767254217095822, "grad_norm": 8.91375732421875, "learning_rate": 6.358569558254302e-06, "loss": 0.31647856, "memory(GiB)": 34.88, "step": 65270, "train_speed(iter/s)": 0.412051 }, { "acc": 0.90232086, "epoch": 1.7673895973790377, "grad_norm": 4.81467342376709, "learning_rate": 6.3580310264451485e-06, "loss": 0.56410232, "memory(GiB)": 34.88, "step": 65275, "train_speed(iter/s)": 0.412053 }, { "acc": 0.91161699, "epoch": 1.7675249776622532, "grad_norm": 11.480864524841309, "learning_rate": 6.357492477630253e-06, "loss": 0.4966856, "memory(GiB)": 34.88, "step": 65280, "train_speed(iter/s)": 0.412054 }, { "acc": 0.89844122, "epoch": 1.7676603579454688, "grad_norm": 7.014297962188721, "learning_rate": 6.356953911816357e-06, "loss": 0.56878386, "memory(GiB)": 34.88, "step": 65285, "train_speed(iter/s)": 0.412056 }, { "acc": 0.90377941, "epoch": 1.7677957382286844, "grad_norm": 8.787972450256348, "learning_rate": 6.356415329010216e-06, "loss": 0.54193149, "memory(GiB)": 34.88, "step": 65290, "train_speed(iter/s)": 0.412057 }, { "acc": 0.9083765, "epoch": 1.7679311185118998, "grad_norm": 35.095664978027344, "learning_rate": 6.355876729218566e-06, "loss": 0.52861829, "memory(GiB)": 34.88, "step": 65295, "train_speed(iter/s)": 0.412059 }, { "acc": 0.89142838, "epoch": 1.7680664987951156, "grad_norm": 6.321873664855957, "learning_rate": 6.355338112448159e-06, "loss": 0.58289256, "memory(GiB)": 34.88, "step": 65300, "train_speed(iter/s)": 0.412061 }, { "acc": 0.93112907, "epoch": 1.768201879078331, "grad_norm": 5.237868785858154, "learning_rate": 6.354799478705747e-06, "loss": 0.35623059, "memory(GiB)": 34.88, "step": 65305, "train_speed(iter/s)": 0.412062 }, { "acc": 0.92854652, "epoch": 1.7683372593615467, "grad_norm": 6.887019634246826, "learning_rate": 6.354260827998069e-06, "loss": 0.37564335, "memory(GiB)": 34.88, "step": 65310, "train_speed(iter/s)": 0.412064 }, { "acc": 0.9147109, "epoch": 1.768472639644762, "grad_norm": 10.49428653717041, "learning_rate": 6.353722160331877e-06, "loss": 0.52745843, "memory(GiB)": 34.88, "step": 65315, "train_speed(iter/s)": 0.412066 }, { "acc": 0.91241989, "epoch": 1.7686080199279777, "grad_norm": 4.355561256408691, "learning_rate": 6.353183475713919e-06, "loss": 0.48878303, "memory(GiB)": 34.88, "step": 65320, "train_speed(iter/s)": 0.412068 }, { "acc": 0.90662231, "epoch": 1.7687434002111933, "grad_norm": 6.00462007522583, "learning_rate": 6.352644774150944e-06, "loss": 0.41972475, "memory(GiB)": 34.88, "step": 65325, "train_speed(iter/s)": 0.412069 }, { "acc": 0.89420977, "epoch": 1.7688787804944088, "grad_norm": 5.302226543426514, "learning_rate": 6.3521060556496965e-06, "loss": 0.66289773, "memory(GiB)": 34.88, "step": 65330, "train_speed(iter/s)": 0.412071 }, { "acc": 0.90171452, "epoch": 1.7690141607776244, "grad_norm": 13.91748332977295, "learning_rate": 6.351567320216928e-06, "loss": 0.58778629, "memory(GiB)": 34.88, "step": 65335, "train_speed(iter/s)": 0.412073 }, { "acc": 0.92049103, "epoch": 1.7691495410608398, "grad_norm": 6.860599517822266, "learning_rate": 6.351028567859387e-06, "loss": 0.43527455, "memory(GiB)": 34.88, "step": 65340, "train_speed(iter/s)": 0.412074 }, { "acc": 0.92830925, "epoch": 1.7692849213440556, "grad_norm": 4.841795444488525, "learning_rate": 6.350489798583821e-06, "loss": 0.37330248, "memory(GiB)": 34.88, "step": 65345, "train_speed(iter/s)": 0.412076 }, { "acc": 0.92780609, "epoch": 1.769420301627271, "grad_norm": 6.373537540435791, "learning_rate": 6.3499510123969806e-06, "loss": 0.38179731, "memory(GiB)": 34.88, "step": 65350, "train_speed(iter/s)": 0.412077 }, { "acc": 0.89380312, "epoch": 1.7695556819104865, "grad_norm": 6.870961666107178, "learning_rate": 6.349412209305613e-06, "loss": 0.58219786, "memory(GiB)": 34.88, "step": 65355, "train_speed(iter/s)": 0.412079 }, { "acc": 0.90341768, "epoch": 1.769691062193702, "grad_norm": 7.344071388244629, "learning_rate": 6.348873389316471e-06, "loss": 0.53140783, "memory(GiB)": 34.88, "step": 65360, "train_speed(iter/s)": 0.41208 }, { "acc": 0.88521099, "epoch": 1.7698264424769177, "grad_norm": 10.828519821166992, "learning_rate": 6.348334552436301e-06, "loss": 0.70086298, "memory(GiB)": 34.88, "step": 65365, "train_speed(iter/s)": 0.412082 }, { "acc": 0.92159033, "epoch": 1.7699618227601333, "grad_norm": 8.010875701904297, "learning_rate": 6.347795698671858e-06, "loss": 0.40741882, "memory(GiB)": 34.88, "step": 65370, "train_speed(iter/s)": 0.412084 }, { "acc": 0.92742929, "epoch": 1.7700972030433486, "grad_norm": 3.1010453701019287, "learning_rate": 6.3472568280298874e-06, "loss": 0.34730797, "memory(GiB)": 34.88, "step": 65375, "train_speed(iter/s)": 0.412085 }, { "acc": 0.89347372, "epoch": 1.7702325833265644, "grad_norm": 9.681772232055664, "learning_rate": 6.346717940517141e-06, "loss": 0.55180178, "memory(GiB)": 34.88, "step": 65380, "train_speed(iter/s)": 0.412087 }, { "acc": 0.92601871, "epoch": 1.7703679636097798, "grad_norm": 9.541473388671875, "learning_rate": 6.3461790361403695e-06, "loss": 0.41831584, "memory(GiB)": 34.88, "step": 65385, "train_speed(iter/s)": 0.412089 }, { "acc": 0.91190033, "epoch": 1.7705033438929956, "grad_norm": 4.786684513092041, "learning_rate": 6.345640114906323e-06, "loss": 0.51347532, "memory(GiB)": 34.88, "step": 65390, "train_speed(iter/s)": 0.41209 }, { "acc": 0.8848362, "epoch": 1.770638724176211, "grad_norm": 8.731876373291016, "learning_rate": 6.3451011768217554e-06, "loss": 0.63100305, "memory(GiB)": 34.88, "step": 65395, "train_speed(iter/s)": 0.412092 }, { "acc": 0.91836853, "epoch": 1.7707741044594265, "grad_norm": 3.9100682735443115, "learning_rate": 6.344562221893413e-06, "loss": 0.38427091, "memory(GiB)": 34.88, "step": 65400, "train_speed(iter/s)": 0.412093 }, { "acc": 0.91003819, "epoch": 1.770909484742642, "grad_norm": 9.788320541381836, "learning_rate": 6.344023250128053e-06, "loss": 0.61423702, "memory(GiB)": 34.88, "step": 65405, "train_speed(iter/s)": 0.412094 }, { "acc": 0.90445728, "epoch": 1.7710448650258577, "grad_norm": 4.423884391784668, "learning_rate": 6.343484261532422e-06, "loss": 0.53312535, "memory(GiB)": 34.88, "step": 65410, "train_speed(iter/s)": 0.412095 }, { "acc": 0.88953457, "epoch": 1.7711802453090733, "grad_norm": 12.342906951904297, "learning_rate": 6.342945256113277e-06, "loss": 0.64208398, "memory(GiB)": 34.88, "step": 65415, "train_speed(iter/s)": 0.412097 }, { "acc": 0.91202765, "epoch": 1.7713156255922886, "grad_norm": 9.738125801086426, "learning_rate": 6.342406233877365e-06, "loss": 0.48914347, "memory(GiB)": 34.88, "step": 65420, "train_speed(iter/s)": 0.412099 }, { "acc": 0.91234045, "epoch": 1.7714510058755044, "grad_norm": 4.165002346038818, "learning_rate": 6.341867194831441e-06, "loss": 0.47435913, "memory(GiB)": 34.88, "step": 65425, "train_speed(iter/s)": 0.4121 }, { "acc": 0.89342709, "epoch": 1.7715863861587198, "grad_norm": 8.08077621459961, "learning_rate": 6.3413281389822575e-06, "loss": 0.58521671, "memory(GiB)": 34.88, "step": 65430, "train_speed(iter/s)": 0.412102 }, { "acc": 0.91768732, "epoch": 1.7717217664419354, "grad_norm": 5.00364351272583, "learning_rate": 6.340789066336567e-06, "loss": 0.43441596, "memory(GiB)": 34.88, "step": 65435, "train_speed(iter/s)": 0.412104 }, { "acc": 0.89856815, "epoch": 1.771857146725151, "grad_norm": 7.274097919464111, "learning_rate": 6.340249976901122e-06, "loss": 0.55604115, "memory(GiB)": 34.88, "step": 65440, "train_speed(iter/s)": 0.412105 }, { "acc": 0.91975708, "epoch": 1.7719925270083665, "grad_norm": 5.0706892013549805, "learning_rate": 6.339710870682676e-06, "loss": 0.45940094, "memory(GiB)": 34.88, "step": 65445, "train_speed(iter/s)": 0.412107 }, { "acc": 0.912992, "epoch": 1.772127907291582, "grad_norm": 19.406837463378906, "learning_rate": 6.339171747687981e-06, "loss": 0.41446633, "memory(GiB)": 34.88, "step": 65450, "train_speed(iter/s)": 0.412108 }, { "acc": 0.91369667, "epoch": 1.7722632875747975, "grad_norm": 7.142813205718994, "learning_rate": 6.338632607923794e-06, "loss": 0.43196216, "memory(GiB)": 34.88, "step": 65455, "train_speed(iter/s)": 0.41211 }, { "acc": 0.90994272, "epoch": 1.7723986678580133, "grad_norm": 7.300795555114746, "learning_rate": 6.338093451396866e-06, "loss": 0.46695309, "memory(GiB)": 34.88, "step": 65460, "train_speed(iter/s)": 0.412112 }, { "acc": 0.89536057, "epoch": 1.7725340481412286, "grad_norm": 6.2810211181640625, "learning_rate": 6.337554278113952e-06, "loss": 0.63344841, "memory(GiB)": 34.88, "step": 65465, "train_speed(iter/s)": 0.412113 }, { "acc": 0.90589161, "epoch": 1.7726694284244444, "grad_norm": 10.008760452270508, "learning_rate": 6.337015088081804e-06, "loss": 0.55528693, "memory(GiB)": 34.88, "step": 65470, "train_speed(iter/s)": 0.412115 }, { "acc": 0.91053562, "epoch": 1.7728048087076598, "grad_norm": 8.769166946411133, "learning_rate": 6.33647588130718e-06, "loss": 0.44268103, "memory(GiB)": 34.88, "step": 65475, "train_speed(iter/s)": 0.412117 }, { "acc": 0.89893141, "epoch": 1.7729401889908754, "grad_norm": 7.676334381103516, "learning_rate": 6.335936657796832e-06, "loss": 0.59452176, "memory(GiB)": 34.88, "step": 65480, "train_speed(iter/s)": 0.412118 }, { "acc": 0.88432846, "epoch": 1.773075569274091, "grad_norm": 9.011552810668945, "learning_rate": 6.335397417557516e-06, "loss": 0.66472201, "memory(GiB)": 34.88, "step": 65485, "train_speed(iter/s)": 0.41212 }, { "acc": 0.91303825, "epoch": 1.7732109495573065, "grad_norm": 13.576851844787598, "learning_rate": 6.334858160595987e-06, "loss": 0.49220915, "memory(GiB)": 34.88, "step": 65490, "train_speed(iter/s)": 0.412122 }, { "acc": 0.91983871, "epoch": 1.773346329840522, "grad_norm": 8.303874015808105, "learning_rate": 6.334318886918998e-06, "loss": 0.46392193, "memory(GiB)": 34.88, "step": 65495, "train_speed(iter/s)": 0.412124 }, { "acc": 0.89050055, "epoch": 1.7734817101237375, "grad_norm": 7.825855731964111, "learning_rate": 6.33377959653331e-06, "loss": 0.66436205, "memory(GiB)": 34.88, "step": 65500, "train_speed(iter/s)": 0.412125 }, { "acc": 0.91619387, "epoch": 1.7736170904069533, "grad_norm": 22.679149627685547, "learning_rate": 6.3332402894456746e-06, "loss": 0.4704576, "memory(GiB)": 34.88, "step": 65505, "train_speed(iter/s)": 0.412126 }, { "acc": 0.89435015, "epoch": 1.7737524706901686, "grad_norm": 9.559917449951172, "learning_rate": 6.332700965662847e-06, "loss": 0.58447714, "memory(GiB)": 34.88, "step": 65510, "train_speed(iter/s)": 0.412128 }, { "acc": 0.8792799, "epoch": 1.7738878509733842, "grad_norm": 17.652145385742188, "learning_rate": 6.332161625191584e-06, "loss": 0.74227953, "memory(GiB)": 34.88, "step": 65515, "train_speed(iter/s)": 0.41213 }, { "acc": 0.92270117, "epoch": 1.7740232312565998, "grad_norm": 6.740024089813232, "learning_rate": 6.331622268038643e-06, "loss": 0.38182995, "memory(GiB)": 34.88, "step": 65520, "train_speed(iter/s)": 0.412132 }, { "acc": 0.91221285, "epoch": 1.7741586115398154, "grad_norm": 5.938122749328613, "learning_rate": 6.33108289421078e-06, "loss": 0.48711596, "memory(GiB)": 34.88, "step": 65525, "train_speed(iter/s)": 0.412133 }, { "acc": 0.91821642, "epoch": 1.774293991823031, "grad_norm": 25.94316291809082, "learning_rate": 6.330543503714752e-06, "loss": 0.49711976, "memory(GiB)": 34.88, "step": 65530, "train_speed(iter/s)": 0.412135 }, { "acc": 0.90491657, "epoch": 1.7744293721062463, "grad_norm": 6.209863662719727, "learning_rate": 6.3300040965573155e-06, "loss": 0.48225484, "memory(GiB)": 34.88, "step": 65535, "train_speed(iter/s)": 0.412137 }, { "acc": 0.91590395, "epoch": 1.774564752389462, "grad_norm": 6.847529888153076, "learning_rate": 6.329464672745227e-06, "loss": 0.37622762, "memory(GiB)": 34.88, "step": 65540, "train_speed(iter/s)": 0.412138 }, { "acc": 0.90735455, "epoch": 1.7747001326726775, "grad_norm": 9.49550724029541, "learning_rate": 6.3289252322852465e-06, "loss": 0.46222649, "memory(GiB)": 34.88, "step": 65545, "train_speed(iter/s)": 0.41214 }, { "acc": 0.92922068, "epoch": 1.7748355129558933, "grad_norm": 8.716080665588379, "learning_rate": 6.328385775184128e-06, "loss": 0.41839218, "memory(GiB)": 34.88, "step": 65550, "train_speed(iter/s)": 0.412142 }, { "acc": 0.87982445, "epoch": 1.7749708932391086, "grad_norm": 13.20444107055664, "learning_rate": 6.327846301448631e-06, "loss": 0.66555758, "memory(GiB)": 34.88, "step": 65555, "train_speed(iter/s)": 0.412143 }, { "acc": 0.91111889, "epoch": 1.7751062735223242, "grad_norm": 10.28702449798584, "learning_rate": 6.327306811085514e-06, "loss": 0.51941843, "memory(GiB)": 34.88, "step": 65560, "train_speed(iter/s)": 0.412145 }, { "acc": 0.90246258, "epoch": 1.7752416538055398, "grad_norm": 9.033327102661133, "learning_rate": 6.326767304101533e-06, "loss": 0.56142511, "memory(GiB)": 34.88, "step": 65565, "train_speed(iter/s)": 0.412146 }, { "acc": 0.91223621, "epoch": 1.7753770340887554, "grad_norm": 5.578620910644531, "learning_rate": 6.3262277805034485e-06, "loss": 0.48564982, "memory(GiB)": 34.88, "step": 65570, "train_speed(iter/s)": 0.412148 }, { "acc": 0.89838753, "epoch": 1.775512414371971, "grad_norm": 6.356534004211426, "learning_rate": 6.325688240298019e-06, "loss": 0.5302434, "memory(GiB)": 34.88, "step": 65575, "train_speed(iter/s)": 0.412149 }, { "acc": 0.90919056, "epoch": 1.7756477946551863, "grad_norm": 8.720515251159668, "learning_rate": 6.325148683492004e-06, "loss": 0.5458272, "memory(GiB)": 34.88, "step": 65580, "train_speed(iter/s)": 0.412151 }, { "acc": 0.91589069, "epoch": 1.775783174938402, "grad_norm": 4.6738505363464355, "learning_rate": 6.3246091100921595e-06, "loss": 0.46036344, "memory(GiB)": 34.88, "step": 65585, "train_speed(iter/s)": 0.412153 }, { "acc": 0.88946152, "epoch": 1.7759185552216175, "grad_norm": 11.40923023223877, "learning_rate": 6.324069520105247e-06, "loss": 0.62529473, "memory(GiB)": 34.88, "step": 65590, "train_speed(iter/s)": 0.412154 }, { "acc": 0.90804853, "epoch": 1.776053935504833, "grad_norm": 6.291545391082764, "learning_rate": 6.323529913538026e-06, "loss": 0.46518259, "memory(GiB)": 34.88, "step": 65595, "train_speed(iter/s)": 0.412156 }, { "acc": 0.90610981, "epoch": 1.7761893157880486, "grad_norm": 6.497136116027832, "learning_rate": 6.322990290397254e-06, "loss": 0.52162604, "memory(GiB)": 34.88, "step": 65600, "train_speed(iter/s)": 0.412157 }, { "acc": 0.91605072, "epoch": 1.7763246960712642, "grad_norm": 5.540160179138184, "learning_rate": 6.3224506506896936e-06, "loss": 0.41525879, "memory(GiB)": 34.88, "step": 65605, "train_speed(iter/s)": 0.412159 }, { "acc": 0.90764074, "epoch": 1.7764600763544798, "grad_norm": 8.382111549377441, "learning_rate": 6.321910994422104e-06, "loss": 0.49999123, "memory(GiB)": 34.88, "step": 65610, "train_speed(iter/s)": 0.412161 }, { "acc": 0.90424862, "epoch": 1.7765954566376951, "grad_norm": 8.825565338134766, "learning_rate": 6.321371321601245e-06, "loss": 0.486695, "memory(GiB)": 34.88, "step": 65615, "train_speed(iter/s)": 0.412162 }, { "acc": 0.92216444, "epoch": 1.776730836920911, "grad_norm": 5.643824100494385, "learning_rate": 6.320831632233876e-06, "loss": 0.4386219, "memory(GiB)": 34.88, "step": 65620, "train_speed(iter/s)": 0.412164 }, { "acc": 0.92137909, "epoch": 1.7768662172041263, "grad_norm": 9.097087860107422, "learning_rate": 6.320291926326762e-06, "loss": 0.47364969, "memory(GiB)": 34.88, "step": 65625, "train_speed(iter/s)": 0.412165 }, { "acc": 0.9109745, "epoch": 1.777001597487342, "grad_norm": 7.148375034332275, "learning_rate": 6.3197522038866576e-06, "loss": 0.50703821, "memory(GiB)": 34.88, "step": 65630, "train_speed(iter/s)": 0.412167 }, { "acc": 0.92264347, "epoch": 1.7771369777705575, "grad_norm": 2.7519145011901855, "learning_rate": 6.319212464920327e-06, "loss": 0.45573454, "memory(GiB)": 34.88, "step": 65635, "train_speed(iter/s)": 0.412169 }, { "acc": 0.90297756, "epoch": 1.777272358053773, "grad_norm": 16.690143585205078, "learning_rate": 6.318672709434533e-06, "loss": 0.54325972, "memory(GiB)": 34.88, "step": 65640, "train_speed(iter/s)": 0.41217 }, { "acc": 0.88878803, "epoch": 1.7774077383369886, "grad_norm": 16.19758415222168, "learning_rate": 6.318132937436035e-06, "loss": 0.61369457, "memory(GiB)": 34.88, "step": 65645, "train_speed(iter/s)": 0.412172 }, { "acc": 0.90085907, "epoch": 1.7775431186202042, "grad_norm": 10.537642478942871, "learning_rate": 6.317593148931594e-06, "loss": 0.60135098, "memory(GiB)": 34.88, "step": 65650, "train_speed(iter/s)": 0.412173 }, { "acc": 0.91841383, "epoch": 1.7776784989034198, "grad_norm": 7.409090518951416, "learning_rate": 6.3170533439279745e-06, "loss": 0.46304464, "memory(GiB)": 34.88, "step": 65655, "train_speed(iter/s)": 0.412175 }, { "acc": 0.90078468, "epoch": 1.7778138791866351, "grad_norm": 23.037166595458984, "learning_rate": 6.316513522431939e-06, "loss": 0.50698481, "memory(GiB)": 34.88, "step": 65660, "train_speed(iter/s)": 0.412176 }, { "acc": 0.92007389, "epoch": 1.777949259469851, "grad_norm": 7.025816917419434, "learning_rate": 6.3159736844502455e-06, "loss": 0.46788559, "memory(GiB)": 34.88, "step": 65665, "train_speed(iter/s)": 0.412178 }, { "acc": 0.91191006, "epoch": 1.7780846397530663, "grad_norm": 6.3681640625, "learning_rate": 6.31543382998966e-06, "loss": 0.47516623, "memory(GiB)": 34.88, "step": 65670, "train_speed(iter/s)": 0.41218 }, { "acc": 0.9183033, "epoch": 1.7782200200362819, "grad_norm": 5.916450023651123, "learning_rate": 6.314893959056946e-06, "loss": 0.39409204, "memory(GiB)": 34.88, "step": 65675, "train_speed(iter/s)": 0.412181 }, { "acc": 0.90763836, "epoch": 1.7783554003194975, "grad_norm": 27.88572883605957, "learning_rate": 6.314354071658863e-06, "loss": 0.48044415, "memory(GiB)": 34.88, "step": 65680, "train_speed(iter/s)": 0.412183 }, { "acc": 0.93213844, "epoch": 1.778490780602713, "grad_norm": 4.254399299621582, "learning_rate": 6.313814167802177e-06, "loss": 0.32299652, "memory(GiB)": 34.88, "step": 65685, "train_speed(iter/s)": 0.412185 }, { "acc": 0.90647688, "epoch": 1.7786261608859286, "grad_norm": 14.70856761932373, "learning_rate": 6.3132742474936504e-06, "loss": 0.52730241, "memory(GiB)": 34.88, "step": 65690, "train_speed(iter/s)": 0.412186 }, { "acc": 0.91884699, "epoch": 1.778761541169144, "grad_norm": 7.96418571472168, "learning_rate": 6.312734310740047e-06, "loss": 0.41276655, "memory(GiB)": 34.88, "step": 65695, "train_speed(iter/s)": 0.412188 }, { "acc": 0.90979271, "epoch": 1.7788969214523598, "grad_norm": 6.091448783874512, "learning_rate": 6.312194357548128e-06, "loss": 0.41449575, "memory(GiB)": 34.88, "step": 65700, "train_speed(iter/s)": 0.41219 }, { "acc": 0.91825676, "epoch": 1.7790323017355751, "grad_norm": 7.062378406524658, "learning_rate": 6.311654387924662e-06, "loss": 0.46626987, "memory(GiB)": 34.88, "step": 65705, "train_speed(iter/s)": 0.412191 }, { "acc": 0.91386318, "epoch": 1.779167682018791, "grad_norm": 8.975159645080566, "learning_rate": 6.311114401876411e-06, "loss": 0.55008335, "memory(GiB)": 34.88, "step": 65710, "train_speed(iter/s)": 0.412193 }, { "acc": 0.89222078, "epoch": 1.7793030623020063, "grad_norm": 7.5157246589660645, "learning_rate": 6.310574399410139e-06, "loss": 0.50551662, "memory(GiB)": 34.88, "step": 65715, "train_speed(iter/s)": 0.412195 }, { "acc": 0.92333269, "epoch": 1.7794384425852219, "grad_norm": 6.018086910247803, "learning_rate": 6.31003438053261e-06, "loss": 0.42541561, "memory(GiB)": 34.88, "step": 65720, "train_speed(iter/s)": 0.412196 }, { "acc": 0.92034912, "epoch": 1.7795738228684375, "grad_norm": 6.8917927742004395, "learning_rate": 6.30949434525059e-06, "loss": 0.40940289, "memory(GiB)": 34.88, "step": 65725, "train_speed(iter/s)": 0.412198 }, { "acc": 0.90312099, "epoch": 1.779709203151653, "grad_norm": 8.5003662109375, "learning_rate": 6.308954293570844e-06, "loss": 0.606075, "memory(GiB)": 34.88, "step": 65730, "train_speed(iter/s)": 0.412199 }, { "acc": 0.93506737, "epoch": 1.7798445834348686, "grad_norm": 3.7123682498931885, "learning_rate": 6.3084142255001355e-06, "loss": 0.30055985, "memory(GiB)": 34.88, "step": 65735, "train_speed(iter/s)": 0.412201 }, { "acc": 0.90190954, "epoch": 1.779979963718084, "grad_norm": 5.082712650299072, "learning_rate": 6.307874141045233e-06, "loss": 0.48933291, "memory(GiB)": 34.88, "step": 65740, "train_speed(iter/s)": 0.412203 }, { "acc": 0.91588421, "epoch": 1.7801153440012998, "grad_norm": 5.104959011077881, "learning_rate": 6.307334040212898e-06, "loss": 0.46598673, "memory(GiB)": 34.88, "step": 65745, "train_speed(iter/s)": 0.412204 }, { "acc": 0.89858112, "epoch": 1.7802507242845151, "grad_norm": 6.289933681488037, "learning_rate": 6.306793923009903e-06, "loss": 0.47624273, "memory(GiB)": 34.88, "step": 65750, "train_speed(iter/s)": 0.412205 }, { "acc": 0.9123457, "epoch": 1.7803861045677307, "grad_norm": 40.02991485595703, "learning_rate": 6.306253789443007e-06, "loss": 0.41880221, "memory(GiB)": 34.88, "step": 65755, "train_speed(iter/s)": 0.412207 }, { "acc": 0.92600117, "epoch": 1.7805214848509463, "grad_norm": 6.479033470153809, "learning_rate": 6.305713639518978e-06, "loss": 0.36789212, "memory(GiB)": 34.88, "step": 65760, "train_speed(iter/s)": 0.412208 }, { "acc": 0.9195672, "epoch": 1.7806568651341619, "grad_norm": 9.857497215270996, "learning_rate": 6.305173473244586e-06, "loss": 0.46149192, "memory(GiB)": 34.88, "step": 65765, "train_speed(iter/s)": 0.41221 }, { "acc": 0.92290764, "epoch": 1.7807922454173775, "grad_norm": 5.489395618438721, "learning_rate": 6.304633290626593e-06, "loss": 0.3544662, "memory(GiB)": 34.88, "step": 65770, "train_speed(iter/s)": 0.412211 }, { "acc": 0.90508614, "epoch": 1.7809276257005928, "grad_norm": 9.2049560546875, "learning_rate": 6.304093091671768e-06, "loss": 0.53170266, "memory(GiB)": 34.88, "step": 65775, "train_speed(iter/s)": 0.412213 }, { "acc": 0.92417564, "epoch": 1.7810630059838086, "grad_norm": 8.243310928344727, "learning_rate": 6.30355287638688e-06, "loss": 0.42693081, "memory(GiB)": 34.88, "step": 65780, "train_speed(iter/s)": 0.412214 }, { "acc": 0.90715084, "epoch": 1.781198386267024, "grad_norm": 24.865737915039062, "learning_rate": 6.303012644778691e-06, "loss": 0.54886379, "memory(GiB)": 34.88, "step": 65785, "train_speed(iter/s)": 0.412216 }, { "acc": 0.92017155, "epoch": 1.7813337665502398, "grad_norm": 7.880832195281982, "learning_rate": 6.302472396853974e-06, "loss": 0.41680751, "memory(GiB)": 34.88, "step": 65790, "train_speed(iter/s)": 0.412217 }, { "acc": 0.91304827, "epoch": 1.7814691468334551, "grad_norm": 8.816835403442383, "learning_rate": 6.301932132619495e-06, "loss": 0.49727221, "memory(GiB)": 34.88, "step": 65795, "train_speed(iter/s)": 0.412219 }, { "acc": 0.90818186, "epoch": 1.7816045271166707, "grad_norm": 10.677103042602539, "learning_rate": 6.301391852082019e-06, "loss": 0.56763177, "memory(GiB)": 34.88, "step": 65800, "train_speed(iter/s)": 0.41222 }, { "acc": 0.90290995, "epoch": 1.7817399073998863, "grad_norm": 9.298436164855957, "learning_rate": 6.3008515552483185e-06, "loss": 0.56115127, "memory(GiB)": 34.88, "step": 65805, "train_speed(iter/s)": 0.412222 }, { "acc": 0.91051579, "epoch": 1.7818752876831019, "grad_norm": 6.395934104919434, "learning_rate": 6.300311242125159e-06, "loss": 0.39736509, "memory(GiB)": 34.88, "step": 65810, "train_speed(iter/s)": 0.412223 }, { "acc": 0.91671925, "epoch": 1.7820106679663175, "grad_norm": 8.513693809509277, "learning_rate": 6.299770912719308e-06, "loss": 0.40924754, "memory(GiB)": 34.88, "step": 65815, "train_speed(iter/s)": 0.412225 }, { "acc": 0.89495811, "epoch": 1.7821460482495328, "grad_norm": 5.233642101287842, "learning_rate": 6.2992305670375365e-06, "loss": 0.60270414, "memory(GiB)": 34.88, "step": 65820, "train_speed(iter/s)": 0.412226 }, { "acc": 0.92025509, "epoch": 1.7822814285327486, "grad_norm": 1.8319733142852783, "learning_rate": 6.298690205086613e-06, "loss": 0.43043661, "memory(GiB)": 34.88, "step": 65825, "train_speed(iter/s)": 0.412228 }, { "acc": 0.9192627, "epoch": 1.782416808815964, "grad_norm": 9.371014595031738, "learning_rate": 6.298149826873305e-06, "loss": 0.48743606, "memory(GiB)": 34.88, "step": 65830, "train_speed(iter/s)": 0.412229 }, { "acc": 0.91057634, "epoch": 1.7825521890991796, "grad_norm": 12.339527130126953, "learning_rate": 6.297609432404387e-06, "loss": 0.53269825, "memory(GiB)": 34.88, "step": 65835, "train_speed(iter/s)": 0.412231 }, { "acc": 0.92357903, "epoch": 1.7826875693823951, "grad_norm": 4.943993091583252, "learning_rate": 6.2970690216866206e-06, "loss": 0.44223666, "memory(GiB)": 34.88, "step": 65840, "train_speed(iter/s)": 0.412232 }, { "acc": 0.89403706, "epoch": 1.7828229496656107, "grad_norm": 9.814359664916992, "learning_rate": 6.2965285947267805e-06, "loss": 0.57453775, "memory(GiB)": 34.88, "step": 65845, "train_speed(iter/s)": 0.412234 }, { "acc": 0.91412926, "epoch": 1.7829583299488263, "grad_norm": 8.517959594726562, "learning_rate": 6.295988151531635e-06, "loss": 0.46085229, "memory(GiB)": 34.88, "step": 65850, "train_speed(iter/s)": 0.412236 }, { "acc": 0.91722717, "epoch": 1.7830937102320417, "grad_norm": 6.4918341636657715, "learning_rate": 6.2954476921079566e-06, "loss": 0.47114725, "memory(GiB)": 34.88, "step": 65855, "train_speed(iter/s)": 0.412236 }, { "acc": 0.89846783, "epoch": 1.7832290905152575, "grad_norm": 6.646522045135498, "learning_rate": 6.294907216462513e-06, "loss": 0.59065294, "memory(GiB)": 34.88, "step": 65860, "train_speed(iter/s)": 0.412238 }, { "acc": 0.90323811, "epoch": 1.7833644707984728, "grad_norm": 22.059537887573242, "learning_rate": 6.294366724602074e-06, "loss": 0.50337825, "memory(GiB)": 34.88, "step": 65865, "train_speed(iter/s)": 0.41224 }, { "acc": 0.92546291, "epoch": 1.7834998510816886, "grad_norm": 6.645003318786621, "learning_rate": 6.293826216533413e-06, "loss": 0.34869442, "memory(GiB)": 34.88, "step": 65870, "train_speed(iter/s)": 0.412242 }, { "acc": 0.89760818, "epoch": 1.783635231364904, "grad_norm": 6.560294151306152, "learning_rate": 6.293285692263299e-06, "loss": 0.5373631, "memory(GiB)": 34.88, "step": 65875, "train_speed(iter/s)": 0.412243 }, { "acc": 0.91500587, "epoch": 1.7837706116481196, "grad_norm": 20.76528549194336, "learning_rate": 6.292745151798506e-06, "loss": 0.41228018, "memory(GiB)": 34.88, "step": 65880, "train_speed(iter/s)": 0.412245 }, { "acc": 0.91543159, "epoch": 1.7839059919313351, "grad_norm": 12.098398208618164, "learning_rate": 6.292204595145801e-06, "loss": 0.46777334, "memory(GiB)": 34.88, "step": 65885, "train_speed(iter/s)": 0.412247 }, { "acc": 0.88626442, "epoch": 1.7840413722145507, "grad_norm": 7.338461875915527, "learning_rate": 6.291664022311959e-06, "loss": 0.53158603, "memory(GiB)": 34.88, "step": 65890, "train_speed(iter/s)": 0.412249 }, { "acc": 0.8898325, "epoch": 1.7841767524977663, "grad_norm": 13.763984680175781, "learning_rate": 6.29112343330375e-06, "loss": 0.57596598, "memory(GiB)": 34.88, "step": 65895, "train_speed(iter/s)": 0.41225 }, { "acc": 0.89209538, "epoch": 1.7843121327809817, "grad_norm": 10.293916702270508, "learning_rate": 6.2905828281279455e-06, "loss": 0.61853495, "memory(GiB)": 34.88, "step": 65900, "train_speed(iter/s)": 0.412252 }, { "acc": 0.90597773, "epoch": 1.7844475130641975, "grad_norm": 9.769132614135742, "learning_rate": 6.29004220679132e-06, "loss": 0.51077919, "memory(GiB)": 34.88, "step": 65905, "train_speed(iter/s)": 0.412253 }, { "acc": 0.90099602, "epoch": 1.7845828933474128, "grad_norm": 7.2175397872924805, "learning_rate": 6.289501569300645e-06, "loss": 0.5824584, "memory(GiB)": 34.88, "step": 65910, "train_speed(iter/s)": 0.412255 }, { "acc": 0.92223377, "epoch": 1.7847182736306284, "grad_norm": 6.176903247833252, "learning_rate": 6.288960915662691e-06, "loss": 0.33820577, "memory(GiB)": 34.88, "step": 65915, "train_speed(iter/s)": 0.412256 }, { "acc": 0.91342869, "epoch": 1.784853653913844, "grad_norm": 7.30194616317749, "learning_rate": 6.288420245884233e-06, "loss": 0.49237413, "memory(GiB)": 34.88, "step": 65920, "train_speed(iter/s)": 0.412258 }, { "acc": 0.90667744, "epoch": 1.7849890341970596, "grad_norm": 6.917449951171875, "learning_rate": 6.2878795599720445e-06, "loss": 0.47922988, "memory(GiB)": 34.88, "step": 65925, "train_speed(iter/s)": 0.41226 }, { "acc": 0.90824156, "epoch": 1.7851244144802751, "grad_norm": 6.286544322967529, "learning_rate": 6.287338857932894e-06, "loss": 0.51968689, "memory(GiB)": 34.88, "step": 65930, "train_speed(iter/s)": 0.412261 }, { "acc": 0.89977131, "epoch": 1.7852597947634905, "grad_norm": 10.486392974853516, "learning_rate": 6.2867981397735586e-06, "loss": 0.55879636, "memory(GiB)": 34.88, "step": 65935, "train_speed(iter/s)": 0.412263 }, { "acc": 0.9038702, "epoch": 1.7853951750467063, "grad_norm": 14.700555801391602, "learning_rate": 6.286257405500814e-06, "loss": 0.51477747, "memory(GiB)": 34.88, "step": 65940, "train_speed(iter/s)": 0.412265 }, { "acc": 0.91182213, "epoch": 1.7855305553299217, "grad_norm": 8.694913864135742, "learning_rate": 6.2857166551214296e-06, "loss": 0.41330843, "memory(GiB)": 34.88, "step": 65945, "train_speed(iter/s)": 0.412266 }, { "acc": 0.91487617, "epoch": 1.7856659356131375, "grad_norm": 8.581414222717285, "learning_rate": 6.285175888642181e-06, "loss": 0.52683263, "memory(GiB)": 34.88, "step": 65950, "train_speed(iter/s)": 0.412267 }, { "acc": 0.9120636, "epoch": 1.7858013158963528, "grad_norm": 6.664836883544922, "learning_rate": 6.284635106069844e-06, "loss": 0.46234331, "memory(GiB)": 34.88, "step": 65955, "train_speed(iter/s)": 0.412269 }, { "acc": 0.89785595, "epoch": 1.7859366961795684, "grad_norm": 9.204163551330566, "learning_rate": 6.284094307411192e-06, "loss": 0.54771547, "memory(GiB)": 34.88, "step": 65960, "train_speed(iter/s)": 0.412271 }, { "acc": 0.91094351, "epoch": 1.786072076462784, "grad_norm": 6.196674823760986, "learning_rate": 6.283553492672997e-06, "loss": 0.46946611, "memory(GiB)": 34.88, "step": 65965, "train_speed(iter/s)": 0.412272 }, { "acc": 0.89446869, "epoch": 1.7862074567459996, "grad_norm": 6.579535007476807, "learning_rate": 6.283012661862035e-06, "loss": 0.61824479, "memory(GiB)": 34.88, "step": 65970, "train_speed(iter/s)": 0.412274 }, { "acc": 0.91102428, "epoch": 1.7863428370292151, "grad_norm": 17.984651565551758, "learning_rate": 6.282471814985084e-06, "loss": 0.51228533, "memory(GiB)": 34.88, "step": 65975, "train_speed(iter/s)": 0.412276 }, { "acc": 0.90916309, "epoch": 1.7864782173124305, "grad_norm": 10.794194221496582, "learning_rate": 6.281930952048916e-06, "loss": 0.54955497, "memory(GiB)": 34.88, "step": 65980, "train_speed(iter/s)": 0.412277 }, { "acc": 0.9034586, "epoch": 1.7866135975956463, "grad_norm": 21.89101219177246, "learning_rate": 6.2813900730603075e-06, "loss": 0.45403633, "memory(GiB)": 34.88, "step": 65985, "train_speed(iter/s)": 0.412279 }, { "acc": 0.9309063, "epoch": 1.7867489778788617, "grad_norm": 4.790067672729492, "learning_rate": 6.280849178026034e-06, "loss": 0.38253245, "memory(GiB)": 34.88, "step": 65990, "train_speed(iter/s)": 0.41228 }, { "acc": 0.90400581, "epoch": 1.7868843581620772, "grad_norm": 11.135113716125488, "learning_rate": 6.280308266952871e-06, "loss": 0.49269309, "memory(GiB)": 34.88, "step": 65995, "train_speed(iter/s)": 0.412282 }, { "acc": 0.90427074, "epoch": 1.7870197384452928, "grad_norm": 13.778550148010254, "learning_rate": 6.279767339847594e-06, "loss": 0.48869176, "memory(GiB)": 34.88, "step": 66000, "train_speed(iter/s)": 0.412284 }, { "acc": 0.93105659, "epoch": 1.7871551187285084, "grad_norm": 7.8471550941467285, "learning_rate": 6.279226396716981e-06, "loss": 0.34211929, "memory(GiB)": 34.88, "step": 66005, "train_speed(iter/s)": 0.412285 }, { "acc": 0.90463581, "epoch": 1.787290499011724, "grad_norm": 11.256662368774414, "learning_rate": 6.278685437567808e-06, "loss": 0.49448099, "memory(GiB)": 34.88, "step": 66010, "train_speed(iter/s)": 0.412287 }, { "acc": 0.90234194, "epoch": 1.7874258792949393, "grad_norm": 20.048404693603516, "learning_rate": 6.2781444624068475e-06, "loss": 0.57630262, "memory(GiB)": 34.88, "step": 66015, "train_speed(iter/s)": 0.412288 }, { "acc": 0.90619221, "epoch": 1.7875612595781551, "grad_norm": 7.277421951293945, "learning_rate": 6.277603471240882e-06, "loss": 0.47394471, "memory(GiB)": 34.88, "step": 66020, "train_speed(iter/s)": 0.41229 }, { "acc": 0.93196688, "epoch": 1.7876966398613705, "grad_norm": 6.416813373565674, "learning_rate": 6.277062464076685e-06, "loss": 0.36501627, "memory(GiB)": 34.88, "step": 66025, "train_speed(iter/s)": 0.412291 }, { "acc": 0.92970448, "epoch": 1.787832020144586, "grad_norm": 8.380682945251465, "learning_rate": 6.276521440921036e-06, "loss": 0.34905517, "memory(GiB)": 34.88, "step": 66030, "train_speed(iter/s)": 0.412293 }, { "acc": 0.91731911, "epoch": 1.7879674004278017, "grad_norm": 4.062251567840576, "learning_rate": 6.2759804017807095e-06, "loss": 0.45595331, "memory(GiB)": 34.88, "step": 66035, "train_speed(iter/s)": 0.412295 }, { "acc": 0.9174593, "epoch": 1.7881027807110172, "grad_norm": 15.297173500061035, "learning_rate": 6.275439346662486e-06, "loss": 0.44190145, "memory(GiB)": 34.88, "step": 66040, "train_speed(iter/s)": 0.412296 }, { "acc": 0.91451368, "epoch": 1.7882381609942328, "grad_norm": 6.113273620605469, "learning_rate": 6.274898275573143e-06, "loss": 0.43363562, "memory(GiB)": 34.88, "step": 66045, "train_speed(iter/s)": 0.412298 }, { "acc": 0.89460201, "epoch": 1.7883735412774482, "grad_norm": 4.711445331573486, "learning_rate": 6.2743571885194545e-06, "loss": 0.59016027, "memory(GiB)": 34.88, "step": 66050, "train_speed(iter/s)": 0.412299 }, { "acc": 0.90215511, "epoch": 1.788508921560664, "grad_norm": 8.963000297546387, "learning_rate": 6.2738160855082034e-06, "loss": 0.53963366, "memory(GiB)": 34.88, "step": 66055, "train_speed(iter/s)": 0.412301 }, { "acc": 0.90439816, "epoch": 1.7886443018438793, "grad_norm": 6.77389669418335, "learning_rate": 6.273274966546165e-06, "loss": 0.50215063, "memory(GiB)": 34.88, "step": 66060, "train_speed(iter/s)": 0.412302 }, { "acc": 0.88896847, "epoch": 1.7887796821270951, "grad_norm": 10.978906631469727, "learning_rate": 6.272733831640119e-06, "loss": 0.67193966, "memory(GiB)": 34.88, "step": 66065, "train_speed(iter/s)": 0.412304 }, { "acc": 0.90157509, "epoch": 1.7889150624103105, "grad_norm": 9.080888748168945, "learning_rate": 6.272192680796846e-06, "loss": 0.60005503, "memory(GiB)": 34.88, "step": 66070, "train_speed(iter/s)": 0.412305 }, { "acc": 0.8953476, "epoch": 1.789050442693526, "grad_norm": 5.976041793823242, "learning_rate": 6.271651514023123e-06, "loss": 0.5199635, "memory(GiB)": 34.88, "step": 66075, "train_speed(iter/s)": 0.412307 }, { "acc": 0.9174551, "epoch": 1.7891858229767417, "grad_norm": 10.576957702636719, "learning_rate": 6.2711103313257285e-06, "loss": 0.38410974, "memory(GiB)": 34.88, "step": 66080, "train_speed(iter/s)": 0.412308 }, { "acc": 0.88230419, "epoch": 1.7893212032599572, "grad_norm": 10.18912124633789, "learning_rate": 6.2705691327114424e-06, "loss": 0.63733964, "memory(GiB)": 34.88, "step": 66085, "train_speed(iter/s)": 0.412309 }, { "acc": 0.90872211, "epoch": 1.7894565835431728, "grad_norm": 4.162926197052002, "learning_rate": 6.270027918187047e-06, "loss": 0.505369, "memory(GiB)": 34.88, "step": 66090, "train_speed(iter/s)": 0.412311 }, { "acc": 0.90709543, "epoch": 1.7895919638263882, "grad_norm": 11.462965965270996, "learning_rate": 6.269486687759317e-06, "loss": 0.48315611, "memory(GiB)": 34.88, "step": 66095, "train_speed(iter/s)": 0.412312 }, { "acc": 0.89780989, "epoch": 1.789727344109604, "grad_norm": 14.47983455657959, "learning_rate": 6.268945441435038e-06, "loss": 0.5635272, "memory(GiB)": 34.88, "step": 66100, "train_speed(iter/s)": 0.412314 }, { "acc": 0.90719051, "epoch": 1.7898627243928193, "grad_norm": 7.9479594230651855, "learning_rate": 6.268404179220986e-06, "loss": 0.47467599, "memory(GiB)": 34.88, "step": 66105, "train_speed(iter/s)": 0.412315 }, { "acc": 0.93006916, "epoch": 1.789998104676035, "grad_norm": 11.088202476501465, "learning_rate": 6.267862901123942e-06, "loss": 0.38140924, "memory(GiB)": 34.88, "step": 66110, "train_speed(iter/s)": 0.412317 }, { "acc": 0.90540724, "epoch": 1.7901334849592505, "grad_norm": 22.74796485900879, "learning_rate": 6.267321607150688e-06, "loss": 0.50138273, "memory(GiB)": 34.88, "step": 66115, "train_speed(iter/s)": 0.412318 }, { "acc": 0.92428455, "epoch": 1.790268865242466, "grad_norm": 10.834708213806152, "learning_rate": 6.266780297308005e-06, "loss": 0.44597926, "memory(GiB)": 34.88, "step": 66120, "train_speed(iter/s)": 0.41232 }, { "acc": 0.91124363, "epoch": 1.7904042455256817, "grad_norm": 9.235182762145996, "learning_rate": 6.266238971602672e-06, "loss": 0.48281403, "memory(GiB)": 34.88, "step": 66125, "train_speed(iter/s)": 0.412321 }, { "acc": 0.90604324, "epoch": 1.790539625808897, "grad_norm": 8.237638473510742, "learning_rate": 6.265697630041471e-06, "loss": 0.54109678, "memory(GiB)": 34.88, "step": 66130, "train_speed(iter/s)": 0.412323 }, { "acc": 0.91660042, "epoch": 1.7906750060921128, "grad_norm": 10.348003387451172, "learning_rate": 6.265156272631184e-06, "loss": 0.5176157, "memory(GiB)": 34.88, "step": 66135, "train_speed(iter/s)": 0.412324 }, { "acc": 0.91149807, "epoch": 1.7908103863753282, "grad_norm": 7.826396465301514, "learning_rate": 6.2646148993785925e-06, "loss": 0.41539373, "memory(GiB)": 34.88, "step": 66140, "train_speed(iter/s)": 0.412326 }, { "acc": 0.91154413, "epoch": 1.790945766658544, "grad_norm": 7.827669620513916, "learning_rate": 6.264073510290477e-06, "loss": 0.41656756, "memory(GiB)": 34.88, "step": 66145, "train_speed(iter/s)": 0.412327 }, { "acc": 0.91014872, "epoch": 1.7910811469417593, "grad_norm": 8.703039169311523, "learning_rate": 6.263532105373621e-06, "loss": 0.47972565, "memory(GiB)": 34.88, "step": 66150, "train_speed(iter/s)": 0.412329 }, { "acc": 0.91594105, "epoch": 1.791216527224975, "grad_norm": 6.066145896911621, "learning_rate": 6.262990684634805e-06, "loss": 0.47109852, "memory(GiB)": 34.88, "step": 66155, "train_speed(iter/s)": 0.41233 }, { "acc": 0.91787815, "epoch": 1.7913519075081905, "grad_norm": 6.032195568084717, "learning_rate": 6.262449248080814e-06, "loss": 0.47271738, "memory(GiB)": 34.88, "step": 66160, "train_speed(iter/s)": 0.412332 }, { "acc": 0.90506401, "epoch": 1.791487287791406, "grad_norm": 9.56484317779541, "learning_rate": 6.261907795718427e-06, "loss": 0.54491835, "memory(GiB)": 34.88, "step": 66165, "train_speed(iter/s)": 0.412333 }, { "acc": 0.90446529, "epoch": 1.7916226680746217, "grad_norm": 111.87097930908203, "learning_rate": 6.261366327554432e-06, "loss": 0.54315486, "memory(GiB)": 34.88, "step": 66170, "train_speed(iter/s)": 0.412334 }, { "acc": 0.91849537, "epoch": 1.791758048357837, "grad_norm": 5.795315742492676, "learning_rate": 6.2608248435956055e-06, "loss": 0.44606366, "memory(GiB)": 34.88, "step": 66175, "train_speed(iter/s)": 0.412336 }, { "acc": 0.89814472, "epoch": 1.7918934286410528, "grad_norm": 6.44714879989624, "learning_rate": 6.260283343848736e-06, "loss": 0.52150245, "memory(GiB)": 34.88, "step": 66180, "train_speed(iter/s)": 0.412337 }, { "acc": 0.93114948, "epoch": 1.7920288089242682, "grad_norm": 3.9446656703948975, "learning_rate": 6.259741828320602e-06, "loss": 0.3839036, "memory(GiB)": 34.88, "step": 66185, "train_speed(iter/s)": 0.412339 }, { "acc": 0.92512321, "epoch": 1.7921641892074838, "grad_norm": 5.362396240234375, "learning_rate": 6.259200297017991e-06, "loss": 0.36990767, "memory(GiB)": 34.88, "step": 66190, "train_speed(iter/s)": 0.41234 }, { "acc": 0.91794014, "epoch": 1.7922995694906994, "grad_norm": 8.923514366149902, "learning_rate": 6.258658749947686e-06, "loss": 0.42449417, "memory(GiB)": 34.88, "step": 66195, "train_speed(iter/s)": 0.412342 }, { "acc": 0.91129036, "epoch": 1.792434949773915, "grad_norm": 3.760178804397583, "learning_rate": 6.25811718711647e-06, "loss": 0.51349649, "memory(GiB)": 34.88, "step": 66200, "train_speed(iter/s)": 0.412343 }, { "acc": 0.91707726, "epoch": 1.7925703300571305, "grad_norm": 12.117175102233887, "learning_rate": 6.257575608531128e-06, "loss": 0.43446703, "memory(GiB)": 34.88, "step": 66205, "train_speed(iter/s)": 0.412344 }, { "acc": 0.93001251, "epoch": 1.7927057103403459, "grad_norm": 5.792050838470459, "learning_rate": 6.257034014198442e-06, "loss": 0.35729213, "memory(GiB)": 34.88, "step": 66210, "train_speed(iter/s)": 0.412345 }, { "acc": 0.90068235, "epoch": 1.7928410906235617, "grad_norm": 7.61945915222168, "learning_rate": 6.256492404125202e-06, "loss": 0.59024048, "memory(GiB)": 34.88, "step": 66215, "train_speed(iter/s)": 0.412347 }, { "acc": 0.89373474, "epoch": 1.792976470906777, "grad_norm": 12.185432434082031, "learning_rate": 6.255950778318185e-06, "loss": 0.61306734, "memory(GiB)": 34.88, "step": 66220, "train_speed(iter/s)": 0.412348 }, { "acc": 0.91049147, "epoch": 1.7931118511899928, "grad_norm": 6.937598705291748, "learning_rate": 6.255409136784183e-06, "loss": 0.46568809, "memory(GiB)": 34.88, "step": 66225, "train_speed(iter/s)": 0.41235 }, { "acc": 0.90980301, "epoch": 1.7932472314732082, "grad_norm": 5.343146800994873, "learning_rate": 6.254867479529977e-06, "loss": 0.51585121, "memory(GiB)": 34.88, "step": 66230, "train_speed(iter/s)": 0.412351 }, { "acc": 0.91455774, "epoch": 1.7933826117564238, "grad_norm": 7.448304176330566, "learning_rate": 6.254325806562352e-06, "loss": 0.39127803, "memory(GiB)": 34.88, "step": 66235, "train_speed(iter/s)": 0.412353 }, { "acc": 0.90038433, "epoch": 1.7935179920396394, "grad_norm": 4.400125503540039, "learning_rate": 6.253784117888098e-06, "loss": 0.44450254, "memory(GiB)": 34.88, "step": 66240, "train_speed(iter/s)": 0.412353 }, { "acc": 0.92501888, "epoch": 1.793653372322855, "grad_norm": 6.070555210113525, "learning_rate": 6.253242413513996e-06, "loss": 0.41674099, "memory(GiB)": 34.88, "step": 66245, "train_speed(iter/s)": 0.412355 }, { "acc": 0.91524677, "epoch": 1.7937887526060705, "grad_norm": 8.693560600280762, "learning_rate": 6.252700693446834e-06, "loss": 0.43506589, "memory(GiB)": 34.88, "step": 66250, "train_speed(iter/s)": 0.412356 }, { "acc": 0.92912407, "epoch": 1.7939241328892859, "grad_norm": 5.754114627838135, "learning_rate": 6.252158957693398e-06, "loss": 0.41608529, "memory(GiB)": 34.88, "step": 66255, "train_speed(iter/s)": 0.412358 }, { "acc": 0.92236509, "epoch": 1.7940595131725017, "grad_norm": 7.079174995422363, "learning_rate": 6.251617206260474e-06, "loss": 0.41855264, "memory(GiB)": 34.88, "step": 66260, "train_speed(iter/s)": 0.412359 }, { "acc": 0.91889277, "epoch": 1.794194893455717, "grad_norm": 16.284561157226562, "learning_rate": 6.251075439154849e-06, "loss": 0.37755492, "memory(GiB)": 34.88, "step": 66265, "train_speed(iter/s)": 0.41236 }, { "acc": 0.91926508, "epoch": 1.7943302737389326, "grad_norm": 13.899872779846191, "learning_rate": 6.250533656383308e-06, "loss": 0.4475842, "memory(GiB)": 34.88, "step": 66270, "train_speed(iter/s)": 0.412362 }, { "acc": 0.90360193, "epoch": 1.7944656540221482, "grad_norm": 11.228629112243652, "learning_rate": 6.24999185795264e-06, "loss": 0.59396687, "memory(GiB)": 34.88, "step": 66275, "train_speed(iter/s)": 0.412363 }, { "acc": 0.91434364, "epoch": 1.7946010343053638, "grad_norm": 8.981697082519531, "learning_rate": 6.24945004386963e-06, "loss": 0.45988526, "memory(GiB)": 34.88, "step": 66280, "train_speed(iter/s)": 0.412365 }, { "acc": 0.89646416, "epoch": 1.7947364145885794, "grad_norm": 6.706531524658203, "learning_rate": 6.248908214141068e-06, "loss": 0.55111728, "memory(GiB)": 34.88, "step": 66285, "train_speed(iter/s)": 0.412367 }, { "acc": 0.90767393, "epoch": 1.7948717948717947, "grad_norm": 7.728387355804443, "learning_rate": 6.2483663687737385e-06, "loss": 0.47732162, "memory(GiB)": 34.88, "step": 66290, "train_speed(iter/s)": 0.412368 }, { "acc": 0.90990772, "epoch": 1.7950071751550105, "grad_norm": 11.703060150146484, "learning_rate": 6.2478245077744335e-06, "loss": 0.48724594, "memory(GiB)": 34.88, "step": 66295, "train_speed(iter/s)": 0.41237 }, { "acc": 0.91892681, "epoch": 1.7951425554382259, "grad_norm": 7.382725715637207, "learning_rate": 6.247282631149935e-06, "loss": 0.4379961, "memory(GiB)": 34.88, "step": 66300, "train_speed(iter/s)": 0.412371 }, { "acc": 0.89565144, "epoch": 1.7952779357214417, "grad_norm": 17.014286041259766, "learning_rate": 6.246740738907037e-06, "loss": 0.68309131, "memory(GiB)": 34.88, "step": 66305, "train_speed(iter/s)": 0.412373 }, { "acc": 0.90673056, "epoch": 1.795413316004657, "grad_norm": 9.792745590209961, "learning_rate": 6.246198831052521e-06, "loss": 0.49549532, "memory(GiB)": 34.88, "step": 66310, "train_speed(iter/s)": 0.412374 }, { "acc": 0.90421629, "epoch": 1.7955486962878726, "grad_norm": 7.59374475479126, "learning_rate": 6.245656907593182e-06, "loss": 0.51272326, "memory(GiB)": 34.88, "step": 66315, "train_speed(iter/s)": 0.412376 }, { "acc": 0.91460438, "epoch": 1.7956840765710882, "grad_norm": 9.981654167175293, "learning_rate": 6.245114968535805e-06, "loss": 0.45938482, "memory(GiB)": 34.88, "step": 66320, "train_speed(iter/s)": 0.412377 }, { "acc": 0.88799553, "epoch": 1.7958194568543038, "grad_norm": 6.402131080627441, "learning_rate": 6.24457301388718e-06, "loss": 0.66902018, "memory(GiB)": 34.88, "step": 66325, "train_speed(iter/s)": 0.412379 }, { "acc": 0.90452785, "epoch": 1.7959548371375194, "grad_norm": 6.256054878234863, "learning_rate": 6.2440310436540965e-06, "loss": 0.52333679, "memory(GiB)": 34.88, "step": 66330, "train_speed(iter/s)": 0.41238 }, { "acc": 0.9035183, "epoch": 1.7960902174207347, "grad_norm": 16.566818237304688, "learning_rate": 6.2434890578433425e-06, "loss": 0.53938503, "memory(GiB)": 34.88, "step": 66335, "train_speed(iter/s)": 0.412382 }, { "acc": 0.90496445, "epoch": 1.7962255977039505, "grad_norm": 9.75721549987793, "learning_rate": 6.2429470564617105e-06, "loss": 0.598596, "memory(GiB)": 34.88, "step": 66340, "train_speed(iter/s)": 0.412383 }, { "acc": 0.91926632, "epoch": 1.7963609779871659, "grad_norm": 6.232308387756348, "learning_rate": 6.242405039515984e-06, "loss": 0.47182798, "memory(GiB)": 34.88, "step": 66345, "train_speed(iter/s)": 0.412385 }, { "acc": 0.91069469, "epoch": 1.7964963582703815, "grad_norm": 7.07874870300293, "learning_rate": 6.241863007012959e-06, "loss": 0.42105842, "memory(GiB)": 34.88, "step": 66350, "train_speed(iter/s)": 0.412386 }, { "acc": 0.90459766, "epoch": 1.796631738553597, "grad_norm": 8.565681457519531, "learning_rate": 6.241320958959422e-06, "loss": 0.54116039, "memory(GiB)": 34.88, "step": 66355, "train_speed(iter/s)": 0.412388 }, { "acc": 0.90127831, "epoch": 1.7967671188368126, "grad_norm": 17.673473358154297, "learning_rate": 6.240778895362165e-06, "loss": 0.70371013, "memory(GiB)": 34.88, "step": 66360, "train_speed(iter/s)": 0.412389 }, { "acc": 0.89716187, "epoch": 1.7969024991200282, "grad_norm": 9.520028114318848, "learning_rate": 6.240236816227978e-06, "loss": 0.56647568, "memory(GiB)": 34.88, "step": 66365, "train_speed(iter/s)": 0.41239 }, { "acc": 0.91394529, "epoch": 1.7970378794032436, "grad_norm": 20.33746910095215, "learning_rate": 6.239694721563648e-06, "loss": 0.53149366, "memory(GiB)": 34.88, "step": 66370, "train_speed(iter/s)": 0.412392 }, { "acc": 0.90024099, "epoch": 1.7971732596864594, "grad_norm": 7.335829734802246, "learning_rate": 6.239152611375974e-06, "loss": 0.53214073, "memory(GiB)": 34.88, "step": 66375, "train_speed(iter/s)": 0.412393 }, { "acc": 0.88842945, "epoch": 1.7973086399696747, "grad_norm": 10.519329071044922, "learning_rate": 6.23861048567174e-06, "loss": 0.62814269, "memory(GiB)": 34.88, "step": 66380, "train_speed(iter/s)": 0.412395 }, { "acc": 0.89622421, "epoch": 1.7974440202528905, "grad_norm": 5.374983310699463, "learning_rate": 6.238068344457738e-06, "loss": 0.57797365, "memory(GiB)": 34.88, "step": 66385, "train_speed(iter/s)": 0.412396 }, { "acc": 0.90659256, "epoch": 1.7975794005361059, "grad_norm": 3.718252658843994, "learning_rate": 6.237526187740762e-06, "loss": 0.52368283, "memory(GiB)": 34.88, "step": 66390, "train_speed(iter/s)": 0.412398 }, { "acc": 0.89870052, "epoch": 1.7977147808193215, "grad_norm": 11.338103294372559, "learning_rate": 6.2369840155276015e-06, "loss": 0.63109426, "memory(GiB)": 34.88, "step": 66395, "train_speed(iter/s)": 0.412399 }, { "acc": 0.93729773, "epoch": 1.797850161102537, "grad_norm": 4.259257793426514, "learning_rate": 6.236441827825049e-06, "loss": 0.32480106, "memory(GiB)": 34.88, "step": 66400, "train_speed(iter/s)": 0.412401 }, { "acc": 0.90514202, "epoch": 1.7979855413857526, "grad_norm": 11.034290313720703, "learning_rate": 6.235899624639898e-06, "loss": 0.5219717, "memory(GiB)": 34.88, "step": 66405, "train_speed(iter/s)": 0.412403 }, { "acc": 0.93254871, "epoch": 1.7981209216689682, "grad_norm": 4.554389953613281, "learning_rate": 6.235357405978938e-06, "loss": 0.30556002, "memory(GiB)": 34.88, "step": 66410, "train_speed(iter/s)": 0.412404 }, { "acc": 0.90331068, "epoch": 1.7982563019521836, "grad_norm": 9.238850593566895, "learning_rate": 6.2348151718489626e-06, "loss": 0.55758305, "memory(GiB)": 34.88, "step": 66415, "train_speed(iter/s)": 0.412406 }, { "acc": 0.89893351, "epoch": 1.7983916822353994, "grad_norm": 8.692370414733887, "learning_rate": 6.234272922256764e-06, "loss": 0.52198205, "memory(GiB)": 34.88, "step": 66420, "train_speed(iter/s)": 0.412408 }, { "acc": 0.91341581, "epoch": 1.7985270625186147, "grad_norm": 18.38467788696289, "learning_rate": 6.233730657209136e-06, "loss": 0.47964878, "memory(GiB)": 34.88, "step": 66425, "train_speed(iter/s)": 0.412409 }, { "acc": 0.9184803, "epoch": 1.7986624428018303, "grad_norm": 7.262738227844238, "learning_rate": 6.23318837671287e-06, "loss": 0.48413439, "memory(GiB)": 34.88, "step": 66430, "train_speed(iter/s)": 0.412411 }, { "acc": 0.90800095, "epoch": 1.7987978230850459, "grad_norm": 9.083888053894043, "learning_rate": 6.232646080774761e-06, "loss": 0.59285507, "memory(GiB)": 34.88, "step": 66435, "train_speed(iter/s)": 0.412412 }, { "acc": 0.91637497, "epoch": 1.7989332033682615, "grad_norm": 5.216033458709717, "learning_rate": 6.2321037694016e-06, "loss": 0.4203476, "memory(GiB)": 34.88, "step": 66440, "train_speed(iter/s)": 0.412414 }, { "acc": 0.88889885, "epoch": 1.799068583651477, "grad_norm": 14.127303123474121, "learning_rate": 6.231561442600182e-06, "loss": 0.63496919, "memory(GiB)": 34.88, "step": 66445, "train_speed(iter/s)": 0.412415 }, { "acc": 0.87552834, "epoch": 1.7992039639346924, "grad_norm": 16.347803115844727, "learning_rate": 6.231019100377299e-06, "loss": 0.77263489, "memory(GiB)": 34.88, "step": 66450, "train_speed(iter/s)": 0.412417 }, { "acc": 0.90705233, "epoch": 1.7993393442179082, "grad_norm": 9.537545204162598, "learning_rate": 6.230476742739748e-06, "loss": 0.46420193, "memory(GiB)": 34.88, "step": 66455, "train_speed(iter/s)": 0.412418 }, { "acc": 0.90523357, "epoch": 1.7994747245011236, "grad_norm": 7.118086814880371, "learning_rate": 6.229934369694321e-06, "loss": 0.48616438, "memory(GiB)": 34.88, "step": 66460, "train_speed(iter/s)": 0.41242 }, { "acc": 0.91113195, "epoch": 1.7996101047843394, "grad_norm": 6.752363204956055, "learning_rate": 6.2293919812478135e-06, "loss": 0.50953321, "memory(GiB)": 34.88, "step": 66465, "train_speed(iter/s)": 0.412422 }, { "acc": 0.91757746, "epoch": 1.7997454850675547, "grad_norm": 23.44508171081543, "learning_rate": 6.228849577407018e-06, "loss": 0.45763526, "memory(GiB)": 34.88, "step": 66470, "train_speed(iter/s)": 0.412423 }, { "acc": 0.90864639, "epoch": 1.7998808653507703, "grad_norm": 9.219635963439941, "learning_rate": 6.22830715817873e-06, "loss": 0.46456919, "memory(GiB)": 34.88, "step": 66475, "train_speed(iter/s)": 0.412424 }, { "acc": 0.91423979, "epoch": 1.8000162456339859, "grad_norm": 4.547889232635498, "learning_rate": 6.227764723569747e-06, "loss": 0.46590242, "memory(GiB)": 34.88, "step": 66480, "train_speed(iter/s)": 0.412426 }, { "acc": 0.90754375, "epoch": 1.8001516259172015, "grad_norm": 10.08512020111084, "learning_rate": 6.227222273586859e-06, "loss": 0.48904715, "memory(GiB)": 34.88, "step": 66485, "train_speed(iter/s)": 0.412428 }, { "acc": 0.90936584, "epoch": 1.800287006200417, "grad_norm": 4.373373508453369, "learning_rate": 6.226679808236865e-06, "loss": 0.45665727, "memory(GiB)": 34.88, "step": 66490, "train_speed(iter/s)": 0.412429 }, { "acc": 0.91033106, "epoch": 1.8004223864836324, "grad_norm": 11.194537162780762, "learning_rate": 6.2261373275265605e-06, "loss": 0.53414354, "memory(GiB)": 34.88, "step": 66495, "train_speed(iter/s)": 0.412431 }, { "acc": 0.91165504, "epoch": 1.8005577667668482, "grad_norm": 10.253032684326172, "learning_rate": 6.225594831462739e-06, "loss": 0.43608999, "memory(GiB)": 34.88, "step": 66500, "train_speed(iter/s)": 0.412432 }, { "acc": 0.92852888, "epoch": 1.8006931470500636, "grad_norm": 7.415519714355469, "learning_rate": 6.225052320052198e-06, "loss": 0.38857274, "memory(GiB)": 34.88, "step": 66505, "train_speed(iter/s)": 0.412434 }, { "acc": 0.89344215, "epoch": 1.8008285273332791, "grad_norm": 5.5464396476745605, "learning_rate": 6.224509793301733e-06, "loss": 0.5524724, "memory(GiB)": 34.88, "step": 66510, "train_speed(iter/s)": 0.412435 }, { "acc": 0.91386166, "epoch": 1.8009639076164947, "grad_norm": 3.944363832473755, "learning_rate": 6.2239672512181406e-06, "loss": 0.47575068, "memory(GiB)": 34.88, "step": 66515, "train_speed(iter/s)": 0.412437 }, { "acc": 0.90809631, "epoch": 1.8010992878997103, "grad_norm": 6.9493327140808105, "learning_rate": 6.223424693808214e-06, "loss": 0.5974555, "memory(GiB)": 34.88, "step": 66520, "train_speed(iter/s)": 0.412438 }, { "acc": 0.90429668, "epoch": 1.8012346681829259, "grad_norm": 6.0522332191467285, "learning_rate": 6.222882121078756e-06, "loss": 0.543223, "memory(GiB)": 34.88, "step": 66525, "train_speed(iter/s)": 0.41244 }, { "acc": 0.91415472, "epoch": 1.8013700484661412, "grad_norm": 7.699463844299316, "learning_rate": 6.222339533036557e-06, "loss": 0.42724104, "memory(GiB)": 34.88, "step": 66530, "train_speed(iter/s)": 0.412441 }, { "acc": 0.9115696, "epoch": 1.801505428749357, "grad_norm": 19.84444236755371, "learning_rate": 6.221796929688418e-06, "loss": 0.47525578, "memory(GiB)": 34.88, "step": 66535, "train_speed(iter/s)": 0.412443 }, { "acc": 0.90941925, "epoch": 1.8016408090325724, "grad_norm": 10.856582641601562, "learning_rate": 6.221254311041137e-06, "loss": 0.51460385, "memory(GiB)": 34.88, "step": 66540, "train_speed(iter/s)": 0.412444 }, { "acc": 0.93635826, "epoch": 1.8017761893157882, "grad_norm": 5.706522464752197, "learning_rate": 6.220711677101507e-06, "loss": 0.33215718, "memory(GiB)": 34.88, "step": 66545, "train_speed(iter/s)": 0.412446 }, { "acc": 0.91445055, "epoch": 1.8019115695990036, "grad_norm": 12.151806831359863, "learning_rate": 6.220169027876331e-06, "loss": 0.5399281, "memory(GiB)": 34.88, "step": 66550, "train_speed(iter/s)": 0.412448 }, { "acc": 0.9117836, "epoch": 1.8020469498822191, "grad_norm": 23.487031936645508, "learning_rate": 6.219626363372401e-06, "loss": 0.49513354, "memory(GiB)": 34.88, "step": 66555, "train_speed(iter/s)": 0.412449 }, { "acc": 0.91714678, "epoch": 1.8021823301654347, "grad_norm": 5.415004253387451, "learning_rate": 6.219083683596518e-06, "loss": 0.50254025, "memory(GiB)": 34.88, "step": 66560, "train_speed(iter/s)": 0.41245 }, { "acc": 0.92911253, "epoch": 1.8023177104486503, "grad_norm": 4.5862040519714355, "learning_rate": 6.218540988555481e-06, "loss": 0.35826316, "memory(GiB)": 34.88, "step": 66565, "train_speed(iter/s)": 0.412452 }, { "acc": 0.89646492, "epoch": 1.8024530907318659, "grad_norm": 7.509645938873291, "learning_rate": 6.2179982782560855e-06, "loss": 0.53909626, "memory(GiB)": 34.88, "step": 66570, "train_speed(iter/s)": 0.412454 }, { "acc": 0.92179146, "epoch": 1.8025884710150812, "grad_norm": 4.140195846557617, "learning_rate": 6.217455552705134e-06, "loss": 0.39837637, "memory(GiB)": 34.88, "step": 66575, "train_speed(iter/s)": 0.412455 }, { "acc": 0.9048563, "epoch": 1.802723851298297, "grad_norm": 9.274469375610352, "learning_rate": 6.216912811909422e-06, "loss": 0.47017341, "memory(GiB)": 34.88, "step": 66580, "train_speed(iter/s)": 0.412457 }, { "acc": 0.91021576, "epoch": 1.8028592315815124, "grad_norm": 6.643986225128174, "learning_rate": 6.216370055875748e-06, "loss": 0.51233473, "memory(GiB)": 34.88, "step": 66585, "train_speed(iter/s)": 0.412459 }, { "acc": 0.90610313, "epoch": 1.802994611864728, "grad_norm": 11.012018203735352, "learning_rate": 6.215827284610913e-06, "loss": 0.47182617, "memory(GiB)": 34.88, "step": 66590, "train_speed(iter/s)": 0.41246 }, { "acc": 0.89757853, "epoch": 1.8031299921479436, "grad_norm": 9.75037956237793, "learning_rate": 6.215284498121718e-06, "loss": 0.6574049, "memory(GiB)": 34.88, "step": 66595, "train_speed(iter/s)": 0.412462 }, { "acc": 0.89887028, "epoch": 1.8032653724311591, "grad_norm": 6.832551956176758, "learning_rate": 6.214741696414957e-06, "loss": 0.58942347, "memory(GiB)": 34.88, "step": 66600, "train_speed(iter/s)": 0.412463 }, { "acc": 0.93499851, "epoch": 1.8034007527143747, "grad_norm": 5.058276653289795, "learning_rate": 6.214198879497434e-06, "loss": 0.37321291, "memory(GiB)": 34.88, "step": 66605, "train_speed(iter/s)": 0.412465 }, { "acc": 0.88452606, "epoch": 1.80353613299759, "grad_norm": 18.963998794555664, "learning_rate": 6.213656047375947e-06, "loss": 0.65376954, "memory(GiB)": 34.88, "step": 66610, "train_speed(iter/s)": 0.412466 }, { "acc": 0.91990118, "epoch": 1.8036715132808059, "grad_norm": 8.698598861694336, "learning_rate": 6.213113200057297e-06, "loss": 0.38188548, "memory(GiB)": 34.88, "step": 66615, "train_speed(iter/s)": 0.412467 }, { "acc": 0.9192543, "epoch": 1.8038068935640212, "grad_norm": 6.521213054656982, "learning_rate": 6.212570337548285e-06, "loss": 0.44950185, "memory(GiB)": 34.88, "step": 66620, "train_speed(iter/s)": 0.412469 }, { "acc": 0.90195227, "epoch": 1.803942273847237, "grad_norm": 14.266458511352539, "learning_rate": 6.212027459855707e-06, "loss": 0.59268007, "memory(GiB)": 34.88, "step": 66625, "train_speed(iter/s)": 0.412471 }, { "acc": 0.89408054, "epoch": 1.8040776541304524, "grad_norm": 29.497854232788086, "learning_rate": 6.21148456698637e-06, "loss": 0.58903141, "memory(GiB)": 34.88, "step": 66630, "train_speed(iter/s)": 0.412472 }, { "acc": 0.88790798, "epoch": 1.804213034413668, "grad_norm": 31.44962501525879, "learning_rate": 6.210941658947072e-06, "loss": 0.56567569, "memory(GiB)": 34.88, "step": 66635, "train_speed(iter/s)": 0.412474 }, { "acc": 0.91800308, "epoch": 1.8043484146968836, "grad_norm": 10.302216529846191, "learning_rate": 6.210398735744612e-06, "loss": 0.38891916, "memory(GiB)": 34.88, "step": 66640, "train_speed(iter/s)": 0.412475 }, { "acc": 0.92141933, "epoch": 1.8044837949800991, "grad_norm": 8.776942253112793, "learning_rate": 6.209855797385794e-06, "loss": 0.47533069, "memory(GiB)": 34.88, "step": 66645, "train_speed(iter/s)": 0.412477 }, { "acc": 0.89862804, "epoch": 1.8046191752633147, "grad_norm": 11.055901527404785, "learning_rate": 6.209312843877418e-06, "loss": 0.54667158, "memory(GiB)": 34.88, "step": 66650, "train_speed(iter/s)": 0.412478 }, { "acc": 0.90579729, "epoch": 1.80475455554653, "grad_norm": 8.575479507446289, "learning_rate": 6.208769875226285e-06, "loss": 0.47293768, "memory(GiB)": 34.88, "step": 66655, "train_speed(iter/s)": 0.41248 }, { "acc": 0.89997501, "epoch": 1.8048899358297459, "grad_norm": 7.317142963409424, "learning_rate": 6.208226891439197e-06, "loss": 0.54931793, "memory(GiB)": 34.88, "step": 66660, "train_speed(iter/s)": 0.412481 }, { "acc": 0.89916935, "epoch": 1.8050253161129612, "grad_norm": 10.023919105529785, "learning_rate": 6.2076838925229585e-06, "loss": 0.51435795, "memory(GiB)": 34.88, "step": 66665, "train_speed(iter/s)": 0.412483 }, { "acc": 0.89369507, "epoch": 1.8051606963961768, "grad_norm": 16.1416015625, "learning_rate": 6.207140878484369e-06, "loss": 0.55253067, "memory(GiB)": 34.88, "step": 66670, "train_speed(iter/s)": 0.412484 }, { "acc": 0.91388607, "epoch": 1.8052960766793924, "grad_norm": 6.59207010269165, "learning_rate": 6.206597849330234e-06, "loss": 0.42360783, "memory(GiB)": 34.88, "step": 66675, "train_speed(iter/s)": 0.412486 }, { "acc": 0.90612354, "epoch": 1.805431456962608, "grad_norm": 7.764257907867432, "learning_rate": 6.2060548050673485e-06, "loss": 0.57572594, "memory(GiB)": 34.88, "step": 66680, "train_speed(iter/s)": 0.412488 }, { "acc": 0.91479578, "epoch": 1.8055668372458236, "grad_norm": 13.150343894958496, "learning_rate": 6.205511745702521e-06, "loss": 0.38810451, "memory(GiB)": 34.88, "step": 66685, "train_speed(iter/s)": 0.412489 }, { "acc": 0.9134943, "epoch": 1.805702217529039, "grad_norm": 8.365517616271973, "learning_rate": 6.2049686712425575e-06, "loss": 0.53301821, "memory(GiB)": 34.88, "step": 66690, "train_speed(iter/s)": 0.412491 }, { "acc": 0.9030158, "epoch": 1.8058375978122547, "grad_norm": 4.6561198234558105, "learning_rate": 6.204425581694253e-06, "loss": 0.50635886, "memory(GiB)": 34.88, "step": 66695, "train_speed(iter/s)": 0.412492 }, { "acc": 0.90368824, "epoch": 1.80597297809547, "grad_norm": 6.215580463409424, "learning_rate": 6.203882477064417e-06, "loss": 0.50862536, "memory(GiB)": 34.88, "step": 66700, "train_speed(iter/s)": 0.412494 }, { "acc": 0.91986551, "epoch": 1.8061083583786859, "grad_norm": 6.716182708740234, "learning_rate": 6.20333935735985e-06, "loss": 0.4236896, "memory(GiB)": 34.88, "step": 66705, "train_speed(iter/s)": 0.412496 }, { "acc": 0.91209774, "epoch": 1.8062437386619012, "grad_norm": 8.503498077392578, "learning_rate": 6.202796222587357e-06, "loss": 0.40139313, "memory(GiB)": 34.88, "step": 66710, "train_speed(iter/s)": 0.412497 }, { "acc": 0.92040043, "epoch": 1.8063791189451168, "grad_norm": 8.516925811767578, "learning_rate": 6.2022530727537415e-06, "loss": 0.39637249, "memory(GiB)": 34.88, "step": 66715, "train_speed(iter/s)": 0.412499 }, { "acc": 0.91933327, "epoch": 1.8065144992283324, "grad_norm": 9.710777282714844, "learning_rate": 6.2017099078658065e-06, "loss": 0.54070511, "memory(GiB)": 34.88, "step": 66720, "train_speed(iter/s)": 0.4125 }, { "acc": 0.89725723, "epoch": 1.806649879511548, "grad_norm": 7.932877063751221, "learning_rate": 6.201166727930357e-06, "loss": 0.51421356, "memory(GiB)": 34.88, "step": 66725, "train_speed(iter/s)": 0.412501 }, { "acc": 0.90560951, "epoch": 1.8067852597947636, "grad_norm": 9.355429649353027, "learning_rate": 6.200623532954198e-06, "loss": 0.55421562, "memory(GiB)": 34.88, "step": 66730, "train_speed(iter/s)": 0.412503 }, { "acc": 0.91782017, "epoch": 1.806920640077979, "grad_norm": 4.747281551361084, "learning_rate": 6.200080322944134e-06, "loss": 0.49010754, "memory(GiB)": 34.88, "step": 66735, "train_speed(iter/s)": 0.412505 }, { "acc": 0.9019434, "epoch": 1.8070560203611947, "grad_norm": 6.853062152862549, "learning_rate": 6.199537097906968e-06, "loss": 0.49744778, "memory(GiB)": 34.88, "step": 66740, "train_speed(iter/s)": 0.412506 }, { "acc": 0.90916309, "epoch": 1.80719140064441, "grad_norm": 13.846230506896973, "learning_rate": 6.198993857849507e-06, "loss": 0.49612274, "memory(GiB)": 34.88, "step": 66745, "train_speed(iter/s)": 0.412508 }, { "acc": 0.92292728, "epoch": 1.8073267809276257, "grad_norm": 5.32418155670166, "learning_rate": 6.198450602778554e-06, "loss": 0.40325341, "memory(GiB)": 34.88, "step": 66750, "train_speed(iter/s)": 0.412509 }, { "acc": 0.87869625, "epoch": 1.8074621612108412, "grad_norm": 10.580074310302734, "learning_rate": 6.197907332700918e-06, "loss": 0.63548751, "memory(GiB)": 34.88, "step": 66755, "train_speed(iter/s)": 0.412511 }, { "acc": 0.90535212, "epoch": 1.8075975414940568, "grad_norm": 12.653088569641113, "learning_rate": 6.197364047623402e-06, "loss": 0.56634002, "memory(GiB)": 34.88, "step": 66760, "train_speed(iter/s)": 0.412513 }, { "acc": 0.9157959, "epoch": 1.8077329217772724, "grad_norm": 14.864267349243164, "learning_rate": 6.196820747552811e-06, "loss": 0.42848349, "memory(GiB)": 34.88, "step": 66765, "train_speed(iter/s)": 0.412514 }, { "acc": 0.90827103, "epoch": 1.8078683020604878, "grad_norm": 4.925546169281006, "learning_rate": 6.196277432495952e-06, "loss": 0.5468751, "memory(GiB)": 34.88, "step": 66770, "train_speed(iter/s)": 0.412516 }, { "acc": 0.90960865, "epoch": 1.8080036823437036, "grad_norm": 29.303058624267578, "learning_rate": 6.195734102459631e-06, "loss": 0.50524712, "memory(GiB)": 34.88, "step": 66775, "train_speed(iter/s)": 0.412517 }, { "acc": 0.915446, "epoch": 1.808139062626919, "grad_norm": 7.617941379547119, "learning_rate": 6.195190757450655e-06, "loss": 0.46771879, "memory(GiB)": 34.88, "step": 66780, "train_speed(iter/s)": 0.412519 }, { "acc": 0.89260273, "epoch": 1.8082744429101347, "grad_norm": 14.700998306274414, "learning_rate": 6.194647397475828e-06, "loss": 0.60503044, "memory(GiB)": 34.88, "step": 66785, "train_speed(iter/s)": 0.412521 }, { "acc": 0.91996155, "epoch": 1.80840982319335, "grad_norm": 7.509598255157471, "learning_rate": 6.194104022541961e-06, "loss": 0.3912672, "memory(GiB)": 34.88, "step": 66790, "train_speed(iter/s)": 0.412522 }, { "acc": 0.90635586, "epoch": 1.8085452034765657, "grad_norm": 10.414815902709961, "learning_rate": 6.193560632655855e-06, "loss": 0.53392344, "memory(GiB)": 34.88, "step": 66795, "train_speed(iter/s)": 0.412524 }, { "acc": 0.91326094, "epoch": 1.8086805837597812, "grad_norm": 9.40626335144043, "learning_rate": 6.193017227824322e-06, "loss": 0.51941109, "memory(GiB)": 34.88, "step": 66800, "train_speed(iter/s)": 0.412525 }, { "acc": 0.91275978, "epoch": 1.8088159640429968, "grad_norm": 8.571001052856445, "learning_rate": 6.192473808054168e-06, "loss": 0.48356171, "memory(GiB)": 34.88, "step": 66805, "train_speed(iter/s)": 0.412527 }, { "acc": 0.91373644, "epoch": 1.8089513443262124, "grad_norm": 7.188201904296875, "learning_rate": 6.191930373352199e-06, "loss": 0.43373327, "memory(GiB)": 34.88, "step": 66810, "train_speed(iter/s)": 0.412528 }, { "acc": 0.90739269, "epoch": 1.8090867246094278, "grad_norm": 5.242006778717041, "learning_rate": 6.1913869237252235e-06, "loss": 0.57238398, "memory(GiB)": 34.88, "step": 66815, "train_speed(iter/s)": 0.41253 }, { "acc": 0.8916172, "epoch": 1.8092221048926436, "grad_norm": 33.40766906738281, "learning_rate": 6.190843459180048e-06, "loss": 0.56913991, "memory(GiB)": 34.88, "step": 66820, "train_speed(iter/s)": 0.412531 }, { "acc": 0.92759895, "epoch": 1.809357485175859, "grad_norm": 6.230151176452637, "learning_rate": 6.190299979723483e-06, "loss": 0.38754668, "memory(GiB)": 34.88, "step": 66825, "train_speed(iter/s)": 0.412533 }, { "acc": 0.88965282, "epoch": 1.8094928654590745, "grad_norm": 11.666013717651367, "learning_rate": 6.1897564853623345e-06, "loss": 0.60398149, "memory(GiB)": 34.88, "step": 66830, "train_speed(iter/s)": 0.412534 }, { "acc": 0.93647614, "epoch": 1.80962824574229, "grad_norm": 3.4523518085479736, "learning_rate": 6.189212976103412e-06, "loss": 0.35544496, "memory(GiB)": 34.88, "step": 66835, "train_speed(iter/s)": 0.412536 }, { "acc": 0.9074007, "epoch": 1.8097636260255057, "grad_norm": 24.652748107910156, "learning_rate": 6.188669451953525e-06, "loss": 0.48789935, "memory(GiB)": 34.88, "step": 66840, "train_speed(iter/s)": 0.412537 }, { "acc": 0.93951674, "epoch": 1.8098990063087212, "grad_norm": 8.643745422363281, "learning_rate": 6.188125912919479e-06, "loss": 0.32348433, "memory(GiB)": 34.88, "step": 66845, "train_speed(iter/s)": 0.412539 }, { "acc": 0.89941406, "epoch": 1.8100343865919366, "grad_norm": 6.92310094833374, "learning_rate": 6.187582359008087e-06, "loss": 0.59651456, "memory(GiB)": 34.88, "step": 66850, "train_speed(iter/s)": 0.41254 }, { "acc": 0.89799442, "epoch": 1.8101697668751524, "grad_norm": 9.057190895080566, "learning_rate": 6.187038790226153e-06, "loss": 0.60291586, "memory(GiB)": 34.88, "step": 66855, "train_speed(iter/s)": 0.412542 }, { "acc": 0.89791927, "epoch": 1.8103051471583678, "grad_norm": 20.199604034423828, "learning_rate": 6.186495206580491e-06, "loss": 0.56180534, "memory(GiB)": 34.88, "step": 66860, "train_speed(iter/s)": 0.412543 }, { "acc": 0.90140419, "epoch": 1.8104405274415836, "grad_norm": 14.324545860290527, "learning_rate": 6.185951608077907e-06, "loss": 0.52227254, "memory(GiB)": 34.88, "step": 66865, "train_speed(iter/s)": 0.412544 }, { "acc": 0.91827164, "epoch": 1.810575907724799, "grad_norm": 10.414762496948242, "learning_rate": 6.1854079947252145e-06, "loss": 0.48785419, "memory(GiB)": 34.88, "step": 66870, "train_speed(iter/s)": 0.412546 }, { "acc": 0.89562817, "epoch": 1.8107112880080145, "grad_norm": 10.964505195617676, "learning_rate": 6.18486436652922e-06, "loss": 0.57985296, "memory(GiB)": 34.88, "step": 66875, "train_speed(iter/s)": 0.412548 }, { "acc": 0.91807079, "epoch": 1.81084666829123, "grad_norm": 10.621251106262207, "learning_rate": 6.1843207234967324e-06, "loss": 0.39663992, "memory(GiB)": 34.88, "step": 66880, "train_speed(iter/s)": 0.412549 }, { "acc": 0.92682095, "epoch": 1.8109820485744457, "grad_norm": 8.327614784240723, "learning_rate": 6.1837770656345676e-06, "loss": 0.39835026, "memory(GiB)": 34.88, "step": 66885, "train_speed(iter/s)": 0.412551 }, { "acc": 0.92647629, "epoch": 1.8111174288576612, "grad_norm": 11.677467346191406, "learning_rate": 6.18323339294953e-06, "loss": 0.3801224, "memory(GiB)": 34.88, "step": 66890, "train_speed(iter/s)": 0.412552 }, { "acc": 0.9088068, "epoch": 1.8112528091408766, "grad_norm": 7.2239861488342285, "learning_rate": 6.182689705448434e-06, "loss": 0.55849819, "memory(GiB)": 34.88, "step": 66895, "train_speed(iter/s)": 0.412554 }, { "acc": 0.89457626, "epoch": 1.8113881894240924, "grad_norm": 11.146787643432617, "learning_rate": 6.182146003138087e-06, "loss": 0.44657288, "memory(GiB)": 34.88, "step": 66900, "train_speed(iter/s)": 0.412556 }, { "acc": 0.89678841, "epoch": 1.8115235697073078, "grad_norm": 8.329800605773926, "learning_rate": 6.1816022860253045e-06, "loss": 0.57233629, "memory(GiB)": 34.88, "step": 66905, "train_speed(iter/s)": 0.412557 }, { "acc": 0.88863392, "epoch": 1.8116589499905233, "grad_norm": 8.525725364685059, "learning_rate": 6.181058554116893e-06, "loss": 0.65881443, "memory(GiB)": 34.88, "step": 66910, "train_speed(iter/s)": 0.412559 }, { "acc": 0.90948648, "epoch": 1.811794330273739, "grad_norm": 4.8784661293029785, "learning_rate": 6.180514807419666e-06, "loss": 0.54591107, "memory(GiB)": 34.88, "step": 66915, "train_speed(iter/s)": 0.41256 }, { "acc": 0.92713308, "epoch": 1.8119297105569545, "grad_norm": 16.740060806274414, "learning_rate": 6.179971045940436e-06, "loss": 0.36998425, "memory(GiB)": 34.88, "step": 66920, "train_speed(iter/s)": 0.412562 }, { "acc": 0.91778202, "epoch": 1.81206509084017, "grad_norm": 13.126879692077637, "learning_rate": 6.179427269686012e-06, "loss": 0.4137711, "memory(GiB)": 34.88, "step": 66925, "train_speed(iter/s)": 0.412564 }, { "acc": 0.91159821, "epoch": 1.8122004711233854, "grad_norm": 18.19464683532715, "learning_rate": 6.1788834786632105e-06, "loss": 0.50183916, "memory(GiB)": 34.88, "step": 66930, "train_speed(iter/s)": 0.412565 }, { "acc": 0.91767464, "epoch": 1.8123358514066013, "grad_norm": 10.729174613952637, "learning_rate": 6.178339672878837e-06, "loss": 0.528194, "memory(GiB)": 34.88, "step": 66935, "train_speed(iter/s)": 0.412567 }, { "acc": 0.91865444, "epoch": 1.8124712316898166, "grad_norm": 10.75705337524414, "learning_rate": 6.177795852339708e-06, "loss": 0.45916238, "memory(GiB)": 34.88, "step": 66940, "train_speed(iter/s)": 0.412568 }, { "acc": 0.89337902, "epoch": 1.8126066119730324, "grad_norm": 10.044557571411133, "learning_rate": 6.1772520170526355e-06, "loss": 0.59723821, "memory(GiB)": 34.88, "step": 66945, "train_speed(iter/s)": 0.412569 }, { "acc": 0.91691818, "epoch": 1.8127419922562478, "grad_norm": 7.3755669593811035, "learning_rate": 6.17670816702443e-06, "loss": 0.41507149, "memory(GiB)": 34.88, "step": 66950, "train_speed(iter/s)": 0.412571 }, { "acc": 0.9165658, "epoch": 1.8128773725394633, "grad_norm": 6.251662254333496, "learning_rate": 6.176164302261907e-06, "loss": 0.46885834, "memory(GiB)": 34.88, "step": 66955, "train_speed(iter/s)": 0.412573 }, { "acc": 0.91703405, "epoch": 1.813012752822679, "grad_norm": 8.140068054199219, "learning_rate": 6.175620422771879e-06, "loss": 0.55426679, "memory(GiB)": 34.88, "step": 66960, "train_speed(iter/s)": 0.412574 }, { "acc": 0.90305538, "epoch": 1.8131481331058945, "grad_norm": 5.981001853942871, "learning_rate": 6.175076528561159e-06, "loss": 0.54544783, "memory(GiB)": 34.88, "step": 66965, "train_speed(iter/s)": 0.412576 }, { "acc": 0.92737103, "epoch": 1.81328351338911, "grad_norm": 2.772202253341675, "learning_rate": 6.1745326196365594e-06, "loss": 0.41277189, "memory(GiB)": 34.88, "step": 66970, "train_speed(iter/s)": 0.412577 }, { "acc": 0.89341145, "epoch": 1.8134188936723254, "grad_norm": 7.334025859832764, "learning_rate": 6.173988696004894e-06, "loss": 0.63271418, "memory(GiB)": 34.88, "step": 66975, "train_speed(iter/s)": 0.412579 }, { "acc": 0.90477505, "epoch": 1.8135542739555413, "grad_norm": 18.91684341430664, "learning_rate": 6.173444757672978e-06, "loss": 0.56589746, "memory(GiB)": 34.88, "step": 66980, "train_speed(iter/s)": 0.41258 }, { "acc": 0.91131029, "epoch": 1.8136896542387566, "grad_norm": 6.128669261932373, "learning_rate": 6.172900804647622e-06, "loss": 0.45876946, "memory(GiB)": 34.88, "step": 66985, "train_speed(iter/s)": 0.412582 }, { "acc": 0.91013508, "epoch": 1.8138250345219722, "grad_norm": 60.14979934692383, "learning_rate": 6.172356836935644e-06, "loss": 0.45798707, "memory(GiB)": 34.88, "step": 66990, "train_speed(iter/s)": 0.412583 }, { "acc": 0.91731491, "epoch": 1.8139604148051878, "grad_norm": 6.419800281524658, "learning_rate": 6.171812854543855e-06, "loss": 0.45461416, "memory(GiB)": 34.88, "step": 66995, "train_speed(iter/s)": 0.412585 }, { "acc": 0.90323515, "epoch": 1.8140957950884034, "grad_norm": 12.640519142150879, "learning_rate": 6.171268857479071e-06, "loss": 0.5280241, "memory(GiB)": 34.88, "step": 67000, "train_speed(iter/s)": 0.412586 }, { "acc": 0.9090662, "epoch": 1.814231175371619, "grad_norm": 4.71574068069458, "learning_rate": 6.170724845748107e-06, "loss": 0.47535896, "memory(GiB)": 34.88, "step": 67005, "train_speed(iter/s)": 0.412588 }, { "acc": 0.8994195, "epoch": 1.8143665556548343, "grad_norm": 11.322307586669922, "learning_rate": 6.17018081935778e-06, "loss": 0.60676713, "memory(GiB)": 34.88, "step": 67010, "train_speed(iter/s)": 0.412589 }, { "acc": 0.90933647, "epoch": 1.81450193593805, "grad_norm": 6.756951332092285, "learning_rate": 6.169636778314899e-06, "loss": 0.49682837, "memory(GiB)": 34.88, "step": 67015, "train_speed(iter/s)": 0.41259 }, { "acc": 0.908778, "epoch": 1.8146373162212655, "grad_norm": 6.7533278465271, "learning_rate": 6.1690927226262805e-06, "loss": 0.51967363, "memory(GiB)": 34.88, "step": 67020, "train_speed(iter/s)": 0.412592 }, { "acc": 0.93996258, "epoch": 1.8147726965044813, "grad_norm": 4.907415866851807, "learning_rate": 6.168548652298746e-06, "loss": 0.35321367, "memory(GiB)": 34.88, "step": 67025, "train_speed(iter/s)": 0.412594 }, { "acc": 0.91249313, "epoch": 1.8149080767876966, "grad_norm": 8.316688537597656, "learning_rate": 6.168004567339106e-06, "loss": 0.45251026, "memory(GiB)": 34.88, "step": 67030, "train_speed(iter/s)": 0.412595 }, { "acc": 0.90726814, "epoch": 1.8150434570709122, "grad_norm": 14.57270336151123, "learning_rate": 6.1674604677541775e-06, "loss": 0.4683279, "memory(GiB)": 34.88, "step": 67035, "train_speed(iter/s)": 0.412597 }, { "acc": 0.9242075, "epoch": 1.8151788373541278, "grad_norm": 10.196670532226562, "learning_rate": 6.1669163535507736e-06, "loss": 0.44724274, "memory(GiB)": 34.88, "step": 67040, "train_speed(iter/s)": 0.412599 }, { "acc": 0.91043377, "epoch": 1.8153142176373434, "grad_norm": 16.92534637451172, "learning_rate": 6.166372224735715e-06, "loss": 0.4928298, "memory(GiB)": 34.88, "step": 67045, "train_speed(iter/s)": 0.4126 }, { "acc": 0.89552841, "epoch": 1.815449597920559, "grad_norm": 8.937854766845703, "learning_rate": 6.165828081315815e-06, "loss": 0.5013989, "memory(GiB)": 34.88, "step": 67050, "train_speed(iter/s)": 0.412601 }, { "acc": 0.91620235, "epoch": 1.8155849782037743, "grad_norm": 5.6018476486206055, "learning_rate": 6.1652839232978925e-06, "loss": 0.39431808, "memory(GiB)": 34.88, "step": 67055, "train_speed(iter/s)": 0.412603 }, { "acc": 0.89689102, "epoch": 1.81572035848699, "grad_norm": 7.667696475982666, "learning_rate": 6.164739750688761e-06, "loss": 0.550669, "memory(GiB)": 34.88, "step": 67060, "train_speed(iter/s)": 0.412604 }, { "acc": 0.91874657, "epoch": 1.8158557387702055, "grad_norm": 6.744740009307861, "learning_rate": 6.164195563495238e-06, "loss": 0.40891776, "memory(GiB)": 34.88, "step": 67065, "train_speed(iter/s)": 0.412606 }, { "acc": 0.90128574, "epoch": 1.815991119053421, "grad_norm": 11.091245651245117, "learning_rate": 6.163651361724144e-06, "loss": 0.61021786, "memory(GiB)": 34.88, "step": 67070, "train_speed(iter/s)": 0.412607 }, { "acc": 0.90499249, "epoch": 1.8161264993366366, "grad_norm": 14.853252410888672, "learning_rate": 6.163107145382291e-06, "loss": 0.54414186, "memory(GiB)": 34.88, "step": 67075, "train_speed(iter/s)": 0.412609 }, { "acc": 0.92410717, "epoch": 1.8162618796198522, "grad_norm": 9.123905181884766, "learning_rate": 6.162562914476501e-06, "loss": 0.41416497, "memory(GiB)": 34.88, "step": 67080, "train_speed(iter/s)": 0.412611 }, { "acc": 0.91021423, "epoch": 1.8163972599030678, "grad_norm": 6.121103286743164, "learning_rate": 6.162018669013588e-06, "loss": 0.58518353, "memory(GiB)": 34.88, "step": 67085, "train_speed(iter/s)": 0.412612 }, { "acc": 0.90258846, "epoch": 1.8165326401862831, "grad_norm": 11.090967178344727, "learning_rate": 6.161474409000372e-06, "loss": 0.6242075, "memory(GiB)": 34.88, "step": 67090, "train_speed(iter/s)": 0.412614 }, { "acc": 0.89331627, "epoch": 1.816668020469499, "grad_norm": 12.993759155273438, "learning_rate": 6.160930134443671e-06, "loss": 0.59967899, "memory(GiB)": 34.88, "step": 67095, "train_speed(iter/s)": 0.412615 }, { "acc": 0.89147758, "epoch": 1.8168034007527143, "grad_norm": 8.17233657836914, "learning_rate": 6.1603858453503e-06, "loss": 0.55214005, "memory(GiB)": 34.88, "step": 67100, "train_speed(iter/s)": 0.412617 }, { "acc": 0.91140366, "epoch": 1.8169387810359299, "grad_norm": 8.754916191101074, "learning_rate": 6.159841541727083e-06, "loss": 0.49435568, "memory(GiB)": 34.88, "step": 67105, "train_speed(iter/s)": 0.412618 }, { "acc": 0.89573259, "epoch": 1.8170741613191455, "grad_norm": 11.096668243408203, "learning_rate": 6.159297223580833e-06, "loss": 0.56353388, "memory(GiB)": 34.88, "step": 67110, "train_speed(iter/s)": 0.41262 }, { "acc": 0.9120842, "epoch": 1.817209541602361, "grad_norm": 9.764616012573242, "learning_rate": 6.15875289091837e-06, "loss": 0.4559238, "memory(GiB)": 34.88, "step": 67115, "train_speed(iter/s)": 0.412621 }, { "acc": 0.9069088, "epoch": 1.8173449218855766, "grad_norm": 5.46818733215332, "learning_rate": 6.158208543746515e-06, "loss": 0.51594362, "memory(GiB)": 34.88, "step": 67120, "train_speed(iter/s)": 0.412623 }, { "acc": 0.92942867, "epoch": 1.817480302168792, "grad_norm": 8.736523628234863, "learning_rate": 6.157664182072086e-06, "loss": 0.3320966, "memory(GiB)": 34.88, "step": 67125, "train_speed(iter/s)": 0.412624 }, { "acc": 0.90614567, "epoch": 1.8176156824520078, "grad_norm": 6.138152599334717, "learning_rate": 6.1571198059019e-06, "loss": 0.46233435, "memory(GiB)": 34.88, "step": 67130, "train_speed(iter/s)": 0.412626 }, { "acc": 0.91495628, "epoch": 1.8177510627352231, "grad_norm": 6.520210266113281, "learning_rate": 6.156575415242779e-06, "loss": 0.4664825, "memory(GiB)": 34.88, "step": 67135, "train_speed(iter/s)": 0.412628 }, { "acc": 0.91637506, "epoch": 1.817886443018439, "grad_norm": 9.321914672851562, "learning_rate": 6.156031010101543e-06, "loss": 0.47793407, "memory(GiB)": 34.88, "step": 67140, "train_speed(iter/s)": 0.412629 }, { "acc": 0.9240736, "epoch": 1.8180218233016543, "grad_norm": 3.8940744400024414, "learning_rate": 6.155486590485009e-06, "loss": 0.41199117, "memory(GiB)": 34.88, "step": 67145, "train_speed(iter/s)": 0.412631 }, { "acc": 0.91928549, "epoch": 1.8181572035848699, "grad_norm": 5.34244441986084, "learning_rate": 6.154942156399999e-06, "loss": 0.47674561, "memory(GiB)": 34.88, "step": 67150, "train_speed(iter/s)": 0.412632 }, { "acc": 0.91097631, "epoch": 1.8182925838680855, "grad_norm": 8.187533378601074, "learning_rate": 6.1543977078533325e-06, "loss": 0.50407867, "memory(GiB)": 34.88, "step": 67155, "train_speed(iter/s)": 0.412634 }, { "acc": 0.89167976, "epoch": 1.818427964151301, "grad_norm": 8.537041664123535, "learning_rate": 6.1538532448518296e-06, "loss": 0.58997245, "memory(GiB)": 34.88, "step": 67160, "train_speed(iter/s)": 0.412635 }, { "acc": 0.90878325, "epoch": 1.8185633444345166, "grad_norm": 5.143852710723877, "learning_rate": 6.1533087674023095e-06, "loss": 0.49384856, "memory(GiB)": 34.88, "step": 67165, "train_speed(iter/s)": 0.412637 }, { "acc": 0.91516733, "epoch": 1.818698724717732, "grad_norm": 4.916680335998535, "learning_rate": 6.152764275511597e-06, "loss": 0.48420916, "memory(GiB)": 34.88, "step": 67170, "train_speed(iter/s)": 0.412639 }, { "acc": 0.91813641, "epoch": 1.8188341050009478, "grad_norm": 11.673165321350098, "learning_rate": 6.152219769186508e-06, "loss": 0.46855707, "memory(GiB)": 34.88, "step": 67175, "train_speed(iter/s)": 0.41264 }, { "acc": 0.8854744, "epoch": 1.8189694852841631, "grad_norm": 15.411827087402344, "learning_rate": 6.151675248433867e-06, "loss": 0.67949915, "memory(GiB)": 34.88, "step": 67180, "train_speed(iter/s)": 0.412642 }, { "acc": 0.92192068, "epoch": 1.8191048655673787, "grad_norm": 6.223111629486084, "learning_rate": 6.151130713260495e-06, "loss": 0.38372879, "memory(GiB)": 34.88, "step": 67185, "train_speed(iter/s)": 0.412643 }, { "acc": 0.92474079, "epoch": 1.8192402458505943, "grad_norm": 3.071939468383789, "learning_rate": 6.1505861636732115e-06, "loss": 0.43254251, "memory(GiB)": 34.88, "step": 67190, "train_speed(iter/s)": 0.412645 }, { "acc": 0.90799942, "epoch": 1.8193756261338099, "grad_norm": 6.276175498962402, "learning_rate": 6.150041599678838e-06, "loss": 0.4904623, "memory(GiB)": 34.88, "step": 67195, "train_speed(iter/s)": 0.412646 }, { "acc": 0.89837475, "epoch": 1.8195110064170255, "grad_norm": 9.917527198791504, "learning_rate": 6.149497021284198e-06, "loss": 0.611269, "memory(GiB)": 34.88, "step": 67200, "train_speed(iter/s)": 0.412648 }, { "acc": 0.90514393, "epoch": 1.8196463867002408, "grad_norm": 6.946568965911865, "learning_rate": 6.148952428496111e-06, "loss": 0.49897227, "memory(GiB)": 34.88, "step": 67205, "train_speed(iter/s)": 0.41265 }, { "acc": 0.93711739, "epoch": 1.8197817669834566, "grad_norm": 10.953109741210938, "learning_rate": 6.148407821321401e-06, "loss": 0.32151976, "memory(GiB)": 34.88, "step": 67210, "train_speed(iter/s)": 0.412651 }, { "acc": 0.88643751, "epoch": 1.819917147266672, "grad_norm": 22.274072647094727, "learning_rate": 6.147863199766891e-06, "loss": 0.73945045, "memory(GiB)": 34.88, "step": 67215, "train_speed(iter/s)": 0.412652 }, { "acc": 0.9229023, "epoch": 1.8200525275498878, "grad_norm": 10.369048118591309, "learning_rate": 6.147318563839403e-06, "loss": 0.396737, "memory(GiB)": 34.88, "step": 67220, "train_speed(iter/s)": 0.412654 }, { "acc": 0.90711222, "epoch": 1.8201879078331031, "grad_norm": 4.804958820343018, "learning_rate": 6.1467739135457585e-06, "loss": 0.47641029, "memory(GiB)": 34.88, "step": 67225, "train_speed(iter/s)": 0.412656 }, { "acc": 0.90710764, "epoch": 1.8203232881163187, "grad_norm": 6.495536804199219, "learning_rate": 6.146229248892781e-06, "loss": 0.52002306, "memory(GiB)": 34.88, "step": 67230, "train_speed(iter/s)": 0.412657 }, { "acc": 0.91572714, "epoch": 1.8204586683995343, "grad_norm": 7.393073081970215, "learning_rate": 6.145684569887291e-06, "loss": 0.42113819, "memory(GiB)": 34.88, "step": 67235, "train_speed(iter/s)": 0.412659 }, { "acc": 0.91080379, "epoch": 1.8205940486827499, "grad_norm": 10.19462776184082, "learning_rate": 6.145139876536117e-06, "loss": 0.48165808, "memory(GiB)": 34.88, "step": 67240, "train_speed(iter/s)": 0.412661 }, { "acc": 0.92770472, "epoch": 1.8207294289659655, "grad_norm": 6.373382568359375, "learning_rate": 6.144595168846076e-06, "loss": 0.38776946, "memory(GiB)": 34.88, "step": 67245, "train_speed(iter/s)": 0.412662 }, { "acc": 0.92760744, "epoch": 1.8208648092491808, "grad_norm": 4.086745262145996, "learning_rate": 6.144050446823997e-06, "loss": 0.3719836, "memory(GiB)": 34.88, "step": 67250, "train_speed(iter/s)": 0.412664 }, { "acc": 0.89314365, "epoch": 1.8210001895323966, "grad_norm": 6.591683387756348, "learning_rate": 6.143505710476701e-06, "loss": 0.55637956, "memory(GiB)": 34.88, "step": 67255, "train_speed(iter/s)": 0.412665 }, { "acc": 0.89313049, "epoch": 1.821135569815612, "grad_norm": 12.704277992248535, "learning_rate": 6.142960959811015e-06, "loss": 0.66872878, "memory(GiB)": 34.88, "step": 67260, "train_speed(iter/s)": 0.412666 }, { "acc": 0.92927465, "epoch": 1.8212709500988276, "grad_norm": 10.92532730102539, "learning_rate": 6.142416194833759e-06, "loss": 0.37483816, "memory(GiB)": 34.88, "step": 67265, "train_speed(iter/s)": 0.412668 }, { "acc": 0.9191927, "epoch": 1.8214063303820431, "grad_norm": 8.761740684509277, "learning_rate": 6.141871415551758e-06, "loss": 0.45492215, "memory(GiB)": 34.88, "step": 67270, "train_speed(iter/s)": 0.412669 }, { "acc": 0.90768719, "epoch": 1.8215417106652587, "grad_norm": 10.05270767211914, "learning_rate": 6.141326621971837e-06, "loss": 0.52846303, "memory(GiB)": 34.88, "step": 67275, "train_speed(iter/s)": 0.412671 }, { "acc": 0.89482346, "epoch": 1.8216770909484743, "grad_norm": 15.828696250915527, "learning_rate": 6.1407818141008215e-06, "loss": 0.53453999, "memory(GiB)": 34.88, "step": 67280, "train_speed(iter/s)": 0.412672 }, { "acc": 0.91210995, "epoch": 1.8218124712316897, "grad_norm": 11.761829376220703, "learning_rate": 6.140236991945534e-06, "loss": 0.47603378, "memory(GiB)": 34.88, "step": 67285, "train_speed(iter/s)": 0.412674 }, { "acc": 0.90582037, "epoch": 1.8219478515149055, "grad_norm": 16.5937442779541, "learning_rate": 6.139692155512803e-06, "loss": 0.58460808, "memory(GiB)": 34.88, "step": 67290, "train_speed(iter/s)": 0.412675 }, { "acc": 0.91569729, "epoch": 1.8220832317981208, "grad_norm": 6.013371467590332, "learning_rate": 6.139147304809451e-06, "loss": 0.41324539, "memory(GiB)": 34.88, "step": 67295, "train_speed(iter/s)": 0.412677 }, { "acc": 0.92750378, "epoch": 1.8222186120813366, "grad_norm": 11.259642601013184, "learning_rate": 6.138602439842305e-06, "loss": 0.43367362, "memory(GiB)": 34.88, "step": 67300, "train_speed(iter/s)": 0.412678 }, { "acc": 0.90769567, "epoch": 1.822353992364552, "grad_norm": 5.033875465393066, "learning_rate": 6.138057560618188e-06, "loss": 0.48201594, "memory(GiB)": 34.88, "step": 67305, "train_speed(iter/s)": 0.41268 }, { "acc": 0.93391123, "epoch": 1.8224893726477676, "grad_norm": 8.216086387634277, "learning_rate": 6.13751266714393e-06, "loss": 0.38261013, "memory(GiB)": 34.88, "step": 67310, "train_speed(iter/s)": 0.412682 }, { "acc": 0.93135967, "epoch": 1.8226247529309831, "grad_norm": 9.744989395141602, "learning_rate": 6.136967759426351e-06, "loss": 0.32638955, "memory(GiB)": 34.88, "step": 67315, "train_speed(iter/s)": 0.412683 }, { "acc": 0.92447319, "epoch": 1.8227601332141987, "grad_norm": 10.356618881225586, "learning_rate": 6.136422837472281e-06, "loss": 0.42483788, "memory(GiB)": 34.88, "step": 67320, "train_speed(iter/s)": 0.412685 }, { "acc": 0.91618118, "epoch": 1.8228955134974143, "grad_norm": 20.654611587524414, "learning_rate": 6.135877901288546e-06, "loss": 0.45045576, "memory(GiB)": 34.88, "step": 67325, "train_speed(iter/s)": 0.412686 }, { "acc": 0.91839027, "epoch": 1.8230308937806297, "grad_norm": 8.145979881286621, "learning_rate": 6.135332950881971e-06, "loss": 0.42019978, "memory(GiB)": 34.88, "step": 67330, "train_speed(iter/s)": 0.412688 }, { "acc": 0.91030884, "epoch": 1.8231662740638455, "grad_norm": 8.909684181213379, "learning_rate": 6.134787986259385e-06, "loss": 0.45719461, "memory(GiB)": 34.88, "step": 67335, "train_speed(iter/s)": 0.412689 }, { "acc": 0.89929457, "epoch": 1.8233016543470608, "grad_norm": 20.514259338378906, "learning_rate": 6.134243007427611e-06, "loss": 0.55911531, "memory(GiB)": 34.88, "step": 67340, "train_speed(iter/s)": 0.412691 }, { "acc": 0.92700443, "epoch": 1.8234370346302764, "grad_norm": 7.772326946258545, "learning_rate": 6.133698014393481e-06, "loss": 0.4200943, "memory(GiB)": 34.88, "step": 67345, "train_speed(iter/s)": 0.412692 }, { "acc": 0.89381351, "epoch": 1.823572414913492, "grad_norm": 5.536938190460205, "learning_rate": 6.133153007163816e-06, "loss": 0.586273, "memory(GiB)": 34.88, "step": 67350, "train_speed(iter/s)": 0.412694 }, { "acc": 0.91363525, "epoch": 1.8237077951967076, "grad_norm": 7.506486415863037, "learning_rate": 6.132607985745449e-06, "loss": 0.52911901, "memory(GiB)": 34.88, "step": 67355, "train_speed(iter/s)": 0.412695 }, { "acc": 0.8951129, "epoch": 1.8238431754799231, "grad_norm": 7.913441181182861, "learning_rate": 6.132062950145202e-06, "loss": 0.52961178, "memory(GiB)": 34.88, "step": 67360, "train_speed(iter/s)": 0.412697 }, { "acc": 0.90985813, "epoch": 1.8239785557631385, "grad_norm": 6.042026996612549, "learning_rate": 6.131517900369907e-06, "loss": 0.55258231, "memory(GiB)": 34.88, "step": 67365, "train_speed(iter/s)": 0.412698 }, { "acc": 0.91291161, "epoch": 1.8241139360463543, "grad_norm": 7.907186508178711, "learning_rate": 6.130972836426391e-06, "loss": 0.46522923, "memory(GiB)": 34.88, "step": 67370, "train_speed(iter/s)": 0.4127 }, { "acc": 0.92007961, "epoch": 1.8242493163295697, "grad_norm": 13.787595748901367, "learning_rate": 6.13042775832148e-06, "loss": 0.39963787, "memory(GiB)": 34.88, "step": 67375, "train_speed(iter/s)": 0.412701 }, { "acc": 0.91861706, "epoch": 1.8243846966127855, "grad_norm": 8.674821853637695, "learning_rate": 6.1298826660620045e-06, "loss": 0.44066534, "memory(GiB)": 34.88, "step": 67380, "train_speed(iter/s)": 0.412703 }, { "acc": 0.91097269, "epoch": 1.8245200768960008, "grad_norm": 3.918273448944092, "learning_rate": 6.129337559654793e-06, "loss": 0.5045639, "memory(GiB)": 34.88, "step": 67385, "train_speed(iter/s)": 0.412704 }, { "acc": 0.93041725, "epoch": 1.8246554571792164, "grad_norm": 8.088457107543945, "learning_rate": 6.128792439106671e-06, "loss": 0.33558826, "memory(GiB)": 34.88, "step": 67390, "train_speed(iter/s)": 0.412706 }, { "acc": 0.90667419, "epoch": 1.824790837462432, "grad_norm": 11.77744197845459, "learning_rate": 6.128247304424469e-06, "loss": 0.59859695, "memory(GiB)": 34.88, "step": 67395, "train_speed(iter/s)": 0.412707 }, { "acc": 0.89778605, "epoch": 1.8249262177456476, "grad_norm": 13.805932998657227, "learning_rate": 6.127702155615016e-06, "loss": 0.55446448, "memory(GiB)": 34.88, "step": 67400, "train_speed(iter/s)": 0.412709 }, { "acc": 0.9230298, "epoch": 1.8250615980288631, "grad_norm": 4.978702545166016, "learning_rate": 6.127156992685142e-06, "loss": 0.41265182, "memory(GiB)": 34.88, "step": 67405, "train_speed(iter/s)": 0.412711 }, { "acc": 0.91977425, "epoch": 1.8251969783120785, "grad_norm": 13.509062767028809, "learning_rate": 6.126611815641675e-06, "loss": 0.43031683, "memory(GiB)": 34.88, "step": 67410, "train_speed(iter/s)": 0.412712 }, { "acc": 0.91515198, "epoch": 1.8253323585952943, "grad_norm": 6.880367279052734, "learning_rate": 6.126066624491446e-06, "loss": 0.46696625, "memory(GiB)": 34.88, "step": 67415, "train_speed(iter/s)": 0.412714 }, { "acc": 0.90902338, "epoch": 1.8254677388785097, "grad_norm": 5.6025390625, "learning_rate": 6.1255214192412815e-06, "loss": 0.39342039, "memory(GiB)": 34.88, "step": 67420, "train_speed(iter/s)": 0.412715 }, { "acc": 0.91173992, "epoch": 1.8256031191617252, "grad_norm": 10.688124656677246, "learning_rate": 6.124976199898014e-06, "loss": 0.44872551, "memory(GiB)": 34.88, "step": 67425, "train_speed(iter/s)": 0.412717 }, { "acc": 0.92457342, "epoch": 1.8257384994449408, "grad_norm": 5.314393043518066, "learning_rate": 6.124430966468472e-06, "loss": 0.35317259, "memory(GiB)": 34.88, "step": 67430, "train_speed(iter/s)": 0.412718 }, { "acc": 0.90703754, "epoch": 1.8258738797281564, "grad_norm": 2.4318084716796875, "learning_rate": 6.123885718959486e-06, "loss": 0.57163868, "memory(GiB)": 34.88, "step": 67435, "train_speed(iter/s)": 0.41272 }, { "acc": 0.91632776, "epoch": 1.826009260011372, "grad_norm": 13.289328575134277, "learning_rate": 6.123340457377888e-06, "loss": 0.46894665, "memory(GiB)": 34.88, "step": 67440, "train_speed(iter/s)": 0.412721 }, { "acc": 0.90766602, "epoch": 1.8261446402945873, "grad_norm": 6.741940975189209, "learning_rate": 6.122795181730504e-06, "loss": 0.47597895, "memory(GiB)": 34.88, "step": 67445, "train_speed(iter/s)": 0.412723 }, { "acc": 0.92674828, "epoch": 1.8262800205778031, "grad_norm": 5.565876007080078, "learning_rate": 6.122249892024169e-06, "loss": 0.39223247, "memory(GiB)": 34.88, "step": 67450, "train_speed(iter/s)": 0.412724 }, { "acc": 0.92930489, "epoch": 1.8264154008610185, "grad_norm": 8.02164363861084, "learning_rate": 6.121704588265712e-06, "loss": 0.38345926, "memory(GiB)": 34.88, "step": 67455, "train_speed(iter/s)": 0.412726 }, { "acc": 0.91933022, "epoch": 1.8265507811442343, "grad_norm": 7.997905731201172, "learning_rate": 6.121159270461966e-06, "loss": 0.46494398, "memory(GiB)": 34.88, "step": 67460, "train_speed(iter/s)": 0.412727 }, { "acc": 0.91734409, "epoch": 1.8266861614274497, "grad_norm": 7.805574417114258, "learning_rate": 6.120613938619759e-06, "loss": 0.43541465, "memory(GiB)": 34.88, "step": 67465, "train_speed(iter/s)": 0.412729 }, { "acc": 0.91098452, "epoch": 1.8268215417106652, "grad_norm": 40.766788482666016, "learning_rate": 6.120068592745924e-06, "loss": 0.45712767, "memory(GiB)": 34.88, "step": 67470, "train_speed(iter/s)": 0.41273 }, { "acc": 0.91779737, "epoch": 1.8269569219938808, "grad_norm": 5.725094318389893, "learning_rate": 6.119523232847293e-06, "loss": 0.44183187, "memory(GiB)": 34.88, "step": 67475, "train_speed(iter/s)": 0.412731 }, { "acc": 0.93666229, "epoch": 1.8270923022770964, "grad_norm": 5.45824670791626, "learning_rate": 6.118977858930696e-06, "loss": 0.32863736, "memory(GiB)": 34.88, "step": 67480, "train_speed(iter/s)": 0.412732 }, { "acc": 0.91075363, "epoch": 1.827227682560312, "grad_norm": 9.866731643676758, "learning_rate": 6.1184324710029675e-06, "loss": 0.44619513, "memory(GiB)": 34.88, "step": 67485, "train_speed(iter/s)": 0.412734 }, { "acc": 0.92918701, "epoch": 1.8273630628435273, "grad_norm": 7.693456649780273, "learning_rate": 6.117887069070937e-06, "loss": 0.4041008, "memory(GiB)": 34.88, "step": 67490, "train_speed(iter/s)": 0.412735 }, { "acc": 0.88504019, "epoch": 1.8274984431267431, "grad_norm": 12.817150115966797, "learning_rate": 6.1173416531414385e-06, "loss": 0.63681383, "memory(GiB)": 34.88, "step": 67495, "train_speed(iter/s)": 0.412737 }, { "acc": 0.89817715, "epoch": 1.8276338234099585, "grad_norm": 8.674373626708984, "learning_rate": 6.116796223221303e-06, "loss": 0.52822723, "memory(GiB)": 34.88, "step": 67500, "train_speed(iter/s)": 0.412738 }, { "acc": 0.9109272, "epoch": 1.827769203693174, "grad_norm": 8.402680397033691, "learning_rate": 6.116250779317364e-06, "loss": 0.44866242, "memory(GiB)": 34.88, "step": 67505, "train_speed(iter/s)": 0.41274 }, { "acc": 0.89872227, "epoch": 1.8279045839763897, "grad_norm": 9.035622596740723, "learning_rate": 6.115705321436455e-06, "loss": 0.55831032, "memory(GiB)": 34.88, "step": 67510, "train_speed(iter/s)": 0.412741 }, { "acc": 0.89702044, "epoch": 1.8280399642596052, "grad_norm": 5.955443382263184, "learning_rate": 6.115159849585407e-06, "loss": 0.54970951, "memory(GiB)": 34.88, "step": 67515, "train_speed(iter/s)": 0.412742 }, { "acc": 0.89897366, "epoch": 1.8281753445428208, "grad_norm": 6.122821807861328, "learning_rate": 6.114614363771055e-06, "loss": 0.47165194, "memory(GiB)": 34.88, "step": 67520, "train_speed(iter/s)": 0.412744 }, { "acc": 0.90979881, "epoch": 1.8283107248260362, "grad_norm": 10.357109069824219, "learning_rate": 6.11406886400023e-06, "loss": 0.4789505, "memory(GiB)": 34.88, "step": 67525, "train_speed(iter/s)": 0.412745 }, { "acc": 0.90681267, "epoch": 1.828446105109252, "grad_norm": 2.778017282485962, "learning_rate": 6.1135233502797676e-06, "loss": 0.51202812, "memory(GiB)": 34.88, "step": 67530, "train_speed(iter/s)": 0.412747 }, { "acc": 0.91742563, "epoch": 1.8285814853924673, "grad_norm": 6.3062567710876465, "learning_rate": 6.1129778226165e-06, "loss": 0.41178212, "memory(GiB)": 34.88, "step": 67535, "train_speed(iter/s)": 0.412748 }, { "acc": 0.90675793, "epoch": 1.8287168656756831, "grad_norm": 9.047345161437988, "learning_rate": 6.112432281017263e-06, "loss": 0.54743729, "memory(GiB)": 34.88, "step": 67540, "train_speed(iter/s)": 0.41275 }, { "acc": 0.91780577, "epoch": 1.8288522459588985, "grad_norm": 11.197502136230469, "learning_rate": 6.111886725488888e-06, "loss": 0.40189362, "memory(GiB)": 34.88, "step": 67545, "train_speed(iter/s)": 0.412751 }, { "acc": 0.89715643, "epoch": 1.828987626242114, "grad_norm": 3.876779794692993, "learning_rate": 6.11134115603821e-06, "loss": 0.64868007, "memory(GiB)": 34.88, "step": 67550, "train_speed(iter/s)": 0.412753 }, { "acc": 0.91601334, "epoch": 1.8291230065253297, "grad_norm": 7.383938312530518, "learning_rate": 6.110795572672066e-06, "loss": 0.47097168, "memory(GiB)": 34.88, "step": 67555, "train_speed(iter/s)": 0.412754 }, { "acc": 0.92442207, "epoch": 1.8292583868085452, "grad_norm": 7.103622913360596, "learning_rate": 6.110249975397286e-06, "loss": 0.43355579, "memory(GiB)": 34.88, "step": 67560, "train_speed(iter/s)": 0.412756 }, { "acc": 0.90950813, "epoch": 1.8293937670917608, "grad_norm": 7.470798492431641, "learning_rate": 6.1097043642207074e-06, "loss": 0.46912727, "memory(GiB)": 34.88, "step": 67565, "train_speed(iter/s)": 0.412757 }, { "acc": 0.92607622, "epoch": 1.8295291473749762, "grad_norm": 4.892100811004639, "learning_rate": 6.1091587391491644e-06, "loss": 0.36150134, "memory(GiB)": 34.88, "step": 67570, "train_speed(iter/s)": 0.412759 }, { "acc": 0.90515127, "epoch": 1.829664527658192, "grad_norm": 12.109955787658691, "learning_rate": 6.108613100189493e-06, "loss": 0.47782068, "memory(GiB)": 34.88, "step": 67575, "train_speed(iter/s)": 0.41276 }, { "acc": 0.8977047, "epoch": 1.8297999079414073, "grad_norm": 8.449910163879395, "learning_rate": 6.108067447348527e-06, "loss": 0.51937418, "memory(GiB)": 34.88, "step": 67580, "train_speed(iter/s)": 0.412762 }, { "acc": 0.90765676, "epoch": 1.829935288224623, "grad_norm": 7.61012077331543, "learning_rate": 6.107521780633102e-06, "loss": 0.54219303, "memory(GiB)": 34.88, "step": 67585, "train_speed(iter/s)": 0.412763 }, { "acc": 0.92446423, "epoch": 1.8300706685078385, "grad_norm": 6.40854549407959, "learning_rate": 6.106976100050055e-06, "loss": 0.41742516, "memory(GiB)": 34.88, "step": 67590, "train_speed(iter/s)": 0.412765 }, { "acc": 0.9235198, "epoch": 1.830206048791054, "grad_norm": 6.239317893981934, "learning_rate": 6.10643040560622e-06, "loss": 0.43045464, "memory(GiB)": 34.88, "step": 67595, "train_speed(iter/s)": 0.412766 }, { "acc": 0.92327852, "epoch": 1.8303414290742697, "grad_norm": 6.854902744293213, "learning_rate": 6.105884697308434e-06, "loss": 0.40011625, "memory(GiB)": 34.88, "step": 67600, "train_speed(iter/s)": 0.412768 }, { "acc": 0.9066309, "epoch": 1.830476809357485, "grad_norm": 16.7785701751709, "learning_rate": 6.105338975163532e-06, "loss": 0.45284147, "memory(GiB)": 34.88, "step": 67605, "train_speed(iter/s)": 0.412769 }, { "acc": 0.92026634, "epoch": 1.8306121896407008, "grad_norm": 27.48094940185547, "learning_rate": 6.104793239178351e-06, "loss": 0.47820683, "memory(GiB)": 34.88, "step": 67610, "train_speed(iter/s)": 0.412771 }, { "acc": 0.91656914, "epoch": 1.8307475699239162, "grad_norm": 10.89656925201416, "learning_rate": 6.104247489359727e-06, "loss": 0.52273564, "memory(GiB)": 34.88, "step": 67615, "train_speed(iter/s)": 0.412772 }, { "acc": 0.9038784, "epoch": 1.830882950207132, "grad_norm": 10.95230484008789, "learning_rate": 6.1037017257144985e-06, "loss": 0.55102129, "memory(GiB)": 34.88, "step": 67620, "train_speed(iter/s)": 0.412774 }, { "acc": 0.91702976, "epoch": 1.8310183304903473, "grad_norm": 10.39207649230957, "learning_rate": 6.103155948249499e-06, "loss": 0.44299049, "memory(GiB)": 34.88, "step": 67625, "train_speed(iter/s)": 0.412775 }, { "acc": 0.90772686, "epoch": 1.831153710773563, "grad_norm": 9.902190208435059, "learning_rate": 6.102610156971564e-06, "loss": 0.5492197, "memory(GiB)": 34.88, "step": 67630, "train_speed(iter/s)": 0.412777 }, { "acc": 0.92875051, "epoch": 1.8312890910567785, "grad_norm": 9.038835525512695, "learning_rate": 6.102064351887538e-06, "loss": 0.38650827, "memory(GiB)": 34.88, "step": 67635, "train_speed(iter/s)": 0.412778 }, { "acc": 0.90609074, "epoch": 1.831424471339994, "grad_norm": 11.190821647644043, "learning_rate": 6.101518533004255e-06, "loss": 0.49483614, "memory(GiB)": 34.88, "step": 67640, "train_speed(iter/s)": 0.41278 }, { "acc": 0.90125828, "epoch": 1.8315598516232097, "grad_norm": 21.568431854248047, "learning_rate": 6.100972700328548e-06, "loss": 0.6421701, "memory(GiB)": 34.88, "step": 67645, "train_speed(iter/s)": 0.412781 }, { "acc": 0.92009411, "epoch": 1.831695231906425, "grad_norm": 13.468905448913574, "learning_rate": 6.100426853867259e-06, "loss": 0.48500185, "memory(GiB)": 34.88, "step": 67650, "train_speed(iter/s)": 0.412783 }, { "acc": 0.88489323, "epoch": 1.8318306121896408, "grad_norm": 8.427945137023926, "learning_rate": 6.099880993627225e-06, "loss": 0.76739712, "memory(GiB)": 34.88, "step": 67655, "train_speed(iter/s)": 0.412784 }, { "acc": 0.92377357, "epoch": 1.8319659924728562, "grad_norm": 8.53887939453125, "learning_rate": 6.099335119615282e-06, "loss": 0.37187917, "memory(GiB)": 34.88, "step": 67660, "train_speed(iter/s)": 0.412786 }, { "acc": 0.9171361, "epoch": 1.8321013727560718, "grad_norm": 6.987905025482178, "learning_rate": 6.0987892318382725e-06, "loss": 0.4458106, "memory(GiB)": 34.88, "step": 67665, "train_speed(iter/s)": 0.412787 }, { "acc": 0.91885891, "epoch": 1.8322367530392873, "grad_norm": 6.721426010131836, "learning_rate": 6.098243330303032e-06, "loss": 0.43443336, "memory(GiB)": 34.88, "step": 67670, "train_speed(iter/s)": 0.412789 }, { "acc": 0.89931822, "epoch": 1.832372133322503, "grad_norm": 10.399279594421387, "learning_rate": 6.097697415016398e-06, "loss": 0.53186283, "memory(GiB)": 34.88, "step": 67675, "train_speed(iter/s)": 0.41279 }, { "acc": 0.92559032, "epoch": 1.8325075136057185, "grad_norm": 5.773233413696289, "learning_rate": 6.097151485985212e-06, "loss": 0.41693945, "memory(GiB)": 34.88, "step": 67680, "train_speed(iter/s)": 0.412792 }, { "acc": 0.91343765, "epoch": 1.8326428938889339, "grad_norm": 6.311388969421387, "learning_rate": 6.096605543216311e-06, "loss": 0.46973219, "memory(GiB)": 34.88, "step": 67685, "train_speed(iter/s)": 0.412793 }, { "acc": 0.91636295, "epoch": 1.8327782741721497, "grad_norm": 6.560277462005615, "learning_rate": 6.096059586716533e-06, "loss": 0.4542901, "memory(GiB)": 34.88, "step": 67690, "train_speed(iter/s)": 0.412795 }, { "acc": 0.8905798, "epoch": 1.832913654455365, "grad_norm": 9.432458877563477, "learning_rate": 6.095513616492719e-06, "loss": 0.66797662, "memory(GiB)": 34.88, "step": 67695, "train_speed(iter/s)": 0.412796 }, { "acc": 0.92234278, "epoch": 1.8330490347385808, "grad_norm": 19.7568359375, "learning_rate": 6.094967632551706e-06, "loss": 0.4538765, "memory(GiB)": 34.88, "step": 67700, "train_speed(iter/s)": 0.412798 }, { "acc": 0.90684757, "epoch": 1.8331844150217962, "grad_norm": 7.542765140533447, "learning_rate": 6.0944216349003375e-06, "loss": 0.49243584, "memory(GiB)": 34.88, "step": 67705, "train_speed(iter/s)": 0.412799 }, { "acc": 0.92414465, "epoch": 1.8333197953050118, "grad_norm": 6.881198406219482, "learning_rate": 6.093875623545449e-06, "loss": 0.45442491, "memory(GiB)": 34.88, "step": 67710, "train_speed(iter/s)": 0.412801 }, { "acc": 0.92036934, "epoch": 1.8334551755882273, "grad_norm": 7.96991491317749, "learning_rate": 6.093329598493886e-06, "loss": 0.36962199, "memory(GiB)": 34.88, "step": 67715, "train_speed(iter/s)": 0.412802 }, { "acc": 0.9230361, "epoch": 1.833590555871443, "grad_norm": 4.314452648162842, "learning_rate": 6.092783559752481e-06, "loss": 0.377159, "memory(GiB)": 34.88, "step": 67720, "train_speed(iter/s)": 0.412804 }, { "acc": 0.89457016, "epoch": 1.8337259361546585, "grad_norm": 7.395750999450684, "learning_rate": 6.092237507328082e-06, "loss": 0.60094199, "memory(GiB)": 34.88, "step": 67725, "train_speed(iter/s)": 0.412805 }, { "acc": 0.90750561, "epoch": 1.8338613164378739, "grad_norm": 12.079025268554688, "learning_rate": 6.091691441227522e-06, "loss": 0.5369997, "memory(GiB)": 34.88, "step": 67730, "train_speed(iter/s)": 0.412807 }, { "acc": 0.91184473, "epoch": 1.8339966967210897, "grad_norm": 10.978897094726562, "learning_rate": 6.091145361457646e-06, "loss": 0.43292089, "memory(GiB)": 34.88, "step": 67735, "train_speed(iter/s)": 0.412809 }, { "acc": 0.91023397, "epoch": 1.834132077004305, "grad_norm": 6.38389253616333, "learning_rate": 6.090599268025293e-06, "loss": 0.46168222, "memory(GiB)": 34.88, "step": 67740, "train_speed(iter/s)": 0.41281 }, { "acc": 0.91306515, "epoch": 1.8342674572875206, "grad_norm": 27.346359252929688, "learning_rate": 6.0900531609373045e-06, "loss": 0.43320651, "memory(GiB)": 34.88, "step": 67745, "train_speed(iter/s)": 0.412812 }, { "acc": 0.92267017, "epoch": 1.8344028375707362, "grad_norm": 9.668018341064453, "learning_rate": 6.089507040200524e-06, "loss": 0.38269804, "memory(GiB)": 34.88, "step": 67750, "train_speed(iter/s)": 0.412813 }, { "acc": 0.90273209, "epoch": 1.8345382178539518, "grad_norm": 17.734920501708984, "learning_rate": 6.088960905821789e-06, "loss": 0.49780445, "memory(GiB)": 34.88, "step": 67755, "train_speed(iter/s)": 0.412815 }, { "acc": 0.9066473, "epoch": 1.8346735981371673, "grad_norm": 12.289665222167969, "learning_rate": 6.088414757807943e-06, "loss": 0.51866627, "memory(GiB)": 34.88, "step": 67760, "train_speed(iter/s)": 0.412816 }, { "acc": 0.92721348, "epoch": 1.8348089784203827, "grad_norm": 4.775721073150635, "learning_rate": 6.087868596165827e-06, "loss": 0.38024204, "memory(GiB)": 34.88, "step": 67765, "train_speed(iter/s)": 0.412818 }, { "acc": 0.9183857, "epoch": 1.8349443587035985, "grad_norm": 6.673262596130371, "learning_rate": 6.08732242090228e-06, "loss": 0.38411336, "memory(GiB)": 34.88, "step": 67770, "train_speed(iter/s)": 0.412819 }, { "acc": 0.9279829, "epoch": 1.8350797389868139, "grad_norm": 14.966270446777344, "learning_rate": 6.086776232024151e-06, "loss": 0.43862867, "memory(GiB)": 34.88, "step": 67775, "train_speed(iter/s)": 0.41282 }, { "acc": 0.93486023, "epoch": 1.8352151192700297, "grad_norm": 5.847276210784912, "learning_rate": 6.086230029538274e-06, "loss": 0.33022738, "memory(GiB)": 34.88, "step": 67780, "train_speed(iter/s)": 0.412822 }, { "acc": 0.92481918, "epoch": 1.835350499553245, "grad_norm": 9.96356201171875, "learning_rate": 6.085683813451496e-06, "loss": 0.36877418, "memory(GiB)": 34.88, "step": 67785, "train_speed(iter/s)": 0.412823 }, { "acc": 0.91984186, "epoch": 1.8354858798364606, "grad_norm": 6.836309432983398, "learning_rate": 6.0851375837706586e-06, "loss": 0.40314913, "memory(GiB)": 34.88, "step": 67790, "train_speed(iter/s)": 0.412825 }, { "acc": 0.91337051, "epoch": 1.8356212601196762, "grad_norm": 11.741448402404785, "learning_rate": 6.084591340502604e-06, "loss": 0.46143751, "memory(GiB)": 34.88, "step": 67795, "train_speed(iter/s)": 0.412826 }, { "acc": 0.92510166, "epoch": 1.8357566404028918, "grad_norm": 6.070680618286133, "learning_rate": 6.084045083654174e-06, "loss": 0.40353374, "memory(GiB)": 34.88, "step": 67800, "train_speed(iter/s)": 0.412827 }, { "acc": 0.92057772, "epoch": 1.8358920206861074, "grad_norm": 8.761919975280762, "learning_rate": 6.083498813232216e-06, "loss": 0.41109986, "memory(GiB)": 34.88, "step": 67805, "train_speed(iter/s)": 0.412829 }, { "acc": 0.9019227, "epoch": 1.8360274009693227, "grad_norm": 10.582243919372559, "learning_rate": 6.082952529243566e-06, "loss": 0.54039788, "memory(GiB)": 34.88, "step": 67810, "train_speed(iter/s)": 0.41283 }, { "acc": 0.91590919, "epoch": 1.8361627812525385, "grad_norm": 4.212823867797852, "learning_rate": 6.0824062316950725e-06, "loss": 0.41134152, "memory(GiB)": 34.88, "step": 67815, "train_speed(iter/s)": 0.412832 }, { "acc": 0.90713282, "epoch": 1.8362981615357539, "grad_norm": 8.85307788848877, "learning_rate": 6.081859920593579e-06, "loss": 0.52728443, "memory(GiB)": 34.88, "step": 67820, "train_speed(iter/s)": 0.412833 }, { "acc": 0.89888592, "epoch": 1.8364335418189695, "grad_norm": 12.7582368850708, "learning_rate": 6.081313595945926e-06, "loss": 0.52047563, "memory(GiB)": 34.88, "step": 67825, "train_speed(iter/s)": 0.412834 }, { "acc": 0.89381542, "epoch": 1.836568922102185, "grad_norm": 10.255510330200195, "learning_rate": 6.080767257758958e-06, "loss": 0.55236959, "memory(GiB)": 34.88, "step": 67830, "train_speed(iter/s)": 0.412836 }, { "acc": 0.90889244, "epoch": 1.8367043023854006, "grad_norm": 6.216151237487793, "learning_rate": 6.080220906039523e-06, "loss": 0.51674094, "memory(GiB)": 34.88, "step": 67835, "train_speed(iter/s)": 0.412837 }, { "acc": 0.91197462, "epoch": 1.8368396826686162, "grad_norm": 12.168951988220215, "learning_rate": 6.079674540794459e-06, "loss": 0.51504803, "memory(GiB)": 34.88, "step": 67840, "train_speed(iter/s)": 0.412838 }, { "acc": 0.91988144, "epoch": 1.8369750629518315, "grad_norm": 10.479850769042969, "learning_rate": 6.0791281620306146e-06, "loss": 0.47197824, "memory(GiB)": 34.88, "step": 67845, "train_speed(iter/s)": 0.41284 }, { "acc": 0.90115433, "epoch": 1.8371104432350474, "grad_norm": 7.41752815246582, "learning_rate": 6.078581769754833e-06, "loss": 0.44294429, "memory(GiB)": 34.88, "step": 67850, "train_speed(iter/s)": 0.412842 }, { "acc": 0.90481968, "epoch": 1.8372458235182627, "grad_norm": 12.467000961303711, "learning_rate": 6.078035363973959e-06, "loss": 0.52168818, "memory(GiB)": 34.88, "step": 67855, "train_speed(iter/s)": 0.412843 }, { "acc": 0.91950817, "epoch": 1.8373812038014785, "grad_norm": 6.187352180480957, "learning_rate": 6.077488944694837e-06, "loss": 0.42273793, "memory(GiB)": 34.88, "step": 67860, "train_speed(iter/s)": 0.412845 }, { "acc": 0.91257954, "epoch": 1.8375165840846939, "grad_norm": 13.740155220031738, "learning_rate": 6.076942511924311e-06, "loss": 0.51633987, "memory(GiB)": 34.88, "step": 67865, "train_speed(iter/s)": 0.412846 }, { "acc": 0.90699272, "epoch": 1.8376519643679095, "grad_norm": 6.419302463531494, "learning_rate": 6.076396065669229e-06, "loss": 0.53915238, "memory(GiB)": 34.88, "step": 67870, "train_speed(iter/s)": 0.412848 }, { "acc": 0.92448492, "epoch": 1.837787344651125, "grad_norm": 9.980986595153809, "learning_rate": 6.075849605936434e-06, "loss": 0.40202894, "memory(GiB)": 34.88, "step": 67875, "train_speed(iter/s)": 0.412849 }, { "acc": 0.90822487, "epoch": 1.8379227249343406, "grad_norm": 17.334619522094727, "learning_rate": 6.075303132732771e-06, "loss": 0.51320925, "memory(GiB)": 34.88, "step": 67880, "train_speed(iter/s)": 0.412851 }, { "acc": 0.93359261, "epoch": 1.8380581052175562, "grad_norm": 10.799480438232422, "learning_rate": 6.074756646065087e-06, "loss": 0.40578141, "memory(GiB)": 34.88, "step": 67885, "train_speed(iter/s)": 0.412853 }, { "acc": 0.90891304, "epoch": 1.8381934855007716, "grad_norm": 7.412474155426025, "learning_rate": 6.07421014594023e-06, "loss": 0.47576313, "memory(GiB)": 34.88, "step": 67890, "train_speed(iter/s)": 0.412854 }, { "acc": 0.91795349, "epoch": 1.8383288657839874, "grad_norm": 7.965664386749268, "learning_rate": 6.073663632365043e-06, "loss": 0.49211278, "memory(GiB)": 34.88, "step": 67895, "train_speed(iter/s)": 0.412855 }, { "acc": 0.89542313, "epoch": 1.8384642460672027, "grad_norm": 6.412670612335205, "learning_rate": 6.073117105346371e-06, "loss": 0.58690615, "memory(GiB)": 34.88, "step": 67900, "train_speed(iter/s)": 0.412857 }, { "acc": 0.9128231, "epoch": 1.8385996263504183, "grad_norm": 7.556941509246826, "learning_rate": 6.072570564891063e-06, "loss": 0.47378521, "memory(GiB)": 34.88, "step": 67905, "train_speed(iter/s)": 0.412859 }, { "acc": 0.90359106, "epoch": 1.8387350066336339, "grad_norm": 7.750146389007568, "learning_rate": 6.072024011005964e-06, "loss": 0.52906237, "memory(GiB)": 34.88, "step": 67910, "train_speed(iter/s)": 0.41286 }, { "acc": 0.88729973, "epoch": 1.8388703869168495, "grad_norm": 14.660905838012695, "learning_rate": 6.071477443697922e-06, "loss": 0.77198944, "memory(GiB)": 34.88, "step": 67915, "train_speed(iter/s)": 0.412861 }, { "acc": 0.91368523, "epoch": 1.839005767200065, "grad_norm": 6.86216402053833, "learning_rate": 6.070930862973783e-06, "loss": 0.49853721, "memory(GiB)": 34.88, "step": 67920, "train_speed(iter/s)": 0.412863 }, { "acc": 0.90915098, "epoch": 1.8391411474832804, "grad_norm": 7.680159091949463, "learning_rate": 6.0703842688403935e-06, "loss": 0.4671886, "memory(GiB)": 34.88, "step": 67925, "train_speed(iter/s)": 0.412864 }, { "acc": 0.90884619, "epoch": 1.8392765277664962, "grad_norm": 8.200206756591797, "learning_rate": 6.0698376613046015e-06, "loss": 0.58021154, "memory(GiB)": 34.88, "step": 67930, "train_speed(iter/s)": 0.412866 }, { "acc": 0.92320251, "epoch": 1.8394119080497116, "grad_norm": 6.144514083862305, "learning_rate": 6.069291040373254e-06, "loss": 0.40687103, "memory(GiB)": 34.88, "step": 67935, "train_speed(iter/s)": 0.412867 }, { "acc": 0.90336285, "epoch": 1.8395472883329274, "grad_norm": 9.15075397491455, "learning_rate": 6.068744406053198e-06, "loss": 0.5577672, "memory(GiB)": 34.88, "step": 67940, "train_speed(iter/s)": 0.412868 }, { "acc": 0.89995251, "epoch": 1.8396826686161427, "grad_norm": 18.672882080078125, "learning_rate": 6.068197758351284e-06, "loss": 0.51196804, "memory(GiB)": 34.88, "step": 67945, "train_speed(iter/s)": 0.412869 }, { "acc": 0.91656647, "epoch": 1.8398180488993583, "grad_norm": 15.297135353088379, "learning_rate": 6.067651097274354e-06, "loss": 0.42412615, "memory(GiB)": 34.88, "step": 67950, "train_speed(iter/s)": 0.412871 }, { "acc": 0.91883545, "epoch": 1.8399534291825739, "grad_norm": 7.616026401519775, "learning_rate": 6.067104422829263e-06, "loss": 0.41934748, "memory(GiB)": 34.88, "step": 67955, "train_speed(iter/s)": 0.412872 }, { "acc": 0.92735014, "epoch": 1.8400888094657895, "grad_norm": 6.173793792724609, "learning_rate": 6.066557735022853e-06, "loss": 0.40305462, "memory(GiB)": 34.88, "step": 67960, "train_speed(iter/s)": 0.412874 }, { "acc": 0.92660332, "epoch": 1.840224189749005, "grad_norm": 3.923036575317383, "learning_rate": 6.066011033861976e-06, "loss": 0.38785329, "memory(GiB)": 34.88, "step": 67965, "train_speed(iter/s)": 0.412875 }, { "acc": 0.90509529, "epoch": 1.8403595700322204, "grad_norm": 17.506877899169922, "learning_rate": 6.06546431935348e-06, "loss": 0.52547626, "memory(GiB)": 34.88, "step": 67970, "train_speed(iter/s)": 0.412877 }, { "acc": 0.90283012, "epoch": 1.8404949503154362, "grad_norm": 30.99054527282715, "learning_rate": 6.0649175915042134e-06, "loss": 0.56440363, "memory(GiB)": 34.88, "step": 67975, "train_speed(iter/s)": 0.412878 }, { "acc": 0.92578745, "epoch": 1.8406303305986516, "grad_norm": 8.789494514465332, "learning_rate": 6.0643708503210245e-06, "loss": 0.45839262, "memory(GiB)": 34.88, "step": 67980, "train_speed(iter/s)": 0.41288 }, { "acc": 0.90713387, "epoch": 1.8407657108818671, "grad_norm": 7.604754447937012, "learning_rate": 6.063824095810763e-06, "loss": 0.49704266, "memory(GiB)": 34.88, "step": 67985, "train_speed(iter/s)": 0.412881 }, { "acc": 0.9238596, "epoch": 1.8409010911650827, "grad_norm": 7.039242744445801, "learning_rate": 6.063277327980277e-06, "loss": 0.39167495, "memory(GiB)": 34.88, "step": 67990, "train_speed(iter/s)": 0.412883 }, { "acc": 0.92274494, "epoch": 1.8410364714482983, "grad_norm": 3.5534679889678955, "learning_rate": 6.062730546836417e-06, "loss": 0.4423542, "memory(GiB)": 34.88, "step": 67995, "train_speed(iter/s)": 0.412884 }, { "acc": 0.90575047, "epoch": 1.8411718517315139, "grad_norm": 7.0074944496154785, "learning_rate": 6.0621837523860315e-06, "loss": 0.5077096, "memory(GiB)": 34.88, "step": 68000, "train_speed(iter/s)": 0.412886 }, { "acc": 0.90732746, "epoch": 1.8413072320147292, "grad_norm": 12.768134117126465, "learning_rate": 6.061636944635972e-06, "loss": 0.55641537, "memory(GiB)": 34.88, "step": 68005, "train_speed(iter/s)": 0.412887 }, { "acc": 0.91932535, "epoch": 1.841442612297945, "grad_norm": 12.4852294921875, "learning_rate": 6.061090123593087e-06, "loss": 0.4333075, "memory(GiB)": 34.88, "step": 68010, "train_speed(iter/s)": 0.412889 }, { "acc": 0.90654306, "epoch": 1.8415779925811604, "grad_norm": 3.9147722721099854, "learning_rate": 6.0605432892642265e-06, "loss": 0.5865386, "memory(GiB)": 34.88, "step": 68015, "train_speed(iter/s)": 0.41289 }, { "acc": 0.88987675, "epoch": 1.8417133728643762, "grad_norm": 24.677215576171875, "learning_rate": 6.059996441656241e-06, "loss": 0.63105059, "memory(GiB)": 34.88, "step": 68020, "train_speed(iter/s)": 0.412892 }, { "acc": 0.8974556, "epoch": 1.8418487531475916, "grad_norm": 11.67959976196289, "learning_rate": 6.05944958077598e-06, "loss": 0.5759264, "memory(GiB)": 34.88, "step": 68025, "train_speed(iter/s)": 0.412893 }, { "acc": 0.91343031, "epoch": 1.8419841334308071, "grad_norm": 8.461555480957031, "learning_rate": 6.058902706630295e-06, "loss": 0.44832954, "memory(GiB)": 34.88, "step": 68030, "train_speed(iter/s)": 0.412895 }, { "acc": 0.92016449, "epoch": 1.8421195137140227, "grad_norm": 20.815086364746094, "learning_rate": 6.058355819226035e-06, "loss": 0.46689281, "memory(GiB)": 34.88, "step": 68035, "train_speed(iter/s)": 0.412897 }, { "acc": 0.90309792, "epoch": 1.8422548939972383, "grad_norm": 10.795670509338379, "learning_rate": 6.057808918570055e-06, "loss": 0.5389441, "memory(GiB)": 34.88, "step": 68040, "train_speed(iter/s)": 0.412898 }, { "acc": 0.89616299, "epoch": 1.8423902742804539, "grad_norm": 15.690181732177734, "learning_rate": 6.0572620046691995e-06, "loss": 0.61292572, "memory(GiB)": 34.88, "step": 68045, "train_speed(iter/s)": 0.4129 }, { "acc": 0.91493301, "epoch": 1.8425256545636692, "grad_norm": 16.768739700317383, "learning_rate": 6.056715077530326e-06, "loss": 0.49250555, "memory(GiB)": 34.88, "step": 68050, "train_speed(iter/s)": 0.412901 }, { "acc": 0.91565361, "epoch": 1.842661034846885, "grad_norm": 6.951451301574707, "learning_rate": 6.056168137160283e-06, "loss": 0.49444361, "memory(GiB)": 34.88, "step": 68055, "train_speed(iter/s)": 0.412903 }, { "acc": 0.91815214, "epoch": 1.8427964151301004, "grad_norm": 8.919888496398926, "learning_rate": 6.055621183565923e-06, "loss": 0.50532017, "memory(GiB)": 34.88, "step": 68060, "train_speed(iter/s)": 0.412904 }, { "acc": 0.90938892, "epoch": 1.842931795413316, "grad_norm": 34.4312629699707, "learning_rate": 6.055074216754095e-06, "loss": 0.56229854, "memory(GiB)": 34.88, "step": 68065, "train_speed(iter/s)": 0.412906 }, { "acc": 0.90023842, "epoch": 1.8430671756965316, "grad_norm": 6.164466381072998, "learning_rate": 6.054527236731655e-06, "loss": 0.53818874, "memory(GiB)": 34.88, "step": 68070, "train_speed(iter/s)": 0.412907 }, { "acc": 0.92295685, "epoch": 1.8432025559797471, "grad_norm": 13.003052711486816, "learning_rate": 6.05398024350545e-06, "loss": 0.40259867, "memory(GiB)": 34.88, "step": 68075, "train_speed(iter/s)": 0.412909 }, { "acc": 0.92770052, "epoch": 1.8433379362629627, "grad_norm": 6.699221611022949, "learning_rate": 6.0534332370823355e-06, "loss": 0.38444524, "memory(GiB)": 34.88, "step": 68080, "train_speed(iter/s)": 0.41291 }, { "acc": 0.90933123, "epoch": 1.843473316546178, "grad_norm": 10.387767791748047, "learning_rate": 6.052886217469165e-06, "loss": 0.44675007, "memory(GiB)": 34.88, "step": 68085, "train_speed(iter/s)": 0.412912 }, { "acc": 0.90849934, "epoch": 1.8436086968293939, "grad_norm": 2.6304996013641357, "learning_rate": 6.052339184672787e-06, "loss": 0.48114262, "memory(GiB)": 34.88, "step": 68090, "train_speed(iter/s)": 0.412913 }, { "acc": 0.91807537, "epoch": 1.8437440771126092, "grad_norm": 6.16566801071167, "learning_rate": 6.051792138700057e-06, "loss": 0.4618679, "memory(GiB)": 34.88, "step": 68095, "train_speed(iter/s)": 0.412915 }, { "acc": 0.89186239, "epoch": 1.8438794573958248, "grad_norm": 23.356456756591797, "learning_rate": 6.051245079557828e-06, "loss": 0.67509356, "memory(GiB)": 34.88, "step": 68100, "train_speed(iter/s)": 0.412916 }, { "acc": 0.90676203, "epoch": 1.8440148376790404, "grad_norm": 5.532340049743652, "learning_rate": 6.050698007252953e-06, "loss": 0.48048086, "memory(GiB)": 34.88, "step": 68105, "train_speed(iter/s)": 0.412917 }, { "acc": 0.90145407, "epoch": 1.844150217962256, "grad_norm": 4.93512487411499, "learning_rate": 6.050150921792282e-06, "loss": 0.56504602, "memory(GiB)": 34.88, "step": 68110, "train_speed(iter/s)": 0.412919 }, { "acc": 0.91794853, "epoch": 1.8442855982454716, "grad_norm": 6.035037994384766, "learning_rate": 6.049603823182672e-06, "loss": 0.48585806, "memory(GiB)": 34.88, "step": 68115, "train_speed(iter/s)": 0.41292 }, { "acc": 0.92495527, "epoch": 1.844420978528687, "grad_norm": 5.4862236976623535, "learning_rate": 6.049056711430974e-06, "loss": 0.39090042, "memory(GiB)": 34.88, "step": 68120, "train_speed(iter/s)": 0.412922 }, { "acc": 0.90929928, "epoch": 1.8445563588119027, "grad_norm": 4.308302879333496, "learning_rate": 6.048509586544042e-06, "loss": 0.46515121, "memory(GiB)": 34.88, "step": 68125, "train_speed(iter/s)": 0.412923 }, { "acc": 0.900776, "epoch": 1.844691739095118, "grad_norm": 6.2892537117004395, "learning_rate": 6.047962448528734e-06, "loss": 0.53305507, "memory(GiB)": 34.88, "step": 68130, "train_speed(iter/s)": 0.412925 }, { "acc": 0.92168446, "epoch": 1.8448271193783339, "grad_norm": 11.634146690368652, "learning_rate": 6.047415297391897e-06, "loss": 0.401053, "memory(GiB)": 34.88, "step": 68135, "train_speed(iter/s)": 0.412926 }, { "acc": 0.935637, "epoch": 1.8449624996615492, "grad_norm": 5.380860805511475, "learning_rate": 6.046868133140392e-06, "loss": 0.31190283, "memory(GiB)": 34.88, "step": 68140, "train_speed(iter/s)": 0.412927 }, { "acc": 0.90816727, "epoch": 1.8450978799447648, "grad_norm": 6.658108711242676, "learning_rate": 6.0463209557810685e-06, "loss": 0.49974251, "memory(GiB)": 34.88, "step": 68145, "train_speed(iter/s)": 0.412929 }, { "acc": 0.90327282, "epoch": 1.8452332602279804, "grad_norm": 5.8932013511657715, "learning_rate": 6.045773765320782e-06, "loss": 0.48046165, "memory(GiB)": 34.88, "step": 68150, "train_speed(iter/s)": 0.41293 }, { "acc": 0.92968531, "epoch": 1.845368640511196, "grad_norm": 4.7855987548828125, "learning_rate": 6.045226561766389e-06, "loss": 0.40770283, "memory(GiB)": 34.88, "step": 68155, "train_speed(iter/s)": 0.412932 }, { "acc": 0.9187542, "epoch": 1.8455040207944116, "grad_norm": 6.290533065795898, "learning_rate": 6.044679345124741e-06, "loss": 0.4255064, "memory(GiB)": 34.88, "step": 68160, "train_speed(iter/s)": 0.412933 }, { "acc": 0.91251526, "epoch": 1.845639401077627, "grad_norm": 7.691494464874268, "learning_rate": 6.044132115402696e-06, "loss": 0.43187017, "memory(GiB)": 34.88, "step": 68165, "train_speed(iter/s)": 0.412934 }, { "acc": 0.91012793, "epoch": 1.8457747813608427, "grad_norm": 7.598730564117432, "learning_rate": 6.043584872607107e-06, "loss": 0.4439332, "memory(GiB)": 34.88, "step": 68170, "train_speed(iter/s)": 0.412936 }, { "acc": 0.90097504, "epoch": 1.845910161644058, "grad_norm": 9.386537551879883, "learning_rate": 6.043037616744831e-06, "loss": 0.53411078, "memory(GiB)": 34.88, "step": 68175, "train_speed(iter/s)": 0.412937 }, { "acc": 0.91042957, "epoch": 1.8460455419272737, "grad_norm": 7.086554527282715, "learning_rate": 6.042490347822722e-06, "loss": 0.48888102, "memory(GiB)": 34.88, "step": 68180, "train_speed(iter/s)": 0.412939 }, { "acc": 0.90021467, "epoch": 1.8461809222104892, "grad_norm": 12.232768058776855, "learning_rate": 6.041943065847639e-06, "loss": 0.54544554, "memory(GiB)": 34.88, "step": 68185, "train_speed(iter/s)": 0.41294 }, { "acc": 0.91009884, "epoch": 1.8463163024937048, "grad_norm": 5.290570259094238, "learning_rate": 6.041395770826434e-06, "loss": 0.51105375, "memory(GiB)": 34.88, "step": 68190, "train_speed(iter/s)": 0.412942 }, { "acc": 0.88477592, "epoch": 1.8464516827769204, "grad_norm": 19.54966163635254, "learning_rate": 6.040848462765962e-06, "loss": 0.66268167, "memory(GiB)": 34.88, "step": 68195, "train_speed(iter/s)": 0.412943 }, { "acc": 0.91456394, "epoch": 1.8465870630601358, "grad_norm": 7.8340163230896, "learning_rate": 6.040301141673085e-06, "loss": 0.38807185, "memory(GiB)": 34.88, "step": 68200, "train_speed(iter/s)": 0.412944 }, { "acc": 0.92876358, "epoch": 1.8467224433433516, "grad_norm": 7.914631366729736, "learning_rate": 6.0397538075546524e-06, "loss": 0.39827213, "memory(GiB)": 34.88, "step": 68205, "train_speed(iter/s)": 0.412946 }, { "acc": 0.91203728, "epoch": 1.846857823626567, "grad_norm": 8.544227600097656, "learning_rate": 6.039206460417525e-06, "loss": 0.49155841, "memory(GiB)": 34.88, "step": 68210, "train_speed(iter/s)": 0.412948 }, { "acc": 0.90404778, "epoch": 1.8469932039097827, "grad_norm": 8.118523597717285, "learning_rate": 6.038659100268558e-06, "loss": 0.5457983, "memory(GiB)": 34.88, "step": 68215, "train_speed(iter/s)": 0.412949 }, { "acc": 0.91425247, "epoch": 1.847128584192998, "grad_norm": 402.14959716796875, "learning_rate": 6.0381117271146075e-06, "loss": 0.42698097, "memory(GiB)": 34.88, "step": 68220, "train_speed(iter/s)": 0.41295 }, { "acc": 0.92084064, "epoch": 1.8472639644762137, "grad_norm": 5.881852626800537, "learning_rate": 6.037564340962532e-06, "loss": 0.37884467, "memory(GiB)": 34.88, "step": 68225, "train_speed(iter/s)": 0.412952 }, { "acc": 0.91953726, "epoch": 1.8473993447594292, "grad_norm": 5.2557501792907715, "learning_rate": 6.037016941819187e-06, "loss": 0.4093585, "memory(GiB)": 34.88, "step": 68230, "train_speed(iter/s)": 0.412954 }, { "acc": 0.92508869, "epoch": 1.8475347250426448, "grad_norm": 10.316078186035156, "learning_rate": 6.036469529691432e-06, "loss": 0.40400629, "memory(GiB)": 34.88, "step": 68235, "train_speed(iter/s)": 0.412955 }, { "acc": 0.90399666, "epoch": 1.8476701053258604, "grad_norm": 6.653555393218994, "learning_rate": 6.035922104586121e-06, "loss": 0.54482388, "memory(GiB)": 34.88, "step": 68240, "train_speed(iter/s)": 0.412957 }, { "acc": 0.91860886, "epoch": 1.8478054856090758, "grad_norm": 5.173038005828857, "learning_rate": 6.0353746665101155e-06, "loss": 0.43714204, "memory(GiB)": 34.88, "step": 68245, "train_speed(iter/s)": 0.412958 }, { "acc": 0.92128582, "epoch": 1.8479408658922916, "grad_norm": 8.696946144104004, "learning_rate": 6.0348272154702705e-06, "loss": 0.41621475, "memory(GiB)": 34.88, "step": 68250, "train_speed(iter/s)": 0.412959 }, { "acc": 0.91010303, "epoch": 1.848076246175507, "grad_norm": 8.546356201171875, "learning_rate": 6.034279751473444e-06, "loss": 0.47538314, "memory(GiB)": 34.88, "step": 68255, "train_speed(iter/s)": 0.412961 }, { "acc": 0.91879349, "epoch": 1.8482116264587225, "grad_norm": 18.87676429748535, "learning_rate": 6.033732274526495e-06, "loss": 0.39334621, "memory(GiB)": 34.88, "step": 68260, "train_speed(iter/s)": 0.412963 }, { "acc": 0.91780119, "epoch": 1.848347006741938, "grad_norm": 16.102401733398438, "learning_rate": 6.033184784636283e-06, "loss": 0.49676557, "memory(GiB)": 34.88, "step": 68265, "train_speed(iter/s)": 0.412964 }, { "acc": 0.92854853, "epoch": 1.8484823870251537, "grad_norm": 5.8582963943481445, "learning_rate": 6.032637281809663e-06, "loss": 0.39910684, "memory(GiB)": 34.88, "step": 68270, "train_speed(iter/s)": 0.412966 }, { "acc": 0.91571836, "epoch": 1.8486177673083692, "grad_norm": 5.380622863769531, "learning_rate": 6.032089766053496e-06, "loss": 0.46054668, "memory(GiB)": 34.88, "step": 68275, "train_speed(iter/s)": 0.412967 }, { "acc": 0.90931005, "epoch": 1.8487531475915846, "grad_norm": 12.812809944152832, "learning_rate": 6.0315422373746405e-06, "loss": 0.56136732, "memory(GiB)": 34.88, "step": 68280, "train_speed(iter/s)": 0.412969 }, { "acc": 0.89677029, "epoch": 1.8488885278748004, "grad_norm": 12.882240295410156, "learning_rate": 6.030994695779953e-06, "loss": 0.59843111, "memory(GiB)": 34.88, "step": 68285, "train_speed(iter/s)": 0.41297 }, { "acc": 0.89950075, "epoch": 1.8490239081580158, "grad_norm": 14.071066856384277, "learning_rate": 6.0304471412762985e-06, "loss": 0.59746351, "memory(GiB)": 34.88, "step": 68290, "train_speed(iter/s)": 0.412971 }, { "acc": 0.88021288, "epoch": 1.8491592884412316, "grad_norm": 12.201358795166016, "learning_rate": 6.029899573870529e-06, "loss": 0.79874067, "memory(GiB)": 34.88, "step": 68295, "train_speed(iter/s)": 0.412973 }, { "acc": 0.91967783, "epoch": 1.849294668724447, "grad_norm": 7.408534049987793, "learning_rate": 6.029351993569508e-06, "loss": 0.47109661, "memory(GiB)": 34.88, "step": 68300, "train_speed(iter/s)": 0.412974 }, { "acc": 0.9276371, "epoch": 1.8494300490076625, "grad_norm": 11.333740234375, "learning_rate": 6.028804400380094e-06, "loss": 0.38126838, "memory(GiB)": 34.88, "step": 68305, "train_speed(iter/s)": 0.412976 }, { "acc": 0.91388359, "epoch": 1.849565429290878, "grad_norm": 5.7071757316589355, "learning_rate": 6.028256794309146e-06, "loss": 0.44867477, "memory(GiB)": 34.88, "step": 68310, "train_speed(iter/s)": 0.412977 }, { "acc": 0.89332533, "epoch": 1.8497008095740937, "grad_norm": 5.784611701965332, "learning_rate": 6.027709175363528e-06, "loss": 0.6400876, "memory(GiB)": 34.88, "step": 68315, "train_speed(iter/s)": 0.412978 }, { "acc": 0.90748701, "epoch": 1.8498361898573092, "grad_norm": 4.8741984367370605, "learning_rate": 6.027161543550093e-06, "loss": 0.54656429, "memory(GiB)": 34.88, "step": 68320, "train_speed(iter/s)": 0.41298 }, { "acc": 0.91491566, "epoch": 1.8499715701405246, "grad_norm": 6.222483158111572, "learning_rate": 6.026613898875706e-06, "loss": 0.43136153, "memory(GiB)": 34.88, "step": 68325, "train_speed(iter/s)": 0.412981 }, { "acc": 0.898489, "epoch": 1.8501069504237404, "grad_norm": 5.2473464012146, "learning_rate": 6.026066241347225e-06, "loss": 0.53542776, "memory(GiB)": 34.88, "step": 68330, "train_speed(iter/s)": 0.412983 }, { "acc": 0.90785522, "epoch": 1.8502423307069558, "grad_norm": 7.438668251037598, "learning_rate": 6.0255185709715125e-06, "loss": 0.56133442, "memory(GiB)": 34.88, "step": 68335, "train_speed(iter/s)": 0.412984 }, { "acc": 0.91254673, "epoch": 1.8503777109901713, "grad_norm": 10.828948974609375, "learning_rate": 6.024970887755428e-06, "loss": 0.56598797, "memory(GiB)": 34.88, "step": 68340, "train_speed(iter/s)": 0.412986 }, { "acc": 0.90946636, "epoch": 1.850513091273387, "grad_norm": 15.757014274597168, "learning_rate": 6.024423191705833e-06, "loss": 0.53768959, "memory(GiB)": 34.88, "step": 68345, "train_speed(iter/s)": 0.412987 }, { "acc": 0.91580067, "epoch": 1.8506484715566025, "grad_norm": 17.27956199645996, "learning_rate": 6.02387548282959e-06, "loss": 0.43413138, "memory(GiB)": 34.88, "step": 68350, "train_speed(iter/s)": 0.412989 }, { "acc": 0.91791725, "epoch": 1.850783851839818, "grad_norm": 6.812273025512695, "learning_rate": 6.023327761133555e-06, "loss": 0.45841198, "memory(GiB)": 34.88, "step": 68355, "train_speed(iter/s)": 0.41299 }, { "acc": 0.91153374, "epoch": 1.8509192321230334, "grad_norm": 11.525030136108398, "learning_rate": 6.022780026624594e-06, "loss": 0.47920918, "memory(GiB)": 34.88, "step": 68360, "train_speed(iter/s)": 0.412992 }, { "acc": 0.91538906, "epoch": 1.8510546124062492, "grad_norm": 15.907577514648438, "learning_rate": 6.0222322793095656e-06, "loss": 0.44372206, "memory(GiB)": 34.88, "step": 68365, "train_speed(iter/s)": 0.412993 }, { "acc": 0.90405998, "epoch": 1.8511899926894646, "grad_norm": 10.70768928527832, "learning_rate": 6.021684519195333e-06, "loss": 0.5511229, "memory(GiB)": 34.88, "step": 68370, "train_speed(iter/s)": 0.412994 }, { "acc": 0.93519688, "epoch": 1.8513253729726804, "grad_norm": 4.012269496917725, "learning_rate": 6.0211367462887585e-06, "loss": 0.34042387, "memory(GiB)": 34.88, "step": 68375, "train_speed(iter/s)": 0.412996 }, { "acc": 0.90268698, "epoch": 1.8514607532558958, "grad_norm": 7.41510534286499, "learning_rate": 6.020588960596702e-06, "loss": 0.56897631, "memory(GiB)": 34.88, "step": 68380, "train_speed(iter/s)": 0.412997 }, { "acc": 0.9306488, "epoch": 1.8515961335391113, "grad_norm": 7.820916175842285, "learning_rate": 6.020041162126027e-06, "loss": 0.39072013, "memory(GiB)": 34.88, "step": 68385, "train_speed(iter/s)": 0.412999 }, { "acc": 0.91805162, "epoch": 1.851731513822327, "grad_norm": 8.781622886657715, "learning_rate": 6.019493350883596e-06, "loss": 0.44276543, "memory(GiB)": 34.88, "step": 68390, "train_speed(iter/s)": 0.413 }, { "acc": 0.93011742, "epoch": 1.8518668941055425, "grad_norm": 3.0170891284942627, "learning_rate": 6.018945526876273e-06, "loss": 0.31376703, "memory(GiB)": 34.88, "step": 68395, "train_speed(iter/s)": 0.413002 }, { "acc": 0.89896517, "epoch": 1.852002274388758, "grad_norm": 9.220746994018555, "learning_rate": 6.018397690110916e-06, "loss": 0.5278482, "memory(GiB)": 34.88, "step": 68400, "train_speed(iter/s)": 0.413003 }, { "acc": 0.92878647, "epoch": 1.8521376546719734, "grad_norm": 10.847325325012207, "learning_rate": 6.0178498405943916e-06, "loss": 0.33476136, "memory(GiB)": 34.88, "step": 68405, "train_speed(iter/s)": 0.413005 }, { "acc": 0.91470051, "epoch": 1.8522730349551892, "grad_norm": 6.771998882293701, "learning_rate": 6.017301978333559e-06, "loss": 0.45921936, "memory(GiB)": 34.88, "step": 68410, "train_speed(iter/s)": 0.413006 }, { "acc": 0.9206316, "epoch": 1.8524084152384046, "grad_norm": 10.163171768188477, "learning_rate": 6.016754103335285e-06, "loss": 0.45684414, "memory(GiB)": 34.88, "step": 68415, "train_speed(iter/s)": 0.413007 }, { "acc": 0.9070013, "epoch": 1.8525437955216202, "grad_norm": 8.36756706237793, "learning_rate": 6.016206215606433e-06, "loss": 0.47804232, "memory(GiB)": 34.88, "step": 68420, "train_speed(iter/s)": 0.413009 }, { "acc": 0.90467463, "epoch": 1.8526791758048358, "grad_norm": 6.412593841552734, "learning_rate": 6.015658315153863e-06, "loss": 0.51752806, "memory(GiB)": 34.88, "step": 68425, "train_speed(iter/s)": 0.413011 }, { "acc": 0.91332483, "epoch": 1.8528145560880513, "grad_norm": 15.00720500946045, "learning_rate": 6.0151104019844405e-06, "loss": 0.45199604, "memory(GiB)": 34.88, "step": 68430, "train_speed(iter/s)": 0.413012 }, { "acc": 0.92307119, "epoch": 1.852949936371267, "grad_norm": 6.034698486328125, "learning_rate": 6.01456247610503e-06, "loss": 0.44283447, "memory(GiB)": 34.88, "step": 68435, "train_speed(iter/s)": 0.413013 }, { "acc": 0.91334829, "epoch": 1.8530853166544823, "grad_norm": 9.382540702819824, "learning_rate": 6.014014537522495e-06, "loss": 0.42050476, "memory(GiB)": 34.88, "step": 68440, "train_speed(iter/s)": 0.413014 }, { "acc": 0.903619, "epoch": 1.853220696937698, "grad_norm": 12.913902282714844, "learning_rate": 6.013466586243697e-06, "loss": 0.5156538, "memory(GiB)": 34.88, "step": 68445, "train_speed(iter/s)": 0.413016 }, { "acc": 0.91998615, "epoch": 1.8533560772209134, "grad_norm": 4.788187026977539, "learning_rate": 6.012918622275501e-06, "loss": 0.48028297, "memory(GiB)": 34.88, "step": 68450, "train_speed(iter/s)": 0.413017 }, { "acc": 0.91629372, "epoch": 1.8534914575041292, "grad_norm": 16.0861873626709, "learning_rate": 6.012370645624774e-06, "loss": 0.44046297, "memory(GiB)": 34.88, "step": 68455, "train_speed(iter/s)": 0.413018 }, { "acc": 0.90532665, "epoch": 1.8536268377873446, "grad_norm": 7.815676689147949, "learning_rate": 6.011822656298378e-06, "loss": 0.51450205, "memory(GiB)": 34.88, "step": 68460, "train_speed(iter/s)": 0.41302 }, { "acc": 0.91870861, "epoch": 1.8537622180705602, "grad_norm": 10.577764511108398, "learning_rate": 6.011274654303181e-06, "loss": 0.43545113, "memory(GiB)": 34.88, "step": 68465, "train_speed(iter/s)": 0.413021 }, { "acc": 0.91134119, "epoch": 1.8538975983537758, "grad_norm": 8.895824432373047, "learning_rate": 6.010726639646043e-06, "loss": 0.49418011, "memory(GiB)": 34.88, "step": 68470, "train_speed(iter/s)": 0.413023 }, { "acc": 0.92106867, "epoch": 1.8540329786369913, "grad_norm": 26.137264251708984, "learning_rate": 6.010178612333834e-06, "loss": 0.39446158, "memory(GiB)": 34.88, "step": 68475, "train_speed(iter/s)": 0.413024 }, { "acc": 0.91530857, "epoch": 1.854168358920207, "grad_norm": 9.991621971130371, "learning_rate": 6.009630572373412e-06, "loss": 0.50639019, "memory(GiB)": 34.88, "step": 68480, "train_speed(iter/s)": 0.413026 }, { "acc": 0.90930862, "epoch": 1.8543037392034223, "grad_norm": 9.338144302368164, "learning_rate": 6.009082519771648e-06, "loss": 0.50579357, "memory(GiB)": 34.88, "step": 68485, "train_speed(iter/s)": 0.413027 }, { "acc": 0.89765558, "epoch": 1.854439119486638, "grad_norm": 13.648096084594727, "learning_rate": 6.008534454535408e-06, "loss": 0.5294179, "memory(GiB)": 34.88, "step": 68490, "train_speed(iter/s)": 0.413028 }, { "acc": 0.90572681, "epoch": 1.8545744997698534, "grad_norm": 12.26276683807373, "learning_rate": 6.0079863766715544e-06, "loss": 0.52222376, "memory(GiB)": 34.88, "step": 68495, "train_speed(iter/s)": 0.413029 }, { "acc": 0.91170292, "epoch": 1.854709880053069, "grad_norm": 7.652110576629639, "learning_rate": 6.007438286186954e-06, "loss": 0.46569076, "memory(GiB)": 34.88, "step": 68500, "train_speed(iter/s)": 0.413031 }, { "acc": 0.93031254, "epoch": 1.8548452603362846, "grad_norm": 5.208948135375977, "learning_rate": 6.006890183088473e-06, "loss": 0.41304474, "memory(GiB)": 34.88, "step": 68505, "train_speed(iter/s)": 0.413032 }, { "acc": 0.90680838, "epoch": 1.8549806406195002, "grad_norm": 22.119199752807617, "learning_rate": 6.006342067382979e-06, "loss": 0.53720016, "memory(GiB)": 34.88, "step": 68510, "train_speed(iter/s)": 0.413033 }, { "acc": 0.93202095, "epoch": 1.8551160209027158, "grad_norm": 4.114516735076904, "learning_rate": 6.0057939390773336e-06, "loss": 0.35976658, "memory(GiB)": 34.88, "step": 68515, "train_speed(iter/s)": 0.413035 }, { "acc": 0.91334915, "epoch": 1.8552514011859311, "grad_norm": 35.75674057006836, "learning_rate": 6.00524579817841e-06, "loss": 0.55668921, "memory(GiB)": 34.88, "step": 68520, "train_speed(iter/s)": 0.413036 }, { "acc": 0.92395039, "epoch": 1.855386781469147, "grad_norm": 10.253387451171875, "learning_rate": 6.004697644693068e-06, "loss": 0.34097142, "memory(GiB)": 34.88, "step": 68525, "train_speed(iter/s)": 0.413037 }, { "acc": 0.90337601, "epoch": 1.8555221617523623, "grad_norm": 10.643340110778809, "learning_rate": 6.004149478628178e-06, "loss": 0.48610458, "memory(GiB)": 34.88, "step": 68530, "train_speed(iter/s)": 0.413039 }, { "acc": 0.90939293, "epoch": 1.855657542035578, "grad_norm": 16.510395050048828, "learning_rate": 6.003601299990607e-06, "loss": 0.45908976, "memory(GiB)": 34.88, "step": 68535, "train_speed(iter/s)": 0.41304 }, { "acc": 0.92664642, "epoch": 1.8557929223187934, "grad_norm": 9.340354919433594, "learning_rate": 6.003053108787219e-06, "loss": 0.39979014, "memory(GiB)": 34.88, "step": 68540, "train_speed(iter/s)": 0.413041 }, { "acc": 0.90019636, "epoch": 1.855928302602009, "grad_norm": 10.343717575073242, "learning_rate": 6.002504905024886e-06, "loss": 0.62540569, "memory(GiB)": 34.88, "step": 68545, "train_speed(iter/s)": 0.413043 }, { "acc": 0.91223917, "epoch": 1.8560636828852246, "grad_norm": 6.930725574493408, "learning_rate": 6.001956688710471e-06, "loss": 0.55108557, "memory(GiB)": 34.88, "step": 68550, "train_speed(iter/s)": 0.413044 }, { "acc": 0.90737991, "epoch": 1.8561990631684402, "grad_norm": 7.0317840576171875, "learning_rate": 6.001408459850843e-06, "loss": 0.41296148, "memory(GiB)": 34.88, "step": 68555, "train_speed(iter/s)": 0.413046 }, { "acc": 0.90931139, "epoch": 1.8563344434516558, "grad_norm": 10.618925094604492, "learning_rate": 6.000860218452871e-06, "loss": 0.52145534, "memory(GiB)": 34.88, "step": 68560, "train_speed(iter/s)": 0.413047 }, { "acc": 0.91065655, "epoch": 1.8564698237348711, "grad_norm": 8.803014755249023, "learning_rate": 6.0003119645234195e-06, "loss": 0.43926988, "memory(GiB)": 34.88, "step": 68565, "train_speed(iter/s)": 0.413048 }, { "acc": 0.89283485, "epoch": 1.856605204018087, "grad_norm": 7.819113254547119, "learning_rate": 5.999763698069361e-06, "loss": 0.60850182, "memory(GiB)": 34.88, "step": 68570, "train_speed(iter/s)": 0.413049 }, { "acc": 0.92314281, "epoch": 1.8567405843013023, "grad_norm": 9.054673194885254, "learning_rate": 5.999215419097559e-06, "loss": 0.3716949, "memory(GiB)": 34.88, "step": 68575, "train_speed(iter/s)": 0.413051 }, { "acc": 0.89801559, "epoch": 1.8568759645845179, "grad_norm": 17.109140396118164, "learning_rate": 5.9986671276148855e-06, "loss": 0.51277561, "memory(GiB)": 34.88, "step": 68580, "train_speed(iter/s)": 0.413052 }, { "acc": 0.91282396, "epoch": 1.8570113448677334, "grad_norm": 4.608651638031006, "learning_rate": 5.998118823628206e-06, "loss": 0.47905254, "memory(GiB)": 34.88, "step": 68585, "train_speed(iter/s)": 0.413053 }, { "acc": 0.92480717, "epoch": 1.857146725150949, "grad_norm": 9.220260620117188, "learning_rate": 5.997570507144391e-06, "loss": 0.43894053, "memory(GiB)": 34.88, "step": 68590, "train_speed(iter/s)": 0.413055 }, { "acc": 0.92505083, "epoch": 1.8572821054341646, "grad_norm": 10.823426246643066, "learning_rate": 5.997022178170309e-06, "loss": 0.46776538, "memory(GiB)": 34.88, "step": 68595, "train_speed(iter/s)": 0.413055 }, { "acc": 0.92526493, "epoch": 1.85741748571738, "grad_norm": 5.024027347564697, "learning_rate": 5.996473836712827e-06, "loss": 0.41096792, "memory(GiB)": 34.88, "step": 68600, "train_speed(iter/s)": 0.413057 }, { "acc": 0.89960976, "epoch": 1.8575528660005958, "grad_norm": 8.700909614562988, "learning_rate": 5.995925482778818e-06, "loss": 0.63257179, "memory(GiB)": 34.88, "step": 68605, "train_speed(iter/s)": 0.413057 }, { "acc": 0.93762455, "epoch": 1.8576882462838111, "grad_norm": 10.171231269836426, "learning_rate": 5.995377116375148e-06, "loss": 0.37586024, "memory(GiB)": 34.88, "step": 68610, "train_speed(iter/s)": 0.413059 }, { "acc": 0.90082226, "epoch": 1.857823626567027, "grad_norm": 16.565250396728516, "learning_rate": 5.9948287375086875e-06, "loss": 0.48259354, "memory(GiB)": 34.88, "step": 68615, "train_speed(iter/s)": 0.41306 }, { "acc": 0.93402863, "epoch": 1.8579590068502423, "grad_norm": 7.089176654815674, "learning_rate": 5.994280346186304e-06, "loss": 0.32056909, "memory(GiB)": 34.88, "step": 68620, "train_speed(iter/s)": 0.413062 }, { "acc": 0.91335754, "epoch": 1.8580943871334579, "grad_norm": 8.983210563659668, "learning_rate": 5.993731942414871e-06, "loss": 0.44798155, "memory(GiB)": 34.88, "step": 68625, "train_speed(iter/s)": 0.413063 }, { "acc": 0.91184721, "epoch": 1.8582297674166735, "grad_norm": 6.071950435638428, "learning_rate": 5.993183526201255e-06, "loss": 0.51613922, "memory(GiB)": 34.88, "step": 68630, "train_speed(iter/s)": 0.413064 }, { "acc": 0.88362036, "epoch": 1.858365147699889, "grad_norm": 7.1341071128845215, "learning_rate": 5.992635097552326e-06, "loss": 0.6800436, "memory(GiB)": 34.88, "step": 68635, "train_speed(iter/s)": 0.413065 }, { "acc": 0.90816574, "epoch": 1.8585005279831046, "grad_norm": 8.946586608886719, "learning_rate": 5.992086656474958e-06, "loss": 0.55905437, "memory(GiB)": 34.88, "step": 68640, "train_speed(iter/s)": 0.413066 }, { "acc": 0.93343658, "epoch": 1.85863590826632, "grad_norm": 8.35956859588623, "learning_rate": 5.991538202976017e-06, "loss": 0.38726013, "memory(GiB)": 34.88, "step": 68645, "train_speed(iter/s)": 0.413067 }, { "acc": 0.91753674, "epoch": 1.8587712885495358, "grad_norm": 6.557161808013916, "learning_rate": 5.990989737062376e-06, "loss": 0.42625885, "memory(GiB)": 34.88, "step": 68650, "train_speed(iter/s)": 0.413068 }, { "acc": 0.90397472, "epoch": 1.8589066688327511, "grad_norm": 8.388866424560547, "learning_rate": 5.990441258740905e-06, "loss": 0.46348028, "memory(GiB)": 34.88, "step": 68655, "train_speed(iter/s)": 0.41307 }, { "acc": 0.91897259, "epoch": 1.8590420491159667, "grad_norm": 7.660745620727539, "learning_rate": 5.989892768018474e-06, "loss": 0.48102388, "memory(GiB)": 34.88, "step": 68660, "train_speed(iter/s)": 0.413072 }, { "acc": 0.903088, "epoch": 1.8591774293991823, "grad_norm": 6.689230442047119, "learning_rate": 5.989344264901953e-06, "loss": 0.51572547, "memory(GiB)": 34.88, "step": 68665, "train_speed(iter/s)": 0.413073 }, { "acc": 0.91027679, "epoch": 1.8593128096823979, "grad_norm": 4.566244602203369, "learning_rate": 5.988795749398217e-06, "loss": 0.47942214, "memory(GiB)": 34.88, "step": 68670, "train_speed(iter/s)": 0.413075 }, { "acc": 0.91084347, "epoch": 1.8594481899656135, "grad_norm": 17.09418487548828, "learning_rate": 5.988247221514132e-06, "loss": 0.53149834, "memory(GiB)": 34.88, "step": 68675, "train_speed(iter/s)": 0.413076 }, { "acc": 0.9175169, "epoch": 1.8595835702488288, "grad_norm": 7.4839701652526855, "learning_rate": 5.987698681256574e-06, "loss": 0.39960711, "memory(GiB)": 34.88, "step": 68680, "train_speed(iter/s)": 0.413078 }, { "acc": 0.9222393, "epoch": 1.8597189505320446, "grad_norm": 6.01947021484375, "learning_rate": 5.987150128632414e-06, "loss": 0.40160584, "memory(GiB)": 34.88, "step": 68685, "train_speed(iter/s)": 0.413078 }, { "acc": 0.91491957, "epoch": 1.85985433081526, "grad_norm": 8.177022933959961, "learning_rate": 5.98660156364852e-06, "loss": 0.44597144, "memory(GiB)": 34.88, "step": 68690, "train_speed(iter/s)": 0.41308 }, { "acc": 0.9146904, "epoch": 1.8599897110984758, "grad_norm": 6.36451530456543, "learning_rate": 5.986052986311767e-06, "loss": 0.4731514, "memory(GiB)": 34.88, "step": 68695, "train_speed(iter/s)": 0.413081 }, { "acc": 0.92403297, "epoch": 1.8601250913816911, "grad_norm": 7.73947286605835, "learning_rate": 5.985504396629026e-06, "loss": 0.429878, "memory(GiB)": 34.88, "step": 68700, "train_speed(iter/s)": 0.413083 }, { "acc": 0.9146452, "epoch": 1.8602604716649067, "grad_norm": 8.542380332946777, "learning_rate": 5.98495579460717e-06, "loss": 0.47429762, "memory(GiB)": 34.88, "step": 68705, "train_speed(iter/s)": 0.413084 }, { "acc": 0.91554489, "epoch": 1.8603958519481223, "grad_norm": 10.973759651184082, "learning_rate": 5.98440718025307e-06, "loss": 0.42731738, "memory(GiB)": 34.88, "step": 68710, "train_speed(iter/s)": 0.413086 }, { "acc": 0.88752327, "epoch": 1.8605312322313379, "grad_norm": 11.868167877197266, "learning_rate": 5.983858553573598e-06, "loss": 0.59864964, "memory(GiB)": 34.88, "step": 68715, "train_speed(iter/s)": 0.413087 }, { "acc": 0.90323925, "epoch": 1.8606666125145535, "grad_norm": 6.141323089599609, "learning_rate": 5.98330991457563e-06, "loss": 0.52216558, "memory(GiB)": 34.88, "step": 68720, "train_speed(iter/s)": 0.413088 }, { "acc": 0.89111128, "epoch": 1.8608019927977688, "grad_norm": 5.447499752044678, "learning_rate": 5.982761263266036e-06, "loss": 0.61415348, "memory(GiB)": 34.88, "step": 68725, "train_speed(iter/s)": 0.41309 }, { "acc": 0.90194645, "epoch": 1.8609373730809846, "grad_norm": 9.70267391204834, "learning_rate": 5.98221259965169e-06, "loss": 0.59235258, "memory(GiB)": 34.88, "step": 68730, "train_speed(iter/s)": 0.413091 }, { "acc": 0.92049408, "epoch": 1.8610727533642, "grad_norm": 4.84675931930542, "learning_rate": 5.9816639237394635e-06, "loss": 0.37602301, "memory(GiB)": 34.88, "step": 68735, "train_speed(iter/s)": 0.413093 }, { "acc": 0.91563597, "epoch": 1.8612081336474156, "grad_norm": 9.629308700561523, "learning_rate": 5.9811152355362325e-06, "loss": 0.46665463, "memory(GiB)": 34.88, "step": 68740, "train_speed(iter/s)": 0.413094 }, { "acc": 0.93296509, "epoch": 1.8613435139306311, "grad_norm": 7.115152835845947, "learning_rate": 5.980566535048867e-06, "loss": 0.33684163, "memory(GiB)": 34.88, "step": 68745, "train_speed(iter/s)": 0.413095 }, { "acc": 0.92122555, "epoch": 1.8614788942138467, "grad_norm": 10.37309741973877, "learning_rate": 5.980017822284242e-06, "loss": 0.42289968, "memory(GiB)": 34.88, "step": 68750, "train_speed(iter/s)": 0.413096 }, { "acc": 0.90648346, "epoch": 1.8616142744970623, "grad_norm": 27.114059448242188, "learning_rate": 5.979469097249233e-06, "loss": 0.62038069, "memory(GiB)": 34.88, "step": 68755, "train_speed(iter/s)": 0.413097 }, { "acc": 0.91713409, "epoch": 1.8617496547802777, "grad_norm": 10.988609313964844, "learning_rate": 5.978920359950712e-06, "loss": 0.47285948, "memory(GiB)": 34.88, "step": 68760, "train_speed(iter/s)": 0.413098 }, { "acc": 0.91563625, "epoch": 1.8618850350634935, "grad_norm": 10.355486869812012, "learning_rate": 5.978371610395554e-06, "loss": 0.43997364, "memory(GiB)": 34.88, "step": 68765, "train_speed(iter/s)": 0.4131 }, { "acc": 0.92109489, "epoch": 1.8620204153467088, "grad_norm": 9.196155548095703, "learning_rate": 5.977822848590631e-06, "loss": 0.43166099, "memory(GiB)": 34.88, "step": 68770, "train_speed(iter/s)": 0.413101 }, { "acc": 0.93160524, "epoch": 1.8621557956299246, "grad_norm": 11.433623313903809, "learning_rate": 5.977274074542821e-06, "loss": 0.40071416, "memory(GiB)": 34.88, "step": 68775, "train_speed(iter/s)": 0.413102 }, { "acc": 0.91772594, "epoch": 1.86229117591314, "grad_norm": 5.219574451446533, "learning_rate": 5.976725288258994e-06, "loss": 0.45231743, "memory(GiB)": 34.88, "step": 68780, "train_speed(iter/s)": 0.413103 }, { "acc": 0.92226048, "epoch": 1.8624265561963556, "grad_norm": 11.125, "learning_rate": 5.976176489746027e-06, "loss": 0.43100228, "memory(GiB)": 34.88, "step": 68785, "train_speed(iter/s)": 0.413105 }, { "acc": 0.93192244, "epoch": 1.8625619364795711, "grad_norm": 5.477476119995117, "learning_rate": 5.975627679010798e-06, "loss": 0.35997834, "memory(GiB)": 34.88, "step": 68790, "train_speed(iter/s)": 0.413106 }, { "acc": 0.92097626, "epoch": 1.8626973167627867, "grad_norm": 6.370417594909668, "learning_rate": 5.975078856060175e-06, "loss": 0.50787954, "memory(GiB)": 34.88, "step": 68795, "train_speed(iter/s)": 0.413108 }, { "acc": 0.93171854, "epoch": 1.8628326970460023, "grad_norm": 8.453409194946289, "learning_rate": 5.974530020901039e-06, "loss": 0.45140228, "memory(GiB)": 34.88, "step": 68800, "train_speed(iter/s)": 0.413109 }, { "acc": 0.91371469, "epoch": 1.8629680773292177, "grad_norm": 6.722631931304932, "learning_rate": 5.973981173540262e-06, "loss": 0.46273556, "memory(GiB)": 34.88, "step": 68805, "train_speed(iter/s)": 0.413111 }, { "acc": 0.9013484, "epoch": 1.8631034576124335, "grad_norm": 10.483877182006836, "learning_rate": 5.973432313984721e-06, "loss": 0.63593721, "memory(GiB)": 34.88, "step": 68810, "train_speed(iter/s)": 0.413112 }, { "acc": 0.88887691, "epoch": 1.8632388378956488, "grad_norm": 13.17837905883789, "learning_rate": 5.972883442241292e-06, "loss": 0.58358722, "memory(GiB)": 34.88, "step": 68815, "train_speed(iter/s)": 0.413114 }, { "acc": 0.9046195, "epoch": 1.8633742181788644, "grad_norm": 5.710406303405762, "learning_rate": 5.972334558316847e-06, "loss": 0.60470099, "memory(GiB)": 34.88, "step": 68820, "train_speed(iter/s)": 0.413115 }, { "acc": 0.91628332, "epoch": 1.86350959846208, "grad_norm": 6.225043773651123, "learning_rate": 5.971785662218267e-06, "loss": 0.48548732, "memory(GiB)": 34.88, "step": 68825, "train_speed(iter/s)": 0.413117 }, { "acc": 0.90524931, "epoch": 1.8636449787452956, "grad_norm": 23.904447555541992, "learning_rate": 5.971236753952424e-06, "loss": 0.52228527, "memory(GiB)": 34.88, "step": 68830, "train_speed(iter/s)": 0.413118 }, { "acc": 0.90960598, "epoch": 1.8637803590285111, "grad_norm": 10.1414213180542, "learning_rate": 5.970687833526196e-06, "loss": 0.51304026, "memory(GiB)": 34.88, "step": 68835, "train_speed(iter/s)": 0.413119 }, { "acc": 0.91392498, "epoch": 1.8639157393117265, "grad_norm": 10.372089385986328, "learning_rate": 5.970138900946458e-06, "loss": 0.44286528, "memory(GiB)": 34.88, "step": 68840, "train_speed(iter/s)": 0.413121 }, { "acc": 0.91452026, "epoch": 1.8640511195949423, "grad_norm": 10.829832077026367, "learning_rate": 5.969589956220089e-06, "loss": 0.4411551, "memory(GiB)": 34.88, "step": 68845, "train_speed(iter/s)": 0.413122 }, { "acc": 0.91859255, "epoch": 1.8641864998781577, "grad_norm": 11.28958797454834, "learning_rate": 5.969040999353963e-06, "loss": 0.42910938, "memory(GiB)": 34.88, "step": 68850, "train_speed(iter/s)": 0.413124 }, { "acc": 0.926544, "epoch": 1.8643218801613735, "grad_norm": 5.464014053344727, "learning_rate": 5.968492030354961e-06, "loss": 0.36312404, "memory(GiB)": 34.88, "step": 68855, "train_speed(iter/s)": 0.413125 }, { "acc": 0.91647739, "epoch": 1.8644572604445888, "grad_norm": 4.77077054977417, "learning_rate": 5.967943049229953e-06, "loss": 0.46018095, "memory(GiB)": 34.88, "step": 68860, "train_speed(iter/s)": 0.413127 }, { "acc": 0.93306828, "epoch": 1.8645926407278044, "grad_norm": 4.417086601257324, "learning_rate": 5.96739405598582e-06, "loss": 0.31333053, "memory(GiB)": 34.88, "step": 68865, "train_speed(iter/s)": 0.413128 }, { "acc": 0.92886162, "epoch": 1.86472802101102, "grad_norm": 21.01896095275879, "learning_rate": 5.96684505062944e-06, "loss": 0.38399949, "memory(GiB)": 34.88, "step": 68870, "train_speed(iter/s)": 0.41313 }, { "acc": 0.89035873, "epoch": 1.8648634012942356, "grad_norm": 5.136898517608643, "learning_rate": 5.966296033167689e-06, "loss": 0.55610204, "memory(GiB)": 34.88, "step": 68875, "train_speed(iter/s)": 0.413131 }, { "acc": 0.93267031, "epoch": 1.8649987815774511, "grad_norm": 7.186666965484619, "learning_rate": 5.965747003607446e-06, "loss": 0.38466926, "memory(GiB)": 34.88, "step": 68880, "train_speed(iter/s)": 0.413133 }, { "acc": 0.90909252, "epoch": 1.8651341618606665, "grad_norm": 5.977478981018066, "learning_rate": 5.965197961955586e-06, "loss": 0.56343741, "memory(GiB)": 34.88, "step": 68885, "train_speed(iter/s)": 0.413134 }, { "acc": 0.91203804, "epoch": 1.8652695421438823, "grad_norm": 7.360466480255127, "learning_rate": 5.964648908218988e-06, "loss": 0.50109506, "memory(GiB)": 34.88, "step": 68890, "train_speed(iter/s)": 0.413136 }, { "acc": 0.90592022, "epoch": 1.8654049224270977, "grad_norm": 9.237229347229004, "learning_rate": 5.964099842404533e-06, "loss": 0.52545557, "memory(GiB)": 34.88, "step": 68895, "train_speed(iter/s)": 0.413137 }, { "acc": 0.88457508, "epoch": 1.8655403027103132, "grad_norm": 6.194947242736816, "learning_rate": 5.9635507645190945e-06, "loss": 0.66536493, "memory(GiB)": 34.88, "step": 68900, "train_speed(iter/s)": 0.413138 }, { "acc": 0.89419947, "epoch": 1.8656756829935288, "grad_norm": 8.606035232543945, "learning_rate": 5.963001674569554e-06, "loss": 0.53193521, "memory(GiB)": 34.88, "step": 68905, "train_speed(iter/s)": 0.41314 }, { "acc": 0.91787376, "epoch": 1.8658110632767444, "grad_norm": 4.03762149810791, "learning_rate": 5.962452572562787e-06, "loss": 0.37605371, "memory(GiB)": 34.88, "step": 68910, "train_speed(iter/s)": 0.413141 }, { "acc": 0.92965145, "epoch": 1.86594644355996, "grad_norm": 4.301311016082764, "learning_rate": 5.961903458505675e-06, "loss": 0.36899514, "memory(GiB)": 34.88, "step": 68915, "train_speed(iter/s)": 0.413143 }, { "acc": 0.91213474, "epoch": 1.8660818238431753, "grad_norm": 9.77807903289795, "learning_rate": 5.9613543324050945e-06, "loss": 0.56032076, "memory(GiB)": 34.88, "step": 68920, "train_speed(iter/s)": 0.413144 }, { "acc": 0.88785143, "epoch": 1.8662172041263911, "grad_norm": 8.560604095458984, "learning_rate": 5.9608051942679264e-06, "loss": 0.65940647, "memory(GiB)": 34.88, "step": 68925, "train_speed(iter/s)": 0.413146 }, { "acc": 0.9186636, "epoch": 1.8663525844096065, "grad_norm": 13.547797203063965, "learning_rate": 5.960256044101049e-06, "loss": 0.3797262, "memory(GiB)": 34.88, "step": 68930, "train_speed(iter/s)": 0.413147 }, { "acc": 0.92688665, "epoch": 1.8664879646928223, "grad_norm": 5.910703659057617, "learning_rate": 5.95970688191134e-06, "loss": 0.43247323, "memory(GiB)": 34.88, "step": 68935, "train_speed(iter/s)": 0.413149 }, { "acc": 0.90152092, "epoch": 1.8666233449760377, "grad_norm": 8.70260238647461, "learning_rate": 5.959157707705681e-06, "loss": 0.6207799, "memory(GiB)": 34.88, "step": 68940, "train_speed(iter/s)": 0.41315 }, { "acc": 0.91450443, "epoch": 1.8667587252592532, "grad_norm": 14.217304229736328, "learning_rate": 5.95860852149095e-06, "loss": 0.46558824, "memory(GiB)": 34.88, "step": 68945, "train_speed(iter/s)": 0.413152 }, { "acc": 0.92886209, "epoch": 1.8668941055424688, "grad_norm": 9.904983520507812, "learning_rate": 5.958059323274029e-06, "loss": 0.37075539, "memory(GiB)": 34.88, "step": 68950, "train_speed(iter/s)": 0.413153 }, { "acc": 0.89375954, "epoch": 1.8670294858256844, "grad_norm": 10.086719512939453, "learning_rate": 5.957510113061794e-06, "loss": 0.61484938, "memory(GiB)": 34.88, "step": 68955, "train_speed(iter/s)": 0.413155 }, { "acc": 0.89233274, "epoch": 1.8671648661089, "grad_norm": 9.479780197143555, "learning_rate": 5.956960890861127e-06, "loss": 0.59116106, "memory(GiB)": 34.88, "step": 68960, "train_speed(iter/s)": 0.413156 }, { "acc": 0.8958148, "epoch": 1.8673002463921153, "grad_norm": 9.594016075134277, "learning_rate": 5.956411656678907e-06, "loss": 0.63887095, "memory(GiB)": 34.88, "step": 68965, "train_speed(iter/s)": 0.413158 }, { "acc": 0.9164813, "epoch": 1.8674356266753311, "grad_norm": 8.389399528503418, "learning_rate": 5.955862410522016e-06, "loss": 0.46102195, "memory(GiB)": 34.88, "step": 68970, "train_speed(iter/s)": 0.413159 }, { "acc": 0.88251324, "epoch": 1.8675710069585465, "grad_norm": 13.557332038879395, "learning_rate": 5.955313152397334e-06, "loss": 0.63143153, "memory(GiB)": 34.88, "step": 68975, "train_speed(iter/s)": 0.413161 }, { "acc": 0.89814587, "epoch": 1.867706387241762, "grad_norm": 10.371782302856445, "learning_rate": 5.954763882311741e-06, "loss": 0.60469613, "memory(GiB)": 34.88, "step": 68980, "train_speed(iter/s)": 0.413162 }, { "acc": 0.90577278, "epoch": 1.8678417675249777, "grad_norm": 13.410228729248047, "learning_rate": 5.954214600272117e-06, "loss": 0.55283608, "memory(GiB)": 34.88, "step": 68985, "train_speed(iter/s)": 0.413164 }, { "acc": 0.91203671, "epoch": 1.8679771478081932, "grad_norm": 9.627169609069824, "learning_rate": 5.953665306285343e-06, "loss": 0.46940985, "memory(GiB)": 34.88, "step": 68990, "train_speed(iter/s)": 0.413165 }, { "acc": 0.90442181, "epoch": 1.8681125280914088, "grad_norm": 17.24291229248047, "learning_rate": 5.953116000358304e-06, "loss": 0.53320947, "memory(GiB)": 34.88, "step": 68995, "train_speed(iter/s)": 0.413167 }, { "acc": 0.92344704, "epoch": 1.8682479083746242, "grad_norm": 6.842240333557129, "learning_rate": 5.952566682497874e-06, "loss": 0.37987797, "memory(GiB)": 34.88, "step": 69000, "train_speed(iter/s)": 0.413168 }, { "acc": 0.93127499, "epoch": 1.86838328865784, "grad_norm": 5.841653347015381, "learning_rate": 5.9520173527109395e-06, "loss": 0.32179308, "memory(GiB)": 34.88, "step": 69005, "train_speed(iter/s)": 0.41317 }, { "acc": 0.89039555, "epoch": 1.8685186689410553, "grad_norm": 11.197617530822754, "learning_rate": 5.95146801100438e-06, "loss": 0.6907558, "memory(GiB)": 34.88, "step": 69010, "train_speed(iter/s)": 0.413171 }, { "acc": 0.90325851, "epoch": 1.8686540492242711, "grad_norm": 4.653451442718506, "learning_rate": 5.950918657385077e-06, "loss": 0.525069, "memory(GiB)": 34.88, "step": 69015, "train_speed(iter/s)": 0.413173 }, { "acc": 0.92769985, "epoch": 1.8687894295074865, "grad_norm": 7.0109148025512695, "learning_rate": 5.950369291859914e-06, "loss": 0.39595168, "memory(GiB)": 34.88, "step": 69020, "train_speed(iter/s)": 0.413174 }, { "acc": 0.90721474, "epoch": 1.868924809790702, "grad_norm": 16.337324142456055, "learning_rate": 5.949819914435771e-06, "loss": 0.48812542, "memory(GiB)": 34.88, "step": 69025, "train_speed(iter/s)": 0.413176 }, { "acc": 0.89764957, "epoch": 1.8690601900739177, "grad_norm": 8.72736930847168, "learning_rate": 5.94927052511953e-06, "loss": 0.57008438, "memory(GiB)": 34.88, "step": 69030, "train_speed(iter/s)": 0.413177 }, { "acc": 0.92699413, "epoch": 1.8691955703571332, "grad_norm": 13.964105606079102, "learning_rate": 5.948721123918074e-06, "loss": 0.40466967, "memory(GiB)": 34.88, "step": 69035, "train_speed(iter/s)": 0.413178 }, { "acc": 0.92539263, "epoch": 1.8693309506403488, "grad_norm": 23.26293182373047, "learning_rate": 5.948171710838287e-06, "loss": 0.41052256, "memory(GiB)": 34.88, "step": 69040, "train_speed(iter/s)": 0.41318 }, { "acc": 0.91871281, "epoch": 1.8694663309235642, "grad_norm": 8.88552188873291, "learning_rate": 5.947622285887048e-06, "loss": 0.45220232, "memory(GiB)": 34.88, "step": 69045, "train_speed(iter/s)": 0.413181 }, { "acc": 0.92796993, "epoch": 1.86960171120678, "grad_norm": 12.311637878417969, "learning_rate": 5.947072849071242e-06, "loss": 0.3348278, "memory(GiB)": 34.88, "step": 69050, "train_speed(iter/s)": 0.413183 }, { "acc": 0.91117249, "epoch": 1.8697370914899953, "grad_norm": 7.652368068695068, "learning_rate": 5.9465234003977505e-06, "loss": 0.44500999, "memory(GiB)": 34.88, "step": 69055, "train_speed(iter/s)": 0.413184 }, { "acc": 0.93863287, "epoch": 1.869872471773211, "grad_norm": 3.599439859390259, "learning_rate": 5.945973939873456e-06, "loss": 0.38127398, "memory(GiB)": 34.88, "step": 69060, "train_speed(iter/s)": 0.413186 }, { "acc": 0.91310158, "epoch": 1.8700078520564265, "grad_norm": 9.03130054473877, "learning_rate": 5.945424467505246e-06, "loss": 0.4309742, "memory(GiB)": 34.88, "step": 69065, "train_speed(iter/s)": 0.413187 }, { "acc": 0.9161746, "epoch": 1.870143232339642, "grad_norm": 15.704676628112793, "learning_rate": 5.944874983299996e-06, "loss": 0.45984583, "memory(GiB)": 34.88, "step": 69070, "train_speed(iter/s)": 0.413188 }, { "acc": 0.89930487, "epoch": 1.8702786126228577, "grad_norm": 16.120553970336914, "learning_rate": 5.944325487264598e-06, "loss": 0.56847887, "memory(GiB)": 34.88, "step": 69075, "train_speed(iter/s)": 0.41319 }, { "acc": 0.92115421, "epoch": 1.870413992906073, "grad_norm": 9.263195037841797, "learning_rate": 5.943775979405927e-06, "loss": 0.34184453, "memory(GiB)": 34.88, "step": 69080, "train_speed(iter/s)": 0.413191 }, { "acc": 0.88887901, "epoch": 1.8705493731892888, "grad_norm": 15.678448677062988, "learning_rate": 5.943226459730873e-06, "loss": 0.59914818, "memory(GiB)": 34.88, "step": 69085, "train_speed(iter/s)": 0.413193 }, { "acc": 0.91107044, "epoch": 1.8706847534725042, "grad_norm": 10.166773796081543, "learning_rate": 5.942676928246317e-06, "loss": 0.45050902, "memory(GiB)": 34.88, "step": 69090, "train_speed(iter/s)": 0.413194 }, { "acc": 0.90892296, "epoch": 1.87082013375572, "grad_norm": 12.564587593078613, "learning_rate": 5.942127384959143e-06, "loss": 0.50014315, "memory(GiB)": 34.88, "step": 69095, "train_speed(iter/s)": 0.413196 }, { "acc": 0.93542166, "epoch": 1.8709555140389353, "grad_norm": 15.556602478027344, "learning_rate": 5.941577829876237e-06, "loss": 0.34998302, "memory(GiB)": 34.88, "step": 69100, "train_speed(iter/s)": 0.413197 }, { "acc": 0.91967983, "epoch": 1.871090894322151, "grad_norm": 7.413012504577637, "learning_rate": 5.941028263004482e-06, "loss": 0.4453371, "memory(GiB)": 34.88, "step": 69105, "train_speed(iter/s)": 0.413199 }, { "acc": 0.91753588, "epoch": 1.8712262746053665, "grad_norm": 5.325165271759033, "learning_rate": 5.940478684350763e-06, "loss": 0.33509617, "memory(GiB)": 34.88, "step": 69110, "train_speed(iter/s)": 0.4132 }, { "acc": 0.91804562, "epoch": 1.871361654888582, "grad_norm": 10.381043434143066, "learning_rate": 5.9399290939219615e-06, "loss": 0.45277147, "memory(GiB)": 34.88, "step": 69115, "train_speed(iter/s)": 0.413202 }, { "acc": 0.91162663, "epoch": 1.8714970351717977, "grad_norm": 20.771894454956055, "learning_rate": 5.939379491724967e-06, "loss": 0.48802204, "memory(GiB)": 34.88, "step": 69120, "train_speed(iter/s)": 0.413203 }, { "acc": 0.9206686, "epoch": 1.871632415455013, "grad_norm": 5.19464635848999, "learning_rate": 5.93882987776666e-06, "loss": 0.44446802, "memory(GiB)": 34.88, "step": 69125, "train_speed(iter/s)": 0.413205 }, { "acc": 0.90125237, "epoch": 1.8717677957382288, "grad_norm": 10.051666259765625, "learning_rate": 5.938280252053928e-06, "loss": 0.59941239, "memory(GiB)": 34.88, "step": 69130, "train_speed(iter/s)": 0.413206 }, { "acc": 0.91467743, "epoch": 1.8719031760214442, "grad_norm": 6.127269268035889, "learning_rate": 5.937730614593657e-06, "loss": 0.43780279, "memory(GiB)": 34.88, "step": 69135, "train_speed(iter/s)": 0.413208 }, { "acc": 0.90565434, "epoch": 1.8720385563046598, "grad_norm": 13.53181266784668, "learning_rate": 5.9371809653927285e-06, "loss": 0.47220774, "memory(GiB)": 34.88, "step": 69140, "train_speed(iter/s)": 0.413209 }, { "acc": 0.89348822, "epoch": 1.8721739365878753, "grad_norm": 7.434578895568848, "learning_rate": 5.936631304458032e-06, "loss": 0.53947668, "memory(GiB)": 34.88, "step": 69145, "train_speed(iter/s)": 0.41321 }, { "acc": 0.91213493, "epoch": 1.872309316871091, "grad_norm": 6.672381401062012, "learning_rate": 5.936081631796451e-06, "loss": 0.43279114, "memory(GiB)": 34.88, "step": 69150, "train_speed(iter/s)": 0.413212 }, { "acc": 0.91372309, "epoch": 1.8724446971543065, "grad_norm": 7.915704250335693, "learning_rate": 5.9355319474148735e-06, "loss": 0.47675314, "memory(GiB)": 34.88, "step": 69155, "train_speed(iter/s)": 0.413213 }, { "acc": 0.8966465, "epoch": 1.8725800774375219, "grad_norm": 5.190457344055176, "learning_rate": 5.934982251320182e-06, "loss": 0.57201242, "memory(GiB)": 34.88, "step": 69160, "train_speed(iter/s)": 0.413215 }, { "acc": 0.89945126, "epoch": 1.8727154577207377, "grad_norm": 7.83678674697876, "learning_rate": 5.934432543519263e-06, "loss": 0.55019708, "memory(GiB)": 34.88, "step": 69165, "train_speed(iter/s)": 0.413216 }, { "acc": 0.90798025, "epoch": 1.872850838003953, "grad_norm": 16.475740432739258, "learning_rate": 5.933882824019004e-06, "loss": 0.5592802, "memory(GiB)": 34.88, "step": 69170, "train_speed(iter/s)": 0.413217 }, { "acc": 0.91069565, "epoch": 1.8729862182871686, "grad_norm": 7.211597919464111, "learning_rate": 5.9333330928262914e-06, "loss": 0.47276444, "memory(GiB)": 34.88, "step": 69175, "train_speed(iter/s)": 0.413219 }, { "acc": 0.89290371, "epoch": 1.8731215985703842, "grad_norm": 18.57186508178711, "learning_rate": 5.932783349948012e-06, "loss": 0.54556737, "memory(GiB)": 34.88, "step": 69180, "train_speed(iter/s)": 0.413221 }, { "acc": 0.91904602, "epoch": 1.8732569788535998, "grad_norm": 5.941802501678467, "learning_rate": 5.93223359539105e-06, "loss": 0.40727196, "memory(GiB)": 34.88, "step": 69185, "train_speed(iter/s)": 0.413222 }, { "acc": 0.89987202, "epoch": 1.8733923591368153, "grad_norm": 6.757813930511475, "learning_rate": 5.931683829162297e-06, "loss": 0.50595798, "memory(GiB)": 34.88, "step": 69190, "train_speed(iter/s)": 0.413224 }, { "acc": 0.89378595, "epoch": 1.8735277394200307, "grad_norm": 10.1707181930542, "learning_rate": 5.931134051268635e-06, "loss": 0.61328082, "memory(GiB)": 34.88, "step": 69195, "train_speed(iter/s)": 0.413225 }, { "acc": 0.90979424, "epoch": 1.8736631197032465, "grad_norm": 10.369783401489258, "learning_rate": 5.930584261716951e-06, "loss": 0.43081408, "memory(GiB)": 34.88, "step": 69200, "train_speed(iter/s)": 0.413227 }, { "acc": 0.93110256, "epoch": 1.8737984999864619, "grad_norm": 4.7876129150390625, "learning_rate": 5.930034460514137e-06, "loss": 0.36185422, "memory(GiB)": 34.88, "step": 69205, "train_speed(iter/s)": 0.413228 }, { "acc": 0.92103271, "epoch": 1.8739338802696777, "grad_norm": 8.27259349822998, "learning_rate": 5.929484647667075e-06, "loss": 0.40592952, "memory(GiB)": 34.88, "step": 69210, "train_speed(iter/s)": 0.41323 }, { "acc": 0.90851135, "epoch": 1.874069260552893, "grad_norm": 13.37545394897461, "learning_rate": 5.928934823182656e-06, "loss": 0.55228262, "memory(GiB)": 34.88, "step": 69215, "train_speed(iter/s)": 0.413231 }, { "acc": 0.92532158, "epoch": 1.8742046408361086, "grad_norm": 8.411720275878906, "learning_rate": 5.928384987067766e-06, "loss": 0.40426111, "memory(GiB)": 34.88, "step": 69220, "train_speed(iter/s)": 0.413233 }, { "acc": 0.89791756, "epoch": 1.8743400211193242, "grad_norm": 8.369730949401855, "learning_rate": 5.927835139329294e-06, "loss": 0.53845081, "memory(GiB)": 34.88, "step": 69225, "train_speed(iter/s)": 0.413234 }, { "acc": 0.91001663, "epoch": 1.8744754014025398, "grad_norm": 8.92867660522461, "learning_rate": 5.927285279974127e-06, "loss": 0.4653533, "memory(GiB)": 34.88, "step": 69230, "train_speed(iter/s)": 0.413236 }, { "acc": 0.92305794, "epoch": 1.8746107816857553, "grad_norm": 8.916678428649902, "learning_rate": 5.9267354090091526e-06, "loss": 0.36764016, "memory(GiB)": 34.88, "step": 69235, "train_speed(iter/s)": 0.413237 }, { "acc": 0.92219753, "epoch": 1.8747461619689707, "grad_norm": 15.495840072631836, "learning_rate": 5.9261855264412605e-06, "loss": 0.40754414, "memory(GiB)": 34.88, "step": 69240, "train_speed(iter/s)": 0.413238 }, { "acc": 0.91891861, "epoch": 1.8748815422521865, "grad_norm": 7.96095085144043, "learning_rate": 5.9256356322773365e-06, "loss": 0.41570621, "memory(GiB)": 34.88, "step": 69245, "train_speed(iter/s)": 0.41324 }, { "acc": 0.90075388, "epoch": 1.8750169225354019, "grad_norm": 8.11885929107666, "learning_rate": 5.925085726524272e-06, "loss": 0.55596361, "memory(GiB)": 34.88, "step": 69250, "train_speed(iter/s)": 0.413241 }, { "acc": 0.89611292, "epoch": 1.8751523028186174, "grad_norm": 5.270513534545898, "learning_rate": 5.924535809188955e-06, "loss": 0.55759068, "memory(GiB)": 34.88, "step": 69255, "train_speed(iter/s)": 0.413243 }, { "acc": 0.91566334, "epoch": 1.875287683101833, "grad_norm": 9.80510139465332, "learning_rate": 5.923985880278275e-06, "loss": 0.47344108, "memory(GiB)": 34.88, "step": 69260, "train_speed(iter/s)": 0.413244 }, { "acc": 0.91110325, "epoch": 1.8754230633850486, "grad_norm": 8.080906867980957, "learning_rate": 5.923435939799116e-06, "loss": 0.56729059, "memory(GiB)": 34.88, "step": 69265, "train_speed(iter/s)": 0.413246 }, { "acc": 0.92691889, "epoch": 1.8755584436682642, "grad_norm": 6.752442836761475, "learning_rate": 5.922885987758375e-06, "loss": 0.37248697, "memory(GiB)": 34.88, "step": 69270, "train_speed(iter/s)": 0.413247 }, { "acc": 0.91088524, "epoch": 1.8756938239514795, "grad_norm": 7.837948322296143, "learning_rate": 5.922336024162935e-06, "loss": 0.54064975, "memory(GiB)": 34.88, "step": 69275, "train_speed(iter/s)": 0.413248 }, { "acc": 0.90789957, "epoch": 1.8758292042346953, "grad_norm": 4.543706893920898, "learning_rate": 5.921786049019685e-06, "loss": 0.63579435, "memory(GiB)": 34.88, "step": 69280, "train_speed(iter/s)": 0.41325 }, { "acc": 0.89589319, "epoch": 1.8759645845179107, "grad_norm": 16.1866455078125, "learning_rate": 5.921236062335521e-06, "loss": 0.5504735, "memory(GiB)": 34.88, "step": 69285, "train_speed(iter/s)": 0.413251 }, { "acc": 0.93686924, "epoch": 1.8760999648011265, "grad_norm": 8.707481384277344, "learning_rate": 5.920686064117324e-06, "loss": 0.3736762, "memory(GiB)": 34.88, "step": 69290, "train_speed(iter/s)": 0.413253 }, { "acc": 0.89863281, "epoch": 1.8762353450843419, "grad_norm": 6.813481330871582, "learning_rate": 5.920136054371993e-06, "loss": 0.57705278, "memory(GiB)": 34.88, "step": 69295, "train_speed(iter/s)": 0.413254 }, { "acc": 0.92110596, "epoch": 1.8763707253675574, "grad_norm": 12.400747299194336, "learning_rate": 5.919586033106411e-06, "loss": 0.38462653, "memory(GiB)": 34.88, "step": 69300, "train_speed(iter/s)": 0.413256 }, { "acc": 0.89656048, "epoch": 1.876506105650773, "grad_norm": 12.040826797485352, "learning_rate": 5.919036000327472e-06, "loss": 0.6455224, "memory(GiB)": 34.88, "step": 69305, "train_speed(iter/s)": 0.413257 }, { "acc": 0.90917702, "epoch": 1.8766414859339886, "grad_norm": 11.595172882080078, "learning_rate": 5.918485956042062e-06, "loss": 0.42513857, "memory(GiB)": 34.88, "step": 69310, "train_speed(iter/s)": 0.413259 }, { "acc": 0.91354599, "epoch": 1.8767768662172042, "grad_norm": 6.37591028213501, "learning_rate": 5.917935900257076e-06, "loss": 0.46875906, "memory(GiB)": 34.88, "step": 69315, "train_speed(iter/s)": 0.41326 }, { "acc": 0.91565132, "epoch": 1.8769122465004195, "grad_norm": 15.361098289489746, "learning_rate": 5.917385832979403e-06, "loss": 0.48678217, "memory(GiB)": 34.88, "step": 69320, "train_speed(iter/s)": 0.413261 }, { "acc": 0.90452795, "epoch": 1.8770476267836353, "grad_norm": 11.307869911193848, "learning_rate": 5.916835754215931e-06, "loss": 0.54838886, "memory(GiB)": 34.88, "step": 69325, "train_speed(iter/s)": 0.413263 }, { "acc": 0.91624231, "epoch": 1.8771830070668507, "grad_norm": 15.176322937011719, "learning_rate": 5.916285663973556e-06, "loss": 0.46419988, "memory(GiB)": 34.88, "step": 69330, "train_speed(iter/s)": 0.413264 }, { "acc": 0.90715685, "epoch": 1.8773183873500663, "grad_norm": 10.414863586425781, "learning_rate": 5.915735562259164e-06, "loss": 0.49363689, "memory(GiB)": 34.88, "step": 69335, "train_speed(iter/s)": 0.413266 }, { "acc": 0.9055254, "epoch": 1.8774537676332819, "grad_norm": 12.647871971130371, "learning_rate": 5.915185449079648e-06, "loss": 0.51462393, "memory(GiB)": 34.88, "step": 69340, "train_speed(iter/s)": 0.413267 }, { "acc": 0.92018499, "epoch": 1.8775891479164974, "grad_norm": 6.543125152587891, "learning_rate": 5.9146353244418995e-06, "loss": 0.4683516, "memory(GiB)": 34.88, "step": 69345, "train_speed(iter/s)": 0.413269 }, { "acc": 0.91969433, "epoch": 1.877724528199713, "grad_norm": 12.453712463378906, "learning_rate": 5.9140851883528094e-06, "loss": 0.40942564, "memory(GiB)": 34.88, "step": 69350, "train_speed(iter/s)": 0.41327 }, { "acc": 0.90801096, "epoch": 1.8778599084829284, "grad_norm": 9.655685424804688, "learning_rate": 5.9135350408192726e-06, "loss": 0.49659042, "memory(GiB)": 34.88, "step": 69355, "train_speed(iter/s)": 0.413272 }, { "acc": 0.91768532, "epoch": 1.8779952887661442, "grad_norm": 7.0346808433532715, "learning_rate": 5.912984881848175e-06, "loss": 0.46985006, "memory(GiB)": 34.88, "step": 69360, "train_speed(iter/s)": 0.413273 }, { "acc": 0.90986557, "epoch": 1.8781306690493595, "grad_norm": 8.438255310058594, "learning_rate": 5.912434711446413e-06, "loss": 0.55876884, "memory(GiB)": 34.88, "step": 69365, "train_speed(iter/s)": 0.413275 }, { "acc": 0.91537476, "epoch": 1.8782660493325753, "grad_norm": 18.33661651611328, "learning_rate": 5.9118845296208745e-06, "loss": 0.49216728, "memory(GiB)": 34.88, "step": 69370, "train_speed(iter/s)": 0.413276 }, { "acc": 0.92563553, "epoch": 1.8784014296157907, "grad_norm": 9.20930004119873, "learning_rate": 5.911334336378457e-06, "loss": 0.39063394, "memory(GiB)": 34.88, "step": 69375, "train_speed(iter/s)": 0.413278 }, { "acc": 0.92107563, "epoch": 1.8785368098990063, "grad_norm": 4.800032138824463, "learning_rate": 5.910784131726047e-06, "loss": 0.42479849, "memory(GiB)": 34.88, "step": 69380, "train_speed(iter/s)": 0.413279 }, { "acc": 0.88715963, "epoch": 1.8786721901822219, "grad_norm": 13.34782600402832, "learning_rate": 5.910233915670539e-06, "loss": 0.61744084, "memory(GiB)": 34.88, "step": 69385, "train_speed(iter/s)": 0.413281 }, { "acc": 0.9164566, "epoch": 1.8788075704654374, "grad_norm": 7.749855995178223, "learning_rate": 5.909683688218829e-06, "loss": 0.41413507, "memory(GiB)": 34.88, "step": 69390, "train_speed(iter/s)": 0.413282 }, { "acc": 0.92189407, "epoch": 1.878942950748653, "grad_norm": 7.183866024017334, "learning_rate": 5.909133449377804e-06, "loss": 0.35469842, "memory(GiB)": 34.88, "step": 69395, "train_speed(iter/s)": 0.413284 }, { "acc": 0.89482298, "epoch": 1.8790783310318684, "grad_norm": 7.482753753662109, "learning_rate": 5.908583199154363e-06, "loss": 0.53918629, "memory(GiB)": 34.88, "step": 69400, "train_speed(iter/s)": 0.413285 }, { "acc": 0.91437569, "epoch": 1.8792137113150842, "grad_norm": 17.750669479370117, "learning_rate": 5.908032937555392e-06, "loss": 0.47268238, "memory(GiB)": 34.88, "step": 69405, "train_speed(iter/s)": 0.413286 }, { "acc": 0.90992231, "epoch": 1.8793490915982995, "grad_norm": 7.417503833770752, "learning_rate": 5.9074826645877905e-06, "loss": 0.4365015, "memory(GiB)": 34.88, "step": 69410, "train_speed(iter/s)": 0.413288 }, { "acc": 0.92467127, "epoch": 1.8794844718815151, "grad_norm": 7.736050128936768, "learning_rate": 5.906932380258447e-06, "loss": 0.42579336, "memory(GiB)": 34.88, "step": 69415, "train_speed(iter/s)": 0.413289 }, { "acc": 0.9239728, "epoch": 1.8796198521647307, "grad_norm": 4.318650722503662, "learning_rate": 5.906382084574258e-06, "loss": 0.46749201, "memory(GiB)": 34.88, "step": 69420, "train_speed(iter/s)": 0.413291 }, { "acc": 0.90574942, "epoch": 1.8797552324479463, "grad_norm": 8.313546180725098, "learning_rate": 5.905831777542114e-06, "loss": 0.50368223, "memory(GiB)": 34.88, "step": 69425, "train_speed(iter/s)": 0.413292 }, { "acc": 0.91401615, "epoch": 1.8798906127311619, "grad_norm": 4.892337799072266, "learning_rate": 5.905281459168911e-06, "loss": 0.44144411, "memory(GiB)": 34.88, "step": 69430, "train_speed(iter/s)": 0.413293 }, { "acc": 0.90788441, "epoch": 1.8800259930143772, "grad_norm": 6.487215995788574, "learning_rate": 5.904731129461544e-06, "loss": 0.468572, "memory(GiB)": 34.88, "step": 69435, "train_speed(iter/s)": 0.413295 }, { "acc": 0.90391512, "epoch": 1.880161373297593, "grad_norm": 8.485633850097656, "learning_rate": 5.904180788426904e-06, "loss": 0.54717083, "memory(GiB)": 34.88, "step": 69440, "train_speed(iter/s)": 0.413296 }, { "acc": 0.91427822, "epoch": 1.8802967535808084, "grad_norm": 16.22295379638672, "learning_rate": 5.903630436071886e-06, "loss": 0.48979311, "memory(GiB)": 34.88, "step": 69445, "train_speed(iter/s)": 0.413298 }, { "acc": 0.9238451, "epoch": 1.8804321338640242, "grad_norm": 4.802877426147461, "learning_rate": 5.903080072403385e-06, "loss": 0.38334212, "memory(GiB)": 34.88, "step": 69450, "train_speed(iter/s)": 0.413299 }, { "acc": 0.93929596, "epoch": 1.8805675141472395, "grad_norm": 5.683087348937988, "learning_rate": 5.902529697428297e-06, "loss": 0.32820821, "memory(GiB)": 34.88, "step": 69455, "train_speed(iter/s)": 0.413301 }, { "acc": 0.91341171, "epoch": 1.8807028944304551, "grad_norm": 10.66460132598877, "learning_rate": 5.901979311153512e-06, "loss": 0.54727039, "memory(GiB)": 34.88, "step": 69460, "train_speed(iter/s)": 0.413302 }, { "acc": 0.91526251, "epoch": 1.8808382747136707, "grad_norm": 10.965822219848633, "learning_rate": 5.901428913585928e-06, "loss": 0.43652496, "memory(GiB)": 34.88, "step": 69465, "train_speed(iter/s)": 0.413304 }, { "acc": 0.91952629, "epoch": 1.8809736549968863, "grad_norm": 15.021499633789062, "learning_rate": 5.900878504732439e-06, "loss": 0.47235932, "memory(GiB)": 34.88, "step": 69470, "train_speed(iter/s)": 0.413305 }, { "acc": 0.90763292, "epoch": 1.8811090352801019, "grad_norm": 3.5487401485443115, "learning_rate": 5.90032808459994e-06, "loss": 0.51062956, "memory(GiB)": 34.88, "step": 69475, "train_speed(iter/s)": 0.413307 }, { "acc": 0.92297573, "epoch": 1.8812444155633172, "grad_norm": 9.587187767028809, "learning_rate": 5.899777653195327e-06, "loss": 0.48888617, "memory(GiB)": 34.88, "step": 69480, "train_speed(iter/s)": 0.413308 }, { "acc": 0.91467094, "epoch": 1.881379795846533, "grad_norm": 7.593390941619873, "learning_rate": 5.899227210525493e-06, "loss": 0.48897581, "memory(GiB)": 34.88, "step": 69485, "train_speed(iter/s)": 0.413309 }, { "acc": 0.89818172, "epoch": 1.8815151761297484, "grad_norm": 15.221484184265137, "learning_rate": 5.898676756597336e-06, "loss": 0.58284712, "memory(GiB)": 34.88, "step": 69490, "train_speed(iter/s)": 0.41331 }, { "acc": 0.90918684, "epoch": 1.881650556412964, "grad_norm": 7.0989603996276855, "learning_rate": 5.898126291417749e-06, "loss": 0.45014272, "memory(GiB)": 34.88, "step": 69495, "train_speed(iter/s)": 0.413312 }, { "acc": 0.92646484, "epoch": 1.8817859366961796, "grad_norm": 8.921154975891113, "learning_rate": 5.897575814993629e-06, "loss": 0.4174602, "memory(GiB)": 34.88, "step": 69500, "train_speed(iter/s)": 0.413313 }, { "acc": 0.90369358, "epoch": 1.8819213169793951, "grad_norm": 15.788368225097656, "learning_rate": 5.897025327331872e-06, "loss": 0.52080007, "memory(GiB)": 34.88, "step": 69505, "train_speed(iter/s)": 0.413315 }, { "acc": 0.88603716, "epoch": 1.8820566972626107, "grad_norm": 12.822381019592285, "learning_rate": 5.896474828439373e-06, "loss": 0.6287137, "memory(GiB)": 34.88, "step": 69510, "train_speed(iter/s)": 0.413316 }, { "acc": 0.91941586, "epoch": 1.882192077545826, "grad_norm": 9.16733169555664, "learning_rate": 5.89592431832303e-06, "loss": 0.44537859, "memory(GiB)": 34.88, "step": 69515, "train_speed(iter/s)": 0.413318 }, { "acc": 0.9073947, "epoch": 1.8823274578290419, "grad_norm": 7.007957458496094, "learning_rate": 5.8953737969897366e-06, "loss": 0.51549096, "memory(GiB)": 34.88, "step": 69520, "train_speed(iter/s)": 0.413319 }, { "acc": 0.92173977, "epoch": 1.8824628381122572, "grad_norm": 6.401764392852783, "learning_rate": 5.894823264446392e-06, "loss": 0.39567225, "memory(GiB)": 34.88, "step": 69525, "train_speed(iter/s)": 0.413321 }, { "acc": 0.90640736, "epoch": 1.882598218395473, "grad_norm": 17.36490821838379, "learning_rate": 5.8942727206998895e-06, "loss": 0.53662119, "memory(GiB)": 34.88, "step": 69530, "train_speed(iter/s)": 0.413322 }, { "acc": 0.91642141, "epoch": 1.8827335986786884, "grad_norm": 12.576780319213867, "learning_rate": 5.893722165757128e-06, "loss": 0.48926482, "memory(GiB)": 34.88, "step": 69535, "train_speed(iter/s)": 0.413324 }, { "acc": 0.9141468, "epoch": 1.882868978961904, "grad_norm": 38.498985290527344, "learning_rate": 5.893171599625004e-06, "loss": 0.5517395, "memory(GiB)": 34.88, "step": 69540, "train_speed(iter/s)": 0.413325 }, { "acc": 0.90996389, "epoch": 1.8830043592451196, "grad_norm": 19.473724365234375, "learning_rate": 5.892621022310414e-06, "loss": 0.47315316, "memory(GiB)": 34.88, "step": 69545, "train_speed(iter/s)": 0.413327 }, { "acc": 0.89369278, "epoch": 1.8831397395283351, "grad_norm": 8.268498420715332, "learning_rate": 5.892070433820255e-06, "loss": 0.55386834, "memory(GiB)": 34.88, "step": 69550, "train_speed(iter/s)": 0.413328 }, { "acc": 0.91730518, "epoch": 1.8832751198115507, "grad_norm": 9.88885498046875, "learning_rate": 5.891519834161424e-06, "loss": 0.52783489, "memory(GiB)": 34.88, "step": 69555, "train_speed(iter/s)": 0.413329 }, { "acc": 0.90313292, "epoch": 1.883410500094766, "grad_norm": 3.880542278289795, "learning_rate": 5.89096922334082e-06, "loss": 0.50652924, "memory(GiB)": 34.88, "step": 69560, "train_speed(iter/s)": 0.413331 }, { "acc": 0.90706377, "epoch": 1.8835458803779819, "grad_norm": 10.96177864074707, "learning_rate": 5.8904186013653384e-06, "loss": 0.55017948, "memory(GiB)": 34.88, "step": 69565, "train_speed(iter/s)": 0.413332 }, { "acc": 0.91535892, "epoch": 1.8836812606611972, "grad_norm": 5.271006107330322, "learning_rate": 5.889867968241877e-06, "loss": 0.50242624, "memory(GiB)": 34.88, "step": 69570, "train_speed(iter/s)": 0.413334 }, { "acc": 0.8951828, "epoch": 1.8838166409444128, "grad_norm": 6.951358795166016, "learning_rate": 5.8893173239773345e-06, "loss": 0.56882505, "memory(GiB)": 34.88, "step": 69575, "train_speed(iter/s)": 0.413335 }, { "acc": 0.92464714, "epoch": 1.8839520212276284, "grad_norm": 7.365933895111084, "learning_rate": 5.888766668578609e-06, "loss": 0.43867254, "memory(GiB)": 34.88, "step": 69580, "train_speed(iter/s)": 0.413337 }, { "acc": 0.90600004, "epoch": 1.884087401510844, "grad_norm": 8.864566802978516, "learning_rate": 5.888216002052598e-06, "loss": 0.63540912, "memory(GiB)": 34.88, "step": 69585, "train_speed(iter/s)": 0.413338 }, { "acc": 0.93760281, "epoch": 1.8842227817940596, "grad_norm": 3.9245734214782715, "learning_rate": 5.8876653244062e-06, "loss": 0.32823162, "memory(GiB)": 34.88, "step": 69590, "train_speed(iter/s)": 0.41334 }, { "acc": 0.91562738, "epoch": 1.884358162077275, "grad_norm": 12.169498443603516, "learning_rate": 5.887114635646313e-06, "loss": 0.44523106, "memory(GiB)": 34.88, "step": 69595, "train_speed(iter/s)": 0.413341 }, { "acc": 0.91201468, "epoch": 1.8844935423604907, "grad_norm": 6.698673248291016, "learning_rate": 5.886563935779834e-06, "loss": 0.50293489, "memory(GiB)": 34.88, "step": 69600, "train_speed(iter/s)": 0.413342 }, { "acc": 0.90900345, "epoch": 1.884628922643706, "grad_norm": 18.14196014404297, "learning_rate": 5.886013224813665e-06, "loss": 0.480371, "memory(GiB)": 34.88, "step": 69605, "train_speed(iter/s)": 0.413344 }, { "acc": 0.90731888, "epoch": 1.8847643029269219, "grad_norm": 15.219720840454102, "learning_rate": 5.885462502754702e-06, "loss": 0.52768579, "memory(GiB)": 34.88, "step": 69610, "train_speed(iter/s)": 0.413345 }, { "acc": 0.90291328, "epoch": 1.8848996832101372, "grad_norm": 12.315189361572266, "learning_rate": 5.884911769609844e-06, "loss": 0.56092081, "memory(GiB)": 34.88, "step": 69615, "train_speed(iter/s)": 0.413347 }, { "acc": 0.91026592, "epoch": 1.8850350634933528, "grad_norm": 4.425899982452393, "learning_rate": 5.884361025385993e-06, "loss": 0.42223859, "memory(GiB)": 34.88, "step": 69620, "train_speed(iter/s)": 0.413348 }, { "acc": 0.90502434, "epoch": 1.8851704437765684, "grad_norm": 19.56256675720215, "learning_rate": 5.8838102700900435e-06, "loss": 0.57318807, "memory(GiB)": 34.88, "step": 69625, "train_speed(iter/s)": 0.41335 }, { "acc": 0.89973812, "epoch": 1.885305824059784, "grad_norm": 14.387184143066406, "learning_rate": 5.883259503728898e-06, "loss": 0.58354611, "memory(GiB)": 34.88, "step": 69630, "train_speed(iter/s)": 0.413351 }, { "acc": 0.88991776, "epoch": 1.8854412043429996, "grad_norm": 9.035870552062988, "learning_rate": 5.882708726309455e-06, "loss": 0.6465065, "memory(GiB)": 34.88, "step": 69635, "train_speed(iter/s)": 0.413353 }, { "acc": 0.92573032, "epoch": 1.885576584626215, "grad_norm": 21.660076141357422, "learning_rate": 5.8821579378386155e-06, "loss": 0.43160076, "memory(GiB)": 34.88, "step": 69640, "train_speed(iter/s)": 0.413354 }, { "acc": 0.9147315, "epoch": 1.8857119649094307, "grad_norm": 5.163581371307373, "learning_rate": 5.881607138323275e-06, "loss": 0.44467335, "memory(GiB)": 34.88, "step": 69645, "train_speed(iter/s)": 0.413355 }, { "acc": 0.90147514, "epoch": 1.885847345192646, "grad_norm": 36.4893913269043, "learning_rate": 5.881056327770338e-06, "loss": 0.45925207, "memory(GiB)": 34.88, "step": 69650, "train_speed(iter/s)": 0.413357 }, { "acc": 0.91818123, "epoch": 1.8859827254758617, "grad_norm": 8.552387237548828, "learning_rate": 5.880505506186703e-06, "loss": 0.50724916, "memory(GiB)": 34.88, "step": 69655, "train_speed(iter/s)": 0.413358 }, { "acc": 0.9126193, "epoch": 1.8861181057590772, "grad_norm": 6.319797039031982, "learning_rate": 5.87995467357927e-06, "loss": 0.49421644, "memory(GiB)": 34.88, "step": 69660, "train_speed(iter/s)": 0.41336 }, { "acc": 0.89661694, "epoch": 1.8862534860422928, "grad_norm": 8.029623985290527, "learning_rate": 5.879403829954938e-06, "loss": 0.58785415, "memory(GiB)": 34.88, "step": 69665, "train_speed(iter/s)": 0.413361 }, { "acc": 0.92819653, "epoch": 1.8863888663255084, "grad_norm": 10.120704650878906, "learning_rate": 5.878852975320608e-06, "loss": 0.42150207, "memory(GiB)": 34.88, "step": 69670, "train_speed(iter/s)": 0.413363 }, { "acc": 0.91057472, "epoch": 1.8865242466087238, "grad_norm": 8.003865242004395, "learning_rate": 5.878302109683183e-06, "loss": 0.53142395, "memory(GiB)": 34.88, "step": 69675, "train_speed(iter/s)": 0.413364 }, { "acc": 0.92066431, "epoch": 1.8866596268919396, "grad_norm": 4.640442371368408, "learning_rate": 5.877751233049561e-06, "loss": 0.39023173, "memory(GiB)": 34.88, "step": 69680, "train_speed(iter/s)": 0.413366 }, { "acc": 0.93117638, "epoch": 1.886795007175155, "grad_norm": 5.421475410461426, "learning_rate": 5.877200345426642e-06, "loss": 0.32945886, "memory(GiB)": 34.88, "step": 69685, "train_speed(iter/s)": 0.413367 }, { "acc": 0.910079, "epoch": 1.8869303874583707, "grad_norm": 12.383209228515625, "learning_rate": 5.876649446821331e-06, "loss": 0.5221633, "memory(GiB)": 34.88, "step": 69690, "train_speed(iter/s)": 0.413369 }, { "acc": 0.91661453, "epoch": 1.887065767741586, "grad_norm": 5.9830522537231445, "learning_rate": 5.876098537240525e-06, "loss": 0.48094053, "memory(GiB)": 34.88, "step": 69695, "train_speed(iter/s)": 0.41337 }, { "acc": 0.90588512, "epoch": 1.8872011480248017, "grad_norm": 9.762350082397461, "learning_rate": 5.875547616691127e-06, "loss": 0.49711857, "memory(GiB)": 34.88, "step": 69700, "train_speed(iter/s)": 0.413371 }, { "acc": 0.90879526, "epoch": 1.8873365283080172, "grad_norm": 6.058906555175781, "learning_rate": 5.874996685180038e-06, "loss": 0.54141421, "memory(GiB)": 34.88, "step": 69705, "train_speed(iter/s)": 0.413373 }, { "acc": 0.9118145, "epoch": 1.8874719085912328, "grad_norm": 15.567076683044434, "learning_rate": 5.87444574271416e-06, "loss": 0.50563917, "memory(GiB)": 34.88, "step": 69710, "train_speed(iter/s)": 0.413374 }, { "acc": 0.9226325, "epoch": 1.8876072888744484, "grad_norm": 5.409777641296387, "learning_rate": 5.873894789300394e-06, "loss": 0.44320221, "memory(GiB)": 34.88, "step": 69715, "train_speed(iter/s)": 0.413375 }, { "acc": 0.90042582, "epoch": 1.8877426691576638, "grad_norm": 13.729480743408203, "learning_rate": 5.873343824945642e-06, "loss": 0.62945395, "memory(GiB)": 34.88, "step": 69720, "train_speed(iter/s)": 0.413377 }, { "acc": 0.92356758, "epoch": 1.8878780494408796, "grad_norm": 9.645312309265137, "learning_rate": 5.872792849656805e-06, "loss": 0.43441038, "memory(GiB)": 34.88, "step": 69725, "train_speed(iter/s)": 0.413378 }, { "acc": 0.93024549, "epoch": 1.888013429724095, "grad_norm": 7.500481128692627, "learning_rate": 5.872241863440786e-06, "loss": 0.38055959, "memory(GiB)": 34.88, "step": 69730, "train_speed(iter/s)": 0.41338 }, { "acc": 0.91228275, "epoch": 1.8881488100073105, "grad_norm": 18.205163955688477, "learning_rate": 5.87169086630449e-06, "loss": 0.48183565, "memory(GiB)": 34.88, "step": 69735, "train_speed(iter/s)": 0.413381 }, { "acc": 0.91519747, "epoch": 1.888284190290526, "grad_norm": 8.387840270996094, "learning_rate": 5.871139858254815e-06, "loss": 0.46864281, "memory(GiB)": 34.88, "step": 69740, "train_speed(iter/s)": 0.413382 }, { "acc": 0.91698704, "epoch": 1.8884195705737417, "grad_norm": 7.104966163635254, "learning_rate": 5.8705888392986655e-06, "loss": 0.42883129, "memory(GiB)": 34.88, "step": 69745, "train_speed(iter/s)": 0.413384 }, { "acc": 0.90823574, "epoch": 1.8885549508569572, "grad_norm": 11.567007064819336, "learning_rate": 5.870037809442942e-06, "loss": 0.48020029, "memory(GiB)": 34.88, "step": 69750, "train_speed(iter/s)": 0.413385 }, { "acc": 0.9047061, "epoch": 1.8886903311401726, "grad_norm": 25.473060607910156, "learning_rate": 5.86948676869455e-06, "loss": 0.66115031, "memory(GiB)": 34.88, "step": 69755, "train_speed(iter/s)": 0.413387 }, { "acc": 0.90637417, "epoch": 1.8888257114233884, "grad_norm": 11.582453727722168, "learning_rate": 5.868935717060389e-06, "loss": 0.53639479, "memory(GiB)": 34.88, "step": 69760, "train_speed(iter/s)": 0.413388 }, { "acc": 0.92887201, "epoch": 1.8889610917066038, "grad_norm": 9.248261451721191, "learning_rate": 5.868384654547366e-06, "loss": 0.40660825, "memory(GiB)": 34.88, "step": 69765, "train_speed(iter/s)": 0.41339 }, { "acc": 0.91141272, "epoch": 1.8890964719898196, "grad_norm": 9.295589447021484, "learning_rate": 5.8678335811623835e-06, "loss": 0.48779407, "memory(GiB)": 34.88, "step": 69770, "train_speed(iter/s)": 0.413391 }, { "acc": 0.91077728, "epoch": 1.889231852273035, "grad_norm": 9.491167068481445, "learning_rate": 5.867282496912342e-06, "loss": 0.49238882, "memory(GiB)": 34.88, "step": 69775, "train_speed(iter/s)": 0.413393 }, { "acc": 0.91408939, "epoch": 1.8893672325562505, "grad_norm": 11.320104598999023, "learning_rate": 5.866731401804147e-06, "loss": 0.52273149, "memory(GiB)": 34.88, "step": 69780, "train_speed(iter/s)": 0.413394 }, { "acc": 0.92428436, "epoch": 1.889502612839466, "grad_norm": 10.991288185119629, "learning_rate": 5.866180295844701e-06, "loss": 0.37971582, "memory(GiB)": 34.88, "step": 69785, "train_speed(iter/s)": 0.413396 }, { "acc": 0.92366104, "epoch": 1.8896379931226817, "grad_norm": 4.433679103851318, "learning_rate": 5.8656291790409094e-06, "loss": 0.49621334, "memory(GiB)": 34.88, "step": 69790, "train_speed(iter/s)": 0.413397 }, { "acc": 0.90822144, "epoch": 1.8897733734058972, "grad_norm": 8.177590370178223, "learning_rate": 5.865078051399673e-06, "loss": 0.5513485, "memory(GiB)": 34.88, "step": 69795, "train_speed(iter/s)": 0.413399 }, { "acc": 0.91917133, "epoch": 1.8899087536891126, "grad_norm": 9.130359649658203, "learning_rate": 5.864526912927898e-06, "loss": 0.47903304, "memory(GiB)": 34.88, "step": 69800, "train_speed(iter/s)": 0.4134 }, { "acc": 0.9061265, "epoch": 1.8900441339723284, "grad_norm": 7.421965599060059, "learning_rate": 5.863975763632489e-06, "loss": 0.47029562, "memory(GiB)": 34.88, "step": 69805, "train_speed(iter/s)": 0.413401 }, { "acc": 0.92565117, "epoch": 1.8901795142555438, "grad_norm": 9.421907424926758, "learning_rate": 5.86342460352035e-06, "loss": 0.34114399, "memory(GiB)": 34.88, "step": 69810, "train_speed(iter/s)": 0.413403 }, { "acc": 0.90127697, "epoch": 1.8903148945387593, "grad_norm": 12.971029281616211, "learning_rate": 5.8628734325983835e-06, "loss": 0.52823029, "memory(GiB)": 34.88, "step": 69815, "train_speed(iter/s)": 0.413404 }, { "acc": 0.88667812, "epoch": 1.890450274821975, "grad_norm": 17.60029411315918, "learning_rate": 5.862322250873496e-06, "loss": 0.73138981, "memory(GiB)": 34.88, "step": 69820, "train_speed(iter/s)": 0.413406 }, { "acc": 0.91220074, "epoch": 1.8905856551051905, "grad_norm": 6.158237934112549, "learning_rate": 5.861771058352591e-06, "loss": 0.58121758, "memory(GiB)": 34.88, "step": 69825, "train_speed(iter/s)": 0.413407 }, { "acc": 0.91726818, "epoch": 1.890721035388406, "grad_norm": 7.321700572967529, "learning_rate": 5.861219855042574e-06, "loss": 0.47209692, "memory(GiB)": 34.88, "step": 69830, "train_speed(iter/s)": 0.413409 }, { "acc": 0.92121849, "epoch": 1.8908564156716214, "grad_norm": 9.788264274597168, "learning_rate": 5.8606686409503475e-06, "loss": 0.46798711, "memory(GiB)": 34.88, "step": 69835, "train_speed(iter/s)": 0.41341 }, { "acc": 0.92139206, "epoch": 1.8909917959548372, "grad_norm": 8.768321990966797, "learning_rate": 5.86011741608282e-06, "loss": 0.33741534, "memory(GiB)": 34.88, "step": 69840, "train_speed(iter/s)": 0.413412 }, { "acc": 0.8994894, "epoch": 1.8911271762380526, "grad_norm": 8.137944221496582, "learning_rate": 5.859566180446895e-06, "loss": 0.5510325, "memory(GiB)": 34.88, "step": 69845, "train_speed(iter/s)": 0.413413 }, { "acc": 0.93701763, "epoch": 1.8912625565212684, "grad_norm": 6.276263236999512, "learning_rate": 5.859014934049478e-06, "loss": 0.31972933, "memory(GiB)": 34.88, "step": 69850, "train_speed(iter/s)": 0.413414 }, { "acc": 0.89790983, "epoch": 1.8913979368044838, "grad_norm": 3.9465765953063965, "learning_rate": 5.8584636768974745e-06, "loss": 0.60393338, "memory(GiB)": 34.88, "step": 69855, "train_speed(iter/s)": 0.413416 }, { "acc": 0.931493, "epoch": 1.8915333170876993, "grad_norm": 5.754166603088379, "learning_rate": 5.8579124089977925e-06, "loss": 0.33576779, "memory(GiB)": 34.88, "step": 69860, "train_speed(iter/s)": 0.413417 }, { "acc": 0.89894676, "epoch": 1.891668697370915, "grad_norm": 9.147784233093262, "learning_rate": 5.8573611303573304e-06, "loss": 0.61805143, "memory(GiB)": 34.88, "step": 69865, "train_speed(iter/s)": 0.413419 }, { "acc": 0.90655384, "epoch": 1.8918040776541305, "grad_norm": 8.262896537780762, "learning_rate": 5.8568098409830034e-06, "loss": 0.55263309, "memory(GiB)": 34.88, "step": 69870, "train_speed(iter/s)": 0.41342 }, { "acc": 0.92596874, "epoch": 1.891939457937346, "grad_norm": 7.819955825805664, "learning_rate": 5.856258540881711e-06, "loss": 0.36222875, "memory(GiB)": 34.88, "step": 69875, "train_speed(iter/s)": 0.413422 }, { "acc": 0.89452801, "epoch": 1.8920748382205614, "grad_norm": 29.787246704101562, "learning_rate": 5.85570723006036e-06, "loss": 0.59778595, "memory(GiB)": 34.88, "step": 69880, "train_speed(iter/s)": 0.413423 }, { "acc": 0.90893478, "epoch": 1.8922102185037772, "grad_norm": 11.372523307800293, "learning_rate": 5.85515590852586e-06, "loss": 0.47790895, "memory(GiB)": 34.88, "step": 69885, "train_speed(iter/s)": 0.413425 }, { "acc": 0.90686436, "epoch": 1.8923455987869926, "grad_norm": 7.596898555755615, "learning_rate": 5.854604576285114e-06, "loss": 0.51873808, "memory(GiB)": 34.88, "step": 69890, "train_speed(iter/s)": 0.413426 }, { "acc": 0.92539845, "epoch": 1.8924809790702082, "grad_norm": 7.720739841461182, "learning_rate": 5.854053233345031e-06, "loss": 0.3896807, "memory(GiB)": 34.88, "step": 69895, "train_speed(iter/s)": 0.413427 }, { "acc": 0.94058552, "epoch": 1.8926163593534238, "grad_norm": 8.074347496032715, "learning_rate": 5.853501879712515e-06, "loss": 0.33532577, "memory(GiB)": 34.88, "step": 69900, "train_speed(iter/s)": 0.413429 }, { "acc": 0.90164099, "epoch": 1.8927517396366393, "grad_norm": 23.694021224975586, "learning_rate": 5.852950515394479e-06, "loss": 0.54714651, "memory(GiB)": 34.88, "step": 69905, "train_speed(iter/s)": 0.41343 }, { "acc": 0.91566248, "epoch": 1.892887119919855, "grad_norm": 8.958781242370605, "learning_rate": 5.85239914039782e-06, "loss": 0.50609846, "memory(GiB)": 34.88, "step": 69910, "train_speed(iter/s)": 0.413432 }, { "acc": 0.92293139, "epoch": 1.8930225002030703, "grad_norm": 7.967942237854004, "learning_rate": 5.851847754729452e-06, "loss": 0.41738205, "memory(GiB)": 34.88, "step": 69915, "train_speed(iter/s)": 0.413433 }, { "acc": 0.90824852, "epoch": 1.893157880486286, "grad_norm": 4.68381404876709, "learning_rate": 5.85129635839628e-06, "loss": 0.50375729, "memory(GiB)": 34.88, "step": 69920, "train_speed(iter/s)": 0.413435 }, { "acc": 0.90495338, "epoch": 1.8932932607695014, "grad_norm": 6.658134937286377, "learning_rate": 5.850744951405213e-06, "loss": 0.51677675, "memory(GiB)": 34.88, "step": 69925, "train_speed(iter/s)": 0.413436 }, { "acc": 0.89912262, "epoch": 1.8934286410527172, "grad_norm": 27.70630645751953, "learning_rate": 5.850193533763157e-06, "loss": 0.56389418, "memory(GiB)": 34.88, "step": 69930, "train_speed(iter/s)": 0.413438 }, { "acc": 0.88880138, "epoch": 1.8935640213359326, "grad_norm": 15.275429725646973, "learning_rate": 5.849642105477018e-06, "loss": 0.64688611, "memory(GiB)": 34.88, "step": 69935, "train_speed(iter/s)": 0.413439 }, { "acc": 0.91512375, "epoch": 1.8936994016191482, "grad_norm": 6.320425510406494, "learning_rate": 5.849090666553707e-06, "loss": 0.41951451, "memory(GiB)": 34.88, "step": 69940, "train_speed(iter/s)": 0.413441 }, { "acc": 0.88316956, "epoch": 1.8938347819023638, "grad_norm": 22.13029670715332, "learning_rate": 5.848539217000131e-06, "loss": 0.60369511, "memory(GiB)": 34.88, "step": 69945, "train_speed(iter/s)": 0.413442 }, { "acc": 0.91346903, "epoch": 1.8939701621855793, "grad_norm": 8.650355339050293, "learning_rate": 5.847987756823197e-06, "loss": 0.55202532, "memory(GiB)": 34.88, "step": 69950, "train_speed(iter/s)": 0.413443 }, { "acc": 0.92523632, "epoch": 1.894105542468795, "grad_norm": 9.277615547180176, "learning_rate": 5.847436286029814e-06, "loss": 0.46544027, "memory(GiB)": 34.88, "step": 69955, "train_speed(iter/s)": 0.413445 }, { "acc": 0.92356253, "epoch": 1.8942409227520103, "grad_norm": 9.080395698547363, "learning_rate": 5.846884804626888e-06, "loss": 0.40828414, "memory(GiB)": 34.88, "step": 69960, "train_speed(iter/s)": 0.413446 }, { "acc": 0.92145014, "epoch": 1.894376303035226, "grad_norm": 6.752386093139648, "learning_rate": 5.846333312621331e-06, "loss": 0.38452682, "memory(GiB)": 34.88, "step": 69965, "train_speed(iter/s)": 0.413448 }, { "acc": 0.91452141, "epoch": 1.8945116833184414, "grad_norm": 10.22541618347168, "learning_rate": 5.845781810020047e-06, "loss": 0.46358638, "memory(GiB)": 34.88, "step": 69970, "train_speed(iter/s)": 0.413449 }, { "acc": 0.91904163, "epoch": 1.894647063601657, "grad_norm": 11.283098220825195, "learning_rate": 5.84523029682995e-06, "loss": 0.44327726, "memory(GiB)": 34.88, "step": 69975, "train_speed(iter/s)": 0.413451 }, { "acc": 0.90598783, "epoch": 1.8947824438848726, "grad_norm": 15.296136856079102, "learning_rate": 5.844678773057946e-06, "loss": 0.52166066, "memory(GiB)": 34.88, "step": 69980, "train_speed(iter/s)": 0.413452 }, { "acc": 0.91920671, "epoch": 1.8949178241680882, "grad_norm": 6.353365898132324, "learning_rate": 5.844127238710942e-06, "loss": 0.41329079, "memory(GiB)": 34.88, "step": 69985, "train_speed(iter/s)": 0.413453 }, { "acc": 0.8873579, "epoch": 1.8950532044513038, "grad_norm": 8.954113006591797, "learning_rate": 5.843575693795852e-06, "loss": 0.68057432, "memory(GiB)": 34.88, "step": 69990, "train_speed(iter/s)": 0.413455 }, { "acc": 0.89678326, "epoch": 1.8951885847345191, "grad_norm": 14.15597152709961, "learning_rate": 5.843024138319582e-06, "loss": 0.56080637, "memory(GiB)": 34.88, "step": 69995, "train_speed(iter/s)": 0.413456 }, { "acc": 0.92512493, "epoch": 1.895323965017735, "grad_norm": 10.120418548583984, "learning_rate": 5.842472572289039e-06, "loss": 0.39546194, "memory(GiB)": 34.88, "step": 70000, "train_speed(iter/s)": 0.413457 }, { "epoch": 1.895323965017735, "eval_acc": 0.6104192166091353, "eval_loss": 1.1415523290634155, "eval_runtime": 1295.0925, "eval_samples_per_second": 66.64, "eval_steps_per_second": 2.083, "step": 70000 }, { "acc": 0.91267357, "epoch": 1.8954593453009503, "grad_norm": 9.276623725891113, "learning_rate": 5.8419209957111375e-06, "loss": 0.53787961, "memory(GiB)": 34.88, "step": 70005, "train_speed(iter/s)": 0.41026 }, { "acc": 0.8848856, "epoch": 1.895594725584166, "grad_norm": 11.67697811126709, "learning_rate": 5.841369408592783e-06, "loss": 0.71915884, "memory(GiB)": 34.88, "step": 70010, "train_speed(iter/s)": 0.410262 }, { "acc": 0.899617, "epoch": 1.8957301058673814, "grad_norm": 10.476056098937988, "learning_rate": 5.8408178109408885e-06, "loss": 0.4959321, "memory(GiB)": 34.88, "step": 70015, "train_speed(iter/s)": 0.410263 }, { "acc": 0.89793358, "epoch": 1.895865486150597, "grad_norm": 5.82137393951416, "learning_rate": 5.84026620276236e-06, "loss": 0.57270398, "memory(GiB)": 34.88, "step": 70020, "train_speed(iter/s)": 0.410265 }, { "acc": 0.92320576, "epoch": 1.8960008664338126, "grad_norm": 6.751720428466797, "learning_rate": 5.839714584064111e-06, "loss": 0.3957509, "memory(GiB)": 34.88, "step": 70025, "train_speed(iter/s)": 0.410267 }, { "acc": 0.90467463, "epoch": 1.8961362467170282, "grad_norm": 10.601316452026367, "learning_rate": 5.839162954853051e-06, "loss": 0.54249516, "memory(GiB)": 34.88, "step": 70030, "train_speed(iter/s)": 0.410268 }, { "acc": 0.91026611, "epoch": 1.8962716270002438, "grad_norm": 8.174508094787598, "learning_rate": 5.838611315136089e-06, "loss": 0.45685883, "memory(GiB)": 34.88, "step": 70035, "train_speed(iter/s)": 0.41027 }, { "acc": 0.90331478, "epoch": 1.8964070072834591, "grad_norm": 11.136336326599121, "learning_rate": 5.838059664920135e-06, "loss": 0.53497834, "memory(GiB)": 34.88, "step": 70040, "train_speed(iter/s)": 0.410271 }, { "acc": 0.90584545, "epoch": 1.896542387566675, "grad_norm": 18.944833755493164, "learning_rate": 5.837508004212102e-06, "loss": 0.43976617, "memory(GiB)": 34.88, "step": 70045, "train_speed(iter/s)": 0.410273 }, { "acc": 0.92532635, "epoch": 1.8966777678498903, "grad_norm": 10.882946968078613, "learning_rate": 5.836956333018897e-06, "loss": 0.41866856, "memory(GiB)": 34.88, "step": 70050, "train_speed(iter/s)": 0.410274 }, { "acc": 0.91398296, "epoch": 1.8968131481331059, "grad_norm": 33.070960998535156, "learning_rate": 5.836404651347435e-06, "loss": 0.52236896, "memory(GiB)": 34.88, "step": 70055, "train_speed(iter/s)": 0.410276 }, { "acc": 0.93007927, "epoch": 1.8969485284163214, "grad_norm": 9.24180793762207, "learning_rate": 5.8358529592046224e-06, "loss": 0.37472181, "memory(GiB)": 34.88, "step": 70060, "train_speed(iter/s)": 0.410278 }, { "acc": 0.92083416, "epoch": 1.897083908699537, "grad_norm": 6.078956604003906, "learning_rate": 5.8353012565973735e-06, "loss": 0.40773439, "memory(GiB)": 34.88, "step": 70065, "train_speed(iter/s)": 0.410279 }, { "acc": 0.8894062, "epoch": 1.8972192889827526, "grad_norm": 5.689085960388184, "learning_rate": 5.8347495435326e-06, "loss": 0.66857424, "memory(GiB)": 34.88, "step": 70070, "train_speed(iter/s)": 0.410281 }, { "acc": 0.9296442, "epoch": 1.897354669265968, "grad_norm": 5.125375747680664, "learning_rate": 5.834197820017209e-06, "loss": 0.39018245, "memory(GiB)": 34.88, "step": 70075, "train_speed(iter/s)": 0.410282 }, { "acc": 0.93263197, "epoch": 1.8974900495491838, "grad_norm": 8.18093490600586, "learning_rate": 5.833646086058117e-06, "loss": 0.40899343, "memory(GiB)": 34.88, "step": 70080, "train_speed(iter/s)": 0.410284 }, { "acc": 0.90306559, "epoch": 1.8976254298323991, "grad_norm": 7.428791046142578, "learning_rate": 5.833094341662232e-06, "loss": 0.61091175, "memory(GiB)": 34.88, "step": 70085, "train_speed(iter/s)": 0.410286 }, { "acc": 0.91843653, "epoch": 1.897760810115615, "grad_norm": 7.504619598388672, "learning_rate": 5.832542586836466e-06, "loss": 0.44490662, "memory(GiB)": 34.88, "step": 70090, "train_speed(iter/s)": 0.410287 }, { "acc": 0.89768791, "epoch": 1.8978961903988303, "grad_norm": 10.21427059173584, "learning_rate": 5.831990821587733e-06, "loss": 0.55533819, "memory(GiB)": 34.88, "step": 70095, "train_speed(iter/s)": 0.410289 }, { "acc": 0.91472073, "epoch": 1.8980315706820459, "grad_norm": 3.923283576965332, "learning_rate": 5.831439045922942e-06, "loss": 0.45166702, "memory(GiB)": 34.88, "step": 70100, "train_speed(iter/s)": 0.41029 }, { "acc": 0.9184906, "epoch": 1.8981669509652614, "grad_norm": 6.536043167114258, "learning_rate": 5.830887259849008e-06, "loss": 0.4498332, "memory(GiB)": 34.88, "step": 70105, "train_speed(iter/s)": 0.410292 }, { "acc": 0.90518713, "epoch": 1.898302331248477, "grad_norm": 7.508727550506592, "learning_rate": 5.830335463372842e-06, "loss": 0.46026425, "memory(GiB)": 34.88, "step": 70110, "train_speed(iter/s)": 0.410294 }, { "acc": 0.91559954, "epoch": 1.8984377115316926, "grad_norm": 9.158344268798828, "learning_rate": 5.829783656501358e-06, "loss": 0.43141127, "memory(GiB)": 34.88, "step": 70115, "train_speed(iter/s)": 0.410295 }, { "acc": 0.89515018, "epoch": 1.898573091814908, "grad_norm": 9.579834938049316, "learning_rate": 5.829231839241463e-06, "loss": 0.55334272, "memory(GiB)": 34.88, "step": 70120, "train_speed(iter/s)": 0.410297 }, { "acc": 0.90952606, "epoch": 1.8987084720981238, "grad_norm": 9.481760025024414, "learning_rate": 5.828680011600076e-06, "loss": 0.455791, "memory(GiB)": 34.88, "step": 70125, "train_speed(iter/s)": 0.410298 }, { "acc": 0.91405306, "epoch": 1.8988438523813391, "grad_norm": 5.465279579162598, "learning_rate": 5.828128173584104e-06, "loss": 0.43416858, "memory(GiB)": 34.88, "step": 70130, "train_speed(iter/s)": 0.4103 }, { "acc": 0.90286636, "epoch": 1.8989792326645547, "grad_norm": 7.660924911499023, "learning_rate": 5.827576325200465e-06, "loss": 0.55597029, "memory(GiB)": 34.88, "step": 70135, "train_speed(iter/s)": 0.410302 }, { "acc": 0.92898111, "epoch": 1.8991146129477703, "grad_norm": 5.183009147644043, "learning_rate": 5.8270244664560695e-06, "loss": 0.29101701, "memory(GiB)": 34.88, "step": 70140, "train_speed(iter/s)": 0.410303 }, { "acc": 0.91536865, "epoch": 1.8992499932309859, "grad_norm": 7.806501865386963, "learning_rate": 5.82647259735783e-06, "loss": 0.47345634, "memory(GiB)": 34.88, "step": 70145, "train_speed(iter/s)": 0.410305 }, { "acc": 0.89064875, "epoch": 1.8993853735142014, "grad_norm": 7.967672824859619, "learning_rate": 5.825920717912661e-06, "loss": 0.6334713, "memory(GiB)": 34.88, "step": 70150, "train_speed(iter/s)": 0.410306 }, { "acc": 0.91205158, "epoch": 1.8995207537974168, "grad_norm": 6.577601432800293, "learning_rate": 5.825368828127477e-06, "loss": 0.47827005, "memory(GiB)": 34.88, "step": 70155, "train_speed(iter/s)": 0.410308 }, { "acc": 0.91789894, "epoch": 1.8996561340806326, "grad_norm": 7.950336933135986, "learning_rate": 5.8248169280091896e-06, "loss": 0.43630104, "memory(GiB)": 34.88, "step": 70160, "train_speed(iter/s)": 0.41031 }, { "acc": 0.89814148, "epoch": 1.899791514363848, "grad_norm": 10.802872657775879, "learning_rate": 5.824265017564712e-06, "loss": 0.56646175, "memory(GiB)": 34.88, "step": 70165, "train_speed(iter/s)": 0.410311 }, { "acc": 0.91709185, "epoch": 1.8999268946470635, "grad_norm": 21.8012752532959, "learning_rate": 5.82371309680096e-06, "loss": 0.4330934, "memory(GiB)": 34.88, "step": 70170, "train_speed(iter/s)": 0.410313 }, { "acc": 0.91187696, "epoch": 1.9000622749302791, "grad_norm": 7.9261088371276855, "learning_rate": 5.823161165724845e-06, "loss": 0.42581859, "memory(GiB)": 34.88, "step": 70175, "train_speed(iter/s)": 0.410314 }, { "acc": 0.91911583, "epoch": 1.9001976552134947, "grad_norm": 6.98480749130249, "learning_rate": 5.822609224343283e-06, "loss": 0.55679741, "memory(GiB)": 34.88, "step": 70180, "train_speed(iter/s)": 0.410316 }, { "acc": 0.90091629, "epoch": 1.9003330354967103, "grad_norm": 13.48003101348877, "learning_rate": 5.8220572726631875e-06, "loss": 0.56287951, "memory(GiB)": 34.88, "step": 70185, "train_speed(iter/s)": 0.410317 }, { "acc": 0.92456789, "epoch": 1.9004684157799256, "grad_norm": 14.661079406738281, "learning_rate": 5.821505310691472e-06, "loss": 0.48512158, "memory(GiB)": 34.88, "step": 70190, "train_speed(iter/s)": 0.410319 }, { "acc": 0.89883633, "epoch": 1.9006037960631414, "grad_norm": 6.6367597579956055, "learning_rate": 5.820953338435054e-06, "loss": 0.46546764, "memory(GiB)": 34.88, "step": 70195, "train_speed(iter/s)": 0.41032 }, { "acc": 0.92214699, "epoch": 1.9007391763463568, "grad_norm": 13.23299789428711, "learning_rate": 5.820401355900844e-06, "loss": 0.45735192, "memory(GiB)": 34.88, "step": 70200, "train_speed(iter/s)": 0.410322 }, { "acc": 0.91457977, "epoch": 1.9008745566295726, "grad_norm": 6.627735137939453, "learning_rate": 5.819849363095759e-06, "loss": 0.49042602, "memory(GiB)": 34.88, "step": 70205, "train_speed(iter/s)": 0.410324 }, { "acc": 0.91668501, "epoch": 1.901009936912788, "grad_norm": 15.164460182189941, "learning_rate": 5.819297360026712e-06, "loss": 0.51299973, "memory(GiB)": 34.88, "step": 70210, "train_speed(iter/s)": 0.410325 }, { "acc": 0.913867, "epoch": 1.9011453171960035, "grad_norm": 9.876240730285645, "learning_rate": 5.818745346700621e-06, "loss": 0.46415563, "memory(GiB)": 34.88, "step": 70215, "train_speed(iter/s)": 0.410327 }, { "acc": 0.90676651, "epoch": 1.9012806974792191, "grad_norm": 9.290037155151367, "learning_rate": 5.818193323124398e-06, "loss": 0.45088644, "memory(GiB)": 34.88, "step": 70220, "train_speed(iter/s)": 0.410329 }, { "acc": 0.91108732, "epoch": 1.9014160777624347, "grad_norm": 10.657794952392578, "learning_rate": 5.817641289304958e-06, "loss": 0.43943815, "memory(GiB)": 34.88, "step": 70225, "train_speed(iter/s)": 0.41033 }, { "acc": 0.92083263, "epoch": 1.9015514580456503, "grad_norm": 6.688990116119385, "learning_rate": 5.817089245249221e-06, "loss": 0.37838469, "memory(GiB)": 34.88, "step": 70230, "train_speed(iter/s)": 0.410332 }, { "acc": 0.91826687, "epoch": 1.9016868383288656, "grad_norm": 7.88579797744751, "learning_rate": 5.816537190964095e-06, "loss": 0.4576057, "memory(GiB)": 34.88, "step": 70235, "train_speed(iter/s)": 0.410334 }, { "acc": 0.91119652, "epoch": 1.9018222186120814, "grad_norm": 6.599912166595459, "learning_rate": 5.815985126456506e-06, "loss": 0.50396709, "memory(GiB)": 34.88, "step": 70240, "train_speed(iter/s)": 0.410335 }, { "acc": 0.91087933, "epoch": 1.9019575988952968, "grad_norm": 8.499931335449219, "learning_rate": 5.815433051733358e-06, "loss": 0.48818145, "memory(GiB)": 34.88, "step": 70245, "train_speed(iter/s)": 0.410336 }, { "acc": 0.90760431, "epoch": 1.9020929791785124, "grad_norm": 10.112676620483398, "learning_rate": 5.8148809668015725e-06, "loss": 0.50617671, "memory(GiB)": 34.88, "step": 70250, "train_speed(iter/s)": 0.410338 }, { "acc": 0.90242519, "epoch": 1.902228359461728, "grad_norm": 9.25261116027832, "learning_rate": 5.8143288716680665e-06, "loss": 0.50864744, "memory(GiB)": 34.88, "step": 70255, "train_speed(iter/s)": 0.41034 }, { "acc": 0.92241554, "epoch": 1.9023637397449435, "grad_norm": 9.78761100769043, "learning_rate": 5.813776766339755e-06, "loss": 0.36188068, "memory(GiB)": 34.88, "step": 70260, "train_speed(iter/s)": 0.410341 }, { "acc": 0.90716028, "epoch": 1.9024991200281591, "grad_norm": 4.155581474304199, "learning_rate": 5.813224650823553e-06, "loss": 0.53130007, "memory(GiB)": 34.88, "step": 70265, "train_speed(iter/s)": 0.410343 }, { "acc": 0.9285223, "epoch": 1.9026345003113745, "grad_norm": 6.416916847229004, "learning_rate": 5.812672525126379e-06, "loss": 0.43761911, "memory(GiB)": 34.88, "step": 70270, "train_speed(iter/s)": 0.410345 }, { "acc": 0.90521965, "epoch": 1.9027698805945903, "grad_norm": 7.794285297393799, "learning_rate": 5.812120389255147e-06, "loss": 0.54893618, "memory(GiB)": 34.88, "step": 70275, "train_speed(iter/s)": 0.410346 }, { "acc": 0.92399559, "epoch": 1.9029052608778056, "grad_norm": 44.75910949707031, "learning_rate": 5.811568243216775e-06, "loss": 0.42946334, "memory(GiB)": 34.88, "step": 70280, "train_speed(iter/s)": 0.410348 }, { "acc": 0.90933895, "epoch": 1.9030406411610215, "grad_norm": 7.645723819732666, "learning_rate": 5.811016087018179e-06, "loss": 0.49472528, "memory(GiB)": 34.88, "step": 70285, "train_speed(iter/s)": 0.41035 }, { "acc": 0.9355092, "epoch": 1.9031760214442368, "grad_norm": 18.26410484313965, "learning_rate": 5.810463920666278e-06, "loss": 0.38389399, "memory(GiB)": 34.88, "step": 70290, "train_speed(iter/s)": 0.410351 }, { "acc": 0.89808445, "epoch": 1.9033114017274524, "grad_norm": 7.524174690246582, "learning_rate": 5.809911744167986e-06, "loss": 0.57949004, "memory(GiB)": 34.88, "step": 70295, "train_speed(iter/s)": 0.410353 }, { "acc": 0.91469183, "epoch": 1.903446782010668, "grad_norm": 9.55455493927002, "learning_rate": 5.809359557530222e-06, "loss": 0.491466, "memory(GiB)": 34.88, "step": 70300, "train_speed(iter/s)": 0.410355 }, { "acc": 0.91144438, "epoch": 1.9035821622938836, "grad_norm": 5.271230220794678, "learning_rate": 5.808807360759902e-06, "loss": 0.49395566, "memory(GiB)": 34.88, "step": 70305, "train_speed(iter/s)": 0.410356 }, { "acc": 0.91240234, "epoch": 1.9037175425770991, "grad_norm": 6.238227367401123, "learning_rate": 5.808255153863944e-06, "loss": 0.46403632, "memory(GiB)": 34.88, "step": 70310, "train_speed(iter/s)": 0.410358 }, { "acc": 0.9222847, "epoch": 1.9038529228603145, "grad_norm": 7.571127891540527, "learning_rate": 5.807702936849266e-06, "loss": 0.43169012, "memory(GiB)": 34.88, "step": 70315, "train_speed(iter/s)": 0.410359 }, { "acc": 0.91671133, "epoch": 1.9039883031435303, "grad_norm": 9.384002685546875, "learning_rate": 5.807150709722784e-06, "loss": 0.48331671, "memory(GiB)": 34.88, "step": 70320, "train_speed(iter/s)": 0.410361 }, { "acc": 0.91664524, "epoch": 1.9041236834267457, "grad_norm": 4.380995273590088, "learning_rate": 5.80659847249142e-06, "loss": 0.47473454, "memory(GiB)": 34.88, "step": 70325, "train_speed(iter/s)": 0.410363 }, { "acc": 0.93618832, "epoch": 1.9042590637099612, "grad_norm": 13.183969497680664, "learning_rate": 5.806046225162085e-06, "loss": 0.36572456, "memory(GiB)": 34.88, "step": 70330, "train_speed(iter/s)": 0.410364 }, { "acc": 0.91452417, "epoch": 1.9043944439931768, "grad_norm": 6.611415386199951, "learning_rate": 5.805493967741702e-06, "loss": 0.4408886, "memory(GiB)": 34.88, "step": 70335, "train_speed(iter/s)": 0.410366 }, { "acc": 0.91656132, "epoch": 1.9045298242763924, "grad_norm": 7.196827411651611, "learning_rate": 5.804941700237186e-06, "loss": 0.41576653, "memory(GiB)": 34.88, "step": 70340, "train_speed(iter/s)": 0.410368 }, { "acc": 0.93454409, "epoch": 1.904665204559608, "grad_norm": 6.6087775230407715, "learning_rate": 5.80438942265546e-06, "loss": 0.37139845, "memory(GiB)": 34.88, "step": 70345, "train_speed(iter/s)": 0.410369 }, { "acc": 0.90147743, "epoch": 1.9048005848428233, "grad_norm": 8.00797176361084, "learning_rate": 5.803837135003438e-06, "loss": 0.46335812, "memory(GiB)": 34.88, "step": 70350, "train_speed(iter/s)": 0.410371 }, { "acc": 0.89769812, "epoch": 1.9049359651260391, "grad_norm": 8.344250679016113, "learning_rate": 5.80328483728804e-06, "loss": 0.52741928, "memory(GiB)": 34.88, "step": 70355, "train_speed(iter/s)": 0.410372 }, { "acc": 0.91450243, "epoch": 1.9050713454092545, "grad_norm": 7.429085731506348, "learning_rate": 5.802732529516184e-06, "loss": 0.41317501, "memory(GiB)": 34.88, "step": 70360, "train_speed(iter/s)": 0.410374 }, { "acc": 0.92431736, "epoch": 1.9052067256924703, "grad_norm": 12.220916748046875, "learning_rate": 5.80218021169479e-06, "loss": 0.34139569, "memory(GiB)": 34.88, "step": 70365, "train_speed(iter/s)": 0.410376 }, { "acc": 0.91523314, "epoch": 1.9053421059756857, "grad_norm": 6.415884971618652, "learning_rate": 5.801627883830776e-06, "loss": 0.47569342, "memory(GiB)": 34.88, "step": 70370, "train_speed(iter/s)": 0.410377 }, { "acc": 0.90694237, "epoch": 1.9054774862589012, "grad_norm": 4.707883834838867, "learning_rate": 5.801075545931059e-06, "loss": 0.50134935, "memory(GiB)": 34.88, "step": 70375, "train_speed(iter/s)": 0.410378 }, { "acc": 0.91943512, "epoch": 1.9056128665421168, "grad_norm": 5.995208263397217, "learning_rate": 5.800523198002563e-06, "loss": 0.35278044, "memory(GiB)": 34.88, "step": 70380, "train_speed(iter/s)": 0.41038 }, { "acc": 0.89732466, "epoch": 1.9057482468253324, "grad_norm": 8.99317741394043, "learning_rate": 5.799970840052202e-06, "loss": 0.58303652, "memory(GiB)": 34.88, "step": 70385, "train_speed(iter/s)": 0.410382 }, { "acc": 0.91298361, "epoch": 1.905883627108548, "grad_norm": 6.8852996826171875, "learning_rate": 5.7994184720869005e-06, "loss": 0.41698632, "memory(GiB)": 34.88, "step": 70390, "train_speed(iter/s)": 0.410383 }, { "acc": 0.92070084, "epoch": 1.9060190073917633, "grad_norm": 6.111055850982666, "learning_rate": 5.798866094113572e-06, "loss": 0.4178195, "memory(GiB)": 34.88, "step": 70395, "train_speed(iter/s)": 0.410385 }, { "acc": 0.921453, "epoch": 1.9061543876749791, "grad_norm": 7.615340709686279, "learning_rate": 5.798313706139142e-06, "loss": 0.45241785, "memory(GiB)": 34.88, "step": 70400, "train_speed(iter/s)": 0.410386 }, { "acc": 0.91442862, "epoch": 1.9062897679581945, "grad_norm": 12.398491859436035, "learning_rate": 5.797761308170526e-06, "loss": 0.48162947, "memory(GiB)": 34.88, "step": 70405, "train_speed(iter/s)": 0.410388 }, { "acc": 0.89067736, "epoch": 1.90642514824141, "grad_norm": 11.160967826843262, "learning_rate": 5.797208900214647e-06, "loss": 0.60129466, "memory(GiB)": 34.88, "step": 70410, "train_speed(iter/s)": 0.410389 }, { "acc": 0.91474628, "epoch": 1.9065605285246257, "grad_norm": 12.613369941711426, "learning_rate": 5.7966564822784225e-06, "loss": 0.46346641, "memory(GiB)": 34.88, "step": 70415, "train_speed(iter/s)": 0.41039 }, { "acc": 0.93682795, "epoch": 1.9066959088078412, "grad_norm": 4.974160194396973, "learning_rate": 5.796104054368773e-06, "loss": 0.32966528, "memory(GiB)": 34.88, "step": 70420, "train_speed(iter/s)": 0.410391 }, { "acc": 0.91848907, "epoch": 1.9068312890910568, "grad_norm": 8.660080909729004, "learning_rate": 5.7955516164926215e-06, "loss": 0.44171553, "memory(GiB)": 34.88, "step": 70425, "train_speed(iter/s)": 0.410393 }, { "acc": 0.91840343, "epoch": 1.9069666693742722, "grad_norm": 8.386092185974121, "learning_rate": 5.794999168656884e-06, "loss": 0.43933582, "memory(GiB)": 34.88, "step": 70430, "train_speed(iter/s)": 0.410394 }, { "acc": 0.92101984, "epoch": 1.907102049657488, "grad_norm": 7.590791702270508, "learning_rate": 5.794446710868484e-06, "loss": 0.42181625, "memory(GiB)": 34.88, "step": 70435, "train_speed(iter/s)": 0.410396 }, { "acc": 0.93070145, "epoch": 1.9072374299407033, "grad_norm": 12.338927268981934, "learning_rate": 5.793894243134341e-06, "loss": 0.35302963, "memory(GiB)": 34.88, "step": 70440, "train_speed(iter/s)": 0.410397 }, { "acc": 0.89849052, "epoch": 1.9073728102239191, "grad_norm": 10.230752944946289, "learning_rate": 5.793341765461377e-06, "loss": 0.67739096, "memory(GiB)": 34.88, "step": 70445, "train_speed(iter/s)": 0.410399 }, { "acc": 0.93572044, "epoch": 1.9075081905071345, "grad_norm": 6.208020210266113, "learning_rate": 5.792789277856512e-06, "loss": 0.30277643, "memory(GiB)": 34.88, "step": 70450, "train_speed(iter/s)": 0.4104 }, { "acc": 0.89689388, "epoch": 1.90764357079035, "grad_norm": 6.841670989990234, "learning_rate": 5.792236780326667e-06, "loss": 0.63608818, "memory(GiB)": 34.88, "step": 70455, "train_speed(iter/s)": 0.410402 }, { "acc": 0.91402836, "epoch": 1.9077789510735657, "grad_norm": 8.194060325622559, "learning_rate": 5.7916842728787625e-06, "loss": 0.47971225, "memory(GiB)": 34.88, "step": 70460, "train_speed(iter/s)": 0.410403 }, { "acc": 0.92688847, "epoch": 1.9079143313567812, "grad_norm": 8.971151351928711, "learning_rate": 5.791131755519719e-06, "loss": 0.33029399, "memory(GiB)": 34.88, "step": 70465, "train_speed(iter/s)": 0.410405 }, { "acc": 0.90007868, "epoch": 1.9080497116399968, "grad_norm": 9.032825469970703, "learning_rate": 5.790579228256461e-06, "loss": 0.46285772, "memory(GiB)": 34.88, "step": 70470, "train_speed(iter/s)": 0.410406 }, { "acc": 0.92352848, "epoch": 1.9081850919232122, "grad_norm": 5.528526782989502, "learning_rate": 5.790026691095907e-06, "loss": 0.36101387, "memory(GiB)": 34.88, "step": 70475, "train_speed(iter/s)": 0.410407 }, { "acc": 0.92640066, "epoch": 1.908320472206428, "grad_norm": 3.163062572479248, "learning_rate": 5.78947414404498e-06, "loss": 0.37761493, "memory(GiB)": 34.88, "step": 70480, "train_speed(iter/s)": 0.410408 }, { "acc": 0.89692364, "epoch": 1.9084558524896433, "grad_norm": 7.262573719024658, "learning_rate": 5.788921587110603e-06, "loss": 0.6153656, "memory(GiB)": 34.88, "step": 70485, "train_speed(iter/s)": 0.41041 }, { "acc": 0.92590466, "epoch": 1.908591232772859, "grad_norm": 5.1521220207214355, "learning_rate": 5.788369020299695e-06, "loss": 0.30896735, "memory(GiB)": 34.88, "step": 70490, "train_speed(iter/s)": 0.410412 }, { "acc": 0.907372, "epoch": 1.9087266130560745, "grad_norm": 7.275547504425049, "learning_rate": 5.787816443619181e-06, "loss": 0.48473501, "memory(GiB)": 34.88, "step": 70495, "train_speed(iter/s)": 0.410413 }, { "acc": 0.92211475, "epoch": 1.90886199333929, "grad_norm": 12.651264190673828, "learning_rate": 5.787263857075979e-06, "loss": 0.3907382, "memory(GiB)": 34.88, "step": 70500, "train_speed(iter/s)": 0.410414 }, { "acc": 0.91493769, "epoch": 1.9089973736225057, "grad_norm": 18.94483184814453, "learning_rate": 5.786711260677016e-06, "loss": 0.4603775, "memory(GiB)": 34.88, "step": 70505, "train_speed(iter/s)": 0.410416 }, { "acc": 0.89973974, "epoch": 1.909132753905721, "grad_norm": 16.821353912353516, "learning_rate": 5.78615865442921e-06, "loss": 0.62330823, "memory(GiB)": 34.88, "step": 70510, "train_speed(iter/s)": 0.410417 }, { "acc": 0.91887074, "epoch": 1.9092681341889368, "grad_norm": 9.874629020690918, "learning_rate": 5.785606038339485e-06, "loss": 0.45140462, "memory(GiB)": 34.88, "step": 70515, "train_speed(iter/s)": 0.410418 }, { "acc": 0.90735168, "epoch": 1.9094035144721522, "grad_norm": 7.625173091888428, "learning_rate": 5.7850534124147665e-06, "loss": 0.53053665, "memory(GiB)": 34.88, "step": 70520, "train_speed(iter/s)": 0.41042 }, { "acc": 0.91317301, "epoch": 1.909538894755368, "grad_norm": 4.6137776374816895, "learning_rate": 5.784500776661973e-06, "loss": 0.58190751, "memory(GiB)": 34.88, "step": 70525, "train_speed(iter/s)": 0.410422 }, { "acc": 0.92110271, "epoch": 1.9096742750385833, "grad_norm": 6.655002593994141, "learning_rate": 5.783948131088032e-06, "loss": 0.44581785, "memory(GiB)": 34.88, "step": 70530, "train_speed(iter/s)": 0.410422 }, { "acc": 0.89014053, "epoch": 1.909809655321799, "grad_norm": 8.410004615783691, "learning_rate": 5.783395475699862e-06, "loss": 0.57657495, "memory(GiB)": 34.88, "step": 70535, "train_speed(iter/s)": 0.410424 }, { "acc": 0.9024085, "epoch": 1.9099450356050145, "grad_norm": 6.528416156768799, "learning_rate": 5.782842810504387e-06, "loss": 0.49986629, "memory(GiB)": 34.88, "step": 70540, "train_speed(iter/s)": 0.410425 }, { "acc": 0.91613388, "epoch": 1.91008041588823, "grad_norm": 7.578901767730713, "learning_rate": 5.7822901355085315e-06, "loss": 0.44801836, "memory(GiB)": 34.88, "step": 70545, "train_speed(iter/s)": 0.410427 }, { "acc": 0.89969006, "epoch": 1.9102157961714457, "grad_norm": 9.628077507019043, "learning_rate": 5.781737450719218e-06, "loss": 0.60698299, "memory(GiB)": 34.88, "step": 70550, "train_speed(iter/s)": 0.410428 }, { "acc": 0.89842844, "epoch": 1.910351176454661, "grad_norm": 8.581046104431152, "learning_rate": 5.781184756143371e-06, "loss": 0.53968267, "memory(GiB)": 34.88, "step": 70555, "train_speed(iter/s)": 0.41043 }, { "acc": 0.92245159, "epoch": 1.9104865567378768, "grad_norm": 5.095223903656006, "learning_rate": 5.780632051787913e-06, "loss": 0.34835875, "memory(GiB)": 34.88, "step": 70560, "train_speed(iter/s)": 0.410432 }, { "acc": 0.92403326, "epoch": 1.9106219370210922, "grad_norm": 4.607786655426025, "learning_rate": 5.780079337659768e-06, "loss": 0.41700754, "memory(GiB)": 34.88, "step": 70565, "train_speed(iter/s)": 0.410433 }, { "acc": 0.90561314, "epoch": 1.9107573173043078, "grad_norm": 5.114527702331543, "learning_rate": 5.77952661376586e-06, "loss": 0.52584591, "memory(GiB)": 34.88, "step": 70570, "train_speed(iter/s)": 0.410435 }, { "acc": 0.91109543, "epoch": 1.9108926975875233, "grad_norm": 9.280204772949219, "learning_rate": 5.778973880113115e-06, "loss": 0.56982088, "memory(GiB)": 34.88, "step": 70575, "train_speed(iter/s)": 0.410436 }, { "acc": 0.91788454, "epoch": 1.911028077870739, "grad_norm": 10.281208038330078, "learning_rate": 5.778421136708451e-06, "loss": 0.4347249, "memory(GiB)": 34.88, "step": 70580, "train_speed(iter/s)": 0.410438 }, { "acc": 0.91768303, "epoch": 1.9111634581539545, "grad_norm": 10.246049880981445, "learning_rate": 5.777868383558798e-06, "loss": 0.49542484, "memory(GiB)": 34.88, "step": 70585, "train_speed(iter/s)": 0.410439 }, { "acc": 0.91921482, "epoch": 1.9112988384371699, "grad_norm": 9.926590919494629, "learning_rate": 5.777315620671079e-06, "loss": 0.50840569, "memory(GiB)": 34.88, "step": 70590, "train_speed(iter/s)": 0.410441 }, { "acc": 0.92033606, "epoch": 1.9114342187203857, "grad_norm": 4.376121520996094, "learning_rate": 5.776762848052217e-06, "loss": 0.43717756, "memory(GiB)": 34.88, "step": 70595, "train_speed(iter/s)": 0.410442 }, { "acc": 0.93755302, "epoch": 1.911569599003601, "grad_norm": 8.259184837341309, "learning_rate": 5.776210065709139e-06, "loss": 0.36597214, "memory(GiB)": 34.88, "step": 70600, "train_speed(iter/s)": 0.410444 }, { "acc": 0.91985073, "epoch": 1.9117049792868168, "grad_norm": 5.126718044281006, "learning_rate": 5.775657273648767e-06, "loss": 0.36152871, "memory(GiB)": 34.88, "step": 70605, "train_speed(iter/s)": 0.410445 }, { "acc": 0.9223175, "epoch": 1.9118403595700322, "grad_norm": 12.768477439880371, "learning_rate": 5.7751044718780265e-06, "loss": 0.37865002, "memory(GiB)": 34.88, "step": 70610, "train_speed(iter/s)": 0.410447 }, { "acc": 0.90870428, "epoch": 1.9119757398532478, "grad_norm": 15.880378723144531, "learning_rate": 5.774551660403844e-06, "loss": 0.49138064, "memory(GiB)": 34.88, "step": 70615, "train_speed(iter/s)": 0.410448 }, { "acc": 0.91444311, "epoch": 1.9121111201364633, "grad_norm": 6.064108371734619, "learning_rate": 5.773998839233144e-06, "loss": 0.4374917, "memory(GiB)": 34.88, "step": 70620, "train_speed(iter/s)": 0.41045 }, { "acc": 0.9218154, "epoch": 1.912246500419679, "grad_norm": 8.883999824523926, "learning_rate": 5.773446008372849e-06, "loss": 0.44057751, "memory(GiB)": 34.88, "step": 70625, "train_speed(iter/s)": 0.410452 }, { "acc": 0.91458454, "epoch": 1.9123818807028945, "grad_norm": 4.017375469207764, "learning_rate": 5.772893167829889e-06, "loss": 0.39820778, "memory(GiB)": 34.88, "step": 70630, "train_speed(iter/s)": 0.410453 }, { "acc": 0.90833769, "epoch": 1.9125172609861099, "grad_norm": 6.774571895599365, "learning_rate": 5.7723403176111845e-06, "loss": 0.51362033, "memory(GiB)": 34.88, "step": 70635, "train_speed(iter/s)": 0.410455 }, { "acc": 0.91010075, "epoch": 1.9126526412693257, "grad_norm": 10.604439735412598, "learning_rate": 5.7717874577236635e-06, "loss": 0.55702906, "memory(GiB)": 34.88, "step": 70640, "train_speed(iter/s)": 0.410457 }, { "acc": 0.89632168, "epoch": 1.912788021552541, "grad_norm": 35.98624801635742, "learning_rate": 5.771234588174253e-06, "loss": 0.59194374, "memory(GiB)": 34.88, "step": 70645, "train_speed(iter/s)": 0.410458 }, { "acc": 0.9223175, "epoch": 1.9129234018357566, "grad_norm": 8.799016952514648, "learning_rate": 5.770681708969876e-06, "loss": 0.37547815, "memory(GiB)": 34.88, "step": 70650, "train_speed(iter/s)": 0.410459 }, { "acc": 0.91099377, "epoch": 1.9130587821189722, "grad_norm": 3.358107089996338, "learning_rate": 5.770128820117462e-06, "loss": 0.45947227, "memory(GiB)": 34.88, "step": 70655, "train_speed(iter/s)": 0.41046 }, { "acc": 0.9079361, "epoch": 1.9131941624021878, "grad_norm": 4.445573806762695, "learning_rate": 5.769575921623931e-06, "loss": 0.46990771, "memory(GiB)": 34.88, "step": 70660, "train_speed(iter/s)": 0.410462 }, { "acc": 0.92687941, "epoch": 1.9133295426854033, "grad_norm": 5.4775567054748535, "learning_rate": 5.7690230134962144e-06, "loss": 0.33766289, "memory(GiB)": 34.88, "step": 70665, "train_speed(iter/s)": 0.410464 }, { "acc": 0.9173461, "epoch": 1.9134649229686187, "grad_norm": 13.54434585571289, "learning_rate": 5.768470095741237e-06, "loss": 0.49985948, "memory(GiB)": 34.88, "step": 70670, "train_speed(iter/s)": 0.410465 }, { "acc": 0.9206953, "epoch": 1.9136003032518345, "grad_norm": 4.831932544708252, "learning_rate": 5.767917168365927e-06, "loss": 0.40918469, "memory(GiB)": 34.88, "step": 70675, "train_speed(iter/s)": 0.410467 }, { "acc": 0.92171021, "epoch": 1.9137356835350499, "grad_norm": 3.0494332313537598, "learning_rate": 5.767364231377207e-06, "loss": 0.43497729, "memory(GiB)": 34.88, "step": 70680, "train_speed(iter/s)": 0.410468 }, { "acc": 0.91082239, "epoch": 1.9138710638182657, "grad_norm": 12.32511043548584, "learning_rate": 5.766811284782005e-06, "loss": 0.4970283, "memory(GiB)": 34.88, "step": 70685, "train_speed(iter/s)": 0.41047 }, { "acc": 0.92617178, "epoch": 1.914006444101481, "grad_norm": 12.262711524963379, "learning_rate": 5.766258328587249e-06, "loss": 0.42553873, "memory(GiB)": 34.88, "step": 70690, "train_speed(iter/s)": 0.410472 }, { "acc": 0.91786013, "epoch": 1.9141418243846966, "grad_norm": 7.510475158691406, "learning_rate": 5.7657053627998645e-06, "loss": 0.55138741, "memory(GiB)": 34.88, "step": 70695, "train_speed(iter/s)": 0.410473 }, { "acc": 0.91444817, "epoch": 1.9142772046679122, "grad_norm": 6.134126663208008, "learning_rate": 5.765152387426781e-06, "loss": 0.46393681, "memory(GiB)": 34.88, "step": 70700, "train_speed(iter/s)": 0.410475 }, { "acc": 0.91055117, "epoch": 1.9144125849511278, "grad_norm": 5.655228137969971, "learning_rate": 5.764599402474924e-06, "loss": 0.48205581, "memory(GiB)": 34.88, "step": 70705, "train_speed(iter/s)": 0.410477 }, { "acc": 0.92070503, "epoch": 1.9145479652343433, "grad_norm": 4.027164936065674, "learning_rate": 5.764046407951219e-06, "loss": 0.42919807, "memory(GiB)": 34.88, "step": 70710, "train_speed(iter/s)": 0.410478 }, { "acc": 0.92283096, "epoch": 1.9146833455175587, "grad_norm": 6.239463806152344, "learning_rate": 5.763493403862597e-06, "loss": 0.33176789, "memory(GiB)": 34.88, "step": 70715, "train_speed(iter/s)": 0.41048 }, { "acc": 0.90760212, "epoch": 1.9148187258007745, "grad_norm": 6.594693183898926, "learning_rate": 5.7629403902159795e-06, "loss": 0.48745542, "memory(GiB)": 34.88, "step": 70720, "train_speed(iter/s)": 0.410481 }, { "acc": 0.92127876, "epoch": 1.9149541060839899, "grad_norm": 11.36589241027832, "learning_rate": 5.7623873670183015e-06, "loss": 0.47919865, "memory(GiB)": 34.88, "step": 70725, "train_speed(iter/s)": 0.410483 }, { "acc": 0.91719809, "epoch": 1.9150894863672054, "grad_norm": 10.478275299072266, "learning_rate": 5.761834334276485e-06, "loss": 0.49441895, "memory(GiB)": 34.88, "step": 70730, "train_speed(iter/s)": 0.410485 }, { "acc": 0.92022247, "epoch": 1.915224866650421, "grad_norm": 21.239521026611328, "learning_rate": 5.761281291997461e-06, "loss": 0.48394265, "memory(GiB)": 34.88, "step": 70735, "train_speed(iter/s)": 0.410486 }, { "acc": 0.92436199, "epoch": 1.9153602469336366, "grad_norm": 5.428368091583252, "learning_rate": 5.760728240188157e-06, "loss": 0.41910563, "memory(GiB)": 34.88, "step": 70740, "train_speed(iter/s)": 0.410488 }, { "acc": 0.91676397, "epoch": 1.9154956272168522, "grad_norm": 11.22725772857666, "learning_rate": 5.7601751788555e-06, "loss": 0.50679379, "memory(GiB)": 34.88, "step": 70745, "train_speed(iter/s)": 0.41049 }, { "acc": 0.90520477, "epoch": 1.9156310075000675, "grad_norm": 10.964020729064941, "learning_rate": 5.759622108006417e-06, "loss": 0.57878761, "memory(GiB)": 34.88, "step": 70750, "train_speed(iter/s)": 0.410491 }, { "acc": 0.90951748, "epoch": 1.9157663877832833, "grad_norm": 6.356231689453125, "learning_rate": 5.759069027647839e-06, "loss": 0.44704504, "memory(GiB)": 34.88, "step": 70755, "train_speed(iter/s)": 0.410493 }, { "acc": 0.91359444, "epoch": 1.9159017680664987, "grad_norm": 5.942599296569824, "learning_rate": 5.758515937786694e-06, "loss": 0.42407131, "memory(GiB)": 34.88, "step": 70760, "train_speed(iter/s)": 0.410495 }, { "acc": 0.9212244, "epoch": 1.9160371483497145, "grad_norm": 20.450414657592773, "learning_rate": 5.757962838429909e-06, "loss": 0.44414129, "memory(GiB)": 34.88, "step": 70765, "train_speed(iter/s)": 0.410496 }, { "acc": 0.91092567, "epoch": 1.9161725286329299, "grad_norm": 9.292776107788086, "learning_rate": 5.757409729584415e-06, "loss": 0.52664013, "memory(GiB)": 34.88, "step": 70770, "train_speed(iter/s)": 0.410498 }, { "acc": 0.92000942, "epoch": 1.9163079089161454, "grad_norm": 24.455942153930664, "learning_rate": 5.756856611257138e-06, "loss": 0.44080715, "memory(GiB)": 34.88, "step": 70775, "train_speed(iter/s)": 0.410499 }, { "acc": 0.90250559, "epoch": 1.916443289199361, "grad_norm": 10.819375991821289, "learning_rate": 5.756303483455008e-06, "loss": 0.53322453, "memory(GiB)": 34.88, "step": 70780, "train_speed(iter/s)": 0.410501 }, { "acc": 0.9166234, "epoch": 1.9165786694825766, "grad_norm": 5.548829078674316, "learning_rate": 5.755750346184955e-06, "loss": 0.48522072, "memory(GiB)": 34.88, "step": 70785, "train_speed(iter/s)": 0.410503 }, { "acc": 0.91108685, "epoch": 1.9167140497657922, "grad_norm": 9.599855422973633, "learning_rate": 5.7551971994539056e-06, "loss": 0.54722056, "memory(GiB)": 34.88, "step": 70790, "train_speed(iter/s)": 0.410504 }, { "acc": 0.90721531, "epoch": 1.9168494300490075, "grad_norm": 9.575563430786133, "learning_rate": 5.754644043268794e-06, "loss": 0.59446335, "memory(GiB)": 34.88, "step": 70795, "train_speed(iter/s)": 0.410506 }, { "acc": 0.92063007, "epoch": 1.9169848103322233, "grad_norm": 7.63362455368042, "learning_rate": 5.754090877636545e-06, "loss": 0.39322982, "memory(GiB)": 34.88, "step": 70800, "train_speed(iter/s)": 0.410507 }, { "acc": 0.91568356, "epoch": 1.9171201906154387, "grad_norm": 8.40721321105957, "learning_rate": 5.753537702564088e-06, "loss": 0.42088094, "memory(GiB)": 34.88, "step": 70805, "train_speed(iter/s)": 0.410509 }, { "acc": 0.92141933, "epoch": 1.9172555708986543, "grad_norm": 9.447131156921387, "learning_rate": 5.752984518058355e-06, "loss": 0.43691616, "memory(GiB)": 34.88, "step": 70810, "train_speed(iter/s)": 0.41051 }, { "acc": 0.91317635, "epoch": 1.9173909511818699, "grad_norm": 16.105688095092773, "learning_rate": 5.752431324126274e-06, "loss": 0.42027431, "memory(GiB)": 34.88, "step": 70815, "train_speed(iter/s)": 0.410512 }, { "acc": 0.90149269, "epoch": 1.9175263314650854, "grad_norm": 9.715767860412598, "learning_rate": 5.751878120774778e-06, "loss": 0.53554964, "memory(GiB)": 34.88, "step": 70820, "train_speed(iter/s)": 0.410514 }, { "acc": 0.92037983, "epoch": 1.917661711748301, "grad_norm": 16.16769790649414, "learning_rate": 5.7513249080107925e-06, "loss": 0.4244411, "memory(GiB)": 34.88, "step": 70825, "train_speed(iter/s)": 0.410515 }, { "acc": 0.92787943, "epoch": 1.9177970920315164, "grad_norm": 5.980078220367432, "learning_rate": 5.750771685841251e-06, "loss": 0.37055874, "memory(GiB)": 34.88, "step": 70830, "train_speed(iter/s)": 0.410517 }, { "acc": 0.89262724, "epoch": 1.9179324723147322, "grad_norm": 11.668720245361328, "learning_rate": 5.75021845427308e-06, "loss": 0.61005054, "memory(GiB)": 34.88, "step": 70835, "train_speed(iter/s)": 0.410518 }, { "acc": 0.91492081, "epoch": 1.9180678525979475, "grad_norm": 11.867995262145996, "learning_rate": 5.749665213313215e-06, "loss": 0.47241507, "memory(GiB)": 34.88, "step": 70840, "train_speed(iter/s)": 0.41052 }, { "acc": 0.90247068, "epoch": 1.9182032328811633, "grad_norm": 8.888895034790039, "learning_rate": 5.749111962968581e-06, "loss": 0.51536307, "memory(GiB)": 34.88, "step": 70845, "train_speed(iter/s)": 0.410521 }, { "acc": 0.89853897, "epoch": 1.9183386131643787, "grad_norm": 10.585232734680176, "learning_rate": 5.748558703246113e-06, "loss": 0.59818935, "memory(GiB)": 34.88, "step": 70850, "train_speed(iter/s)": 0.410523 }, { "acc": 0.92020655, "epoch": 1.9184739934475943, "grad_norm": 7.943453311920166, "learning_rate": 5.748005434152739e-06, "loss": 0.40556116, "memory(GiB)": 34.88, "step": 70855, "train_speed(iter/s)": 0.410525 }, { "acc": 0.92375727, "epoch": 1.9186093737308099, "grad_norm": 5.431248188018799, "learning_rate": 5.74745215569539e-06, "loss": 0.37057157, "memory(GiB)": 34.88, "step": 70860, "train_speed(iter/s)": 0.410526 }, { "acc": 0.92478085, "epoch": 1.9187447540140254, "grad_norm": 11.863696098327637, "learning_rate": 5.746898867881e-06, "loss": 0.41169243, "memory(GiB)": 34.88, "step": 70865, "train_speed(iter/s)": 0.410528 }, { "acc": 0.92079163, "epoch": 1.918880134297241, "grad_norm": 8.729337692260742, "learning_rate": 5.746345570716495e-06, "loss": 0.42663579, "memory(GiB)": 34.88, "step": 70870, "train_speed(iter/s)": 0.41053 }, { "acc": 0.89962578, "epoch": 1.9190155145804564, "grad_norm": 7.992612361907959, "learning_rate": 5.7457922642088106e-06, "loss": 0.5122787, "memory(GiB)": 34.88, "step": 70875, "train_speed(iter/s)": 0.410531 }, { "acc": 0.92543011, "epoch": 1.9191508948636722, "grad_norm": 8.499804496765137, "learning_rate": 5.745238948364874e-06, "loss": 0.43509688, "memory(GiB)": 34.88, "step": 70880, "train_speed(iter/s)": 0.410532 }, { "acc": 0.90591154, "epoch": 1.9192862751468875, "grad_norm": 10.574386596679688, "learning_rate": 5.7446856231916195e-06, "loss": 0.48385792, "memory(GiB)": 34.88, "step": 70885, "train_speed(iter/s)": 0.410534 }, { "acc": 0.92856007, "epoch": 1.9194216554301031, "grad_norm": 6.925952434539795, "learning_rate": 5.744132288695979e-06, "loss": 0.40170736, "memory(GiB)": 34.88, "step": 70890, "train_speed(iter/s)": 0.410536 }, { "acc": 0.90353756, "epoch": 1.9195570357133187, "grad_norm": 8.159696578979492, "learning_rate": 5.743578944884881e-06, "loss": 0.54042964, "memory(GiB)": 34.88, "step": 70895, "train_speed(iter/s)": 0.410537 }, { "acc": 0.91032763, "epoch": 1.9196924159965343, "grad_norm": 7.524468421936035, "learning_rate": 5.74302559176526e-06, "loss": 0.46288757, "memory(GiB)": 34.88, "step": 70900, "train_speed(iter/s)": 0.410539 }, { "acc": 0.91805305, "epoch": 1.9198277962797499, "grad_norm": 5.012442588806152, "learning_rate": 5.742472229344046e-06, "loss": 0.37241039, "memory(GiB)": 34.88, "step": 70905, "train_speed(iter/s)": 0.41054 }, { "acc": 0.9032136, "epoch": 1.9199631765629652, "grad_norm": 11.941920280456543, "learning_rate": 5.741918857628175e-06, "loss": 0.56000118, "memory(GiB)": 34.88, "step": 70910, "train_speed(iter/s)": 0.410542 }, { "acc": 0.90537052, "epoch": 1.920098556846181, "grad_norm": 5.492307186126709, "learning_rate": 5.741365476624572e-06, "loss": 0.56759939, "memory(GiB)": 34.88, "step": 70915, "train_speed(iter/s)": 0.410544 }, { "acc": 0.90870171, "epoch": 1.9202339371293964, "grad_norm": 17.752729415893555, "learning_rate": 5.740812086340175e-06, "loss": 0.52319145, "memory(GiB)": 34.88, "step": 70920, "train_speed(iter/s)": 0.410545 }, { "acc": 0.92093077, "epoch": 1.9203693174126122, "grad_norm": 6.235820293426514, "learning_rate": 5.740258686781914e-06, "loss": 0.40544577, "memory(GiB)": 34.88, "step": 70925, "train_speed(iter/s)": 0.410547 }, { "acc": 0.93136845, "epoch": 1.9205046976958275, "grad_norm": 3.6188852787017822, "learning_rate": 5.739705277956722e-06, "loss": 0.36027091, "memory(GiB)": 34.88, "step": 70930, "train_speed(iter/s)": 0.410548 }, { "acc": 0.9078743, "epoch": 1.9206400779790431, "grad_norm": 16.58599090576172, "learning_rate": 5.739151859871531e-06, "loss": 0.46090956, "memory(GiB)": 34.88, "step": 70935, "train_speed(iter/s)": 0.410549 }, { "acc": 0.91820965, "epoch": 1.9207754582622587, "grad_norm": 3.7817189693450928, "learning_rate": 5.738598432533273e-06, "loss": 0.41053705, "memory(GiB)": 34.88, "step": 70940, "train_speed(iter/s)": 0.410551 }, { "acc": 0.90918598, "epoch": 1.9209108385454743, "grad_norm": 6.988000392913818, "learning_rate": 5.738044995948883e-06, "loss": 0.52280455, "memory(GiB)": 34.88, "step": 70945, "train_speed(iter/s)": 0.410553 }, { "acc": 0.92228527, "epoch": 1.9210462188286899, "grad_norm": 7.190613746643066, "learning_rate": 5.7374915501252946e-06, "loss": 0.39624944, "memory(GiB)": 34.88, "step": 70950, "train_speed(iter/s)": 0.410554 }, { "acc": 0.92230902, "epoch": 1.9211815991119052, "grad_norm": 8.7463960647583, "learning_rate": 5.736938095069436e-06, "loss": 0.41911626, "memory(GiB)": 34.88, "step": 70955, "train_speed(iter/s)": 0.410556 }, { "acc": 0.94740095, "epoch": 1.921316979395121, "grad_norm": 4.01151704788208, "learning_rate": 5.736384630788246e-06, "loss": 0.27896523, "memory(GiB)": 34.88, "step": 70960, "train_speed(iter/s)": 0.410557 }, { "acc": 0.90967159, "epoch": 1.9214523596783364, "grad_norm": 8.855456352233887, "learning_rate": 5.735831157288651e-06, "loss": 0.48094578, "memory(GiB)": 34.88, "step": 70965, "train_speed(iter/s)": 0.410559 }, { "acc": 0.89309692, "epoch": 1.921587739961552, "grad_norm": 10.544401168823242, "learning_rate": 5.735277674577592e-06, "loss": 0.59879313, "memory(GiB)": 34.88, "step": 70970, "train_speed(iter/s)": 0.41056 }, { "acc": 0.90902729, "epoch": 1.9217231202447675, "grad_norm": 5.227902889251709, "learning_rate": 5.7347241826619945e-06, "loss": 0.42899327, "memory(GiB)": 34.88, "step": 70975, "train_speed(iter/s)": 0.410562 }, { "acc": 0.90876617, "epoch": 1.9218585005279831, "grad_norm": 11.360522270202637, "learning_rate": 5.734170681548799e-06, "loss": 0.52871356, "memory(GiB)": 34.88, "step": 70980, "train_speed(iter/s)": 0.410564 }, { "acc": 0.93664608, "epoch": 1.9219938808111987, "grad_norm": 12.29393196105957, "learning_rate": 5.7336171712449375e-06, "loss": 0.34181616, "memory(GiB)": 34.88, "step": 70985, "train_speed(iter/s)": 0.410565 }, { "acc": 0.9271306, "epoch": 1.922129261094414, "grad_norm": 15.36620044708252, "learning_rate": 5.733063651757342e-06, "loss": 0.37499702, "memory(GiB)": 34.88, "step": 70990, "train_speed(iter/s)": 0.410567 }, { "acc": 0.94166393, "epoch": 1.9222646413776299, "grad_norm": 7.581046104431152, "learning_rate": 5.7325101230929485e-06, "loss": 0.32652817, "memory(GiB)": 34.88, "step": 70995, "train_speed(iter/s)": 0.410569 }, { "acc": 0.90631905, "epoch": 1.9224000216608452, "grad_norm": 10.298369407653809, "learning_rate": 5.731956585258688e-06, "loss": 0.51564484, "memory(GiB)": 34.88, "step": 71000, "train_speed(iter/s)": 0.41057 }, { "acc": 0.91912479, "epoch": 1.922535401944061, "grad_norm": 4.821171283721924, "learning_rate": 5.731403038261497e-06, "loss": 0.36274586, "memory(GiB)": 34.88, "step": 71005, "train_speed(iter/s)": 0.410572 }, { "acc": 0.91216087, "epoch": 1.9226707822272764, "grad_norm": 5.696347713470459, "learning_rate": 5.730849482108308e-06, "loss": 0.55523596, "memory(GiB)": 34.88, "step": 71010, "train_speed(iter/s)": 0.410573 }, { "acc": 0.92414417, "epoch": 1.922806162510492, "grad_norm": 7.3445353507995605, "learning_rate": 5.730295916806059e-06, "loss": 0.41980457, "memory(GiB)": 34.88, "step": 71015, "train_speed(iter/s)": 0.410575 }, { "acc": 0.90241823, "epoch": 1.9229415427937075, "grad_norm": 11.243765830993652, "learning_rate": 5.7297423423616806e-06, "loss": 0.5054522, "memory(GiB)": 34.88, "step": 71020, "train_speed(iter/s)": 0.410576 }, { "acc": 0.91836243, "epoch": 1.9230769230769231, "grad_norm": 12.359089851379395, "learning_rate": 5.729188758782109e-06, "loss": 0.48292317, "memory(GiB)": 34.88, "step": 71025, "train_speed(iter/s)": 0.410578 }, { "acc": 0.91067009, "epoch": 1.9232123033601387, "grad_norm": 6.5189738273620605, "learning_rate": 5.728635166074279e-06, "loss": 0.51259069, "memory(GiB)": 34.88, "step": 71030, "train_speed(iter/s)": 0.41058 }, { "acc": 0.92277508, "epoch": 1.923347683643354, "grad_norm": 5.222663879394531, "learning_rate": 5.728081564245126e-06, "loss": 0.394731, "memory(GiB)": 34.88, "step": 71035, "train_speed(iter/s)": 0.410581 }, { "acc": 0.93000412, "epoch": 1.9234830639265699, "grad_norm": 5.410915374755859, "learning_rate": 5.727527953301583e-06, "loss": 0.40582633, "memory(GiB)": 34.88, "step": 71040, "train_speed(iter/s)": 0.410583 }, { "acc": 0.91478615, "epoch": 1.9236184442097852, "grad_norm": 4.768998622894287, "learning_rate": 5.726974333250587e-06, "loss": 0.44816327, "memory(GiB)": 34.88, "step": 71045, "train_speed(iter/s)": 0.410584 }, { "acc": 0.91573114, "epoch": 1.9237538244930008, "grad_norm": 6.9700236320495605, "learning_rate": 5.726420704099073e-06, "loss": 0.44215302, "memory(GiB)": 34.88, "step": 71050, "train_speed(iter/s)": 0.410586 }, { "acc": 0.93375912, "epoch": 1.9238892047762164, "grad_norm": 5.974765300750732, "learning_rate": 5.7258670658539735e-06, "loss": 0.2900403, "memory(GiB)": 34.88, "step": 71055, "train_speed(iter/s)": 0.410587 }, { "acc": 0.90450516, "epoch": 1.924024585059432, "grad_norm": 8.167376518249512, "learning_rate": 5.725313418522229e-06, "loss": 0.54728479, "memory(GiB)": 34.88, "step": 71060, "train_speed(iter/s)": 0.410589 }, { "acc": 0.92091026, "epoch": 1.9241599653426475, "grad_norm": 5.619307518005371, "learning_rate": 5.72475976211077e-06, "loss": 0.38505769, "memory(GiB)": 34.88, "step": 71065, "train_speed(iter/s)": 0.410591 }, { "acc": 0.90092144, "epoch": 1.924295345625863, "grad_norm": 23.128826141357422, "learning_rate": 5.724206096626535e-06, "loss": 0.60433617, "memory(GiB)": 34.88, "step": 71070, "train_speed(iter/s)": 0.410592 }, { "acc": 0.89839315, "epoch": 1.9244307259090787, "grad_norm": 10.204497337341309, "learning_rate": 5.723652422076461e-06, "loss": 0.57235651, "memory(GiB)": 34.88, "step": 71075, "train_speed(iter/s)": 0.410593 }, { "acc": 0.92131538, "epoch": 1.924566106192294, "grad_norm": 21.798307418823242, "learning_rate": 5.723098738467479e-06, "loss": 0.4878253, "memory(GiB)": 34.88, "step": 71080, "train_speed(iter/s)": 0.410595 }, { "acc": 0.91041994, "epoch": 1.9247014864755099, "grad_norm": 4.688326835632324, "learning_rate": 5.722545045806529e-06, "loss": 0.49551558, "memory(GiB)": 34.88, "step": 71085, "train_speed(iter/s)": 0.410596 }, { "acc": 0.9065115, "epoch": 1.9248368667587252, "grad_norm": 7.948265552520752, "learning_rate": 5.721991344100546e-06, "loss": 0.41540198, "memory(GiB)": 34.88, "step": 71090, "train_speed(iter/s)": 0.410598 }, { "acc": 0.9123004, "epoch": 1.9249722470419408, "grad_norm": 11.335500717163086, "learning_rate": 5.721437633356467e-06, "loss": 0.44711571, "memory(GiB)": 34.88, "step": 71095, "train_speed(iter/s)": 0.410599 }, { "acc": 0.89062157, "epoch": 1.9251076273251564, "grad_norm": 14.479744911193848, "learning_rate": 5.720883913581226e-06, "loss": 0.58565731, "memory(GiB)": 34.88, "step": 71100, "train_speed(iter/s)": 0.410601 }, { "acc": 0.91200886, "epoch": 1.925243007608372, "grad_norm": 12.46861457824707, "learning_rate": 5.720330184781763e-06, "loss": 0.48465185, "memory(GiB)": 34.88, "step": 71105, "train_speed(iter/s)": 0.410603 }, { "acc": 0.92211256, "epoch": 1.9253783878915876, "grad_norm": 7.800611972808838, "learning_rate": 5.719776446965009e-06, "loss": 0.4461874, "memory(GiB)": 34.88, "step": 71110, "train_speed(iter/s)": 0.410604 }, { "acc": 0.89861984, "epoch": 1.925513768174803, "grad_norm": 9.912343978881836, "learning_rate": 5.719222700137906e-06, "loss": 0.53018713, "memory(GiB)": 34.88, "step": 71115, "train_speed(iter/s)": 0.410606 }, { "acc": 0.92526932, "epoch": 1.9256491484580187, "grad_norm": 10.645964622497559, "learning_rate": 5.718668944307391e-06, "loss": 0.34497886, "memory(GiB)": 34.88, "step": 71120, "train_speed(iter/s)": 0.410608 }, { "acc": 0.900916, "epoch": 1.925784528741234, "grad_norm": 11.339990615844727, "learning_rate": 5.718115179480397e-06, "loss": 0.52580795, "memory(GiB)": 34.88, "step": 71125, "train_speed(iter/s)": 0.410609 }, { "acc": 0.88694859, "epoch": 1.9259199090244496, "grad_norm": 18.145904541015625, "learning_rate": 5.717561405663862e-06, "loss": 0.7021039, "memory(GiB)": 34.88, "step": 71130, "train_speed(iter/s)": 0.410611 }, { "acc": 0.89310341, "epoch": 1.9260552893076652, "grad_norm": 11.13391399383545, "learning_rate": 5.7170076228647245e-06, "loss": 0.59006252, "memory(GiB)": 34.88, "step": 71135, "train_speed(iter/s)": 0.410612 }, { "acc": 0.90843706, "epoch": 1.9261906695908808, "grad_norm": 12.243459701538086, "learning_rate": 5.716453831089921e-06, "loss": 0.55672479, "memory(GiB)": 34.88, "step": 71140, "train_speed(iter/s)": 0.410614 }, { "acc": 0.90879345, "epoch": 1.9263260498740964, "grad_norm": 7.19215202331543, "learning_rate": 5.715900030346388e-06, "loss": 0.5053535, "memory(GiB)": 34.88, "step": 71145, "train_speed(iter/s)": 0.410615 }, { "acc": 0.90536098, "epoch": 1.9264614301573117, "grad_norm": 9.053780555725098, "learning_rate": 5.7153462206410656e-06, "loss": 0.53080688, "memory(GiB)": 34.88, "step": 71150, "train_speed(iter/s)": 0.410617 }, { "acc": 0.91715527, "epoch": 1.9265968104405276, "grad_norm": 19.836286544799805, "learning_rate": 5.7147924019808885e-06, "loss": 0.4508853, "memory(GiB)": 34.88, "step": 71155, "train_speed(iter/s)": 0.410618 }, { "acc": 0.93632526, "epoch": 1.926732190723743, "grad_norm": 6.297203540802002, "learning_rate": 5.714238574372797e-06, "loss": 0.39541113, "memory(GiB)": 34.88, "step": 71160, "train_speed(iter/s)": 0.41062 }, { "acc": 0.90020514, "epoch": 1.9268675710069587, "grad_norm": 6.943846225738525, "learning_rate": 5.713684737823726e-06, "loss": 0.52035089, "memory(GiB)": 34.88, "step": 71165, "train_speed(iter/s)": 0.410622 }, { "acc": 0.91309004, "epoch": 1.927002951290174, "grad_norm": 7.963282585144043, "learning_rate": 5.713130892340616e-06, "loss": 0.39435496, "memory(GiB)": 34.88, "step": 71170, "train_speed(iter/s)": 0.410623 }, { "acc": 0.89078131, "epoch": 1.9271383315733897, "grad_norm": 14.549701690673828, "learning_rate": 5.712577037930403e-06, "loss": 0.62332549, "memory(GiB)": 34.88, "step": 71175, "train_speed(iter/s)": 0.410625 }, { "acc": 0.92594604, "epoch": 1.9272737118566052, "grad_norm": 5.562927722930908, "learning_rate": 5.712023174600025e-06, "loss": 0.36941521, "memory(GiB)": 34.88, "step": 71180, "train_speed(iter/s)": 0.410627 }, { "acc": 0.9188345, "epoch": 1.9274090921398206, "grad_norm": 5.757603645324707, "learning_rate": 5.711469302356421e-06, "loss": 0.47609158, "memory(GiB)": 34.88, "step": 71185, "train_speed(iter/s)": 0.410628 }, { "acc": 0.9181139, "epoch": 1.9275444724230364, "grad_norm": 7.878871440887451, "learning_rate": 5.71091542120653e-06, "loss": 0.48800421, "memory(GiB)": 34.88, "step": 71190, "train_speed(iter/s)": 0.41063 }, { "acc": 0.87299318, "epoch": 1.9276798527062518, "grad_norm": 9.902660369873047, "learning_rate": 5.710361531157289e-06, "loss": 0.64153218, "memory(GiB)": 34.88, "step": 71195, "train_speed(iter/s)": 0.410631 }, { "acc": 0.92788773, "epoch": 1.9278152329894676, "grad_norm": 8.339797973632812, "learning_rate": 5.709807632215641e-06, "loss": 0.40605297, "memory(GiB)": 34.88, "step": 71200, "train_speed(iter/s)": 0.410633 }, { "acc": 0.92428761, "epoch": 1.927950613272683, "grad_norm": 5.500674724578857, "learning_rate": 5.709253724388517e-06, "loss": 0.35990095, "memory(GiB)": 34.88, "step": 71205, "train_speed(iter/s)": 0.410635 }, { "acc": 0.9126482, "epoch": 1.9280859935558985, "grad_norm": 6.111651420593262, "learning_rate": 5.7086998076828635e-06, "loss": 0.50677176, "memory(GiB)": 34.88, "step": 71210, "train_speed(iter/s)": 0.410636 }, { "acc": 0.89258862, "epoch": 1.928221373839114, "grad_norm": 14.356614112854004, "learning_rate": 5.708145882105612e-06, "loss": 0.62739334, "memory(GiB)": 34.88, "step": 71215, "train_speed(iter/s)": 0.410638 }, { "acc": 0.91745129, "epoch": 1.9283567541223297, "grad_norm": 4.622082710266113, "learning_rate": 5.707591947663709e-06, "loss": 0.41352472, "memory(GiB)": 34.88, "step": 71220, "train_speed(iter/s)": 0.410639 }, { "acc": 0.90402946, "epoch": 1.9284921344055452, "grad_norm": 5.940860271453857, "learning_rate": 5.707038004364086e-06, "loss": 0.53628626, "memory(GiB)": 34.88, "step": 71225, "train_speed(iter/s)": 0.41064 }, { "acc": 0.92790194, "epoch": 1.9286275146887606, "grad_norm": 4.599722385406494, "learning_rate": 5.7064840522136874e-06, "loss": 0.33566465, "memory(GiB)": 34.88, "step": 71230, "train_speed(iter/s)": 0.410642 }, { "acc": 0.91384068, "epoch": 1.9287628949719764, "grad_norm": 6.013388156890869, "learning_rate": 5.705930091219453e-06, "loss": 0.45702453, "memory(GiB)": 34.88, "step": 71235, "train_speed(iter/s)": 0.410644 }, { "acc": 0.9144165, "epoch": 1.9288982752551918, "grad_norm": 6.712153911590576, "learning_rate": 5.7053761213883194e-06, "loss": 0.44923077, "memory(GiB)": 34.88, "step": 71240, "train_speed(iter/s)": 0.410645 }, { "acc": 0.9095396, "epoch": 1.9290336555384073, "grad_norm": 5.204911231994629, "learning_rate": 5.704822142727229e-06, "loss": 0.46114702, "memory(GiB)": 34.88, "step": 71245, "train_speed(iter/s)": 0.410647 }, { "acc": 0.91534615, "epoch": 1.929169035821623, "grad_norm": 5.869836807250977, "learning_rate": 5.7042681552431175e-06, "loss": 0.4411128, "memory(GiB)": 34.88, "step": 71250, "train_speed(iter/s)": 0.410648 }, { "acc": 0.91110172, "epoch": 1.9293044161048385, "grad_norm": 5.340203285217285, "learning_rate": 5.703714158942929e-06, "loss": 0.5357667, "memory(GiB)": 34.88, "step": 71255, "train_speed(iter/s)": 0.410649 }, { "acc": 0.90074778, "epoch": 1.929439796388054, "grad_norm": 12.019087791442871, "learning_rate": 5.7031601538336e-06, "loss": 0.56258354, "memory(GiB)": 34.88, "step": 71260, "train_speed(iter/s)": 0.410651 }, { "acc": 0.88887243, "epoch": 1.9295751766712694, "grad_norm": 25.161367416381836, "learning_rate": 5.702606139922071e-06, "loss": 0.6235342, "memory(GiB)": 34.88, "step": 71265, "train_speed(iter/s)": 0.410653 }, { "acc": 0.90692949, "epoch": 1.9297105569544852, "grad_norm": 7.717591762542725, "learning_rate": 5.702052117215284e-06, "loss": 0.46897416, "memory(GiB)": 34.88, "step": 71270, "train_speed(iter/s)": 0.410654 }, { "acc": 0.92057552, "epoch": 1.9298459372377006, "grad_norm": 15.352932929992676, "learning_rate": 5.701498085720177e-06, "loss": 0.42862768, "memory(GiB)": 34.88, "step": 71275, "train_speed(iter/s)": 0.410656 }, { "acc": 0.92277937, "epoch": 1.9299813175209164, "grad_norm": 6.358725547790527, "learning_rate": 5.700944045443694e-06, "loss": 0.49357367, "memory(GiB)": 34.88, "step": 71280, "train_speed(iter/s)": 0.410657 }, { "acc": 0.91818933, "epoch": 1.9301166978041318, "grad_norm": 10.195756912231445, "learning_rate": 5.700389996392772e-06, "loss": 0.45950503, "memory(GiB)": 34.88, "step": 71285, "train_speed(iter/s)": 0.410659 }, { "acc": 0.92367325, "epoch": 1.9302520780873473, "grad_norm": 12.733593940734863, "learning_rate": 5.699835938574352e-06, "loss": 0.44876137, "memory(GiB)": 34.88, "step": 71290, "train_speed(iter/s)": 0.410661 }, { "acc": 0.92905817, "epoch": 1.930387458370563, "grad_norm": 49.867435455322266, "learning_rate": 5.6992818719953745e-06, "loss": 0.29549584, "memory(GiB)": 34.88, "step": 71295, "train_speed(iter/s)": 0.410662 }, { "acc": 0.93552456, "epoch": 1.9305228386537785, "grad_norm": 8.334688186645508, "learning_rate": 5.698727796662782e-06, "loss": 0.39420242, "memory(GiB)": 34.88, "step": 71300, "train_speed(iter/s)": 0.410664 }, { "acc": 0.90894413, "epoch": 1.930658218936994, "grad_norm": 3.7057876586914062, "learning_rate": 5.6981737125835145e-06, "loss": 0.54644022, "memory(GiB)": 34.88, "step": 71305, "train_speed(iter/s)": 0.410666 }, { "acc": 0.90398874, "epoch": 1.9307935992202094, "grad_norm": 17.903165817260742, "learning_rate": 5.697619619764512e-06, "loss": 0.58397193, "memory(GiB)": 34.88, "step": 71310, "train_speed(iter/s)": 0.410667 }, { "acc": 0.93030186, "epoch": 1.9309289795034252, "grad_norm": 10.64526081085205, "learning_rate": 5.697065518212717e-06, "loss": 0.40317259, "memory(GiB)": 34.88, "step": 71315, "train_speed(iter/s)": 0.410669 }, { "acc": 0.88864861, "epoch": 1.9310643597866406, "grad_norm": 7.718283653259277, "learning_rate": 5.696511407935069e-06, "loss": 0.57237229, "memory(GiB)": 34.88, "step": 71320, "train_speed(iter/s)": 0.41067 }, { "acc": 0.90489435, "epoch": 1.9311997400698562, "grad_norm": 13.251885414123535, "learning_rate": 5.695957288938512e-06, "loss": 0.52901802, "memory(GiB)": 34.88, "step": 71325, "train_speed(iter/s)": 0.410672 }, { "acc": 0.90217628, "epoch": 1.9313351203530718, "grad_norm": 4.200152397155762, "learning_rate": 5.695403161229985e-06, "loss": 0.51027279, "memory(GiB)": 34.88, "step": 71330, "train_speed(iter/s)": 0.410673 }, { "acc": 0.9093153, "epoch": 1.9314705006362873, "grad_norm": 7.697919845581055, "learning_rate": 5.694849024816431e-06, "loss": 0.45452328, "memory(GiB)": 34.88, "step": 71335, "train_speed(iter/s)": 0.410675 }, { "acc": 0.92121563, "epoch": 1.931605880919503, "grad_norm": 7.15998649597168, "learning_rate": 5.694294879704791e-06, "loss": 0.44694118, "memory(GiB)": 34.88, "step": 71340, "train_speed(iter/s)": 0.410677 }, { "acc": 0.92412891, "epoch": 1.9317412612027183, "grad_norm": 5.802159309387207, "learning_rate": 5.693740725902006e-06, "loss": 0.37893724, "memory(GiB)": 34.88, "step": 71345, "train_speed(iter/s)": 0.410678 }, { "acc": 0.91045303, "epoch": 1.931876641485934, "grad_norm": 15.554612159729004, "learning_rate": 5.69318656341502e-06, "loss": 0.4603982, "memory(GiB)": 34.88, "step": 71350, "train_speed(iter/s)": 0.41068 }, { "acc": 0.9102869, "epoch": 1.9320120217691494, "grad_norm": 6.726929187774658, "learning_rate": 5.6926323922507735e-06, "loss": 0.54832587, "memory(GiB)": 34.88, "step": 71355, "train_speed(iter/s)": 0.410681 }, { "acc": 0.89271965, "epoch": 1.9321474020523652, "grad_norm": 10.269030570983887, "learning_rate": 5.6920782124162085e-06, "loss": 0.59439344, "memory(GiB)": 34.88, "step": 71360, "train_speed(iter/s)": 0.410683 }, { "acc": 0.89841728, "epoch": 1.9322827823355806, "grad_norm": 16.07463836669922, "learning_rate": 5.691524023918266e-06, "loss": 0.56339283, "memory(GiB)": 34.88, "step": 71365, "train_speed(iter/s)": 0.410685 }, { "acc": 0.92456799, "epoch": 1.9324181626187962, "grad_norm": 6.933966159820557, "learning_rate": 5.690969826763892e-06, "loss": 0.41224027, "memory(GiB)": 34.88, "step": 71370, "train_speed(iter/s)": 0.410686 }, { "acc": 0.90550251, "epoch": 1.9325535429020118, "grad_norm": 9.10245418548584, "learning_rate": 5.6904156209600255e-06, "loss": 0.59045305, "memory(GiB)": 34.88, "step": 71375, "train_speed(iter/s)": 0.410688 }, { "acc": 0.91183023, "epoch": 1.9326889231852273, "grad_norm": 6.163895606994629, "learning_rate": 5.689861406513609e-06, "loss": 0.54743905, "memory(GiB)": 34.88, "step": 71380, "train_speed(iter/s)": 0.410689 }, { "acc": 0.92179947, "epoch": 1.932824303468443, "grad_norm": 11.88021183013916, "learning_rate": 5.689307183431588e-06, "loss": 0.46835537, "memory(GiB)": 34.88, "step": 71385, "train_speed(iter/s)": 0.410691 }, { "acc": 0.90569286, "epoch": 1.9329596837516583, "grad_norm": 4.704657554626465, "learning_rate": 5.688752951720903e-06, "loss": 0.49560585, "memory(GiB)": 34.88, "step": 71390, "train_speed(iter/s)": 0.410692 }, { "acc": 0.90560131, "epoch": 1.933095064034874, "grad_norm": 9.748579025268555, "learning_rate": 5.688198711388498e-06, "loss": 0.53169875, "memory(GiB)": 34.88, "step": 71395, "train_speed(iter/s)": 0.410694 }, { "acc": 0.91375523, "epoch": 1.9332304443180894, "grad_norm": 5.193241119384766, "learning_rate": 5.6876444624413144e-06, "loss": 0.530443, "memory(GiB)": 34.88, "step": 71400, "train_speed(iter/s)": 0.410696 }, { "acc": 0.91270027, "epoch": 1.933365824601305, "grad_norm": 8.805798530578613, "learning_rate": 5.687090204886296e-06, "loss": 0.49612589, "memory(GiB)": 34.88, "step": 71405, "train_speed(iter/s)": 0.410697 }, { "acc": 0.91242905, "epoch": 1.9335012048845206, "grad_norm": 12.830096244812012, "learning_rate": 5.6865359387303874e-06, "loss": 0.40675921, "memory(GiB)": 34.88, "step": 71410, "train_speed(iter/s)": 0.410699 }, { "acc": 0.92313232, "epoch": 1.9336365851677362, "grad_norm": 6.322011470794678, "learning_rate": 5.685981663980529e-06, "loss": 0.42640944, "memory(GiB)": 34.88, "step": 71415, "train_speed(iter/s)": 0.4107 }, { "acc": 0.90459404, "epoch": 1.9337719654509518, "grad_norm": 4.459234237670898, "learning_rate": 5.6854273806436676e-06, "loss": 0.51503086, "memory(GiB)": 34.88, "step": 71420, "train_speed(iter/s)": 0.410701 }, { "acc": 0.91328316, "epoch": 1.9339073457341671, "grad_norm": 17.398269653320312, "learning_rate": 5.6848730887267435e-06, "loss": 0.41790032, "memory(GiB)": 34.88, "step": 71425, "train_speed(iter/s)": 0.410703 }, { "acc": 0.90897884, "epoch": 1.934042726017383, "grad_norm": 8.668642044067383, "learning_rate": 5.684318788236702e-06, "loss": 0.53778944, "memory(GiB)": 34.88, "step": 71430, "train_speed(iter/s)": 0.410705 }, { "acc": 0.93012581, "epoch": 1.9341781063005983, "grad_norm": 9.421181678771973, "learning_rate": 5.683764479180487e-06, "loss": 0.35301847, "memory(GiB)": 34.88, "step": 71435, "train_speed(iter/s)": 0.410706 }, { "acc": 0.90247593, "epoch": 1.934313486583814, "grad_norm": 7.3467607498168945, "learning_rate": 5.683210161565041e-06, "loss": 0.54623499, "memory(GiB)": 34.88, "step": 71440, "train_speed(iter/s)": 0.410708 }, { "acc": 0.92304411, "epoch": 1.9344488668670294, "grad_norm": 6.134523391723633, "learning_rate": 5.682655835397308e-06, "loss": 0.46012478, "memory(GiB)": 34.88, "step": 71445, "train_speed(iter/s)": 0.410709 }, { "acc": 0.91512165, "epoch": 1.934584247150245, "grad_norm": 4.764509201049805, "learning_rate": 5.682101500684234e-06, "loss": 0.43965049, "memory(GiB)": 34.88, "step": 71450, "train_speed(iter/s)": 0.41071 }, { "acc": 0.91232109, "epoch": 1.9347196274334606, "grad_norm": 5.113310813903809, "learning_rate": 5.681547157432762e-06, "loss": 0.52343655, "memory(GiB)": 34.88, "step": 71455, "train_speed(iter/s)": 0.410712 }, { "acc": 0.92372942, "epoch": 1.9348550077166762, "grad_norm": 9.510449409484863, "learning_rate": 5.6809928056498364e-06, "loss": 0.47531686, "memory(GiB)": 34.88, "step": 71460, "train_speed(iter/s)": 0.410714 }, { "acc": 0.91361923, "epoch": 1.9349903879998918, "grad_norm": 6.246018886566162, "learning_rate": 5.680438445342401e-06, "loss": 0.39322352, "memory(GiB)": 34.88, "step": 71465, "train_speed(iter/s)": 0.410715 }, { "acc": 0.90565453, "epoch": 1.9351257682831071, "grad_norm": 5.6146464347839355, "learning_rate": 5.679884076517399e-06, "loss": 0.54379373, "memory(GiB)": 34.88, "step": 71470, "train_speed(iter/s)": 0.410717 }, { "acc": 0.92013702, "epoch": 1.935261148566323, "grad_norm": 9.076892852783203, "learning_rate": 5.679329699181777e-06, "loss": 0.41545372, "memory(GiB)": 34.88, "step": 71475, "train_speed(iter/s)": 0.410719 }, { "acc": 0.90193024, "epoch": 1.9353965288495383, "grad_norm": 9.971704483032227, "learning_rate": 5.67877531334248e-06, "loss": 0.57244101, "memory(GiB)": 34.88, "step": 71480, "train_speed(iter/s)": 0.41072 }, { "acc": 0.90980759, "epoch": 1.9355319091327539, "grad_norm": 2.6714916229248047, "learning_rate": 5.678220919006452e-06, "loss": 0.49054217, "memory(GiB)": 34.88, "step": 71485, "train_speed(iter/s)": 0.410722 }, { "acc": 0.91182442, "epoch": 1.9356672894159694, "grad_norm": 7.0323357582092285, "learning_rate": 5.6776665161806365e-06, "loss": 0.42531986, "memory(GiB)": 34.88, "step": 71490, "train_speed(iter/s)": 0.410723 }, { "acc": 0.91752224, "epoch": 1.935802669699185, "grad_norm": 8.530436515808105, "learning_rate": 5.6771121048719816e-06, "loss": 0.46445532, "memory(GiB)": 34.88, "step": 71495, "train_speed(iter/s)": 0.410725 }, { "acc": 0.92104816, "epoch": 1.9359380499824006, "grad_norm": 5.986329555511475, "learning_rate": 5.67655768508743e-06, "loss": 0.48080158, "memory(GiB)": 34.88, "step": 71500, "train_speed(iter/s)": 0.410726 }, { "acc": 0.90210438, "epoch": 1.936073430265616, "grad_norm": 10.709624290466309, "learning_rate": 5.676003256833926e-06, "loss": 0.57696681, "memory(GiB)": 34.88, "step": 71505, "train_speed(iter/s)": 0.410728 }, { "acc": 0.90272369, "epoch": 1.9362088105488318, "grad_norm": 11.461516380310059, "learning_rate": 5.675448820118417e-06, "loss": 0.5621212, "memory(GiB)": 34.88, "step": 71510, "train_speed(iter/s)": 0.41073 }, { "acc": 0.92106323, "epoch": 1.9363441908320471, "grad_norm": 6.189877033233643, "learning_rate": 5.674894374947849e-06, "loss": 0.45998149, "memory(GiB)": 34.88, "step": 71515, "train_speed(iter/s)": 0.410731 }, { "acc": 0.90766468, "epoch": 1.936479571115263, "grad_norm": 18.98580551147461, "learning_rate": 5.674339921329165e-06, "loss": 0.45594902, "memory(GiB)": 34.88, "step": 71520, "train_speed(iter/s)": 0.410733 }, { "acc": 0.89572086, "epoch": 1.9366149513984783, "grad_norm": 39.58536148071289, "learning_rate": 5.673785459269312e-06, "loss": 0.66389031, "memory(GiB)": 34.88, "step": 71525, "train_speed(iter/s)": 0.410734 }, { "acc": 0.92114658, "epoch": 1.9367503316816939, "grad_norm": 7.3731584548950195, "learning_rate": 5.673230988775235e-06, "loss": 0.43710771, "memory(GiB)": 34.88, "step": 71530, "train_speed(iter/s)": 0.410736 }, { "acc": 0.91316938, "epoch": 1.9368857119649094, "grad_norm": 12.092292785644531, "learning_rate": 5.672676509853881e-06, "loss": 0.47315688, "memory(GiB)": 34.88, "step": 71535, "train_speed(iter/s)": 0.410738 }, { "acc": 0.91908293, "epoch": 1.937021092248125, "grad_norm": 22.660879135131836, "learning_rate": 5.6721220225121965e-06, "loss": 0.50899696, "memory(GiB)": 34.88, "step": 71540, "train_speed(iter/s)": 0.410739 }, { "acc": 0.90089169, "epoch": 1.9371564725313406, "grad_norm": 13.121023178100586, "learning_rate": 5.6715675267571265e-06, "loss": 0.54501133, "memory(GiB)": 34.88, "step": 71545, "train_speed(iter/s)": 0.410741 }, { "acc": 0.90951042, "epoch": 1.937291852814556, "grad_norm": 10.141382217407227, "learning_rate": 5.671013022595615e-06, "loss": 0.51263199, "memory(GiB)": 34.88, "step": 71550, "train_speed(iter/s)": 0.410743 }, { "acc": 0.89970608, "epoch": 1.9374272330977718, "grad_norm": 10.882344245910645, "learning_rate": 5.670458510034612e-06, "loss": 0.61863341, "memory(GiB)": 34.88, "step": 71555, "train_speed(iter/s)": 0.410744 }, { "acc": 0.90978355, "epoch": 1.9375626133809871, "grad_norm": 10.327436447143555, "learning_rate": 5.6699039890810616e-06, "loss": 0.51783881, "memory(GiB)": 34.88, "step": 71560, "train_speed(iter/s)": 0.410746 }, { "acc": 0.9226284, "epoch": 1.9376979936642027, "grad_norm": 11.428085327148438, "learning_rate": 5.669349459741911e-06, "loss": 0.47948599, "memory(GiB)": 34.88, "step": 71565, "train_speed(iter/s)": 0.410747 }, { "acc": 0.91786423, "epoch": 1.9378333739474183, "grad_norm": 10.67019271850586, "learning_rate": 5.6687949220241055e-06, "loss": 0.4908679, "memory(GiB)": 34.88, "step": 71570, "train_speed(iter/s)": 0.410749 }, { "acc": 0.9184516, "epoch": 1.9379687542306339, "grad_norm": 11.038080215454102, "learning_rate": 5.668240375934594e-06, "loss": 0.45492806, "memory(GiB)": 34.88, "step": 71575, "train_speed(iter/s)": 0.41075 }, { "acc": 0.90283794, "epoch": 1.9381041345138494, "grad_norm": 6.604957580566406, "learning_rate": 5.6676858214803245e-06, "loss": 0.57631731, "memory(GiB)": 34.88, "step": 71580, "train_speed(iter/s)": 0.410752 }, { "acc": 0.91810503, "epoch": 1.9382395147970648, "grad_norm": 6.466598987579346, "learning_rate": 5.667131258668238e-06, "loss": 0.44733744, "memory(GiB)": 34.88, "step": 71585, "train_speed(iter/s)": 0.410754 }, { "acc": 0.91882839, "epoch": 1.9383748950802806, "grad_norm": 5.583327770233154, "learning_rate": 5.666576687505287e-06, "loss": 0.38467329, "memory(GiB)": 34.88, "step": 71590, "train_speed(iter/s)": 0.410755 }, { "acc": 0.89810333, "epoch": 1.938510275363496, "grad_norm": 10.703125953674316, "learning_rate": 5.666022107998415e-06, "loss": 0.59704647, "memory(GiB)": 34.88, "step": 71595, "train_speed(iter/s)": 0.410757 }, { "acc": 0.9129921, "epoch": 1.9386456556467118, "grad_norm": 3.5298945903778076, "learning_rate": 5.665467520154572e-06, "loss": 0.40993586, "memory(GiB)": 34.88, "step": 71600, "train_speed(iter/s)": 0.410758 }, { "acc": 0.92063942, "epoch": 1.9387810359299271, "grad_norm": 6.261803150177002, "learning_rate": 5.664912923980704e-06, "loss": 0.4226263, "memory(GiB)": 34.88, "step": 71605, "train_speed(iter/s)": 0.41076 }, { "acc": 0.91686621, "epoch": 1.9389164162131427, "grad_norm": 11.087691307067871, "learning_rate": 5.664358319483758e-06, "loss": 0.52989521, "memory(GiB)": 34.88, "step": 71610, "train_speed(iter/s)": 0.410762 }, { "acc": 0.9352663, "epoch": 1.9390517964963583, "grad_norm": 6.295312404632568, "learning_rate": 5.663803706670683e-06, "loss": 0.31639485, "memory(GiB)": 34.88, "step": 71615, "train_speed(iter/s)": 0.410763 }, { "acc": 0.93130703, "epoch": 1.9391871767795739, "grad_norm": 6.78889274597168, "learning_rate": 5.663249085548427e-06, "loss": 0.33896754, "memory(GiB)": 34.88, "step": 71620, "train_speed(iter/s)": 0.410765 }, { "acc": 0.93419609, "epoch": 1.9393225570627894, "grad_norm": 4.952636241912842, "learning_rate": 5.662694456123934e-06, "loss": 0.30050302, "memory(GiB)": 34.88, "step": 71625, "train_speed(iter/s)": 0.410766 }, { "acc": 0.90583706, "epoch": 1.9394579373460048, "grad_norm": 15.19015121459961, "learning_rate": 5.662139818404154e-06, "loss": 0.54129734, "memory(GiB)": 34.88, "step": 71630, "train_speed(iter/s)": 0.410768 }, { "acc": 0.91750069, "epoch": 1.9395933176292206, "grad_norm": 4.352296352386475, "learning_rate": 5.6615851723960355e-06, "loss": 0.43635073, "memory(GiB)": 34.88, "step": 71635, "train_speed(iter/s)": 0.410769 }, { "acc": 0.92724199, "epoch": 1.939728697912436, "grad_norm": 8.57851791381836, "learning_rate": 5.661030518106528e-06, "loss": 0.42220039, "memory(GiB)": 34.88, "step": 71640, "train_speed(iter/s)": 0.410771 }, { "acc": 0.91953964, "epoch": 1.9398640781956515, "grad_norm": 4.676213264465332, "learning_rate": 5.660475855542577e-06, "loss": 0.49349155, "memory(GiB)": 34.88, "step": 71645, "train_speed(iter/s)": 0.410772 }, { "acc": 0.91547976, "epoch": 1.9399994584788671, "grad_norm": 5.652477741241455, "learning_rate": 5.65992118471113e-06, "loss": 0.49607015, "memory(GiB)": 34.88, "step": 71650, "train_speed(iter/s)": 0.410774 }, { "acc": 0.91245089, "epoch": 1.9401348387620827, "grad_norm": 4.196532726287842, "learning_rate": 5.659366505619138e-06, "loss": 0.47112236, "memory(GiB)": 34.88, "step": 71655, "train_speed(iter/s)": 0.410775 }, { "acc": 0.9282239, "epoch": 1.9402702190452983, "grad_norm": 4.420686721801758, "learning_rate": 5.658811818273549e-06, "loss": 0.32806435, "memory(GiB)": 34.88, "step": 71660, "train_speed(iter/s)": 0.410777 }, { "acc": 0.91702366, "epoch": 1.9404055993285136, "grad_norm": 17.622570037841797, "learning_rate": 5.658257122681311e-06, "loss": 0.56813164, "memory(GiB)": 34.88, "step": 71665, "train_speed(iter/s)": 0.410778 }, { "acc": 0.90488091, "epoch": 1.9405409796117294, "grad_norm": 11.056812286376953, "learning_rate": 5.657702418849371e-06, "loss": 0.60743084, "memory(GiB)": 34.88, "step": 71670, "train_speed(iter/s)": 0.41078 }, { "acc": 0.92013102, "epoch": 1.9406763598949448, "grad_norm": 10.082213401794434, "learning_rate": 5.65714770678468e-06, "loss": 0.41850743, "memory(GiB)": 34.88, "step": 71675, "train_speed(iter/s)": 0.410781 }, { "acc": 0.92718163, "epoch": 1.9408117401781606, "grad_norm": 3.245551586151123, "learning_rate": 5.656592986494186e-06, "loss": 0.3381649, "memory(GiB)": 34.88, "step": 71680, "train_speed(iter/s)": 0.410782 }, { "acc": 0.90352039, "epoch": 1.940947120461376, "grad_norm": 7.1037678718566895, "learning_rate": 5.656038257984838e-06, "loss": 0.5786931, "memory(GiB)": 34.88, "step": 71685, "train_speed(iter/s)": 0.410784 }, { "acc": 0.92478628, "epoch": 1.9410825007445915, "grad_norm": 13.017966270446777, "learning_rate": 5.655483521263586e-06, "loss": 0.4932106, "memory(GiB)": 34.88, "step": 71690, "train_speed(iter/s)": 0.410785 }, { "acc": 0.91640091, "epoch": 1.9412178810278071, "grad_norm": 18.094057083129883, "learning_rate": 5.654928776337378e-06, "loss": 0.4606319, "memory(GiB)": 34.88, "step": 71695, "train_speed(iter/s)": 0.410787 }, { "acc": 0.90471601, "epoch": 1.9413532613110227, "grad_norm": 4.698531627655029, "learning_rate": 5.654374023213164e-06, "loss": 0.49976807, "memory(GiB)": 34.88, "step": 71700, "train_speed(iter/s)": 0.410788 }, { "acc": 0.92475328, "epoch": 1.9414886415942383, "grad_norm": 8.007797241210938, "learning_rate": 5.653819261897894e-06, "loss": 0.44107533, "memory(GiB)": 34.88, "step": 71705, "train_speed(iter/s)": 0.41079 }, { "acc": 0.92764549, "epoch": 1.9416240218774536, "grad_norm": 7.9352498054504395, "learning_rate": 5.653264492398515e-06, "loss": 0.30292187, "memory(GiB)": 34.88, "step": 71710, "train_speed(iter/s)": 0.410792 }, { "acc": 0.89796209, "epoch": 1.9417594021606694, "grad_norm": 17.02090835571289, "learning_rate": 5.6527097147219766e-06, "loss": 0.57922816, "memory(GiB)": 34.88, "step": 71715, "train_speed(iter/s)": 0.410793 }, { "acc": 0.92260418, "epoch": 1.9418947824438848, "grad_norm": 13.07659912109375, "learning_rate": 5.652154928875232e-06, "loss": 0.39670434, "memory(GiB)": 34.88, "step": 71720, "train_speed(iter/s)": 0.410794 }, { "acc": 0.92029953, "epoch": 1.9420301627271004, "grad_norm": 14.71956729888916, "learning_rate": 5.651600134865228e-06, "loss": 0.48235455, "memory(GiB)": 34.88, "step": 71725, "train_speed(iter/s)": 0.410796 }, { "acc": 0.89129333, "epoch": 1.942165543010316, "grad_norm": 16.932764053344727, "learning_rate": 5.651045332698916e-06, "loss": 0.62569942, "memory(GiB)": 34.88, "step": 71730, "train_speed(iter/s)": 0.410798 }, { "acc": 0.91549244, "epoch": 1.9423009232935315, "grad_norm": 7.176146984100342, "learning_rate": 5.650490522383244e-06, "loss": 0.41505218, "memory(GiB)": 34.88, "step": 71735, "train_speed(iter/s)": 0.410799 }, { "acc": 0.92419348, "epoch": 1.9424363035767471, "grad_norm": 9.199615478515625, "learning_rate": 5.649935703925165e-06, "loss": 0.38031874, "memory(GiB)": 34.88, "step": 71740, "train_speed(iter/s)": 0.410801 }, { "acc": 0.90760212, "epoch": 1.9425716838599625, "grad_norm": 10.39315414428711, "learning_rate": 5.649380877331628e-06, "loss": 0.53411684, "memory(GiB)": 34.88, "step": 71745, "train_speed(iter/s)": 0.410803 }, { "acc": 0.92357702, "epoch": 1.9427070641431783, "grad_norm": 7.200096130371094, "learning_rate": 5.648826042609582e-06, "loss": 0.42838783, "memory(GiB)": 34.88, "step": 71750, "train_speed(iter/s)": 0.410804 }, { "acc": 0.93819838, "epoch": 1.9428424444263936, "grad_norm": 10.445565223693848, "learning_rate": 5.648271199765979e-06, "loss": 0.32863131, "memory(GiB)": 34.88, "step": 71755, "train_speed(iter/s)": 0.410806 }, { "acc": 0.92466431, "epoch": 1.9429778247096094, "grad_norm": 6.734112739562988, "learning_rate": 5.647716348807768e-06, "loss": 0.42565775, "memory(GiB)": 34.88, "step": 71760, "train_speed(iter/s)": 0.410808 }, { "acc": 0.902528, "epoch": 1.9431132049928248, "grad_norm": 11.231856346130371, "learning_rate": 5.647161489741901e-06, "loss": 0.57272186, "memory(GiB)": 34.88, "step": 71765, "train_speed(iter/s)": 0.410809 }, { "acc": 0.92555799, "epoch": 1.9432485852760404, "grad_norm": 4.473602294921875, "learning_rate": 5.646606622575327e-06, "loss": 0.33333006, "memory(GiB)": 34.88, "step": 71770, "train_speed(iter/s)": 0.410811 }, { "acc": 0.91415653, "epoch": 1.943383965559256, "grad_norm": 6.547257423400879, "learning_rate": 5.646051747315e-06, "loss": 0.44761882, "memory(GiB)": 34.88, "step": 71775, "train_speed(iter/s)": 0.410812 }, { "acc": 0.91211281, "epoch": 1.9435193458424715, "grad_norm": 7.695331573486328, "learning_rate": 5.645496863967868e-06, "loss": 0.43619733, "memory(GiB)": 34.88, "step": 71780, "train_speed(iter/s)": 0.410814 }, { "acc": 0.90941639, "epoch": 1.9436547261256871, "grad_norm": 11.210928916931152, "learning_rate": 5.644941972540882e-06, "loss": 0.50562782, "memory(GiB)": 34.88, "step": 71785, "train_speed(iter/s)": 0.410815 }, { "acc": 0.92728767, "epoch": 1.9437901064089025, "grad_norm": 8.075199127197266, "learning_rate": 5.644387073040995e-06, "loss": 0.43220048, "memory(GiB)": 34.88, "step": 71790, "train_speed(iter/s)": 0.410817 }, { "acc": 0.90478535, "epoch": 1.9439254866921183, "grad_norm": 14.086166381835938, "learning_rate": 5.643832165475157e-06, "loss": 0.58614206, "memory(GiB)": 34.88, "step": 71795, "train_speed(iter/s)": 0.410819 }, { "acc": 0.93300304, "epoch": 1.9440608669753336, "grad_norm": 9.335354804992676, "learning_rate": 5.64327724985032e-06, "loss": 0.40141354, "memory(GiB)": 34.88, "step": 71800, "train_speed(iter/s)": 0.41082 }, { "acc": 0.89617901, "epoch": 1.9441962472585492, "grad_norm": 11.44621467590332, "learning_rate": 5.642722326173434e-06, "loss": 0.59412107, "memory(GiB)": 34.88, "step": 71805, "train_speed(iter/s)": 0.410822 }, { "acc": 0.91848793, "epoch": 1.9443316275417648, "grad_norm": 4.997931003570557, "learning_rate": 5.6421673944514525e-06, "loss": 0.41972899, "memory(GiB)": 34.88, "step": 71810, "train_speed(iter/s)": 0.410823 }, { "acc": 0.89923525, "epoch": 1.9444670078249804, "grad_norm": 8.036127090454102, "learning_rate": 5.6416124546913246e-06, "loss": 0.47525997, "memory(GiB)": 34.88, "step": 71815, "train_speed(iter/s)": 0.410825 }, { "acc": 0.91842585, "epoch": 1.944602388108196, "grad_norm": 3.661928415298462, "learning_rate": 5.641057506900004e-06, "loss": 0.46533785, "memory(GiB)": 34.88, "step": 71820, "train_speed(iter/s)": 0.410826 }, { "acc": 0.91989784, "epoch": 1.9447377683914113, "grad_norm": 4.825316429138184, "learning_rate": 5.640502551084442e-06, "loss": 0.46158428, "memory(GiB)": 34.88, "step": 71825, "train_speed(iter/s)": 0.410828 }, { "acc": 0.90900393, "epoch": 1.9448731486746271, "grad_norm": 31.455230712890625, "learning_rate": 5.639947587251591e-06, "loss": 0.42950931, "memory(GiB)": 34.88, "step": 71830, "train_speed(iter/s)": 0.41083 }, { "acc": 0.90114441, "epoch": 1.9450085289578425, "grad_norm": 8.056849479675293, "learning_rate": 5.639392615408403e-06, "loss": 0.56746993, "memory(GiB)": 34.88, "step": 71835, "train_speed(iter/s)": 0.410831 }, { "acc": 0.90832176, "epoch": 1.9451439092410583, "grad_norm": 5.403973579406738, "learning_rate": 5.638837635561827e-06, "loss": 0.57279387, "memory(GiB)": 34.88, "step": 71840, "train_speed(iter/s)": 0.410833 }, { "acc": 0.90330248, "epoch": 1.9452792895242736, "grad_norm": 15.435091972351074, "learning_rate": 5.638282647718819e-06, "loss": 0.61434669, "memory(GiB)": 34.88, "step": 71845, "train_speed(iter/s)": 0.410835 }, { "acc": 0.93326149, "epoch": 1.9454146698074892, "grad_norm": 5.345468044281006, "learning_rate": 5.63772765188633e-06, "loss": 0.32153602, "memory(GiB)": 34.88, "step": 71850, "train_speed(iter/s)": 0.410836 }, { "acc": 0.90742826, "epoch": 1.9455500500907048, "grad_norm": 7.402547359466553, "learning_rate": 5.637172648071313e-06, "loss": 0.5146915, "memory(GiB)": 34.88, "step": 71855, "train_speed(iter/s)": 0.410837 }, { "acc": 0.93253231, "epoch": 1.9456854303739204, "grad_norm": 5.503798961639404, "learning_rate": 5.636617636280719e-06, "loss": 0.35456083, "memory(GiB)": 34.88, "step": 71860, "train_speed(iter/s)": 0.410839 }, { "acc": 0.90432644, "epoch": 1.945820810657136, "grad_norm": 9.849028587341309, "learning_rate": 5.636062616521503e-06, "loss": 0.56082048, "memory(GiB)": 34.88, "step": 71865, "train_speed(iter/s)": 0.41084 }, { "acc": 0.90431185, "epoch": 1.9459561909403513, "grad_norm": 7.637630939483643, "learning_rate": 5.635507588800615e-06, "loss": 0.65112019, "memory(GiB)": 34.88, "step": 71870, "train_speed(iter/s)": 0.410842 }, { "acc": 0.90206289, "epoch": 1.9460915712235671, "grad_norm": 13.825461387634277, "learning_rate": 5.634952553125011e-06, "loss": 0.52165718, "memory(GiB)": 34.88, "step": 71875, "train_speed(iter/s)": 0.410844 }, { "acc": 0.90633869, "epoch": 1.9462269515067825, "grad_norm": 8.84158992767334, "learning_rate": 5.634397509501641e-06, "loss": 0.42991962, "memory(GiB)": 34.88, "step": 71880, "train_speed(iter/s)": 0.410845 }, { "acc": 0.91739311, "epoch": 1.946362331789998, "grad_norm": 6.018726825714111, "learning_rate": 5.633842457937458e-06, "loss": 0.44046593, "memory(GiB)": 34.88, "step": 71885, "train_speed(iter/s)": 0.410847 }, { "acc": 0.92446365, "epoch": 1.9464977120732136, "grad_norm": 4.171277046203613, "learning_rate": 5.633287398439419e-06, "loss": 0.373278, "memory(GiB)": 34.88, "step": 71890, "train_speed(iter/s)": 0.410848 }, { "acc": 0.9108078, "epoch": 1.9466330923564292, "grad_norm": 15.615154266357422, "learning_rate": 5.632732331014472e-06, "loss": 0.52815032, "memory(GiB)": 34.88, "step": 71895, "train_speed(iter/s)": 0.41085 }, { "acc": 0.91927242, "epoch": 1.9467684726396448, "grad_norm": 7.568596363067627, "learning_rate": 5.632177255669573e-06, "loss": 0.49113326, "memory(GiB)": 34.88, "step": 71900, "train_speed(iter/s)": 0.410851 }, { "acc": 0.89686337, "epoch": 1.9469038529228602, "grad_norm": 16.163068771362305, "learning_rate": 5.631622172411676e-06, "loss": 0.54331207, "memory(GiB)": 34.88, "step": 71905, "train_speed(iter/s)": 0.410853 }, { "acc": 0.91726761, "epoch": 1.947039233206076, "grad_norm": 11.764860153198242, "learning_rate": 5.631067081247733e-06, "loss": 0.42942042, "memory(GiB)": 34.88, "step": 71910, "train_speed(iter/s)": 0.410855 }, { "acc": 0.91850204, "epoch": 1.9471746134892913, "grad_norm": 10.362103462219238, "learning_rate": 5.6305119821847e-06, "loss": 0.41257687, "memory(GiB)": 34.88, "step": 71915, "train_speed(iter/s)": 0.410856 }, { "acc": 0.91046543, "epoch": 1.9473099937725071, "grad_norm": 13.101672172546387, "learning_rate": 5.629956875229526e-06, "loss": 0.45820293, "memory(GiB)": 34.88, "step": 71920, "train_speed(iter/s)": 0.410858 }, { "acc": 0.92147961, "epoch": 1.9474453740557225, "grad_norm": 5.959527015686035, "learning_rate": 5.6294017603891704e-06, "loss": 0.41421719, "memory(GiB)": 34.88, "step": 71925, "train_speed(iter/s)": 0.410859 }, { "acc": 0.9075139, "epoch": 1.947580754338938, "grad_norm": 9.893194198608398, "learning_rate": 5.628846637670582e-06, "loss": 0.51702557, "memory(GiB)": 34.88, "step": 71930, "train_speed(iter/s)": 0.410861 }, { "acc": 0.92347431, "epoch": 1.9477161346221536, "grad_norm": 13.72963809967041, "learning_rate": 5.628291507080719e-06, "loss": 0.44662232, "memory(GiB)": 34.88, "step": 71935, "train_speed(iter/s)": 0.410862 }, { "acc": 0.93735676, "epoch": 1.9478515149053692, "grad_norm": 7.794275760650635, "learning_rate": 5.627736368626534e-06, "loss": 0.34383092, "memory(GiB)": 34.88, "step": 71940, "train_speed(iter/s)": 0.410864 }, { "acc": 0.91164722, "epoch": 1.9479868951885848, "grad_norm": 15.65433120727539, "learning_rate": 5.627181222314979e-06, "loss": 0.46424618, "memory(GiB)": 34.88, "step": 71945, "train_speed(iter/s)": 0.410865 }, { "acc": 0.91518192, "epoch": 1.9481222754718002, "grad_norm": 5.944410800933838, "learning_rate": 5.626626068153012e-06, "loss": 0.51190143, "memory(GiB)": 34.88, "step": 71950, "train_speed(iter/s)": 0.410867 }, { "acc": 0.91440601, "epoch": 1.948257655755016, "grad_norm": 7.41021728515625, "learning_rate": 5.626070906147585e-06, "loss": 0.40668535, "memory(GiB)": 34.88, "step": 71955, "train_speed(iter/s)": 0.410869 }, { "acc": 0.91517982, "epoch": 1.9483930360382313, "grad_norm": 16.29928207397461, "learning_rate": 5.625515736305654e-06, "loss": 0.53518362, "memory(GiB)": 34.88, "step": 71960, "train_speed(iter/s)": 0.41087 }, { "acc": 0.90768709, "epoch": 1.948528416321447, "grad_norm": 9.401281356811523, "learning_rate": 5.624960558634172e-06, "loss": 0.5022903, "memory(GiB)": 34.88, "step": 71965, "train_speed(iter/s)": 0.410872 }, { "acc": 0.93204212, "epoch": 1.9486637966046625, "grad_norm": 6.17862606048584, "learning_rate": 5.624405373140095e-06, "loss": 0.32005577, "memory(GiB)": 34.88, "step": 71970, "train_speed(iter/s)": 0.410874 }, { "acc": 0.9003603, "epoch": 1.948799176887878, "grad_norm": 5.713304042816162, "learning_rate": 5.6238501798303755e-06, "loss": 0.55436153, "memory(GiB)": 34.88, "step": 71975, "train_speed(iter/s)": 0.410875 }, { "acc": 0.92705584, "epoch": 1.9489345571710937, "grad_norm": 8.874825477600098, "learning_rate": 5.62329497871197e-06, "loss": 0.43786335, "memory(GiB)": 34.88, "step": 71980, "train_speed(iter/s)": 0.410877 }, { "acc": 0.9178091, "epoch": 1.949069937454309, "grad_norm": 6.645376682281494, "learning_rate": 5.622739769791837e-06, "loss": 0.41242256, "memory(GiB)": 34.88, "step": 71985, "train_speed(iter/s)": 0.410878 }, { "acc": 0.90867395, "epoch": 1.9492053177375248, "grad_norm": 7.34419059753418, "learning_rate": 5.6221845530769245e-06, "loss": 0.48953056, "memory(GiB)": 34.88, "step": 71990, "train_speed(iter/s)": 0.41088 }, { "acc": 0.94173231, "epoch": 1.9493406980207402, "grad_norm": 3.2673611640930176, "learning_rate": 5.6216293285741945e-06, "loss": 0.29999287, "memory(GiB)": 34.88, "step": 71995, "train_speed(iter/s)": 0.410881 }, { "acc": 0.92582874, "epoch": 1.949476078303956, "grad_norm": 10.188399314880371, "learning_rate": 5.621074096290597e-06, "loss": 0.43005276, "memory(GiB)": 34.88, "step": 72000, "train_speed(iter/s)": 0.410883 }, { "acc": 0.92106152, "epoch": 1.9496114585871713, "grad_norm": 9.094581604003906, "learning_rate": 5.620518856233091e-06, "loss": 0.47507486, "memory(GiB)": 34.88, "step": 72005, "train_speed(iter/s)": 0.410885 }, { "acc": 0.92319393, "epoch": 1.949746838870387, "grad_norm": 5.478862285614014, "learning_rate": 5.6199636084086296e-06, "loss": 0.35026591, "memory(GiB)": 34.88, "step": 72010, "train_speed(iter/s)": 0.410886 }, { "acc": 0.91334944, "epoch": 1.9498822191536025, "grad_norm": 14.253142356872559, "learning_rate": 5.6194083528241696e-06, "loss": 0.44288163, "memory(GiB)": 34.88, "step": 72015, "train_speed(iter/s)": 0.410888 }, { "acc": 0.90709553, "epoch": 1.950017599436818, "grad_norm": 6.491940975189209, "learning_rate": 5.618853089486665e-06, "loss": 0.48042698, "memory(GiB)": 34.88, "step": 72020, "train_speed(iter/s)": 0.410889 }, { "acc": 0.92996473, "epoch": 1.9501529797200337, "grad_norm": 7.506862163543701, "learning_rate": 5.618297818403074e-06, "loss": 0.42333579, "memory(GiB)": 34.88, "step": 72025, "train_speed(iter/s)": 0.410891 }, { "acc": 0.91605825, "epoch": 1.950288360003249, "grad_norm": 7.447164535522461, "learning_rate": 5.617742539580351e-06, "loss": 0.54895649, "memory(GiB)": 34.88, "step": 72030, "train_speed(iter/s)": 0.410892 }, { "acc": 0.92155609, "epoch": 1.9504237402864648, "grad_norm": 10.290441513061523, "learning_rate": 5.617187253025452e-06, "loss": 0.39415905, "memory(GiB)": 34.88, "step": 72035, "train_speed(iter/s)": 0.410894 }, { "acc": 0.92827225, "epoch": 1.9505591205696802, "grad_norm": 6.569644927978516, "learning_rate": 5.6166319587453354e-06, "loss": 0.35611105, "memory(GiB)": 34.88, "step": 72040, "train_speed(iter/s)": 0.410895 }, { "acc": 0.92864285, "epoch": 1.9506945008528958, "grad_norm": 5.166143417358398, "learning_rate": 5.616076656746954e-06, "loss": 0.4497838, "memory(GiB)": 34.88, "step": 72045, "train_speed(iter/s)": 0.410897 }, { "acc": 0.90698824, "epoch": 1.9508298811361113, "grad_norm": 10.00222110748291, "learning_rate": 5.615521347037265e-06, "loss": 0.45760045, "memory(GiB)": 34.88, "step": 72050, "train_speed(iter/s)": 0.410898 }, { "acc": 0.92802563, "epoch": 1.950965261419327, "grad_norm": 2.8317155838012695, "learning_rate": 5.614966029623226e-06, "loss": 0.34387896, "memory(GiB)": 34.88, "step": 72055, "train_speed(iter/s)": 0.4109 }, { "acc": 0.90162897, "epoch": 1.9511006417025425, "grad_norm": 8.989034652709961, "learning_rate": 5.61441070451179e-06, "loss": 0.60224123, "memory(GiB)": 34.88, "step": 72060, "train_speed(iter/s)": 0.410901 }, { "acc": 0.91443157, "epoch": 1.9512360219857579, "grad_norm": 13.030416488647461, "learning_rate": 5.61385537170992e-06, "loss": 0.47702107, "memory(GiB)": 34.88, "step": 72065, "train_speed(iter/s)": 0.410903 }, { "acc": 0.91659927, "epoch": 1.9513714022689737, "grad_norm": 10.626998901367188, "learning_rate": 5.613300031224565e-06, "loss": 0.43719735, "memory(GiB)": 34.88, "step": 72070, "train_speed(iter/s)": 0.410904 }, { "acc": 0.9213398, "epoch": 1.951506782552189, "grad_norm": 9.681955337524414, "learning_rate": 5.612744683062689e-06, "loss": 0.47815056, "memory(GiB)": 34.88, "step": 72075, "train_speed(iter/s)": 0.410906 }, { "acc": 0.91658964, "epoch": 1.9516421628354048, "grad_norm": 5.682193279266357, "learning_rate": 5.612189327231242e-06, "loss": 0.38064208, "memory(GiB)": 34.88, "step": 72080, "train_speed(iter/s)": 0.410907 }, { "acc": 0.90972691, "epoch": 1.9517775431186202, "grad_norm": 5.7794575691223145, "learning_rate": 5.611633963737187e-06, "loss": 0.43790684, "memory(GiB)": 34.88, "step": 72085, "train_speed(iter/s)": 0.410909 }, { "acc": 0.91414948, "epoch": 1.9519129234018358, "grad_norm": 8.551109313964844, "learning_rate": 5.6110785925874764e-06, "loss": 0.41977639, "memory(GiB)": 34.88, "step": 72090, "train_speed(iter/s)": 0.41091 }, { "acc": 0.91583939, "epoch": 1.9520483036850513, "grad_norm": 5.326061725616455, "learning_rate": 5.61052321378907e-06, "loss": 0.44257789, "memory(GiB)": 34.88, "step": 72095, "train_speed(iter/s)": 0.410912 }, { "acc": 0.89963837, "epoch": 1.952183683968267, "grad_norm": 6.7673468589782715, "learning_rate": 5.609967827348924e-06, "loss": 0.57182474, "memory(GiB)": 34.88, "step": 72100, "train_speed(iter/s)": 0.410913 }, { "acc": 0.90824213, "epoch": 1.9523190642514825, "grad_norm": 7.577733039855957, "learning_rate": 5.609412433273995e-06, "loss": 0.48724971, "memory(GiB)": 34.88, "step": 72105, "train_speed(iter/s)": 0.410915 }, { "acc": 0.92285738, "epoch": 1.9524544445346979, "grad_norm": 21.794300079345703, "learning_rate": 5.608857031571242e-06, "loss": 0.42592216, "memory(GiB)": 34.88, "step": 72110, "train_speed(iter/s)": 0.410916 }, { "acc": 0.88484402, "epoch": 1.9525898248179137, "grad_norm": 8.35853385925293, "learning_rate": 5.608301622247622e-06, "loss": 0.64070921, "memory(GiB)": 34.88, "step": 72115, "train_speed(iter/s)": 0.410918 }, { "acc": 0.90259914, "epoch": 1.952725205101129, "grad_norm": 10.543851852416992, "learning_rate": 5.6077462053100915e-06, "loss": 0.55398822, "memory(GiB)": 34.88, "step": 72120, "train_speed(iter/s)": 0.410919 }, { "acc": 0.91336489, "epoch": 1.9528605853843446, "grad_norm": 7.091681003570557, "learning_rate": 5.607190780765611e-06, "loss": 0.50251598, "memory(GiB)": 34.88, "step": 72125, "train_speed(iter/s)": 0.410921 }, { "acc": 0.90305138, "epoch": 1.9529959656675602, "grad_norm": 9.026449203491211, "learning_rate": 5.606635348621133e-06, "loss": 0.53134146, "memory(GiB)": 34.88, "step": 72130, "train_speed(iter/s)": 0.410922 }, { "acc": 0.92282028, "epoch": 1.9531313459507758, "grad_norm": 7.564903259277344, "learning_rate": 5.606079908883623e-06, "loss": 0.38640792, "memory(GiB)": 34.88, "step": 72135, "train_speed(iter/s)": 0.410924 }, { "acc": 0.92215242, "epoch": 1.9532667262339913, "grad_norm": 5.5379157066345215, "learning_rate": 5.60552446156003e-06, "loss": 0.40754137, "memory(GiB)": 34.88, "step": 72140, "train_speed(iter/s)": 0.410925 }, { "acc": 0.90771847, "epoch": 1.9534021065172067, "grad_norm": 7.918568134307861, "learning_rate": 5.6049690066573206e-06, "loss": 0.4699369, "memory(GiB)": 34.88, "step": 72145, "train_speed(iter/s)": 0.410926 }, { "acc": 0.91501789, "epoch": 1.9535374868004225, "grad_norm": 10.580745697021484, "learning_rate": 5.604413544182447e-06, "loss": 0.47423687, "memory(GiB)": 34.88, "step": 72150, "train_speed(iter/s)": 0.410928 }, { "acc": 0.89743795, "epoch": 1.9536728670836379, "grad_norm": 6.085945129394531, "learning_rate": 5.603858074142371e-06, "loss": 0.58767123, "memory(GiB)": 34.88, "step": 72155, "train_speed(iter/s)": 0.410929 }, { "acc": 0.90951338, "epoch": 1.9538082473668537, "grad_norm": 13.995838165283203, "learning_rate": 5.603302596544047e-06, "loss": 0.4826509, "memory(GiB)": 34.88, "step": 72160, "train_speed(iter/s)": 0.410931 }, { "acc": 0.92024078, "epoch": 1.953943627650069, "grad_norm": 12.689321517944336, "learning_rate": 5.60274711139444e-06, "loss": 0.51764951, "memory(GiB)": 34.88, "step": 72165, "train_speed(iter/s)": 0.410933 }, { "acc": 0.9110961, "epoch": 1.9540790079332846, "grad_norm": 7.522601127624512, "learning_rate": 5.602191618700501e-06, "loss": 0.42519631, "memory(GiB)": 34.88, "step": 72170, "train_speed(iter/s)": 0.410934 }, { "acc": 0.91890593, "epoch": 1.9542143882165002, "grad_norm": 6.667364120483398, "learning_rate": 5.601636118469193e-06, "loss": 0.39580245, "memory(GiB)": 34.88, "step": 72175, "train_speed(iter/s)": 0.410936 }, { "acc": 0.91611347, "epoch": 1.9543497684997158, "grad_norm": 8.27250862121582, "learning_rate": 5.601080610707474e-06, "loss": 0.44732456, "memory(GiB)": 34.88, "step": 72180, "train_speed(iter/s)": 0.410938 }, { "acc": 0.92730656, "epoch": 1.9544851487829313, "grad_norm": 4.9287309646606445, "learning_rate": 5.600525095422304e-06, "loss": 0.41312265, "memory(GiB)": 34.88, "step": 72185, "train_speed(iter/s)": 0.410939 }, { "acc": 0.92685337, "epoch": 1.9546205290661467, "grad_norm": 6.8400959968566895, "learning_rate": 5.59996957262064e-06, "loss": 0.36608844, "memory(GiB)": 34.88, "step": 72190, "train_speed(iter/s)": 0.410941 }, { "acc": 0.91806211, "epoch": 1.9547559093493625, "grad_norm": 27.257225036621094, "learning_rate": 5.59941404230944e-06, "loss": 0.55559869, "memory(GiB)": 34.88, "step": 72195, "train_speed(iter/s)": 0.410942 }, { "acc": 0.91876535, "epoch": 1.9548912896325779, "grad_norm": 3.6351494789123535, "learning_rate": 5.598858504495666e-06, "loss": 0.39031305, "memory(GiB)": 34.88, "step": 72200, "train_speed(iter/s)": 0.410943 }, { "acc": 0.91643925, "epoch": 1.9550266699157934, "grad_norm": 10.96828842163086, "learning_rate": 5.598302959186275e-06, "loss": 0.45923328, "memory(GiB)": 34.88, "step": 72205, "train_speed(iter/s)": 0.410945 }, { "acc": 0.91057339, "epoch": 1.955162050199009, "grad_norm": 7.513296127319336, "learning_rate": 5.597747406388228e-06, "loss": 0.45671258, "memory(GiB)": 34.88, "step": 72210, "train_speed(iter/s)": 0.410947 }, { "acc": 0.91888561, "epoch": 1.9552974304822246, "grad_norm": 7.2553486824035645, "learning_rate": 5.597191846108485e-06, "loss": 0.39513955, "memory(GiB)": 34.88, "step": 72215, "train_speed(iter/s)": 0.410948 }, { "acc": 0.89885082, "epoch": 1.9554328107654402, "grad_norm": 4.98555850982666, "learning_rate": 5.596636278354003e-06, "loss": 0.52730532, "memory(GiB)": 34.88, "step": 72220, "train_speed(iter/s)": 0.410949 }, { "acc": 0.92550774, "epoch": 1.9555681910486555, "grad_norm": 8.32841682434082, "learning_rate": 5.596080703131743e-06, "loss": 0.38617461, "memory(GiB)": 34.88, "step": 72225, "train_speed(iter/s)": 0.410951 }, { "acc": 0.92079573, "epoch": 1.9557035713318713, "grad_norm": 7.703598976135254, "learning_rate": 5.595525120448664e-06, "loss": 0.49115868, "memory(GiB)": 34.88, "step": 72230, "train_speed(iter/s)": 0.410953 }, { "acc": 0.92462435, "epoch": 1.9558389516150867, "grad_norm": 6.046196937561035, "learning_rate": 5.594969530311726e-06, "loss": 0.41733904, "memory(GiB)": 34.88, "step": 72235, "train_speed(iter/s)": 0.410954 }, { "acc": 0.91375713, "epoch": 1.9559743318983023, "grad_norm": 6.52655553817749, "learning_rate": 5.594413932727888e-06, "loss": 0.46085143, "memory(GiB)": 34.88, "step": 72240, "train_speed(iter/s)": 0.410956 }, { "acc": 0.91023521, "epoch": 1.9561097121815179, "grad_norm": 4.405921459197998, "learning_rate": 5.593858327704112e-06, "loss": 0.5057333, "memory(GiB)": 34.88, "step": 72245, "train_speed(iter/s)": 0.410957 }, { "acc": 0.93399019, "epoch": 1.9562450924647334, "grad_norm": 8.10010814666748, "learning_rate": 5.593302715247358e-06, "loss": 0.33779583, "memory(GiB)": 34.88, "step": 72250, "train_speed(iter/s)": 0.410959 }, { "acc": 0.91165094, "epoch": 1.956380472747949, "grad_norm": 6.9785027503967285, "learning_rate": 5.592747095364584e-06, "loss": 0.50538869, "memory(GiB)": 34.88, "step": 72255, "train_speed(iter/s)": 0.41096 }, { "acc": 0.92316532, "epoch": 1.9565158530311644, "grad_norm": 7.6780853271484375, "learning_rate": 5.592191468062752e-06, "loss": 0.46882353, "memory(GiB)": 34.88, "step": 72260, "train_speed(iter/s)": 0.410962 }, { "acc": 0.90706835, "epoch": 1.9566512333143802, "grad_norm": 9.588905334472656, "learning_rate": 5.591635833348821e-06, "loss": 0.58345098, "memory(GiB)": 34.88, "step": 72265, "train_speed(iter/s)": 0.410963 }, { "acc": 0.89253998, "epoch": 1.9567866135975955, "grad_norm": 12.767617225646973, "learning_rate": 5.591080191229753e-06, "loss": 0.68684077, "memory(GiB)": 34.88, "step": 72270, "train_speed(iter/s)": 0.410965 }, { "acc": 0.90546083, "epoch": 1.9569219938808113, "grad_norm": 8.440658569335938, "learning_rate": 5.590524541712506e-06, "loss": 0.49912124, "memory(GiB)": 34.88, "step": 72275, "train_speed(iter/s)": 0.410966 }, { "acc": 0.92305174, "epoch": 1.9570573741640267, "grad_norm": 9.912542343139648, "learning_rate": 5.5899688848040435e-06, "loss": 0.45490255, "memory(GiB)": 34.88, "step": 72280, "train_speed(iter/s)": 0.410968 }, { "acc": 0.90638618, "epoch": 1.9571927544472423, "grad_norm": 13.433096885681152, "learning_rate": 5.589413220511325e-06, "loss": 0.45265832, "memory(GiB)": 34.88, "step": 72285, "train_speed(iter/s)": 0.410969 }, { "acc": 0.9149395, "epoch": 1.9573281347304579, "grad_norm": 6.7351837158203125, "learning_rate": 5.588857548841312e-06, "loss": 0.42432728, "memory(GiB)": 34.88, "step": 72290, "train_speed(iter/s)": 0.410971 }, { "acc": 0.92579298, "epoch": 1.9574635150136734, "grad_norm": 5.57674503326416, "learning_rate": 5.588301869800964e-06, "loss": 0.42270207, "memory(GiB)": 34.88, "step": 72295, "train_speed(iter/s)": 0.410973 }, { "acc": 0.92389383, "epoch": 1.957598895296889, "grad_norm": 9.0237398147583, "learning_rate": 5.587746183397241e-06, "loss": 0.38745303, "memory(GiB)": 34.88, "step": 72300, "train_speed(iter/s)": 0.410974 }, { "acc": 0.92684631, "epoch": 1.9577342755801044, "grad_norm": 6.16483211517334, "learning_rate": 5.587190489637106e-06, "loss": 0.37088091, "memory(GiB)": 34.88, "step": 72305, "train_speed(iter/s)": 0.410976 }, { "acc": 0.90736923, "epoch": 1.9578696558633202, "grad_norm": 5.926167011260986, "learning_rate": 5.586634788527521e-06, "loss": 0.44641309, "memory(GiB)": 34.88, "step": 72310, "train_speed(iter/s)": 0.410977 }, { "acc": 0.8990901, "epoch": 1.9580050361465355, "grad_norm": 10.632433891296387, "learning_rate": 5.586079080075445e-06, "loss": 0.59323816, "memory(GiB)": 34.88, "step": 72315, "train_speed(iter/s)": 0.410979 }, { "acc": 0.92252293, "epoch": 1.9581404164297511, "grad_norm": 15.572674751281738, "learning_rate": 5.585523364287842e-06, "loss": 0.41850228, "memory(GiB)": 34.88, "step": 72320, "train_speed(iter/s)": 0.410981 }, { "acc": 0.92143059, "epoch": 1.9582757967129667, "grad_norm": 4.130681037902832, "learning_rate": 5.584967641171669e-06, "loss": 0.402634, "memory(GiB)": 34.88, "step": 72325, "train_speed(iter/s)": 0.410982 }, { "acc": 0.91836958, "epoch": 1.9584111769961823, "grad_norm": 15.766992568969727, "learning_rate": 5.584411910733893e-06, "loss": 0.44621453, "memory(GiB)": 34.88, "step": 72330, "train_speed(iter/s)": 0.410984 }, { "acc": 0.9122736, "epoch": 1.9585465572793979, "grad_norm": 6.574179649353027, "learning_rate": 5.583856172981472e-06, "loss": 0.48559279, "memory(GiB)": 34.88, "step": 72335, "train_speed(iter/s)": 0.410985 }, { "acc": 0.91751251, "epoch": 1.9586819375626132, "grad_norm": 9.235548973083496, "learning_rate": 5.583300427921369e-06, "loss": 0.46788979, "memory(GiB)": 34.88, "step": 72340, "train_speed(iter/s)": 0.410986 }, { "acc": 0.92974224, "epoch": 1.958817317845829, "grad_norm": 4.490020751953125, "learning_rate": 5.582744675560544e-06, "loss": 0.38257122, "memory(GiB)": 34.88, "step": 72345, "train_speed(iter/s)": 0.410988 }, { "acc": 0.92023849, "epoch": 1.9589526981290444, "grad_norm": 6.858920574188232, "learning_rate": 5.58218891590596e-06, "loss": 0.46354837, "memory(GiB)": 34.88, "step": 72350, "train_speed(iter/s)": 0.410989 }, { "acc": 0.92112389, "epoch": 1.9590880784122602, "grad_norm": 8.776268005371094, "learning_rate": 5.581633148964581e-06, "loss": 0.48074617, "memory(GiB)": 34.88, "step": 72355, "train_speed(iter/s)": 0.410991 }, { "acc": 0.92054176, "epoch": 1.9592234586954755, "grad_norm": 8.49212646484375, "learning_rate": 5.581077374743368e-06, "loss": 0.53320799, "memory(GiB)": 34.88, "step": 72360, "train_speed(iter/s)": 0.410993 }, { "acc": 0.9096611, "epoch": 1.9593588389786911, "grad_norm": 8.528672218322754, "learning_rate": 5.580521593249281e-06, "loss": 0.48567066, "memory(GiB)": 34.88, "step": 72365, "train_speed(iter/s)": 0.410994 }, { "acc": 0.90593805, "epoch": 1.9594942192619067, "grad_norm": 12.930295944213867, "learning_rate": 5.579965804489284e-06, "loss": 0.53976793, "memory(GiB)": 34.88, "step": 72370, "train_speed(iter/s)": 0.410996 }, { "acc": 0.90587254, "epoch": 1.9596295995451223, "grad_norm": 10.070234298706055, "learning_rate": 5.579410008470341e-06, "loss": 0.53392382, "memory(GiB)": 34.88, "step": 72375, "train_speed(iter/s)": 0.410997 }, { "acc": 0.91470289, "epoch": 1.9597649798283379, "grad_norm": 14.395696640014648, "learning_rate": 5.57885420519941e-06, "loss": 0.43424892, "memory(GiB)": 34.88, "step": 72380, "train_speed(iter/s)": 0.410998 }, { "acc": 0.9047308, "epoch": 1.9599003601115532, "grad_norm": 7.181760311126709, "learning_rate": 5.578298394683457e-06, "loss": 0.49288816, "memory(GiB)": 34.88, "step": 72385, "train_speed(iter/s)": 0.411 }, { "acc": 0.91024618, "epoch": 1.960035740394769, "grad_norm": 10.363999366760254, "learning_rate": 5.5777425769294444e-06, "loss": 0.47665124, "memory(GiB)": 34.88, "step": 72390, "train_speed(iter/s)": 0.411001 }, { "acc": 0.92371016, "epoch": 1.9601711206779844, "grad_norm": 11.286377906799316, "learning_rate": 5.577186751944334e-06, "loss": 0.40276117, "memory(GiB)": 34.88, "step": 72395, "train_speed(iter/s)": 0.411003 }, { "acc": 0.92138977, "epoch": 1.9603065009612, "grad_norm": 12.3565092086792, "learning_rate": 5.5766309197350875e-06, "loss": 0.43232379, "memory(GiB)": 34.88, "step": 72400, "train_speed(iter/s)": 0.411005 }, { "acc": 0.91953716, "epoch": 1.9604418812444155, "grad_norm": 4.525123596191406, "learning_rate": 5.576075080308671e-06, "loss": 0.43885078, "memory(GiB)": 34.88, "step": 72405, "train_speed(iter/s)": 0.411006 }, { "acc": 0.9298008, "epoch": 1.9605772615276311, "grad_norm": 6.564281940460205, "learning_rate": 5.5755192336720444e-06, "loss": 0.34918137, "memory(GiB)": 34.88, "step": 72410, "train_speed(iter/s)": 0.411008 }, { "acc": 0.91461487, "epoch": 1.9607126418108467, "grad_norm": 6.059726238250732, "learning_rate": 5.574963379832171e-06, "loss": 0.54152384, "memory(GiB)": 34.88, "step": 72415, "train_speed(iter/s)": 0.411009 }, { "acc": 0.90369205, "epoch": 1.960848022094062, "grad_norm": 6.102774143218994, "learning_rate": 5.574407518796019e-06, "loss": 0.57484431, "memory(GiB)": 34.88, "step": 72420, "train_speed(iter/s)": 0.411011 }, { "acc": 0.91419239, "epoch": 1.9609834023772779, "grad_norm": 4.023983955383301, "learning_rate": 5.573851650570544e-06, "loss": 0.47997632, "memory(GiB)": 34.88, "step": 72425, "train_speed(iter/s)": 0.411012 }, { "acc": 0.91613903, "epoch": 1.9611187826604932, "grad_norm": 10.80958080291748, "learning_rate": 5.5732957751627134e-06, "loss": 0.51634903, "memory(GiB)": 34.88, "step": 72430, "train_speed(iter/s)": 0.411014 }, { "acc": 0.91511755, "epoch": 1.961254162943709, "grad_norm": 10.60827922821045, "learning_rate": 5.57273989257949e-06, "loss": 0.57099504, "memory(GiB)": 34.88, "step": 72435, "train_speed(iter/s)": 0.411015 }, { "acc": 0.90825386, "epoch": 1.9613895432269244, "grad_norm": 6.195457935333252, "learning_rate": 5.572184002827838e-06, "loss": 0.48217001, "memory(GiB)": 34.88, "step": 72440, "train_speed(iter/s)": 0.411017 }, { "acc": 0.89967308, "epoch": 1.96152492351014, "grad_norm": 26.679258346557617, "learning_rate": 5.571628105914722e-06, "loss": 0.54858494, "memory(GiB)": 34.88, "step": 72445, "train_speed(iter/s)": 0.411018 }, { "acc": 0.91526184, "epoch": 1.9616603037933555, "grad_norm": 6.634458541870117, "learning_rate": 5.571072201847102e-06, "loss": 0.45416651, "memory(GiB)": 34.88, "step": 72450, "train_speed(iter/s)": 0.41102 }, { "acc": 0.90757351, "epoch": 1.9617956840765711, "grad_norm": 3.776289463043213, "learning_rate": 5.570516290631947e-06, "loss": 0.46683545, "memory(GiB)": 34.88, "step": 72455, "train_speed(iter/s)": 0.411021 }, { "acc": 0.90318432, "epoch": 1.9619310643597867, "grad_norm": 12.696186065673828, "learning_rate": 5.569960372276214e-06, "loss": 0.56776795, "memory(GiB)": 34.88, "step": 72460, "train_speed(iter/s)": 0.411023 }, { "acc": 0.90292931, "epoch": 1.962066444643002, "grad_norm": 11.75072956085205, "learning_rate": 5.569404446786874e-06, "loss": 0.4967804, "memory(GiB)": 34.88, "step": 72465, "train_speed(iter/s)": 0.411024 }, { "acc": 0.91459875, "epoch": 1.9622018249262179, "grad_norm": 10.791891098022461, "learning_rate": 5.568848514170886e-06, "loss": 0.41172657, "memory(GiB)": 34.88, "step": 72470, "train_speed(iter/s)": 0.411026 }, { "acc": 0.9197978, "epoch": 1.9623372052094332, "grad_norm": 13.424273490905762, "learning_rate": 5.568292574435217e-06, "loss": 0.40578132, "memory(GiB)": 34.88, "step": 72475, "train_speed(iter/s)": 0.411027 }, { "acc": 0.90779476, "epoch": 1.9624725854926488, "grad_norm": 7.413901329040527, "learning_rate": 5.5677366275868296e-06, "loss": 0.49684672, "memory(GiB)": 34.88, "step": 72480, "train_speed(iter/s)": 0.411029 }, { "acc": 0.89820414, "epoch": 1.9626079657758644, "grad_norm": 5.437154769897461, "learning_rate": 5.567180673632688e-06, "loss": 0.50554175, "memory(GiB)": 34.88, "step": 72485, "train_speed(iter/s)": 0.41103 }, { "acc": 0.92456665, "epoch": 1.96274334605908, "grad_norm": 10.610698699951172, "learning_rate": 5.56662471257976e-06, "loss": 0.45006266, "memory(GiB)": 34.88, "step": 72490, "train_speed(iter/s)": 0.411031 }, { "acc": 0.90175896, "epoch": 1.9628787263422955, "grad_norm": 7.968721866607666, "learning_rate": 5.566068744435005e-06, "loss": 0.41979303, "memory(GiB)": 34.88, "step": 72495, "train_speed(iter/s)": 0.411033 }, { "acc": 0.9154294, "epoch": 1.963014106625511, "grad_norm": 7.038567066192627, "learning_rate": 5.5655127692053915e-06, "loss": 0.48740883, "memory(GiB)": 34.88, "step": 72500, "train_speed(iter/s)": 0.411035 }, { "acc": 0.91244011, "epoch": 1.9631494869087267, "grad_norm": 6.094086170196533, "learning_rate": 5.564956786897883e-06, "loss": 0.54233179, "memory(GiB)": 34.88, "step": 72505, "train_speed(iter/s)": 0.411036 }, { "acc": 0.92584019, "epoch": 1.963284867191942, "grad_norm": 9.607439041137695, "learning_rate": 5.564400797519444e-06, "loss": 0.37615285, "memory(GiB)": 34.88, "step": 72510, "train_speed(iter/s)": 0.411037 }, { "acc": 0.89560032, "epoch": 1.9634202474751579, "grad_norm": 9.579087257385254, "learning_rate": 5.563844801077039e-06, "loss": 0.51429482, "memory(GiB)": 34.88, "step": 72515, "train_speed(iter/s)": 0.411039 }, { "acc": 0.91777477, "epoch": 1.9635556277583732, "grad_norm": 15.59404182434082, "learning_rate": 5.563288797577633e-06, "loss": 0.49908485, "memory(GiB)": 34.88, "step": 72520, "train_speed(iter/s)": 0.41104 }, { "acc": 0.90283947, "epoch": 1.9636910080415888, "grad_norm": 29.134122848510742, "learning_rate": 5.562732787028192e-06, "loss": 0.47539239, "memory(GiB)": 34.88, "step": 72525, "train_speed(iter/s)": 0.411042 }, { "acc": 0.90709038, "epoch": 1.9638263883248044, "grad_norm": 6.825927257537842, "learning_rate": 5.56217676943568e-06, "loss": 0.60890188, "memory(GiB)": 34.88, "step": 72530, "train_speed(iter/s)": 0.411044 }, { "acc": 0.92424126, "epoch": 1.96396176860802, "grad_norm": 8.047344207763672, "learning_rate": 5.561620744807063e-06, "loss": 0.46107593, "memory(GiB)": 34.88, "step": 72535, "train_speed(iter/s)": 0.411044 }, { "acc": 0.91564713, "epoch": 1.9640971488912355, "grad_norm": 6.289666175842285, "learning_rate": 5.561064713149307e-06, "loss": 0.44406686, "memory(GiB)": 34.88, "step": 72540, "train_speed(iter/s)": 0.411046 }, { "acc": 0.92146635, "epoch": 1.964232529174451, "grad_norm": 8.083953857421875, "learning_rate": 5.560508674469376e-06, "loss": 0.48711534, "memory(GiB)": 34.88, "step": 72545, "train_speed(iter/s)": 0.411048 }, { "acc": 0.9029315, "epoch": 1.9643679094576667, "grad_norm": 11.197494506835938, "learning_rate": 5.559952628774235e-06, "loss": 0.66935143, "memory(GiB)": 34.88, "step": 72550, "train_speed(iter/s)": 0.411049 }, { "acc": 0.91609421, "epoch": 1.964503289740882, "grad_norm": 11.909745216369629, "learning_rate": 5.559396576070851e-06, "loss": 0.48911762, "memory(GiB)": 34.88, "step": 72555, "train_speed(iter/s)": 0.411051 }, { "acc": 0.89407444, "epoch": 1.9646386700240976, "grad_norm": 14.386940002441406, "learning_rate": 5.558840516366191e-06, "loss": 0.60521336, "memory(GiB)": 34.88, "step": 72560, "train_speed(iter/s)": 0.411053 }, { "acc": 0.90737896, "epoch": 1.9647740503073132, "grad_norm": 7.676341533660889, "learning_rate": 5.558284449667215e-06, "loss": 0.49044199, "memory(GiB)": 34.88, "step": 72565, "train_speed(iter/s)": 0.411054 }, { "acc": 0.89557953, "epoch": 1.9649094305905288, "grad_norm": 21.91895866394043, "learning_rate": 5.557728375980895e-06, "loss": 0.57594976, "memory(GiB)": 34.88, "step": 72570, "train_speed(iter/s)": 0.411055 }, { "acc": 0.91569633, "epoch": 1.9650448108737444, "grad_norm": 7.787173748016357, "learning_rate": 5.557172295314195e-06, "loss": 0.48008637, "memory(GiB)": 34.88, "step": 72575, "train_speed(iter/s)": 0.411057 }, { "acc": 0.92025108, "epoch": 1.9651801911569597, "grad_norm": 4.951420783996582, "learning_rate": 5.556616207674079e-06, "loss": 0.37752352, "memory(GiB)": 34.88, "step": 72580, "train_speed(iter/s)": 0.411058 }, { "acc": 0.92099285, "epoch": 1.9653155714401755, "grad_norm": 14.38686752319336, "learning_rate": 5.556060113067518e-06, "loss": 0.41474972, "memory(GiB)": 34.88, "step": 72585, "train_speed(iter/s)": 0.41106 }, { "acc": 0.90527401, "epoch": 1.965450951723391, "grad_norm": 4.34271240234375, "learning_rate": 5.555504011501474e-06, "loss": 0.51300263, "memory(GiB)": 34.88, "step": 72590, "train_speed(iter/s)": 0.411061 }, { "acc": 0.90569038, "epoch": 1.9655863320066067, "grad_norm": 9.693333625793457, "learning_rate": 5.554947902982911e-06, "loss": 0.5107172, "memory(GiB)": 34.88, "step": 72595, "train_speed(iter/s)": 0.411063 }, { "acc": 0.92172642, "epoch": 1.965721712289822, "grad_norm": 12.073613166809082, "learning_rate": 5.554391787518801e-06, "loss": 0.45328541, "memory(GiB)": 34.88, "step": 72600, "train_speed(iter/s)": 0.411064 }, { "acc": 0.89999247, "epoch": 1.9658570925730376, "grad_norm": 7.15146541595459, "learning_rate": 5.553835665116107e-06, "loss": 0.57423115, "memory(GiB)": 34.88, "step": 72605, "train_speed(iter/s)": 0.411066 }, { "acc": 0.89730806, "epoch": 1.9659924728562532, "grad_norm": 13.82519245147705, "learning_rate": 5.553279535781797e-06, "loss": 0.56586685, "memory(GiB)": 34.88, "step": 72610, "train_speed(iter/s)": 0.411067 }, { "acc": 0.91563435, "epoch": 1.9661278531394688, "grad_norm": 7.214001178741455, "learning_rate": 5.552723399522837e-06, "loss": 0.45444498, "memory(GiB)": 34.88, "step": 72615, "train_speed(iter/s)": 0.411069 }, { "acc": 0.91329393, "epoch": 1.9662632334226844, "grad_norm": 11.545568466186523, "learning_rate": 5.552167256346195e-06, "loss": 0.50127172, "memory(GiB)": 34.88, "step": 72620, "train_speed(iter/s)": 0.41107 }, { "acc": 0.91392908, "epoch": 1.9663986137058997, "grad_norm": 5.712383270263672, "learning_rate": 5.551611106258834e-06, "loss": 0.47437496, "memory(GiB)": 34.88, "step": 72625, "train_speed(iter/s)": 0.411072 }, { "acc": 0.91380863, "epoch": 1.9665339939891155, "grad_norm": 7.184857368469238, "learning_rate": 5.551054949267728e-06, "loss": 0.47482529, "memory(GiB)": 34.88, "step": 72630, "train_speed(iter/s)": 0.411073 }, { "acc": 0.9254118, "epoch": 1.966669374272331, "grad_norm": 7.243633270263672, "learning_rate": 5.550498785379836e-06, "loss": 0.38798714, "memory(GiB)": 34.88, "step": 72635, "train_speed(iter/s)": 0.411075 }, { "acc": 0.91320848, "epoch": 1.9668047545555465, "grad_norm": 7.799530982971191, "learning_rate": 5.549942614602128e-06, "loss": 0.48858099, "memory(GiB)": 34.88, "step": 72640, "train_speed(iter/s)": 0.411077 }, { "acc": 0.91984892, "epoch": 1.966940134838762, "grad_norm": 10.537176132202148, "learning_rate": 5.549386436941572e-06, "loss": 0.44086375, "memory(GiB)": 34.88, "step": 72645, "train_speed(iter/s)": 0.411078 }, { "acc": 0.89949083, "epoch": 1.9670755151219776, "grad_norm": 4.44490909576416, "learning_rate": 5.548830252405135e-06, "loss": 0.5348896, "memory(GiB)": 34.88, "step": 72650, "train_speed(iter/s)": 0.41108 }, { "acc": 0.92011623, "epoch": 1.9672108954051932, "grad_norm": 5.9542951583862305, "learning_rate": 5.5482740609997846e-06, "loss": 0.48910866, "memory(GiB)": 34.88, "step": 72655, "train_speed(iter/s)": 0.411081 }, { "acc": 0.9225317, "epoch": 1.9673462756884086, "grad_norm": 6.857608795166016, "learning_rate": 5.547717862732488e-06, "loss": 0.45754228, "memory(GiB)": 34.88, "step": 72660, "train_speed(iter/s)": 0.411082 }, { "acc": 0.91432247, "epoch": 1.9674816559716244, "grad_norm": 6.691021919250488, "learning_rate": 5.547161657610212e-06, "loss": 0.43667316, "memory(GiB)": 34.88, "step": 72665, "train_speed(iter/s)": 0.411084 }, { "acc": 0.9032012, "epoch": 1.9676170362548397, "grad_norm": 19.088088989257812, "learning_rate": 5.5466054456399255e-06, "loss": 0.55062456, "memory(GiB)": 34.88, "step": 72670, "train_speed(iter/s)": 0.411086 }, { "acc": 0.89760036, "epoch": 1.9677524165380555, "grad_norm": 13.698637008666992, "learning_rate": 5.5460492268285934e-06, "loss": 0.64778466, "memory(GiB)": 34.88, "step": 72675, "train_speed(iter/s)": 0.411087 }, { "acc": 0.92330799, "epoch": 1.967887796821271, "grad_norm": 8.719374656677246, "learning_rate": 5.545493001183186e-06, "loss": 0.41109567, "memory(GiB)": 34.88, "step": 72680, "train_speed(iter/s)": 0.411089 }, { "acc": 0.93277721, "epoch": 1.9680231771044865, "grad_norm": 6.224876403808594, "learning_rate": 5.54493676871067e-06, "loss": 0.33386207, "memory(GiB)": 34.88, "step": 72685, "train_speed(iter/s)": 0.41109 }, { "acc": 0.91590538, "epoch": 1.968158557387702, "grad_norm": 7.731494903564453, "learning_rate": 5.544380529418014e-06, "loss": 0.49063382, "memory(GiB)": 34.88, "step": 72690, "train_speed(iter/s)": 0.411091 }, { "acc": 0.92379627, "epoch": 1.9682939376709176, "grad_norm": 6.455098628997803, "learning_rate": 5.5438242833121844e-06, "loss": 0.47470245, "memory(GiB)": 34.88, "step": 72695, "train_speed(iter/s)": 0.411093 }, { "acc": 0.91032906, "epoch": 1.9684293179541332, "grad_norm": 18.254379272460938, "learning_rate": 5.543268030400153e-06, "loss": 0.53156881, "memory(GiB)": 34.88, "step": 72700, "train_speed(iter/s)": 0.411094 }, { "acc": 0.90812712, "epoch": 1.9685646982373486, "grad_norm": 13.376118659973145, "learning_rate": 5.542711770688885e-06, "loss": 0.4269311, "memory(GiB)": 34.88, "step": 72705, "train_speed(iter/s)": 0.411096 }, { "acc": 0.92105284, "epoch": 1.9687000785205644, "grad_norm": 15.949678421020508, "learning_rate": 5.542155504185349e-06, "loss": 0.46171784, "memory(GiB)": 34.88, "step": 72710, "train_speed(iter/s)": 0.411098 }, { "acc": 0.9195365, "epoch": 1.9688354588037797, "grad_norm": 7.2587666511535645, "learning_rate": 5.541599230896512e-06, "loss": 0.37347507, "memory(GiB)": 34.88, "step": 72715, "train_speed(iter/s)": 0.411099 }, { "acc": 0.91228561, "epoch": 1.9689708390869953, "grad_norm": 10.694279670715332, "learning_rate": 5.541042950829345e-06, "loss": 0.53047357, "memory(GiB)": 34.88, "step": 72720, "train_speed(iter/s)": 0.411101 }, { "acc": 0.91085529, "epoch": 1.969106219370211, "grad_norm": 15.30089282989502, "learning_rate": 5.540486663990815e-06, "loss": 0.48602662, "memory(GiB)": 34.88, "step": 72725, "train_speed(iter/s)": 0.411102 }, { "acc": 0.91302519, "epoch": 1.9692415996534265, "grad_norm": 10.15368366241455, "learning_rate": 5.53993037038789e-06, "loss": 0.4984993, "memory(GiB)": 34.88, "step": 72730, "train_speed(iter/s)": 0.411103 }, { "acc": 0.91426287, "epoch": 1.969376979936642, "grad_norm": 19.748390197753906, "learning_rate": 5.539374070027542e-06, "loss": 0.4782618, "memory(GiB)": 34.88, "step": 72735, "train_speed(iter/s)": 0.411105 }, { "acc": 0.90915365, "epoch": 1.9695123602198574, "grad_norm": 21.182037353515625, "learning_rate": 5.538817762916735e-06, "loss": 0.45695372, "memory(GiB)": 34.88, "step": 72740, "train_speed(iter/s)": 0.411106 }, { "acc": 0.92848225, "epoch": 1.9696477405030732, "grad_norm": 7.651490688323975, "learning_rate": 5.538261449062442e-06, "loss": 0.285233, "memory(GiB)": 34.88, "step": 72745, "train_speed(iter/s)": 0.411108 }, { "acc": 0.91429186, "epoch": 1.9697831207862886, "grad_norm": 14.601677894592285, "learning_rate": 5.537705128471629e-06, "loss": 0.52405024, "memory(GiB)": 34.88, "step": 72750, "train_speed(iter/s)": 0.411109 }, { "acc": 0.92315483, "epoch": 1.9699185010695044, "grad_norm": 4.536989212036133, "learning_rate": 5.537148801151268e-06, "loss": 0.46229935, "memory(GiB)": 34.88, "step": 72755, "train_speed(iter/s)": 0.411111 }, { "acc": 0.92450409, "epoch": 1.9700538813527197, "grad_norm": 10.577204704284668, "learning_rate": 5.536592467108324e-06, "loss": 0.47443156, "memory(GiB)": 34.88, "step": 72760, "train_speed(iter/s)": 0.411112 }, { "acc": 0.9203001, "epoch": 1.9701892616359353, "grad_norm": 7.861096382141113, "learning_rate": 5.5360361263497695e-06, "loss": 0.40205503, "memory(GiB)": 34.88, "step": 72765, "train_speed(iter/s)": 0.411114 }, { "acc": 0.87856274, "epoch": 1.970324641919151, "grad_norm": 8.447505950927734, "learning_rate": 5.535479778882572e-06, "loss": 0.75380025, "memory(GiB)": 34.88, "step": 72770, "train_speed(iter/s)": 0.411115 }, { "acc": 0.93689508, "epoch": 1.9704600222023665, "grad_norm": 5.566712379455566, "learning_rate": 5.534923424713702e-06, "loss": 0.36854489, "memory(GiB)": 34.88, "step": 72775, "train_speed(iter/s)": 0.411117 }, { "acc": 0.9270359, "epoch": 1.970595402485582, "grad_norm": 6.039619445800781, "learning_rate": 5.534367063850129e-06, "loss": 0.42287006, "memory(GiB)": 34.88, "step": 72780, "train_speed(iter/s)": 0.411118 }, { "acc": 0.91838398, "epoch": 1.9707307827687974, "grad_norm": 5.803371906280518, "learning_rate": 5.533810696298821e-06, "loss": 0.4089221, "memory(GiB)": 34.88, "step": 72785, "train_speed(iter/s)": 0.41112 }, { "acc": 0.91859961, "epoch": 1.9708661630520132, "grad_norm": 7.545964241027832, "learning_rate": 5.53325432206675e-06, "loss": 0.41942606, "memory(GiB)": 34.88, "step": 72790, "train_speed(iter/s)": 0.411121 }, { "acc": 0.93320417, "epoch": 1.9710015433352286, "grad_norm": 5.196355819702148, "learning_rate": 5.532697941160884e-06, "loss": 0.36085637, "memory(GiB)": 34.88, "step": 72795, "train_speed(iter/s)": 0.411122 }, { "acc": 0.92183075, "epoch": 1.9711369236184442, "grad_norm": 7.686777591705322, "learning_rate": 5.532141553588191e-06, "loss": 0.43176899, "memory(GiB)": 34.88, "step": 72800, "train_speed(iter/s)": 0.411123 }, { "acc": 0.92190628, "epoch": 1.9712723039016598, "grad_norm": 9.679020881652832, "learning_rate": 5.531585159355644e-06, "loss": 0.43438807, "memory(GiB)": 34.88, "step": 72805, "train_speed(iter/s)": 0.411125 }, { "acc": 0.91682215, "epoch": 1.9714076841848753, "grad_norm": 7.742908000946045, "learning_rate": 5.531028758470213e-06, "loss": 0.43051128, "memory(GiB)": 34.88, "step": 72810, "train_speed(iter/s)": 0.411126 }, { "acc": 0.91159649, "epoch": 1.971543064468091, "grad_norm": 24.6164608001709, "learning_rate": 5.530472350938865e-06, "loss": 0.43841429, "memory(GiB)": 34.88, "step": 72815, "train_speed(iter/s)": 0.411128 }, { "acc": 0.9263526, "epoch": 1.9716784447513063, "grad_norm": 8.686304092407227, "learning_rate": 5.529915936768573e-06, "loss": 0.37408581, "memory(GiB)": 34.88, "step": 72820, "train_speed(iter/s)": 0.411129 }, { "acc": 0.91263275, "epoch": 1.971813825034522, "grad_norm": 29.14269256591797, "learning_rate": 5.529359515966306e-06, "loss": 0.46245384, "memory(GiB)": 34.88, "step": 72825, "train_speed(iter/s)": 0.41113 }, { "acc": 0.92113304, "epoch": 1.9719492053177374, "grad_norm": 9.387563705444336, "learning_rate": 5.528803088539033e-06, "loss": 0.46225624, "memory(GiB)": 34.88, "step": 72830, "train_speed(iter/s)": 0.411132 }, { "acc": 0.90559826, "epoch": 1.9720845856009532, "grad_norm": 7.6842217445373535, "learning_rate": 5.528246654493729e-06, "loss": 0.49676857, "memory(GiB)": 34.88, "step": 72835, "train_speed(iter/s)": 0.411133 }, { "acc": 0.91163597, "epoch": 1.9722199658841686, "grad_norm": 19.825014114379883, "learning_rate": 5.5276902138373586e-06, "loss": 0.40033288, "memory(GiB)": 34.88, "step": 72840, "train_speed(iter/s)": 0.411135 }, { "acc": 0.91490803, "epoch": 1.9723553461673842, "grad_norm": 6.269681930541992, "learning_rate": 5.5271337665768945e-06, "loss": 0.46216302, "memory(GiB)": 34.88, "step": 72845, "train_speed(iter/s)": 0.411136 }, { "acc": 0.89911938, "epoch": 1.9724907264505998, "grad_norm": 5.647072792053223, "learning_rate": 5.526577312719309e-06, "loss": 0.64382763, "memory(GiB)": 34.88, "step": 72850, "train_speed(iter/s)": 0.411137 }, { "acc": 0.92858696, "epoch": 1.9726261067338153, "grad_norm": 4.774610996246338, "learning_rate": 5.52602085227157e-06, "loss": 0.41858444, "memory(GiB)": 34.88, "step": 72855, "train_speed(iter/s)": 0.411139 }, { "acc": 0.90816545, "epoch": 1.972761487017031, "grad_norm": 15.07231330871582, "learning_rate": 5.525464385240651e-06, "loss": 0.57041197, "memory(GiB)": 34.88, "step": 72860, "train_speed(iter/s)": 0.41114 }, { "acc": 0.93214569, "epoch": 1.9728968673002463, "grad_norm": 5.190518856048584, "learning_rate": 5.524907911633521e-06, "loss": 0.37159319, "memory(GiB)": 34.88, "step": 72865, "train_speed(iter/s)": 0.411141 }, { "acc": 0.92653589, "epoch": 1.973032247583462, "grad_norm": 7.492187976837158, "learning_rate": 5.524351431457152e-06, "loss": 0.38743579, "memory(GiB)": 34.88, "step": 72870, "train_speed(iter/s)": 0.411143 }, { "acc": 0.9388607, "epoch": 1.9731676278666774, "grad_norm": 6.951829433441162, "learning_rate": 5.5237949447185125e-06, "loss": 0.34850559, "memory(GiB)": 34.88, "step": 72875, "train_speed(iter/s)": 0.411144 }, { "acc": 0.9163805, "epoch": 1.973303008149893, "grad_norm": 18.65853500366211, "learning_rate": 5.523238451424578e-06, "loss": 0.47493567, "memory(GiB)": 34.88, "step": 72880, "train_speed(iter/s)": 0.411146 }, { "acc": 0.93132267, "epoch": 1.9734383884331086, "grad_norm": 15.228694915771484, "learning_rate": 5.522681951582316e-06, "loss": 0.38323994, "memory(GiB)": 34.88, "step": 72885, "train_speed(iter/s)": 0.411147 }, { "acc": 0.92959728, "epoch": 1.9735737687163242, "grad_norm": 3.0985372066497803, "learning_rate": 5.522125445198699e-06, "loss": 0.40531807, "memory(GiB)": 34.88, "step": 72890, "train_speed(iter/s)": 0.411148 }, { "acc": 0.90202827, "epoch": 1.9737091489995398, "grad_norm": 7.424294471740723, "learning_rate": 5.521568932280698e-06, "loss": 0.54751854, "memory(GiB)": 34.88, "step": 72895, "train_speed(iter/s)": 0.41115 }, { "acc": 0.90734529, "epoch": 1.9738445292827551, "grad_norm": 8.753070831298828, "learning_rate": 5.5210124128352845e-06, "loss": 0.56958613, "memory(GiB)": 34.88, "step": 72900, "train_speed(iter/s)": 0.411151 }, { "acc": 0.90532188, "epoch": 1.973979909565971, "grad_norm": 7.173427581787109, "learning_rate": 5.520455886869432e-06, "loss": 0.46116638, "memory(GiB)": 34.88, "step": 72905, "train_speed(iter/s)": 0.411153 }, { "acc": 0.92799501, "epoch": 1.9741152898491863, "grad_norm": 16.506900787353516, "learning_rate": 5.519899354390109e-06, "loss": 0.38632708, "memory(GiB)": 34.88, "step": 72910, "train_speed(iter/s)": 0.411154 }, { "acc": 0.90620155, "epoch": 1.974250670132402, "grad_norm": 9.04714298248291, "learning_rate": 5.519342815404289e-06, "loss": 0.54115429, "memory(GiB)": 34.88, "step": 72915, "train_speed(iter/s)": 0.411156 }, { "acc": 0.92815485, "epoch": 1.9743860504156174, "grad_norm": 4.605462074279785, "learning_rate": 5.518786269918944e-06, "loss": 0.38758185, "memory(GiB)": 34.88, "step": 72920, "train_speed(iter/s)": 0.411157 }, { "acc": 0.91785936, "epoch": 1.974521430698833, "grad_norm": 5.05829381942749, "learning_rate": 5.518229717941043e-06, "loss": 0.4869205, "memory(GiB)": 34.88, "step": 72925, "train_speed(iter/s)": 0.411158 }, { "acc": 0.93325872, "epoch": 1.9746568109820486, "grad_norm": 7.340729713439941, "learning_rate": 5.517673159477561e-06, "loss": 0.38713374, "memory(GiB)": 34.88, "step": 72930, "train_speed(iter/s)": 0.41116 }, { "acc": 0.9123497, "epoch": 1.9747921912652642, "grad_norm": 5.473964214324951, "learning_rate": 5.517116594535469e-06, "loss": 0.45132027, "memory(GiB)": 34.88, "step": 72935, "train_speed(iter/s)": 0.411161 }, { "acc": 0.91539841, "epoch": 1.9749275715484798, "grad_norm": 10.77468490600586, "learning_rate": 5.516560023121738e-06, "loss": 0.49076152, "memory(GiB)": 34.88, "step": 72940, "train_speed(iter/s)": 0.411162 }, { "acc": 0.91173229, "epoch": 1.9750629518316951, "grad_norm": 10.032638549804688, "learning_rate": 5.516003445243342e-06, "loss": 0.47905774, "memory(GiB)": 34.88, "step": 72945, "train_speed(iter/s)": 0.411164 }, { "acc": 0.90781498, "epoch": 1.975198332114911, "grad_norm": 15.131366729736328, "learning_rate": 5.515446860907252e-06, "loss": 0.55769634, "memory(GiB)": 34.88, "step": 72950, "train_speed(iter/s)": 0.411165 }, { "acc": 0.9258275, "epoch": 1.9753337123981263, "grad_norm": 8.165446281433105, "learning_rate": 5.514890270120441e-06, "loss": 0.388749, "memory(GiB)": 34.88, "step": 72955, "train_speed(iter/s)": 0.411166 }, { "acc": 0.92759781, "epoch": 1.9754690926813419, "grad_norm": 14.207120895385742, "learning_rate": 5.514333672889881e-06, "loss": 0.39049153, "memory(GiB)": 34.88, "step": 72960, "train_speed(iter/s)": 0.411168 }, { "acc": 0.92096329, "epoch": 1.9756044729645574, "grad_norm": 13.86333179473877, "learning_rate": 5.513777069222547e-06, "loss": 0.44078918, "memory(GiB)": 34.88, "step": 72965, "train_speed(iter/s)": 0.411169 }, { "acc": 0.92525539, "epoch": 1.975739853247773, "grad_norm": 7.579282283782959, "learning_rate": 5.5132204591254056e-06, "loss": 0.44101601, "memory(GiB)": 34.88, "step": 72970, "train_speed(iter/s)": 0.411171 }, { "acc": 0.90556641, "epoch": 1.9758752335309886, "grad_norm": 10.348908424377441, "learning_rate": 5.512663842605434e-06, "loss": 0.58781223, "memory(GiB)": 34.88, "step": 72975, "train_speed(iter/s)": 0.411172 }, { "acc": 0.93246651, "epoch": 1.976010613814204, "grad_norm": 5.304495811462402, "learning_rate": 5.512107219669604e-06, "loss": 0.34124713, "memory(GiB)": 34.88, "step": 72980, "train_speed(iter/s)": 0.411174 }, { "acc": 0.91870403, "epoch": 1.9761459940974198, "grad_norm": 8.804394721984863, "learning_rate": 5.511550590324889e-06, "loss": 0.38408158, "memory(GiB)": 34.88, "step": 72985, "train_speed(iter/s)": 0.411175 }, { "acc": 0.89569626, "epoch": 1.9762813743806351, "grad_norm": 8.444511413574219, "learning_rate": 5.51099395457826e-06, "loss": 0.58948469, "memory(GiB)": 34.88, "step": 72990, "train_speed(iter/s)": 0.411177 }, { "acc": 0.89734097, "epoch": 1.976416754663851, "grad_norm": 7.8790507316589355, "learning_rate": 5.510437312436691e-06, "loss": 0.53039083, "memory(GiB)": 34.88, "step": 72995, "train_speed(iter/s)": 0.411178 }, { "acc": 0.91068325, "epoch": 1.9765521349470663, "grad_norm": 9.900106430053711, "learning_rate": 5.509880663907156e-06, "loss": 0.5315805, "memory(GiB)": 34.88, "step": 73000, "train_speed(iter/s)": 0.41118 }, { "acc": 0.90781698, "epoch": 1.9766875152302819, "grad_norm": 9.292655944824219, "learning_rate": 5.509324008996627e-06, "loss": 0.4853364, "memory(GiB)": 34.88, "step": 73005, "train_speed(iter/s)": 0.411181 }, { "acc": 0.9111557, "epoch": 1.9768228955134974, "grad_norm": 7.897695064544678, "learning_rate": 5.5087673477120785e-06, "loss": 0.4519681, "memory(GiB)": 34.88, "step": 73010, "train_speed(iter/s)": 0.411182 }, { "acc": 0.91406994, "epoch": 1.976958275796713, "grad_norm": 6.311891555786133, "learning_rate": 5.5082106800604805e-06, "loss": 0.53749228, "memory(GiB)": 34.88, "step": 73015, "train_speed(iter/s)": 0.411184 }, { "acc": 0.89603567, "epoch": 1.9770936560799286, "grad_norm": 15.416446685791016, "learning_rate": 5.50765400604881e-06, "loss": 0.69909139, "memory(GiB)": 34.88, "step": 73020, "train_speed(iter/s)": 0.411185 }, { "acc": 0.91354437, "epoch": 1.977229036363144, "grad_norm": 5.260969161987305, "learning_rate": 5.507097325684039e-06, "loss": 0.46582847, "memory(GiB)": 34.88, "step": 73025, "train_speed(iter/s)": 0.411187 }, { "acc": 0.90131254, "epoch": 1.9773644166463598, "grad_norm": 6.313706874847412, "learning_rate": 5.50654063897314e-06, "loss": 0.57781763, "memory(GiB)": 34.88, "step": 73030, "train_speed(iter/s)": 0.411188 }, { "acc": 0.92420769, "epoch": 1.9774997969295751, "grad_norm": 6.0648698806762695, "learning_rate": 5.505983945923089e-06, "loss": 0.39743977, "memory(GiB)": 34.88, "step": 73035, "train_speed(iter/s)": 0.41119 }, { "acc": 0.90701542, "epoch": 1.9776351772127907, "grad_norm": 6.666240692138672, "learning_rate": 5.505427246540857e-06, "loss": 0.51808109, "memory(GiB)": 34.88, "step": 73040, "train_speed(iter/s)": 0.411191 }, { "acc": 0.91351767, "epoch": 1.9777705574960063, "grad_norm": 11.20890998840332, "learning_rate": 5.504870540833421e-06, "loss": 0.47865849, "memory(GiB)": 34.88, "step": 73045, "train_speed(iter/s)": 0.411193 }, { "acc": 0.92650423, "epoch": 1.9779059377792219, "grad_norm": 6.14170503616333, "learning_rate": 5.5043138288077515e-06, "loss": 0.40199437, "memory(GiB)": 34.88, "step": 73050, "train_speed(iter/s)": 0.411194 }, { "acc": 0.91830902, "epoch": 1.9780413180624374, "grad_norm": 4.603003978729248, "learning_rate": 5.503757110470824e-06, "loss": 0.50212274, "memory(GiB)": 34.88, "step": 73055, "train_speed(iter/s)": 0.411195 }, { "acc": 0.90997162, "epoch": 1.9781766983456528, "grad_norm": 4.201251029968262, "learning_rate": 5.503200385829612e-06, "loss": 0.47964149, "memory(GiB)": 34.88, "step": 73060, "train_speed(iter/s)": 0.411197 }, { "acc": 0.9254097, "epoch": 1.9783120786288686, "grad_norm": 4.083062648773193, "learning_rate": 5.50264365489109e-06, "loss": 0.41618032, "memory(GiB)": 34.88, "step": 73065, "train_speed(iter/s)": 0.411199 }, { "acc": 0.91713257, "epoch": 1.978447458912084, "grad_norm": 8.402125358581543, "learning_rate": 5.502086917662232e-06, "loss": 0.42676697, "memory(GiB)": 34.88, "step": 73070, "train_speed(iter/s)": 0.4112 }, { "acc": 0.9195899, "epoch": 1.9785828391952998, "grad_norm": 6.026612281799316, "learning_rate": 5.501530174150011e-06, "loss": 0.39581492, "memory(GiB)": 34.88, "step": 73075, "train_speed(iter/s)": 0.411202 }, { "acc": 0.9194788, "epoch": 1.9787182194785151, "grad_norm": 6.030203342437744, "learning_rate": 5.500973424361406e-06, "loss": 0.37650976, "memory(GiB)": 34.88, "step": 73080, "train_speed(iter/s)": 0.411203 }, { "acc": 0.91901569, "epoch": 1.9788535997617307, "grad_norm": 25.42763900756836, "learning_rate": 5.500416668303384e-06, "loss": 0.45674624, "memory(GiB)": 34.88, "step": 73085, "train_speed(iter/s)": 0.411205 }, { "acc": 0.91477003, "epoch": 1.9789889800449463, "grad_norm": 8.064851760864258, "learning_rate": 5.499859905982927e-06, "loss": 0.5061799, "memory(GiB)": 34.88, "step": 73090, "train_speed(iter/s)": 0.411206 }, { "acc": 0.90005426, "epoch": 1.9791243603281619, "grad_norm": 14.942733764648438, "learning_rate": 5.499303137407003e-06, "loss": 0.56102147, "memory(GiB)": 34.88, "step": 73095, "train_speed(iter/s)": 0.411208 }, { "acc": 0.91529503, "epoch": 1.9792597406113774, "grad_norm": 12.479715347290039, "learning_rate": 5.498746362582591e-06, "loss": 0.53057771, "memory(GiB)": 34.88, "step": 73100, "train_speed(iter/s)": 0.411209 }, { "acc": 0.91931629, "epoch": 1.9793951208945928, "grad_norm": 6.478157997131348, "learning_rate": 5.498189581516664e-06, "loss": 0.42251358, "memory(GiB)": 34.88, "step": 73105, "train_speed(iter/s)": 0.411211 }, { "acc": 0.92536764, "epoch": 1.9795305011778086, "grad_norm": 4.480937480926514, "learning_rate": 5.4976327942161965e-06, "loss": 0.40694437, "memory(GiB)": 34.88, "step": 73110, "train_speed(iter/s)": 0.411212 }, { "acc": 0.90507956, "epoch": 1.979665881461024, "grad_norm": 8.084312438964844, "learning_rate": 5.497076000688165e-06, "loss": 0.54224129, "memory(GiB)": 34.88, "step": 73115, "train_speed(iter/s)": 0.411214 }, { "acc": 0.91915007, "epoch": 1.9798012617442395, "grad_norm": 13.18753433227539, "learning_rate": 5.496519200939542e-06, "loss": 0.46180658, "memory(GiB)": 34.88, "step": 73120, "train_speed(iter/s)": 0.411215 }, { "acc": 0.89593782, "epoch": 1.9799366420274551, "grad_norm": 8.238089561462402, "learning_rate": 5.495962394977306e-06, "loss": 0.49408126, "memory(GiB)": 34.88, "step": 73125, "train_speed(iter/s)": 0.411217 }, { "acc": 0.9024931, "epoch": 1.9800720223106707, "grad_norm": 14.351704597473145, "learning_rate": 5.495405582808429e-06, "loss": 0.49463544, "memory(GiB)": 34.88, "step": 73130, "train_speed(iter/s)": 0.411219 }, { "acc": 0.92552156, "epoch": 1.9802074025938863, "grad_norm": 5.668534755706787, "learning_rate": 5.494848764439885e-06, "loss": 0.37360044, "memory(GiB)": 34.88, "step": 73135, "train_speed(iter/s)": 0.41122 }, { "acc": 0.91188221, "epoch": 1.9803427828771016, "grad_norm": 6.66045618057251, "learning_rate": 5.494291939878653e-06, "loss": 0.56010056, "memory(GiB)": 34.88, "step": 73140, "train_speed(iter/s)": 0.411222 }, { "acc": 0.93062582, "epoch": 1.9804781631603174, "grad_norm": 12.906777381896973, "learning_rate": 5.493735109131707e-06, "loss": 0.39898918, "memory(GiB)": 34.88, "step": 73145, "train_speed(iter/s)": 0.411223 }, { "acc": 0.90464029, "epoch": 1.9806135434435328, "grad_norm": 6.693474769592285, "learning_rate": 5.4931782722060214e-06, "loss": 0.53381815, "memory(GiB)": 34.88, "step": 73150, "train_speed(iter/s)": 0.411224 }, { "acc": 0.90462914, "epoch": 1.9807489237267486, "grad_norm": 15.298192024230957, "learning_rate": 5.492621429108572e-06, "loss": 0.50040131, "memory(GiB)": 34.88, "step": 73155, "train_speed(iter/s)": 0.411226 }, { "acc": 0.91793423, "epoch": 1.980884304009964, "grad_norm": 6.501978397369385, "learning_rate": 5.4920645798463365e-06, "loss": 0.45910163, "memory(GiB)": 34.88, "step": 73160, "train_speed(iter/s)": 0.411227 }, { "acc": 0.92890234, "epoch": 1.9810196842931795, "grad_norm": 8.32496166229248, "learning_rate": 5.491507724426286e-06, "loss": 0.39725173, "memory(GiB)": 34.88, "step": 73165, "train_speed(iter/s)": 0.411229 }, { "acc": 0.92671041, "epoch": 1.9811550645763951, "grad_norm": 8.376669883728027, "learning_rate": 5.490950862855402e-06, "loss": 0.37839718, "memory(GiB)": 34.88, "step": 73170, "train_speed(iter/s)": 0.41123 }, { "acc": 0.92084684, "epoch": 1.9812904448596107, "grad_norm": 3.6126649379730225, "learning_rate": 5.490393995140656e-06, "loss": 0.43750648, "memory(GiB)": 34.88, "step": 73175, "train_speed(iter/s)": 0.411232 }, { "acc": 0.93381233, "epoch": 1.9814258251428263, "grad_norm": 2.9118974208831787, "learning_rate": 5.4898371212890235e-06, "loss": 0.41706467, "memory(GiB)": 34.88, "step": 73180, "train_speed(iter/s)": 0.411233 }, { "acc": 0.9159668, "epoch": 1.9815612054260416, "grad_norm": 6.309537410736084, "learning_rate": 5.489280241307483e-06, "loss": 0.46171217, "memory(GiB)": 34.88, "step": 73185, "train_speed(iter/s)": 0.411235 }, { "acc": 0.91074305, "epoch": 1.9816965857092574, "grad_norm": 18.51388931274414, "learning_rate": 5.48872335520301e-06, "loss": 0.43147583, "memory(GiB)": 34.88, "step": 73190, "train_speed(iter/s)": 0.411236 }, { "acc": 0.9099411, "epoch": 1.9818319659924728, "grad_norm": 82.42733764648438, "learning_rate": 5.488166462982579e-06, "loss": 0.49896331, "memory(GiB)": 34.88, "step": 73195, "train_speed(iter/s)": 0.411238 }, { "acc": 0.92630348, "epoch": 1.9819673462756884, "grad_norm": 5.065298557281494, "learning_rate": 5.487609564653168e-06, "loss": 0.38255641, "memory(GiB)": 34.88, "step": 73200, "train_speed(iter/s)": 0.411239 }, { "acc": 0.9070837, "epoch": 1.982102726558904, "grad_norm": 12.051284790039062, "learning_rate": 5.487052660221753e-06, "loss": 0.46043701, "memory(GiB)": 34.88, "step": 73205, "train_speed(iter/s)": 0.411241 }, { "acc": 0.89560375, "epoch": 1.9822381068421195, "grad_norm": 8.502039909362793, "learning_rate": 5.486495749695309e-06, "loss": 0.60430355, "memory(GiB)": 34.88, "step": 73210, "train_speed(iter/s)": 0.411242 }, { "acc": 0.90618429, "epoch": 1.9823734871253351, "grad_norm": 15.93232250213623, "learning_rate": 5.4859388330808146e-06, "loss": 0.58170385, "memory(GiB)": 34.88, "step": 73215, "train_speed(iter/s)": 0.411244 }, { "acc": 0.92075329, "epoch": 1.9825088674085505, "grad_norm": 9.140340805053711, "learning_rate": 5.485381910385245e-06, "loss": 0.40237064, "memory(GiB)": 34.88, "step": 73220, "train_speed(iter/s)": 0.411245 }, { "acc": 0.94039106, "epoch": 1.9826442476917663, "grad_norm": 5.312128067016602, "learning_rate": 5.4848249816155756e-06, "loss": 0.3481672, "memory(GiB)": 34.88, "step": 73225, "train_speed(iter/s)": 0.411247 }, { "acc": 0.91107788, "epoch": 1.9827796279749816, "grad_norm": 17.908931732177734, "learning_rate": 5.484268046778784e-06, "loss": 0.48996587, "memory(GiB)": 34.88, "step": 73230, "train_speed(iter/s)": 0.411248 }, { "acc": 0.92634907, "epoch": 1.9829150082581974, "grad_norm": 11.23164176940918, "learning_rate": 5.483711105881846e-06, "loss": 0.37205298, "memory(GiB)": 34.88, "step": 73235, "train_speed(iter/s)": 0.411249 }, { "acc": 0.92098875, "epoch": 1.9830503885414128, "grad_norm": 8.276154518127441, "learning_rate": 5.4831541589317416e-06, "loss": 0.42948742, "memory(GiB)": 34.88, "step": 73240, "train_speed(iter/s)": 0.41125 }, { "acc": 0.92129822, "epoch": 1.9831857688246284, "grad_norm": 10.945358276367188, "learning_rate": 5.4825972059354434e-06, "loss": 0.41393809, "memory(GiB)": 34.88, "step": 73245, "train_speed(iter/s)": 0.411252 }, { "acc": 0.91490936, "epoch": 1.983321149107844, "grad_norm": 94.56903076171875, "learning_rate": 5.4820402468999326e-06, "loss": 0.50773134, "memory(GiB)": 34.88, "step": 73250, "train_speed(iter/s)": 0.411253 }, { "acc": 0.91662788, "epoch": 1.9834565293910593, "grad_norm": 5.6772847175598145, "learning_rate": 5.4814832818321825e-06, "loss": 0.39312234, "memory(GiB)": 34.88, "step": 73255, "train_speed(iter/s)": 0.411255 }, { "acc": 0.92250328, "epoch": 1.9835919096742751, "grad_norm": 4.523080825805664, "learning_rate": 5.480926310739172e-06, "loss": 0.369998, "memory(GiB)": 34.88, "step": 73260, "train_speed(iter/s)": 0.411256 }, { "acc": 0.92024307, "epoch": 1.9837272899574905, "grad_norm": 6.987105369567871, "learning_rate": 5.480369333627878e-06, "loss": 0.4591073, "memory(GiB)": 34.88, "step": 73265, "train_speed(iter/s)": 0.411258 }, { "acc": 0.91520061, "epoch": 1.9838626702407063, "grad_norm": 4.788520336151123, "learning_rate": 5.479812350505278e-06, "loss": 0.49429197, "memory(GiB)": 34.88, "step": 73270, "train_speed(iter/s)": 0.411259 }, { "acc": 0.89680586, "epoch": 1.9839980505239216, "grad_norm": 5.781600475311279, "learning_rate": 5.479255361378348e-06, "loss": 0.4918293, "memory(GiB)": 34.88, "step": 73275, "train_speed(iter/s)": 0.411261 }, { "acc": 0.92541656, "epoch": 1.9841334308071372, "grad_norm": 6.433377742767334, "learning_rate": 5.4786983662540675e-06, "loss": 0.37994928, "memory(GiB)": 34.88, "step": 73280, "train_speed(iter/s)": 0.411263 }, { "acc": 0.92229843, "epoch": 1.9842688110903528, "grad_norm": 9.42933177947998, "learning_rate": 5.4781413651394124e-06, "loss": 0.4538981, "memory(GiB)": 34.88, "step": 73285, "train_speed(iter/s)": 0.411264 }, { "acc": 0.91910648, "epoch": 1.9844041913735684, "grad_norm": 8.909453392028809, "learning_rate": 5.477584358041359e-06, "loss": 0.45032353, "memory(GiB)": 34.88, "step": 73290, "train_speed(iter/s)": 0.411265 }, { "acc": 0.92164402, "epoch": 1.984539571656784, "grad_norm": 8.357044219970703, "learning_rate": 5.4770273449668884e-06, "loss": 0.40629673, "memory(GiB)": 34.88, "step": 73295, "train_speed(iter/s)": 0.411267 }, { "acc": 0.92823963, "epoch": 1.9846749519399993, "grad_norm": 6.8040313720703125, "learning_rate": 5.476470325922977e-06, "loss": 0.45193939, "memory(GiB)": 34.88, "step": 73300, "train_speed(iter/s)": 0.411269 }, { "acc": 0.92665596, "epoch": 1.9848103322232151, "grad_norm": 4.3580708503723145, "learning_rate": 5.4759133009166e-06, "loss": 0.36384015, "memory(GiB)": 34.88, "step": 73305, "train_speed(iter/s)": 0.41127 }, { "acc": 0.89127884, "epoch": 1.9849457125064305, "grad_norm": 11.316366195678711, "learning_rate": 5.475356269954739e-06, "loss": 0.65913153, "memory(GiB)": 34.88, "step": 73310, "train_speed(iter/s)": 0.411272 }, { "acc": 0.92203789, "epoch": 1.985081092789646, "grad_norm": 9.187187194824219, "learning_rate": 5.474799233044369e-06, "loss": 0.39321027, "memory(GiB)": 34.88, "step": 73315, "train_speed(iter/s)": 0.411273 }, { "acc": 0.92345715, "epoch": 1.9852164730728616, "grad_norm": 8.370380401611328, "learning_rate": 5.4742421901924704e-06, "loss": 0.42142992, "memory(GiB)": 34.88, "step": 73320, "train_speed(iter/s)": 0.411275 }, { "acc": 0.91134338, "epoch": 1.9853518533560772, "grad_norm": 6.798229694366455, "learning_rate": 5.4736851414060185e-06, "loss": 0.51280432, "memory(GiB)": 34.88, "step": 73325, "train_speed(iter/s)": 0.411276 }, { "acc": 0.91867142, "epoch": 1.9854872336392928, "grad_norm": 11.595222473144531, "learning_rate": 5.473128086691993e-06, "loss": 0.42427325, "memory(GiB)": 34.88, "step": 73330, "train_speed(iter/s)": 0.411278 }, { "acc": 0.92480946, "epoch": 1.9856226139225082, "grad_norm": 6.253127098083496, "learning_rate": 5.472571026057372e-06, "loss": 0.36781514, "memory(GiB)": 34.88, "step": 73335, "train_speed(iter/s)": 0.411279 }, { "acc": 0.91663857, "epoch": 1.985757994205724, "grad_norm": 13.153521537780762, "learning_rate": 5.472013959509135e-06, "loss": 0.51126919, "memory(GiB)": 34.88, "step": 73340, "train_speed(iter/s)": 0.411281 }, { "acc": 0.9230299, "epoch": 1.9858933744889393, "grad_norm": 10.956798553466797, "learning_rate": 5.4714568870542584e-06, "loss": 0.43449798, "memory(GiB)": 34.88, "step": 73345, "train_speed(iter/s)": 0.411282 }, { "acc": 0.91044054, "epoch": 1.9860287547721551, "grad_norm": 9.640331268310547, "learning_rate": 5.470899808699721e-06, "loss": 0.4908401, "memory(GiB)": 34.88, "step": 73350, "train_speed(iter/s)": 0.411284 }, { "acc": 0.92335453, "epoch": 1.9861641350553705, "grad_norm": 5.486423015594482, "learning_rate": 5.470342724452502e-06, "loss": 0.34270334, "memory(GiB)": 34.88, "step": 73355, "train_speed(iter/s)": 0.411285 }, { "acc": 0.91275902, "epoch": 1.986299515338586, "grad_norm": 5.875246524810791, "learning_rate": 5.4697856343195795e-06, "loss": 0.48337684, "memory(GiB)": 34.88, "step": 73360, "train_speed(iter/s)": 0.411286 }, { "acc": 0.91326342, "epoch": 1.9864348956218016, "grad_norm": 5.520746231079102, "learning_rate": 5.469228538307931e-06, "loss": 0.45121946, "memory(GiB)": 34.88, "step": 73365, "train_speed(iter/s)": 0.411288 }, { "acc": 0.89637775, "epoch": 1.9865702759050172, "grad_norm": 5.218002796173096, "learning_rate": 5.468671436424539e-06, "loss": 0.55508971, "memory(GiB)": 34.88, "step": 73370, "train_speed(iter/s)": 0.411289 }, { "acc": 0.90903168, "epoch": 1.9867056561882328, "grad_norm": 7.971405029296875, "learning_rate": 5.468114328676378e-06, "loss": 0.44188681, "memory(GiB)": 34.88, "step": 73375, "train_speed(iter/s)": 0.411291 }, { "acc": 0.91707411, "epoch": 1.9868410364714482, "grad_norm": 21.767112731933594, "learning_rate": 5.467557215070431e-06, "loss": 0.41846743, "memory(GiB)": 34.88, "step": 73380, "train_speed(iter/s)": 0.411292 }, { "acc": 0.91076336, "epoch": 1.986976416754664, "grad_norm": 7.137288570404053, "learning_rate": 5.467000095613672e-06, "loss": 0.44811368, "memory(GiB)": 34.88, "step": 73385, "train_speed(iter/s)": 0.411294 }, { "acc": 0.9103591, "epoch": 1.9871117970378793, "grad_norm": 9.803218841552734, "learning_rate": 5.466442970313085e-06, "loss": 0.51524143, "memory(GiB)": 34.88, "step": 73390, "train_speed(iter/s)": 0.411295 }, { "acc": 0.90449657, "epoch": 1.987247177321095, "grad_norm": 17.728883743286133, "learning_rate": 5.465885839175645e-06, "loss": 0.57214861, "memory(GiB)": 34.88, "step": 73395, "train_speed(iter/s)": 0.411296 }, { "acc": 0.88759537, "epoch": 1.9873825576043105, "grad_norm": 13.96045970916748, "learning_rate": 5.465328702208331e-06, "loss": 0.53787203, "memory(GiB)": 34.88, "step": 73400, "train_speed(iter/s)": 0.411298 }, { "acc": 0.92258224, "epoch": 1.987517937887526, "grad_norm": 8.048929214477539, "learning_rate": 5.464771559418127e-06, "loss": 0.40858212, "memory(GiB)": 34.88, "step": 73405, "train_speed(iter/s)": 0.411299 }, { "acc": 0.91627693, "epoch": 1.9876533181707416, "grad_norm": 20.327608108520508, "learning_rate": 5.464214410812009e-06, "loss": 0.4182786, "memory(GiB)": 34.88, "step": 73410, "train_speed(iter/s)": 0.411301 }, { "acc": 0.89716749, "epoch": 1.987788698453957, "grad_norm": 18.881553649902344, "learning_rate": 5.4636572563969584e-06, "loss": 0.56999416, "memory(GiB)": 34.88, "step": 73415, "train_speed(iter/s)": 0.411302 }, { "acc": 0.91292086, "epoch": 1.9879240787371728, "grad_norm": 11.74410629272461, "learning_rate": 5.4631000961799485e-06, "loss": 0.51095819, "memory(GiB)": 34.88, "step": 73420, "train_speed(iter/s)": 0.411304 }, { "acc": 0.94070511, "epoch": 1.9880594590203882, "grad_norm": 4.010350227355957, "learning_rate": 5.462542930167968e-06, "loss": 0.36679807, "memory(GiB)": 34.88, "step": 73425, "train_speed(iter/s)": 0.411305 }, { "acc": 0.91956959, "epoch": 1.988194839303604, "grad_norm": 6.870511531829834, "learning_rate": 5.461985758367991e-06, "loss": 0.49022064, "memory(GiB)": 34.88, "step": 73430, "train_speed(iter/s)": 0.411307 }, { "acc": 0.88808002, "epoch": 1.9883302195868193, "grad_norm": 21.40894317626953, "learning_rate": 5.461428580786997e-06, "loss": 0.61317158, "memory(GiB)": 34.88, "step": 73435, "train_speed(iter/s)": 0.411308 }, { "acc": 0.91947565, "epoch": 1.988465599870035, "grad_norm": 5.62557315826416, "learning_rate": 5.460871397431966e-06, "loss": 0.40902815, "memory(GiB)": 34.88, "step": 73440, "train_speed(iter/s)": 0.41131 }, { "acc": 0.91098299, "epoch": 1.9886009801532505, "grad_norm": 11.912487983703613, "learning_rate": 5.460314208309879e-06, "loss": 0.52659993, "memory(GiB)": 34.88, "step": 73445, "train_speed(iter/s)": 0.411311 }, { "acc": 0.92021809, "epoch": 1.988736360436466, "grad_norm": 9.542818069458008, "learning_rate": 5.4597570134277165e-06, "loss": 0.44094734, "memory(GiB)": 34.88, "step": 73450, "train_speed(iter/s)": 0.411313 }, { "acc": 0.91573219, "epoch": 1.9888717407196816, "grad_norm": 4.568320274353027, "learning_rate": 5.459199812792456e-06, "loss": 0.45137758, "memory(GiB)": 34.88, "step": 73455, "train_speed(iter/s)": 0.411314 }, { "acc": 0.91946573, "epoch": 1.989007121002897, "grad_norm": 7.733668327331543, "learning_rate": 5.458642606411081e-06, "loss": 0.47393856, "memory(GiB)": 34.88, "step": 73460, "train_speed(iter/s)": 0.411316 }, { "acc": 0.8994976, "epoch": 1.9891425012861128, "grad_norm": 17.65771484375, "learning_rate": 5.458085394290568e-06, "loss": 0.47861195, "memory(GiB)": 34.88, "step": 73465, "train_speed(iter/s)": 0.411317 }, { "acc": 0.91856556, "epoch": 1.9892778815693282, "grad_norm": 4.8552141189575195, "learning_rate": 5.4575281764379e-06, "loss": 0.40303354, "memory(GiB)": 34.88, "step": 73470, "train_speed(iter/s)": 0.411319 }, { "acc": 0.91792965, "epoch": 1.9894132618525437, "grad_norm": 8.737444877624512, "learning_rate": 5.456970952860055e-06, "loss": 0.45191398, "memory(GiB)": 34.88, "step": 73475, "train_speed(iter/s)": 0.41132 }, { "acc": 0.90071249, "epoch": 1.9895486421357593, "grad_norm": 12.464262962341309, "learning_rate": 5.4564137235640146e-06, "loss": 0.57105675, "memory(GiB)": 34.88, "step": 73480, "train_speed(iter/s)": 0.411322 }, { "acc": 0.92839117, "epoch": 1.989684022418975, "grad_norm": 4.846814155578613, "learning_rate": 5.455856488556759e-06, "loss": 0.41375971, "memory(GiB)": 34.88, "step": 73485, "train_speed(iter/s)": 0.411323 }, { "acc": 0.92858086, "epoch": 1.9898194027021905, "grad_norm": 5.27717399597168, "learning_rate": 5.455299247845268e-06, "loss": 0.35161407, "memory(GiB)": 34.88, "step": 73490, "train_speed(iter/s)": 0.411324 }, { "acc": 0.92361012, "epoch": 1.9899547829854058, "grad_norm": 5.736794471740723, "learning_rate": 5.454742001436523e-06, "loss": 0.421876, "memory(GiB)": 34.88, "step": 73495, "train_speed(iter/s)": 0.411326 }, { "acc": 0.90992641, "epoch": 1.9900901632686216, "grad_norm": 19.2849063873291, "learning_rate": 5.454184749337505e-06, "loss": 0.52415676, "memory(GiB)": 34.88, "step": 73500, "train_speed(iter/s)": 0.411327 }, { "acc": 0.93582468, "epoch": 1.990225543551837, "grad_norm": 9.623586654663086, "learning_rate": 5.453627491555196e-06, "loss": 0.35173187, "memory(GiB)": 34.88, "step": 73505, "train_speed(iter/s)": 0.411329 }, { "acc": 0.91030703, "epoch": 1.9903609238350528, "grad_norm": 13.987607955932617, "learning_rate": 5.453070228096572e-06, "loss": 0.47265849, "memory(GiB)": 34.88, "step": 73510, "train_speed(iter/s)": 0.41133 }, { "acc": 0.90755501, "epoch": 1.9904963041182682, "grad_norm": 7.096586227416992, "learning_rate": 5.4525129589686166e-06, "loss": 0.47106509, "memory(GiB)": 34.88, "step": 73515, "train_speed(iter/s)": 0.411332 }, { "acc": 0.93708544, "epoch": 1.9906316844014837, "grad_norm": 7.067678928375244, "learning_rate": 5.451955684178312e-06, "loss": 0.3595546, "memory(GiB)": 34.88, "step": 73520, "train_speed(iter/s)": 0.411333 }, { "acc": 0.91280718, "epoch": 1.9907670646846993, "grad_norm": 13.100645065307617, "learning_rate": 5.451398403732636e-06, "loss": 0.47298203, "memory(GiB)": 34.88, "step": 73525, "train_speed(iter/s)": 0.411334 }, { "acc": 0.90550871, "epoch": 1.990902444967915, "grad_norm": 7.361644268035889, "learning_rate": 5.450841117638573e-06, "loss": 0.58599253, "memory(GiB)": 34.88, "step": 73530, "train_speed(iter/s)": 0.411336 }, { "acc": 0.90725374, "epoch": 1.9910378252511305, "grad_norm": 18.5909481048584, "learning_rate": 5.4502838259031025e-06, "loss": 0.47271385, "memory(GiB)": 34.88, "step": 73535, "train_speed(iter/s)": 0.411337 }, { "acc": 0.91725111, "epoch": 1.9911732055343458, "grad_norm": 6.3299031257629395, "learning_rate": 5.449726528533205e-06, "loss": 0.44805832, "memory(GiB)": 34.88, "step": 73540, "train_speed(iter/s)": 0.411339 }, { "acc": 0.90437202, "epoch": 1.9913085858175616, "grad_norm": 6.968249797821045, "learning_rate": 5.449169225535863e-06, "loss": 0.5033227, "memory(GiB)": 34.88, "step": 73545, "train_speed(iter/s)": 0.411341 }, { "acc": 0.90242424, "epoch": 1.991443966100777, "grad_norm": 5.108956336975098, "learning_rate": 5.448611916918058e-06, "loss": 0.52294588, "memory(GiB)": 34.88, "step": 73550, "train_speed(iter/s)": 0.411342 }, { "acc": 0.91170168, "epoch": 1.9915793463839926, "grad_norm": 9.47042465209961, "learning_rate": 5.448054602686771e-06, "loss": 0.40842724, "memory(GiB)": 34.88, "step": 73555, "train_speed(iter/s)": 0.411344 }, { "acc": 0.91660538, "epoch": 1.9917147266672082, "grad_norm": 9.46476936340332, "learning_rate": 5.44749728284898e-06, "loss": 0.47639713, "memory(GiB)": 34.88, "step": 73560, "train_speed(iter/s)": 0.411345 }, { "acc": 0.92019596, "epoch": 1.9918501069504237, "grad_norm": 13.954773902893066, "learning_rate": 5.446939957411672e-06, "loss": 0.42782722, "memory(GiB)": 34.88, "step": 73565, "train_speed(iter/s)": 0.411347 }, { "acc": 0.9187088, "epoch": 1.9919854872336393, "grad_norm": 5.475799560546875, "learning_rate": 5.446382626381827e-06, "loss": 0.38724365, "memory(GiB)": 34.88, "step": 73570, "train_speed(iter/s)": 0.411348 }, { "acc": 0.91483574, "epoch": 1.9921208675168547, "grad_norm": 6.9057464599609375, "learning_rate": 5.445825289766424e-06, "loss": 0.46264858, "memory(GiB)": 34.88, "step": 73575, "train_speed(iter/s)": 0.41135 }, { "acc": 0.91461525, "epoch": 1.9922562478000705, "grad_norm": 5.428499698638916, "learning_rate": 5.445267947572448e-06, "loss": 0.4497982, "memory(GiB)": 34.88, "step": 73580, "train_speed(iter/s)": 0.411352 }, { "acc": 0.90918217, "epoch": 1.9923916280832858, "grad_norm": 7.373392105102539, "learning_rate": 5.444710599806879e-06, "loss": 0.55631399, "memory(GiB)": 34.88, "step": 73585, "train_speed(iter/s)": 0.411353 }, { "acc": 0.93035822, "epoch": 1.9925270083665017, "grad_norm": 4.266242980957031, "learning_rate": 5.444153246476701e-06, "loss": 0.3448112, "memory(GiB)": 34.88, "step": 73590, "train_speed(iter/s)": 0.411355 }, { "acc": 0.92360954, "epoch": 1.992662388649717, "grad_norm": 10.604283332824707, "learning_rate": 5.443595887588892e-06, "loss": 0.49274302, "memory(GiB)": 34.88, "step": 73595, "train_speed(iter/s)": 0.411356 }, { "acc": 0.90397053, "epoch": 1.9927977689329326, "grad_norm": 12.392776489257812, "learning_rate": 5.443038523150438e-06, "loss": 0.58604259, "memory(GiB)": 34.88, "step": 73600, "train_speed(iter/s)": 0.411358 }, { "acc": 0.91385288, "epoch": 1.9929331492161482, "grad_norm": 17.38469886779785, "learning_rate": 5.442481153168318e-06, "loss": 0.4737606, "memory(GiB)": 34.88, "step": 73605, "train_speed(iter/s)": 0.411359 }, { "acc": 0.90981846, "epoch": 1.9930685294993638, "grad_norm": 19.789024353027344, "learning_rate": 5.441923777649517e-06, "loss": 0.54751616, "memory(GiB)": 34.88, "step": 73610, "train_speed(iter/s)": 0.411361 }, { "acc": 0.91958237, "epoch": 1.9932039097825793, "grad_norm": 8.576870918273926, "learning_rate": 5.441366396601014e-06, "loss": 0.4117013, "memory(GiB)": 34.88, "step": 73615, "train_speed(iter/s)": 0.411362 }, { "acc": 0.9093811, "epoch": 1.9933392900657947, "grad_norm": 13.98188304901123, "learning_rate": 5.440809010029794e-06, "loss": 0.50315704, "memory(GiB)": 34.88, "step": 73620, "train_speed(iter/s)": 0.411364 }, { "acc": 0.89608393, "epoch": 1.9934746703490105, "grad_norm": 5.987534523010254, "learning_rate": 5.440251617942838e-06, "loss": 0.53912401, "memory(GiB)": 34.88, "step": 73625, "train_speed(iter/s)": 0.411365 }, { "acc": 0.9110733, "epoch": 1.9936100506322258, "grad_norm": 6.192651748657227, "learning_rate": 5.439694220347129e-06, "loss": 0.54666572, "memory(GiB)": 34.88, "step": 73630, "train_speed(iter/s)": 0.411367 }, { "acc": 0.90824909, "epoch": 1.9937454309154414, "grad_norm": 5.403343200683594, "learning_rate": 5.4391368172496504e-06, "loss": 0.52943783, "memory(GiB)": 34.88, "step": 73635, "train_speed(iter/s)": 0.411368 }, { "acc": 0.90885763, "epoch": 1.993880811198657, "grad_norm": 9.9205322265625, "learning_rate": 5.438579408657383e-06, "loss": 0.54881287, "memory(GiB)": 34.88, "step": 73640, "train_speed(iter/s)": 0.41137 }, { "acc": 0.91120815, "epoch": 1.9940161914818726, "grad_norm": 5.806155681610107, "learning_rate": 5.438021994577311e-06, "loss": 0.44016504, "memory(GiB)": 34.88, "step": 73645, "train_speed(iter/s)": 0.411372 }, { "acc": 0.93584709, "epoch": 1.9941515717650882, "grad_norm": 6.9718194007873535, "learning_rate": 5.437464575016415e-06, "loss": 0.36065118, "memory(GiB)": 34.88, "step": 73650, "train_speed(iter/s)": 0.411373 }, { "acc": 0.9104763, "epoch": 1.9942869520483035, "grad_norm": 4.648110866546631, "learning_rate": 5.436907149981681e-06, "loss": 0.39957514, "memory(GiB)": 34.88, "step": 73655, "train_speed(iter/s)": 0.411375 }, { "acc": 0.90558434, "epoch": 1.9944223323315193, "grad_norm": 5.290199279785156, "learning_rate": 5.436349719480086e-06, "loss": 0.56065235, "memory(GiB)": 34.88, "step": 73660, "train_speed(iter/s)": 0.411376 }, { "acc": 0.91004982, "epoch": 1.9945577126147347, "grad_norm": 5.852704048156738, "learning_rate": 5.43579228351862e-06, "loss": 0.44767942, "memory(GiB)": 34.88, "step": 73665, "train_speed(iter/s)": 0.411377 }, { "acc": 0.91667557, "epoch": 1.9946930928979505, "grad_norm": 12.908055305480957, "learning_rate": 5.435234842104262e-06, "loss": 0.56171141, "memory(GiB)": 34.88, "step": 73670, "train_speed(iter/s)": 0.411379 }, { "acc": 0.91793823, "epoch": 1.9948284731811659, "grad_norm": 14.165842056274414, "learning_rate": 5.434677395243997e-06, "loss": 0.36494572, "memory(GiB)": 34.88, "step": 73675, "train_speed(iter/s)": 0.41138 }, { "acc": 0.90598783, "epoch": 1.9949638534643814, "grad_norm": 8.634047508239746, "learning_rate": 5.434119942944807e-06, "loss": 0.53539205, "memory(GiB)": 34.88, "step": 73680, "train_speed(iter/s)": 0.411382 }, { "acc": 0.90122719, "epoch": 1.995099233747597, "grad_norm": 16.734516143798828, "learning_rate": 5.433562485213673e-06, "loss": 0.63831139, "memory(GiB)": 34.88, "step": 73685, "train_speed(iter/s)": 0.411383 }, { "acc": 0.91201811, "epoch": 1.9952346140308126, "grad_norm": 8.154834747314453, "learning_rate": 5.433005022057581e-06, "loss": 0.43418446, "memory(GiB)": 34.88, "step": 73690, "train_speed(iter/s)": 0.411385 }, { "acc": 0.91241446, "epoch": 1.9953699943140282, "grad_norm": 9.644246101379395, "learning_rate": 5.432447553483514e-06, "loss": 0.51055179, "memory(GiB)": 34.88, "step": 73695, "train_speed(iter/s)": 0.411386 }, { "acc": 0.91840019, "epoch": 1.9955053745972435, "grad_norm": 5.324258804321289, "learning_rate": 5.431890079498455e-06, "loss": 0.49123182, "memory(GiB)": 34.88, "step": 73700, "train_speed(iter/s)": 0.411388 }, { "acc": 0.90785418, "epoch": 1.9956407548804593, "grad_norm": 15.654272079467773, "learning_rate": 5.431332600109389e-06, "loss": 0.52239199, "memory(GiB)": 34.88, "step": 73705, "train_speed(iter/s)": 0.411389 }, { "acc": 0.92876463, "epoch": 1.9957761351636747, "grad_norm": 8.887896537780762, "learning_rate": 5.430775115323296e-06, "loss": 0.33322287, "memory(GiB)": 34.88, "step": 73710, "train_speed(iter/s)": 0.411391 }, { "acc": 0.91752911, "epoch": 1.9959115154468903, "grad_norm": 5.453503131866455, "learning_rate": 5.4302176251471635e-06, "loss": 0.40427456, "memory(GiB)": 34.88, "step": 73715, "train_speed(iter/s)": 0.411392 }, { "acc": 0.9216074, "epoch": 1.9960468957301059, "grad_norm": 2.4017179012298584, "learning_rate": 5.429660129587973e-06, "loss": 0.43849716, "memory(GiB)": 34.88, "step": 73720, "train_speed(iter/s)": 0.411394 }, { "acc": 0.92107906, "epoch": 1.9961822760133214, "grad_norm": 13.471151351928711, "learning_rate": 5.429102628652709e-06, "loss": 0.46828585, "memory(GiB)": 34.88, "step": 73725, "train_speed(iter/s)": 0.411395 }, { "acc": 0.92846918, "epoch": 1.996317656296537, "grad_norm": 7.619379043579102, "learning_rate": 5.428545122348353e-06, "loss": 0.38997746, "memory(GiB)": 34.88, "step": 73730, "train_speed(iter/s)": 0.411397 }, { "acc": 0.89695244, "epoch": 1.9964530365797524, "grad_norm": 10.991472244262695, "learning_rate": 5.427987610681892e-06, "loss": 0.54073939, "memory(GiB)": 34.88, "step": 73735, "train_speed(iter/s)": 0.411398 }, { "acc": 0.91545582, "epoch": 1.9965884168629682, "grad_norm": 8.075636863708496, "learning_rate": 5.4274300936603085e-06, "loss": 0.45612617, "memory(GiB)": 34.88, "step": 73740, "train_speed(iter/s)": 0.4114 }, { "acc": 0.91658497, "epoch": 1.9967237971461835, "grad_norm": 12.145601272583008, "learning_rate": 5.426872571290587e-06, "loss": 0.4314539, "memory(GiB)": 34.88, "step": 73745, "train_speed(iter/s)": 0.411401 }, { "acc": 0.90813236, "epoch": 1.9968591774293993, "grad_norm": 7.6884918212890625, "learning_rate": 5.426315043579711e-06, "loss": 0.45670562, "memory(GiB)": 34.88, "step": 73750, "train_speed(iter/s)": 0.411403 }, { "acc": 0.92052202, "epoch": 1.9969945577126147, "grad_norm": 10.100313186645508, "learning_rate": 5.4257575105346625e-06, "loss": 0.34576309, "memory(GiB)": 34.88, "step": 73755, "train_speed(iter/s)": 0.411404 }, { "acc": 0.92972469, "epoch": 1.9971299379958303, "grad_norm": 8.551419258117676, "learning_rate": 5.425199972162433e-06, "loss": 0.40595908, "memory(GiB)": 34.88, "step": 73760, "train_speed(iter/s)": 0.411406 }, { "acc": 0.91448402, "epoch": 1.9972653182790459, "grad_norm": 10.510516166687012, "learning_rate": 5.424642428469997e-06, "loss": 0.55014029, "memory(GiB)": 34.88, "step": 73765, "train_speed(iter/s)": 0.411407 }, { "acc": 0.92673702, "epoch": 1.9974006985622614, "grad_norm": 12.809232711791992, "learning_rate": 5.424084879464348e-06, "loss": 0.49982276, "memory(GiB)": 34.88, "step": 73770, "train_speed(iter/s)": 0.411408 }, { "acc": 0.92659283, "epoch": 1.997536078845477, "grad_norm": 10.363443374633789, "learning_rate": 5.423527325152462e-06, "loss": 0.40877252, "memory(GiB)": 34.88, "step": 73775, "train_speed(iter/s)": 0.41141 }, { "acc": 0.9201704, "epoch": 1.9976714591286924, "grad_norm": 6.774865627288818, "learning_rate": 5.422969765541329e-06, "loss": 0.36043425, "memory(GiB)": 34.88, "step": 73780, "train_speed(iter/s)": 0.411411 }, { "acc": 0.91652727, "epoch": 1.9978068394119082, "grad_norm": 15.056550025939941, "learning_rate": 5.422412200637932e-06, "loss": 0.49409637, "memory(GiB)": 34.88, "step": 73785, "train_speed(iter/s)": 0.411413 }, { "acc": 0.91987886, "epoch": 1.9979422196951235, "grad_norm": 6.872361183166504, "learning_rate": 5.421854630449256e-06, "loss": 0.43518376, "memory(GiB)": 34.88, "step": 73790, "train_speed(iter/s)": 0.411414 }, { "acc": 0.93040876, "epoch": 1.9980775999783391, "grad_norm": 9.258070945739746, "learning_rate": 5.421297054982286e-06, "loss": 0.37766266, "memory(GiB)": 34.88, "step": 73795, "train_speed(iter/s)": 0.411416 }, { "acc": 0.90104733, "epoch": 1.9982129802615547, "grad_norm": 12.716947555541992, "learning_rate": 5.420739474244006e-06, "loss": 0.55807405, "memory(GiB)": 34.88, "step": 73800, "train_speed(iter/s)": 0.411417 }, { "acc": 0.91008863, "epoch": 1.9983483605447703, "grad_norm": 9.440683364868164, "learning_rate": 5.420181888241401e-06, "loss": 0.42917867, "memory(GiB)": 34.88, "step": 73805, "train_speed(iter/s)": 0.411419 }, { "acc": 0.91825085, "epoch": 1.9984837408279859, "grad_norm": 8.902557373046875, "learning_rate": 5.419624296981453e-06, "loss": 0.4295639, "memory(GiB)": 34.88, "step": 73810, "train_speed(iter/s)": 0.41142 }, { "acc": 0.91484461, "epoch": 1.9986191211112012, "grad_norm": 8.07412052154541, "learning_rate": 5.4190667004711505e-06, "loss": 0.37008338, "memory(GiB)": 34.88, "step": 73815, "train_speed(iter/s)": 0.411422 }, { "acc": 0.9231204, "epoch": 1.998754501394417, "grad_norm": 3.233138084411621, "learning_rate": 5.41850909871748e-06, "loss": 0.37556088, "memory(GiB)": 34.88, "step": 73820, "train_speed(iter/s)": 0.411423 }, { "acc": 0.89952526, "epoch": 1.9988898816776324, "grad_norm": 12.174944877624512, "learning_rate": 5.41795149172742e-06, "loss": 0.56230993, "memory(GiB)": 34.88, "step": 73825, "train_speed(iter/s)": 0.411425 }, { "acc": 0.91220407, "epoch": 1.9990252619608482, "grad_norm": 4.975595474243164, "learning_rate": 5.417393879507964e-06, "loss": 0.48051705, "memory(GiB)": 34.88, "step": 73830, "train_speed(iter/s)": 0.411426 }, { "acc": 0.91494598, "epoch": 1.9991606422440635, "grad_norm": 7.338964939117432, "learning_rate": 5.41683626206609e-06, "loss": 0.44717822, "memory(GiB)": 34.88, "step": 73835, "train_speed(iter/s)": 0.411428 }, { "acc": 0.91443691, "epoch": 1.9992960225272791, "grad_norm": 13.07582950592041, "learning_rate": 5.4162786394087885e-06, "loss": 0.47393322, "memory(GiB)": 34.88, "step": 73840, "train_speed(iter/s)": 0.411429 }, { "acc": 0.91315432, "epoch": 1.9994314028104947, "grad_norm": 13.822891235351562, "learning_rate": 5.415721011543041e-06, "loss": 0.47913675, "memory(GiB)": 34.88, "step": 73845, "train_speed(iter/s)": 0.411431 }, { "acc": 0.92765141, "epoch": 1.9995667830937103, "grad_norm": 4.19852352142334, "learning_rate": 5.415163378475835e-06, "loss": 0.39897361, "memory(GiB)": 34.88, "step": 73850, "train_speed(iter/s)": 0.411432 }, { "acc": 0.91536274, "epoch": 1.9997021633769259, "grad_norm": 4.240123271942139, "learning_rate": 5.414605740214157e-06, "loss": 0.42658558, "memory(GiB)": 34.88, "step": 73855, "train_speed(iter/s)": 0.411434 }, { "acc": 0.91620655, "epoch": 1.9998375436601412, "grad_norm": 5.594632625579834, "learning_rate": 5.414048096764989e-06, "loss": 0.42208753, "memory(GiB)": 34.88, "step": 73860, "train_speed(iter/s)": 0.411435 }, { "acc": 0.91580143, "epoch": 1.999972923943357, "grad_norm": 8.37076473236084, "learning_rate": 5.413490448135318e-06, "loss": 0.47427397, "memory(GiB)": 34.88, "step": 73865, "train_speed(iter/s)": 0.411437 }, { "acc": 0.92226992, "epoch": 2.0001083042265724, "grad_norm": 8.585848808288574, "learning_rate": 5.4129327943321306e-06, "loss": 0.44455652, "memory(GiB)": 34.88, "step": 73870, "train_speed(iter/s)": 0.411435 }, { "acc": 0.89985895, "epoch": 2.000243684509788, "grad_norm": 4.5355072021484375, "learning_rate": 5.412375135362413e-06, "loss": 0.58301182, "memory(GiB)": 34.88, "step": 73875, "train_speed(iter/s)": 0.411436 }, { "acc": 0.9092103, "epoch": 2.0003790647930035, "grad_norm": 7.088291645050049, "learning_rate": 5.411817471233148e-06, "loss": 0.47138495, "memory(GiB)": 34.88, "step": 73880, "train_speed(iter/s)": 0.411438 }, { "acc": 0.89594202, "epoch": 2.000514445076219, "grad_norm": 7.859660625457764, "learning_rate": 5.411259801951325e-06, "loss": 0.61583128, "memory(GiB)": 34.88, "step": 73885, "train_speed(iter/s)": 0.411439 }, { "acc": 0.91881351, "epoch": 2.0006498253594347, "grad_norm": 3.3169965744018555, "learning_rate": 5.410702127523929e-06, "loss": 0.38303223, "memory(GiB)": 34.88, "step": 73890, "train_speed(iter/s)": 0.41144 }, { "acc": 0.90932884, "epoch": 2.00078520564265, "grad_norm": 8.778715133666992, "learning_rate": 5.4101444479579435e-06, "loss": 0.50320339, "memory(GiB)": 34.88, "step": 73895, "train_speed(iter/s)": 0.411442 }, { "acc": 0.91369781, "epoch": 2.000920585925866, "grad_norm": 4.236811637878418, "learning_rate": 5.409586763260359e-06, "loss": 0.40948792, "memory(GiB)": 34.88, "step": 73900, "train_speed(iter/s)": 0.411444 }, { "acc": 0.89264393, "epoch": 2.001055966209081, "grad_norm": 9.383291244506836, "learning_rate": 5.409029073438154e-06, "loss": 0.59700298, "memory(GiB)": 34.88, "step": 73905, "train_speed(iter/s)": 0.411445 }, { "acc": 0.92717733, "epoch": 2.001191346492297, "grad_norm": 6.15760612487793, "learning_rate": 5.408471378498325e-06, "loss": 0.37656822, "memory(GiB)": 34.88, "step": 73910, "train_speed(iter/s)": 0.411447 }, { "acc": 0.91485872, "epoch": 2.0013267267755124, "grad_norm": 15.729329109191895, "learning_rate": 5.407913678447849e-06, "loss": 0.4604969, "memory(GiB)": 34.88, "step": 73915, "train_speed(iter/s)": 0.411448 }, { "acc": 0.92220707, "epoch": 2.001462107058728, "grad_norm": 5.581347465515137, "learning_rate": 5.407355973293718e-06, "loss": 0.38662019, "memory(GiB)": 34.88, "step": 73920, "train_speed(iter/s)": 0.411449 }, { "acc": 0.9023139, "epoch": 2.0015974873419435, "grad_norm": 4.69690465927124, "learning_rate": 5.406798263042917e-06, "loss": 0.55322847, "memory(GiB)": 34.88, "step": 73925, "train_speed(iter/s)": 0.411451 }, { "acc": 0.93177338, "epoch": 2.001732867625159, "grad_norm": 11.705719947814941, "learning_rate": 5.406240547702431e-06, "loss": 0.42218466, "memory(GiB)": 34.88, "step": 73930, "train_speed(iter/s)": 0.411452 }, { "acc": 0.90767822, "epoch": 2.0018682479083747, "grad_norm": 8.57370662689209, "learning_rate": 5.4056828272792475e-06, "loss": 0.553509, "memory(GiB)": 34.88, "step": 73935, "train_speed(iter/s)": 0.411454 }, { "acc": 0.90902224, "epoch": 2.00200362819159, "grad_norm": 6.498610496520996, "learning_rate": 5.405125101780352e-06, "loss": 0.51774879, "memory(GiB)": 34.88, "step": 73940, "train_speed(iter/s)": 0.411455 }, { "acc": 0.91882172, "epoch": 2.002139008474806, "grad_norm": 5.666444301605225, "learning_rate": 5.404567371212733e-06, "loss": 0.42702937, "memory(GiB)": 34.88, "step": 73945, "train_speed(iter/s)": 0.411457 }, { "acc": 0.92974052, "epoch": 2.002274388758021, "grad_norm": 5.809591770172119, "learning_rate": 5.404009635583376e-06, "loss": 0.4668613, "memory(GiB)": 34.88, "step": 73950, "train_speed(iter/s)": 0.411458 }, { "acc": 0.89378433, "epoch": 2.002409769041237, "grad_norm": 16.757503509521484, "learning_rate": 5.403451894899268e-06, "loss": 0.51754751, "memory(GiB)": 34.88, "step": 73955, "train_speed(iter/s)": 0.41146 }, { "acc": 0.91750975, "epoch": 2.0025451493244524, "grad_norm": 9.565108299255371, "learning_rate": 5.402894149167395e-06, "loss": 0.46881151, "memory(GiB)": 34.88, "step": 73960, "train_speed(iter/s)": 0.411461 }, { "acc": 0.90769291, "epoch": 2.0026805296076677, "grad_norm": 4.856780529022217, "learning_rate": 5.4023363983947445e-06, "loss": 0.52645807, "memory(GiB)": 34.88, "step": 73965, "train_speed(iter/s)": 0.411463 }, { "acc": 0.90654049, "epoch": 2.0028159098908835, "grad_norm": 4.827094554901123, "learning_rate": 5.401778642588306e-06, "loss": 0.52390623, "memory(GiB)": 34.88, "step": 73970, "train_speed(iter/s)": 0.411464 }, { "acc": 0.90881805, "epoch": 2.002951290174099, "grad_norm": 6.925702095031738, "learning_rate": 5.40122088175506e-06, "loss": 0.56083236, "memory(GiB)": 34.88, "step": 73975, "train_speed(iter/s)": 0.411466 }, { "acc": 0.91340561, "epoch": 2.0030866704573147, "grad_norm": 3.8724544048309326, "learning_rate": 5.400663115902e-06, "loss": 0.47094116, "memory(GiB)": 34.88, "step": 73980, "train_speed(iter/s)": 0.411467 }, { "acc": 0.92176781, "epoch": 2.00322205074053, "grad_norm": 11.07270622253418, "learning_rate": 5.400105345036111e-06, "loss": 0.34956408, "memory(GiB)": 34.88, "step": 73985, "train_speed(iter/s)": 0.411468 }, { "acc": 0.91939144, "epoch": 2.003357431023746, "grad_norm": 5.190020561218262, "learning_rate": 5.399547569164379e-06, "loss": 0.47070727, "memory(GiB)": 34.88, "step": 73990, "train_speed(iter/s)": 0.41147 }, { "acc": 0.90887451, "epoch": 2.003492811306961, "grad_norm": 7.228893756866455, "learning_rate": 5.398989788293793e-06, "loss": 0.54906807, "memory(GiB)": 34.88, "step": 73995, "train_speed(iter/s)": 0.411471 }, { "acc": 0.89597263, "epoch": 2.003628191590177, "grad_norm": 13.16473388671875, "learning_rate": 5.398432002431338e-06, "loss": 0.64793758, "memory(GiB)": 34.88, "step": 74000, "train_speed(iter/s)": 0.411472 }, { "acc": 0.91478233, "epoch": 2.0037635718733924, "grad_norm": 23.152690887451172, "learning_rate": 5.397874211584003e-06, "loss": 0.54342399, "memory(GiB)": 34.88, "step": 74005, "train_speed(iter/s)": 0.411474 }, { "acc": 0.90047903, "epoch": 2.0038989521566077, "grad_norm": 8.376169204711914, "learning_rate": 5.397316415758775e-06, "loss": 0.50952263, "memory(GiB)": 34.88, "step": 74010, "train_speed(iter/s)": 0.411475 }, { "acc": 0.91395397, "epoch": 2.0040343324398235, "grad_norm": 6.463737487792969, "learning_rate": 5.396758614962644e-06, "loss": 0.54268856, "memory(GiB)": 34.88, "step": 74015, "train_speed(iter/s)": 0.411477 }, { "acc": 0.9146431, "epoch": 2.004169712723039, "grad_norm": 20.14563751220703, "learning_rate": 5.396200809202593e-06, "loss": 0.49564557, "memory(GiB)": 34.88, "step": 74020, "train_speed(iter/s)": 0.411478 }, { "acc": 0.89776192, "epoch": 2.0043050930062547, "grad_norm": 8.05700969696045, "learning_rate": 5.395642998485613e-06, "loss": 0.53725176, "memory(GiB)": 34.88, "step": 74025, "train_speed(iter/s)": 0.41148 }, { "acc": 0.90242615, "epoch": 2.00444047328947, "grad_norm": 6.771648406982422, "learning_rate": 5.395085182818688e-06, "loss": 0.54373512, "memory(GiB)": 34.88, "step": 74030, "train_speed(iter/s)": 0.411481 }, { "acc": 0.91931849, "epoch": 2.004575853572686, "grad_norm": 3.6342594623565674, "learning_rate": 5.394527362208811e-06, "loss": 0.40869226, "memory(GiB)": 34.88, "step": 74035, "train_speed(iter/s)": 0.411483 }, { "acc": 0.899333, "epoch": 2.004711233855901, "grad_norm": 7.978682041168213, "learning_rate": 5.393969536662965e-06, "loss": 0.57499094, "memory(GiB)": 34.88, "step": 74040, "train_speed(iter/s)": 0.411484 }, { "acc": 0.91501255, "epoch": 2.0048466141391166, "grad_norm": 7.03420352935791, "learning_rate": 5.393411706188139e-06, "loss": 0.44965439, "memory(GiB)": 34.88, "step": 74045, "train_speed(iter/s)": 0.411486 }, { "acc": 0.92474747, "epoch": 2.0049819944223324, "grad_norm": 5.599025726318359, "learning_rate": 5.392853870791325e-06, "loss": 0.36138268, "memory(GiB)": 34.88, "step": 74050, "train_speed(iter/s)": 0.411487 }, { "acc": 0.91868238, "epoch": 2.0051173747055477, "grad_norm": 25.309873580932617, "learning_rate": 5.392296030479507e-06, "loss": 0.50518293, "memory(GiB)": 34.88, "step": 74055, "train_speed(iter/s)": 0.411488 }, { "acc": 0.92603216, "epoch": 2.0052527549887635, "grad_norm": 8.351922035217285, "learning_rate": 5.391738185259674e-06, "loss": 0.4515234, "memory(GiB)": 34.88, "step": 74060, "train_speed(iter/s)": 0.41149 }, { "acc": 0.92348728, "epoch": 2.005388135271979, "grad_norm": 11.21417236328125, "learning_rate": 5.391180335138811e-06, "loss": 0.51065531, "memory(GiB)": 34.88, "step": 74065, "train_speed(iter/s)": 0.411491 }, { "acc": 0.91427279, "epoch": 2.0055235155551947, "grad_norm": 8.985602378845215, "learning_rate": 5.390622480123912e-06, "loss": 0.43394918, "memory(GiB)": 34.88, "step": 74070, "train_speed(iter/s)": 0.411493 }, { "acc": 0.9221139, "epoch": 2.00565889583841, "grad_norm": 9.573773384094238, "learning_rate": 5.390064620221963e-06, "loss": 0.42467813, "memory(GiB)": 34.88, "step": 74075, "train_speed(iter/s)": 0.411494 }, { "acc": 0.91242847, "epoch": 2.005794276121626, "grad_norm": 3.1308705806732178, "learning_rate": 5.389506755439948e-06, "loss": 0.53065996, "memory(GiB)": 34.88, "step": 74080, "train_speed(iter/s)": 0.411496 }, { "acc": 0.90093975, "epoch": 2.005929656404841, "grad_norm": 6.8125128746032715, "learning_rate": 5.388948885784863e-06, "loss": 0.63386745, "memory(GiB)": 34.88, "step": 74085, "train_speed(iter/s)": 0.411497 }, { "acc": 0.93190727, "epoch": 2.0060650366880566, "grad_norm": 3.075819730758667, "learning_rate": 5.3883910112636895e-06, "loss": 0.39581513, "memory(GiB)": 34.88, "step": 74090, "train_speed(iter/s)": 0.411498 }, { "acc": 0.90655184, "epoch": 2.0062004169712724, "grad_norm": 8.17919635772705, "learning_rate": 5.387833131883422e-06, "loss": 0.49219251, "memory(GiB)": 34.88, "step": 74095, "train_speed(iter/s)": 0.4115 }, { "acc": 0.93200626, "epoch": 2.0063357972544877, "grad_norm": 7.9673171043396, "learning_rate": 5.387275247651044e-06, "loss": 0.32804933, "memory(GiB)": 34.88, "step": 74100, "train_speed(iter/s)": 0.411502 }, { "acc": 0.92031059, "epoch": 2.0064711775377035, "grad_norm": 4.906068325042725, "learning_rate": 5.386717358573546e-06, "loss": 0.44944677, "memory(GiB)": 34.88, "step": 74105, "train_speed(iter/s)": 0.411503 }, { "acc": 0.91969109, "epoch": 2.006606557820919, "grad_norm": 8.423721313476562, "learning_rate": 5.386159464657915e-06, "loss": 0.46447425, "memory(GiB)": 34.88, "step": 74110, "train_speed(iter/s)": 0.411505 }, { "acc": 0.9050787, "epoch": 2.0067419381041347, "grad_norm": 5.3592095375061035, "learning_rate": 5.385601565911143e-06, "loss": 0.47676983, "memory(GiB)": 34.88, "step": 74115, "train_speed(iter/s)": 0.411506 }, { "acc": 0.93418894, "epoch": 2.00687731838735, "grad_norm": 4.124181270599365, "learning_rate": 5.385043662340218e-06, "loss": 0.28874996, "memory(GiB)": 34.88, "step": 74120, "train_speed(iter/s)": 0.411508 }, { "acc": 0.89347019, "epoch": 2.0070126986705654, "grad_norm": 12.182440757751465, "learning_rate": 5.384485753952127e-06, "loss": 0.71328335, "memory(GiB)": 34.88, "step": 74125, "train_speed(iter/s)": 0.411509 }, { "acc": 0.91668701, "epoch": 2.0071480789537812, "grad_norm": 21.49160385131836, "learning_rate": 5.383927840753861e-06, "loss": 0.41925983, "memory(GiB)": 34.88, "step": 74130, "train_speed(iter/s)": 0.411511 }, { "acc": 0.9073555, "epoch": 2.0072834592369966, "grad_norm": 9.140203475952148, "learning_rate": 5.3833699227524074e-06, "loss": 0.48837399, "memory(GiB)": 34.88, "step": 74135, "train_speed(iter/s)": 0.411512 }, { "acc": 0.92629051, "epoch": 2.0074188395202124, "grad_norm": 12.143536567687988, "learning_rate": 5.382811999954756e-06, "loss": 0.39509377, "memory(GiB)": 34.88, "step": 74140, "train_speed(iter/s)": 0.411514 }, { "acc": 0.92488604, "epoch": 2.0075542198034277, "grad_norm": 11.504706382751465, "learning_rate": 5.382254072367894e-06, "loss": 0.37826126, "memory(GiB)": 34.88, "step": 74145, "train_speed(iter/s)": 0.411515 }, { "acc": 0.92989664, "epoch": 2.0076896000866435, "grad_norm": 7.759973049163818, "learning_rate": 5.381696139998812e-06, "loss": 0.42815123, "memory(GiB)": 34.88, "step": 74150, "train_speed(iter/s)": 0.411517 }, { "acc": 0.90428238, "epoch": 2.007824980369859, "grad_norm": 14.515305519104004, "learning_rate": 5.3811382028545e-06, "loss": 0.69184437, "memory(GiB)": 34.88, "step": 74155, "train_speed(iter/s)": 0.411518 }, { "acc": 0.90942125, "epoch": 2.0079603606530747, "grad_norm": 6.53777551651001, "learning_rate": 5.380580260941947e-06, "loss": 0.45490198, "memory(GiB)": 34.88, "step": 74160, "train_speed(iter/s)": 0.41152 }, { "acc": 0.90194063, "epoch": 2.00809574093629, "grad_norm": 12.391213417053223, "learning_rate": 5.380022314268141e-06, "loss": 0.52000623, "memory(GiB)": 34.88, "step": 74165, "train_speed(iter/s)": 0.411521 }, { "acc": 0.89627905, "epoch": 2.0082311212195054, "grad_norm": 6.847182273864746, "learning_rate": 5.379464362840074e-06, "loss": 0.56319351, "memory(GiB)": 34.88, "step": 74170, "train_speed(iter/s)": 0.411523 }, { "acc": 0.90900011, "epoch": 2.0083665015027212, "grad_norm": 7.07934045791626, "learning_rate": 5.378906406664731e-06, "loss": 0.51509991, "memory(GiB)": 34.88, "step": 74175, "train_speed(iter/s)": 0.411524 }, { "acc": 0.91695995, "epoch": 2.0085018817859366, "grad_norm": 7.959634304046631, "learning_rate": 5.378348445749108e-06, "loss": 0.50241261, "memory(GiB)": 34.88, "step": 74180, "train_speed(iter/s)": 0.411526 }, { "acc": 0.89878817, "epoch": 2.0086372620691524, "grad_norm": 11.227274894714355, "learning_rate": 5.377790480100189e-06, "loss": 0.62775207, "memory(GiB)": 34.88, "step": 74185, "train_speed(iter/s)": 0.411527 }, { "acc": 0.92136106, "epoch": 2.0087726423523677, "grad_norm": 8.088595390319824, "learning_rate": 5.3772325097249645e-06, "loss": 0.46892152, "memory(GiB)": 34.88, "step": 74190, "train_speed(iter/s)": 0.411529 }, { "acc": 0.92066431, "epoch": 2.0089080226355835, "grad_norm": 6.176218509674072, "learning_rate": 5.376674534630424e-06, "loss": 0.39701343, "memory(GiB)": 34.88, "step": 74195, "train_speed(iter/s)": 0.41153 }, { "acc": 0.91682014, "epoch": 2.009043402918799, "grad_norm": 8.1803560256958, "learning_rate": 5.376116554823561e-06, "loss": 0.46199827, "memory(GiB)": 34.88, "step": 74200, "train_speed(iter/s)": 0.411532 }, { "acc": 0.91747704, "epoch": 2.0091787832020143, "grad_norm": 6.29666805267334, "learning_rate": 5.37555857031136e-06, "loss": 0.45119247, "memory(GiB)": 34.88, "step": 74205, "train_speed(iter/s)": 0.411533 }, { "acc": 0.9307271, "epoch": 2.00931416348523, "grad_norm": 5.484376907348633, "learning_rate": 5.375000581100814e-06, "loss": 0.37518308, "memory(GiB)": 34.88, "step": 74210, "train_speed(iter/s)": 0.411535 }, { "acc": 0.91761417, "epoch": 2.0094495437684454, "grad_norm": 8.44007396697998, "learning_rate": 5.374442587198912e-06, "loss": 0.4952817, "memory(GiB)": 34.88, "step": 74215, "train_speed(iter/s)": 0.411536 }, { "acc": 0.920924, "epoch": 2.0095849240516612, "grad_norm": 12.635259628295898, "learning_rate": 5.373884588612648e-06, "loss": 0.48150353, "memory(GiB)": 34.88, "step": 74220, "train_speed(iter/s)": 0.411537 }, { "acc": 0.91573257, "epoch": 2.0097203043348766, "grad_norm": 7.939012050628662, "learning_rate": 5.373326585349006e-06, "loss": 0.50554028, "memory(GiB)": 34.88, "step": 74225, "train_speed(iter/s)": 0.411539 }, { "acc": 0.91061563, "epoch": 2.0098556846180924, "grad_norm": 12.685254096984863, "learning_rate": 5.372768577414977e-06, "loss": 0.45323362, "memory(GiB)": 34.88, "step": 74230, "train_speed(iter/s)": 0.41154 }, { "acc": 0.90932579, "epoch": 2.0099910649013077, "grad_norm": 6.8055033683776855, "learning_rate": 5.3722105648175534e-06, "loss": 0.52275352, "memory(GiB)": 34.88, "step": 74235, "train_speed(iter/s)": 0.411541 }, { "acc": 0.92963409, "epoch": 2.0101264451845235, "grad_norm": 6.9573187828063965, "learning_rate": 5.371652547563725e-06, "loss": 0.36008992, "memory(GiB)": 34.88, "step": 74240, "train_speed(iter/s)": 0.411543 }, { "acc": 0.87696171, "epoch": 2.010261825467739, "grad_norm": 8.635132789611816, "learning_rate": 5.371094525660482e-06, "loss": 0.75134401, "memory(GiB)": 34.88, "step": 74245, "train_speed(iter/s)": 0.411544 }, { "acc": 0.91812649, "epoch": 2.0103972057509543, "grad_norm": 9.153443336486816, "learning_rate": 5.370536499114814e-06, "loss": 0.45334973, "memory(GiB)": 34.88, "step": 74250, "train_speed(iter/s)": 0.411545 }, { "acc": 0.91371307, "epoch": 2.01053258603417, "grad_norm": 12.357067108154297, "learning_rate": 5.369978467933711e-06, "loss": 0.48250675, "memory(GiB)": 34.88, "step": 74255, "train_speed(iter/s)": 0.411547 }, { "acc": 0.92076855, "epoch": 2.0106679663173854, "grad_norm": 7.073089599609375, "learning_rate": 5.369420432124164e-06, "loss": 0.5188838, "memory(GiB)": 34.88, "step": 74260, "train_speed(iter/s)": 0.411548 }, { "acc": 0.91464767, "epoch": 2.0108033466006012, "grad_norm": 5.462121486663818, "learning_rate": 5.3688623916931635e-06, "loss": 0.41414032, "memory(GiB)": 34.88, "step": 74265, "train_speed(iter/s)": 0.41155 }, { "acc": 0.93157377, "epoch": 2.0109387268838166, "grad_norm": 3.7489800453186035, "learning_rate": 5.368304346647702e-06, "loss": 0.39854834, "memory(GiB)": 34.88, "step": 74270, "train_speed(iter/s)": 0.411552 }, { "acc": 0.89669991, "epoch": 2.0110741071670324, "grad_norm": 9.431914329528809, "learning_rate": 5.367746296994768e-06, "loss": 0.59413419, "memory(GiB)": 34.88, "step": 74275, "train_speed(iter/s)": 0.411553 }, { "acc": 0.92802305, "epoch": 2.0112094874502477, "grad_norm": 4.367056369781494, "learning_rate": 5.3671882427413516e-06, "loss": 0.41401196, "memory(GiB)": 34.88, "step": 74280, "train_speed(iter/s)": 0.411554 }, { "acc": 0.9242733, "epoch": 2.011344867733463, "grad_norm": 4.952782154083252, "learning_rate": 5.366630183894444e-06, "loss": 0.45905027, "memory(GiB)": 34.88, "step": 74285, "train_speed(iter/s)": 0.411556 }, { "acc": 0.91909142, "epoch": 2.011480248016679, "grad_norm": 5.716054916381836, "learning_rate": 5.366072120461038e-06, "loss": 0.50002995, "memory(GiB)": 34.88, "step": 74290, "train_speed(iter/s)": 0.411557 }, { "acc": 0.93473892, "epoch": 2.0116156282998943, "grad_norm": 5.519766807556152, "learning_rate": 5.3655140524481205e-06, "loss": 0.30818224, "memory(GiB)": 34.88, "step": 74295, "train_speed(iter/s)": 0.411559 }, { "acc": 0.8981514, "epoch": 2.01175100858311, "grad_norm": 9.344869613647461, "learning_rate": 5.364955979862687e-06, "loss": 0.51126828, "memory(GiB)": 34.88, "step": 74300, "train_speed(iter/s)": 0.41156 }, { "acc": 0.91912975, "epoch": 2.0118863888663254, "grad_norm": 15.251283645629883, "learning_rate": 5.364397902711725e-06, "loss": 0.45259838, "memory(GiB)": 34.88, "step": 74305, "train_speed(iter/s)": 0.411562 }, { "acc": 0.90827265, "epoch": 2.0120217691495412, "grad_norm": 8.370086669921875, "learning_rate": 5.3638398210022266e-06, "loss": 0.58309994, "memory(GiB)": 34.88, "step": 74310, "train_speed(iter/s)": 0.411563 }, { "acc": 0.90018864, "epoch": 2.0121571494327566, "grad_norm": 13.653156280517578, "learning_rate": 5.363281734741185e-06, "loss": 0.6549902, "memory(GiB)": 34.88, "step": 74315, "train_speed(iter/s)": 0.411565 }, { "acc": 0.93107243, "epoch": 2.012292529715972, "grad_norm": 10.996809959411621, "learning_rate": 5.362723643935587e-06, "loss": 0.37527609, "memory(GiB)": 34.88, "step": 74320, "train_speed(iter/s)": 0.411566 }, { "acc": 0.91940641, "epoch": 2.0124279099991877, "grad_norm": 5.741309642791748, "learning_rate": 5.362165548592428e-06, "loss": 0.39229469, "memory(GiB)": 34.88, "step": 74325, "train_speed(iter/s)": 0.411567 }, { "acc": 0.92389927, "epoch": 2.012563290282403, "grad_norm": 13.487905502319336, "learning_rate": 5.361607448718694e-06, "loss": 0.47299461, "memory(GiB)": 34.88, "step": 74330, "train_speed(iter/s)": 0.411569 }, { "acc": 0.90570335, "epoch": 2.012698670565619, "grad_norm": 5.304316997528076, "learning_rate": 5.361049344321383e-06, "loss": 0.59457254, "memory(GiB)": 34.88, "step": 74335, "train_speed(iter/s)": 0.41157 }, { "acc": 0.89680853, "epoch": 2.0128340508488343, "grad_norm": 8.466493606567383, "learning_rate": 5.3604912354074815e-06, "loss": 0.53263512, "memory(GiB)": 34.88, "step": 74340, "train_speed(iter/s)": 0.411571 }, { "acc": 0.92167778, "epoch": 2.01296943113205, "grad_norm": 8.080967903137207, "learning_rate": 5.359933121983983e-06, "loss": 0.47907214, "memory(GiB)": 34.88, "step": 74345, "train_speed(iter/s)": 0.411573 }, { "acc": 0.910672, "epoch": 2.0131048114152654, "grad_norm": 10.53726863861084, "learning_rate": 5.359375004057879e-06, "loss": 0.45447412, "memory(GiB)": 34.88, "step": 74350, "train_speed(iter/s)": 0.411574 }, { "acc": 0.91590214, "epoch": 2.0132401916984812, "grad_norm": 10.119902610778809, "learning_rate": 5.358816881636157e-06, "loss": 0.47228189, "memory(GiB)": 34.88, "step": 74355, "train_speed(iter/s)": 0.411576 }, { "acc": 0.92462931, "epoch": 2.0133755719816966, "grad_norm": 6.504130840301514, "learning_rate": 5.358258754725816e-06, "loss": 0.42871037, "memory(GiB)": 34.88, "step": 74360, "train_speed(iter/s)": 0.411577 }, { "acc": 0.91687479, "epoch": 2.013510952264912, "grad_norm": 7.582219123840332, "learning_rate": 5.357700623333842e-06, "loss": 0.42192459, "memory(GiB)": 34.88, "step": 74365, "train_speed(iter/s)": 0.411579 }, { "acc": 0.91673994, "epoch": 2.0136463325481277, "grad_norm": 7.573298454284668, "learning_rate": 5.357142487467228e-06, "loss": 0.42931428, "memory(GiB)": 34.88, "step": 74370, "train_speed(iter/s)": 0.41158 }, { "acc": 0.91397171, "epoch": 2.013781712831343, "grad_norm": 8.40475082397461, "learning_rate": 5.356584347132966e-06, "loss": 0.45029707, "memory(GiB)": 34.88, "step": 74375, "train_speed(iter/s)": 0.411582 }, { "acc": 0.92809486, "epoch": 2.013917093114559, "grad_norm": 4.569094181060791, "learning_rate": 5.3560262023380475e-06, "loss": 0.39403863, "memory(GiB)": 34.88, "step": 74380, "train_speed(iter/s)": 0.411583 }, { "acc": 0.92176285, "epoch": 2.0140524733977743, "grad_norm": 11.133678436279297, "learning_rate": 5.355468053089467e-06, "loss": 0.46524482, "memory(GiB)": 34.88, "step": 74385, "train_speed(iter/s)": 0.411584 }, { "acc": 0.90460796, "epoch": 2.01418785368099, "grad_norm": 8.836898803710938, "learning_rate": 5.3549098993942114e-06, "loss": 0.60460014, "memory(GiB)": 34.88, "step": 74390, "train_speed(iter/s)": 0.411586 }, { "acc": 0.90489082, "epoch": 2.0143232339642054, "grad_norm": 13.696541786193848, "learning_rate": 5.354351741259279e-06, "loss": 0.52310772, "memory(GiB)": 34.88, "step": 74395, "train_speed(iter/s)": 0.411587 }, { "acc": 0.9219574, "epoch": 2.014458614247421, "grad_norm": 10.286426544189453, "learning_rate": 5.353793578691654e-06, "loss": 0.36699626, "memory(GiB)": 34.88, "step": 74400, "train_speed(iter/s)": 0.411588 }, { "acc": 0.92580767, "epoch": 2.0145939945306366, "grad_norm": 12.717161178588867, "learning_rate": 5.353235411698334e-06, "loss": 0.37187672, "memory(GiB)": 34.88, "step": 74405, "train_speed(iter/s)": 0.41159 }, { "acc": 0.92977543, "epoch": 2.014729374813852, "grad_norm": 5.517381191253662, "learning_rate": 5.352677240286311e-06, "loss": 0.41532998, "memory(GiB)": 34.88, "step": 74410, "train_speed(iter/s)": 0.411591 }, { "acc": 0.92037048, "epoch": 2.0148647550970678, "grad_norm": 7.511716365814209, "learning_rate": 5.352119064462573e-06, "loss": 0.48773532, "memory(GiB)": 34.88, "step": 74415, "train_speed(iter/s)": 0.411593 }, { "acc": 0.90074234, "epoch": 2.015000135380283, "grad_norm": 18.80245018005371, "learning_rate": 5.35156088423412e-06, "loss": 0.47374926, "memory(GiB)": 34.88, "step": 74420, "train_speed(iter/s)": 0.411595 }, { "acc": 0.91054173, "epoch": 2.015135515663499, "grad_norm": 9.222315788269043, "learning_rate": 5.351002699607936e-06, "loss": 0.45907874, "memory(GiB)": 34.88, "step": 74425, "train_speed(iter/s)": 0.411596 }, { "acc": 0.92752161, "epoch": 2.0152708959467143, "grad_norm": 7.734927177429199, "learning_rate": 5.350444510591019e-06, "loss": 0.45522833, "memory(GiB)": 34.88, "step": 74430, "train_speed(iter/s)": 0.411597 }, { "acc": 0.90234718, "epoch": 2.01540627622993, "grad_norm": 10.259159088134766, "learning_rate": 5.349886317190358e-06, "loss": 0.50468988, "memory(GiB)": 34.88, "step": 74435, "train_speed(iter/s)": 0.411598 }, { "acc": 0.91370077, "epoch": 2.0155416565131454, "grad_norm": 3.7395081520080566, "learning_rate": 5.349328119412948e-06, "loss": 0.4258605, "memory(GiB)": 34.88, "step": 74440, "train_speed(iter/s)": 0.4116 }, { "acc": 0.91908875, "epoch": 2.015677036796361, "grad_norm": 11.105141639709473, "learning_rate": 5.348769917265779e-06, "loss": 0.420366, "memory(GiB)": 34.88, "step": 74445, "train_speed(iter/s)": 0.411601 }, { "acc": 0.89598999, "epoch": 2.0158124170795766, "grad_norm": 10.752299308776855, "learning_rate": 5.348211710755845e-06, "loss": 0.62467303, "memory(GiB)": 34.88, "step": 74450, "train_speed(iter/s)": 0.411603 }, { "acc": 0.9067934, "epoch": 2.015947797362792, "grad_norm": 5.5647382736206055, "learning_rate": 5.347653499890141e-06, "loss": 0.50284872, "memory(GiB)": 34.88, "step": 74455, "train_speed(iter/s)": 0.411604 }, { "acc": 0.92156811, "epoch": 2.0160831776460078, "grad_norm": 9.075301170349121, "learning_rate": 5.347095284675654e-06, "loss": 0.43843117, "memory(GiB)": 34.88, "step": 74460, "train_speed(iter/s)": 0.411605 }, { "acc": 0.89944057, "epoch": 2.016218557929223, "grad_norm": 52.14472961425781, "learning_rate": 5.346537065119381e-06, "loss": 0.5563405, "memory(GiB)": 34.88, "step": 74465, "train_speed(iter/s)": 0.411607 }, { "acc": 0.92773809, "epoch": 2.016353938212439, "grad_norm": 7.4743499755859375, "learning_rate": 5.345978841228315e-06, "loss": 0.39691935, "memory(GiB)": 34.88, "step": 74470, "train_speed(iter/s)": 0.411608 }, { "acc": 0.91849499, "epoch": 2.0164893184956543, "grad_norm": 11.227300643920898, "learning_rate": 5.345420613009448e-06, "loss": 0.46587462, "memory(GiB)": 34.88, "step": 74475, "train_speed(iter/s)": 0.41161 }, { "acc": 0.90620842, "epoch": 2.0166246987788696, "grad_norm": 14.031352996826172, "learning_rate": 5.344862380469771e-06, "loss": 0.51461492, "memory(GiB)": 34.88, "step": 74480, "train_speed(iter/s)": 0.411612 }, { "acc": 0.92623024, "epoch": 2.0167600790620854, "grad_norm": 3.085583209991455, "learning_rate": 5.344304143616279e-06, "loss": 0.44262195, "memory(GiB)": 34.88, "step": 74485, "train_speed(iter/s)": 0.411613 }, { "acc": 0.91178675, "epoch": 2.016895459345301, "grad_norm": 12.771830558776855, "learning_rate": 5.343745902455965e-06, "loss": 0.5009181, "memory(GiB)": 34.88, "step": 74490, "train_speed(iter/s)": 0.411615 }, { "acc": 0.9222578, "epoch": 2.0170308396285166, "grad_norm": 6.860193729400635, "learning_rate": 5.343187656995819e-06, "loss": 0.50654383, "memory(GiB)": 34.88, "step": 74495, "train_speed(iter/s)": 0.411616 }, { "acc": 0.9066906, "epoch": 2.017166219911732, "grad_norm": 6.840606689453125, "learning_rate": 5.342629407242841e-06, "loss": 0.55930481, "memory(GiB)": 34.88, "step": 74500, "train_speed(iter/s)": 0.411618 }, { "acc": 0.91583052, "epoch": 2.0173016001949478, "grad_norm": 12.174544334411621, "learning_rate": 5.342071153204016e-06, "loss": 0.50257678, "memory(GiB)": 34.88, "step": 74505, "train_speed(iter/s)": 0.411619 }, { "acc": 0.92984715, "epoch": 2.017436980478163, "grad_norm": 3.349820137023926, "learning_rate": 5.341512894886344e-06, "loss": 0.38571811, "memory(GiB)": 34.88, "step": 74510, "train_speed(iter/s)": 0.411621 }, { "acc": 0.91801796, "epoch": 2.017572360761379, "grad_norm": 6.740668296813965, "learning_rate": 5.340954632296814e-06, "loss": 0.42128725, "memory(GiB)": 34.88, "step": 74515, "train_speed(iter/s)": 0.411622 }, { "acc": 0.92452822, "epoch": 2.0177077410445943, "grad_norm": 12.206611633300781, "learning_rate": 5.340396365442421e-06, "loss": 0.44967766, "memory(GiB)": 34.88, "step": 74520, "train_speed(iter/s)": 0.411624 }, { "acc": 0.92824001, "epoch": 2.0178431213278096, "grad_norm": 3.1593058109283447, "learning_rate": 5.339838094330159e-06, "loss": 0.37970932, "memory(GiB)": 34.88, "step": 74525, "train_speed(iter/s)": 0.411625 }, { "acc": 0.90553865, "epoch": 2.0179785016110254, "grad_norm": 18.628429412841797, "learning_rate": 5.339279818967018e-06, "loss": 0.52303753, "memory(GiB)": 34.88, "step": 74530, "train_speed(iter/s)": 0.411626 }, { "acc": 0.9081583, "epoch": 2.018113881894241, "grad_norm": 15.505532264709473, "learning_rate": 5.338721539359996e-06, "loss": 0.48747029, "memory(GiB)": 34.88, "step": 74535, "train_speed(iter/s)": 0.411628 }, { "acc": 0.9123806, "epoch": 2.0182492621774566, "grad_norm": 10.11861801147461, "learning_rate": 5.338163255516085e-06, "loss": 0.48335872, "memory(GiB)": 34.88, "step": 74540, "train_speed(iter/s)": 0.411629 }, { "acc": 0.91385393, "epoch": 2.018384642460672, "grad_norm": 9.949549674987793, "learning_rate": 5.337604967442278e-06, "loss": 0.55126295, "memory(GiB)": 34.88, "step": 74545, "train_speed(iter/s)": 0.411631 }, { "acc": 0.9105341, "epoch": 2.0185200227438878, "grad_norm": 7.4033355712890625, "learning_rate": 5.337046675145568e-06, "loss": 0.49069643, "memory(GiB)": 34.88, "step": 74550, "train_speed(iter/s)": 0.411632 }, { "acc": 0.91854944, "epoch": 2.018655403027103, "grad_norm": 8.094972610473633, "learning_rate": 5.336488378632951e-06, "loss": 0.35233555, "memory(GiB)": 34.88, "step": 74555, "train_speed(iter/s)": 0.411634 }, { "acc": 0.90875835, "epoch": 2.0187907833103185, "grad_norm": 8.146645545959473, "learning_rate": 5.335930077911417e-06, "loss": 0.55755267, "memory(GiB)": 34.88, "step": 74560, "train_speed(iter/s)": 0.411635 }, { "acc": 0.91461802, "epoch": 2.0189261635935343, "grad_norm": 7.453762531280518, "learning_rate": 5.335371772987964e-06, "loss": 0.41354113, "memory(GiB)": 34.88, "step": 74565, "train_speed(iter/s)": 0.411637 }, { "acc": 0.91411514, "epoch": 2.0190615438767496, "grad_norm": 9.323802947998047, "learning_rate": 5.334813463869584e-06, "loss": 0.4133028, "memory(GiB)": 34.88, "step": 74570, "train_speed(iter/s)": 0.411638 }, { "acc": 0.91362267, "epoch": 2.0191969241599654, "grad_norm": 10.536746978759766, "learning_rate": 5.334255150563269e-06, "loss": 0.48616672, "memory(GiB)": 34.88, "step": 74575, "train_speed(iter/s)": 0.411639 }, { "acc": 0.94439106, "epoch": 2.019332304443181, "grad_norm": 2.4207046031951904, "learning_rate": 5.333696833076017e-06, "loss": 0.28253493, "memory(GiB)": 34.88, "step": 74580, "train_speed(iter/s)": 0.411641 }, { "acc": 0.93334246, "epoch": 2.0194676847263966, "grad_norm": 2.566114902496338, "learning_rate": 5.333138511414819e-06, "loss": 0.35766153, "memory(GiB)": 34.88, "step": 74585, "train_speed(iter/s)": 0.411642 }, { "acc": 0.93144684, "epoch": 2.019603065009612, "grad_norm": 4.750153064727783, "learning_rate": 5.332580185586671e-06, "loss": 0.35469036, "memory(GiB)": 34.88, "step": 74590, "train_speed(iter/s)": 0.411644 }, { "acc": 0.90653524, "epoch": 2.0197384452928278, "grad_norm": 9.265291213989258, "learning_rate": 5.3320218555985644e-06, "loss": 0.54985819, "memory(GiB)": 34.88, "step": 74595, "train_speed(iter/s)": 0.411646 }, { "acc": 0.89806118, "epoch": 2.019873825576043, "grad_norm": 9.828126907348633, "learning_rate": 5.3314635214574976e-06, "loss": 0.51010847, "memory(GiB)": 34.88, "step": 74600, "train_speed(iter/s)": 0.411647 }, { "acc": 0.89997587, "epoch": 2.0200092058592585, "grad_norm": 11.628131866455078, "learning_rate": 5.33090518317046e-06, "loss": 0.59764605, "memory(GiB)": 34.88, "step": 74605, "train_speed(iter/s)": 0.411649 }, { "acc": 0.90765991, "epoch": 2.0201445861424743, "grad_norm": 5.431634426116943, "learning_rate": 5.330346840744449e-06, "loss": 0.48329549, "memory(GiB)": 34.88, "step": 74610, "train_speed(iter/s)": 0.41165 }, { "acc": 0.93374901, "epoch": 2.0202799664256896, "grad_norm": 7.902227401733398, "learning_rate": 5.32978849418646e-06, "loss": 0.3417429, "memory(GiB)": 34.88, "step": 74615, "train_speed(iter/s)": 0.411652 }, { "acc": 0.91222286, "epoch": 2.0204153467089054, "grad_norm": 12.10787582397461, "learning_rate": 5.329230143503482e-06, "loss": 0.51263413, "memory(GiB)": 34.88, "step": 74620, "train_speed(iter/s)": 0.411653 }, { "acc": 0.91419973, "epoch": 2.020550726992121, "grad_norm": 6.655828952789307, "learning_rate": 5.328671788702516e-06, "loss": 0.52685719, "memory(GiB)": 34.88, "step": 74625, "train_speed(iter/s)": 0.411654 }, { "acc": 0.92536936, "epoch": 2.0206861072753366, "grad_norm": 10.11334228515625, "learning_rate": 5.328113429790552e-06, "loss": 0.38506839, "memory(GiB)": 34.88, "step": 74630, "train_speed(iter/s)": 0.411656 }, { "acc": 0.9030241, "epoch": 2.020821487558552, "grad_norm": 8.062275886535645, "learning_rate": 5.327555066774587e-06, "loss": 0.47145891, "memory(GiB)": 34.88, "step": 74635, "train_speed(iter/s)": 0.411657 }, { "acc": 0.91551914, "epoch": 2.0209568678417673, "grad_norm": 3.4567253589630127, "learning_rate": 5.326996699661614e-06, "loss": 0.3961767, "memory(GiB)": 34.88, "step": 74640, "train_speed(iter/s)": 0.411659 }, { "acc": 0.92404823, "epoch": 2.021092248124983, "grad_norm": 12.868234634399414, "learning_rate": 5.326438328458628e-06, "loss": 0.43851748, "memory(GiB)": 34.88, "step": 74645, "train_speed(iter/s)": 0.41166 }, { "acc": 0.90152273, "epoch": 2.0212276284081985, "grad_norm": 8.452164649963379, "learning_rate": 5.325879953172626e-06, "loss": 0.55608931, "memory(GiB)": 34.88, "step": 74650, "train_speed(iter/s)": 0.411662 }, { "acc": 0.90667257, "epoch": 2.0213630086914143, "grad_norm": 12.591572761535645, "learning_rate": 5.325321573810597e-06, "loss": 0.47187395, "memory(GiB)": 34.88, "step": 74655, "train_speed(iter/s)": 0.411663 }, { "acc": 0.921068, "epoch": 2.0214983889746296, "grad_norm": 6.922299861907959, "learning_rate": 5.324763190379542e-06, "loss": 0.505762, "memory(GiB)": 34.88, "step": 74660, "train_speed(iter/s)": 0.411665 }, { "acc": 0.9367981, "epoch": 2.0216337692578454, "grad_norm": 5.9757280349731445, "learning_rate": 5.324204802886453e-06, "loss": 0.37634001, "memory(GiB)": 34.88, "step": 74665, "train_speed(iter/s)": 0.411666 }, { "acc": 0.92119808, "epoch": 2.021769149541061, "grad_norm": 9.747469902038574, "learning_rate": 5.323646411338325e-06, "loss": 0.3871841, "memory(GiB)": 34.88, "step": 74670, "train_speed(iter/s)": 0.411667 }, { "acc": 0.91607265, "epoch": 2.0219045298242766, "grad_norm": 8.071521759033203, "learning_rate": 5.323088015742153e-06, "loss": 0.47060452, "memory(GiB)": 34.88, "step": 74675, "train_speed(iter/s)": 0.411669 }, { "acc": 0.91725416, "epoch": 2.022039910107492, "grad_norm": 6.066309452056885, "learning_rate": 5.322529616104933e-06, "loss": 0.46365294, "memory(GiB)": 34.88, "step": 74680, "train_speed(iter/s)": 0.41167 }, { "acc": 0.92816105, "epoch": 2.0221752903907073, "grad_norm": 11.726118087768555, "learning_rate": 5.32197121243366e-06, "loss": 0.35787973, "memory(GiB)": 34.88, "step": 74685, "train_speed(iter/s)": 0.411672 }, { "acc": 0.9251543, "epoch": 2.022310670673923, "grad_norm": 3.3799595832824707, "learning_rate": 5.321412804735326e-06, "loss": 0.36559434, "memory(GiB)": 34.88, "step": 74690, "train_speed(iter/s)": 0.411673 }, { "acc": 0.89730635, "epoch": 2.0224460509571385, "grad_norm": 8.693296432495117, "learning_rate": 5.320854393016931e-06, "loss": 0.5813426, "memory(GiB)": 34.88, "step": 74695, "train_speed(iter/s)": 0.411675 }, { "acc": 0.89512196, "epoch": 2.0225814312403543, "grad_norm": 13.158843994140625, "learning_rate": 5.320295977285466e-06, "loss": 0.64826078, "memory(GiB)": 34.88, "step": 74700, "train_speed(iter/s)": 0.411676 }, { "acc": 0.91784086, "epoch": 2.0227168115235696, "grad_norm": 5.608232498168945, "learning_rate": 5.319737557547929e-06, "loss": 0.38825257, "memory(GiB)": 34.88, "step": 74705, "train_speed(iter/s)": 0.411678 }, { "acc": 0.89525852, "epoch": 2.0228521918067854, "grad_norm": 13.433633804321289, "learning_rate": 5.319179133811313e-06, "loss": 0.54810677, "memory(GiB)": 34.88, "step": 74710, "train_speed(iter/s)": 0.411679 }, { "acc": 0.91501198, "epoch": 2.022987572090001, "grad_norm": 14.35410213470459, "learning_rate": 5.318620706082614e-06, "loss": 0.54612226, "memory(GiB)": 34.88, "step": 74715, "train_speed(iter/s)": 0.411681 }, { "acc": 0.9115346, "epoch": 2.023122952373216, "grad_norm": 7.574885368347168, "learning_rate": 5.3180622743688304e-06, "loss": 0.49878569, "memory(GiB)": 34.88, "step": 74720, "train_speed(iter/s)": 0.411682 }, { "acc": 0.90752497, "epoch": 2.023258332656432, "grad_norm": 21.411569595336914, "learning_rate": 5.317503838676955e-06, "loss": 0.49646177, "memory(GiB)": 34.88, "step": 74725, "train_speed(iter/s)": 0.411683 }, { "acc": 0.90502949, "epoch": 2.0233937129396473, "grad_norm": 10.335053443908691, "learning_rate": 5.316945399013984e-06, "loss": 0.54393177, "memory(GiB)": 34.88, "step": 74730, "train_speed(iter/s)": 0.411684 }, { "acc": 0.91587896, "epoch": 2.023529093222863, "grad_norm": 6.2595906257629395, "learning_rate": 5.316386955386911e-06, "loss": 0.43475475, "memory(GiB)": 34.88, "step": 74735, "train_speed(iter/s)": 0.411685 }, { "acc": 0.90970669, "epoch": 2.0236644735060785, "grad_norm": 15.980595588684082, "learning_rate": 5.3158285078027324e-06, "loss": 0.50260363, "memory(GiB)": 34.88, "step": 74740, "train_speed(iter/s)": 0.411687 }, { "acc": 0.94224606, "epoch": 2.0237998537892943, "grad_norm": 10.497802734375, "learning_rate": 5.315270056268446e-06, "loss": 0.3009414, "memory(GiB)": 34.88, "step": 74745, "train_speed(iter/s)": 0.411688 }, { "acc": 0.9118742, "epoch": 2.0239352340725096, "grad_norm": 15.656888008117676, "learning_rate": 5.314711600791048e-06, "loss": 0.49133081, "memory(GiB)": 34.88, "step": 74750, "train_speed(iter/s)": 0.41169 }, { "acc": 0.9011548, "epoch": 2.0240706143557254, "grad_norm": 11.273344993591309, "learning_rate": 5.3141531413775286e-06, "loss": 0.5361908, "memory(GiB)": 34.88, "step": 74755, "train_speed(iter/s)": 0.411691 }, { "acc": 0.91139374, "epoch": 2.024205994638941, "grad_norm": 11.44426441192627, "learning_rate": 5.313594678034889e-06, "loss": 0.5301466, "memory(GiB)": 34.88, "step": 74760, "train_speed(iter/s)": 0.411692 }, { "acc": 0.91784315, "epoch": 2.024341374922156, "grad_norm": 12.49996566772461, "learning_rate": 5.313036210770125e-06, "loss": 0.47671113, "memory(GiB)": 34.88, "step": 74765, "train_speed(iter/s)": 0.411694 }, { "acc": 0.89544544, "epoch": 2.024476755205372, "grad_norm": 7.613010406494141, "learning_rate": 5.312477739590228e-06, "loss": 0.49128342, "memory(GiB)": 34.88, "step": 74770, "train_speed(iter/s)": 0.411695 }, { "acc": 0.92110634, "epoch": 2.0246121354885873, "grad_norm": 8.996892929077148, "learning_rate": 5.3119192645021985e-06, "loss": 0.45899706, "memory(GiB)": 34.88, "step": 74775, "train_speed(iter/s)": 0.411697 }, { "acc": 0.9137311, "epoch": 2.024747515771803, "grad_norm": 12.563855171203613, "learning_rate": 5.31136078551303e-06, "loss": 0.44782801, "memory(GiB)": 34.88, "step": 74780, "train_speed(iter/s)": 0.411698 }, { "acc": 0.91325645, "epoch": 2.0248828960550185, "grad_norm": 10.178606986999512, "learning_rate": 5.310802302629721e-06, "loss": 0.44508262, "memory(GiB)": 34.88, "step": 74785, "train_speed(iter/s)": 0.411699 }, { "acc": 0.92521515, "epoch": 2.0250182763382343, "grad_norm": 7.833935260772705, "learning_rate": 5.310243815859263e-06, "loss": 0.47588243, "memory(GiB)": 34.88, "step": 74790, "train_speed(iter/s)": 0.411701 }, { "acc": 0.91055155, "epoch": 2.0251536566214496, "grad_norm": 11.67895793914795, "learning_rate": 5.309685325208657e-06, "loss": 0.49223981, "memory(GiB)": 34.88, "step": 74795, "train_speed(iter/s)": 0.411702 }, { "acc": 0.92353811, "epoch": 2.025289036904665, "grad_norm": 6.931780815124512, "learning_rate": 5.3091268306848954e-06, "loss": 0.38086028, "memory(GiB)": 34.88, "step": 74800, "train_speed(iter/s)": 0.411704 }, { "acc": 0.93988352, "epoch": 2.025424417187881, "grad_norm": 8.13871955871582, "learning_rate": 5.308568332294978e-06, "loss": 0.37973399, "memory(GiB)": 34.88, "step": 74805, "train_speed(iter/s)": 0.411705 }, { "acc": 0.92403307, "epoch": 2.025559797471096, "grad_norm": 6.312144756317139, "learning_rate": 5.308009830045902e-06, "loss": 0.34212162, "memory(GiB)": 34.88, "step": 74810, "train_speed(iter/s)": 0.411707 }, { "acc": 0.90047913, "epoch": 2.025695177754312, "grad_norm": 7.863615989685059, "learning_rate": 5.307451323944655e-06, "loss": 0.572967, "memory(GiB)": 34.88, "step": 74815, "train_speed(iter/s)": 0.411708 }, { "acc": 0.91478777, "epoch": 2.0258305580375273, "grad_norm": 11.347331047058105, "learning_rate": 5.3068928139982445e-06, "loss": 0.44719, "memory(GiB)": 34.88, "step": 74820, "train_speed(iter/s)": 0.41171 }, { "acc": 0.90485077, "epoch": 2.025965938320743, "grad_norm": 8.378952980041504, "learning_rate": 5.306334300213659e-06, "loss": 0.53735628, "memory(GiB)": 34.88, "step": 74825, "train_speed(iter/s)": 0.411711 }, { "acc": 0.90283184, "epoch": 2.0261013186039585, "grad_norm": 22.08359718322754, "learning_rate": 5.305775782597898e-06, "loss": 0.58167639, "memory(GiB)": 34.88, "step": 74830, "train_speed(iter/s)": 0.411712 }, { "acc": 0.92025928, "epoch": 2.0262366988871743, "grad_norm": 7.293854713439941, "learning_rate": 5.30521726115796e-06, "loss": 0.44920511, "memory(GiB)": 34.88, "step": 74835, "train_speed(iter/s)": 0.411714 }, { "acc": 0.92296219, "epoch": 2.0263720791703896, "grad_norm": 14.324546813964844, "learning_rate": 5.3046587359008375e-06, "loss": 0.40207715, "memory(GiB)": 34.88, "step": 74840, "train_speed(iter/s)": 0.411715 }, { "acc": 0.9168642, "epoch": 2.026507459453605, "grad_norm": 13.835672378540039, "learning_rate": 5.304100206833532e-06, "loss": 0.49269171, "memory(GiB)": 34.88, "step": 74845, "train_speed(iter/s)": 0.411717 }, { "acc": 0.92361374, "epoch": 2.026642839736821, "grad_norm": 9.481945991516113, "learning_rate": 5.303541673963037e-06, "loss": 0.39421945, "memory(GiB)": 34.88, "step": 74850, "train_speed(iter/s)": 0.411718 }, { "acc": 0.91842833, "epoch": 2.026778220020036, "grad_norm": 7.656486988067627, "learning_rate": 5.302983137296347e-06, "loss": 0.4056715, "memory(GiB)": 34.88, "step": 74855, "train_speed(iter/s)": 0.41172 }, { "acc": 0.91437407, "epoch": 2.026913600303252, "grad_norm": 3.985503911972046, "learning_rate": 5.302424596840462e-06, "loss": 0.37294888, "memory(GiB)": 34.88, "step": 74860, "train_speed(iter/s)": 0.411721 }, { "acc": 0.90668755, "epoch": 2.0270489805864673, "grad_norm": 15.574234962463379, "learning_rate": 5.301866052602378e-06, "loss": 0.49522276, "memory(GiB)": 34.88, "step": 74865, "train_speed(iter/s)": 0.411722 }, { "acc": 0.90765381, "epoch": 2.027184360869683, "grad_norm": 10.44523811340332, "learning_rate": 5.3013075045890924e-06, "loss": 0.45532179, "memory(GiB)": 34.88, "step": 74870, "train_speed(iter/s)": 0.411724 }, { "acc": 0.91197929, "epoch": 2.0273197411528985, "grad_norm": 5.109789848327637, "learning_rate": 5.3007489528076015e-06, "loss": 0.4171267, "memory(GiB)": 34.88, "step": 74875, "train_speed(iter/s)": 0.411725 }, { "acc": 0.90668335, "epoch": 2.027455121436114, "grad_norm": 9.114892959594727, "learning_rate": 5.300190397264904e-06, "loss": 0.49439583, "memory(GiB)": 34.88, "step": 74880, "train_speed(iter/s)": 0.411727 }, { "acc": 0.92799053, "epoch": 2.0275905017193296, "grad_norm": 6.33397912979126, "learning_rate": 5.299631837967994e-06, "loss": 0.42234945, "memory(GiB)": 34.88, "step": 74885, "train_speed(iter/s)": 0.411728 }, { "acc": 0.91479826, "epoch": 2.027725882002545, "grad_norm": 36.257362365722656, "learning_rate": 5.299073274923871e-06, "loss": 0.45865388, "memory(GiB)": 34.88, "step": 74890, "train_speed(iter/s)": 0.411729 }, { "acc": 0.93854847, "epoch": 2.027861262285761, "grad_norm": 8.62825870513916, "learning_rate": 5.29851470813953e-06, "loss": 0.30801909, "memory(GiB)": 34.88, "step": 74895, "train_speed(iter/s)": 0.411731 }, { "acc": 0.92114992, "epoch": 2.027996642568976, "grad_norm": 6.068847179412842, "learning_rate": 5.297956137621968e-06, "loss": 0.40361557, "memory(GiB)": 34.88, "step": 74900, "train_speed(iter/s)": 0.411732 }, { "acc": 0.93207226, "epoch": 2.028132022852192, "grad_norm": 7.450922012329102, "learning_rate": 5.297397563378186e-06, "loss": 0.29045622, "memory(GiB)": 34.88, "step": 74905, "train_speed(iter/s)": 0.411733 }, { "acc": 0.92413235, "epoch": 2.0282674031354073, "grad_norm": 16.988325119018555, "learning_rate": 5.296838985415177e-06, "loss": 0.4474905, "memory(GiB)": 34.88, "step": 74910, "train_speed(iter/s)": 0.411735 }, { "acc": 0.90737333, "epoch": 2.028402783418623, "grad_norm": 5.961079120635986, "learning_rate": 5.296280403739941e-06, "loss": 0.51056643, "memory(GiB)": 34.88, "step": 74915, "train_speed(iter/s)": 0.411736 }, { "acc": 0.88864975, "epoch": 2.0285381637018385, "grad_norm": 16.552875518798828, "learning_rate": 5.2957218183594724e-06, "loss": 0.63489447, "memory(GiB)": 34.88, "step": 74920, "train_speed(iter/s)": 0.411738 }, { "acc": 0.91245461, "epoch": 2.028673543985054, "grad_norm": 8.691701889038086, "learning_rate": 5.295163229280772e-06, "loss": 0.48598342, "memory(GiB)": 34.88, "step": 74925, "train_speed(iter/s)": 0.411739 }, { "acc": 0.89811945, "epoch": 2.0288089242682696, "grad_norm": 10.541682243347168, "learning_rate": 5.294604636510835e-06, "loss": 0.53700166, "memory(GiB)": 34.88, "step": 74930, "train_speed(iter/s)": 0.41174 }, { "acc": 0.93094969, "epoch": 2.028944304551485, "grad_norm": 5.558298587799072, "learning_rate": 5.294046040056662e-06, "loss": 0.37521143, "memory(GiB)": 34.88, "step": 74935, "train_speed(iter/s)": 0.411742 }, { "acc": 0.89451656, "epoch": 2.029079684834701, "grad_norm": 16.90987205505371, "learning_rate": 5.293487439925245e-06, "loss": 0.59305458, "memory(GiB)": 34.88, "step": 74940, "train_speed(iter/s)": 0.411743 }, { "acc": 0.92888184, "epoch": 2.029215065117916, "grad_norm": 5.530575275421143, "learning_rate": 5.292928836123584e-06, "loss": 0.41561646, "memory(GiB)": 34.88, "step": 74945, "train_speed(iter/s)": 0.411745 }, { "acc": 0.92181263, "epoch": 2.029350445401132, "grad_norm": 4.293664455413818, "learning_rate": 5.292370228658679e-06, "loss": 0.42590189, "memory(GiB)": 34.88, "step": 74950, "train_speed(iter/s)": 0.411746 }, { "acc": 0.89083033, "epoch": 2.0294858256843473, "grad_norm": 5.438327789306641, "learning_rate": 5.291811617537525e-06, "loss": 0.55782456, "memory(GiB)": 34.88, "step": 74955, "train_speed(iter/s)": 0.411747 }, { "acc": 0.92046146, "epoch": 2.0296212059675627, "grad_norm": 5.948268890380859, "learning_rate": 5.291253002767121e-06, "loss": 0.36617846, "memory(GiB)": 34.88, "step": 74960, "train_speed(iter/s)": 0.411749 }, { "acc": 0.92049818, "epoch": 2.0297565862507785, "grad_norm": 6.173386096954346, "learning_rate": 5.290694384354465e-06, "loss": 0.44578714, "memory(GiB)": 34.88, "step": 74965, "train_speed(iter/s)": 0.41175 }, { "acc": 0.91000805, "epoch": 2.029891966533994, "grad_norm": 11.192139625549316, "learning_rate": 5.290135762306553e-06, "loss": 0.51730494, "memory(GiB)": 34.88, "step": 74970, "train_speed(iter/s)": 0.411752 }, { "acc": 0.92962999, "epoch": 2.0300273468172096, "grad_norm": 5.081089019775391, "learning_rate": 5.289577136630385e-06, "loss": 0.42757959, "memory(GiB)": 34.88, "step": 74975, "train_speed(iter/s)": 0.411753 }, { "acc": 0.92474241, "epoch": 2.030162727100425, "grad_norm": 7.150134086608887, "learning_rate": 5.289018507332957e-06, "loss": 0.35801091, "memory(GiB)": 34.88, "step": 74980, "train_speed(iter/s)": 0.411755 }, { "acc": 0.9276927, "epoch": 2.030298107383641, "grad_norm": 13.89746379852295, "learning_rate": 5.288459874421268e-06, "loss": 0.32414873, "memory(GiB)": 34.88, "step": 74985, "train_speed(iter/s)": 0.411756 }, { "acc": 0.93129406, "epoch": 2.030433487666856, "grad_norm": 6.094651222229004, "learning_rate": 5.2879012379023145e-06, "loss": 0.37086353, "memory(GiB)": 34.88, "step": 74990, "train_speed(iter/s)": 0.411758 }, { "acc": 0.9201766, "epoch": 2.030568867950072, "grad_norm": 3.8155813217163086, "learning_rate": 5.287342597783097e-06, "loss": 0.44997358, "memory(GiB)": 34.88, "step": 74995, "train_speed(iter/s)": 0.411759 }, { "acc": 0.90479794, "epoch": 2.0307042482332873, "grad_norm": 6.636913776397705, "learning_rate": 5.286783954070612e-06, "loss": 0.51678991, "memory(GiB)": 34.88, "step": 75000, "train_speed(iter/s)": 0.411761 }, { "acc": 0.91927252, "epoch": 2.0308396285165027, "grad_norm": 7.059623718261719, "learning_rate": 5.286225306771859e-06, "loss": 0.49892721, "memory(GiB)": 34.88, "step": 75005, "train_speed(iter/s)": 0.411762 }, { "acc": 0.91356659, "epoch": 2.0309750087997185, "grad_norm": 14.820732116699219, "learning_rate": 5.285666655893834e-06, "loss": 0.45758114, "memory(GiB)": 34.88, "step": 75010, "train_speed(iter/s)": 0.411763 }, { "acc": 0.92350445, "epoch": 2.031110389082934, "grad_norm": 14.201761245727539, "learning_rate": 5.285108001443536e-06, "loss": 0.41084743, "memory(GiB)": 34.88, "step": 75015, "train_speed(iter/s)": 0.411765 }, { "acc": 0.92255783, "epoch": 2.0312457693661496, "grad_norm": 6.167854309082031, "learning_rate": 5.284549343427966e-06, "loss": 0.4414432, "memory(GiB)": 34.88, "step": 75020, "train_speed(iter/s)": 0.411766 }, { "acc": 0.90794849, "epoch": 2.031381149649365, "grad_norm": 11.997061729431152, "learning_rate": 5.283990681854117e-06, "loss": 0.49574375, "memory(GiB)": 34.88, "step": 75025, "train_speed(iter/s)": 0.411767 }, { "acc": 0.92074957, "epoch": 2.031516529932581, "grad_norm": 22.288551330566406, "learning_rate": 5.283432016728992e-06, "loss": 0.40199065, "memory(GiB)": 34.88, "step": 75030, "train_speed(iter/s)": 0.411769 }, { "acc": 0.92119055, "epoch": 2.031651910215796, "grad_norm": 18.189781188964844, "learning_rate": 5.282873348059586e-06, "loss": 0.42318869, "memory(GiB)": 34.88, "step": 75035, "train_speed(iter/s)": 0.41177 }, { "acc": 0.92626095, "epoch": 2.0317872904990115, "grad_norm": 6.428101539611816, "learning_rate": 5.2823146758529e-06, "loss": 0.4571147, "memory(GiB)": 34.88, "step": 75040, "train_speed(iter/s)": 0.411772 }, { "acc": 0.94030085, "epoch": 2.0319226707822273, "grad_norm": 6.530611515045166, "learning_rate": 5.28175600011593e-06, "loss": 0.27230685, "memory(GiB)": 34.88, "step": 75045, "train_speed(iter/s)": 0.411773 }, { "acc": 0.91348133, "epoch": 2.0320580510654427, "grad_norm": 7.179056167602539, "learning_rate": 5.281197320855677e-06, "loss": 0.48613758, "memory(GiB)": 34.88, "step": 75050, "train_speed(iter/s)": 0.411775 }, { "acc": 0.90494633, "epoch": 2.0321934313486585, "grad_norm": 9.981220245361328, "learning_rate": 5.280638638079139e-06, "loss": 0.57063618, "memory(GiB)": 34.88, "step": 75055, "train_speed(iter/s)": 0.411776 }, { "acc": 0.91352882, "epoch": 2.032328811631874, "grad_norm": 8.13534927368164, "learning_rate": 5.280079951793312e-06, "loss": 0.43018146, "memory(GiB)": 34.88, "step": 75060, "train_speed(iter/s)": 0.411778 }, { "acc": 0.89381237, "epoch": 2.0324641919150896, "grad_norm": 6.977474689483643, "learning_rate": 5.279521262005199e-06, "loss": 0.64326715, "memory(GiB)": 34.88, "step": 75065, "train_speed(iter/s)": 0.411779 }, { "acc": 0.90853786, "epoch": 2.032599572198305, "grad_norm": 5.981897354125977, "learning_rate": 5.278962568721796e-06, "loss": 0.54453659, "memory(GiB)": 34.88, "step": 75070, "train_speed(iter/s)": 0.411781 }, { "acc": 0.90385151, "epoch": 2.032734952481521, "grad_norm": 7.775233268737793, "learning_rate": 5.278403871950102e-06, "loss": 0.56438513, "memory(GiB)": 34.88, "step": 75075, "train_speed(iter/s)": 0.411782 }, { "acc": 0.91518078, "epoch": 2.032870332764736, "grad_norm": 6.056304454803467, "learning_rate": 5.277845171697114e-06, "loss": 0.44128542, "memory(GiB)": 34.88, "step": 75080, "train_speed(iter/s)": 0.411783 }, { "acc": 0.92433281, "epoch": 2.0330057130479515, "grad_norm": 6.07438850402832, "learning_rate": 5.277286467969836e-06, "loss": 0.40738692, "memory(GiB)": 34.88, "step": 75085, "train_speed(iter/s)": 0.411785 }, { "acc": 0.92010183, "epoch": 2.0331410933311673, "grad_norm": 6.977869033813477, "learning_rate": 5.276727760775261e-06, "loss": 0.45086489, "memory(GiB)": 34.88, "step": 75090, "train_speed(iter/s)": 0.411786 }, { "acc": 0.90431833, "epoch": 2.0332764736143827, "grad_norm": 8.188567161560059, "learning_rate": 5.276169050120391e-06, "loss": 0.55503702, "memory(GiB)": 34.88, "step": 75095, "train_speed(iter/s)": 0.411787 }, { "acc": 0.91801529, "epoch": 2.0334118538975985, "grad_norm": 14.444899559020996, "learning_rate": 5.275610336012227e-06, "loss": 0.49890785, "memory(GiB)": 34.88, "step": 75100, "train_speed(iter/s)": 0.411789 }, { "acc": 0.93736, "epoch": 2.033547234180814, "grad_norm": 4.813068389892578, "learning_rate": 5.275051618457763e-06, "loss": 0.33317842, "memory(GiB)": 34.88, "step": 75105, "train_speed(iter/s)": 0.41179 }, { "acc": 0.89394951, "epoch": 2.0336826144640296, "grad_norm": 6.797693729400635, "learning_rate": 5.2744928974640004e-06, "loss": 0.68163414, "memory(GiB)": 34.88, "step": 75110, "train_speed(iter/s)": 0.411791 }, { "acc": 0.93255749, "epoch": 2.033817994747245, "grad_norm": 5.555469989776611, "learning_rate": 5.273934173037939e-06, "loss": 0.36491766, "memory(GiB)": 34.88, "step": 75115, "train_speed(iter/s)": 0.411792 }, { "acc": 0.92729406, "epoch": 2.0339533750304604, "grad_norm": 6.4108500480651855, "learning_rate": 5.273375445186577e-06, "loss": 0.33303638, "memory(GiB)": 34.88, "step": 75120, "train_speed(iter/s)": 0.411794 }, { "acc": 0.91105366, "epoch": 2.034088755313676, "grad_norm": 18.822601318359375, "learning_rate": 5.272816713916913e-06, "loss": 0.51086855, "memory(GiB)": 34.88, "step": 75125, "train_speed(iter/s)": 0.411795 }, { "acc": 0.91281528, "epoch": 2.0342241355968915, "grad_norm": 11.80740737915039, "learning_rate": 5.2722579792359474e-06, "loss": 0.44133759, "memory(GiB)": 34.88, "step": 75130, "train_speed(iter/s)": 0.411796 }, { "acc": 0.91673555, "epoch": 2.0343595158801073, "grad_norm": 5.925273418426514, "learning_rate": 5.27169924115068e-06, "loss": 0.55180645, "memory(GiB)": 34.88, "step": 75135, "train_speed(iter/s)": 0.411798 }, { "acc": 0.92765923, "epoch": 2.0344948961633227, "grad_norm": 5.701741695404053, "learning_rate": 5.271140499668109e-06, "loss": 0.35583212, "memory(GiB)": 34.88, "step": 75140, "train_speed(iter/s)": 0.411799 }, { "acc": 0.91456032, "epoch": 2.0346302764465385, "grad_norm": 12.03982162475586, "learning_rate": 5.270581754795236e-06, "loss": 0.50559173, "memory(GiB)": 34.88, "step": 75145, "train_speed(iter/s)": 0.411801 }, { "acc": 0.90615215, "epoch": 2.034765656729754, "grad_norm": 14.377107620239258, "learning_rate": 5.270023006539055e-06, "loss": 0.58107548, "memory(GiB)": 34.88, "step": 75150, "train_speed(iter/s)": 0.411802 }, { "acc": 0.90830269, "epoch": 2.0349010370129696, "grad_norm": 4.1522369384765625, "learning_rate": 5.269464254906571e-06, "loss": 0.47232113, "memory(GiB)": 34.88, "step": 75155, "train_speed(iter/s)": 0.411803 }, { "acc": 0.93062401, "epoch": 2.035036417296185, "grad_norm": 6.125380039215088, "learning_rate": 5.26890549990478e-06, "loss": 0.33315299, "memory(GiB)": 34.88, "step": 75160, "train_speed(iter/s)": 0.411804 }, { "acc": 0.93428936, "epoch": 2.0351717975794004, "grad_norm": 14.307072639465332, "learning_rate": 5.2683467415406815e-06, "loss": 0.37490008, "memory(GiB)": 34.88, "step": 75165, "train_speed(iter/s)": 0.411806 }, { "acc": 0.93602085, "epoch": 2.035307177862616, "grad_norm": 8.262960433959961, "learning_rate": 5.267787979821278e-06, "loss": 0.35280471, "memory(GiB)": 34.88, "step": 75170, "train_speed(iter/s)": 0.411807 }, { "acc": 0.92962542, "epoch": 2.0354425581458315, "grad_norm": 3.3752362728118896, "learning_rate": 5.267229214753567e-06, "loss": 0.35063837, "memory(GiB)": 34.88, "step": 75175, "train_speed(iter/s)": 0.411808 }, { "acc": 0.9224823, "epoch": 2.0355779384290473, "grad_norm": 5.340884208679199, "learning_rate": 5.26667044634455e-06, "loss": 0.40081735, "memory(GiB)": 34.88, "step": 75180, "train_speed(iter/s)": 0.411809 }, { "acc": 0.90540028, "epoch": 2.0357133187122627, "grad_norm": 16.569124221801758, "learning_rate": 5.266111674601222e-06, "loss": 0.50759296, "memory(GiB)": 34.88, "step": 75185, "train_speed(iter/s)": 0.41181 }, { "acc": 0.91573582, "epoch": 2.0358486989954785, "grad_norm": 9.455479621887207, "learning_rate": 5.265552899530589e-06, "loss": 0.44327502, "memory(GiB)": 34.88, "step": 75190, "train_speed(iter/s)": 0.411811 }, { "acc": 0.91739683, "epoch": 2.035984079278694, "grad_norm": 7.358386516571045, "learning_rate": 5.264994121139645e-06, "loss": 0.44679031, "memory(GiB)": 34.88, "step": 75195, "train_speed(iter/s)": 0.411812 }, { "acc": 0.91922894, "epoch": 2.036119459561909, "grad_norm": 9.401822090148926, "learning_rate": 5.264435339435392e-06, "loss": 0.37491493, "memory(GiB)": 34.88, "step": 75200, "train_speed(iter/s)": 0.411813 }, { "acc": 0.92781076, "epoch": 2.036254839845125, "grad_norm": 12.237220764160156, "learning_rate": 5.263876554424833e-06, "loss": 0.40558805, "memory(GiB)": 34.88, "step": 75205, "train_speed(iter/s)": 0.411815 }, { "acc": 0.91855593, "epoch": 2.0363902201283404, "grad_norm": 4.725921630859375, "learning_rate": 5.263317766114962e-06, "loss": 0.40389428, "memory(GiB)": 34.88, "step": 75210, "train_speed(iter/s)": 0.411816 }, { "acc": 0.928613, "epoch": 2.036525600411556, "grad_norm": 6.309226989746094, "learning_rate": 5.262758974512785e-06, "loss": 0.42208595, "memory(GiB)": 34.88, "step": 75215, "train_speed(iter/s)": 0.411818 }, { "acc": 0.92574177, "epoch": 2.0366609806947715, "grad_norm": 8.388181686401367, "learning_rate": 5.262200179625298e-06, "loss": 0.42432594, "memory(GiB)": 34.88, "step": 75220, "train_speed(iter/s)": 0.411819 }, { "acc": 0.9299077, "epoch": 2.0367963609779873, "grad_norm": 8.731771469116211, "learning_rate": 5.261641381459503e-06, "loss": 0.39241126, "memory(GiB)": 34.88, "step": 75225, "train_speed(iter/s)": 0.41182 }, { "acc": 0.92449284, "epoch": 2.0369317412612027, "grad_norm": 8.857755661010742, "learning_rate": 5.261082580022397e-06, "loss": 0.44433026, "memory(GiB)": 34.88, "step": 75230, "train_speed(iter/s)": 0.411821 }, { "acc": 0.91635571, "epoch": 2.0370671215444185, "grad_norm": 24.147615432739258, "learning_rate": 5.260523775320983e-06, "loss": 0.47711773, "memory(GiB)": 34.88, "step": 75235, "train_speed(iter/s)": 0.411822 }, { "acc": 0.92614803, "epoch": 2.037202501827634, "grad_norm": 18.55624771118164, "learning_rate": 5.25996496736226e-06, "loss": 0.4406292, "memory(GiB)": 34.88, "step": 75240, "train_speed(iter/s)": 0.411823 }, { "acc": 0.91566944, "epoch": 2.037337882110849, "grad_norm": 5.484357833862305, "learning_rate": 5.259406156153228e-06, "loss": 0.40655079, "memory(GiB)": 34.88, "step": 75245, "train_speed(iter/s)": 0.411825 }, { "acc": 0.91626129, "epoch": 2.037473262394065, "grad_norm": 11.20675277709961, "learning_rate": 5.258847341700889e-06, "loss": 0.41626849, "memory(GiB)": 34.88, "step": 75250, "train_speed(iter/s)": 0.411827 }, { "acc": 0.91333027, "epoch": 2.0376086426772804, "grad_norm": 7.4561591148376465, "learning_rate": 5.258288524012242e-06, "loss": 0.47715359, "memory(GiB)": 34.88, "step": 75255, "train_speed(iter/s)": 0.411828 }, { "acc": 0.92318945, "epoch": 2.037744022960496, "grad_norm": 4.695702075958252, "learning_rate": 5.257729703094285e-06, "loss": 0.38387561, "memory(GiB)": 34.88, "step": 75260, "train_speed(iter/s)": 0.411829 }, { "acc": 0.93906002, "epoch": 2.0378794032437115, "grad_norm": 4.069492816925049, "learning_rate": 5.257170878954022e-06, "loss": 0.33690474, "memory(GiB)": 34.88, "step": 75265, "train_speed(iter/s)": 0.41183 }, { "acc": 0.92276192, "epoch": 2.0380147835269273, "grad_norm": 44.58914566040039, "learning_rate": 5.256612051598453e-06, "loss": 0.39914689, "memory(GiB)": 34.88, "step": 75270, "train_speed(iter/s)": 0.411832 }, { "acc": 0.90663414, "epoch": 2.0381501638101427, "grad_norm": 4.825751781463623, "learning_rate": 5.256053221034576e-06, "loss": 0.56131058, "memory(GiB)": 34.88, "step": 75275, "train_speed(iter/s)": 0.411833 }, { "acc": 0.92353382, "epoch": 2.038285544093358, "grad_norm": 12.746695518493652, "learning_rate": 5.255494387269392e-06, "loss": 0.45586166, "memory(GiB)": 34.88, "step": 75280, "train_speed(iter/s)": 0.411834 }, { "acc": 0.90858994, "epoch": 2.038420924376574, "grad_norm": 12.706036567687988, "learning_rate": 5.254935550309904e-06, "loss": 0.60447464, "memory(GiB)": 34.88, "step": 75285, "train_speed(iter/s)": 0.411835 }, { "acc": 0.92175684, "epoch": 2.038556304659789, "grad_norm": 12.311208724975586, "learning_rate": 5.2543767101631085e-06, "loss": 0.42359533, "memory(GiB)": 34.88, "step": 75290, "train_speed(iter/s)": 0.411837 }, { "acc": 0.92182121, "epoch": 2.038691684943005, "grad_norm": 8.252730369567871, "learning_rate": 5.253817866836012e-06, "loss": 0.42956429, "memory(GiB)": 34.88, "step": 75295, "train_speed(iter/s)": 0.411837 }, { "acc": 0.9238327, "epoch": 2.0388270652262204, "grad_norm": 5.350368976593018, "learning_rate": 5.253259020335607e-06, "loss": 0.48011703, "memory(GiB)": 34.88, "step": 75300, "train_speed(iter/s)": 0.411839 }, { "acc": 0.92045965, "epoch": 2.038962445509436, "grad_norm": 13.307766914367676, "learning_rate": 5.252700170668902e-06, "loss": 0.42603807, "memory(GiB)": 34.88, "step": 75305, "train_speed(iter/s)": 0.41184 }, { "acc": 0.91076689, "epoch": 2.0390978257926515, "grad_norm": 18.25785255432129, "learning_rate": 5.2521413178428926e-06, "loss": 0.45127339, "memory(GiB)": 34.88, "step": 75310, "train_speed(iter/s)": 0.411841 }, { "acc": 0.90809317, "epoch": 2.039233206075867, "grad_norm": 7.980143070220947, "learning_rate": 5.251582461864581e-06, "loss": 0.53622169, "memory(GiB)": 34.88, "step": 75315, "train_speed(iter/s)": 0.411842 }, { "acc": 0.90962086, "epoch": 2.0393685863590827, "grad_norm": 10.60815715789795, "learning_rate": 5.25102360274097e-06, "loss": 0.44213047, "memory(GiB)": 34.88, "step": 75320, "train_speed(iter/s)": 0.411843 }, { "acc": 0.91282663, "epoch": 2.039503966642298, "grad_norm": 7.927907943725586, "learning_rate": 5.250464740479057e-06, "loss": 0.44568267, "memory(GiB)": 34.88, "step": 75325, "train_speed(iter/s)": 0.411844 }, { "acc": 0.91370163, "epoch": 2.039639346925514, "grad_norm": 13.142635345458984, "learning_rate": 5.249905875085846e-06, "loss": 0.5418191, "memory(GiB)": 34.88, "step": 75330, "train_speed(iter/s)": 0.411846 }, { "acc": 0.90268211, "epoch": 2.039774727208729, "grad_norm": 6.234899044036865, "learning_rate": 5.249347006568334e-06, "loss": 0.53786077, "memory(GiB)": 34.88, "step": 75335, "train_speed(iter/s)": 0.411847 }, { "acc": 0.92373981, "epoch": 2.039910107491945, "grad_norm": 14.102080345153809, "learning_rate": 5.248788134933526e-06, "loss": 0.41668043, "memory(GiB)": 34.88, "step": 75340, "train_speed(iter/s)": 0.411849 }, { "acc": 0.93177195, "epoch": 2.0400454877751604, "grad_norm": 12.726903915405273, "learning_rate": 5.24822926018842e-06, "loss": 0.44656854, "memory(GiB)": 34.88, "step": 75345, "train_speed(iter/s)": 0.41185 }, { "acc": 0.9140604, "epoch": 2.040180868058376, "grad_norm": 15.405840873718262, "learning_rate": 5.247670382340019e-06, "loss": 0.40289049, "memory(GiB)": 34.88, "step": 75350, "train_speed(iter/s)": 0.411852 }, { "acc": 0.93078289, "epoch": 2.0403162483415915, "grad_norm": 6.7238006591796875, "learning_rate": 5.247111501395324e-06, "loss": 0.33240137, "memory(GiB)": 34.88, "step": 75355, "train_speed(iter/s)": 0.411853 }, { "acc": 0.92809238, "epoch": 2.040451628624807, "grad_norm": 10.356046676635742, "learning_rate": 5.2465526173613335e-06, "loss": 0.44538908, "memory(GiB)": 34.88, "step": 75360, "train_speed(iter/s)": 0.411854 }, { "acc": 0.91768103, "epoch": 2.0405870089080227, "grad_norm": 12.057201385498047, "learning_rate": 5.245993730245053e-06, "loss": 0.49713278, "memory(GiB)": 34.88, "step": 75365, "train_speed(iter/s)": 0.411856 }, { "acc": 0.92538357, "epoch": 2.040722389191238, "grad_norm": 7.327394962310791, "learning_rate": 5.245434840053479e-06, "loss": 0.41781464, "memory(GiB)": 34.88, "step": 75370, "train_speed(iter/s)": 0.411857 }, { "acc": 0.89780502, "epoch": 2.040857769474454, "grad_norm": 8.636056900024414, "learning_rate": 5.244875946793616e-06, "loss": 0.60867157, "memory(GiB)": 34.88, "step": 75375, "train_speed(iter/s)": 0.411859 }, { "acc": 0.91532631, "epoch": 2.040993149757669, "grad_norm": 6.4411749839782715, "learning_rate": 5.244317050472463e-06, "loss": 0.47614288, "memory(GiB)": 34.88, "step": 75380, "train_speed(iter/s)": 0.41186 }, { "acc": 0.90639915, "epoch": 2.041128530040885, "grad_norm": 6.451934814453125, "learning_rate": 5.243758151097023e-06, "loss": 0.5311595, "memory(GiB)": 34.88, "step": 75385, "train_speed(iter/s)": 0.411862 }, { "acc": 0.91817131, "epoch": 2.0412639103241004, "grad_norm": 8.983531951904297, "learning_rate": 5.243199248674296e-06, "loss": 0.49822922, "memory(GiB)": 34.88, "step": 75390, "train_speed(iter/s)": 0.411863 }, { "acc": 0.92297993, "epoch": 2.041399290607316, "grad_norm": 4.117244243621826, "learning_rate": 5.242640343211285e-06, "loss": 0.43095264, "memory(GiB)": 34.88, "step": 75395, "train_speed(iter/s)": 0.411864 }, { "acc": 0.91762123, "epoch": 2.0415346708905315, "grad_norm": 7.599877834320068, "learning_rate": 5.242081434714991e-06, "loss": 0.41487532, "memory(GiB)": 34.88, "step": 75400, "train_speed(iter/s)": 0.411865 }, { "acc": 0.91707268, "epoch": 2.041670051173747, "grad_norm": 6.7499165534973145, "learning_rate": 5.241522523192413e-06, "loss": 0.57218223, "memory(GiB)": 34.88, "step": 75405, "train_speed(iter/s)": 0.411867 }, { "acc": 0.92222176, "epoch": 2.0418054314569627, "grad_norm": 13.159893035888672, "learning_rate": 5.240963608650555e-06, "loss": 0.41784492, "memory(GiB)": 34.88, "step": 75410, "train_speed(iter/s)": 0.411868 }, { "acc": 0.90890865, "epoch": 2.041940811740178, "grad_norm": 8.202885627746582, "learning_rate": 5.240404691096418e-06, "loss": 0.53824077, "memory(GiB)": 34.88, "step": 75415, "train_speed(iter/s)": 0.411869 }, { "acc": 0.91690474, "epoch": 2.042076192023394, "grad_norm": 8.575675964355469, "learning_rate": 5.239845770537003e-06, "loss": 0.379989, "memory(GiB)": 34.88, "step": 75420, "train_speed(iter/s)": 0.41187 }, { "acc": 0.91526623, "epoch": 2.042211572306609, "grad_norm": 14.930811882019043, "learning_rate": 5.239286846979312e-06, "loss": 0.48330784, "memory(GiB)": 34.88, "step": 75425, "train_speed(iter/s)": 0.411872 }, { "acc": 0.92110672, "epoch": 2.042346952589825, "grad_norm": 6.548800468444824, "learning_rate": 5.238727920430345e-06, "loss": 0.4730474, "memory(GiB)": 34.88, "step": 75430, "train_speed(iter/s)": 0.411873 }, { "acc": 0.92389469, "epoch": 2.0424823328730404, "grad_norm": 5.57316255569458, "learning_rate": 5.238168990897109e-06, "loss": 0.44964924, "memory(GiB)": 34.88, "step": 75435, "train_speed(iter/s)": 0.411875 }, { "acc": 0.92612267, "epoch": 2.0426177131562557, "grad_norm": 3.7555885314941406, "learning_rate": 5.2376100583866e-06, "loss": 0.39591579, "memory(GiB)": 34.88, "step": 75440, "train_speed(iter/s)": 0.411876 }, { "acc": 0.92961693, "epoch": 2.0427530934394715, "grad_norm": 14.589354515075684, "learning_rate": 5.237051122905821e-06, "loss": 0.33735142, "memory(GiB)": 34.88, "step": 75445, "train_speed(iter/s)": 0.411877 }, { "acc": 0.92051029, "epoch": 2.042888473722687, "grad_norm": 7.9356303215026855, "learning_rate": 5.236492184461775e-06, "loss": 0.43627005, "memory(GiB)": 34.88, "step": 75450, "train_speed(iter/s)": 0.411878 }, { "acc": 0.90303869, "epoch": 2.0430238540059027, "grad_norm": 5.162539005279541, "learning_rate": 5.235933243061462e-06, "loss": 0.55377598, "memory(GiB)": 34.88, "step": 75455, "train_speed(iter/s)": 0.41188 }, { "acc": 0.92252941, "epoch": 2.043159234289118, "grad_norm": 10.684768676757812, "learning_rate": 5.235374298711886e-06, "loss": 0.40572309, "memory(GiB)": 34.88, "step": 75460, "train_speed(iter/s)": 0.411881 }, { "acc": 0.9147893, "epoch": 2.043294614572334, "grad_norm": 10.999652862548828, "learning_rate": 5.234815351420048e-06, "loss": 0.45405517, "memory(GiB)": 34.88, "step": 75465, "train_speed(iter/s)": 0.411882 }, { "acc": 0.90963039, "epoch": 2.043429994855549, "grad_norm": 7.827668190002441, "learning_rate": 5.23425640119295e-06, "loss": 0.48160324, "memory(GiB)": 34.88, "step": 75470, "train_speed(iter/s)": 0.411884 }, { "acc": 0.91866493, "epoch": 2.0435653751387646, "grad_norm": 23.148597717285156, "learning_rate": 5.233697448037591e-06, "loss": 0.50455956, "memory(GiB)": 34.88, "step": 75475, "train_speed(iter/s)": 0.411885 }, { "acc": 0.90717945, "epoch": 2.0437007554219804, "grad_norm": 8.579601287841797, "learning_rate": 5.233138491960981e-06, "loss": 0.50360045, "memory(GiB)": 34.88, "step": 75480, "train_speed(iter/s)": 0.411887 }, { "acc": 0.9174614, "epoch": 2.0438361357051957, "grad_norm": 5.793147563934326, "learning_rate": 5.232579532970112e-06, "loss": 0.47853727, "memory(GiB)": 34.88, "step": 75485, "train_speed(iter/s)": 0.411888 }, { "acc": 0.88939972, "epoch": 2.0439715159884115, "grad_norm": 16.716188430786133, "learning_rate": 5.232020571071993e-06, "loss": 0.68253732, "memory(GiB)": 34.88, "step": 75490, "train_speed(iter/s)": 0.41189 }, { "acc": 0.91470938, "epoch": 2.044106896271627, "grad_norm": 7.440156936645508, "learning_rate": 5.2314616062736235e-06, "loss": 0.48744249, "memory(GiB)": 34.88, "step": 75495, "train_speed(iter/s)": 0.411891 }, { "acc": 0.90354424, "epoch": 2.0442422765548427, "grad_norm": 7.810952663421631, "learning_rate": 5.230902638582006e-06, "loss": 0.50715714, "memory(GiB)": 34.88, "step": 75500, "train_speed(iter/s)": 0.411893 }, { "acc": 0.92681866, "epoch": 2.044377656838058, "grad_norm": 5.307308197021484, "learning_rate": 5.230343668004143e-06, "loss": 0.38600407, "memory(GiB)": 34.88, "step": 75505, "train_speed(iter/s)": 0.411894 }, { "acc": 0.92297249, "epoch": 2.044513037121274, "grad_norm": 9.596323013305664, "learning_rate": 5.229784694547035e-06, "loss": 0.36277499, "memory(GiB)": 34.88, "step": 75510, "train_speed(iter/s)": 0.411895 }, { "acc": 0.90367041, "epoch": 2.044648417404489, "grad_norm": 10.917649269104004, "learning_rate": 5.229225718217688e-06, "loss": 0.58985286, "memory(GiB)": 34.88, "step": 75515, "train_speed(iter/s)": 0.411897 }, { "acc": 0.91191883, "epoch": 2.0447837976877046, "grad_norm": 8.708586692810059, "learning_rate": 5.2286667390231005e-06, "loss": 0.4859539, "memory(GiB)": 34.88, "step": 75520, "train_speed(iter/s)": 0.411898 }, { "acc": 0.91269398, "epoch": 2.0449191779709204, "grad_norm": 10.021076202392578, "learning_rate": 5.228107756970277e-06, "loss": 0.50636311, "memory(GiB)": 34.88, "step": 75525, "train_speed(iter/s)": 0.4119 }, { "acc": 0.92226858, "epoch": 2.0450545582541357, "grad_norm": 6.797264575958252, "learning_rate": 5.2275487720662175e-06, "loss": 0.39487774, "memory(GiB)": 34.88, "step": 75530, "train_speed(iter/s)": 0.411901 }, { "acc": 0.90504112, "epoch": 2.0451899385373515, "grad_norm": 10.459299087524414, "learning_rate": 5.226989784317927e-06, "loss": 0.46001101, "memory(GiB)": 34.88, "step": 75535, "train_speed(iter/s)": 0.411902 }, { "acc": 0.92480602, "epoch": 2.045325318820567, "grad_norm": 9.725324630737305, "learning_rate": 5.226430793732408e-06, "loss": 0.44202518, "memory(GiB)": 34.88, "step": 75540, "train_speed(iter/s)": 0.411904 }, { "acc": 0.90891209, "epoch": 2.0454606991037827, "grad_norm": 29.242202758789062, "learning_rate": 5.2258718003166595e-06, "loss": 0.5592988, "memory(GiB)": 34.88, "step": 75545, "train_speed(iter/s)": 0.411905 }, { "acc": 0.91870899, "epoch": 2.045596079386998, "grad_norm": 12.740062713623047, "learning_rate": 5.225312804077688e-06, "loss": 0.52265472, "memory(GiB)": 34.88, "step": 75550, "train_speed(iter/s)": 0.411906 }, { "acc": 0.91614714, "epoch": 2.0457314596702134, "grad_norm": 10.414530754089355, "learning_rate": 5.224753805022494e-06, "loss": 0.4551579, "memory(GiB)": 34.88, "step": 75555, "train_speed(iter/s)": 0.411908 }, { "acc": 0.91553164, "epoch": 2.045866839953429, "grad_norm": 11.005023002624512, "learning_rate": 5.22419480315808e-06, "loss": 0.47209454, "memory(GiB)": 34.88, "step": 75560, "train_speed(iter/s)": 0.411909 }, { "acc": 0.91934509, "epoch": 2.0460022202366446, "grad_norm": 5.526026725769043, "learning_rate": 5.223635798491451e-06, "loss": 0.412219, "memory(GiB)": 34.88, "step": 75565, "train_speed(iter/s)": 0.411911 }, { "acc": 0.89209824, "epoch": 2.0461376005198604, "grad_norm": 15.129313468933105, "learning_rate": 5.2230767910296066e-06, "loss": 0.55236874, "memory(GiB)": 34.88, "step": 75570, "train_speed(iter/s)": 0.411912 }, { "acc": 0.90565252, "epoch": 2.0462729808030757, "grad_norm": 9.553194046020508, "learning_rate": 5.222517780779549e-06, "loss": 0.54003162, "memory(GiB)": 34.88, "step": 75575, "train_speed(iter/s)": 0.411913 }, { "acc": 0.9060936, "epoch": 2.0464083610862915, "grad_norm": 6.635427474975586, "learning_rate": 5.221958767748283e-06, "loss": 0.49766436, "memory(GiB)": 34.88, "step": 75580, "train_speed(iter/s)": 0.411915 }, { "acc": 0.90912418, "epoch": 2.046543741369507, "grad_norm": 7.174096584320068, "learning_rate": 5.221399751942811e-06, "loss": 0.52311678, "memory(GiB)": 34.88, "step": 75585, "train_speed(iter/s)": 0.411916 }, { "acc": 0.91373444, "epoch": 2.0466791216527227, "grad_norm": 11.666217803955078, "learning_rate": 5.220840733370136e-06, "loss": 0.49779396, "memory(GiB)": 34.88, "step": 75590, "train_speed(iter/s)": 0.411917 }, { "acc": 0.92259216, "epoch": 2.046814501935938, "grad_norm": 4.169027328491211, "learning_rate": 5.2202817120372605e-06, "loss": 0.41997423, "memory(GiB)": 34.88, "step": 75595, "train_speed(iter/s)": 0.411919 }, { "acc": 0.90216064, "epoch": 2.0469498822191534, "grad_norm": 16.003761291503906, "learning_rate": 5.219722687951187e-06, "loss": 0.56950636, "memory(GiB)": 34.88, "step": 75600, "train_speed(iter/s)": 0.41192 }, { "acc": 0.91439705, "epoch": 2.047085262502369, "grad_norm": 9.86011028289795, "learning_rate": 5.219163661118919e-06, "loss": 0.48815832, "memory(GiB)": 34.88, "step": 75605, "train_speed(iter/s)": 0.411922 }, { "acc": 0.91923923, "epoch": 2.0472206427855846, "grad_norm": 13.890422821044922, "learning_rate": 5.2186046315474584e-06, "loss": 0.42551212, "memory(GiB)": 34.88, "step": 75610, "train_speed(iter/s)": 0.411923 }, { "acc": 0.91109171, "epoch": 2.0473560230688004, "grad_norm": 14.005671501159668, "learning_rate": 5.218045599243808e-06, "loss": 0.4256443, "memory(GiB)": 34.88, "step": 75615, "train_speed(iter/s)": 0.411924 }, { "acc": 0.92141342, "epoch": 2.0474914033520157, "grad_norm": 4.638712406158447, "learning_rate": 5.217486564214972e-06, "loss": 0.4117866, "memory(GiB)": 34.88, "step": 75620, "train_speed(iter/s)": 0.411926 }, { "acc": 0.93238049, "epoch": 2.0476267836352315, "grad_norm": 16.439746856689453, "learning_rate": 5.216927526467953e-06, "loss": 0.3706634, "memory(GiB)": 34.88, "step": 75625, "train_speed(iter/s)": 0.411927 }, { "acc": 0.9254612, "epoch": 2.047762163918447, "grad_norm": 8.811236381530762, "learning_rate": 5.216368486009754e-06, "loss": 0.40252218, "memory(GiB)": 34.88, "step": 75630, "train_speed(iter/s)": 0.411929 }, { "acc": 0.91902895, "epoch": 2.0478975442016623, "grad_norm": 6.564209461212158, "learning_rate": 5.215809442847377e-06, "loss": 0.3967952, "memory(GiB)": 34.88, "step": 75635, "train_speed(iter/s)": 0.41193 }, { "acc": 0.92447977, "epoch": 2.048032924484878, "grad_norm": 7.930619716644287, "learning_rate": 5.215250396987828e-06, "loss": 0.42611456, "memory(GiB)": 34.88, "step": 75640, "train_speed(iter/s)": 0.411931 }, { "acc": 0.90671206, "epoch": 2.0481683047680934, "grad_norm": 4.8469133377075195, "learning_rate": 5.214691348438109e-06, "loss": 0.50053091, "memory(GiB)": 34.88, "step": 75645, "train_speed(iter/s)": 0.411933 }, { "acc": 0.92438383, "epoch": 2.048303685051309, "grad_norm": 4.930509090423584, "learning_rate": 5.21413229720522e-06, "loss": 0.37658994, "memory(GiB)": 34.88, "step": 75650, "train_speed(iter/s)": 0.411934 }, { "acc": 0.91452456, "epoch": 2.0484390653345246, "grad_norm": 11.153068542480469, "learning_rate": 5.2135732432961684e-06, "loss": 0.48837643, "memory(GiB)": 34.88, "step": 75655, "train_speed(iter/s)": 0.411936 }, { "acc": 0.92497044, "epoch": 2.0485744456177404, "grad_norm": 21.387361526489258, "learning_rate": 5.213014186717955e-06, "loss": 0.44063301, "memory(GiB)": 34.88, "step": 75660, "train_speed(iter/s)": 0.411937 }, { "acc": 0.90813646, "epoch": 2.0487098259009557, "grad_norm": 7.276275157928467, "learning_rate": 5.2124551274775835e-06, "loss": 0.53075857, "memory(GiB)": 34.88, "step": 75665, "train_speed(iter/s)": 0.411939 }, { "acc": 0.91219215, "epoch": 2.0488452061841715, "grad_norm": 5.7055792808532715, "learning_rate": 5.211896065582057e-06, "loss": 0.41161742, "memory(GiB)": 34.88, "step": 75670, "train_speed(iter/s)": 0.41194 }, { "acc": 0.91724501, "epoch": 2.048980586467387, "grad_norm": 6.92390251159668, "learning_rate": 5.211337001038381e-06, "loss": 0.46082544, "memory(GiB)": 34.88, "step": 75675, "train_speed(iter/s)": 0.411941 }, { "acc": 0.92760248, "epoch": 2.0491159667506023, "grad_norm": 10.290733337402344, "learning_rate": 5.2107779338535556e-06, "loss": 0.4360919, "memory(GiB)": 34.88, "step": 75680, "train_speed(iter/s)": 0.411943 }, { "acc": 0.91123619, "epoch": 2.049251347033818, "grad_norm": 18.117965698242188, "learning_rate": 5.210218864034587e-06, "loss": 0.46511917, "memory(GiB)": 34.88, "step": 75685, "train_speed(iter/s)": 0.411945 }, { "acc": 0.90931425, "epoch": 2.0493867273170334, "grad_norm": 6.255557537078857, "learning_rate": 5.2096597915884785e-06, "loss": 0.4571497, "memory(GiB)": 34.88, "step": 75690, "train_speed(iter/s)": 0.411946 }, { "acc": 0.93159428, "epoch": 2.049522107600249, "grad_norm": 19.85140609741211, "learning_rate": 5.209100716522232e-06, "loss": 0.3603236, "memory(GiB)": 34.88, "step": 75695, "train_speed(iter/s)": 0.411947 }, { "acc": 0.91304159, "epoch": 2.0496574878834646, "grad_norm": 11.364118576049805, "learning_rate": 5.208541638842851e-06, "loss": 0.4661809, "memory(GiB)": 34.88, "step": 75700, "train_speed(iter/s)": 0.411949 }, { "acc": 0.90679226, "epoch": 2.0497928681666804, "grad_norm": 7.278343677520752, "learning_rate": 5.207982558557339e-06, "loss": 0.56417055, "memory(GiB)": 34.88, "step": 75705, "train_speed(iter/s)": 0.41195 }, { "acc": 0.90286341, "epoch": 2.0499282484498957, "grad_norm": 21.468833923339844, "learning_rate": 5.207423475672702e-06, "loss": 0.60753756, "memory(GiB)": 34.88, "step": 75710, "train_speed(iter/s)": 0.411952 }, { "acc": 0.92704391, "epoch": 2.050063628733111, "grad_norm": 7.6854777336120605, "learning_rate": 5.206864390195939e-06, "loss": 0.39815497, "memory(GiB)": 34.88, "step": 75715, "train_speed(iter/s)": 0.411953 }, { "acc": 0.92820377, "epoch": 2.050199009016327, "grad_norm": 21.81559181213379, "learning_rate": 5.2063053021340595e-06, "loss": 0.40011406, "memory(GiB)": 34.88, "step": 75720, "train_speed(iter/s)": 0.411955 }, { "acc": 0.91182623, "epoch": 2.0503343892995423, "grad_norm": 10.065682411193848, "learning_rate": 5.2057462114940625e-06, "loss": 0.47911582, "memory(GiB)": 34.88, "step": 75725, "train_speed(iter/s)": 0.411956 }, { "acc": 0.90966702, "epoch": 2.050469769582758, "grad_norm": 9.752829551696777, "learning_rate": 5.205187118282954e-06, "loss": 0.58470731, "memory(GiB)": 34.88, "step": 75730, "train_speed(iter/s)": 0.411957 }, { "acc": 0.92787266, "epoch": 2.0506051498659734, "grad_norm": 7.1751298904418945, "learning_rate": 5.204628022507738e-06, "loss": 0.35517349, "memory(GiB)": 34.88, "step": 75735, "train_speed(iter/s)": 0.411959 }, { "acc": 0.90862827, "epoch": 2.0507405301491892, "grad_norm": 4.50250768661499, "learning_rate": 5.2040689241754165e-06, "loss": 0.54680033, "memory(GiB)": 34.88, "step": 75740, "train_speed(iter/s)": 0.41196 }, { "acc": 0.91828203, "epoch": 2.0508759104324046, "grad_norm": 7.384693145751953, "learning_rate": 5.203509823292995e-06, "loss": 0.47882485, "memory(GiB)": 34.88, "step": 75745, "train_speed(iter/s)": 0.411962 }, { "acc": 0.92524929, "epoch": 2.0510112907156204, "grad_norm": 10.167916297912598, "learning_rate": 5.202950719867475e-06, "loss": 0.42660656, "memory(GiB)": 34.88, "step": 75750, "train_speed(iter/s)": 0.411963 }, { "acc": 0.92152901, "epoch": 2.0511466709988357, "grad_norm": 12.529054641723633, "learning_rate": 5.202391613905863e-06, "loss": 0.3735626, "memory(GiB)": 34.88, "step": 75755, "train_speed(iter/s)": 0.411964 }, { "acc": 0.90650139, "epoch": 2.051282051282051, "grad_norm": 9.6802396774292, "learning_rate": 5.20183250541516e-06, "loss": 0.56155629, "memory(GiB)": 34.88, "step": 75760, "train_speed(iter/s)": 0.411966 }, { "acc": 0.91924133, "epoch": 2.051417431565267, "grad_norm": 4.581286907196045, "learning_rate": 5.201273394402373e-06, "loss": 0.41617861, "memory(GiB)": 34.88, "step": 75765, "train_speed(iter/s)": 0.411967 }, { "acc": 0.92438393, "epoch": 2.0515528118484823, "grad_norm": 11.737680435180664, "learning_rate": 5.200714280874507e-06, "loss": 0.48773909, "memory(GiB)": 34.88, "step": 75770, "train_speed(iter/s)": 0.411969 }, { "acc": 0.91179972, "epoch": 2.051688192131698, "grad_norm": 6.631356239318848, "learning_rate": 5.20015516483856e-06, "loss": 0.47082367, "memory(GiB)": 34.88, "step": 75775, "train_speed(iter/s)": 0.41197 }, { "acc": 0.89577084, "epoch": 2.0518235724149134, "grad_norm": 7.6844635009765625, "learning_rate": 5.1995960463015425e-06, "loss": 0.52043443, "memory(GiB)": 34.88, "step": 75780, "train_speed(iter/s)": 0.411972 }, { "acc": 0.92904549, "epoch": 2.0519589526981292, "grad_norm": 11.582148551940918, "learning_rate": 5.199036925270454e-06, "loss": 0.36273496, "memory(GiB)": 34.88, "step": 75785, "train_speed(iter/s)": 0.411973 }, { "acc": 0.90318441, "epoch": 2.0520943329813446, "grad_norm": 15.746119499206543, "learning_rate": 5.198477801752302e-06, "loss": 0.47871804, "memory(GiB)": 34.88, "step": 75790, "train_speed(iter/s)": 0.411974 }, { "acc": 0.91253891, "epoch": 2.05222971326456, "grad_norm": 8.28637981414795, "learning_rate": 5.197918675754088e-06, "loss": 0.5049109, "memory(GiB)": 34.88, "step": 75795, "train_speed(iter/s)": 0.411976 }, { "acc": 0.92239885, "epoch": 2.0523650935477757, "grad_norm": 7.651721477508545, "learning_rate": 5.197359547282818e-06, "loss": 0.44744072, "memory(GiB)": 34.88, "step": 75800, "train_speed(iter/s)": 0.411977 }, { "acc": 0.90826607, "epoch": 2.052500473830991, "grad_norm": 16.677658081054688, "learning_rate": 5.1968004163454935e-06, "loss": 0.55434432, "memory(GiB)": 34.88, "step": 75805, "train_speed(iter/s)": 0.411978 }, { "acc": 0.91535912, "epoch": 2.052635854114207, "grad_norm": 4.412323951721191, "learning_rate": 5.1962412829491215e-06, "loss": 0.43202453, "memory(GiB)": 34.88, "step": 75810, "train_speed(iter/s)": 0.41198 }, { "acc": 0.89868431, "epoch": 2.0527712343974223, "grad_norm": 10.506645202636719, "learning_rate": 5.195682147100707e-06, "loss": 0.54531727, "memory(GiB)": 34.88, "step": 75815, "train_speed(iter/s)": 0.411981 }, { "acc": 0.89756031, "epoch": 2.052906614680638, "grad_norm": 9.826994895935059, "learning_rate": 5.19512300880725e-06, "loss": 0.56863074, "memory(GiB)": 34.88, "step": 75820, "train_speed(iter/s)": 0.411983 }, { "acc": 0.90761309, "epoch": 2.0530419949638534, "grad_norm": 13.055310249328613, "learning_rate": 5.194563868075758e-06, "loss": 0.56325421, "memory(GiB)": 34.88, "step": 75825, "train_speed(iter/s)": 0.411984 }, { "acc": 0.92518482, "epoch": 2.0531773752470692, "grad_norm": 8.76566219329834, "learning_rate": 5.194004724913235e-06, "loss": 0.37867324, "memory(GiB)": 34.88, "step": 75830, "train_speed(iter/s)": 0.411986 }, { "acc": 0.91016455, "epoch": 2.0533127555302846, "grad_norm": 8.74493408203125, "learning_rate": 5.193445579326687e-06, "loss": 0.56791167, "memory(GiB)": 34.88, "step": 75835, "train_speed(iter/s)": 0.411987 }, { "acc": 0.89865417, "epoch": 2.0534481358135, "grad_norm": 9.825227737426758, "learning_rate": 5.192886431323113e-06, "loss": 0.63835802, "memory(GiB)": 34.88, "step": 75840, "train_speed(iter/s)": 0.411989 }, { "acc": 0.90862827, "epoch": 2.0535835160967157, "grad_norm": 10.78901195526123, "learning_rate": 5.192327280909523e-06, "loss": 0.55637293, "memory(GiB)": 34.88, "step": 75845, "train_speed(iter/s)": 0.41199 }, { "acc": 0.92941217, "epoch": 2.053718896379931, "grad_norm": 7.72172737121582, "learning_rate": 5.191768128092919e-06, "loss": 0.4623332, "memory(GiB)": 34.88, "step": 75850, "train_speed(iter/s)": 0.411991 }, { "acc": 0.92862854, "epoch": 2.053854276663147, "grad_norm": 8.330887794494629, "learning_rate": 5.191208972880306e-06, "loss": 0.38324761, "memory(GiB)": 34.88, "step": 75855, "train_speed(iter/s)": 0.411993 }, { "acc": 0.92215424, "epoch": 2.0539896569463623, "grad_norm": 8.144723892211914, "learning_rate": 5.190649815278689e-06, "loss": 0.44551873, "memory(GiB)": 34.88, "step": 75860, "train_speed(iter/s)": 0.411994 }, { "acc": 0.9194376, "epoch": 2.054125037229578, "grad_norm": 5.976974964141846, "learning_rate": 5.190090655295072e-06, "loss": 0.4633914, "memory(GiB)": 34.88, "step": 75865, "train_speed(iter/s)": 0.411996 }, { "acc": 0.91964474, "epoch": 2.0542604175127934, "grad_norm": 3.932792901992798, "learning_rate": 5.189531492936457e-06, "loss": 0.46278763, "memory(GiB)": 34.88, "step": 75870, "train_speed(iter/s)": 0.411997 }, { "acc": 0.92714672, "epoch": 2.054395797796009, "grad_norm": 12.691156387329102, "learning_rate": 5.188972328209853e-06, "loss": 0.51317482, "memory(GiB)": 34.88, "step": 75875, "train_speed(iter/s)": 0.411999 }, { "acc": 0.9104702, "epoch": 2.0545311780792246, "grad_norm": 7.071701526641846, "learning_rate": 5.188413161122261e-06, "loss": 0.51371927, "memory(GiB)": 34.88, "step": 75880, "train_speed(iter/s)": 0.412 }, { "acc": 0.91739273, "epoch": 2.05466655836244, "grad_norm": 13.482487678527832, "learning_rate": 5.1878539916806905e-06, "loss": 0.42779436, "memory(GiB)": 34.88, "step": 75885, "train_speed(iter/s)": 0.412001 }, { "acc": 0.91765022, "epoch": 2.0548019386456557, "grad_norm": 4.111752510070801, "learning_rate": 5.18729481989214e-06, "loss": 0.41174011, "memory(GiB)": 34.88, "step": 75890, "train_speed(iter/s)": 0.412003 }, { "acc": 0.90127659, "epoch": 2.054937318928871, "grad_norm": 47.81161117553711, "learning_rate": 5.18673564576362e-06, "loss": 0.52323222, "memory(GiB)": 34.88, "step": 75895, "train_speed(iter/s)": 0.412004 }, { "acc": 0.90147085, "epoch": 2.055072699212087, "grad_norm": 10.098160743713379, "learning_rate": 5.186176469302132e-06, "loss": 0.53803606, "memory(GiB)": 34.88, "step": 75900, "train_speed(iter/s)": 0.412005 }, { "acc": 0.93635998, "epoch": 2.0552080794953023, "grad_norm": 3.713975667953491, "learning_rate": 5.185617290514681e-06, "loss": 0.36001289, "memory(GiB)": 34.88, "step": 75905, "train_speed(iter/s)": 0.412007 }, { "acc": 0.9159009, "epoch": 2.055343459778518, "grad_norm": 6.696571350097656, "learning_rate": 5.185058109408271e-06, "loss": 0.46819067, "memory(GiB)": 34.88, "step": 75910, "train_speed(iter/s)": 0.412008 }, { "acc": 0.92264328, "epoch": 2.0554788400617334, "grad_norm": 16.74771499633789, "learning_rate": 5.184498925989909e-06, "loss": 0.38307867, "memory(GiB)": 34.88, "step": 75915, "train_speed(iter/s)": 0.412009 }, { "acc": 0.91633892, "epoch": 2.055614220344949, "grad_norm": 17.939783096313477, "learning_rate": 5.1839397402666e-06, "loss": 0.47166948, "memory(GiB)": 34.88, "step": 75920, "train_speed(iter/s)": 0.412011 }, { "acc": 0.93004818, "epoch": 2.0557496006281646, "grad_norm": 3.941301107406616, "learning_rate": 5.183380552245345e-06, "loss": 0.36466055, "memory(GiB)": 34.88, "step": 75925, "train_speed(iter/s)": 0.412012 }, { "acc": 0.89607201, "epoch": 2.05588498091138, "grad_norm": 9.422086715698242, "learning_rate": 5.182821361933155e-06, "loss": 0.59862585, "memory(GiB)": 34.88, "step": 75930, "train_speed(iter/s)": 0.412013 }, { "acc": 0.89994526, "epoch": 2.0560203611945957, "grad_norm": 9.779781341552734, "learning_rate": 5.182262169337028e-06, "loss": 0.51151094, "memory(GiB)": 34.88, "step": 75935, "train_speed(iter/s)": 0.412015 }, { "acc": 0.92102518, "epoch": 2.056155741477811, "grad_norm": 8.713615417480469, "learning_rate": 5.1817029744639766e-06, "loss": 0.45715761, "memory(GiB)": 34.88, "step": 75940, "train_speed(iter/s)": 0.412016 }, { "acc": 0.92791634, "epoch": 2.056291121761027, "grad_norm": 15.648764610290527, "learning_rate": 5.181143777320999e-06, "loss": 0.41903167, "memory(GiB)": 34.88, "step": 75945, "train_speed(iter/s)": 0.412018 }, { "acc": 0.89955635, "epoch": 2.0564265020442423, "grad_norm": 7.3778252601623535, "learning_rate": 5.180584577915104e-06, "loss": 0.53370872, "memory(GiB)": 34.88, "step": 75950, "train_speed(iter/s)": 0.412019 }, { "acc": 0.93003426, "epoch": 2.0565618823274576, "grad_norm": 7.546649932861328, "learning_rate": 5.180025376253296e-06, "loss": 0.36475291, "memory(GiB)": 34.88, "step": 75955, "train_speed(iter/s)": 0.412021 }, { "acc": 0.91035328, "epoch": 2.0566972626106734, "grad_norm": 10.174689292907715, "learning_rate": 5.17946617234258e-06, "loss": 0.4497148, "memory(GiB)": 34.88, "step": 75960, "train_speed(iter/s)": 0.412022 }, { "acc": 0.89999638, "epoch": 2.056832642893889, "grad_norm": 7.6360859870910645, "learning_rate": 5.17890696618996e-06, "loss": 0.59553018, "memory(GiB)": 34.88, "step": 75965, "train_speed(iter/s)": 0.412023 }, { "acc": 0.94128971, "epoch": 2.0569680231771046, "grad_norm": 7.006633281707764, "learning_rate": 5.178347757802443e-06, "loss": 0.33568921, "memory(GiB)": 34.88, "step": 75970, "train_speed(iter/s)": 0.412024 }, { "acc": 0.92427006, "epoch": 2.05710340346032, "grad_norm": 7.226271152496338, "learning_rate": 5.177788547187035e-06, "loss": 0.40269876, "memory(GiB)": 34.88, "step": 75975, "train_speed(iter/s)": 0.412026 }, { "acc": 0.92528, "epoch": 2.0572387837435357, "grad_norm": 7.7068963050842285, "learning_rate": 5.177229334350739e-06, "loss": 0.4012557, "memory(GiB)": 34.88, "step": 75980, "train_speed(iter/s)": 0.412027 }, { "acc": 0.92649746, "epoch": 2.057374164026751, "grad_norm": 5.630521774291992, "learning_rate": 5.176670119300558e-06, "loss": 0.3889698, "memory(GiB)": 34.88, "step": 75985, "train_speed(iter/s)": 0.412029 }, { "acc": 0.9242836, "epoch": 2.057509544309967, "grad_norm": 7.804257869720459, "learning_rate": 5.176110902043502e-06, "loss": 0.28670878, "memory(GiB)": 34.88, "step": 75990, "train_speed(iter/s)": 0.41203 }, { "acc": 0.91880131, "epoch": 2.0576449245931823, "grad_norm": 6.3832316398620605, "learning_rate": 5.1755516825865745e-06, "loss": 0.45863094, "memory(GiB)": 34.88, "step": 75995, "train_speed(iter/s)": 0.412032 }, { "acc": 0.92481508, "epoch": 2.0577803048763976, "grad_norm": 14.347253799438477, "learning_rate": 5.174992460936781e-06, "loss": 0.42608194, "memory(GiB)": 34.88, "step": 76000, "train_speed(iter/s)": 0.412033 }, { "acc": 0.92299051, "epoch": 2.0579156851596134, "grad_norm": 6.674795627593994, "learning_rate": 5.174433237101125e-06, "loss": 0.39389112, "memory(GiB)": 34.88, "step": 76005, "train_speed(iter/s)": 0.412035 }, { "acc": 0.91816139, "epoch": 2.058051065442829, "grad_norm": 11.859410285949707, "learning_rate": 5.173874011086615e-06, "loss": 0.47204695, "memory(GiB)": 34.88, "step": 76010, "train_speed(iter/s)": 0.412036 }, { "acc": 0.91663847, "epoch": 2.0581864457260446, "grad_norm": 7.7115254402160645, "learning_rate": 5.173314782900254e-06, "loss": 0.45120955, "memory(GiB)": 34.88, "step": 76015, "train_speed(iter/s)": 0.412037 }, { "acc": 0.91402836, "epoch": 2.05832182600926, "grad_norm": 7.826476573944092, "learning_rate": 5.172755552549048e-06, "loss": 0.41617985, "memory(GiB)": 34.88, "step": 76020, "train_speed(iter/s)": 0.412039 }, { "acc": 0.90800648, "epoch": 2.0584572062924757, "grad_norm": 11.987432479858398, "learning_rate": 5.172196320040004e-06, "loss": 0.55681472, "memory(GiB)": 34.88, "step": 76025, "train_speed(iter/s)": 0.41204 }, { "acc": 0.91990538, "epoch": 2.058592586575691, "grad_norm": 9.325102806091309, "learning_rate": 5.171637085380126e-06, "loss": 0.38758032, "memory(GiB)": 34.88, "step": 76030, "train_speed(iter/s)": 0.412042 }, { "acc": 0.92733421, "epoch": 2.0587279668589065, "grad_norm": 7.774277210235596, "learning_rate": 5.171077848576419e-06, "loss": 0.3466881, "memory(GiB)": 34.88, "step": 76035, "train_speed(iter/s)": 0.412043 }, { "acc": 0.90433369, "epoch": 2.0588633471421223, "grad_norm": 15.452300071716309, "learning_rate": 5.170518609635889e-06, "loss": 0.56445494, "memory(GiB)": 34.88, "step": 76040, "train_speed(iter/s)": 0.412045 }, { "acc": 0.92426834, "epoch": 2.0589987274253376, "grad_norm": 7.071631908416748, "learning_rate": 5.169959368565543e-06, "loss": 0.46213031, "memory(GiB)": 34.88, "step": 76045, "train_speed(iter/s)": 0.412046 }, { "acc": 0.91727285, "epoch": 2.0591341077085534, "grad_norm": 9.735926628112793, "learning_rate": 5.169400125372383e-06, "loss": 0.48199182, "memory(GiB)": 34.88, "step": 76050, "train_speed(iter/s)": 0.412048 }, { "acc": 0.90473194, "epoch": 2.059269487991769, "grad_norm": 12.02209186553955, "learning_rate": 5.16884088006342e-06, "loss": 0.54482632, "memory(GiB)": 34.88, "step": 76055, "train_speed(iter/s)": 0.412049 }, { "acc": 0.93641739, "epoch": 2.0594048682749846, "grad_norm": 8.401993751525879, "learning_rate": 5.168281632645654e-06, "loss": 0.34727731, "memory(GiB)": 34.88, "step": 76060, "train_speed(iter/s)": 0.41205 }, { "acc": 0.91207409, "epoch": 2.0595402485582, "grad_norm": 13.357823371887207, "learning_rate": 5.167722383126096e-06, "loss": 0.45158343, "memory(GiB)": 34.88, "step": 76065, "train_speed(iter/s)": 0.412052 }, { "acc": 0.92768993, "epoch": 2.0596756288414158, "grad_norm": 7.081072807312012, "learning_rate": 5.167163131511747e-06, "loss": 0.42063961, "memory(GiB)": 34.88, "step": 76070, "train_speed(iter/s)": 0.412053 }, { "acc": 0.91911774, "epoch": 2.059811009124631, "grad_norm": 12.325150489807129, "learning_rate": 5.166603877809616e-06, "loss": 0.52619076, "memory(GiB)": 34.88, "step": 76075, "train_speed(iter/s)": 0.412054 }, { "acc": 0.92520638, "epoch": 2.0599463894078465, "grad_norm": 4.85159969329834, "learning_rate": 5.166044622026708e-06, "loss": 0.3570466, "memory(GiB)": 34.88, "step": 76080, "train_speed(iter/s)": 0.412055 }, { "acc": 0.91189404, "epoch": 2.0600817696910623, "grad_norm": 8.082430839538574, "learning_rate": 5.165485364170026e-06, "loss": 0.36220937, "memory(GiB)": 34.88, "step": 76085, "train_speed(iter/s)": 0.412057 }, { "acc": 0.91479702, "epoch": 2.0602171499742776, "grad_norm": 9.654558181762695, "learning_rate": 5.164926104246582e-06, "loss": 0.46333599, "memory(GiB)": 34.88, "step": 76090, "train_speed(iter/s)": 0.412058 }, { "acc": 0.90355072, "epoch": 2.0603525302574934, "grad_norm": 12.92779541015625, "learning_rate": 5.164366842263374e-06, "loss": 0.55652256, "memory(GiB)": 34.88, "step": 76095, "train_speed(iter/s)": 0.412059 }, { "acc": 0.931884, "epoch": 2.060487910540709, "grad_norm": 6.802155494689941, "learning_rate": 5.1638075782274146e-06, "loss": 0.36687469, "memory(GiB)": 34.88, "step": 76100, "train_speed(iter/s)": 0.412061 }, { "acc": 0.92575855, "epoch": 2.0606232908239246, "grad_norm": 10.43070125579834, "learning_rate": 5.163248312145705e-06, "loss": 0.40382609, "memory(GiB)": 34.88, "step": 76105, "train_speed(iter/s)": 0.412062 }, { "acc": 0.91347818, "epoch": 2.06075867110714, "grad_norm": 39.40552520751953, "learning_rate": 5.162689044025253e-06, "loss": 0.54695826, "memory(GiB)": 34.88, "step": 76110, "train_speed(iter/s)": 0.412064 }, { "acc": 0.91748543, "epoch": 2.0608940513903553, "grad_norm": 10.096187591552734, "learning_rate": 5.162129773873066e-06, "loss": 0.41258144, "memory(GiB)": 34.88, "step": 76115, "train_speed(iter/s)": 0.412065 }, { "acc": 0.92356186, "epoch": 2.061029431673571, "grad_norm": 5.677067279815674, "learning_rate": 5.161570501696147e-06, "loss": 0.36119614, "memory(GiB)": 34.88, "step": 76120, "train_speed(iter/s)": 0.412067 }, { "acc": 0.93346176, "epoch": 2.0611648119567865, "grad_norm": 5.472710132598877, "learning_rate": 5.161011227501506e-06, "loss": 0.35765786, "memory(GiB)": 34.88, "step": 76125, "train_speed(iter/s)": 0.412068 }, { "acc": 0.91131859, "epoch": 2.0613001922400023, "grad_norm": 10.287467002868652, "learning_rate": 5.160451951296143e-06, "loss": 0.45633068, "memory(GiB)": 34.88, "step": 76130, "train_speed(iter/s)": 0.41207 }, { "acc": 0.9155036, "epoch": 2.0614355725232176, "grad_norm": 5.9572978019714355, "learning_rate": 5.159892673087069e-06, "loss": 0.43671379, "memory(GiB)": 34.88, "step": 76135, "train_speed(iter/s)": 0.412071 }, { "acc": 0.9168088, "epoch": 2.0615709528064334, "grad_norm": 5.5171308517456055, "learning_rate": 5.1593333928812885e-06, "loss": 0.40764346, "memory(GiB)": 34.88, "step": 76140, "train_speed(iter/s)": 0.412073 }, { "acc": 0.91407719, "epoch": 2.061706333089649, "grad_norm": 5.732615947723389, "learning_rate": 5.158774110685807e-06, "loss": 0.51632318, "memory(GiB)": 34.88, "step": 76145, "train_speed(iter/s)": 0.412074 }, { "acc": 0.92381039, "epoch": 2.0618417133728646, "grad_norm": 6.83809232711792, "learning_rate": 5.158214826507634e-06, "loss": 0.41870265, "memory(GiB)": 34.88, "step": 76150, "train_speed(iter/s)": 0.412076 }, { "acc": 0.91768265, "epoch": 2.06197709365608, "grad_norm": 46.8052978515625, "learning_rate": 5.15765554035377e-06, "loss": 0.47805223, "memory(GiB)": 34.88, "step": 76155, "train_speed(iter/s)": 0.412077 }, { "acc": 0.91919918, "epoch": 2.0621124739392953, "grad_norm": 11.528791427612305, "learning_rate": 5.157096252231225e-06, "loss": 0.37690387, "memory(GiB)": 34.88, "step": 76160, "train_speed(iter/s)": 0.412079 }, { "acc": 0.92428799, "epoch": 2.062247854222511, "grad_norm": 6.255442142486572, "learning_rate": 5.156536962147004e-06, "loss": 0.37405612, "memory(GiB)": 34.88, "step": 76165, "train_speed(iter/s)": 0.41208 }, { "acc": 0.91525393, "epoch": 2.0623832345057265, "grad_norm": 4.719343185424805, "learning_rate": 5.1559776701081135e-06, "loss": 0.50283213, "memory(GiB)": 34.88, "step": 76170, "train_speed(iter/s)": 0.412081 }, { "acc": 0.90856552, "epoch": 2.0625186147889423, "grad_norm": 7.856030464172363, "learning_rate": 5.155418376121559e-06, "loss": 0.50254407, "memory(GiB)": 34.88, "step": 76175, "train_speed(iter/s)": 0.412082 }, { "acc": 0.92811003, "epoch": 2.0626539950721576, "grad_norm": 8.021008491516113, "learning_rate": 5.1548590801943485e-06, "loss": 0.36871586, "memory(GiB)": 34.88, "step": 76180, "train_speed(iter/s)": 0.412084 }, { "acc": 0.93102169, "epoch": 2.0627893753553734, "grad_norm": 5.993244647979736, "learning_rate": 5.154299782333487e-06, "loss": 0.3717875, "memory(GiB)": 34.88, "step": 76185, "train_speed(iter/s)": 0.412085 }, { "acc": 0.90893669, "epoch": 2.062924755638589, "grad_norm": 5.049189567565918, "learning_rate": 5.153740482545983e-06, "loss": 0.43008838, "memory(GiB)": 34.88, "step": 76190, "train_speed(iter/s)": 0.412086 }, { "acc": 0.91104002, "epoch": 2.063060135921804, "grad_norm": 7.979150295257568, "learning_rate": 5.153181180838838e-06, "loss": 0.50003586, "memory(GiB)": 34.88, "step": 76195, "train_speed(iter/s)": 0.412088 }, { "acc": 0.93724308, "epoch": 2.06319551620502, "grad_norm": 7.504006862640381, "learning_rate": 5.1526218772190614e-06, "loss": 0.35418701, "memory(GiB)": 34.88, "step": 76200, "train_speed(iter/s)": 0.412089 }, { "acc": 0.91195211, "epoch": 2.0633308964882353, "grad_norm": 22.880691528320312, "learning_rate": 5.15206257169366e-06, "loss": 0.47167854, "memory(GiB)": 34.88, "step": 76205, "train_speed(iter/s)": 0.412091 }, { "acc": 0.92281647, "epoch": 2.063466276771451, "grad_norm": 8.704244613647461, "learning_rate": 5.151503264269639e-06, "loss": 0.42682009, "memory(GiB)": 34.88, "step": 76210, "train_speed(iter/s)": 0.412092 }, { "acc": 0.9208806, "epoch": 2.0636016570546665, "grad_norm": 10.112812042236328, "learning_rate": 5.150943954954005e-06, "loss": 0.40129681, "memory(GiB)": 34.88, "step": 76215, "train_speed(iter/s)": 0.412093 }, { "acc": 0.91920948, "epoch": 2.0637370373378823, "grad_norm": 7.147854328155518, "learning_rate": 5.150384643753767e-06, "loss": 0.4956028, "memory(GiB)": 34.88, "step": 76220, "train_speed(iter/s)": 0.412095 }, { "acc": 0.89783134, "epoch": 2.0638724176210976, "grad_norm": 11.55931282043457, "learning_rate": 5.149825330675927e-06, "loss": 0.60729108, "memory(GiB)": 34.88, "step": 76225, "train_speed(iter/s)": 0.412096 }, { "acc": 0.90321007, "epoch": 2.0640077979043134, "grad_norm": 4.8260393142700195, "learning_rate": 5.1492660157274955e-06, "loss": 0.52177262, "memory(GiB)": 34.88, "step": 76230, "train_speed(iter/s)": 0.412097 }, { "acc": 0.9287035, "epoch": 2.064143178187529, "grad_norm": 6.971898078918457, "learning_rate": 5.148706698915477e-06, "loss": 0.34899116, "memory(GiB)": 34.88, "step": 76235, "train_speed(iter/s)": 0.412099 }, { "acc": 0.90121822, "epoch": 2.064278558470744, "grad_norm": 8.185640335083008, "learning_rate": 5.148147380246878e-06, "loss": 0.62626228, "memory(GiB)": 34.88, "step": 76240, "train_speed(iter/s)": 0.4121 }, { "acc": 0.9356802, "epoch": 2.06441393875396, "grad_norm": 8.038500785827637, "learning_rate": 5.1475880597287055e-06, "loss": 0.39095974, "memory(GiB)": 34.88, "step": 76245, "train_speed(iter/s)": 0.412101 }, { "acc": 0.9292923, "epoch": 2.0645493190371753, "grad_norm": 9.115398406982422, "learning_rate": 5.147028737367966e-06, "loss": 0.35786686, "memory(GiB)": 34.88, "step": 76250, "train_speed(iter/s)": 0.412103 }, { "acc": 0.91561117, "epoch": 2.064684699320391, "grad_norm": 7.3597307205200195, "learning_rate": 5.146469413171668e-06, "loss": 0.53955951, "memory(GiB)": 34.88, "step": 76255, "train_speed(iter/s)": 0.412104 }, { "acc": 0.92310352, "epoch": 2.0648200796036065, "grad_norm": 8.513952255249023, "learning_rate": 5.145910087146813e-06, "loss": 0.47240005, "memory(GiB)": 34.88, "step": 76260, "train_speed(iter/s)": 0.412105 }, { "acc": 0.92015991, "epoch": 2.0649554598868223, "grad_norm": 3.885187864303589, "learning_rate": 5.145350759300413e-06, "loss": 0.3620959, "memory(GiB)": 34.88, "step": 76265, "train_speed(iter/s)": 0.412107 }, { "acc": 0.90756569, "epoch": 2.0650908401700376, "grad_norm": 13.642952919006348, "learning_rate": 5.1447914296394705e-06, "loss": 0.4940268, "memory(GiB)": 34.88, "step": 76270, "train_speed(iter/s)": 0.412108 }, { "acc": 0.90540161, "epoch": 2.065226220453253, "grad_norm": 12.59099292755127, "learning_rate": 5.144232098170998e-06, "loss": 0.48422427, "memory(GiB)": 34.88, "step": 76275, "train_speed(iter/s)": 0.41211 }, { "acc": 0.92172489, "epoch": 2.065361600736469, "grad_norm": 8.175097465515137, "learning_rate": 5.143672764901995e-06, "loss": 0.46990156, "memory(GiB)": 34.88, "step": 76280, "train_speed(iter/s)": 0.412111 }, { "acc": 0.92746067, "epoch": 2.065496981019684, "grad_norm": 5.003134250640869, "learning_rate": 5.1431134298394734e-06, "loss": 0.42729731, "memory(GiB)": 34.88, "step": 76285, "train_speed(iter/s)": 0.412113 }, { "acc": 0.91629477, "epoch": 2.0656323613029, "grad_norm": 6.581203460693359, "learning_rate": 5.142554092990437e-06, "loss": 0.44990606, "memory(GiB)": 34.88, "step": 76290, "train_speed(iter/s)": 0.412114 }, { "acc": 0.91708488, "epoch": 2.0657677415861153, "grad_norm": 12.024483680725098, "learning_rate": 5.141994754361895e-06, "loss": 0.44949241, "memory(GiB)": 34.88, "step": 76295, "train_speed(iter/s)": 0.412115 }, { "acc": 0.92607937, "epoch": 2.065903121869331, "grad_norm": 8.909232139587402, "learning_rate": 5.141435413960855e-06, "loss": 0.33913486, "memory(GiB)": 34.88, "step": 76300, "train_speed(iter/s)": 0.412117 }, { "acc": 0.92427197, "epoch": 2.0660385021525465, "grad_norm": 24.057973861694336, "learning_rate": 5.140876071794318e-06, "loss": 0.40671487, "memory(GiB)": 34.88, "step": 76305, "train_speed(iter/s)": 0.412118 }, { "acc": 0.92930403, "epoch": 2.066173882435762, "grad_norm": 9.428515434265137, "learning_rate": 5.140316727869298e-06, "loss": 0.38366377, "memory(GiB)": 34.88, "step": 76310, "train_speed(iter/s)": 0.41212 }, { "acc": 0.90085583, "epoch": 2.0663092627189776, "grad_norm": 5.6270365715026855, "learning_rate": 5.139757382192799e-06, "loss": 0.46001844, "memory(GiB)": 34.88, "step": 76315, "train_speed(iter/s)": 0.412121 }, { "acc": 0.91071796, "epoch": 2.066444643002193, "grad_norm": 8.999611854553223, "learning_rate": 5.139198034771826e-06, "loss": 0.514925, "memory(GiB)": 34.88, "step": 76320, "train_speed(iter/s)": 0.412122 }, { "acc": 0.94920378, "epoch": 2.066580023285409, "grad_norm": 5.733975410461426, "learning_rate": 5.138638685613387e-06, "loss": 0.21995111, "memory(GiB)": 34.88, "step": 76325, "train_speed(iter/s)": 0.412124 }, { "acc": 0.93014193, "epoch": 2.066715403568624, "grad_norm": 6.248637676239014, "learning_rate": 5.138079334724491e-06, "loss": 0.40275316, "memory(GiB)": 34.88, "step": 76330, "train_speed(iter/s)": 0.412125 }, { "acc": 0.92322674, "epoch": 2.06685078385184, "grad_norm": 4.350526332855225, "learning_rate": 5.137519982112143e-06, "loss": 0.40762858, "memory(GiB)": 34.88, "step": 76335, "train_speed(iter/s)": 0.412127 }, { "acc": 0.91126595, "epoch": 2.0669861641350553, "grad_norm": 6.847938060760498, "learning_rate": 5.136960627783349e-06, "loss": 0.50839252, "memory(GiB)": 34.88, "step": 76340, "train_speed(iter/s)": 0.412128 }, { "acc": 0.91749477, "epoch": 2.067121544418271, "grad_norm": 16.853321075439453, "learning_rate": 5.13640127174512e-06, "loss": 0.43094459, "memory(GiB)": 34.88, "step": 76345, "train_speed(iter/s)": 0.412129 }, { "acc": 0.93054371, "epoch": 2.0672569247014865, "grad_norm": 5.249043941497803, "learning_rate": 5.135841914004459e-06, "loss": 0.38678069, "memory(GiB)": 34.88, "step": 76350, "train_speed(iter/s)": 0.412131 }, { "acc": 0.93636303, "epoch": 2.067392304984702, "grad_norm": 7.917070388793945, "learning_rate": 5.135282554568376e-06, "loss": 0.33012848, "memory(GiB)": 34.88, "step": 76355, "train_speed(iter/s)": 0.412132 }, { "acc": 0.90740509, "epoch": 2.0675276852679176, "grad_norm": 8.828838348388672, "learning_rate": 5.134723193443876e-06, "loss": 0.55356503, "memory(GiB)": 34.88, "step": 76360, "train_speed(iter/s)": 0.412134 }, { "acc": 0.90729733, "epoch": 2.067663065551133, "grad_norm": 6.841408729553223, "learning_rate": 5.134163830637967e-06, "loss": 0.57413268, "memory(GiB)": 34.88, "step": 76365, "train_speed(iter/s)": 0.412135 }, { "acc": 0.9257719, "epoch": 2.067798445834349, "grad_norm": 5.941582679748535, "learning_rate": 5.133604466157656e-06, "loss": 0.4934248, "memory(GiB)": 34.88, "step": 76370, "train_speed(iter/s)": 0.412136 }, { "acc": 0.92303782, "epoch": 2.067933826117564, "grad_norm": 6.697588920593262, "learning_rate": 5.133045100009948e-06, "loss": 0.39111676, "memory(GiB)": 34.88, "step": 76375, "train_speed(iter/s)": 0.412138 }, { "acc": 0.91867046, "epoch": 2.06806920640078, "grad_norm": 6.7737836837768555, "learning_rate": 5.132485732201854e-06, "loss": 0.44142542, "memory(GiB)": 34.88, "step": 76380, "train_speed(iter/s)": 0.412139 }, { "acc": 0.92707796, "epoch": 2.0682045866839953, "grad_norm": 5.34832763671875, "learning_rate": 5.1319263627403784e-06, "loss": 0.40926991, "memory(GiB)": 34.88, "step": 76385, "train_speed(iter/s)": 0.41214 }, { "acc": 0.92928982, "epoch": 2.068339966967211, "grad_norm": 6.993253231048584, "learning_rate": 5.13136699163253e-06, "loss": 0.31949065, "memory(GiB)": 34.88, "step": 76390, "train_speed(iter/s)": 0.412141 }, { "acc": 0.92547226, "epoch": 2.0684753472504265, "grad_norm": 4.129997730255127, "learning_rate": 5.130807618885315e-06, "loss": 0.42696934, "memory(GiB)": 34.88, "step": 76395, "train_speed(iter/s)": 0.412143 }, { "acc": 0.91267052, "epoch": 2.068610727533642, "grad_norm": 7.84583854675293, "learning_rate": 5.130248244505742e-06, "loss": 0.44260941, "memory(GiB)": 34.88, "step": 76400, "train_speed(iter/s)": 0.412144 }, { "acc": 0.89713497, "epoch": 2.0687461078168576, "grad_norm": 11.337960243225098, "learning_rate": 5.129688868500818e-06, "loss": 0.60104499, "memory(GiB)": 34.88, "step": 76405, "train_speed(iter/s)": 0.412145 }, { "acc": 0.91672859, "epoch": 2.068881488100073, "grad_norm": 5.7399420738220215, "learning_rate": 5.129129490877547e-06, "loss": 0.48792825, "memory(GiB)": 34.88, "step": 76410, "train_speed(iter/s)": 0.412147 }, { "acc": 0.92170439, "epoch": 2.069016868383289, "grad_norm": 8.353523254394531, "learning_rate": 5.1285701116429395e-06, "loss": 0.38472581, "memory(GiB)": 34.88, "step": 76415, "train_speed(iter/s)": 0.412148 }, { "acc": 0.90990887, "epoch": 2.069152248666504, "grad_norm": 4.76020622253418, "learning_rate": 5.1280107308040024e-06, "loss": 0.50311842, "memory(GiB)": 34.88, "step": 76420, "train_speed(iter/s)": 0.41215 }, { "acc": 0.90956001, "epoch": 2.06928762894972, "grad_norm": 8.832751274108887, "learning_rate": 5.127451348367743e-06, "loss": 0.5561245, "memory(GiB)": 34.88, "step": 76425, "train_speed(iter/s)": 0.412151 }, { "acc": 0.9273325, "epoch": 2.0694230092329353, "grad_norm": 8.516356468200684, "learning_rate": 5.126891964341169e-06, "loss": 0.38241053, "memory(GiB)": 34.88, "step": 76430, "train_speed(iter/s)": 0.412152 }, { "acc": 0.91952362, "epoch": 2.0695583895161507, "grad_norm": 4.87365198135376, "learning_rate": 5.1263325787312865e-06, "loss": 0.43526144, "memory(GiB)": 34.88, "step": 76435, "train_speed(iter/s)": 0.412154 }, { "acc": 0.91582375, "epoch": 2.0696937697993665, "grad_norm": 12.659830093383789, "learning_rate": 5.125773191545104e-06, "loss": 0.45861282, "memory(GiB)": 34.88, "step": 76440, "train_speed(iter/s)": 0.412155 }, { "acc": 0.92878017, "epoch": 2.069829150082582, "grad_norm": 6.92242431640625, "learning_rate": 5.125213802789629e-06, "loss": 0.40376482, "memory(GiB)": 34.88, "step": 76445, "train_speed(iter/s)": 0.412157 }, { "acc": 0.89636726, "epoch": 2.0699645303657976, "grad_norm": 10.631895065307617, "learning_rate": 5.124654412471869e-06, "loss": 0.60063982, "memory(GiB)": 34.88, "step": 76450, "train_speed(iter/s)": 0.412158 }, { "acc": 0.91091614, "epoch": 2.070099910649013, "grad_norm": 17.28713035583496, "learning_rate": 5.124095020598829e-06, "loss": 0.47109532, "memory(GiB)": 34.88, "step": 76455, "train_speed(iter/s)": 0.412159 }, { "acc": 0.94957457, "epoch": 2.070235290932229, "grad_norm": 6.720155715942383, "learning_rate": 5.123535627177522e-06, "loss": 0.30404189, "memory(GiB)": 34.88, "step": 76460, "train_speed(iter/s)": 0.412161 }, { "acc": 0.90861855, "epoch": 2.070370671215444, "grad_norm": 8.822903633117676, "learning_rate": 5.122976232214949e-06, "loss": 0.48703957, "memory(GiB)": 34.88, "step": 76465, "train_speed(iter/s)": 0.412162 }, { "acc": 0.94407558, "epoch": 2.0705060514986595, "grad_norm": 5.985462188720703, "learning_rate": 5.122416835718122e-06, "loss": 0.27505426, "memory(GiB)": 34.88, "step": 76470, "train_speed(iter/s)": 0.412164 }, { "acc": 0.93023825, "epoch": 2.0706414317818753, "grad_norm": 7.3481268882751465, "learning_rate": 5.121857437694046e-06, "loss": 0.39725068, "memory(GiB)": 34.88, "step": 76475, "train_speed(iter/s)": 0.412165 }, { "acc": 0.91685467, "epoch": 2.0707768120650907, "grad_norm": 9.614982604980469, "learning_rate": 5.121298038149728e-06, "loss": 0.44924374, "memory(GiB)": 34.88, "step": 76480, "train_speed(iter/s)": 0.412166 }, { "acc": 0.92154665, "epoch": 2.0709121923483065, "grad_norm": 5.28849458694458, "learning_rate": 5.120738637092181e-06, "loss": 0.37360058, "memory(GiB)": 34.88, "step": 76485, "train_speed(iter/s)": 0.412168 }, { "acc": 0.93823013, "epoch": 2.071047572631522, "grad_norm": 6.668564319610596, "learning_rate": 5.120179234528407e-06, "loss": 0.31431782, "memory(GiB)": 34.88, "step": 76490, "train_speed(iter/s)": 0.412169 }, { "acc": 0.93690243, "epoch": 2.0711829529147376, "grad_norm": 5.10995626449585, "learning_rate": 5.119619830465415e-06, "loss": 0.32183447, "memory(GiB)": 34.88, "step": 76495, "train_speed(iter/s)": 0.41217 }, { "acc": 0.93395786, "epoch": 2.071318333197953, "grad_norm": 3.7378599643707275, "learning_rate": 5.119060424910214e-06, "loss": 0.41237907, "memory(GiB)": 34.88, "step": 76500, "train_speed(iter/s)": 0.412172 }, { "acc": 0.91525068, "epoch": 2.071453713481169, "grad_norm": 5.84889030456543, "learning_rate": 5.1185010178698105e-06, "loss": 0.55477057, "memory(GiB)": 34.88, "step": 76505, "train_speed(iter/s)": 0.412173 }, { "acc": 0.91368208, "epoch": 2.071589093764384, "grad_norm": 6.950631618499756, "learning_rate": 5.117941609351211e-06, "loss": 0.46992664, "memory(GiB)": 34.88, "step": 76510, "train_speed(iter/s)": 0.412175 }, { "acc": 0.88977242, "epoch": 2.0717244740475995, "grad_norm": 8.416596412658691, "learning_rate": 5.117382199361426e-06, "loss": 0.57413568, "memory(GiB)": 34.88, "step": 76515, "train_speed(iter/s)": 0.412176 }, { "acc": 0.92240086, "epoch": 2.0718598543308153, "grad_norm": 11.824557304382324, "learning_rate": 5.116822787907461e-06, "loss": 0.42557564, "memory(GiB)": 34.88, "step": 76520, "train_speed(iter/s)": 0.412177 }, { "acc": 0.91764479, "epoch": 2.0719952346140307, "grad_norm": 6.8751349449157715, "learning_rate": 5.116263374996325e-06, "loss": 0.40705261, "memory(GiB)": 34.88, "step": 76525, "train_speed(iter/s)": 0.412179 }, { "acc": 0.92117767, "epoch": 2.0721306148972465, "grad_norm": 7.118514060974121, "learning_rate": 5.115703960635026e-06, "loss": 0.47535782, "memory(GiB)": 34.88, "step": 76530, "train_speed(iter/s)": 0.41218 }, { "acc": 0.91470966, "epoch": 2.072265995180462, "grad_norm": 5.545430660247803, "learning_rate": 5.11514454483057e-06, "loss": 0.50583496, "memory(GiB)": 34.88, "step": 76535, "train_speed(iter/s)": 0.412182 }, { "acc": 0.91656713, "epoch": 2.0724013754636776, "grad_norm": 20.300317764282227, "learning_rate": 5.114585127589967e-06, "loss": 0.42287292, "memory(GiB)": 34.88, "step": 76540, "train_speed(iter/s)": 0.412183 }, { "acc": 0.91667032, "epoch": 2.072536755746893, "grad_norm": 7.764150619506836, "learning_rate": 5.114025708920222e-06, "loss": 0.4718689, "memory(GiB)": 34.88, "step": 76545, "train_speed(iter/s)": 0.412184 }, { "acc": 0.91516495, "epoch": 2.072672136030109, "grad_norm": 5.30519962310791, "learning_rate": 5.113466288828345e-06, "loss": 0.52909269, "memory(GiB)": 34.88, "step": 76550, "train_speed(iter/s)": 0.412186 }, { "acc": 0.92503376, "epoch": 2.072807516313324, "grad_norm": 3.9195916652679443, "learning_rate": 5.1129068673213425e-06, "loss": 0.38686178, "memory(GiB)": 34.88, "step": 76555, "train_speed(iter/s)": 0.412187 }, { "acc": 0.89994678, "epoch": 2.0729428965965395, "grad_norm": 7.124534606933594, "learning_rate": 5.112347444406224e-06, "loss": 0.56216974, "memory(GiB)": 34.88, "step": 76560, "train_speed(iter/s)": 0.412188 }, { "acc": 0.91713743, "epoch": 2.0730782768797553, "grad_norm": 4.8391923904418945, "learning_rate": 5.1117880200899965e-06, "loss": 0.49965453, "memory(GiB)": 34.88, "step": 76565, "train_speed(iter/s)": 0.41219 }, { "acc": 0.9154582, "epoch": 2.0732136571629707, "grad_norm": 8.484885215759277, "learning_rate": 5.111228594379668e-06, "loss": 0.43970499, "memory(GiB)": 34.88, "step": 76570, "train_speed(iter/s)": 0.412191 }, { "acc": 0.93569698, "epoch": 2.0733490374461865, "grad_norm": 3.766078472137451, "learning_rate": 5.110669167282247e-06, "loss": 0.39441123, "memory(GiB)": 34.88, "step": 76575, "train_speed(iter/s)": 0.412192 }, { "acc": 0.92933483, "epoch": 2.073484417729402, "grad_norm": 6.0966477394104, "learning_rate": 5.11010973880474e-06, "loss": 0.31774964, "memory(GiB)": 34.88, "step": 76580, "train_speed(iter/s)": 0.412194 }, { "acc": 0.90527506, "epoch": 2.0736197980126176, "grad_norm": 9.402658462524414, "learning_rate": 5.1095503089541546e-06, "loss": 0.60674086, "memory(GiB)": 34.88, "step": 76585, "train_speed(iter/s)": 0.412195 }, { "acc": 0.91916542, "epoch": 2.073755178295833, "grad_norm": 13.661836624145508, "learning_rate": 5.108990877737499e-06, "loss": 0.45590591, "memory(GiB)": 34.88, "step": 76590, "train_speed(iter/s)": 0.412197 }, { "acc": 0.91571369, "epoch": 2.0738905585790484, "grad_norm": 11.374465942382812, "learning_rate": 5.108431445161783e-06, "loss": 0.41186571, "memory(GiB)": 34.88, "step": 76595, "train_speed(iter/s)": 0.412198 }, { "acc": 0.92917109, "epoch": 2.074025938862264, "grad_norm": 9.068734169006348, "learning_rate": 5.107872011234017e-06, "loss": 0.38552012, "memory(GiB)": 34.88, "step": 76600, "train_speed(iter/s)": 0.4122 }, { "acc": 0.9285759, "epoch": 2.0741613191454795, "grad_norm": 7.0532732009887695, "learning_rate": 5.107312575961201e-06, "loss": 0.44336052, "memory(GiB)": 34.88, "step": 76605, "train_speed(iter/s)": 0.412201 }, { "acc": 0.9218338, "epoch": 2.0742966994286953, "grad_norm": 9.18872356414795, "learning_rate": 5.10675313935035e-06, "loss": 0.43268213, "memory(GiB)": 34.88, "step": 76610, "train_speed(iter/s)": 0.412202 }, { "acc": 0.93697062, "epoch": 2.0744320797119107, "grad_norm": 14.052931785583496, "learning_rate": 5.106193701408468e-06, "loss": 0.39375505, "memory(GiB)": 34.88, "step": 76615, "train_speed(iter/s)": 0.412204 }, { "acc": 0.92687778, "epoch": 2.0745674599951265, "grad_norm": 7.182693004608154, "learning_rate": 5.105634262142567e-06, "loss": 0.39593658, "memory(GiB)": 34.88, "step": 76620, "train_speed(iter/s)": 0.412205 }, { "acc": 0.9197093, "epoch": 2.074702840278342, "grad_norm": 10.041362762451172, "learning_rate": 5.10507482155965e-06, "loss": 0.4627203, "memory(GiB)": 34.88, "step": 76625, "train_speed(iter/s)": 0.412207 }, { "acc": 0.92196884, "epoch": 2.074838220561557, "grad_norm": 8.365499496459961, "learning_rate": 5.104515379666729e-06, "loss": 0.38253655, "memory(GiB)": 34.88, "step": 76630, "train_speed(iter/s)": 0.412208 }, { "acc": 0.92068176, "epoch": 2.074973600844773, "grad_norm": 4.3157782554626465, "learning_rate": 5.103955936470812e-06, "loss": 0.44153552, "memory(GiB)": 34.88, "step": 76635, "train_speed(iter/s)": 0.412209 }, { "acc": 0.944207, "epoch": 2.0751089811279884, "grad_norm": 4.189300060272217, "learning_rate": 5.1033964919789035e-06, "loss": 0.28370595, "memory(GiB)": 34.88, "step": 76640, "train_speed(iter/s)": 0.41221 }, { "acc": 0.91206608, "epoch": 2.075244361411204, "grad_norm": 2.899157762527466, "learning_rate": 5.102837046198017e-06, "loss": 0.52055616, "memory(GiB)": 34.88, "step": 76645, "train_speed(iter/s)": 0.412212 }, { "acc": 0.91745396, "epoch": 2.0753797416944195, "grad_norm": 4.38067626953125, "learning_rate": 5.102277599135157e-06, "loss": 0.39688845, "memory(GiB)": 34.88, "step": 76650, "train_speed(iter/s)": 0.412213 }, { "acc": 0.93222198, "epoch": 2.0755151219776353, "grad_norm": 2.552269697189331, "learning_rate": 5.101718150797333e-06, "loss": 0.37101808, "memory(GiB)": 34.88, "step": 76655, "train_speed(iter/s)": 0.412215 }, { "acc": 0.91591015, "epoch": 2.0756505022608507, "grad_norm": 14.63507080078125, "learning_rate": 5.1011587011915535e-06, "loss": 0.47298088, "memory(GiB)": 34.88, "step": 76660, "train_speed(iter/s)": 0.412216 }, { "acc": 0.92399426, "epoch": 2.0757858825440665, "grad_norm": 7.5286173820495605, "learning_rate": 5.100599250324824e-06, "loss": 0.45243115, "memory(GiB)": 34.88, "step": 76665, "train_speed(iter/s)": 0.412218 }, { "acc": 0.88948269, "epoch": 2.075921262827282, "grad_norm": 7.197139739990234, "learning_rate": 5.100039798204156e-06, "loss": 0.59854798, "memory(GiB)": 34.88, "step": 76670, "train_speed(iter/s)": 0.412219 }, { "acc": 0.91584682, "epoch": 2.076056643110497, "grad_norm": 19.60922622680664, "learning_rate": 5.099480344836556e-06, "loss": 0.47078047, "memory(GiB)": 34.88, "step": 76675, "train_speed(iter/s)": 0.41222 }, { "acc": 0.92245789, "epoch": 2.076192023393713, "grad_norm": 6.094127655029297, "learning_rate": 5.098920890229033e-06, "loss": 0.35596428, "memory(GiB)": 34.88, "step": 76680, "train_speed(iter/s)": 0.412222 }, { "acc": 0.92473087, "epoch": 2.0763274036769284, "grad_norm": 6.308176517486572, "learning_rate": 5.098361434388592e-06, "loss": 0.42071223, "memory(GiB)": 34.88, "step": 76685, "train_speed(iter/s)": 0.412223 }, { "acc": 0.924786, "epoch": 2.076462783960144, "grad_norm": 5.192559719085693, "learning_rate": 5.097801977322249e-06, "loss": 0.40510554, "memory(GiB)": 34.88, "step": 76690, "train_speed(iter/s)": 0.412224 }, { "acc": 0.91580915, "epoch": 2.0765981642433595, "grad_norm": 6.917126178741455, "learning_rate": 5.097242519037006e-06, "loss": 0.45415077, "memory(GiB)": 34.88, "step": 76695, "train_speed(iter/s)": 0.412226 }, { "acc": 0.90471134, "epoch": 2.0767335445265753, "grad_norm": 10.639335632324219, "learning_rate": 5.096683059539871e-06, "loss": 0.57168331, "memory(GiB)": 34.88, "step": 76700, "train_speed(iter/s)": 0.412227 }, { "acc": 0.91274967, "epoch": 2.0768689248097907, "grad_norm": 4.504687309265137, "learning_rate": 5.096123598837856e-06, "loss": 0.56179543, "memory(GiB)": 34.88, "step": 76705, "train_speed(iter/s)": 0.412229 }, { "acc": 0.92706451, "epoch": 2.077004305093006, "grad_norm": 7.007269859313965, "learning_rate": 5.095564136937966e-06, "loss": 0.37889774, "memory(GiB)": 34.88, "step": 76710, "train_speed(iter/s)": 0.41223 }, { "acc": 0.9113245, "epoch": 2.077139685376222, "grad_norm": 13.301168441772461, "learning_rate": 5.095004673847212e-06, "loss": 0.53072195, "memory(GiB)": 34.88, "step": 76715, "train_speed(iter/s)": 0.412231 }, { "acc": 0.91151772, "epoch": 2.077275065659437, "grad_norm": 15.566096305847168, "learning_rate": 5.094445209572602e-06, "loss": 0.49890966, "memory(GiB)": 34.88, "step": 76720, "train_speed(iter/s)": 0.412233 }, { "acc": 0.92029266, "epoch": 2.077410445942653, "grad_norm": 24.79530906677246, "learning_rate": 5.093885744121142e-06, "loss": 0.50039663, "memory(GiB)": 34.88, "step": 76725, "train_speed(iter/s)": 0.412234 }, { "acc": 0.92127018, "epoch": 2.0775458262258684, "grad_norm": 15.40746784210205, "learning_rate": 5.093326277499841e-06, "loss": 0.4278729, "memory(GiB)": 34.88, "step": 76730, "train_speed(iter/s)": 0.412235 }, { "acc": 0.931042, "epoch": 2.077681206509084, "grad_norm": 6.994389533996582, "learning_rate": 5.092766809715712e-06, "loss": 0.37678032, "memory(GiB)": 34.88, "step": 76735, "train_speed(iter/s)": 0.412237 }, { "acc": 0.92280178, "epoch": 2.0778165867922995, "grad_norm": 7.488739013671875, "learning_rate": 5.092207340775759e-06, "loss": 0.38324981, "memory(GiB)": 34.88, "step": 76740, "train_speed(iter/s)": 0.412238 }, { "acc": 0.89989672, "epoch": 2.0779519670755153, "grad_norm": 11.381322860717773, "learning_rate": 5.09164787068699e-06, "loss": 0.49746375, "memory(GiB)": 34.88, "step": 76745, "train_speed(iter/s)": 0.41224 }, { "acc": 0.92599039, "epoch": 2.0780873473587307, "grad_norm": 7.269526481628418, "learning_rate": 5.0910883994564154e-06, "loss": 0.37100484, "memory(GiB)": 34.88, "step": 76750, "train_speed(iter/s)": 0.412241 }, { "acc": 0.9165309, "epoch": 2.078222727641946, "grad_norm": 8.21350383758545, "learning_rate": 5.090528927091042e-06, "loss": 0.39963419, "memory(GiB)": 34.88, "step": 76755, "train_speed(iter/s)": 0.412242 }, { "acc": 0.91837921, "epoch": 2.078358107925162, "grad_norm": 8.717782020568848, "learning_rate": 5.089969453597882e-06, "loss": 0.4190464, "memory(GiB)": 34.88, "step": 76760, "train_speed(iter/s)": 0.412244 }, { "acc": 0.93351574, "epoch": 2.078493488208377, "grad_norm": 11.957386016845703, "learning_rate": 5.089409978983939e-06, "loss": 0.36344862, "memory(GiB)": 34.88, "step": 76765, "train_speed(iter/s)": 0.412245 }, { "acc": 0.92559919, "epoch": 2.078628868491593, "grad_norm": 10.746603965759277, "learning_rate": 5.088850503256224e-06, "loss": 0.34975958, "memory(GiB)": 34.88, "step": 76770, "train_speed(iter/s)": 0.412246 }, { "acc": 0.90403681, "epoch": 2.0787642487748084, "grad_norm": 11.514193534851074, "learning_rate": 5.088291026421746e-06, "loss": 0.60294342, "memory(GiB)": 34.88, "step": 76775, "train_speed(iter/s)": 0.412248 }, { "acc": 0.90690174, "epoch": 2.078899629058024, "grad_norm": 16.618000030517578, "learning_rate": 5.0877315484875125e-06, "loss": 0.48463469, "memory(GiB)": 34.88, "step": 76780, "train_speed(iter/s)": 0.412249 }, { "acc": 0.93432255, "epoch": 2.0790350093412395, "grad_norm": 6.6935248374938965, "learning_rate": 5.087172069460533e-06, "loss": 0.34726095, "memory(GiB)": 34.88, "step": 76785, "train_speed(iter/s)": 0.41225 }, { "acc": 0.90529547, "epoch": 2.079170389624455, "grad_norm": 13.642027854919434, "learning_rate": 5.086612589347815e-06, "loss": 0.52407598, "memory(GiB)": 34.88, "step": 76790, "train_speed(iter/s)": 0.412252 }, { "acc": 0.90578384, "epoch": 2.0793057699076707, "grad_norm": 5.509688377380371, "learning_rate": 5.086053108156368e-06, "loss": 0.56440077, "memory(GiB)": 34.88, "step": 76795, "train_speed(iter/s)": 0.412253 }, { "acc": 0.89456921, "epoch": 2.079441150190886, "grad_norm": 34.13629913330078, "learning_rate": 5.0854936258931995e-06, "loss": 0.63430681, "memory(GiB)": 34.88, "step": 76800, "train_speed(iter/s)": 0.412254 }, { "acc": 0.89699097, "epoch": 2.079576530474102, "grad_norm": 11.052530288696289, "learning_rate": 5.08493414256532e-06, "loss": 0.67125125, "memory(GiB)": 34.88, "step": 76805, "train_speed(iter/s)": 0.412256 }, { "acc": 0.91642742, "epoch": 2.079711910757317, "grad_norm": 19.008920669555664, "learning_rate": 5.084374658179735e-06, "loss": 0.48579884, "memory(GiB)": 34.88, "step": 76810, "train_speed(iter/s)": 0.412257 }, { "acc": 0.90544205, "epoch": 2.079847291040533, "grad_norm": 6.80743408203125, "learning_rate": 5.083815172743457e-06, "loss": 0.49893699, "memory(GiB)": 34.88, "step": 76815, "train_speed(iter/s)": 0.412258 }, { "acc": 0.93559809, "epoch": 2.0799826713237484, "grad_norm": 10.292664527893066, "learning_rate": 5.083255686263492e-06, "loss": 0.40416708, "memory(GiB)": 34.88, "step": 76820, "train_speed(iter/s)": 0.41226 }, { "acc": 0.90569324, "epoch": 2.080118051606964, "grad_norm": 8.960908889770508, "learning_rate": 5.082696198746849e-06, "loss": 0.46557274, "memory(GiB)": 34.88, "step": 76825, "train_speed(iter/s)": 0.412261 }, { "acc": 0.91308212, "epoch": 2.0802534318901795, "grad_norm": 18.380661010742188, "learning_rate": 5.082136710200539e-06, "loss": 0.45978251, "memory(GiB)": 34.88, "step": 76830, "train_speed(iter/s)": 0.412262 }, { "acc": 0.91536131, "epoch": 2.080388812173395, "grad_norm": 6.757516860961914, "learning_rate": 5.081577220631566e-06, "loss": 0.40983391, "memory(GiB)": 34.88, "step": 76835, "train_speed(iter/s)": 0.412264 }, { "acc": 0.91821547, "epoch": 2.0805241924566107, "grad_norm": 8.709810256958008, "learning_rate": 5.081017730046944e-06, "loss": 0.47932444, "memory(GiB)": 34.88, "step": 76840, "train_speed(iter/s)": 0.412265 }, { "acc": 0.92116909, "epoch": 2.080659572739826, "grad_norm": 19.495481491088867, "learning_rate": 5.080458238453677e-06, "loss": 0.41461329, "memory(GiB)": 34.88, "step": 76845, "train_speed(iter/s)": 0.412266 }, { "acc": 0.9263258, "epoch": 2.080794953023042, "grad_norm": 4.384284019470215, "learning_rate": 5.079898745858778e-06, "loss": 0.40517411, "memory(GiB)": 34.88, "step": 76850, "train_speed(iter/s)": 0.412268 }, { "acc": 0.90819263, "epoch": 2.080930333306257, "grad_norm": 10.098759651184082, "learning_rate": 5.079339252269252e-06, "loss": 0.55078192, "memory(GiB)": 34.88, "step": 76855, "train_speed(iter/s)": 0.412269 }, { "acc": 0.90957413, "epoch": 2.081065713589473, "grad_norm": 9.601400375366211, "learning_rate": 5.07877975769211e-06, "loss": 0.49483752, "memory(GiB)": 34.88, "step": 76860, "train_speed(iter/s)": 0.41227 }, { "acc": 0.91561756, "epoch": 2.0812010938726884, "grad_norm": 7.205135345458984, "learning_rate": 5.078220262134361e-06, "loss": 0.46996126, "memory(GiB)": 34.88, "step": 76865, "train_speed(iter/s)": 0.412272 }, { "acc": 0.90946569, "epoch": 2.0813364741559037, "grad_norm": 12.003228187561035, "learning_rate": 5.077660765603012e-06, "loss": 0.43699484, "memory(GiB)": 34.88, "step": 76870, "train_speed(iter/s)": 0.412273 }, { "acc": 0.91742687, "epoch": 2.0814718544391195, "grad_norm": 7.486634731292725, "learning_rate": 5.077101268105074e-06, "loss": 0.42037935, "memory(GiB)": 34.88, "step": 76875, "train_speed(iter/s)": 0.412274 }, { "acc": 0.916751, "epoch": 2.081607234722335, "grad_norm": 9.093174934387207, "learning_rate": 5.076541769647553e-06, "loss": 0.44409132, "memory(GiB)": 34.88, "step": 76880, "train_speed(iter/s)": 0.412276 }, { "acc": 0.91653757, "epoch": 2.0817426150055507, "grad_norm": 7.439766883850098, "learning_rate": 5.07598227023746e-06, "loss": 0.44790907, "memory(GiB)": 34.88, "step": 76885, "train_speed(iter/s)": 0.412277 }, { "acc": 0.93825016, "epoch": 2.081877995288766, "grad_norm": 9.032000541687012, "learning_rate": 5.075422769881804e-06, "loss": 0.32842479, "memory(GiB)": 34.88, "step": 76890, "train_speed(iter/s)": 0.412278 }, { "acc": 0.91509285, "epoch": 2.082013375571982, "grad_norm": 6.149618148803711, "learning_rate": 5.074863268587591e-06, "loss": 0.42319336, "memory(GiB)": 34.88, "step": 76895, "train_speed(iter/s)": 0.41228 }, { "acc": 0.90943661, "epoch": 2.082148755855197, "grad_norm": 5.905712604522705, "learning_rate": 5.074303766361834e-06, "loss": 0.42710199, "memory(GiB)": 34.88, "step": 76900, "train_speed(iter/s)": 0.412281 }, { "acc": 0.93924932, "epoch": 2.082284136138413, "grad_norm": 6.890336036682129, "learning_rate": 5.07374426321154e-06, "loss": 0.33958595, "memory(GiB)": 34.88, "step": 76905, "train_speed(iter/s)": 0.412283 }, { "acc": 0.91658726, "epoch": 2.0824195164216284, "grad_norm": 9.479536056518555, "learning_rate": 5.073184759143718e-06, "loss": 0.5311563, "memory(GiB)": 34.88, "step": 76910, "train_speed(iter/s)": 0.412284 }, { "acc": 0.9203618, "epoch": 2.0825548967048437, "grad_norm": 9.525885581970215, "learning_rate": 5.0726252541653746e-06, "loss": 0.48001223, "memory(GiB)": 34.88, "step": 76915, "train_speed(iter/s)": 0.412285 }, { "acc": 0.90565147, "epoch": 2.0826902769880595, "grad_norm": 9.660057067871094, "learning_rate": 5.072065748283522e-06, "loss": 0.59110432, "memory(GiB)": 34.88, "step": 76920, "train_speed(iter/s)": 0.412287 }, { "acc": 0.92965736, "epoch": 2.082825657271275, "grad_norm": 9.365145683288574, "learning_rate": 5.071506241505166e-06, "loss": 0.40604553, "memory(GiB)": 34.88, "step": 76925, "train_speed(iter/s)": 0.412288 }, { "acc": 0.91721344, "epoch": 2.0829610375544907, "grad_norm": 11.974844932556152, "learning_rate": 5.070946733837318e-06, "loss": 0.46109858, "memory(GiB)": 34.88, "step": 76930, "train_speed(iter/s)": 0.412289 }, { "acc": 0.90697041, "epoch": 2.083096417837706, "grad_norm": 17.122291564941406, "learning_rate": 5.070387225286988e-06, "loss": 0.57822084, "memory(GiB)": 34.88, "step": 76935, "train_speed(iter/s)": 0.41229 }, { "acc": 0.92258434, "epoch": 2.083231798120922, "grad_norm": 7.923457145690918, "learning_rate": 5.069827715861182e-06, "loss": 0.46132832, "memory(GiB)": 34.88, "step": 76940, "train_speed(iter/s)": 0.412292 }, { "acc": 0.92873135, "epoch": 2.083367178404137, "grad_norm": 7.362943649291992, "learning_rate": 5.069268205566912e-06, "loss": 0.38941202, "memory(GiB)": 34.88, "step": 76945, "train_speed(iter/s)": 0.412293 }, { "acc": 0.92082949, "epoch": 2.0835025586873526, "grad_norm": 9.429305076599121, "learning_rate": 5.068708694411182e-06, "loss": 0.41448855, "memory(GiB)": 34.88, "step": 76950, "train_speed(iter/s)": 0.412295 }, { "acc": 0.93700085, "epoch": 2.0836379389705684, "grad_norm": 5.130819320678711, "learning_rate": 5.0681491824010054e-06, "loss": 0.27224054, "memory(GiB)": 34.88, "step": 76955, "train_speed(iter/s)": 0.412296 }, { "acc": 0.91390533, "epoch": 2.0837733192537837, "grad_norm": 18.37616729736328, "learning_rate": 5.067589669543389e-06, "loss": 0.46996007, "memory(GiB)": 34.88, "step": 76960, "train_speed(iter/s)": 0.412297 }, { "acc": 0.9263875, "epoch": 2.0839086995369995, "grad_norm": 5.155484676361084, "learning_rate": 5.0670301558453424e-06, "loss": 0.40011997, "memory(GiB)": 34.88, "step": 76965, "train_speed(iter/s)": 0.412299 }, { "acc": 0.93116865, "epoch": 2.084044079820215, "grad_norm": 4.998206615447998, "learning_rate": 5.066470641313878e-06, "loss": 0.33702826, "memory(GiB)": 34.88, "step": 76970, "train_speed(iter/s)": 0.4123 }, { "acc": 0.92206631, "epoch": 2.0841794601034307, "grad_norm": 8.615119934082031, "learning_rate": 5.0659111259559975e-06, "loss": 0.46776133, "memory(GiB)": 34.88, "step": 76975, "train_speed(iter/s)": 0.412302 }, { "acc": 0.92595329, "epoch": 2.084314840386646, "grad_norm": 2.379701614379883, "learning_rate": 5.065351609778718e-06, "loss": 0.46291127, "memory(GiB)": 34.88, "step": 76980, "train_speed(iter/s)": 0.412303 }, { "acc": 0.91427593, "epoch": 2.084450220669862, "grad_norm": 9.704434394836426, "learning_rate": 5.0647920927890416e-06, "loss": 0.52397609, "memory(GiB)": 34.88, "step": 76985, "train_speed(iter/s)": 0.412304 }, { "acc": 0.90261917, "epoch": 2.084585600953077, "grad_norm": 12.131916999816895, "learning_rate": 5.064232574993983e-06, "loss": 0.60821381, "memory(GiB)": 34.88, "step": 76990, "train_speed(iter/s)": 0.412306 }, { "acc": 0.92981148, "epoch": 2.0847209812362926, "grad_norm": 8.693649291992188, "learning_rate": 5.063673056400547e-06, "loss": 0.39476099, "memory(GiB)": 34.88, "step": 76995, "train_speed(iter/s)": 0.412307 }, { "acc": 0.93183193, "epoch": 2.0848563615195084, "grad_norm": 4.106732368469238, "learning_rate": 5.063113537015746e-06, "loss": 0.33749294, "memory(GiB)": 34.88, "step": 77000, "train_speed(iter/s)": 0.412309 }, { "acc": 0.92236881, "epoch": 2.0849917418027237, "grad_norm": 5.111997127532959, "learning_rate": 5.0625540168465845e-06, "loss": 0.32353258, "memory(GiB)": 34.88, "step": 77005, "train_speed(iter/s)": 0.41231 }, { "acc": 0.92598991, "epoch": 2.0851271220859395, "grad_norm": 7.356438159942627, "learning_rate": 5.061994495900076e-06, "loss": 0.42694526, "memory(GiB)": 34.88, "step": 77010, "train_speed(iter/s)": 0.412311 }, { "acc": 0.9114502, "epoch": 2.085262502369155, "grad_norm": 6.754521369934082, "learning_rate": 5.061434974183228e-06, "loss": 0.57959824, "memory(GiB)": 34.88, "step": 77015, "train_speed(iter/s)": 0.412313 }, { "acc": 0.91503048, "epoch": 2.0853978826523707, "grad_norm": 21.22248649597168, "learning_rate": 5.06087545170305e-06, "loss": 0.47608685, "memory(GiB)": 34.88, "step": 77020, "train_speed(iter/s)": 0.412314 }, { "acc": 0.9350502, "epoch": 2.085533262935586, "grad_norm": 4.639435291290283, "learning_rate": 5.0603159284665515e-06, "loss": 0.3673038, "memory(GiB)": 34.88, "step": 77025, "train_speed(iter/s)": 0.412315 }, { "acc": 0.92332325, "epoch": 2.0856686432188014, "grad_norm": 3.053753137588501, "learning_rate": 5.059756404480741e-06, "loss": 0.46420565, "memory(GiB)": 34.88, "step": 77030, "train_speed(iter/s)": 0.412317 }, { "acc": 0.93404503, "epoch": 2.085804023502017, "grad_norm": 14.1166353225708, "learning_rate": 5.059196879752627e-06, "loss": 0.30658612, "memory(GiB)": 34.88, "step": 77035, "train_speed(iter/s)": 0.412318 }, { "acc": 0.92094431, "epoch": 2.0859394037852326, "grad_norm": 6.906944751739502, "learning_rate": 5.0586373542892185e-06, "loss": 0.42611613, "memory(GiB)": 34.88, "step": 77040, "train_speed(iter/s)": 0.412319 }, { "acc": 0.9023221, "epoch": 2.0860747840684484, "grad_norm": 9.537567138671875, "learning_rate": 5.058077828097525e-06, "loss": 0.60025244, "memory(GiB)": 34.88, "step": 77045, "train_speed(iter/s)": 0.412321 }, { "acc": 0.91899071, "epoch": 2.0862101643516637, "grad_norm": 14.308670997619629, "learning_rate": 5.057518301184557e-06, "loss": 0.44489317, "memory(GiB)": 34.88, "step": 77050, "train_speed(iter/s)": 0.412322 }, { "acc": 0.9047802, "epoch": 2.0863455446348795, "grad_norm": 33.531097412109375, "learning_rate": 5.056958773557323e-06, "loss": 0.5669127, "memory(GiB)": 34.88, "step": 77055, "train_speed(iter/s)": 0.412324 }, { "acc": 0.91750154, "epoch": 2.086480924918095, "grad_norm": 12.031476020812988, "learning_rate": 5.056399245222832e-06, "loss": 0.42541194, "memory(GiB)": 34.88, "step": 77060, "train_speed(iter/s)": 0.412325 }, { "acc": 0.92993145, "epoch": 2.0866163052013107, "grad_norm": 15.181410789489746, "learning_rate": 5.055839716188091e-06, "loss": 0.35846262, "memory(GiB)": 34.88, "step": 77065, "train_speed(iter/s)": 0.412326 }, { "acc": 0.92339163, "epoch": 2.086751685484526, "grad_norm": 6.776115894317627, "learning_rate": 5.0552801864601155e-06, "loss": 0.38734407, "memory(GiB)": 34.88, "step": 77070, "train_speed(iter/s)": 0.412327 }, { "acc": 0.9010313, "epoch": 2.0868870657677414, "grad_norm": 10.742097854614258, "learning_rate": 5.054720656045908e-06, "loss": 0.54428062, "memory(GiB)": 34.88, "step": 77075, "train_speed(iter/s)": 0.412329 }, { "acc": 0.90139999, "epoch": 2.087022446050957, "grad_norm": 9.03848648071289, "learning_rate": 5.054161124952479e-06, "loss": 0.54659901, "memory(GiB)": 34.88, "step": 77080, "train_speed(iter/s)": 0.41233 }, { "acc": 0.89407997, "epoch": 2.0871578263341726, "grad_norm": 17.974750518798828, "learning_rate": 5.053601593186841e-06, "loss": 0.59200191, "memory(GiB)": 34.88, "step": 77085, "train_speed(iter/s)": 0.412331 }, { "acc": 0.91289177, "epoch": 2.0872932066173884, "grad_norm": 9.927807807922363, "learning_rate": 5.053042060755999e-06, "loss": 0.52008104, "memory(GiB)": 34.88, "step": 77090, "train_speed(iter/s)": 0.412333 }, { "acc": 0.91300621, "epoch": 2.0874285869006037, "grad_norm": 5.058974742889404, "learning_rate": 5.0524825276669675e-06, "loss": 0.45857024, "memory(GiB)": 34.88, "step": 77095, "train_speed(iter/s)": 0.412334 }, { "acc": 0.92178593, "epoch": 2.0875639671838195, "grad_norm": 8.44747543334961, "learning_rate": 5.05192299392675e-06, "loss": 0.47702498, "memory(GiB)": 34.88, "step": 77100, "train_speed(iter/s)": 0.412336 }, { "acc": 0.91207476, "epoch": 2.087699347467035, "grad_norm": 7.787415027618408, "learning_rate": 5.05136345954236e-06, "loss": 0.47296777, "memory(GiB)": 34.88, "step": 77105, "train_speed(iter/s)": 0.412337 }, { "acc": 0.93279486, "epoch": 2.0878347277502503, "grad_norm": 7.014225482940674, "learning_rate": 5.050803924520805e-06, "loss": 0.44472404, "memory(GiB)": 34.88, "step": 77110, "train_speed(iter/s)": 0.412338 }, { "acc": 0.92310658, "epoch": 2.087970108033466, "grad_norm": 9.429932594299316, "learning_rate": 5.050244388869095e-06, "loss": 0.42565908, "memory(GiB)": 34.88, "step": 77115, "train_speed(iter/s)": 0.41234 }, { "acc": 0.93222437, "epoch": 2.0881054883166814, "grad_norm": 10.689001083374023, "learning_rate": 5.049684852594239e-06, "loss": 0.36917367, "memory(GiB)": 34.88, "step": 77120, "train_speed(iter/s)": 0.412341 }, { "acc": 0.93020992, "epoch": 2.088240868599897, "grad_norm": 4.083495616912842, "learning_rate": 5.049125315703244e-06, "loss": 0.37871046, "memory(GiB)": 34.88, "step": 77125, "train_speed(iter/s)": 0.412342 }, { "acc": 0.92792301, "epoch": 2.0883762488831126, "grad_norm": 3.822038412094116, "learning_rate": 5.048565778203123e-06, "loss": 0.41683788, "memory(GiB)": 34.88, "step": 77130, "train_speed(iter/s)": 0.412343 }, { "acc": 0.92764435, "epoch": 2.0885116291663284, "grad_norm": 6.185860633850098, "learning_rate": 5.048006240100882e-06, "loss": 0.44091625, "memory(GiB)": 34.88, "step": 77135, "train_speed(iter/s)": 0.412345 }, { "acc": 0.92460623, "epoch": 2.0886470094495437, "grad_norm": 6.63896369934082, "learning_rate": 5.047446701403533e-06, "loss": 0.45879021, "memory(GiB)": 34.88, "step": 77140, "train_speed(iter/s)": 0.412346 }, { "acc": 0.8985857, "epoch": 2.088782389732759, "grad_norm": 7.817228317260742, "learning_rate": 5.046887162118085e-06, "loss": 0.52551212, "memory(GiB)": 34.88, "step": 77145, "train_speed(iter/s)": 0.412347 }, { "acc": 0.91593246, "epoch": 2.088917770015975, "grad_norm": 8.812535285949707, "learning_rate": 5.046327622251547e-06, "loss": 0.52550516, "memory(GiB)": 34.88, "step": 77150, "train_speed(iter/s)": 0.412349 }, { "acc": 0.92838917, "epoch": 2.0890531502991903, "grad_norm": 4.846108436584473, "learning_rate": 5.045768081810927e-06, "loss": 0.40206223, "memory(GiB)": 34.88, "step": 77155, "train_speed(iter/s)": 0.41235 }, { "acc": 0.89467821, "epoch": 2.089188530582406, "grad_norm": 6.174314975738525, "learning_rate": 5.045208540803235e-06, "loss": 0.50964637, "memory(GiB)": 34.88, "step": 77160, "train_speed(iter/s)": 0.412351 }, { "acc": 0.91083908, "epoch": 2.0893239108656214, "grad_norm": 6.260019302368164, "learning_rate": 5.044648999235482e-06, "loss": 0.47277489, "memory(GiB)": 34.88, "step": 77165, "train_speed(iter/s)": 0.412353 }, { "acc": 0.92339287, "epoch": 2.089459291148837, "grad_norm": 3.1540470123291016, "learning_rate": 5.044089457114674e-06, "loss": 0.41738901, "memory(GiB)": 34.88, "step": 77170, "train_speed(iter/s)": 0.412354 }, { "acc": 0.904883, "epoch": 2.0895946714320526, "grad_norm": 8.347816467285156, "learning_rate": 5.043529914447825e-06, "loss": 0.52529764, "memory(GiB)": 34.88, "step": 77175, "train_speed(iter/s)": 0.412355 }, { "acc": 0.91532707, "epoch": 2.0897300517152684, "grad_norm": 8.159250259399414, "learning_rate": 5.04297037124194e-06, "loss": 0.45626783, "memory(GiB)": 34.88, "step": 77180, "train_speed(iter/s)": 0.412356 }, { "acc": 0.91420212, "epoch": 2.0898654319984837, "grad_norm": 10.891358375549316, "learning_rate": 5.042410827504029e-06, "loss": 0.42013149, "memory(GiB)": 34.88, "step": 77185, "train_speed(iter/s)": 0.412357 }, { "acc": 0.91077957, "epoch": 2.090000812281699, "grad_norm": 8.404449462890625, "learning_rate": 5.041851283241105e-06, "loss": 0.45960541, "memory(GiB)": 34.88, "step": 77190, "train_speed(iter/s)": 0.412359 }, { "acc": 0.90707264, "epoch": 2.090136192564915, "grad_norm": 11.786739349365234, "learning_rate": 5.0412917384601725e-06, "loss": 0.57450981, "memory(GiB)": 34.88, "step": 77195, "train_speed(iter/s)": 0.41236 }, { "acc": 0.9386879, "epoch": 2.0902715728481303, "grad_norm": 4.314515590667725, "learning_rate": 5.040732193168246e-06, "loss": 0.28858967, "memory(GiB)": 34.88, "step": 77200, "train_speed(iter/s)": 0.412361 }, { "acc": 0.91343613, "epoch": 2.090406953131346, "grad_norm": 6.851580619812012, "learning_rate": 5.040172647372331e-06, "loss": 0.46573668, "memory(GiB)": 34.88, "step": 77205, "train_speed(iter/s)": 0.412363 }, { "acc": 0.93143463, "epoch": 2.0905423334145614, "grad_norm": 85.00812530517578, "learning_rate": 5.039613101079438e-06, "loss": 0.42867007, "memory(GiB)": 34.88, "step": 77210, "train_speed(iter/s)": 0.412364 }, { "acc": 0.92063408, "epoch": 2.090677713697777, "grad_norm": 5.810434818267822, "learning_rate": 5.039053554296577e-06, "loss": 0.49746284, "memory(GiB)": 34.88, "step": 77215, "train_speed(iter/s)": 0.412365 }, { "acc": 0.9398365, "epoch": 2.0908130939809926, "grad_norm": 4.4790472984313965, "learning_rate": 5.0384940070307576e-06, "loss": 0.37479711, "memory(GiB)": 34.88, "step": 77220, "train_speed(iter/s)": 0.412367 }, { "acc": 0.91286888, "epoch": 2.0909484742642084, "grad_norm": 20.018613815307617, "learning_rate": 5.037934459288987e-06, "loss": 0.55131474, "memory(GiB)": 34.88, "step": 77225, "train_speed(iter/s)": 0.412368 }, { "acc": 0.91373119, "epoch": 2.0910838545474237, "grad_norm": 13.2548828125, "learning_rate": 5.0373749110782756e-06, "loss": 0.458992, "memory(GiB)": 34.88, "step": 77230, "train_speed(iter/s)": 0.412369 }, { "acc": 0.92976561, "epoch": 2.091219234830639, "grad_norm": 3.7302603721618652, "learning_rate": 5.036815362405636e-06, "loss": 0.36201477, "memory(GiB)": 34.88, "step": 77235, "train_speed(iter/s)": 0.412371 }, { "acc": 0.92730875, "epoch": 2.091354615113855, "grad_norm": 6.742576599121094, "learning_rate": 5.036255813278073e-06, "loss": 0.38674378, "memory(GiB)": 34.88, "step": 77240, "train_speed(iter/s)": 0.412372 }, { "acc": 0.91279583, "epoch": 2.0914899953970703, "grad_norm": 15.741196632385254, "learning_rate": 5.035696263702601e-06, "loss": 0.44053545, "memory(GiB)": 34.88, "step": 77245, "train_speed(iter/s)": 0.412373 }, { "acc": 0.93285007, "epoch": 2.091625375680286, "grad_norm": 22.028640747070312, "learning_rate": 5.035136713686224e-06, "loss": 0.36905417, "memory(GiB)": 34.88, "step": 77250, "train_speed(iter/s)": 0.412375 }, { "acc": 0.92282906, "epoch": 2.0917607559635014, "grad_norm": 7.1914215087890625, "learning_rate": 5.034577163235954e-06, "loss": 0.37746699, "memory(GiB)": 34.88, "step": 77255, "train_speed(iter/s)": 0.412376 }, { "acc": 0.91584024, "epoch": 2.091896136246717, "grad_norm": 9.432780265808105, "learning_rate": 5.034017612358801e-06, "loss": 0.46152554, "memory(GiB)": 34.88, "step": 77260, "train_speed(iter/s)": 0.412377 }, { "acc": 0.9045845, "epoch": 2.0920315165299326, "grad_norm": 12.955732345581055, "learning_rate": 5.0334580610617745e-06, "loss": 0.57170172, "memory(GiB)": 34.88, "step": 77265, "train_speed(iter/s)": 0.412379 }, { "acc": 0.90828466, "epoch": 2.092166896813148, "grad_norm": 6.425115585327148, "learning_rate": 5.032898509351883e-06, "loss": 0.48046255, "memory(GiB)": 34.88, "step": 77270, "train_speed(iter/s)": 0.41238 }, { "acc": 0.92813988, "epoch": 2.0923022770963637, "grad_norm": 7.835244655609131, "learning_rate": 5.032338957236138e-06, "loss": 0.34608483, "memory(GiB)": 34.88, "step": 77275, "train_speed(iter/s)": 0.412381 }, { "acc": 0.92880001, "epoch": 2.092437657379579, "grad_norm": 16.663898468017578, "learning_rate": 5.031779404721548e-06, "loss": 0.39746094, "memory(GiB)": 34.88, "step": 77280, "train_speed(iter/s)": 0.412382 }, { "acc": 0.91218529, "epoch": 2.092573037662795, "grad_norm": 6.654139041900635, "learning_rate": 5.0312198518151205e-06, "loss": 0.55548811, "memory(GiB)": 34.88, "step": 77285, "train_speed(iter/s)": 0.412384 }, { "acc": 0.91135368, "epoch": 2.0927084179460103, "grad_norm": 6.943584442138672, "learning_rate": 5.030660298523865e-06, "loss": 0.49331617, "memory(GiB)": 34.88, "step": 77290, "train_speed(iter/s)": 0.412385 }, { "acc": 0.91824627, "epoch": 2.092843798229226, "grad_norm": 8.9677734375, "learning_rate": 5.030100744854795e-06, "loss": 0.45193129, "memory(GiB)": 34.88, "step": 77295, "train_speed(iter/s)": 0.412386 }, { "acc": 0.8952158, "epoch": 2.0929791785124414, "grad_norm": 8.906379699707031, "learning_rate": 5.029541190814916e-06, "loss": 0.4424078, "memory(GiB)": 34.88, "step": 77300, "train_speed(iter/s)": 0.412388 }, { "acc": 0.92870102, "epoch": 2.0931145587956568, "grad_norm": 8.34093952178955, "learning_rate": 5.028981636411239e-06, "loss": 0.37834468, "memory(GiB)": 34.88, "step": 77305, "train_speed(iter/s)": 0.412389 }, { "acc": 0.92757072, "epoch": 2.0932499390788726, "grad_norm": 11.99709415435791, "learning_rate": 5.028422081650774e-06, "loss": 0.41094565, "memory(GiB)": 34.88, "step": 77310, "train_speed(iter/s)": 0.41239 }, { "acc": 0.9324892, "epoch": 2.093385319362088, "grad_norm": 6.881655216217041, "learning_rate": 5.02786252654053e-06, "loss": 0.40630465, "memory(GiB)": 34.88, "step": 77315, "train_speed(iter/s)": 0.412392 }, { "acc": 0.93577156, "epoch": 2.0935206996453037, "grad_norm": 7.774738311767578, "learning_rate": 5.0273029710875154e-06, "loss": 0.30333309, "memory(GiB)": 34.88, "step": 77320, "train_speed(iter/s)": 0.412393 }, { "acc": 0.90165892, "epoch": 2.093656079928519, "grad_norm": 12.853699684143066, "learning_rate": 5.0267434152987436e-06, "loss": 0.51697664, "memory(GiB)": 34.88, "step": 77325, "train_speed(iter/s)": 0.412394 }, { "acc": 0.9219285, "epoch": 2.093791460211735, "grad_norm": 7.067419528961182, "learning_rate": 5.0261838591812196e-06, "loss": 0.41408639, "memory(GiB)": 34.88, "step": 77330, "train_speed(iter/s)": 0.412396 }, { "acc": 0.90404224, "epoch": 2.0939268404949503, "grad_norm": 13.868514060974121, "learning_rate": 5.025624302741956e-06, "loss": 0.60416737, "memory(GiB)": 34.88, "step": 77335, "train_speed(iter/s)": 0.412397 }, { "acc": 0.92810936, "epoch": 2.094062220778166, "grad_norm": 10.998040199279785, "learning_rate": 5.02506474598796e-06, "loss": 0.3355006, "memory(GiB)": 34.88, "step": 77340, "train_speed(iter/s)": 0.412398 }, { "acc": 0.90410709, "epoch": 2.0941976010613814, "grad_norm": 9.554231643676758, "learning_rate": 5.024505188926242e-06, "loss": 0.53753676, "memory(GiB)": 34.88, "step": 77345, "train_speed(iter/s)": 0.4124 }, { "acc": 0.92513056, "epoch": 2.094332981344597, "grad_norm": 4.68093204498291, "learning_rate": 5.023945631563813e-06, "loss": 0.42738724, "memory(GiB)": 34.88, "step": 77350, "train_speed(iter/s)": 0.412401 }, { "acc": 0.93191891, "epoch": 2.0944683616278126, "grad_norm": 6.336528301239014, "learning_rate": 5.023386073907681e-06, "loss": 0.41682301, "memory(GiB)": 34.88, "step": 77355, "train_speed(iter/s)": 0.412403 }, { "acc": 0.92030897, "epoch": 2.094603741911028, "grad_norm": 5.742295742034912, "learning_rate": 5.022826515964856e-06, "loss": 0.43218164, "memory(GiB)": 34.88, "step": 77360, "train_speed(iter/s)": 0.412404 }, { "acc": 0.93092918, "epoch": 2.0947391221942437, "grad_norm": 8.742593765258789, "learning_rate": 5.022266957742348e-06, "loss": 0.38658051, "memory(GiB)": 34.88, "step": 77365, "train_speed(iter/s)": 0.412405 }, { "acc": 0.92054768, "epoch": 2.094874502477459, "grad_norm": 26.9872989654541, "learning_rate": 5.021707399247165e-06, "loss": 0.53417444, "memory(GiB)": 34.88, "step": 77370, "train_speed(iter/s)": 0.412406 }, { "acc": 0.91715393, "epoch": 2.095009882760675, "grad_norm": 5.577307224273682, "learning_rate": 5.021147840486318e-06, "loss": 0.50084972, "memory(GiB)": 34.88, "step": 77375, "train_speed(iter/s)": 0.412408 }, { "acc": 0.90047827, "epoch": 2.0951452630438903, "grad_norm": 10.666462898254395, "learning_rate": 5.020588281466815e-06, "loss": 0.56346388, "memory(GiB)": 34.88, "step": 77380, "train_speed(iter/s)": 0.412409 }, { "acc": 0.91752548, "epoch": 2.095280643327106, "grad_norm": 11.211994171142578, "learning_rate": 5.020028722195669e-06, "loss": 0.39669905, "memory(GiB)": 34.88, "step": 77385, "train_speed(iter/s)": 0.412411 }, { "acc": 0.92203579, "epoch": 2.0954160236103214, "grad_norm": 5.292820453643799, "learning_rate": 5.019469162679885e-06, "loss": 0.4275548, "memory(GiB)": 34.88, "step": 77390, "train_speed(iter/s)": 0.412412 }, { "acc": 0.91680222, "epoch": 2.095551403893537, "grad_norm": 7.658663272857666, "learning_rate": 5.018909602926478e-06, "loss": 0.55265479, "memory(GiB)": 34.88, "step": 77395, "train_speed(iter/s)": 0.412413 }, { "acc": 0.91748981, "epoch": 2.0956867841767526, "grad_norm": 12.938505172729492, "learning_rate": 5.018350042942453e-06, "loss": 0.47405148, "memory(GiB)": 34.88, "step": 77400, "train_speed(iter/s)": 0.412414 }, { "acc": 0.9076519, "epoch": 2.095822164459968, "grad_norm": 17.106727600097656, "learning_rate": 5.017790482734822e-06, "loss": 0.50527077, "memory(GiB)": 34.88, "step": 77405, "train_speed(iter/s)": 0.412416 }, { "acc": 0.90753136, "epoch": 2.0959575447431837, "grad_norm": 8.928986549377441, "learning_rate": 5.017230922310593e-06, "loss": 0.48069906, "memory(GiB)": 34.88, "step": 77410, "train_speed(iter/s)": 0.412417 }, { "acc": 0.89617958, "epoch": 2.096092925026399, "grad_norm": 9.300871849060059, "learning_rate": 5.016671361676775e-06, "loss": 0.63739786, "memory(GiB)": 34.88, "step": 77415, "train_speed(iter/s)": 0.412419 }, { "acc": 0.93772488, "epoch": 2.096228305309615, "grad_norm": 9.296173095703125, "learning_rate": 5.01611180084038e-06, "loss": 0.31244555, "memory(GiB)": 34.88, "step": 77420, "train_speed(iter/s)": 0.41242 }, { "acc": 0.92691126, "epoch": 2.0963636855928303, "grad_norm": 5.10632848739624, "learning_rate": 5.015552239808417e-06, "loss": 0.34207344, "memory(GiB)": 34.88, "step": 77425, "train_speed(iter/s)": 0.412421 }, { "acc": 0.90104723, "epoch": 2.0964990658760456, "grad_norm": 10.827110290527344, "learning_rate": 5.014992678587895e-06, "loss": 0.56524, "memory(GiB)": 34.88, "step": 77430, "train_speed(iter/s)": 0.412423 }, { "acc": 0.91968288, "epoch": 2.0966344461592614, "grad_norm": 7.183972358703613, "learning_rate": 5.014433117185823e-06, "loss": 0.43712735, "memory(GiB)": 34.88, "step": 77435, "train_speed(iter/s)": 0.412424 }, { "acc": 0.91619873, "epoch": 2.096769826442477, "grad_norm": 10.37271499633789, "learning_rate": 5.013873555609212e-06, "loss": 0.5125608, "memory(GiB)": 34.88, "step": 77440, "train_speed(iter/s)": 0.412425 }, { "acc": 0.89690208, "epoch": 2.0969052067256926, "grad_norm": 9.396788597106934, "learning_rate": 5.01331399386507e-06, "loss": 0.70808649, "memory(GiB)": 34.88, "step": 77445, "train_speed(iter/s)": 0.412427 }, { "acc": 0.9176754, "epoch": 2.097040587008908, "grad_norm": 5.279230117797852, "learning_rate": 5.012754431960409e-06, "loss": 0.48792925, "memory(GiB)": 34.88, "step": 77450, "train_speed(iter/s)": 0.412428 }, { "acc": 0.9218214, "epoch": 2.0971759672921237, "grad_norm": 12.023809432983398, "learning_rate": 5.0121948699022374e-06, "loss": 0.49819298, "memory(GiB)": 34.88, "step": 77455, "train_speed(iter/s)": 0.41243 }, { "acc": 0.89936457, "epoch": 2.097311347575339, "grad_norm": 12.968724250793457, "learning_rate": 5.011635307697562e-06, "loss": 0.60495968, "memory(GiB)": 34.88, "step": 77460, "train_speed(iter/s)": 0.412431 }, { "acc": 0.93262339, "epoch": 2.0974467278585545, "grad_norm": 6.235046863555908, "learning_rate": 5.011075745353398e-06, "loss": 0.41969376, "memory(GiB)": 34.88, "step": 77465, "train_speed(iter/s)": 0.412432 }, { "acc": 0.89587946, "epoch": 2.0975821081417703, "grad_norm": 11.753750801086426, "learning_rate": 5.0105161828767504e-06, "loss": 0.6316555, "memory(GiB)": 34.88, "step": 77470, "train_speed(iter/s)": 0.412434 }, { "acc": 0.91395741, "epoch": 2.0977174884249856, "grad_norm": 13.492183685302734, "learning_rate": 5.009956620274631e-06, "loss": 0.54668326, "memory(GiB)": 34.88, "step": 77475, "train_speed(iter/s)": 0.412435 }, { "acc": 0.93556938, "epoch": 2.0978528687082014, "grad_norm": 5.4296183586120605, "learning_rate": 5.009397057554047e-06, "loss": 0.30269513, "memory(GiB)": 34.88, "step": 77480, "train_speed(iter/s)": 0.412437 }, { "acc": 0.92491531, "epoch": 2.097988248991417, "grad_norm": 8.662129402160645, "learning_rate": 5.008837494722012e-06, "loss": 0.46953583, "memory(GiB)": 34.88, "step": 77485, "train_speed(iter/s)": 0.412438 }, { "acc": 0.91410484, "epoch": 2.0981236292746326, "grad_norm": 12.216926574707031, "learning_rate": 5.008277931785534e-06, "loss": 0.48150234, "memory(GiB)": 34.88, "step": 77490, "train_speed(iter/s)": 0.412439 }, { "acc": 0.92331581, "epoch": 2.098259009557848, "grad_norm": 5.979701995849609, "learning_rate": 5.00771836875162e-06, "loss": 0.47375531, "memory(GiB)": 34.88, "step": 77495, "train_speed(iter/s)": 0.412441 }, { "acc": 0.90530043, "epoch": 2.0983943898410637, "grad_norm": 8.679309844970703, "learning_rate": 5.007158805627283e-06, "loss": 0.56891994, "memory(GiB)": 34.88, "step": 77500, "train_speed(iter/s)": 0.412442 }, { "acc": 0.90979776, "epoch": 2.098529770124279, "grad_norm": 10.274801254272461, "learning_rate": 5.006599242419531e-06, "loss": 0.53015032, "memory(GiB)": 34.88, "step": 77505, "train_speed(iter/s)": 0.412443 }, { "acc": 0.92459373, "epoch": 2.0986651504074945, "grad_norm": 10.1507568359375, "learning_rate": 5.006039679135374e-06, "loss": 0.41985893, "memory(GiB)": 34.88, "step": 77510, "train_speed(iter/s)": 0.412445 }, { "acc": 0.90948887, "epoch": 2.0988005306907103, "grad_norm": 9.982353210449219, "learning_rate": 5.005480115781821e-06, "loss": 0.54622226, "memory(GiB)": 34.88, "step": 77515, "train_speed(iter/s)": 0.412446 }, { "acc": 0.9235877, "epoch": 2.0989359109739256, "grad_norm": 4.614534378051758, "learning_rate": 5.004920552365883e-06, "loss": 0.38394752, "memory(GiB)": 34.88, "step": 77520, "train_speed(iter/s)": 0.412447 }, { "acc": 0.92483234, "epoch": 2.0990712912571414, "grad_norm": 5.744265079498291, "learning_rate": 5.004360988894569e-06, "loss": 0.35525725, "memory(GiB)": 34.88, "step": 77525, "train_speed(iter/s)": 0.412449 }, { "acc": 0.91454725, "epoch": 2.099206671540357, "grad_norm": 11.456182479858398, "learning_rate": 5.003801425374888e-06, "loss": 0.49462924, "memory(GiB)": 34.88, "step": 77530, "train_speed(iter/s)": 0.41245 }, { "acc": 0.93772659, "epoch": 2.0993420518235726, "grad_norm": 5.875028133392334, "learning_rate": 5.003241861813851e-06, "loss": 0.32530086, "memory(GiB)": 34.88, "step": 77535, "train_speed(iter/s)": 0.412452 }, { "acc": 0.92178307, "epoch": 2.099477432106788, "grad_norm": 8.626230239868164, "learning_rate": 5.002682298218466e-06, "loss": 0.40547304, "memory(GiB)": 34.88, "step": 77540, "train_speed(iter/s)": 0.412453 }, { "acc": 0.91606989, "epoch": 2.0996128123900037, "grad_norm": 9.229496955871582, "learning_rate": 5.002122734595746e-06, "loss": 0.41690397, "memory(GiB)": 34.88, "step": 77545, "train_speed(iter/s)": 0.412454 }, { "acc": 0.92048664, "epoch": 2.099748192673219, "grad_norm": 10.082585334777832, "learning_rate": 5.001563170952695e-06, "loss": 0.40046515, "memory(GiB)": 34.88, "step": 77550, "train_speed(iter/s)": 0.412456 }, { "acc": 0.93407583, "epoch": 2.0998835729564345, "grad_norm": 5.21587610244751, "learning_rate": 5.001003607296326e-06, "loss": 0.31167445, "memory(GiB)": 34.88, "step": 77555, "train_speed(iter/s)": 0.412457 }, { "acc": 0.90917797, "epoch": 2.1000189532396503, "grad_norm": 11.206315040588379, "learning_rate": 5.000444043633649e-06, "loss": 0.4593997, "memory(GiB)": 34.88, "step": 77560, "train_speed(iter/s)": 0.412458 }, { "acc": 0.92087717, "epoch": 2.1001543335228656, "grad_norm": 7.827392578125, "learning_rate": 4.9998844799716725e-06, "loss": 0.48331857, "memory(GiB)": 34.88, "step": 77565, "train_speed(iter/s)": 0.412459 }, { "acc": 0.93247738, "epoch": 2.1002897138060814, "grad_norm": 5.839849472045898, "learning_rate": 4.999324916317407e-06, "loss": 0.33235877, "memory(GiB)": 34.88, "step": 77570, "train_speed(iter/s)": 0.41246 }, { "acc": 0.91456661, "epoch": 2.100425094089297, "grad_norm": 62.69026184082031, "learning_rate": 4.998765352677862e-06, "loss": 0.51649976, "memory(GiB)": 34.88, "step": 77575, "train_speed(iter/s)": 0.412461 }, { "acc": 0.90866089, "epoch": 2.1005604743725126, "grad_norm": 10.50822639465332, "learning_rate": 4.998205789060047e-06, "loss": 0.57902637, "memory(GiB)": 34.88, "step": 77580, "train_speed(iter/s)": 0.412462 }, { "acc": 0.90974102, "epoch": 2.100695854655728, "grad_norm": 11.803861618041992, "learning_rate": 4.99764622547097e-06, "loss": 0.518431, "memory(GiB)": 34.88, "step": 77585, "train_speed(iter/s)": 0.412464 }, { "acc": 0.92097092, "epoch": 2.1008312349389433, "grad_norm": 6.964588642120361, "learning_rate": 4.997086661917644e-06, "loss": 0.49598885, "memory(GiB)": 34.88, "step": 77590, "train_speed(iter/s)": 0.412465 }, { "acc": 0.92170706, "epoch": 2.100966615222159, "grad_norm": 8.802204132080078, "learning_rate": 4.996527098407075e-06, "loss": 0.42580457, "memory(GiB)": 34.88, "step": 77595, "train_speed(iter/s)": 0.412466 }, { "acc": 0.93990383, "epoch": 2.1011019955053745, "grad_norm": 8.170425415039062, "learning_rate": 4.995967534946276e-06, "loss": 0.33875198, "memory(GiB)": 34.88, "step": 77600, "train_speed(iter/s)": 0.412467 }, { "acc": 0.90098457, "epoch": 2.1012373757885903, "grad_norm": 15.712462425231934, "learning_rate": 4.995407971542254e-06, "loss": 0.59000788, "memory(GiB)": 34.88, "step": 77605, "train_speed(iter/s)": 0.412468 }, { "acc": 0.91598186, "epoch": 2.1013727560718056, "grad_norm": 17.103708267211914, "learning_rate": 4.994848408202019e-06, "loss": 0.46019549, "memory(GiB)": 34.88, "step": 77610, "train_speed(iter/s)": 0.412469 }, { "acc": 0.90778189, "epoch": 2.1015081363550214, "grad_norm": 13.193206787109375, "learning_rate": 4.9942888449325845e-06, "loss": 0.46704144, "memory(GiB)": 34.88, "step": 77615, "train_speed(iter/s)": 0.412471 }, { "acc": 0.92235041, "epoch": 2.101643516638237, "grad_norm": 33.219627380371094, "learning_rate": 4.993729281740955e-06, "loss": 0.46772423, "memory(GiB)": 34.88, "step": 77620, "train_speed(iter/s)": 0.412472 }, { "acc": 0.91717958, "epoch": 2.101778896921452, "grad_norm": 11.64022445678711, "learning_rate": 4.993169718634142e-06, "loss": 0.50822792, "memory(GiB)": 34.88, "step": 77625, "train_speed(iter/s)": 0.412474 }, { "acc": 0.9283803, "epoch": 2.101914277204668, "grad_norm": 8.525774955749512, "learning_rate": 4.992610155619154e-06, "loss": 0.39020686, "memory(GiB)": 34.88, "step": 77630, "train_speed(iter/s)": 0.412475 }, { "acc": 0.92898426, "epoch": 2.1020496574878833, "grad_norm": 4.265761375427246, "learning_rate": 4.992050592703003e-06, "loss": 0.33215375, "memory(GiB)": 34.88, "step": 77635, "train_speed(iter/s)": 0.412476 }, { "acc": 0.91820278, "epoch": 2.102185037771099, "grad_norm": 7.39650821685791, "learning_rate": 4.9914910298926964e-06, "loss": 0.49229655, "memory(GiB)": 34.88, "step": 77640, "train_speed(iter/s)": 0.412478 }, { "acc": 0.91707621, "epoch": 2.1023204180543145, "grad_norm": 6.717000484466553, "learning_rate": 4.990931467195245e-06, "loss": 0.46920671, "memory(GiB)": 34.88, "step": 77645, "train_speed(iter/s)": 0.412478 }, { "acc": 0.92051907, "epoch": 2.1024557983375303, "grad_norm": 15.031652450561523, "learning_rate": 4.990371904617659e-06, "loss": 0.42830772, "memory(GiB)": 34.88, "step": 77650, "train_speed(iter/s)": 0.41248 }, { "acc": 0.93024445, "epoch": 2.1025911786207456, "grad_norm": 12.235013961791992, "learning_rate": 4.989812342166947e-06, "loss": 0.36363912, "memory(GiB)": 34.88, "step": 77655, "train_speed(iter/s)": 0.412481 }, { "acc": 0.914116, "epoch": 2.1027265589039614, "grad_norm": 6.4838690757751465, "learning_rate": 4.989252779850121e-06, "loss": 0.46231251, "memory(GiB)": 34.88, "step": 77660, "train_speed(iter/s)": 0.412483 }, { "acc": 0.90434952, "epoch": 2.102861939187177, "grad_norm": 6.457553863525391, "learning_rate": 4.988693217674186e-06, "loss": 0.56170754, "memory(GiB)": 34.88, "step": 77665, "train_speed(iter/s)": 0.412484 }, { "acc": 0.93353586, "epoch": 2.102997319470392, "grad_norm": 4.727122783660889, "learning_rate": 4.988133655646155e-06, "loss": 0.32114739, "memory(GiB)": 34.88, "step": 77670, "train_speed(iter/s)": 0.412485 }, { "acc": 0.91069574, "epoch": 2.103132699753608, "grad_norm": 10.669988632202148, "learning_rate": 4.987574093773036e-06, "loss": 0.4967061, "memory(GiB)": 34.88, "step": 77675, "train_speed(iter/s)": 0.412486 }, { "acc": 0.91750584, "epoch": 2.1032680800368233, "grad_norm": 11.74686336517334, "learning_rate": 4.98701453206184e-06, "loss": 0.47020392, "memory(GiB)": 34.88, "step": 77680, "train_speed(iter/s)": 0.412487 }, { "acc": 0.92925854, "epoch": 2.103403460320039, "grad_norm": 10.10350227355957, "learning_rate": 4.9864549705195755e-06, "loss": 0.44072886, "memory(GiB)": 34.88, "step": 77685, "train_speed(iter/s)": 0.412488 }, { "acc": 0.92700043, "epoch": 2.1035388406032545, "grad_norm": 7.281593322753906, "learning_rate": 4.9858954091532526e-06, "loss": 0.41414986, "memory(GiB)": 34.88, "step": 77690, "train_speed(iter/s)": 0.412489 }, { "acc": 0.91259499, "epoch": 2.1036742208864703, "grad_norm": 8.246822357177734, "learning_rate": 4.985335847969883e-06, "loss": 0.48807774, "memory(GiB)": 34.88, "step": 77695, "train_speed(iter/s)": 0.412491 }, { "acc": 0.91876125, "epoch": 2.1038096011696856, "grad_norm": 9.00771427154541, "learning_rate": 4.984776286976473e-06, "loss": 0.39961712, "memory(GiB)": 34.88, "step": 77700, "train_speed(iter/s)": 0.412492 }, { "acc": 0.92029667, "epoch": 2.103944981452901, "grad_norm": 4.477232456207275, "learning_rate": 4.984216726180033e-06, "loss": 0.42959418, "memory(GiB)": 34.88, "step": 77705, "train_speed(iter/s)": 0.412493 }, { "acc": 0.92098751, "epoch": 2.104080361736117, "grad_norm": 4.338476181030273, "learning_rate": 4.983657165587572e-06, "loss": 0.40331402, "memory(GiB)": 34.88, "step": 77710, "train_speed(iter/s)": 0.412494 }, { "acc": 0.91492043, "epoch": 2.104215742019332, "grad_norm": 7.350397109985352, "learning_rate": 4.983097605206101e-06, "loss": 0.48090076, "memory(GiB)": 34.88, "step": 77715, "train_speed(iter/s)": 0.412495 }, { "acc": 0.91631985, "epoch": 2.104351122302548, "grad_norm": 20.602968215942383, "learning_rate": 4.98253804504263e-06, "loss": 0.45563188, "memory(GiB)": 34.88, "step": 77720, "train_speed(iter/s)": 0.412496 }, { "acc": 0.89636421, "epoch": 2.1044865025857633, "grad_norm": 18.56211280822754, "learning_rate": 4.981978485104168e-06, "loss": 0.56137733, "memory(GiB)": 34.88, "step": 77725, "train_speed(iter/s)": 0.412498 }, { "acc": 0.94097757, "epoch": 2.104621882868979, "grad_norm": 8.892716407775879, "learning_rate": 4.981418925397724e-06, "loss": 0.283834, "memory(GiB)": 34.88, "step": 77730, "train_speed(iter/s)": 0.412499 }, { "acc": 0.91336546, "epoch": 2.1047572631521945, "grad_norm": 5.620635509490967, "learning_rate": 4.980859365930307e-06, "loss": 0.42841616, "memory(GiB)": 34.88, "step": 77735, "train_speed(iter/s)": 0.4125 }, { "acc": 0.9194581, "epoch": 2.1048926434354103, "grad_norm": 5.253425598144531, "learning_rate": 4.980299806708929e-06, "loss": 0.40066056, "memory(GiB)": 34.88, "step": 77740, "train_speed(iter/s)": 0.412502 }, { "acc": 0.94397411, "epoch": 2.1050280237186256, "grad_norm": 4.121646881103516, "learning_rate": 4.979740247740597e-06, "loss": 0.28108258, "memory(GiB)": 34.88, "step": 77745, "train_speed(iter/s)": 0.412503 }, { "acc": 0.92852821, "epoch": 2.105163404001841, "grad_norm": 8.187614440917969, "learning_rate": 4.979180689032324e-06, "loss": 0.43016114, "memory(GiB)": 34.88, "step": 77750, "train_speed(iter/s)": 0.412504 }, { "acc": 0.91667471, "epoch": 2.105298784285057, "grad_norm": 6.793515205383301, "learning_rate": 4.978621130591115e-06, "loss": 0.39101021, "memory(GiB)": 34.88, "step": 77755, "train_speed(iter/s)": 0.412505 }, { "acc": 0.93398037, "epoch": 2.105434164568272, "grad_norm": 8.186622619628906, "learning_rate": 4.978061572423981e-06, "loss": 0.32386293, "memory(GiB)": 34.88, "step": 77760, "train_speed(iter/s)": 0.412507 }, { "acc": 0.90748405, "epoch": 2.105569544851488, "grad_norm": 4.60110330581665, "learning_rate": 4.977502014537935e-06, "loss": 0.50983143, "memory(GiB)": 34.88, "step": 77765, "train_speed(iter/s)": 0.412508 }, { "acc": 0.90488091, "epoch": 2.1057049251347033, "grad_norm": 13.038261413574219, "learning_rate": 4.976942456939981e-06, "loss": 0.59454579, "memory(GiB)": 34.88, "step": 77770, "train_speed(iter/s)": 0.412509 }, { "acc": 0.94229355, "epoch": 2.105840305417919, "grad_norm": 6.160707950592041, "learning_rate": 4.976382899637134e-06, "loss": 0.32626469, "memory(GiB)": 34.88, "step": 77775, "train_speed(iter/s)": 0.41251 }, { "acc": 0.92227955, "epoch": 2.1059756857011345, "grad_norm": 8.493446350097656, "learning_rate": 4.9758233426364e-06, "loss": 0.39322538, "memory(GiB)": 34.88, "step": 77780, "train_speed(iter/s)": 0.412512 }, { "acc": 0.91508017, "epoch": 2.10611106598435, "grad_norm": 28.585063934326172, "learning_rate": 4.975263785944791e-06, "loss": 0.4867178, "memory(GiB)": 34.88, "step": 77785, "train_speed(iter/s)": 0.412513 }, { "acc": 0.93097973, "epoch": 2.1062464462675656, "grad_norm": 6.397141456604004, "learning_rate": 4.974704229569314e-06, "loss": 0.34695022, "memory(GiB)": 34.88, "step": 77790, "train_speed(iter/s)": 0.412514 }, { "acc": 0.93422203, "epoch": 2.106381826550781, "grad_norm": 3.732564687728882, "learning_rate": 4.974144673516978e-06, "loss": 0.34103119, "memory(GiB)": 34.88, "step": 77795, "train_speed(iter/s)": 0.412516 }, { "acc": 0.93140421, "epoch": 2.106517206833997, "grad_norm": 4.805734634399414, "learning_rate": 4.9735851177947975e-06, "loss": 0.37184699, "memory(GiB)": 34.88, "step": 77800, "train_speed(iter/s)": 0.412517 }, { "acc": 0.92214613, "epoch": 2.106652587117212, "grad_norm": 7.389309883117676, "learning_rate": 4.973025562409777e-06, "loss": 0.39890888, "memory(GiB)": 34.88, "step": 77805, "train_speed(iter/s)": 0.412518 }, { "acc": 0.91790276, "epoch": 2.106787967400428, "grad_norm": 9.797974586486816, "learning_rate": 4.972466007368928e-06, "loss": 0.39574342, "memory(GiB)": 34.88, "step": 77810, "train_speed(iter/s)": 0.412519 }, { "acc": 0.91533298, "epoch": 2.1069233476836433, "grad_norm": 5.063022613525391, "learning_rate": 4.9719064526792585e-06, "loss": 0.40688481, "memory(GiB)": 34.88, "step": 77815, "train_speed(iter/s)": 0.41252 }, { "acc": 0.91942158, "epoch": 2.107058727966859, "grad_norm": 7.471014499664307, "learning_rate": 4.971346898347782e-06, "loss": 0.41573377, "memory(GiB)": 34.88, "step": 77820, "train_speed(iter/s)": 0.412522 }, { "acc": 0.92384663, "epoch": 2.1071941082500745, "grad_norm": 6.654553413391113, "learning_rate": 4.970787344381505e-06, "loss": 0.46524277, "memory(GiB)": 34.88, "step": 77825, "train_speed(iter/s)": 0.412523 }, { "acc": 0.91812906, "epoch": 2.10732948853329, "grad_norm": 7.810727596282959, "learning_rate": 4.970227790787435e-06, "loss": 0.46006937, "memory(GiB)": 34.88, "step": 77830, "train_speed(iter/s)": 0.412524 }, { "acc": 0.93004723, "epoch": 2.1074648688165056, "grad_norm": 14.409378051757812, "learning_rate": 4.9696682375725866e-06, "loss": 0.44415193, "memory(GiB)": 34.88, "step": 77835, "train_speed(iter/s)": 0.412525 }, { "acc": 0.9144104, "epoch": 2.107600249099721, "grad_norm": 19.144315719604492, "learning_rate": 4.969108684743965e-06, "loss": 0.54016919, "memory(GiB)": 34.88, "step": 77840, "train_speed(iter/s)": 0.412526 }, { "acc": 0.92601433, "epoch": 2.107735629382937, "grad_norm": 24.814693450927734, "learning_rate": 4.9685491323085824e-06, "loss": 0.41063032, "memory(GiB)": 34.88, "step": 77845, "train_speed(iter/s)": 0.412528 }, { "acc": 0.9203908, "epoch": 2.107871009666152, "grad_norm": 14.008477210998535, "learning_rate": 4.9679895802734455e-06, "loss": 0.47849531, "memory(GiB)": 34.88, "step": 77850, "train_speed(iter/s)": 0.412529 }, { "acc": 0.94740543, "epoch": 2.108006389949368, "grad_norm": 5.355861186981201, "learning_rate": 4.967430028645567e-06, "loss": 0.24519744, "memory(GiB)": 34.88, "step": 77855, "train_speed(iter/s)": 0.41253 }, { "acc": 0.91529808, "epoch": 2.1081417702325833, "grad_norm": 12.662368774414062, "learning_rate": 4.966870477431954e-06, "loss": 0.45716524, "memory(GiB)": 34.88, "step": 77860, "train_speed(iter/s)": 0.412531 }, { "acc": 0.93391266, "epoch": 2.1082771505157987, "grad_norm": 8.609060287475586, "learning_rate": 4.966310926639618e-06, "loss": 0.37301121, "memory(GiB)": 34.88, "step": 77865, "train_speed(iter/s)": 0.412533 }, { "acc": 0.9230341, "epoch": 2.1084125307990145, "grad_norm": 7.999373435974121, "learning_rate": 4.9657513762755675e-06, "loss": 0.43497567, "memory(GiB)": 34.88, "step": 77870, "train_speed(iter/s)": 0.412534 }, { "acc": 0.91381721, "epoch": 2.10854791108223, "grad_norm": 6.907642841339111, "learning_rate": 4.965191826346809e-06, "loss": 0.52031727, "memory(GiB)": 34.88, "step": 77875, "train_speed(iter/s)": 0.412535 }, { "acc": 0.91259308, "epoch": 2.1086832913654456, "grad_norm": 12.575760841369629, "learning_rate": 4.964632276860358e-06, "loss": 0.47958212, "memory(GiB)": 34.88, "step": 77880, "train_speed(iter/s)": 0.412537 }, { "acc": 0.93136806, "epoch": 2.108818671648661, "grad_norm": 8.086546897888184, "learning_rate": 4.964072727823219e-06, "loss": 0.40021162, "memory(GiB)": 34.88, "step": 77885, "train_speed(iter/s)": 0.412538 }, { "acc": 0.91213703, "epoch": 2.108954051931877, "grad_norm": 13.31785774230957, "learning_rate": 4.963513179242404e-06, "loss": 0.49028606, "memory(GiB)": 34.88, "step": 77890, "train_speed(iter/s)": 0.412539 }, { "acc": 0.90127792, "epoch": 2.109089432215092, "grad_norm": 15.472050666809082, "learning_rate": 4.962953631124921e-06, "loss": 0.62875943, "memory(GiB)": 34.88, "step": 77895, "train_speed(iter/s)": 0.412541 }, { "acc": 0.90159168, "epoch": 2.109224812498308, "grad_norm": 7.8052659034729, "learning_rate": 4.962394083477781e-06, "loss": 0.46981502, "memory(GiB)": 34.88, "step": 77900, "train_speed(iter/s)": 0.412542 }, { "acc": 0.91122971, "epoch": 2.1093601927815233, "grad_norm": 6.560529708862305, "learning_rate": 4.961834536307991e-06, "loss": 0.45176687, "memory(GiB)": 34.88, "step": 77905, "train_speed(iter/s)": 0.412543 }, { "acc": 0.90268631, "epoch": 2.1094955730647387, "grad_norm": 13.161168098449707, "learning_rate": 4.9612749896225625e-06, "loss": 0.5779984, "memory(GiB)": 34.88, "step": 77910, "train_speed(iter/s)": 0.412544 }, { "acc": 0.91604385, "epoch": 2.1096309533479545, "grad_norm": 7.198761940002441, "learning_rate": 4.9607154434285055e-06, "loss": 0.5111517, "memory(GiB)": 34.88, "step": 77915, "train_speed(iter/s)": 0.412546 }, { "acc": 0.92129345, "epoch": 2.10976633363117, "grad_norm": 5.428976058959961, "learning_rate": 4.960155897732827e-06, "loss": 0.48630033, "memory(GiB)": 34.88, "step": 77920, "train_speed(iter/s)": 0.412547 }, { "acc": 0.93057976, "epoch": 2.1099017139143856, "grad_norm": 4.69307279586792, "learning_rate": 4.9595963525425375e-06, "loss": 0.31806622, "memory(GiB)": 34.88, "step": 77925, "train_speed(iter/s)": 0.412549 }, { "acc": 0.93960295, "epoch": 2.110037094197601, "grad_norm": 17.753725051879883, "learning_rate": 4.959036807864646e-06, "loss": 0.25982714, "memory(GiB)": 34.88, "step": 77930, "train_speed(iter/s)": 0.41255 }, { "acc": 0.93199224, "epoch": 2.110172474480817, "grad_norm": 3.0876922607421875, "learning_rate": 4.958477263706165e-06, "loss": 0.38574328, "memory(GiB)": 34.88, "step": 77935, "train_speed(iter/s)": 0.412551 }, { "acc": 0.93026628, "epoch": 2.110307854764032, "grad_norm": 10.23539924621582, "learning_rate": 4.957917720074098e-06, "loss": 0.4439784, "memory(GiB)": 34.88, "step": 77940, "train_speed(iter/s)": 0.412553 }, { "acc": 0.93189936, "epoch": 2.1104432350472475, "grad_norm": 6.5590620040893555, "learning_rate": 4.957358176975459e-06, "loss": 0.32816648, "memory(GiB)": 34.88, "step": 77945, "train_speed(iter/s)": 0.412554 }, { "acc": 0.91925011, "epoch": 2.1105786153304633, "grad_norm": 9.680481910705566, "learning_rate": 4.956798634417258e-06, "loss": 0.44054527, "memory(GiB)": 34.88, "step": 77950, "train_speed(iter/s)": 0.412556 }, { "acc": 0.90868988, "epoch": 2.1107139956136787, "grad_norm": 13.682635307312012, "learning_rate": 4.9562390924065004e-06, "loss": 0.57555442, "memory(GiB)": 34.88, "step": 77955, "train_speed(iter/s)": 0.412557 }, { "acc": 0.91791916, "epoch": 2.1108493758968945, "grad_norm": 4.806941509246826, "learning_rate": 4.955679550950199e-06, "loss": 0.41323843, "memory(GiB)": 34.88, "step": 77960, "train_speed(iter/s)": 0.412558 }, { "acc": 0.92293739, "epoch": 2.11098475618011, "grad_norm": 5.625175476074219, "learning_rate": 4.95512001005536e-06, "loss": 0.38589821, "memory(GiB)": 34.88, "step": 77965, "train_speed(iter/s)": 0.41256 }, { "acc": 0.92606554, "epoch": 2.1111201364633256, "grad_norm": 2.8266494274139404, "learning_rate": 4.954560469728997e-06, "loss": 0.35300879, "memory(GiB)": 34.88, "step": 77970, "train_speed(iter/s)": 0.412561 }, { "acc": 0.92444515, "epoch": 2.111255516746541, "grad_norm": 8.388933181762695, "learning_rate": 4.9540009299781145e-06, "loss": 0.41040602, "memory(GiB)": 34.88, "step": 77975, "train_speed(iter/s)": 0.412562 }, { "acc": 0.91145391, "epoch": 2.111390897029757, "grad_norm": 11.724837303161621, "learning_rate": 4.953441390809725e-06, "loss": 0.51717567, "memory(GiB)": 34.88, "step": 77980, "train_speed(iter/s)": 0.412564 }, { "acc": 0.94189987, "epoch": 2.111526277312972, "grad_norm": 13.39356803894043, "learning_rate": 4.952881852230837e-06, "loss": 0.26142206, "memory(GiB)": 34.88, "step": 77985, "train_speed(iter/s)": 0.412565 }, { "acc": 0.92844419, "epoch": 2.1116616575961875, "grad_norm": 6.335355758666992, "learning_rate": 4.95232231424846e-06, "loss": 0.35856872, "memory(GiB)": 34.88, "step": 77990, "train_speed(iter/s)": 0.412566 }, { "acc": 0.91210127, "epoch": 2.1117970378794033, "grad_norm": 7.372396945953369, "learning_rate": 4.951762776869605e-06, "loss": 0.52954884, "memory(GiB)": 34.88, "step": 77995, "train_speed(iter/s)": 0.412567 }, { "acc": 0.91493073, "epoch": 2.1119324181626187, "grad_norm": 7.206256866455078, "learning_rate": 4.951203240101277e-06, "loss": 0.49515309, "memory(GiB)": 34.88, "step": 78000, "train_speed(iter/s)": 0.412569 }, { "acc": 0.91346703, "epoch": 2.1120677984458345, "grad_norm": 14.133042335510254, "learning_rate": 4.950643703950489e-06, "loss": 0.48592782, "memory(GiB)": 34.88, "step": 78005, "train_speed(iter/s)": 0.41257 }, { "acc": 0.91968594, "epoch": 2.11220317872905, "grad_norm": 10.54345417022705, "learning_rate": 4.950084168424249e-06, "loss": 0.41352592, "memory(GiB)": 34.88, "step": 78010, "train_speed(iter/s)": 0.412572 }, { "acc": 0.90792999, "epoch": 2.1123385590122656, "grad_norm": 10.606361389160156, "learning_rate": 4.949524633529565e-06, "loss": 0.55267167, "memory(GiB)": 34.88, "step": 78015, "train_speed(iter/s)": 0.412573 }, { "acc": 0.91901455, "epoch": 2.112473939295481, "grad_norm": 5.739599227905273, "learning_rate": 4.948965099273449e-06, "loss": 0.42163305, "memory(GiB)": 34.88, "step": 78020, "train_speed(iter/s)": 0.412574 }, { "acc": 0.92691612, "epoch": 2.1126093195786964, "grad_norm": 5.929614067077637, "learning_rate": 4.948405565662908e-06, "loss": 0.41002607, "memory(GiB)": 34.88, "step": 78025, "train_speed(iter/s)": 0.412576 }, { "acc": 0.90164375, "epoch": 2.112744699861912, "grad_norm": 15.712173461914062, "learning_rate": 4.947846032704953e-06, "loss": 0.57073007, "memory(GiB)": 34.88, "step": 78030, "train_speed(iter/s)": 0.412577 }, { "acc": 0.92783966, "epoch": 2.1128800801451275, "grad_norm": 8.847760200500488, "learning_rate": 4.9472865004065925e-06, "loss": 0.36881869, "memory(GiB)": 34.88, "step": 78035, "train_speed(iter/s)": 0.412579 }, { "acc": 0.92124166, "epoch": 2.1130154604283433, "grad_norm": 40.56121826171875, "learning_rate": 4.946726968774836e-06, "loss": 0.43943725, "memory(GiB)": 34.88, "step": 78040, "train_speed(iter/s)": 0.41258 }, { "acc": 0.92501259, "epoch": 2.1131508407115587, "grad_norm": 7.406393051147461, "learning_rate": 4.946167437816691e-06, "loss": 0.42419462, "memory(GiB)": 34.88, "step": 78045, "train_speed(iter/s)": 0.412581 }, { "acc": 0.91896086, "epoch": 2.1132862209947745, "grad_norm": 11.154632568359375, "learning_rate": 4.945607907539169e-06, "loss": 0.45731506, "memory(GiB)": 34.88, "step": 78050, "train_speed(iter/s)": 0.412583 }, { "acc": 0.92008419, "epoch": 2.11342160127799, "grad_norm": 13.775213241577148, "learning_rate": 4.945048377949278e-06, "loss": 0.45294781, "memory(GiB)": 34.88, "step": 78055, "train_speed(iter/s)": 0.412584 }, { "acc": 0.91765518, "epoch": 2.1135569815612056, "grad_norm": 11.954221725463867, "learning_rate": 4.944488849054027e-06, "loss": 0.51382723, "memory(GiB)": 34.88, "step": 78060, "train_speed(iter/s)": 0.412585 }, { "acc": 0.91333332, "epoch": 2.113692361844421, "grad_norm": 8.549400329589844, "learning_rate": 4.943929320860427e-06, "loss": 0.46828585, "memory(GiB)": 34.88, "step": 78065, "train_speed(iter/s)": 0.412587 }, { "acc": 0.91184273, "epoch": 2.1138277421276364, "grad_norm": 13.403179168701172, "learning_rate": 4.943369793375485e-06, "loss": 0.55056181, "memory(GiB)": 34.88, "step": 78070, "train_speed(iter/s)": 0.412588 }, { "acc": 0.91905985, "epoch": 2.113963122410852, "grad_norm": 6.211099147796631, "learning_rate": 4.942810266606211e-06, "loss": 0.47339773, "memory(GiB)": 34.88, "step": 78075, "train_speed(iter/s)": 0.412589 }, { "acc": 0.92712193, "epoch": 2.1140985026940675, "grad_norm": 7.283705234527588, "learning_rate": 4.942250740559616e-06, "loss": 0.3826164, "memory(GiB)": 34.88, "step": 78080, "train_speed(iter/s)": 0.41259 }, { "acc": 0.91854811, "epoch": 2.1142338829772833, "grad_norm": 10.28061294555664, "learning_rate": 4.941691215242706e-06, "loss": 0.4835001, "memory(GiB)": 34.88, "step": 78085, "train_speed(iter/s)": 0.412592 }, { "acc": 0.92530041, "epoch": 2.1143692632604987, "grad_norm": 5.944972515106201, "learning_rate": 4.941131690662491e-06, "loss": 0.38663211, "memory(GiB)": 34.88, "step": 78090, "train_speed(iter/s)": 0.412593 }, { "acc": 0.93646479, "epoch": 2.1145046435437145, "grad_norm": 9.657596588134766, "learning_rate": 4.940572166825981e-06, "loss": 0.29214199, "memory(GiB)": 34.88, "step": 78095, "train_speed(iter/s)": 0.412594 }, { "acc": 0.90821333, "epoch": 2.11464002382693, "grad_norm": 7.03255033493042, "learning_rate": 4.940012643740185e-06, "loss": 0.52026577, "memory(GiB)": 34.88, "step": 78100, "train_speed(iter/s)": 0.412595 }, { "acc": 0.92344246, "epoch": 2.114775404110145, "grad_norm": 7.490869998931885, "learning_rate": 4.939453121412112e-06, "loss": 0.4587285, "memory(GiB)": 34.88, "step": 78105, "train_speed(iter/s)": 0.412597 }, { "acc": 0.90873423, "epoch": 2.114910784393361, "grad_norm": 9.147411346435547, "learning_rate": 4.9388935998487715e-06, "loss": 0.57126827, "memory(GiB)": 34.88, "step": 78110, "train_speed(iter/s)": 0.412598 }, { "acc": 0.90959663, "epoch": 2.1150461646765764, "grad_norm": 7.708556175231934, "learning_rate": 4.938334079057172e-06, "loss": 0.49889359, "memory(GiB)": 34.88, "step": 78115, "train_speed(iter/s)": 0.412599 }, { "acc": 0.92033253, "epoch": 2.115181544959792, "grad_norm": 10.187789916992188, "learning_rate": 4.9377745590443245e-06, "loss": 0.45193276, "memory(GiB)": 34.88, "step": 78120, "train_speed(iter/s)": 0.412601 }, { "acc": 0.90226717, "epoch": 2.1153169252430075, "grad_norm": 7.88932991027832, "learning_rate": 4.9372150398172334e-06, "loss": 0.58155928, "memory(GiB)": 34.88, "step": 78125, "train_speed(iter/s)": 0.412602 }, { "acc": 0.9214345, "epoch": 2.1154523055262233, "grad_norm": 5.917634010314941, "learning_rate": 4.936655521382912e-06, "loss": 0.40720663, "memory(GiB)": 34.88, "step": 78130, "train_speed(iter/s)": 0.412603 }, { "acc": 0.92622967, "epoch": 2.1155876858094387, "grad_norm": 5.439866065979004, "learning_rate": 4.936096003748369e-06, "loss": 0.45739007, "memory(GiB)": 34.88, "step": 78135, "train_speed(iter/s)": 0.412605 }, { "acc": 0.92854786, "epoch": 2.1157230660926545, "grad_norm": 6.357886791229248, "learning_rate": 4.935536486920611e-06, "loss": 0.37668195, "memory(GiB)": 34.88, "step": 78140, "train_speed(iter/s)": 0.412606 }, { "acc": 0.90159941, "epoch": 2.11585844637587, "grad_norm": 15.333094596862793, "learning_rate": 4.93497697090665e-06, "loss": 0.52364125, "memory(GiB)": 34.88, "step": 78145, "train_speed(iter/s)": 0.412607 }, { "acc": 0.93168812, "epoch": 2.115993826659085, "grad_norm": 7.532017230987549, "learning_rate": 4.934417455713492e-06, "loss": 0.37396429, "memory(GiB)": 34.88, "step": 78150, "train_speed(iter/s)": 0.412609 }, { "acc": 0.92143345, "epoch": 2.116129206942301, "grad_norm": 6.286296844482422, "learning_rate": 4.93385794134815e-06, "loss": 0.35912783, "memory(GiB)": 34.88, "step": 78155, "train_speed(iter/s)": 0.41261 }, { "acc": 0.92904005, "epoch": 2.1162645872255164, "grad_norm": 6.307848930358887, "learning_rate": 4.933298427817631e-06, "loss": 0.39169459, "memory(GiB)": 34.88, "step": 78160, "train_speed(iter/s)": 0.412611 }, { "acc": 0.91145344, "epoch": 2.116399967508732, "grad_norm": 15.092470169067383, "learning_rate": 4.932738915128941e-06, "loss": 0.51223645, "memory(GiB)": 34.88, "step": 78165, "train_speed(iter/s)": 0.412612 }, { "acc": 0.91627092, "epoch": 2.1165353477919475, "grad_norm": 10.120536804199219, "learning_rate": 4.932179403289094e-06, "loss": 0.45577106, "memory(GiB)": 34.88, "step": 78170, "train_speed(iter/s)": 0.412614 }, { "acc": 0.924685, "epoch": 2.1166707280751633, "grad_norm": 8.12655258178711, "learning_rate": 4.9316198923050954e-06, "loss": 0.41509209, "memory(GiB)": 34.88, "step": 78175, "train_speed(iter/s)": 0.412615 }, { "acc": 0.92094564, "epoch": 2.1168061083583787, "grad_norm": 7.817474842071533, "learning_rate": 4.931060382183957e-06, "loss": 0.38713136, "memory(GiB)": 34.88, "step": 78180, "train_speed(iter/s)": 0.412616 }, { "acc": 0.9261364, "epoch": 2.116941488641594, "grad_norm": 7.6881866455078125, "learning_rate": 4.930500872932684e-06, "loss": 0.44249339, "memory(GiB)": 34.88, "step": 78185, "train_speed(iter/s)": 0.412618 }, { "acc": 0.91137505, "epoch": 2.11707686892481, "grad_norm": 8.235109329223633, "learning_rate": 4.92994136455829e-06, "loss": 0.42674198, "memory(GiB)": 34.88, "step": 78190, "train_speed(iter/s)": 0.412619 }, { "acc": 0.92723637, "epoch": 2.117212249208025, "grad_norm": 10.630974769592285, "learning_rate": 4.92938185706778e-06, "loss": 0.41490393, "memory(GiB)": 34.88, "step": 78195, "train_speed(iter/s)": 0.412621 }, { "acc": 0.92867718, "epoch": 2.117347629491241, "grad_norm": 6.072803974151611, "learning_rate": 4.928822350468165e-06, "loss": 0.37382121, "memory(GiB)": 34.88, "step": 78200, "train_speed(iter/s)": 0.412622 }, { "acc": 0.91699505, "epoch": 2.1174830097744564, "grad_norm": 7.686919689178467, "learning_rate": 4.928262844766454e-06, "loss": 0.41977186, "memory(GiB)": 34.88, "step": 78205, "train_speed(iter/s)": 0.412623 }, { "acc": 0.90742588, "epoch": 2.117618390057672, "grad_norm": 16.949899673461914, "learning_rate": 4.927703339969654e-06, "loss": 0.50156293, "memory(GiB)": 34.88, "step": 78210, "train_speed(iter/s)": 0.412625 }, { "acc": 0.92398968, "epoch": 2.1177537703408875, "grad_norm": 14.693406105041504, "learning_rate": 4.927143836084776e-06, "loss": 0.4485404, "memory(GiB)": 34.88, "step": 78215, "train_speed(iter/s)": 0.412625 }, { "acc": 0.93396606, "epoch": 2.1178891506241033, "grad_norm": 5.321606159210205, "learning_rate": 4.926584333118827e-06, "loss": 0.30288723, "memory(GiB)": 34.88, "step": 78220, "train_speed(iter/s)": 0.412627 }, { "acc": 0.9313261, "epoch": 2.1180245309073187, "grad_norm": 5.335952281951904, "learning_rate": 4.92602483107882e-06, "loss": 0.37262866, "memory(GiB)": 34.88, "step": 78225, "train_speed(iter/s)": 0.412628 }, { "acc": 0.92400246, "epoch": 2.118159911190534, "grad_norm": 12.076033592224121, "learning_rate": 4.925465329971757e-06, "loss": 0.49944057, "memory(GiB)": 34.88, "step": 78230, "train_speed(iter/s)": 0.41263 }, { "acc": 0.92078114, "epoch": 2.11829529147375, "grad_norm": 11.274256706237793, "learning_rate": 4.924905829804654e-06, "loss": 0.47327871, "memory(GiB)": 34.88, "step": 78235, "train_speed(iter/s)": 0.412631 }, { "acc": 0.92229652, "epoch": 2.118430671756965, "grad_norm": 7.165733337402344, "learning_rate": 4.9243463305845134e-06, "loss": 0.38732719, "memory(GiB)": 34.88, "step": 78240, "train_speed(iter/s)": 0.412632 }, { "acc": 0.93898687, "epoch": 2.118566052040181, "grad_norm": 5.9772419929504395, "learning_rate": 4.9237868323183496e-06, "loss": 0.32800674, "memory(GiB)": 34.88, "step": 78245, "train_speed(iter/s)": 0.412634 }, { "acc": 0.90333891, "epoch": 2.1187014323233964, "grad_norm": 7.554220199584961, "learning_rate": 4.923227335013168e-06, "loss": 0.5899138, "memory(GiB)": 34.88, "step": 78250, "train_speed(iter/s)": 0.412635 }, { "acc": 0.91021938, "epoch": 2.118836812606612, "grad_norm": 6.623067855834961, "learning_rate": 4.922667838675979e-06, "loss": 0.53522205, "memory(GiB)": 34.88, "step": 78255, "train_speed(iter/s)": 0.412636 }, { "acc": 0.89806795, "epoch": 2.1189721928898275, "grad_norm": 15.189743041992188, "learning_rate": 4.92210834331379e-06, "loss": 0.57947083, "memory(GiB)": 34.88, "step": 78260, "train_speed(iter/s)": 0.412637 }, { "acc": 0.92792902, "epoch": 2.119107573173043, "grad_norm": 2.427609443664551, "learning_rate": 4.92154884893361e-06, "loss": 0.38331964, "memory(GiB)": 34.88, "step": 78265, "train_speed(iter/s)": 0.412639 }, { "acc": 0.93580437, "epoch": 2.1192429534562587, "grad_norm": 5.102367401123047, "learning_rate": 4.92098935554245e-06, "loss": 0.29360313, "memory(GiB)": 34.88, "step": 78270, "train_speed(iter/s)": 0.41264 }, { "acc": 0.93827896, "epoch": 2.119378333739474, "grad_norm": 5.661515235900879, "learning_rate": 4.920429863147315e-06, "loss": 0.32911479, "memory(GiB)": 34.88, "step": 78275, "train_speed(iter/s)": 0.412641 }, { "acc": 0.92786379, "epoch": 2.11951371402269, "grad_norm": 12.13707160949707, "learning_rate": 4.919870371755217e-06, "loss": 0.41069431, "memory(GiB)": 34.88, "step": 78280, "train_speed(iter/s)": 0.412642 }, { "acc": 0.93855686, "epoch": 2.119649094305905, "grad_norm": 7.461833953857422, "learning_rate": 4.919310881373164e-06, "loss": 0.32731304, "memory(GiB)": 34.88, "step": 78285, "train_speed(iter/s)": 0.412644 }, { "acc": 0.91828346, "epoch": 2.119784474589121, "grad_norm": 20.227031707763672, "learning_rate": 4.918751392008164e-06, "loss": 0.51945248, "memory(GiB)": 34.88, "step": 78290, "train_speed(iter/s)": 0.412645 }, { "acc": 0.91239243, "epoch": 2.1199198548723364, "grad_norm": 9.747269630432129, "learning_rate": 4.918191903667226e-06, "loss": 0.50288696, "memory(GiB)": 34.88, "step": 78295, "train_speed(iter/s)": 0.412646 }, { "acc": 0.91719055, "epoch": 2.1200552351555517, "grad_norm": 8.404370307922363, "learning_rate": 4.917632416357358e-06, "loss": 0.46773205, "memory(GiB)": 34.88, "step": 78300, "train_speed(iter/s)": 0.412648 }, { "acc": 0.92565718, "epoch": 2.1201906154387675, "grad_norm": 8.128844261169434, "learning_rate": 4.91707293008557e-06, "loss": 0.37093935, "memory(GiB)": 34.88, "step": 78305, "train_speed(iter/s)": 0.412649 }, { "acc": 0.90900784, "epoch": 2.120325995721983, "grad_norm": 7.292636394500732, "learning_rate": 4.916513444858869e-06, "loss": 0.533322, "memory(GiB)": 34.88, "step": 78310, "train_speed(iter/s)": 0.41265 }, { "acc": 0.92026119, "epoch": 2.1204613760051987, "grad_norm": 7.089195251464844, "learning_rate": 4.915953960684266e-06, "loss": 0.37549365, "memory(GiB)": 34.88, "step": 78315, "train_speed(iter/s)": 0.412651 }, { "acc": 0.91856098, "epoch": 2.120596756288414, "grad_norm": 7.3537678718566895, "learning_rate": 4.915394477568766e-06, "loss": 0.48287477, "memory(GiB)": 34.88, "step": 78320, "train_speed(iter/s)": 0.412653 }, { "acc": 0.92479477, "epoch": 2.12073213657163, "grad_norm": 14.303153991699219, "learning_rate": 4.914834995519381e-06, "loss": 0.46582012, "memory(GiB)": 34.88, "step": 78325, "train_speed(iter/s)": 0.412654 }, { "acc": 0.91093311, "epoch": 2.120867516854845, "grad_norm": 10.347631454467773, "learning_rate": 4.914275514543121e-06, "loss": 0.53989811, "memory(GiB)": 34.88, "step": 78330, "train_speed(iter/s)": 0.412656 }, { "acc": 0.91711197, "epoch": 2.121002897138061, "grad_norm": 7.304515838623047, "learning_rate": 4.91371603464699e-06, "loss": 0.46374545, "memory(GiB)": 34.88, "step": 78335, "train_speed(iter/s)": 0.412657 }, { "acc": 0.92078838, "epoch": 2.1211382774212764, "grad_norm": 7.730088710784912, "learning_rate": 4.913156555837998e-06, "loss": 0.40617819, "memory(GiB)": 34.88, "step": 78340, "train_speed(iter/s)": 0.412659 }, { "acc": 0.9288312, "epoch": 2.1212736577044917, "grad_norm": 6.509599685668945, "learning_rate": 4.912597078123155e-06, "loss": 0.3441381, "memory(GiB)": 34.88, "step": 78345, "train_speed(iter/s)": 0.41266 }, { "acc": 0.92798061, "epoch": 2.1214090379877075, "grad_norm": 50.84972381591797, "learning_rate": 4.912037601509469e-06, "loss": 0.40013719, "memory(GiB)": 34.88, "step": 78350, "train_speed(iter/s)": 0.412661 }, { "acc": 0.9310154, "epoch": 2.121544418270923, "grad_norm": 5.376920700073242, "learning_rate": 4.911478126003948e-06, "loss": 0.37708571, "memory(GiB)": 34.88, "step": 78355, "train_speed(iter/s)": 0.412663 }, { "acc": 0.93303614, "epoch": 2.1216797985541387, "grad_norm": 7.521826267242432, "learning_rate": 4.910918651613601e-06, "loss": 0.38048253, "memory(GiB)": 34.88, "step": 78360, "train_speed(iter/s)": 0.412664 }, { "acc": 0.93441353, "epoch": 2.121815178837354, "grad_norm": 8.407726287841797, "learning_rate": 4.9103591783454365e-06, "loss": 0.34897933, "memory(GiB)": 34.88, "step": 78365, "train_speed(iter/s)": 0.412665 }, { "acc": 0.94878435, "epoch": 2.12195055912057, "grad_norm": 3.8078792095184326, "learning_rate": 4.909799706206463e-06, "loss": 0.30055647, "memory(GiB)": 34.88, "step": 78370, "train_speed(iter/s)": 0.412667 }, { "acc": 0.92203131, "epoch": 2.122085939403785, "grad_norm": 6.721165657043457, "learning_rate": 4.90924023520369e-06, "loss": 0.39129524, "memory(GiB)": 34.88, "step": 78375, "train_speed(iter/s)": 0.412668 }, { "acc": 0.90810108, "epoch": 2.122221319687001, "grad_norm": 14.7810640335083, "learning_rate": 4.908680765344123e-06, "loss": 0.54165683, "memory(GiB)": 34.88, "step": 78380, "train_speed(iter/s)": 0.412669 }, { "acc": 0.90638351, "epoch": 2.1223566999702164, "grad_norm": 7.917021751403809, "learning_rate": 4.908121296634774e-06, "loss": 0.4795692, "memory(GiB)": 34.88, "step": 78385, "train_speed(iter/s)": 0.41267 }, { "acc": 0.93722725, "epoch": 2.1224920802534317, "grad_norm": 7.639892578125, "learning_rate": 4.907561829082647e-06, "loss": 0.31113253, "memory(GiB)": 34.88, "step": 78390, "train_speed(iter/s)": 0.412672 }, { "acc": 0.92742662, "epoch": 2.1226274605366475, "grad_norm": 7.078020095825195, "learning_rate": 4.907002362694755e-06, "loss": 0.3860323, "memory(GiB)": 34.88, "step": 78395, "train_speed(iter/s)": 0.412673 }, { "acc": 0.91651125, "epoch": 2.122762840819863, "grad_norm": 7.105672836303711, "learning_rate": 4.9064428974781054e-06, "loss": 0.50770111, "memory(GiB)": 34.88, "step": 78400, "train_speed(iter/s)": 0.412674 }, { "acc": 0.93420305, "epoch": 2.1228982211030787, "grad_norm": 5.211424827575684, "learning_rate": 4.905883433439705e-06, "loss": 0.35240345, "memory(GiB)": 34.88, "step": 78405, "train_speed(iter/s)": 0.412675 }, { "acc": 0.93281651, "epoch": 2.123033601386294, "grad_norm": 6.230680465698242, "learning_rate": 4.905323970586563e-06, "loss": 0.34724731, "memory(GiB)": 34.88, "step": 78410, "train_speed(iter/s)": 0.412676 }, { "acc": 0.93546658, "epoch": 2.12316898166951, "grad_norm": 7.372137546539307, "learning_rate": 4.904764508925689e-06, "loss": 0.35721796, "memory(GiB)": 34.88, "step": 78415, "train_speed(iter/s)": 0.412678 }, { "acc": 0.91708918, "epoch": 2.123304361952725, "grad_norm": 24.559389114379883, "learning_rate": 4.9042050484640894e-06, "loss": 0.42864618, "memory(GiB)": 34.88, "step": 78420, "train_speed(iter/s)": 0.412679 }, { "acc": 0.92389078, "epoch": 2.1234397422359406, "grad_norm": 7.387168884277344, "learning_rate": 4.903645589208772e-06, "loss": 0.42838149, "memory(GiB)": 34.88, "step": 78425, "train_speed(iter/s)": 0.41268 }, { "acc": 0.91732683, "epoch": 2.1235751225191564, "grad_norm": 8.634598731994629, "learning_rate": 4.903086131166747e-06, "loss": 0.49049129, "memory(GiB)": 34.88, "step": 78430, "train_speed(iter/s)": 0.412682 }, { "acc": 0.91362228, "epoch": 2.1237105028023717, "grad_norm": 8.132768630981445, "learning_rate": 4.902526674345024e-06, "loss": 0.51863194, "memory(GiB)": 34.88, "step": 78435, "train_speed(iter/s)": 0.412683 }, { "acc": 0.92415829, "epoch": 2.1238458830855875, "grad_norm": 5.505020618438721, "learning_rate": 4.901967218750606e-06, "loss": 0.36953435, "memory(GiB)": 34.88, "step": 78440, "train_speed(iter/s)": 0.412684 }, { "acc": 0.93463182, "epoch": 2.123981263368803, "grad_norm": 4.082830429077148, "learning_rate": 4.901407764390508e-06, "loss": 0.3425869, "memory(GiB)": 34.88, "step": 78445, "train_speed(iter/s)": 0.412686 }, { "acc": 0.93074055, "epoch": 2.1241166436520187, "grad_norm": 5.138868808746338, "learning_rate": 4.900848311271733e-06, "loss": 0.40440593, "memory(GiB)": 34.88, "step": 78450, "train_speed(iter/s)": 0.412687 }, { "acc": 0.92831745, "epoch": 2.124252023935234, "grad_norm": 6.202043056488037, "learning_rate": 4.900288859401293e-06, "loss": 0.4405899, "memory(GiB)": 34.88, "step": 78455, "train_speed(iter/s)": 0.412688 }, { "acc": 0.90215864, "epoch": 2.1243874042184494, "grad_norm": 9.017473220825195, "learning_rate": 4.899729408786193e-06, "loss": 0.48758259, "memory(GiB)": 34.88, "step": 78460, "train_speed(iter/s)": 0.41269 }, { "acc": 0.93544216, "epoch": 2.124522784501665, "grad_norm": 8.382620811462402, "learning_rate": 4.899169959433442e-06, "loss": 0.32530408, "memory(GiB)": 34.88, "step": 78465, "train_speed(iter/s)": 0.412691 }, { "acc": 0.91860142, "epoch": 2.1246581647848806, "grad_norm": 42.932640075683594, "learning_rate": 4.8986105113500505e-06, "loss": 0.47748499, "memory(GiB)": 34.88, "step": 78470, "train_speed(iter/s)": 0.412692 }, { "acc": 0.93358135, "epoch": 2.1247935450680964, "grad_norm": 13.311880111694336, "learning_rate": 4.898051064543024e-06, "loss": 0.34120922, "memory(GiB)": 34.88, "step": 78475, "train_speed(iter/s)": 0.412694 }, { "acc": 0.90366201, "epoch": 2.1249289253513117, "grad_norm": 7.9731550216674805, "learning_rate": 4.897491619019372e-06, "loss": 0.51778669, "memory(GiB)": 34.88, "step": 78480, "train_speed(iter/s)": 0.412695 }, { "acc": 0.91944714, "epoch": 2.1250643056345275, "grad_norm": 9.347299575805664, "learning_rate": 4.896932174786101e-06, "loss": 0.41485519, "memory(GiB)": 34.88, "step": 78485, "train_speed(iter/s)": 0.412696 }, { "acc": 0.91167259, "epoch": 2.125199685917743, "grad_norm": 15.364043235778809, "learning_rate": 4.896372731850223e-06, "loss": 0.4629838, "memory(GiB)": 34.88, "step": 78490, "train_speed(iter/s)": 0.412697 }, { "acc": 0.93465633, "epoch": 2.1253350662009587, "grad_norm": 6.131515026092529, "learning_rate": 4.895813290218743e-06, "loss": 0.35661507, "memory(GiB)": 34.88, "step": 78495, "train_speed(iter/s)": 0.412698 }, { "acc": 0.90708542, "epoch": 2.125470446484174, "grad_norm": 8.822517395019531, "learning_rate": 4.895253849898671e-06, "loss": 0.49651432, "memory(GiB)": 34.88, "step": 78500, "train_speed(iter/s)": 0.4127 }, { "acc": 0.90522518, "epoch": 2.1256058267673894, "grad_norm": 6.2517218589782715, "learning_rate": 4.894694410897011e-06, "loss": 0.41137772, "memory(GiB)": 34.88, "step": 78505, "train_speed(iter/s)": 0.412701 }, { "acc": 0.91408844, "epoch": 2.125741207050605, "grad_norm": 10.804098129272461, "learning_rate": 4.894134973220774e-06, "loss": 0.53073692, "memory(GiB)": 34.88, "step": 78510, "train_speed(iter/s)": 0.412702 }, { "acc": 0.92636261, "epoch": 2.1258765873338206, "grad_norm": 17.779502868652344, "learning_rate": 4.893575536876971e-06, "loss": 0.43066096, "memory(GiB)": 34.88, "step": 78515, "train_speed(iter/s)": 0.412704 }, { "acc": 0.91488028, "epoch": 2.1260119676170364, "grad_norm": 13.42508602142334, "learning_rate": 4.893016101872604e-06, "loss": 0.53191843, "memory(GiB)": 34.88, "step": 78520, "train_speed(iter/s)": 0.412705 }, { "acc": 0.91048288, "epoch": 2.1261473479002517, "grad_norm": 4.292149543762207, "learning_rate": 4.892456668214685e-06, "loss": 0.44818516, "memory(GiB)": 34.88, "step": 78525, "train_speed(iter/s)": 0.412706 }, { "acc": 0.91448631, "epoch": 2.1262827281834675, "grad_norm": 11.978217124938965, "learning_rate": 4.8918972359102215e-06, "loss": 0.51249161, "memory(GiB)": 34.88, "step": 78530, "train_speed(iter/s)": 0.412707 }, { "acc": 0.91671524, "epoch": 2.126418108466683, "grad_norm": 10.749939918518066, "learning_rate": 4.891337804966221e-06, "loss": 0.51296072, "memory(GiB)": 34.88, "step": 78535, "train_speed(iter/s)": 0.412709 }, { "acc": 0.92439938, "epoch": 2.1265534887498987, "grad_norm": 11.217583656311035, "learning_rate": 4.890778375389692e-06, "loss": 0.42843456, "memory(GiB)": 34.88, "step": 78540, "train_speed(iter/s)": 0.41271 }, { "acc": 0.91095133, "epoch": 2.126688869033114, "grad_norm": 8.657696723937988, "learning_rate": 4.89021894718764e-06, "loss": 0.51562505, "memory(GiB)": 34.88, "step": 78545, "train_speed(iter/s)": 0.412711 }, { "acc": 0.91250753, "epoch": 2.1268242493163294, "grad_norm": 3.7347004413604736, "learning_rate": 4.889659520367076e-06, "loss": 0.52817688, "memory(GiB)": 34.88, "step": 78550, "train_speed(iter/s)": 0.412713 }, { "acc": 0.93412685, "epoch": 2.126959629599545, "grad_norm": 3.0658857822418213, "learning_rate": 4.889100094935007e-06, "loss": 0.34432225, "memory(GiB)": 34.88, "step": 78555, "train_speed(iter/s)": 0.412714 }, { "acc": 0.90586376, "epoch": 2.1270950098827606, "grad_norm": 4.4714860916137695, "learning_rate": 4.8885406708984404e-06, "loss": 0.457794, "memory(GiB)": 34.88, "step": 78560, "train_speed(iter/s)": 0.412715 }, { "acc": 0.91699333, "epoch": 2.1272303901659764, "grad_norm": 7.987968444824219, "learning_rate": 4.887981248264384e-06, "loss": 0.54234138, "memory(GiB)": 34.88, "step": 78565, "train_speed(iter/s)": 0.412717 }, { "acc": 0.92223186, "epoch": 2.1273657704491917, "grad_norm": 9.021925926208496, "learning_rate": 4.8874218270398475e-06, "loss": 0.47141457, "memory(GiB)": 34.88, "step": 78570, "train_speed(iter/s)": 0.412718 }, { "acc": 0.92746458, "epoch": 2.1275011507324075, "grad_norm": 8.346098899841309, "learning_rate": 4.886862407231836e-06, "loss": 0.361409, "memory(GiB)": 34.88, "step": 78575, "train_speed(iter/s)": 0.412719 }, { "acc": 0.90831575, "epoch": 2.127636531015623, "grad_norm": 14.440879821777344, "learning_rate": 4.886302988847359e-06, "loss": 0.54819489, "memory(GiB)": 34.88, "step": 78580, "train_speed(iter/s)": 0.412721 }, { "acc": 0.92739439, "epoch": 2.1277719112988382, "grad_norm": 6.022726535797119, "learning_rate": 4.885743571893425e-06, "loss": 0.39937794, "memory(GiB)": 34.88, "step": 78585, "train_speed(iter/s)": 0.412722 }, { "acc": 0.92963829, "epoch": 2.127907291582054, "grad_norm": 23.826757431030273, "learning_rate": 4.885184156377039e-06, "loss": 0.35891752, "memory(GiB)": 34.88, "step": 78590, "train_speed(iter/s)": 0.412724 }, { "acc": 0.922756, "epoch": 2.1280426718652694, "grad_norm": 5.453671932220459, "learning_rate": 4.884624742305212e-06, "loss": 0.43940854, "memory(GiB)": 34.88, "step": 78595, "train_speed(iter/s)": 0.412725 }, { "acc": 0.90954666, "epoch": 2.128178052148485, "grad_norm": 9.4180908203125, "learning_rate": 4.884065329684949e-06, "loss": 0.53733387, "memory(GiB)": 34.88, "step": 78600, "train_speed(iter/s)": 0.412726 }, { "acc": 0.92276573, "epoch": 2.1283134324317006, "grad_norm": 9.121691703796387, "learning_rate": 4.88350591852326e-06, "loss": 0.47217054, "memory(GiB)": 34.88, "step": 78605, "train_speed(iter/s)": 0.412727 }, { "acc": 0.92179832, "epoch": 2.1284488127149164, "grad_norm": 4.353687286376953, "learning_rate": 4.882946508827151e-06, "loss": 0.37541661, "memory(GiB)": 34.88, "step": 78610, "train_speed(iter/s)": 0.412729 }, { "acc": 0.92352114, "epoch": 2.1285841929981317, "grad_norm": 6.2100114822387695, "learning_rate": 4.882387100603631e-06, "loss": 0.42068281, "memory(GiB)": 34.88, "step": 78615, "train_speed(iter/s)": 0.41273 }, { "acc": 0.93298244, "epoch": 2.128719573281347, "grad_norm": 6.267658710479736, "learning_rate": 4.881827693859707e-06, "loss": 0.3517096, "memory(GiB)": 34.88, "step": 78620, "train_speed(iter/s)": 0.412731 }, { "acc": 0.91680088, "epoch": 2.128854953564563, "grad_norm": 20.495975494384766, "learning_rate": 4.881268288602387e-06, "loss": 0.51196833, "memory(GiB)": 34.88, "step": 78625, "train_speed(iter/s)": 0.412733 }, { "acc": 0.90616589, "epoch": 2.1289903338477782, "grad_norm": 20.35567283630371, "learning_rate": 4.880708884838679e-06, "loss": 0.57991767, "memory(GiB)": 34.88, "step": 78630, "train_speed(iter/s)": 0.412734 }, { "acc": 0.92551785, "epoch": 2.129125714130994, "grad_norm": 4.565165996551514, "learning_rate": 4.880149482575588e-06, "loss": 0.39397342, "memory(GiB)": 34.88, "step": 78635, "train_speed(iter/s)": 0.412735 }, { "acc": 0.90420094, "epoch": 2.1292610944142094, "grad_norm": 11.173778533935547, "learning_rate": 4.879590081820127e-06, "loss": 0.57676449, "memory(GiB)": 34.88, "step": 78640, "train_speed(iter/s)": 0.412737 }, { "acc": 0.92409801, "epoch": 2.129396474697425, "grad_norm": 8.723310470581055, "learning_rate": 4.879030682579297e-06, "loss": 0.31385193, "memory(GiB)": 34.88, "step": 78645, "train_speed(iter/s)": 0.412738 }, { "acc": 0.9228548, "epoch": 2.1295318549806406, "grad_norm": 6.765504837036133, "learning_rate": 4.8784712848601115e-06, "loss": 0.46694613, "memory(GiB)": 34.88, "step": 78650, "train_speed(iter/s)": 0.412739 }, { "acc": 0.91417751, "epoch": 2.1296672352638564, "grad_norm": 11.59580135345459, "learning_rate": 4.877911888669573e-06, "loss": 0.45896435, "memory(GiB)": 34.88, "step": 78655, "train_speed(iter/s)": 0.412741 }, { "acc": 0.92268047, "epoch": 2.1298026155470717, "grad_norm": 32.67228317260742, "learning_rate": 4.8773524940146926e-06, "loss": 0.46570101, "memory(GiB)": 34.88, "step": 78660, "train_speed(iter/s)": 0.412742 }, { "acc": 0.91747789, "epoch": 2.129937995830287, "grad_norm": 9.74207592010498, "learning_rate": 4.8767931009024785e-06, "loss": 0.44694924, "memory(GiB)": 34.88, "step": 78665, "train_speed(iter/s)": 0.412743 }, { "acc": 0.917908, "epoch": 2.130073376113503, "grad_norm": 15.414761543273926, "learning_rate": 4.876233709339934e-06, "loss": 0.49255781, "memory(GiB)": 34.88, "step": 78670, "train_speed(iter/s)": 0.412745 }, { "acc": 0.92525415, "epoch": 2.1302087563967183, "grad_norm": 8.404175758361816, "learning_rate": 4.875674319334071e-06, "loss": 0.46029277, "memory(GiB)": 34.88, "step": 78675, "train_speed(iter/s)": 0.412746 }, { "acc": 0.91722498, "epoch": 2.130344136679934, "grad_norm": 11.89442253112793, "learning_rate": 4.8751149308918925e-06, "loss": 0.50471087, "memory(GiB)": 34.88, "step": 78680, "train_speed(iter/s)": 0.412747 }, { "acc": 0.92536497, "epoch": 2.1304795169631494, "grad_norm": 4.484126567840576, "learning_rate": 4.8745555440204105e-06, "loss": 0.4220736, "memory(GiB)": 34.88, "step": 78685, "train_speed(iter/s)": 0.412749 }, { "acc": 0.90969601, "epoch": 2.130614897246365, "grad_norm": 13.067739486694336, "learning_rate": 4.873996158726628e-06, "loss": 0.47740116, "memory(GiB)": 34.88, "step": 78690, "train_speed(iter/s)": 0.41275 }, { "acc": 0.9420187, "epoch": 2.1307502775295806, "grad_norm": 8.146435737609863, "learning_rate": 4.873436775017556e-06, "loss": 0.29886713, "memory(GiB)": 34.88, "step": 78695, "train_speed(iter/s)": 0.412751 }, { "acc": 0.91537781, "epoch": 2.1308856578127964, "grad_norm": 10.630535125732422, "learning_rate": 4.872877392900201e-06, "loss": 0.40453386, "memory(GiB)": 34.88, "step": 78700, "train_speed(iter/s)": 0.412753 }, { "acc": 0.92320499, "epoch": 2.1310210380960117, "grad_norm": 8.903504371643066, "learning_rate": 4.87231801238157e-06, "loss": 0.49437647, "memory(GiB)": 34.88, "step": 78705, "train_speed(iter/s)": 0.412754 }, { "acc": 0.91154394, "epoch": 2.131156418379227, "grad_norm": 7.538166522979736, "learning_rate": 4.871758633468671e-06, "loss": 0.48803759, "memory(GiB)": 34.88, "step": 78710, "train_speed(iter/s)": 0.412755 }, { "acc": 0.9232996, "epoch": 2.131291798662443, "grad_norm": 9.635603904724121, "learning_rate": 4.871199256168509e-06, "loss": 0.44528074, "memory(GiB)": 34.88, "step": 78715, "train_speed(iter/s)": 0.412756 }, { "acc": 0.92196894, "epoch": 2.1314271789456583, "grad_norm": 10.56330680847168, "learning_rate": 4.870639880488094e-06, "loss": 0.41472673, "memory(GiB)": 34.88, "step": 78720, "train_speed(iter/s)": 0.412758 }, { "acc": 0.9027729, "epoch": 2.131562559228874, "grad_norm": 15.973945617675781, "learning_rate": 4.87008050643443e-06, "loss": 0.58515186, "memory(GiB)": 34.88, "step": 78725, "train_speed(iter/s)": 0.412759 }, { "acc": 0.91691341, "epoch": 2.1316979395120894, "grad_norm": 12.194746017456055, "learning_rate": 4.869521134014527e-06, "loss": 0.46526651, "memory(GiB)": 34.88, "step": 78730, "train_speed(iter/s)": 0.41276 }, { "acc": 0.92804661, "epoch": 2.131833319795305, "grad_norm": 13.735489845275879, "learning_rate": 4.868961763235393e-06, "loss": 0.38115788, "memory(GiB)": 34.88, "step": 78735, "train_speed(iter/s)": 0.412762 }, { "acc": 0.91489849, "epoch": 2.1319687000785206, "grad_norm": 6.129720211029053, "learning_rate": 4.868402394104033e-06, "loss": 0.50082493, "memory(GiB)": 34.88, "step": 78740, "train_speed(iter/s)": 0.412763 }, { "acc": 0.91037464, "epoch": 2.132104080361736, "grad_norm": 9.003281593322754, "learning_rate": 4.867843026627456e-06, "loss": 0.52752743, "memory(GiB)": 34.88, "step": 78745, "train_speed(iter/s)": 0.412764 }, { "acc": 0.93164864, "epoch": 2.1322394606449517, "grad_norm": 6.255021095275879, "learning_rate": 4.867283660812668e-06, "loss": 0.35000434, "memory(GiB)": 34.88, "step": 78750, "train_speed(iter/s)": 0.412765 }, { "acc": 0.9334713, "epoch": 2.132374840928167, "grad_norm": 6.89622688293457, "learning_rate": 4.866724296666677e-06, "loss": 0.37762692, "memory(GiB)": 34.88, "step": 78755, "train_speed(iter/s)": 0.412767 }, { "acc": 0.92649879, "epoch": 2.132510221211383, "grad_norm": 3.0860238075256348, "learning_rate": 4.866164934196488e-06, "loss": 0.31577652, "memory(GiB)": 34.88, "step": 78760, "train_speed(iter/s)": 0.412768 }, { "acc": 0.9247365, "epoch": 2.1326456014945983, "grad_norm": 20.831796646118164, "learning_rate": 4.86560557340911e-06, "loss": 0.44390197, "memory(GiB)": 34.88, "step": 78765, "train_speed(iter/s)": 0.412769 }, { "acc": 0.9249342, "epoch": 2.132780981777814, "grad_norm": 5.057246685028076, "learning_rate": 4.8650462143115516e-06, "loss": 0.34440131, "memory(GiB)": 34.88, "step": 78770, "train_speed(iter/s)": 0.412771 }, { "acc": 0.93612728, "epoch": 2.1329163620610294, "grad_norm": 9.2407865524292, "learning_rate": 4.864486856910816e-06, "loss": 0.29055858, "memory(GiB)": 34.88, "step": 78775, "train_speed(iter/s)": 0.412772 }, { "acc": 0.88988619, "epoch": 2.1330517423442448, "grad_norm": 11.003528594970703, "learning_rate": 4.863927501213913e-06, "loss": 0.63444233, "memory(GiB)": 34.88, "step": 78780, "train_speed(iter/s)": 0.412773 }, { "acc": 0.92876263, "epoch": 2.1331871226274606, "grad_norm": 5.029807090759277, "learning_rate": 4.863368147227848e-06, "loss": 0.3590363, "memory(GiB)": 34.88, "step": 78785, "train_speed(iter/s)": 0.412775 }, { "acc": 0.91130314, "epoch": 2.133322502910676, "grad_norm": 9.519123077392578, "learning_rate": 4.862808794959631e-06, "loss": 0.498141, "memory(GiB)": 34.88, "step": 78790, "train_speed(iter/s)": 0.412776 }, { "acc": 0.90343304, "epoch": 2.1334578831938917, "grad_norm": 10.743931770324707, "learning_rate": 4.862249444416265e-06, "loss": 0.51989737, "memory(GiB)": 34.88, "step": 78795, "train_speed(iter/s)": 0.412777 }, { "acc": 0.9272213, "epoch": 2.133593263477107, "grad_norm": 7.902379035949707, "learning_rate": 4.86169009560476e-06, "loss": 0.38879244, "memory(GiB)": 34.88, "step": 78800, "train_speed(iter/s)": 0.412779 }, { "acc": 0.91309175, "epoch": 2.133728643760323, "grad_norm": 7.7221221923828125, "learning_rate": 4.8611307485321214e-06, "loss": 0.49226642, "memory(GiB)": 34.88, "step": 78805, "train_speed(iter/s)": 0.41278 }, { "acc": 0.92037697, "epoch": 2.1338640240435383, "grad_norm": 6.302852630615234, "learning_rate": 4.860571403205356e-06, "loss": 0.36065359, "memory(GiB)": 34.88, "step": 78810, "train_speed(iter/s)": 0.412781 }, { "acc": 0.92065697, "epoch": 2.133999404326754, "grad_norm": 4.833805084228516, "learning_rate": 4.860012059631472e-06, "loss": 0.50535278, "memory(GiB)": 34.88, "step": 78815, "train_speed(iter/s)": 0.412783 }, { "acc": 0.92526321, "epoch": 2.1341347846099694, "grad_norm": 5.845205307006836, "learning_rate": 4.859452717817474e-06, "loss": 0.37221127, "memory(GiB)": 34.88, "step": 78820, "train_speed(iter/s)": 0.412784 }, { "acc": 0.9090848, "epoch": 2.1342701648931848, "grad_norm": 9.418832778930664, "learning_rate": 4.858893377770373e-06, "loss": 0.4616982, "memory(GiB)": 34.88, "step": 78825, "train_speed(iter/s)": 0.412785 }, { "acc": 0.9291707, "epoch": 2.1344055451764006, "grad_norm": 13.20413875579834, "learning_rate": 4.858334039497171e-06, "loss": 0.40948744, "memory(GiB)": 34.88, "step": 78830, "train_speed(iter/s)": 0.412787 }, { "acc": 0.91474667, "epoch": 2.134540925459616, "grad_norm": 13.246110916137695, "learning_rate": 4.8577747030048795e-06, "loss": 0.46654329, "memory(GiB)": 34.88, "step": 78835, "train_speed(iter/s)": 0.412788 }, { "acc": 0.92836866, "epoch": 2.1346763057428317, "grad_norm": 6.7706170082092285, "learning_rate": 4.857215368300501e-06, "loss": 0.38982556, "memory(GiB)": 34.88, "step": 78840, "train_speed(iter/s)": 0.412789 }, { "acc": 0.91575642, "epoch": 2.134811686026047, "grad_norm": 4.071252822875977, "learning_rate": 4.856656035391043e-06, "loss": 0.40284142, "memory(GiB)": 34.88, "step": 78845, "train_speed(iter/s)": 0.412791 }, { "acc": 0.93228264, "epoch": 2.134947066309263, "grad_norm": 8.762504577636719, "learning_rate": 4.856096704283515e-06, "loss": 0.40878797, "memory(GiB)": 34.88, "step": 78850, "train_speed(iter/s)": 0.412792 }, { "acc": 0.92376976, "epoch": 2.1350824465924783, "grad_norm": 6.857961654663086, "learning_rate": 4.855537374984921e-06, "loss": 0.38982615, "memory(GiB)": 34.88, "step": 78855, "train_speed(iter/s)": 0.412793 }, { "acc": 0.93961897, "epoch": 2.135217826875694, "grad_norm": 4.321908950805664, "learning_rate": 4.854978047502269e-06, "loss": 0.3416374, "memory(GiB)": 34.88, "step": 78860, "train_speed(iter/s)": 0.412795 }, { "acc": 0.92544174, "epoch": 2.1353532071589094, "grad_norm": 8.041069030761719, "learning_rate": 4.854418721842566e-06, "loss": 0.38623204, "memory(GiB)": 34.88, "step": 78865, "train_speed(iter/s)": 0.412796 }, { "acc": 0.92154999, "epoch": 2.1354885874421248, "grad_norm": 6.512853622436523, "learning_rate": 4.853859398012818e-06, "loss": 0.41358676, "memory(GiB)": 34.88, "step": 78870, "train_speed(iter/s)": 0.412797 }, { "acc": 0.90369873, "epoch": 2.1356239677253406, "grad_norm": 13.209662437438965, "learning_rate": 4.853300076020032e-06, "loss": 0.558079, "memory(GiB)": 34.88, "step": 78875, "train_speed(iter/s)": 0.412798 }, { "acc": 0.93930759, "epoch": 2.135759348008556, "grad_norm": 4.820408344268799, "learning_rate": 4.852740755871213e-06, "loss": 0.36131279, "memory(GiB)": 34.88, "step": 78880, "train_speed(iter/s)": 0.4128 }, { "acc": 0.91607094, "epoch": 2.1358947282917717, "grad_norm": 8.679191589355469, "learning_rate": 4.85218143757337e-06, "loss": 0.38724923, "memory(GiB)": 34.88, "step": 78885, "train_speed(iter/s)": 0.412801 }, { "acc": 0.91031723, "epoch": 2.136030108574987, "grad_norm": 5.970695972442627, "learning_rate": 4.851622121133507e-06, "loss": 0.46503897, "memory(GiB)": 34.88, "step": 78890, "train_speed(iter/s)": 0.412802 }, { "acc": 0.91270132, "epoch": 2.136165488858203, "grad_norm": 10.937017440795898, "learning_rate": 4.851062806558634e-06, "loss": 0.57622294, "memory(GiB)": 34.88, "step": 78895, "train_speed(iter/s)": 0.412804 }, { "acc": 0.9285655, "epoch": 2.1363008691414183, "grad_norm": 7.308473587036133, "learning_rate": 4.850503493855753e-06, "loss": 0.34955788, "memory(GiB)": 34.88, "step": 78900, "train_speed(iter/s)": 0.412805 }, { "acc": 0.92356758, "epoch": 2.1364362494246336, "grad_norm": 4.615642070770264, "learning_rate": 4.849944183031876e-06, "loss": 0.33033364, "memory(GiB)": 34.88, "step": 78905, "train_speed(iter/s)": 0.412806 }, { "acc": 0.92631855, "epoch": 2.1365716297078494, "grad_norm": 12.481917381286621, "learning_rate": 4.849384874094004e-06, "loss": 0.45118294, "memory(GiB)": 34.88, "step": 78910, "train_speed(iter/s)": 0.412807 }, { "acc": 0.93002472, "epoch": 2.1367070099910648, "grad_norm": 26.589656829833984, "learning_rate": 4.8488255670491476e-06, "loss": 0.40709724, "memory(GiB)": 34.88, "step": 78915, "train_speed(iter/s)": 0.412809 }, { "acc": 0.9143693, "epoch": 2.1368423902742806, "grad_norm": 11.1859130859375, "learning_rate": 4.848266261904311e-06, "loss": 0.50579214, "memory(GiB)": 34.88, "step": 78920, "train_speed(iter/s)": 0.41281 }, { "acc": 0.89548388, "epoch": 2.136977770557496, "grad_norm": 11.638400077819824, "learning_rate": 4.847706958666501e-06, "loss": 0.52343779, "memory(GiB)": 34.88, "step": 78925, "train_speed(iter/s)": 0.412812 }, { "acc": 0.92576885, "epoch": 2.1371131508407117, "grad_norm": 9.611177444458008, "learning_rate": 4.847147657342723e-06, "loss": 0.38658936, "memory(GiB)": 34.88, "step": 78930, "train_speed(iter/s)": 0.412813 }, { "acc": 0.91455822, "epoch": 2.137248531123927, "grad_norm": 6.710977554321289, "learning_rate": 4.846588357939985e-06, "loss": 0.48582783, "memory(GiB)": 34.88, "step": 78935, "train_speed(iter/s)": 0.412814 }, { "acc": 0.92301636, "epoch": 2.1373839114071425, "grad_norm": 9.472908973693848, "learning_rate": 4.846029060465294e-06, "loss": 0.45242848, "memory(GiB)": 34.88, "step": 78940, "train_speed(iter/s)": 0.412816 }, { "acc": 0.91781559, "epoch": 2.1375192916903583, "grad_norm": 8.435279846191406, "learning_rate": 4.845469764925653e-06, "loss": 0.44531794, "memory(GiB)": 34.88, "step": 78945, "train_speed(iter/s)": 0.412817 }, { "acc": 0.92775621, "epoch": 2.1376546719735736, "grad_norm": 5.736998558044434, "learning_rate": 4.844910471328072e-06, "loss": 0.41993446, "memory(GiB)": 34.88, "step": 78950, "train_speed(iter/s)": 0.412818 }, { "acc": 0.92604809, "epoch": 2.1377900522567894, "grad_norm": 17.056413650512695, "learning_rate": 4.844351179679555e-06, "loss": 0.45371523, "memory(GiB)": 34.88, "step": 78955, "train_speed(iter/s)": 0.412819 }, { "acc": 0.92350607, "epoch": 2.1379254325400048, "grad_norm": 6.355310440063477, "learning_rate": 4.843791889987109e-06, "loss": 0.47601595, "memory(GiB)": 34.88, "step": 78960, "train_speed(iter/s)": 0.412821 }, { "acc": 0.91298294, "epoch": 2.1380608128232206, "grad_norm": 18.71189308166504, "learning_rate": 4.843232602257741e-06, "loss": 0.55435553, "memory(GiB)": 34.88, "step": 78965, "train_speed(iter/s)": 0.412822 }, { "acc": 0.90963802, "epoch": 2.138196193106436, "grad_norm": 14.281970024108887, "learning_rate": 4.842673316498454e-06, "loss": 0.48427153, "memory(GiB)": 34.88, "step": 78970, "train_speed(iter/s)": 0.412823 }, { "acc": 0.91842451, "epoch": 2.1383315733896513, "grad_norm": 14.502614974975586, "learning_rate": 4.842114032716258e-06, "loss": 0.44442973, "memory(GiB)": 34.88, "step": 78975, "train_speed(iter/s)": 0.412824 }, { "acc": 0.91706247, "epoch": 2.138466953672867, "grad_norm": 5.750558376312256, "learning_rate": 4.8415547509181565e-06, "loss": 0.47125311, "memory(GiB)": 34.88, "step": 78980, "train_speed(iter/s)": 0.412826 }, { "acc": 0.92036648, "epoch": 2.1386023339560825, "grad_norm": 6.6044230461120605, "learning_rate": 4.840995471111157e-06, "loss": 0.47973232, "memory(GiB)": 34.88, "step": 78985, "train_speed(iter/s)": 0.412827 }, { "acc": 0.93206558, "epoch": 2.1387377142392983, "grad_norm": 7.536403656005859, "learning_rate": 4.840436193302265e-06, "loss": 0.35834336, "memory(GiB)": 34.88, "step": 78990, "train_speed(iter/s)": 0.412828 }, { "acc": 0.92785864, "epoch": 2.1388730945225136, "grad_norm": 5.874798774719238, "learning_rate": 4.839876917498486e-06, "loss": 0.41999187, "memory(GiB)": 34.88, "step": 78995, "train_speed(iter/s)": 0.412829 }, { "acc": 0.92423897, "epoch": 2.1390084748057294, "grad_norm": 7.632827281951904, "learning_rate": 4.839317643706829e-06, "loss": 0.44932961, "memory(GiB)": 34.88, "step": 79000, "train_speed(iter/s)": 0.412831 }, { "acc": 0.91367531, "epoch": 2.139143855088945, "grad_norm": 9.514111518859863, "learning_rate": 4.838758371934296e-06, "loss": 0.39802985, "memory(GiB)": 34.88, "step": 79005, "train_speed(iter/s)": 0.412832 }, { "acc": 0.92444582, "epoch": 2.1392792353721606, "grad_norm": 8.678155899047852, "learning_rate": 4.838199102187896e-06, "loss": 0.42806349, "memory(GiB)": 34.88, "step": 79010, "train_speed(iter/s)": 0.412833 }, { "acc": 0.89924736, "epoch": 2.139414615655376, "grad_norm": 9.588777542114258, "learning_rate": 4.837639834474631e-06, "loss": 0.55898705, "memory(GiB)": 34.88, "step": 79015, "train_speed(iter/s)": 0.412835 }, { "acc": 0.9282196, "epoch": 2.1395499959385913, "grad_norm": 3.7411670684814453, "learning_rate": 4.837080568801511e-06, "loss": 0.39473672, "memory(GiB)": 34.88, "step": 79020, "train_speed(iter/s)": 0.412836 }, { "acc": 0.89163561, "epoch": 2.139685376221807, "grad_norm": 12.322585105895996, "learning_rate": 4.836521305175541e-06, "loss": 0.64809337, "memory(GiB)": 34.88, "step": 79025, "train_speed(iter/s)": 0.412837 }, { "acc": 0.93138123, "epoch": 2.1398207565050225, "grad_norm": 11.020344734191895, "learning_rate": 4.835962043603725e-06, "loss": 0.37510865, "memory(GiB)": 34.88, "step": 79030, "train_speed(iter/s)": 0.412839 }, { "acc": 0.93150978, "epoch": 2.1399561367882383, "grad_norm": 6.263613700866699, "learning_rate": 4.835402784093072e-06, "loss": 0.43379927, "memory(GiB)": 34.88, "step": 79035, "train_speed(iter/s)": 0.41284 }, { "acc": 0.9081851, "epoch": 2.1400915170714536, "grad_norm": 9.185508728027344, "learning_rate": 4.834843526650586e-06, "loss": 0.51552987, "memory(GiB)": 34.88, "step": 79040, "train_speed(iter/s)": 0.412841 }, { "acc": 0.92026749, "epoch": 2.1402268973546694, "grad_norm": 5.879621505737305, "learning_rate": 4.834284271283273e-06, "loss": 0.38606546, "memory(GiB)": 34.88, "step": 79045, "train_speed(iter/s)": 0.412842 }, { "acc": 0.92751951, "epoch": 2.140362277637885, "grad_norm": 9.892837524414062, "learning_rate": 4.833725017998137e-06, "loss": 0.33050117, "memory(GiB)": 34.88, "step": 79050, "train_speed(iter/s)": 0.412844 }, { "acc": 0.93611946, "epoch": 2.1404976579211006, "grad_norm": 7.047737121582031, "learning_rate": 4.833165766802187e-06, "loss": 0.37940197, "memory(GiB)": 34.88, "step": 79055, "train_speed(iter/s)": 0.412845 }, { "acc": 0.92377462, "epoch": 2.140633038204316, "grad_norm": 17.340059280395508, "learning_rate": 4.832606517702425e-06, "loss": 0.42936339, "memory(GiB)": 34.88, "step": 79060, "train_speed(iter/s)": 0.412846 }, { "acc": 0.93244686, "epoch": 2.1407684184875313, "grad_norm": 8.198748588562012, "learning_rate": 4.832047270705861e-06, "loss": 0.34996173, "memory(GiB)": 34.88, "step": 79065, "train_speed(iter/s)": 0.412848 }, { "acc": 0.9117857, "epoch": 2.140903798770747, "grad_norm": 9.74201488494873, "learning_rate": 4.831488025819497e-06, "loss": 0.45856972, "memory(GiB)": 34.88, "step": 79070, "train_speed(iter/s)": 0.412849 }, { "acc": 0.93711061, "epoch": 2.1410391790539625, "grad_norm": 5.960544586181641, "learning_rate": 4.83092878305034e-06, "loss": 0.34320142, "memory(GiB)": 34.88, "step": 79075, "train_speed(iter/s)": 0.41285 }, { "acc": 0.92259979, "epoch": 2.1411745593371783, "grad_norm": 6.0351104736328125, "learning_rate": 4.830369542405396e-06, "loss": 0.48085155, "memory(GiB)": 34.88, "step": 79080, "train_speed(iter/s)": 0.412851 }, { "acc": 0.91531811, "epoch": 2.1413099396203936, "grad_norm": 12.632452011108398, "learning_rate": 4.829810303891671e-06, "loss": 0.53266044, "memory(GiB)": 34.88, "step": 79085, "train_speed(iter/s)": 0.412853 }, { "acc": 0.92091408, "epoch": 2.1414453199036094, "grad_norm": 10.074396133422852, "learning_rate": 4.82925106751617e-06, "loss": 0.44794159, "memory(GiB)": 34.88, "step": 79090, "train_speed(iter/s)": 0.412854 }, { "acc": 0.92826281, "epoch": 2.141580700186825, "grad_norm": 10.373674392700195, "learning_rate": 4.828691833285898e-06, "loss": 0.35335035, "memory(GiB)": 34.88, "step": 79095, "train_speed(iter/s)": 0.412855 }, { "acc": 0.93075323, "epoch": 2.14171608047004, "grad_norm": 10.270594596862793, "learning_rate": 4.828132601207861e-06, "loss": 0.45436854, "memory(GiB)": 34.88, "step": 79100, "train_speed(iter/s)": 0.412856 }, { "acc": 0.91042004, "epoch": 2.141851460753256, "grad_norm": 8.94747257232666, "learning_rate": 4.827573371289063e-06, "loss": 0.52834592, "memory(GiB)": 34.88, "step": 79105, "train_speed(iter/s)": 0.412858 }, { "acc": 0.91918468, "epoch": 2.1419868410364713, "grad_norm": 6.0503082275390625, "learning_rate": 4.827014143536511e-06, "loss": 0.39809947, "memory(GiB)": 34.88, "step": 79110, "train_speed(iter/s)": 0.412859 }, { "acc": 0.88912601, "epoch": 2.142122221319687, "grad_norm": 21.646005630493164, "learning_rate": 4.826454917957212e-06, "loss": 0.6222177, "memory(GiB)": 34.88, "step": 79115, "train_speed(iter/s)": 0.41286 }, { "acc": 0.92512131, "epoch": 2.1422576016029025, "grad_norm": 4.39598274230957, "learning_rate": 4.825895694558168e-06, "loss": 0.42633729, "memory(GiB)": 34.88, "step": 79120, "train_speed(iter/s)": 0.412861 }, { "acc": 0.92400417, "epoch": 2.1423929818861183, "grad_norm": 10.590904235839844, "learning_rate": 4.825336473346389e-06, "loss": 0.39946275, "memory(GiB)": 34.88, "step": 79125, "train_speed(iter/s)": 0.412863 }, { "acc": 0.92118168, "epoch": 2.1425283621693336, "grad_norm": 8.673312187194824, "learning_rate": 4.824777254328874e-06, "loss": 0.44274778, "memory(GiB)": 34.88, "step": 79130, "train_speed(iter/s)": 0.412864 }, { "acc": 0.91590567, "epoch": 2.142663742452549, "grad_norm": 14.837345123291016, "learning_rate": 4.824218037512633e-06, "loss": 0.47847509, "memory(GiB)": 34.88, "step": 79135, "train_speed(iter/s)": 0.412865 }, { "acc": 0.91404819, "epoch": 2.142799122735765, "grad_norm": 5.553480625152588, "learning_rate": 4.82365882290467e-06, "loss": 0.40389929, "memory(GiB)": 34.88, "step": 79140, "train_speed(iter/s)": 0.412867 }, { "acc": 0.90612831, "epoch": 2.14293450301898, "grad_norm": 22.097061157226562, "learning_rate": 4.82309961051199e-06, "loss": 0.59516878, "memory(GiB)": 34.88, "step": 79145, "train_speed(iter/s)": 0.412868 }, { "acc": 0.92349186, "epoch": 2.143069883302196, "grad_norm": 5.997864723205566, "learning_rate": 4.822540400341598e-06, "loss": 0.37601914, "memory(GiB)": 34.88, "step": 79150, "train_speed(iter/s)": 0.412869 }, { "acc": 0.93868504, "epoch": 2.1432052635854113, "grad_norm": 5.616617679595947, "learning_rate": 4.8219811924005e-06, "loss": 0.34681885, "memory(GiB)": 34.88, "step": 79155, "train_speed(iter/s)": 0.412871 }, { "acc": 0.908006, "epoch": 2.143340643868627, "grad_norm": 9.904367446899414, "learning_rate": 4.821421986695701e-06, "loss": 0.50204105, "memory(GiB)": 34.88, "step": 79160, "train_speed(iter/s)": 0.412872 }, { "acc": 0.91421642, "epoch": 2.1434760241518425, "grad_norm": 6.119482517242432, "learning_rate": 4.8208627832342055e-06, "loss": 0.48163805, "memory(GiB)": 34.88, "step": 79165, "train_speed(iter/s)": 0.412873 }, { "acc": 0.93079033, "epoch": 2.1436114044350583, "grad_norm": 7.915513515472412, "learning_rate": 4.820303582023021e-06, "loss": 0.33789928, "memory(GiB)": 34.88, "step": 79170, "train_speed(iter/s)": 0.412874 }, { "acc": 0.92680054, "epoch": 2.1437467847182736, "grad_norm": 10.282256126403809, "learning_rate": 4.81974438306915e-06, "loss": 0.39780262, "memory(GiB)": 34.88, "step": 79175, "train_speed(iter/s)": 0.412876 }, { "acc": 0.91078243, "epoch": 2.143882165001489, "grad_norm": 4.113945960998535, "learning_rate": 4.819185186379596e-06, "loss": 0.50661573, "memory(GiB)": 34.88, "step": 79180, "train_speed(iter/s)": 0.412877 }, { "acc": 0.91711178, "epoch": 2.144017545284705, "grad_norm": 7.77779483795166, "learning_rate": 4.8186259919613695e-06, "loss": 0.47244167, "memory(GiB)": 34.88, "step": 79185, "train_speed(iter/s)": 0.412878 }, { "acc": 0.90773907, "epoch": 2.14415292556792, "grad_norm": 6.65711784362793, "learning_rate": 4.8180667998214695e-06, "loss": 0.54532328, "memory(GiB)": 34.88, "step": 79190, "train_speed(iter/s)": 0.412879 }, { "acc": 0.91792355, "epoch": 2.144288305851136, "grad_norm": 8.692415237426758, "learning_rate": 4.817507609966906e-06, "loss": 0.44300628, "memory(GiB)": 34.88, "step": 79195, "train_speed(iter/s)": 0.41288 }, { "acc": 0.93899059, "epoch": 2.1444236861343513, "grad_norm": 4.222531795501709, "learning_rate": 4.816948422404679e-06, "loss": 0.35870626, "memory(GiB)": 34.88, "step": 79200, "train_speed(iter/s)": 0.412882 }, { "acc": 0.93501644, "epoch": 2.144559066417567, "grad_norm": 10.374984741210938, "learning_rate": 4.8163892371418e-06, "loss": 0.40882058, "memory(GiB)": 34.88, "step": 79205, "train_speed(iter/s)": 0.412883 }, { "acc": 0.9108285, "epoch": 2.1446944467007825, "grad_norm": 4.344604015350342, "learning_rate": 4.815830054185268e-06, "loss": 0.47112522, "memory(GiB)": 34.88, "step": 79210, "train_speed(iter/s)": 0.412884 }, { "acc": 0.91704025, "epoch": 2.1448298269839983, "grad_norm": 10.072318077087402, "learning_rate": 4.81527087354209e-06, "loss": 0.43732209, "memory(GiB)": 34.88, "step": 79215, "train_speed(iter/s)": 0.412886 }, { "acc": 0.92253733, "epoch": 2.1449652072672136, "grad_norm": 8.162999153137207, "learning_rate": 4.814711695219271e-06, "loss": 0.46927962, "memory(GiB)": 34.88, "step": 79220, "train_speed(iter/s)": 0.412887 }, { "acc": 0.9020195, "epoch": 2.145100587550429, "grad_norm": 5.4831390380859375, "learning_rate": 4.8141525192238156e-06, "loss": 0.5802269, "memory(GiB)": 34.88, "step": 79225, "train_speed(iter/s)": 0.412888 }, { "acc": 0.9298625, "epoch": 2.145235967833645, "grad_norm": 3.12200927734375, "learning_rate": 4.813593345562729e-06, "loss": 0.36504645, "memory(GiB)": 34.88, "step": 79230, "train_speed(iter/s)": 0.412889 }, { "acc": 0.91455898, "epoch": 2.14537134811686, "grad_norm": 6.395054340362549, "learning_rate": 4.813034174243015e-06, "loss": 0.47388058, "memory(GiB)": 34.88, "step": 79235, "train_speed(iter/s)": 0.41289 }, { "acc": 0.90765676, "epoch": 2.145506728400076, "grad_norm": 5.715871810913086, "learning_rate": 4.812475005271678e-06, "loss": 0.53864126, "memory(GiB)": 34.88, "step": 79240, "train_speed(iter/s)": 0.412892 }, { "acc": 0.91883965, "epoch": 2.1456421086832913, "grad_norm": 13.484823226928711, "learning_rate": 4.811915838655724e-06, "loss": 0.47017746, "memory(GiB)": 34.88, "step": 79245, "train_speed(iter/s)": 0.412893 }, { "acc": 0.90551872, "epoch": 2.145777488966507, "grad_norm": 18.6622371673584, "learning_rate": 4.811356674402158e-06, "loss": 0.56768475, "memory(GiB)": 34.88, "step": 79250, "train_speed(iter/s)": 0.412894 }, { "acc": 0.91822739, "epoch": 2.1459128692497225, "grad_norm": 3.9055702686309814, "learning_rate": 4.810797512517984e-06, "loss": 0.42400694, "memory(GiB)": 34.88, "step": 79255, "train_speed(iter/s)": 0.412895 }, { "acc": 0.92144585, "epoch": 2.146048249532938, "grad_norm": 6.54197883605957, "learning_rate": 4.810238353010206e-06, "loss": 0.45103841, "memory(GiB)": 34.88, "step": 79260, "train_speed(iter/s)": 0.412897 }, { "acc": 0.91923485, "epoch": 2.1461836298161536, "grad_norm": 4.802916049957275, "learning_rate": 4.809679195885829e-06, "loss": 0.44190254, "memory(GiB)": 34.88, "step": 79265, "train_speed(iter/s)": 0.412898 }, { "acc": 0.91653967, "epoch": 2.146319010099369, "grad_norm": 5.393738746643066, "learning_rate": 4.8091200411518565e-06, "loss": 0.42592554, "memory(GiB)": 34.88, "step": 79270, "train_speed(iter/s)": 0.412899 }, { "acc": 0.91953316, "epoch": 2.146454390382585, "grad_norm": 11.404875755310059, "learning_rate": 4.808560888815295e-06, "loss": 0.36701162, "memory(GiB)": 34.88, "step": 79275, "train_speed(iter/s)": 0.412901 }, { "acc": 0.91778889, "epoch": 2.1465897706658, "grad_norm": 5.747551441192627, "learning_rate": 4.808001738883148e-06, "loss": 0.43516474, "memory(GiB)": 34.88, "step": 79280, "train_speed(iter/s)": 0.412902 }, { "acc": 0.91907787, "epoch": 2.146725150949016, "grad_norm": 7.358582496643066, "learning_rate": 4.80744259136242e-06, "loss": 0.43159628, "memory(GiB)": 34.88, "step": 79285, "train_speed(iter/s)": 0.412903 }, { "acc": 0.91073723, "epoch": 2.1468605312322313, "grad_norm": 10.133036613464355, "learning_rate": 4.8068834462601145e-06, "loss": 0.56775093, "memory(GiB)": 34.88, "step": 79290, "train_speed(iter/s)": 0.412904 }, { "acc": 0.92477617, "epoch": 2.1469959115154467, "grad_norm": 14.86215591430664, "learning_rate": 4.806324303583238e-06, "loss": 0.44864397, "memory(GiB)": 34.88, "step": 79295, "train_speed(iter/s)": 0.412905 }, { "acc": 0.90876656, "epoch": 2.1471312917986625, "grad_norm": 9.670669555664062, "learning_rate": 4.8057651633387934e-06, "loss": 0.49684324, "memory(GiB)": 34.88, "step": 79300, "train_speed(iter/s)": 0.412907 }, { "acc": 0.90467796, "epoch": 2.147266672081878, "grad_norm": 10.187969207763672, "learning_rate": 4.805206025533785e-06, "loss": 0.55648794, "memory(GiB)": 34.88, "step": 79305, "train_speed(iter/s)": 0.412908 }, { "acc": 0.92911825, "epoch": 2.1474020523650936, "grad_norm": 17.149127960205078, "learning_rate": 4.804646890175218e-06, "loss": 0.4003602, "memory(GiB)": 34.88, "step": 79310, "train_speed(iter/s)": 0.412909 }, { "acc": 0.92559109, "epoch": 2.147537432648309, "grad_norm": 6.648436546325684, "learning_rate": 4.804087757270094e-06, "loss": 0.35467534, "memory(GiB)": 34.88, "step": 79315, "train_speed(iter/s)": 0.41291 }, { "acc": 0.93724957, "epoch": 2.147672812931525, "grad_norm": 7.154228210449219, "learning_rate": 4.803528626825422e-06, "loss": 0.32841916, "memory(GiB)": 34.88, "step": 79320, "train_speed(iter/s)": 0.412911 }, { "acc": 0.91877937, "epoch": 2.14780819321474, "grad_norm": 8.97032356262207, "learning_rate": 4.8029694988482015e-06, "loss": 0.44107594, "memory(GiB)": 34.88, "step": 79325, "train_speed(iter/s)": 0.412913 }, { "acc": 0.90800076, "epoch": 2.147943573497956, "grad_norm": 10.813798904418945, "learning_rate": 4.802410373345439e-06, "loss": 0.55309563, "memory(GiB)": 34.88, "step": 79330, "train_speed(iter/s)": 0.412914 }, { "acc": 0.91040077, "epoch": 2.1480789537811713, "grad_norm": 7.867447853088379, "learning_rate": 4.8018512503241406e-06, "loss": 0.50819621, "memory(GiB)": 34.88, "step": 79335, "train_speed(iter/s)": 0.412915 }, { "acc": 0.91476517, "epoch": 2.1482143340643867, "grad_norm": 17.766748428344727, "learning_rate": 4.801292129791305e-06, "loss": 0.53141403, "memory(GiB)": 34.88, "step": 79340, "train_speed(iter/s)": 0.412916 }, { "acc": 0.92099867, "epoch": 2.1483497143476025, "grad_norm": 4.040147304534912, "learning_rate": 4.8007330117539415e-06, "loss": 0.43881269, "memory(GiB)": 34.88, "step": 79345, "train_speed(iter/s)": 0.412918 }, { "acc": 0.92193508, "epoch": 2.148485094630818, "grad_norm": 8.544143676757812, "learning_rate": 4.800173896219052e-06, "loss": 0.46054897, "memory(GiB)": 34.88, "step": 79350, "train_speed(iter/s)": 0.412919 }, { "acc": 0.92074337, "epoch": 2.1486204749140336, "grad_norm": 8.804139137268066, "learning_rate": 4.799614783193639e-06, "loss": 0.46467247, "memory(GiB)": 34.88, "step": 79355, "train_speed(iter/s)": 0.41292 }, { "acc": 0.91915722, "epoch": 2.148755855197249, "grad_norm": 8.618033409118652, "learning_rate": 4.799055672684709e-06, "loss": 0.42832208, "memory(GiB)": 34.88, "step": 79360, "train_speed(iter/s)": 0.412921 }, { "acc": 0.94411707, "epoch": 2.148891235480465, "grad_norm": 8.174803733825684, "learning_rate": 4.798496564699265e-06, "loss": 0.33084409, "memory(GiB)": 34.88, "step": 79365, "train_speed(iter/s)": 0.412923 }, { "acc": 0.93101444, "epoch": 2.14902661576368, "grad_norm": 7.949905872344971, "learning_rate": 4.7979374592443104e-06, "loss": 0.4325974, "memory(GiB)": 34.88, "step": 79370, "train_speed(iter/s)": 0.412924 }, { "acc": 0.93150692, "epoch": 2.149161996046896, "grad_norm": 5.078936576843262, "learning_rate": 4.79737835632685e-06, "loss": 0.29063969, "memory(GiB)": 34.88, "step": 79375, "train_speed(iter/s)": 0.412925 }, { "acc": 0.9073864, "epoch": 2.1492973763301113, "grad_norm": 10.525277137756348, "learning_rate": 4.796819255953889e-06, "loss": 0.45998797, "memory(GiB)": 34.88, "step": 79380, "train_speed(iter/s)": 0.412927 }, { "acc": 0.91398077, "epoch": 2.1494327566133267, "grad_norm": 11.37070083618164, "learning_rate": 4.7962601581324275e-06, "loss": 0.51069193, "memory(GiB)": 34.88, "step": 79385, "train_speed(iter/s)": 0.412928 }, { "acc": 0.92157869, "epoch": 2.1495681368965425, "grad_norm": 11.235054016113281, "learning_rate": 4.795701062869473e-06, "loss": 0.42618909, "memory(GiB)": 34.88, "step": 79390, "train_speed(iter/s)": 0.412929 }, { "acc": 0.93698931, "epoch": 2.149703517179758, "grad_norm": 8.693994522094727, "learning_rate": 4.795141970172026e-06, "loss": 0.31678205, "memory(GiB)": 34.88, "step": 79395, "train_speed(iter/s)": 0.412931 }, { "acc": 0.91453314, "epoch": 2.1498388974629736, "grad_norm": 28.418363571166992, "learning_rate": 4.794582880047093e-06, "loss": 0.48208795, "memory(GiB)": 34.88, "step": 79400, "train_speed(iter/s)": 0.412932 }, { "acc": 0.91331587, "epoch": 2.149974277746189, "grad_norm": 8.24815559387207, "learning_rate": 4.794023792501676e-06, "loss": 0.48254743, "memory(GiB)": 34.88, "step": 79405, "train_speed(iter/s)": 0.412933 }, { "acc": 0.91523647, "epoch": 2.150109658029405, "grad_norm": 6.129149436950684, "learning_rate": 4.793464707542778e-06, "loss": 0.41476526, "memory(GiB)": 34.88, "step": 79410, "train_speed(iter/s)": 0.412934 }, { "acc": 0.90759325, "epoch": 2.15024503831262, "grad_norm": 11.536237716674805, "learning_rate": 4.792905625177405e-06, "loss": 0.57062984, "memory(GiB)": 34.88, "step": 79415, "train_speed(iter/s)": 0.412936 }, { "acc": 0.92515583, "epoch": 2.1503804185958355, "grad_norm": 6.107390880584717, "learning_rate": 4.79234654541256e-06, "loss": 0.36547816, "memory(GiB)": 34.88, "step": 79420, "train_speed(iter/s)": 0.412937 }, { "acc": 0.94484892, "epoch": 2.1505157988790513, "grad_norm": 4.1901702880859375, "learning_rate": 4.791787468255246e-06, "loss": 0.25915413, "memory(GiB)": 34.88, "step": 79425, "train_speed(iter/s)": 0.412938 }, { "acc": 0.90613976, "epoch": 2.1506511791622667, "grad_norm": 12.133245468139648, "learning_rate": 4.791228393712466e-06, "loss": 0.52838631, "memory(GiB)": 34.88, "step": 79430, "train_speed(iter/s)": 0.412939 }, { "acc": 0.90795574, "epoch": 2.1507865594454825, "grad_norm": 35.383060455322266, "learning_rate": 4.790669321791225e-06, "loss": 0.63515801, "memory(GiB)": 34.88, "step": 79435, "train_speed(iter/s)": 0.412941 }, { "acc": 0.91649914, "epoch": 2.150921939728698, "grad_norm": 5.541571140289307, "learning_rate": 4.790110252498523e-06, "loss": 0.45670705, "memory(GiB)": 34.88, "step": 79440, "train_speed(iter/s)": 0.412942 }, { "acc": 0.92964611, "epoch": 2.1510573200119136, "grad_norm": 6.9572224617004395, "learning_rate": 4.789551185841368e-06, "loss": 0.35756447, "memory(GiB)": 34.88, "step": 79445, "train_speed(iter/s)": 0.412943 }, { "acc": 0.91662464, "epoch": 2.151192700295129, "grad_norm": 13.020352363586426, "learning_rate": 4.788992121826762e-06, "loss": 0.49147329, "memory(GiB)": 34.88, "step": 79450, "train_speed(iter/s)": 0.412945 }, { "acc": 0.9415308, "epoch": 2.1513280805783443, "grad_norm": 5.745183944702148, "learning_rate": 4.7884330604617065e-06, "loss": 0.30737567, "memory(GiB)": 34.88, "step": 79455, "train_speed(iter/s)": 0.412946 }, { "acc": 0.931248, "epoch": 2.15146346086156, "grad_norm": 9.803441047668457, "learning_rate": 4.7878740017532075e-06, "loss": 0.37196116, "memory(GiB)": 34.88, "step": 79460, "train_speed(iter/s)": 0.412947 }, { "acc": 0.91634941, "epoch": 2.1515988411447755, "grad_norm": 6.723595142364502, "learning_rate": 4.7873149457082654e-06, "loss": 0.51015892, "memory(GiB)": 34.88, "step": 79465, "train_speed(iter/s)": 0.412949 }, { "acc": 0.93221521, "epoch": 2.1517342214279913, "grad_norm": 9.04503059387207, "learning_rate": 4.786755892333887e-06, "loss": 0.36266973, "memory(GiB)": 34.88, "step": 79470, "train_speed(iter/s)": 0.41295 }, { "acc": 0.92350492, "epoch": 2.1518696017112067, "grad_norm": 4.919328689575195, "learning_rate": 4.786196841637071e-06, "loss": 0.41957679, "memory(GiB)": 34.88, "step": 79475, "train_speed(iter/s)": 0.412951 }, { "acc": 0.91463089, "epoch": 2.1520049819944225, "grad_norm": 11.703573226928711, "learning_rate": 4.785637793624824e-06, "loss": 0.48034372, "memory(GiB)": 34.88, "step": 79480, "train_speed(iter/s)": 0.412952 }, { "acc": 0.91468458, "epoch": 2.152140362277638, "grad_norm": 18.241395950317383, "learning_rate": 4.78507874830415e-06, "loss": 0.44690571, "memory(GiB)": 34.88, "step": 79485, "train_speed(iter/s)": 0.412954 }, { "acc": 0.93616142, "epoch": 2.1522757425608536, "grad_norm": 7.900692939758301, "learning_rate": 4.784519705682048e-06, "loss": 0.36241343, "memory(GiB)": 34.88, "step": 79490, "train_speed(iter/s)": 0.412955 }, { "acc": 0.92179298, "epoch": 2.152411122844069, "grad_norm": 8.338563919067383, "learning_rate": 4.783960665765526e-06, "loss": 0.42242002, "memory(GiB)": 34.88, "step": 79495, "train_speed(iter/s)": 0.412956 }, { "acc": 0.92897673, "epoch": 2.1525465031272843, "grad_norm": 8.407577514648438, "learning_rate": 4.783401628561582e-06, "loss": 0.40677686, "memory(GiB)": 34.88, "step": 79500, "train_speed(iter/s)": 0.412958 }, { "acc": 0.92936211, "epoch": 2.1526818834105, "grad_norm": 6.705472469329834, "learning_rate": 4.782842594077225e-06, "loss": 0.41381264, "memory(GiB)": 34.88, "step": 79505, "train_speed(iter/s)": 0.412959 }, { "acc": 0.92162571, "epoch": 2.1528172636937155, "grad_norm": 7.3890180587768555, "learning_rate": 4.782283562319453e-06, "loss": 0.38857965, "memory(GiB)": 34.88, "step": 79510, "train_speed(iter/s)": 0.41296 }, { "acc": 0.91297417, "epoch": 2.1529526439769313, "grad_norm": 13.877685546875, "learning_rate": 4.781724533295269e-06, "loss": 0.45427852, "memory(GiB)": 34.88, "step": 79515, "train_speed(iter/s)": 0.412961 }, { "acc": 0.90137835, "epoch": 2.1530880242601467, "grad_norm": 14.620171546936035, "learning_rate": 4.78116550701168e-06, "loss": 0.64781094, "memory(GiB)": 34.88, "step": 79520, "train_speed(iter/s)": 0.412963 }, { "acc": 0.9290123, "epoch": 2.1532234045433625, "grad_norm": 4.921783924102783, "learning_rate": 4.780606483475686e-06, "loss": 0.39727542, "memory(GiB)": 34.88, "step": 79525, "train_speed(iter/s)": 0.412964 }, { "acc": 0.92048874, "epoch": 2.153358784826578, "grad_norm": 7.1097235679626465, "learning_rate": 4.78004746269429e-06, "loss": 0.39561448, "memory(GiB)": 34.88, "step": 79530, "train_speed(iter/s)": 0.412965 }, { "acc": 0.91700087, "epoch": 2.1534941651097936, "grad_norm": 4.3653883934021, "learning_rate": 4.779488444674495e-06, "loss": 0.49330096, "memory(GiB)": 34.88, "step": 79535, "train_speed(iter/s)": 0.412966 }, { "acc": 0.92886715, "epoch": 2.153629545393009, "grad_norm": 6.42205286026001, "learning_rate": 4.778929429423304e-06, "loss": 0.31125932, "memory(GiB)": 34.88, "step": 79540, "train_speed(iter/s)": 0.412967 }, { "acc": 0.92513914, "epoch": 2.1537649256762244, "grad_norm": 8.142126083374023, "learning_rate": 4.778370416947721e-06, "loss": 0.41778774, "memory(GiB)": 34.88, "step": 79545, "train_speed(iter/s)": 0.412969 }, { "acc": 0.9183898, "epoch": 2.15390030595944, "grad_norm": 7.115795612335205, "learning_rate": 4.7778114072547475e-06, "loss": 0.36907296, "memory(GiB)": 34.88, "step": 79550, "train_speed(iter/s)": 0.41297 }, { "acc": 0.92392626, "epoch": 2.1540356862426555, "grad_norm": 5.7336344718933105, "learning_rate": 4.777252400351384e-06, "loss": 0.48066568, "memory(GiB)": 34.88, "step": 79555, "train_speed(iter/s)": 0.412971 }, { "acc": 0.91268396, "epoch": 2.1541710665258713, "grad_norm": 11.774785041809082, "learning_rate": 4.776693396244636e-06, "loss": 0.49070945, "memory(GiB)": 34.88, "step": 79560, "train_speed(iter/s)": 0.412973 }, { "acc": 0.91639795, "epoch": 2.1543064468090867, "grad_norm": 5.994266510009766, "learning_rate": 4.776134394941507e-06, "loss": 0.47942791, "memory(GiB)": 34.88, "step": 79565, "train_speed(iter/s)": 0.412974 }, { "acc": 0.92408733, "epoch": 2.1544418270923025, "grad_norm": 6.161120891571045, "learning_rate": 4.775575396448997e-06, "loss": 0.38890424, "memory(GiB)": 34.88, "step": 79570, "train_speed(iter/s)": 0.412975 }, { "acc": 0.92169333, "epoch": 2.154577207375518, "grad_norm": 14.887008666992188, "learning_rate": 4.77501640077411e-06, "loss": 0.38620708, "memory(GiB)": 34.88, "step": 79575, "train_speed(iter/s)": 0.412976 }, { "acc": 0.92375689, "epoch": 2.154712587658733, "grad_norm": 17.065553665161133, "learning_rate": 4.774457407923847e-06, "loss": 0.41344204, "memory(GiB)": 34.88, "step": 79580, "train_speed(iter/s)": 0.412978 }, { "acc": 0.93203239, "epoch": 2.154847967941949, "grad_norm": 3.2868566513061523, "learning_rate": 4.773898417905215e-06, "loss": 0.41755409, "memory(GiB)": 34.88, "step": 79585, "train_speed(iter/s)": 0.412979 }, { "acc": 0.92677212, "epoch": 2.1549833482251644, "grad_norm": 12.483661651611328, "learning_rate": 4.77333943072521e-06, "loss": 0.36319118, "memory(GiB)": 34.88, "step": 79590, "train_speed(iter/s)": 0.41298 }, { "acc": 0.91737843, "epoch": 2.15511872850838, "grad_norm": 12.006592750549316, "learning_rate": 4.772780446390837e-06, "loss": 0.53081741, "memory(GiB)": 34.88, "step": 79595, "train_speed(iter/s)": 0.412982 }, { "acc": 0.92148247, "epoch": 2.1552541087915955, "grad_norm": 9.168079376220703, "learning_rate": 4.772221464909101e-06, "loss": 0.39108069, "memory(GiB)": 34.88, "step": 79600, "train_speed(iter/s)": 0.412983 }, { "acc": 0.92261486, "epoch": 2.1553894890748113, "grad_norm": 3.633296251296997, "learning_rate": 4.771662486287001e-06, "loss": 0.47587166, "memory(GiB)": 34.88, "step": 79605, "train_speed(iter/s)": 0.412984 }, { "acc": 0.94022522, "epoch": 2.1555248693580267, "grad_norm": 5.079583644866943, "learning_rate": 4.771103510531541e-06, "loss": 0.35119889, "memory(GiB)": 34.88, "step": 79610, "train_speed(iter/s)": 0.412985 }, { "acc": 0.92005329, "epoch": 2.155660249641242, "grad_norm": 7.497159004211426, "learning_rate": 4.770544537649722e-06, "loss": 0.53452134, "memory(GiB)": 34.88, "step": 79615, "train_speed(iter/s)": 0.412987 }, { "acc": 0.91274681, "epoch": 2.155795629924458, "grad_norm": 13.633106231689453, "learning_rate": 4.7699855676485485e-06, "loss": 0.51280046, "memory(GiB)": 34.88, "step": 79620, "train_speed(iter/s)": 0.412988 }, { "acc": 0.93076801, "epoch": 2.155931010207673, "grad_norm": 6.732118606567383, "learning_rate": 4.769426600535019e-06, "loss": 0.34649973, "memory(GiB)": 34.88, "step": 79625, "train_speed(iter/s)": 0.412989 }, { "acc": 0.92571659, "epoch": 2.156066390490889, "grad_norm": 3.58673095703125, "learning_rate": 4.768867636316141e-06, "loss": 0.45949998, "memory(GiB)": 34.88, "step": 79630, "train_speed(iter/s)": 0.41299 }, { "acc": 0.92242298, "epoch": 2.1562017707741044, "grad_norm": 7.672634124755859, "learning_rate": 4.768308674998912e-06, "loss": 0.44543219, "memory(GiB)": 34.88, "step": 79635, "train_speed(iter/s)": 0.412992 }, { "acc": 0.91997728, "epoch": 2.15633715105732, "grad_norm": 8.262466430664062, "learning_rate": 4.767749716590335e-06, "loss": 0.47867384, "memory(GiB)": 34.88, "step": 79640, "train_speed(iter/s)": 0.412993 }, { "acc": 0.92282791, "epoch": 2.1564725313405355, "grad_norm": 4.217452049255371, "learning_rate": 4.7671907610974136e-06, "loss": 0.43369918, "memory(GiB)": 34.88, "step": 79645, "train_speed(iter/s)": 0.412994 }, { "acc": 0.90630035, "epoch": 2.1566079116237513, "grad_norm": 19.974178314208984, "learning_rate": 4.766631808527147e-06, "loss": 0.5723031, "memory(GiB)": 34.88, "step": 79650, "train_speed(iter/s)": 0.412995 }, { "acc": 0.91384096, "epoch": 2.1567432919069667, "grad_norm": 7.234443664550781, "learning_rate": 4.7660728588865415e-06, "loss": 0.45043154, "memory(GiB)": 34.88, "step": 79655, "train_speed(iter/s)": 0.412996 }, { "acc": 0.92079859, "epoch": 2.156878672190182, "grad_norm": 7.634335041046143, "learning_rate": 4.765513912182595e-06, "loss": 0.45367465, "memory(GiB)": 34.88, "step": 79660, "train_speed(iter/s)": 0.412998 }, { "acc": 0.92315445, "epoch": 2.157014052473398, "grad_norm": 7.827051162719727, "learning_rate": 4.764954968422311e-06, "loss": 0.47803955, "memory(GiB)": 34.88, "step": 79665, "train_speed(iter/s)": 0.412999 }, { "acc": 0.93226147, "epoch": 2.157149432756613, "grad_norm": 8.481030464172363, "learning_rate": 4.7643960276126926e-06, "loss": 0.37255263, "memory(GiB)": 34.88, "step": 79670, "train_speed(iter/s)": 0.413 }, { "acc": 0.93210649, "epoch": 2.157284813039829, "grad_norm": 4.003203868865967, "learning_rate": 4.763837089760739e-06, "loss": 0.42369361, "memory(GiB)": 34.88, "step": 79675, "train_speed(iter/s)": 0.413001 }, { "acc": 0.91351357, "epoch": 2.1574201933230444, "grad_norm": 9.436697006225586, "learning_rate": 4.763278154873455e-06, "loss": 0.51123753, "memory(GiB)": 34.88, "step": 79680, "train_speed(iter/s)": 0.413002 }, { "acc": 0.91855001, "epoch": 2.15755557360626, "grad_norm": 8.079412460327148, "learning_rate": 4.762719222957839e-06, "loss": 0.35397704, "memory(GiB)": 34.88, "step": 79685, "train_speed(iter/s)": 0.413003 }, { "acc": 0.91793098, "epoch": 2.1576909538894755, "grad_norm": 4.390631675720215, "learning_rate": 4.7621602940208955e-06, "loss": 0.48029757, "memory(GiB)": 34.88, "step": 79690, "train_speed(iter/s)": 0.413005 }, { "acc": 0.91089859, "epoch": 2.1578263341726913, "grad_norm": 7.399205684661865, "learning_rate": 4.761601368069625e-06, "loss": 0.59772568, "memory(GiB)": 34.88, "step": 79695, "train_speed(iter/s)": 0.413006 }, { "acc": 0.93860331, "epoch": 2.1579617144559067, "grad_norm": 4.943047046661377, "learning_rate": 4.7610424451110294e-06, "loss": 0.33800957, "memory(GiB)": 34.88, "step": 79700, "train_speed(iter/s)": 0.413007 }, { "acc": 0.93752289, "epoch": 2.158097094739122, "grad_norm": 15.296243667602539, "learning_rate": 4.760483525152109e-06, "loss": 0.29226322, "memory(GiB)": 34.88, "step": 79705, "train_speed(iter/s)": 0.413009 }, { "acc": 0.91917248, "epoch": 2.158232475022338, "grad_norm": 8.287906646728516, "learning_rate": 4.759924608199867e-06, "loss": 0.38195865, "memory(GiB)": 34.88, "step": 79710, "train_speed(iter/s)": 0.41301 }, { "acc": 0.92071648, "epoch": 2.158367855305553, "grad_norm": 8.276261329650879, "learning_rate": 4.759365694261307e-06, "loss": 0.44251604, "memory(GiB)": 34.88, "step": 79715, "train_speed(iter/s)": 0.413011 }, { "acc": 0.90825481, "epoch": 2.158503235588769, "grad_norm": 10.563000679016113, "learning_rate": 4.758806783343425e-06, "loss": 0.46482458, "memory(GiB)": 34.88, "step": 79720, "train_speed(iter/s)": 0.413012 }, { "acc": 0.9098999, "epoch": 2.1586386158719844, "grad_norm": 7.714794635772705, "learning_rate": 4.758247875453228e-06, "loss": 0.4731843, "memory(GiB)": 34.88, "step": 79725, "train_speed(iter/s)": 0.413014 }, { "acc": 0.91266346, "epoch": 2.1587739961552, "grad_norm": 7.582237243652344, "learning_rate": 4.757688970597713e-06, "loss": 0.48939295, "memory(GiB)": 34.88, "step": 79730, "train_speed(iter/s)": 0.413015 }, { "acc": 0.91142015, "epoch": 2.1589093764384155, "grad_norm": 10.977925300598145, "learning_rate": 4.757130068783884e-06, "loss": 0.54565654, "memory(GiB)": 34.88, "step": 79735, "train_speed(iter/s)": 0.413016 }, { "acc": 0.93359718, "epoch": 2.159044756721631, "grad_norm": 10.181501388549805, "learning_rate": 4.75657117001874e-06, "loss": 0.35412683, "memory(GiB)": 34.88, "step": 79740, "train_speed(iter/s)": 0.413018 }, { "acc": 0.93019304, "epoch": 2.1591801370048467, "grad_norm": 6.254547595977783, "learning_rate": 4.756012274309285e-06, "loss": 0.36855628, "memory(GiB)": 34.88, "step": 79745, "train_speed(iter/s)": 0.413019 }, { "acc": 0.93386288, "epoch": 2.159315517288062, "grad_norm": 12.909419059753418, "learning_rate": 4.75545338166252e-06, "loss": 0.3250489, "memory(GiB)": 34.88, "step": 79750, "train_speed(iter/s)": 0.41302 }, { "acc": 0.89858007, "epoch": 2.159450897571278, "grad_norm": 12.313372611999512, "learning_rate": 4.754894492085445e-06, "loss": 0.58162117, "memory(GiB)": 34.88, "step": 79755, "train_speed(iter/s)": 0.413022 }, { "acc": 0.91849537, "epoch": 2.159586277854493, "grad_norm": 4.845367908477783, "learning_rate": 4.7543356055850624e-06, "loss": 0.49352541, "memory(GiB)": 34.88, "step": 79760, "train_speed(iter/s)": 0.413023 }, { "acc": 0.91695395, "epoch": 2.159721658137709, "grad_norm": 7.294454097747803, "learning_rate": 4.753776722168371e-06, "loss": 0.46792998, "memory(GiB)": 34.88, "step": 79765, "train_speed(iter/s)": 0.413024 }, { "acc": 0.92211695, "epoch": 2.1598570384209244, "grad_norm": 8.170083999633789, "learning_rate": 4.753217841842375e-06, "loss": 0.41266432, "memory(GiB)": 34.88, "step": 79770, "train_speed(iter/s)": 0.413026 }, { "acc": 0.9162138, "epoch": 2.1599924187041397, "grad_norm": 9.63770866394043, "learning_rate": 4.752658964614072e-06, "loss": 0.45475769, "memory(GiB)": 34.88, "step": 79775, "train_speed(iter/s)": 0.413027 }, { "acc": 0.91741257, "epoch": 2.1601277989873555, "grad_norm": 14.906280517578125, "learning_rate": 4.752100090490466e-06, "loss": 0.49466019, "memory(GiB)": 34.88, "step": 79780, "train_speed(iter/s)": 0.413028 }, { "acc": 0.92143736, "epoch": 2.160263179270571, "grad_norm": 15.36988639831543, "learning_rate": 4.751541219478557e-06, "loss": 0.42502708, "memory(GiB)": 34.88, "step": 79785, "train_speed(iter/s)": 0.413029 }, { "acc": 0.92596674, "epoch": 2.1603985595537867, "grad_norm": 8.41077995300293, "learning_rate": 4.7509823515853455e-06, "loss": 0.42996035, "memory(GiB)": 34.88, "step": 79790, "train_speed(iter/s)": 0.41303 }, { "acc": 0.92413073, "epoch": 2.160533939837002, "grad_norm": 7.283998489379883, "learning_rate": 4.750423486817834e-06, "loss": 0.32398725, "memory(GiB)": 34.88, "step": 79795, "train_speed(iter/s)": 0.413031 }, { "acc": 0.9181694, "epoch": 2.160669320120218, "grad_norm": 3.7148361206054688, "learning_rate": 4.749864625183021e-06, "loss": 0.48671145, "memory(GiB)": 34.88, "step": 79800, "train_speed(iter/s)": 0.413033 }, { "acc": 0.9100729, "epoch": 2.160804700403433, "grad_norm": 24.812742233276367, "learning_rate": 4.74930576668791e-06, "loss": 0.57637973, "memory(GiB)": 34.88, "step": 79805, "train_speed(iter/s)": 0.413034 }, { "acc": 0.90040798, "epoch": 2.160940080686649, "grad_norm": 9.216221809387207, "learning_rate": 4.748746911339499e-06, "loss": 0.65719686, "memory(GiB)": 34.88, "step": 79810, "train_speed(iter/s)": 0.413035 }, { "acc": 0.92331715, "epoch": 2.1610754609698644, "grad_norm": 13.654298782348633, "learning_rate": 4.748188059144791e-06, "loss": 0.42923155, "memory(GiB)": 34.88, "step": 79815, "train_speed(iter/s)": 0.413036 }, { "acc": 0.92408314, "epoch": 2.1612108412530797, "grad_norm": 26.291988372802734, "learning_rate": 4.747629210110784e-06, "loss": 0.42979584, "memory(GiB)": 34.88, "step": 79820, "train_speed(iter/s)": 0.413037 }, { "acc": 0.9246067, "epoch": 2.1613462215362955, "grad_norm": 8.130620956420898, "learning_rate": 4.747070364244481e-06, "loss": 0.46150694, "memory(GiB)": 34.88, "step": 79825, "train_speed(iter/s)": 0.413038 }, { "acc": 0.92249832, "epoch": 2.161481601819511, "grad_norm": 8.768128395080566, "learning_rate": 4.746511521552885e-06, "loss": 0.40326052, "memory(GiB)": 34.88, "step": 79830, "train_speed(iter/s)": 0.41304 }, { "acc": 0.91754513, "epoch": 2.1616169821027267, "grad_norm": 11.090459823608398, "learning_rate": 4.74595268204299e-06, "loss": 0.42355547, "memory(GiB)": 34.88, "step": 79835, "train_speed(iter/s)": 0.413041 }, { "acc": 0.93018579, "epoch": 2.161752362385942, "grad_norm": 4.604835033416748, "learning_rate": 4.745393845721804e-06, "loss": 0.34878008, "memory(GiB)": 34.88, "step": 79840, "train_speed(iter/s)": 0.413043 }, { "acc": 0.93113804, "epoch": 2.161887742669158, "grad_norm": 10.143538475036621, "learning_rate": 4.7448350125963225e-06, "loss": 0.45853672, "memory(GiB)": 34.88, "step": 79845, "train_speed(iter/s)": 0.413044 }, { "acc": 0.92675228, "epoch": 2.162023122952373, "grad_norm": 12.643975257873535, "learning_rate": 4.744276182673547e-06, "loss": 0.451019, "memory(GiB)": 34.88, "step": 79850, "train_speed(iter/s)": 0.413045 }, { "acc": 0.92444839, "epoch": 2.162158503235589, "grad_norm": 5.881933689117432, "learning_rate": 4.743717355960477e-06, "loss": 0.4143363, "memory(GiB)": 34.88, "step": 79855, "train_speed(iter/s)": 0.413046 }, { "acc": 0.90824947, "epoch": 2.1622938835188044, "grad_norm": 12.092860221862793, "learning_rate": 4.743158532464115e-06, "loss": 0.49573641, "memory(GiB)": 34.88, "step": 79860, "train_speed(iter/s)": 0.413047 }, { "acc": 0.91450777, "epoch": 2.1624292638020197, "grad_norm": 7.923347473144531, "learning_rate": 4.7425997121914616e-06, "loss": 0.41400924, "memory(GiB)": 34.88, "step": 79865, "train_speed(iter/s)": 0.413049 }, { "acc": 0.91660023, "epoch": 2.1625646440852355, "grad_norm": 6.91914176940918, "learning_rate": 4.742040895149515e-06, "loss": 0.43872409, "memory(GiB)": 34.88, "step": 79870, "train_speed(iter/s)": 0.41305 }, { "acc": 0.92336369, "epoch": 2.162700024368451, "grad_norm": 3.1703624725341797, "learning_rate": 4.741482081345278e-06, "loss": 0.41139717, "memory(GiB)": 34.88, "step": 79875, "train_speed(iter/s)": 0.413051 }, { "acc": 0.92869167, "epoch": 2.1628354046516667, "grad_norm": 5.918044090270996, "learning_rate": 4.740923270785748e-06, "loss": 0.40407228, "memory(GiB)": 34.88, "step": 79880, "train_speed(iter/s)": 0.413052 }, { "acc": 0.91876965, "epoch": 2.162970784934882, "grad_norm": 10.525945663452148, "learning_rate": 4.740364463477928e-06, "loss": 0.39354672, "memory(GiB)": 34.88, "step": 79885, "train_speed(iter/s)": 0.413053 }, { "acc": 0.90791063, "epoch": 2.163106165218098, "grad_norm": 8.095348358154297, "learning_rate": 4.739805659428816e-06, "loss": 0.49601717, "memory(GiB)": 34.88, "step": 79890, "train_speed(iter/s)": 0.413055 }, { "acc": 0.93228369, "epoch": 2.163241545501313, "grad_norm": 6.436591625213623, "learning_rate": 4.739246858645411e-06, "loss": 0.34784675, "memory(GiB)": 34.88, "step": 79895, "train_speed(iter/s)": 0.413056 }, { "acc": 0.91382141, "epoch": 2.1633769257845286, "grad_norm": 8.136282920837402, "learning_rate": 4.738688061134718e-06, "loss": 0.49884429, "memory(GiB)": 34.88, "step": 79900, "train_speed(iter/s)": 0.413057 }, { "acc": 0.9271265, "epoch": 2.1635123060677444, "grad_norm": 7.04243278503418, "learning_rate": 4.738129266903731e-06, "loss": 0.38205152, "memory(GiB)": 34.88, "step": 79905, "train_speed(iter/s)": 0.413058 }, { "acc": 0.90919399, "epoch": 2.1636476863509597, "grad_norm": 5.697340965270996, "learning_rate": 4.737570475959455e-06, "loss": 0.4828877, "memory(GiB)": 34.88, "step": 79910, "train_speed(iter/s)": 0.413059 }, { "acc": 0.92887554, "epoch": 2.1637830666341755, "grad_norm": 9.44787883758545, "learning_rate": 4.737011688308887e-06, "loss": 0.42451963, "memory(GiB)": 34.88, "step": 79915, "train_speed(iter/s)": 0.413061 }, { "acc": 0.9249239, "epoch": 2.163918446917391, "grad_norm": 6.675095558166504, "learning_rate": 4.736452903959029e-06, "loss": 0.36564653, "memory(GiB)": 34.88, "step": 79920, "train_speed(iter/s)": 0.413062 }, { "acc": 0.92095156, "epoch": 2.1640538272006067, "grad_norm": 3.7748382091522217, "learning_rate": 4.73589412291688e-06, "loss": 0.41522369, "memory(GiB)": 34.88, "step": 79925, "train_speed(iter/s)": 0.413063 }, { "acc": 0.92855892, "epoch": 2.164189207483822, "grad_norm": 6.444827556610107, "learning_rate": 4.735335345189437e-06, "loss": 0.44541535, "memory(GiB)": 34.88, "step": 79930, "train_speed(iter/s)": 0.413064 }, { "acc": 0.93122063, "epoch": 2.1643245877670374, "grad_norm": 7.926358699798584, "learning_rate": 4.734776570783706e-06, "loss": 0.39672194, "memory(GiB)": 34.88, "step": 79935, "train_speed(iter/s)": 0.413065 }, { "acc": 0.94463444, "epoch": 2.164459968050253, "grad_norm": 9.406810760498047, "learning_rate": 4.73421779970668e-06, "loss": 0.30415459, "memory(GiB)": 34.88, "step": 79940, "train_speed(iter/s)": 0.413066 }, { "acc": 0.91485138, "epoch": 2.1645953483334686, "grad_norm": 4.507162094116211, "learning_rate": 4.733659031965364e-06, "loss": 0.54778652, "memory(GiB)": 34.88, "step": 79945, "train_speed(iter/s)": 0.413068 }, { "acc": 0.92586203, "epoch": 2.1647307286166844, "grad_norm": 6.995028972625732, "learning_rate": 4.733100267566753e-06, "loss": 0.36483333, "memory(GiB)": 34.88, "step": 79950, "train_speed(iter/s)": 0.413069 }, { "acc": 0.92693386, "epoch": 2.1648661088998997, "grad_norm": 7.134260654449463, "learning_rate": 4.732541506517851e-06, "loss": 0.41896019, "memory(GiB)": 34.88, "step": 79955, "train_speed(iter/s)": 0.41307 }, { "acc": 0.91886158, "epoch": 2.1650014891831155, "grad_norm": 5.997186183929443, "learning_rate": 4.731982748825655e-06, "loss": 0.42016845, "memory(GiB)": 34.88, "step": 79960, "train_speed(iter/s)": 0.413071 }, { "acc": 0.92316856, "epoch": 2.165136869466331, "grad_norm": 11.653451919555664, "learning_rate": 4.731423994497165e-06, "loss": 0.45215836, "memory(GiB)": 34.88, "step": 79965, "train_speed(iter/s)": 0.413072 }, { "acc": 0.95014181, "epoch": 2.1652722497495462, "grad_norm": 3.601591110229492, "learning_rate": 4.730865243539382e-06, "loss": 0.23624191, "memory(GiB)": 34.88, "step": 79970, "train_speed(iter/s)": 0.413073 }, { "acc": 0.94226799, "epoch": 2.165407630032762, "grad_norm": 9.903886795043945, "learning_rate": 4.730306495959303e-06, "loss": 0.34814086, "memory(GiB)": 34.88, "step": 79975, "train_speed(iter/s)": 0.413074 }, { "acc": 0.92964306, "epoch": 2.1655430103159774, "grad_norm": 11.589351654052734, "learning_rate": 4.729747751763929e-06, "loss": 0.42076077, "memory(GiB)": 34.88, "step": 79980, "train_speed(iter/s)": 0.413075 }, { "acc": 0.93034201, "epoch": 2.165678390599193, "grad_norm": 9.499051094055176, "learning_rate": 4.729189010960258e-06, "loss": 0.33468008, "memory(GiB)": 34.88, "step": 79985, "train_speed(iter/s)": 0.413076 }, { "acc": 0.9300992, "epoch": 2.1658137708824086, "grad_norm": 6.924678325653076, "learning_rate": 4.728630273555291e-06, "loss": 0.37696614, "memory(GiB)": 34.88, "step": 79990, "train_speed(iter/s)": 0.413077 }, { "acc": 0.91231098, "epoch": 2.1659491511656244, "grad_norm": 5.679424285888672, "learning_rate": 4.728071539556026e-06, "loss": 0.43666754, "memory(GiB)": 34.88, "step": 79995, "train_speed(iter/s)": 0.413078 }, { "acc": 0.91698742, "epoch": 2.1660845314488397, "grad_norm": 4.1750264167785645, "learning_rate": 4.727512808969465e-06, "loss": 0.48995333, "memory(GiB)": 34.88, "step": 80000, "train_speed(iter/s)": 0.41308 }, { "epoch": 2.1660845314488397, "eval_acc": 0.6154971245143144, "eval_loss": 1.1167627573013306, "eval_runtime": 1301.6229, "eval_samples_per_second": 66.306, "eval_steps_per_second": 2.073, "step": 80000 }, { "acc": 0.91750546, "epoch": 2.1662199117320555, "grad_norm": 9.257186889648438, "learning_rate": 4.726954081802603e-06, "loss": 0.5111084, "memory(GiB)": 34.88, "step": 80005, "train_speed(iter/s)": 0.41027 }, { "acc": 0.92479858, "epoch": 2.166355292015271, "grad_norm": 2.720837354660034, "learning_rate": 4.726395358062441e-06, "loss": 0.40417252, "memory(GiB)": 34.88, "step": 80010, "train_speed(iter/s)": 0.410272 }, { "acc": 0.90742302, "epoch": 2.1664906722984862, "grad_norm": 6.02114725112915, "learning_rate": 4.725836637755979e-06, "loss": 0.531534, "memory(GiB)": 34.88, "step": 80015, "train_speed(iter/s)": 0.410273 }, { "acc": 0.9220932, "epoch": 2.166626052581702, "grad_norm": 16.295066833496094, "learning_rate": 4.7252779208902145e-06, "loss": 0.41221833, "memory(GiB)": 34.88, "step": 80020, "train_speed(iter/s)": 0.410275 }, { "acc": 0.89592772, "epoch": 2.1667614328649174, "grad_norm": 8.66185188293457, "learning_rate": 4.724719207472149e-06, "loss": 0.55445795, "memory(GiB)": 34.88, "step": 80025, "train_speed(iter/s)": 0.410276 }, { "acc": 0.92928391, "epoch": 2.166896813148133, "grad_norm": 7.761828899383545, "learning_rate": 4.724160497508778e-06, "loss": 0.33501847, "memory(GiB)": 34.88, "step": 80030, "train_speed(iter/s)": 0.410278 }, { "acc": 0.90828056, "epoch": 2.1670321934313486, "grad_norm": 3.898176431655884, "learning_rate": 4.723601791007103e-06, "loss": 0.47829299, "memory(GiB)": 34.88, "step": 80035, "train_speed(iter/s)": 0.410278 }, { "acc": 0.90251541, "epoch": 2.1671675737145644, "grad_norm": 7.29157829284668, "learning_rate": 4.723043087974121e-06, "loss": 0.54869995, "memory(GiB)": 34.88, "step": 80040, "train_speed(iter/s)": 0.41028 }, { "acc": 0.9308835, "epoch": 2.1673029539977797, "grad_norm": 6.752634525299072, "learning_rate": 4.722484388416832e-06, "loss": 0.44211545, "memory(GiB)": 34.88, "step": 80045, "train_speed(iter/s)": 0.410281 }, { "acc": 0.90697308, "epoch": 2.1674383342809955, "grad_norm": 8.776893615722656, "learning_rate": 4.721925692342239e-06, "loss": 0.55275197, "memory(GiB)": 34.88, "step": 80050, "train_speed(iter/s)": 0.410283 }, { "acc": 0.93764095, "epoch": 2.167573714564211, "grad_norm": 4.274861812591553, "learning_rate": 4.721366999757333e-06, "loss": 0.31104903, "memory(GiB)": 34.88, "step": 80055, "train_speed(iter/s)": 0.410284 }, { "acc": 0.92185106, "epoch": 2.1677090948474262, "grad_norm": 9.047001838684082, "learning_rate": 4.720808310669117e-06, "loss": 0.46010036, "memory(GiB)": 34.88, "step": 80060, "train_speed(iter/s)": 0.410286 }, { "acc": 0.9107317, "epoch": 2.167844475130642, "grad_norm": 5.953428745269775, "learning_rate": 4.720249625084588e-06, "loss": 0.56476216, "memory(GiB)": 34.88, "step": 80065, "train_speed(iter/s)": 0.410287 }, { "acc": 0.92761288, "epoch": 2.1679798554138574, "grad_norm": 7.1844258308410645, "learning_rate": 4.719690943010748e-06, "loss": 0.404882, "memory(GiB)": 34.88, "step": 80070, "train_speed(iter/s)": 0.410289 }, { "acc": 0.91303072, "epoch": 2.168115235697073, "grad_norm": 12.016141891479492, "learning_rate": 4.71913226445459e-06, "loss": 0.49982634, "memory(GiB)": 34.88, "step": 80075, "train_speed(iter/s)": 0.41029 }, { "acc": 0.92739754, "epoch": 2.1682506159802886, "grad_norm": 9.138367652893066, "learning_rate": 4.718573589423117e-06, "loss": 0.40791645, "memory(GiB)": 34.88, "step": 80080, "train_speed(iter/s)": 0.410292 }, { "acc": 0.91658192, "epoch": 2.1683859962635044, "grad_norm": 10.1088228225708, "learning_rate": 4.718014917923328e-06, "loss": 0.42586412, "memory(GiB)": 34.88, "step": 80085, "train_speed(iter/s)": 0.410293 }, { "acc": 0.92340155, "epoch": 2.1685213765467197, "grad_norm": 15.954806327819824, "learning_rate": 4.7174562499622186e-06, "loss": 0.45997505, "memory(GiB)": 34.88, "step": 80090, "train_speed(iter/s)": 0.410295 }, { "acc": 0.93093948, "epoch": 2.168656756829935, "grad_norm": 10.642931938171387, "learning_rate": 4.716897585546789e-06, "loss": 0.37740757, "memory(GiB)": 34.88, "step": 80095, "train_speed(iter/s)": 0.410296 }, { "acc": 0.916047, "epoch": 2.168792137113151, "grad_norm": 2.457496166229248, "learning_rate": 4.716338924684036e-06, "loss": 0.44285188, "memory(GiB)": 34.88, "step": 80100, "train_speed(iter/s)": 0.410297 }, { "acc": 0.94192324, "epoch": 2.1689275173963662, "grad_norm": 4.130098819732666, "learning_rate": 4.715780267380959e-06, "loss": 0.33893626, "memory(GiB)": 34.88, "step": 80105, "train_speed(iter/s)": 0.410299 }, { "acc": 0.94555531, "epoch": 2.169062897679582, "grad_norm": 20.088090896606445, "learning_rate": 4.715221613644556e-06, "loss": 0.31596513, "memory(GiB)": 34.88, "step": 80110, "train_speed(iter/s)": 0.4103 }, { "acc": 0.91827202, "epoch": 2.1691982779627974, "grad_norm": 6.453779697418213, "learning_rate": 4.714662963481826e-06, "loss": 0.47106767, "memory(GiB)": 34.88, "step": 80115, "train_speed(iter/s)": 0.410302 }, { "acc": 0.91249123, "epoch": 2.169333658246013, "grad_norm": 14.257079124450684, "learning_rate": 4.714104316899766e-06, "loss": 0.44279337, "memory(GiB)": 34.88, "step": 80120, "train_speed(iter/s)": 0.410303 }, { "acc": 0.93002129, "epoch": 2.1694690385292286, "grad_norm": 7.273792743682861, "learning_rate": 4.713545673905374e-06, "loss": 0.36475728, "memory(GiB)": 34.88, "step": 80125, "train_speed(iter/s)": 0.410305 }, { "acc": 0.93007526, "epoch": 2.169604418812444, "grad_norm": 6.842994213104248, "learning_rate": 4.712987034505651e-06, "loss": 0.35749214, "memory(GiB)": 34.88, "step": 80130, "train_speed(iter/s)": 0.410306 }, { "acc": 0.92085438, "epoch": 2.1697397990956597, "grad_norm": 12.222311019897461, "learning_rate": 4.712428398707591e-06, "loss": 0.46673799, "memory(GiB)": 34.88, "step": 80135, "train_speed(iter/s)": 0.410307 }, { "acc": 0.91867523, "epoch": 2.169875179378875, "grad_norm": 3.8004307746887207, "learning_rate": 4.711869766518195e-06, "loss": 0.49115334, "memory(GiB)": 34.88, "step": 80140, "train_speed(iter/s)": 0.410309 }, { "acc": 0.92341137, "epoch": 2.170010559662091, "grad_norm": 6.659850597381592, "learning_rate": 4.711311137944458e-06, "loss": 0.31650939, "memory(GiB)": 34.88, "step": 80145, "train_speed(iter/s)": 0.41031 }, { "acc": 0.92916355, "epoch": 2.1701459399453062, "grad_norm": 16.817018508911133, "learning_rate": 4.710752512993382e-06, "loss": 0.35825701, "memory(GiB)": 34.88, "step": 80150, "train_speed(iter/s)": 0.410312 }, { "acc": 0.93350725, "epoch": 2.170281320228522, "grad_norm": 13.199238777160645, "learning_rate": 4.710193891671961e-06, "loss": 0.31861165, "memory(GiB)": 34.88, "step": 80155, "train_speed(iter/s)": 0.410313 }, { "acc": 0.91455574, "epoch": 2.1704167005117374, "grad_norm": 2.818014621734619, "learning_rate": 4.709635273987194e-06, "loss": 0.39500985, "memory(GiB)": 34.88, "step": 80160, "train_speed(iter/s)": 0.410315 }, { "acc": 0.91514053, "epoch": 2.170552080794953, "grad_norm": 10.362772941589355, "learning_rate": 4.70907665994608e-06, "loss": 0.46850042, "memory(GiB)": 34.88, "step": 80165, "train_speed(iter/s)": 0.410316 }, { "acc": 0.92542458, "epoch": 2.1706874610781686, "grad_norm": 30.348169326782227, "learning_rate": 4.708518049555617e-06, "loss": 0.41413918, "memory(GiB)": 34.88, "step": 80170, "train_speed(iter/s)": 0.410318 }, { "acc": 0.92264614, "epoch": 2.170822841361384, "grad_norm": 11.904391288757324, "learning_rate": 4.707959442822802e-06, "loss": 0.48934245, "memory(GiB)": 34.88, "step": 80175, "train_speed(iter/s)": 0.410319 }, { "acc": 0.9208004, "epoch": 2.1709582216445997, "grad_norm": 11.37419319152832, "learning_rate": 4.707400839754631e-06, "loss": 0.46048594, "memory(GiB)": 34.88, "step": 80180, "train_speed(iter/s)": 0.410321 }, { "acc": 0.93408937, "epoch": 2.171093601927815, "grad_norm": 6.414155960083008, "learning_rate": 4.706842240358103e-06, "loss": 0.38316021, "memory(GiB)": 34.88, "step": 80185, "train_speed(iter/s)": 0.410322 }, { "acc": 0.92227211, "epoch": 2.171228982211031, "grad_norm": 2.9438061714172363, "learning_rate": 4.706283644640215e-06, "loss": 0.45895543, "memory(GiB)": 34.88, "step": 80190, "train_speed(iter/s)": 0.410324 }, { "acc": 0.91569262, "epoch": 2.1713643624942462, "grad_norm": 15.427459716796875, "learning_rate": 4.705725052607966e-06, "loss": 0.46980476, "memory(GiB)": 34.88, "step": 80195, "train_speed(iter/s)": 0.410325 }, { "acc": 0.92527304, "epoch": 2.171499742777462, "grad_norm": 4.429372310638428, "learning_rate": 4.705166464268352e-06, "loss": 0.40234051, "memory(GiB)": 34.88, "step": 80200, "train_speed(iter/s)": 0.410327 }, { "acc": 0.9144681, "epoch": 2.1716351230606774, "grad_norm": 3.1614253520965576, "learning_rate": 4.704607879628371e-06, "loss": 0.5146852, "memory(GiB)": 34.88, "step": 80205, "train_speed(iter/s)": 0.410328 }, { "acc": 0.91351395, "epoch": 2.171770503343893, "grad_norm": 10.722900390625, "learning_rate": 4.704049298695019e-06, "loss": 0.50823622, "memory(GiB)": 34.88, "step": 80210, "train_speed(iter/s)": 0.410329 }, { "acc": 0.90841198, "epoch": 2.1719058836271086, "grad_norm": 10.740321159362793, "learning_rate": 4.703490721475297e-06, "loss": 0.50830784, "memory(GiB)": 34.88, "step": 80215, "train_speed(iter/s)": 0.410331 }, { "acc": 0.93213177, "epoch": 2.172041263910324, "grad_norm": 9.246381759643555, "learning_rate": 4.7029321479762e-06, "loss": 0.31246319, "memory(GiB)": 34.88, "step": 80220, "train_speed(iter/s)": 0.410332 }, { "acc": 0.91661606, "epoch": 2.1721766441935397, "grad_norm": 29.905439376831055, "learning_rate": 4.7023735782047225e-06, "loss": 0.42890406, "memory(GiB)": 34.88, "step": 80225, "train_speed(iter/s)": 0.410334 }, { "acc": 0.91523743, "epoch": 2.172312024476755, "grad_norm": 11.670465469360352, "learning_rate": 4.701815012167864e-06, "loss": 0.44162827, "memory(GiB)": 34.88, "step": 80230, "train_speed(iter/s)": 0.410335 }, { "acc": 0.91919498, "epoch": 2.172447404759971, "grad_norm": 6.632110595703125, "learning_rate": 4.7012564498726244e-06, "loss": 0.40635772, "memory(GiB)": 34.88, "step": 80235, "train_speed(iter/s)": 0.410336 }, { "acc": 0.90940533, "epoch": 2.1725827850431862, "grad_norm": 7.6676459312438965, "learning_rate": 4.7006978913259965e-06, "loss": 0.47233863, "memory(GiB)": 34.88, "step": 80240, "train_speed(iter/s)": 0.410338 }, { "acc": 0.91579132, "epoch": 2.172718165326402, "grad_norm": 7.036863327026367, "learning_rate": 4.70013933653498e-06, "loss": 0.55318208, "memory(GiB)": 34.88, "step": 80245, "train_speed(iter/s)": 0.410339 }, { "acc": 0.91389713, "epoch": 2.1728535456096174, "grad_norm": 7.481264591217041, "learning_rate": 4.69958078550657e-06, "loss": 0.49867415, "memory(GiB)": 34.88, "step": 80250, "train_speed(iter/s)": 0.410341 }, { "acc": 0.92511702, "epoch": 2.1729889258928328, "grad_norm": 6.958128929138184, "learning_rate": 4.699022238247765e-06, "loss": 0.35047946, "memory(GiB)": 34.88, "step": 80255, "train_speed(iter/s)": 0.410342 }, { "acc": 0.91617661, "epoch": 2.1731243061760486, "grad_norm": 13.03368091583252, "learning_rate": 4.698463694765563e-06, "loss": 0.47844596, "memory(GiB)": 34.88, "step": 80260, "train_speed(iter/s)": 0.410344 }, { "acc": 0.92311611, "epoch": 2.173259686459264, "grad_norm": 7.68010950088501, "learning_rate": 4.697905155066958e-06, "loss": 0.42491055, "memory(GiB)": 34.88, "step": 80265, "train_speed(iter/s)": 0.410345 }, { "acc": 0.9159193, "epoch": 2.1733950667424797, "grad_norm": 8.349374771118164, "learning_rate": 4.697346619158948e-06, "loss": 0.40154533, "memory(GiB)": 34.88, "step": 80270, "train_speed(iter/s)": 0.410347 }, { "acc": 0.90990601, "epoch": 2.173530447025695, "grad_norm": 8.284561157226562, "learning_rate": 4.696788087048528e-06, "loss": 0.47288175, "memory(GiB)": 34.88, "step": 80275, "train_speed(iter/s)": 0.410348 }, { "acc": 0.93915987, "epoch": 2.173665827308911, "grad_norm": 6.833616733551025, "learning_rate": 4.6962295587426996e-06, "loss": 0.33962259, "memory(GiB)": 34.88, "step": 80280, "train_speed(iter/s)": 0.410349 }, { "acc": 0.92398291, "epoch": 2.1738012075921263, "grad_norm": 8.152656555175781, "learning_rate": 4.695671034248454e-06, "loss": 0.49148993, "memory(GiB)": 34.88, "step": 80285, "train_speed(iter/s)": 0.410351 }, { "acc": 0.92249622, "epoch": 2.1739365878753416, "grad_norm": 6.50863790512085, "learning_rate": 4.695112513572792e-06, "loss": 0.46847429, "memory(GiB)": 34.88, "step": 80290, "train_speed(iter/s)": 0.410352 }, { "acc": 0.93145428, "epoch": 2.1740719681585574, "grad_norm": 5.759753227233887, "learning_rate": 4.694553996722707e-06, "loss": 0.3457026, "memory(GiB)": 34.88, "step": 80295, "train_speed(iter/s)": 0.410353 }, { "acc": 0.92814236, "epoch": 2.1742073484417728, "grad_norm": 8.284269332885742, "learning_rate": 4.693995483705199e-06, "loss": 0.43073435, "memory(GiB)": 34.88, "step": 80300, "train_speed(iter/s)": 0.410355 }, { "acc": 0.91257315, "epoch": 2.1743427287249886, "grad_norm": 7.832427978515625, "learning_rate": 4.69343697452726e-06, "loss": 0.51046491, "memory(GiB)": 34.88, "step": 80305, "train_speed(iter/s)": 0.410356 }, { "acc": 0.92688828, "epoch": 2.174478109008204, "grad_norm": 7.45196533203125, "learning_rate": 4.692878469195888e-06, "loss": 0.37352116, "memory(GiB)": 34.88, "step": 80310, "train_speed(iter/s)": 0.410358 }, { "acc": 0.92192631, "epoch": 2.1746134892914197, "grad_norm": 6.180957794189453, "learning_rate": 4.692319967718082e-06, "loss": 0.39116774, "memory(GiB)": 34.88, "step": 80315, "train_speed(iter/s)": 0.410359 }, { "acc": 0.93960142, "epoch": 2.174748869574635, "grad_norm": 6.772156238555908, "learning_rate": 4.691761470100836e-06, "loss": 0.32708349, "memory(GiB)": 34.88, "step": 80320, "train_speed(iter/s)": 0.410361 }, { "acc": 0.92066488, "epoch": 2.174884249857851, "grad_norm": 17.561697006225586, "learning_rate": 4.6912029763511475e-06, "loss": 0.4907577, "memory(GiB)": 34.88, "step": 80325, "train_speed(iter/s)": 0.410362 }, { "acc": 0.92309418, "epoch": 2.1750196301410663, "grad_norm": 5.714501857757568, "learning_rate": 4.690644486476011e-06, "loss": 0.43294926, "memory(GiB)": 34.88, "step": 80330, "train_speed(iter/s)": 0.410363 }, { "acc": 0.89466267, "epoch": 2.1751550104242816, "grad_norm": 8.672281265258789, "learning_rate": 4.690086000482424e-06, "loss": 0.57264204, "memory(GiB)": 34.88, "step": 80335, "train_speed(iter/s)": 0.410364 }, { "acc": 0.92410116, "epoch": 2.1752903907074974, "grad_norm": 8.335766792297363, "learning_rate": 4.689527518377383e-06, "loss": 0.49025893, "memory(GiB)": 34.88, "step": 80340, "train_speed(iter/s)": 0.410366 }, { "acc": 0.92589436, "epoch": 2.1754257709907128, "grad_norm": 10.85690689086914, "learning_rate": 4.688969040167883e-06, "loss": 0.39265656, "memory(GiB)": 34.88, "step": 80345, "train_speed(iter/s)": 0.410367 }, { "acc": 0.93175325, "epoch": 2.1755611512739286, "grad_norm": 7.479310035705566, "learning_rate": 4.68841056586092e-06, "loss": 0.41479855, "memory(GiB)": 34.88, "step": 80350, "train_speed(iter/s)": 0.410369 }, { "acc": 0.89545326, "epoch": 2.175696531557144, "grad_norm": 10.785990715026855, "learning_rate": 4.68785209546349e-06, "loss": 0.59862003, "memory(GiB)": 34.88, "step": 80355, "train_speed(iter/s)": 0.41037 }, { "acc": 0.93520765, "epoch": 2.1758319118403597, "grad_norm": 5.560571670532227, "learning_rate": 4.68729362898259e-06, "loss": 0.29186108, "memory(GiB)": 34.88, "step": 80360, "train_speed(iter/s)": 0.410372 }, { "acc": 0.92968483, "epoch": 2.175967292123575, "grad_norm": 10.785138130187988, "learning_rate": 4.686735166425215e-06, "loss": 0.33013592, "memory(GiB)": 34.88, "step": 80365, "train_speed(iter/s)": 0.410373 }, { "acc": 0.89569931, "epoch": 2.176102672406791, "grad_norm": 8.599560737609863, "learning_rate": 4.686176707798362e-06, "loss": 0.46714411, "memory(GiB)": 34.88, "step": 80370, "train_speed(iter/s)": 0.410375 }, { "acc": 0.9250802, "epoch": 2.1762380526900063, "grad_norm": 4.481630325317383, "learning_rate": 4.685618253109024e-06, "loss": 0.46887608, "memory(GiB)": 34.88, "step": 80375, "train_speed(iter/s)": 0.410376 }, { "acc": 0.92795296, "epoch": 2.1763734329732216, "grad_norm": 7.752699851989746, "learning_rate": 4.6850598023642e-06, "loss": 0.35352602, "memory(GiB)": 34.88, "step": 80380, "train_speed(iter/s)": 0.410377 }, { "acc": 0.90253077, "epoch": 2.1765088132564374, "grad_norm": 5.691132545471191, "learning_rate": 4.684501355570885e-06, "loss": 0.5260488, "memory(GiB)": 34.88, "step": 80385, "train_speed(iter/s)": 0.410379 }, { "acc": 0.92728682, "epoch": 2.1766441935396528, "grad_norm": 13.009315490722656, "learning_rate": 4.6839429127360725e-06, "loss": 0.35160177, "memory(GiB)": 34.88, "step": 80390, "train_speed(iter/s)": 0.41038 }, { "acc": 0.92969055, "epoch": 2.1767795738228686, "grad_norm": 7.147658824920654, "learning_rate": 4.68338447386676e-06, "loss": 0.41143322, "memory(GiB)": 34.88, "step": 80395, "train_speed(iter/s)": 0.410381 }, { "acc": 0.91584845, "epoch": 2.176914954106084, "grad_norm": 6.217944622039795, "learning_rate": 4.682826038969942e-06, "loss": 0.35962338, "memory(GiB)": 34.88, "step": 80400, "train_speed(iter/s)": 0.410383 }, { "acc": 0.92355461, "epoch": 2.1770503343892997, "grad_norm": 8.014399528503418, "learning_rate": 4.6822676080526154e-06, "loss": 0.45499468, "memory(GiB)": 34.88, "step": 80405, "train_speed(iter/s)": 0.410384 }, { "acc": 0.92640953, "epoch": 2.177185714672515, "grad_norm": 12.721205711364746, "learning_rate": 4.681709181121774e-06, "loss": 0.41260748, "memory(GiB)": 34.88, "step": 80410, "train_speed(iter/s)": 0.410385 }, { "acc": 0.91883183, "epoch": 2.1773210949557305, "grad_norm": 8.169939994812012, "learning_rate": 4.6811507581844165e-06, "loss": 0.46941266, "memory(GiB)": 34.88, "step": 80415, "train_speed(iter/s)": 0.410387 }, { "acc": 0.92457924, "epoch": 2.1774564752389463, "grad_norm": 7.896052837371826, "learning_rate": 4.680592339247533e-06, "loss": 0.42418756, "memory(GiB)": 34.88, "step": 80420, "train_speed(iter/s)": 0.410388 }, { "acc": 0.90102711, "epoch": 2.1775918555221616, "grad_norm": 15.158565521240234, "learning_rate": 4.680033924318123e-06, "loss": 0.57398663, "memory(GiB)": 34.88, "step": 80425, "train_speed(iter/s)": 0.41039 }, { "acc": 0.91198082, "epoch": 2.1777272358053774, "grad_norm": 9.603266716003418, "learning_rate": 4.67947551340318e-06, "loss": 0.45157456, "memory(GiB)": 34.88, "step": 80430, "train_speed(iter/s)": 0.410391 }, { "acc": 0.9349308, "epoch": 2.1778626160885928, "grad_norm": 10.509710311889648, "learning_rate": 4.678917106509698e-06, "loss": 0.35091047, "memory(GiB)": 34.88, "step": 80435, "train_speed(iter/s)": 0.410392 }, { "acc": 0.91977005, "epoch": 2.1779979963718086, "grad_norm": 11.670570373535156, "learning_rate": 4.678358703644676e-06, "loss": 0.44734602, "memory(GiB)": 34.88, "step": 80440, "train_speed(iter/s)": 0.410394 }, { "acc": 0.91590271, "epoch": 2.178133376655024, "grad_norm": 7.439319133758545, "learning_rate": 4.6778003048151055e-06, "loss": 0.50445218, "memory(GiB)": 34.88, "step": 80445, "train_speed(iter/s)": 0.410395 }, { "acc": 0.90465527, "epoch": 2.1782687569382393, "grad_norm": 19.57333755493164, "learning_rate": 4.677241910027983e-06, "loss": 0.52078562, "memory(GiB)": 34.88, "step": 80450, "train_speed(iter/s)": 0.410397 }, { "acc": 0.9065464, "epoch": 2.178404137221455, "grad_norm": 9.494301795959473, "learning_rate": 4.676683519290303e-06, "loss": 0.4883678, "memory(GiB)": 34.88, "step": 80455, "train_speed(iter/s)": 0.410398 }, { "acc": 0.91193037, "epoch": 2.1785395175046705, "grad_norm": 7.943759441375732, "learning_rate": 4.676125132609061e-06, "loss": 0.55580149, "memory(GiB)": 34.88, "step": 80460, "train_speed(iter/s)": 0.4104 }, { "acc": 0.90657463, "epoch": 2.1786748977878863, "grad_norm": 11.82972240447998, "learning_rate": 4.675566749991252e-06, "loss": 0.51545153, "memory(GiB)": 34.88, "step": 80465, "train_speed(iter/s)": 0.410401 }, { "acc": 0.91175137, "epoch": 2.1788102780711016, "grad_norm": 8.640837669372559, "learning_rate": 4.675008371443871e-06, "loss": 0.46628919, "memory(GiB)": 34.88, "step": 80470, "train_speed(iter/s)": 0.410403 }, { "acc": 0.93612423, "epoch": 2.1789456583543174, "grad_norm": 19.762617111206055, "learning_rate": 4.674449996973911e-06, "loss": 0.32801206, "memory(GiB)": 34.88, "step": 80475, "train_speed(iter/s)": 0.410404 }, { "acc": 0.9200223, "epoch": 2.1790810386375328, "grad_norm": 12.419808387756348, "learning_rate": 4.673891626588368e-06, "loss": 0.44773536, "memory(GiB)": 34.88, "step": 80480, "train_speed(iter/s)": 0.410406 }, { "acc": 0.91628666, "epoch": 2.1792164189207486, "grad_norm": 3.9289724826812744, "learning_rate": 4.673333260294238e-06, "loss": 0.41115236, "memory(GiB)": 34.88, "step": 80485, "train_speed(iter/s)": 0.410407 }, { "acc": 0.9062521, "epoch": 2.179351799203964, "grad_norm": 7.711493015289307, "learning_rate": 4.672774898098512e-06, "loss": 0.55718861, "memory(GiB)": 34.88, "step": 80490, "train_speed(iter/s)": 0.410408 }, { "acc": 0.90646839, "epoch": 2.1794871794871793, "grad_norm": 7.852197170257568, "learning_rate": 4.672216540008188e-06, "loss": 0.58739519, "memory(GiB)": 34.88, "step": 80495, "train_speed(iter/s)": 0.410409 }, { "acc": 0.93085814, "epoch": 2.179622559770395, "grad_norm": 7.622407913208008, "learning_rate": 4.67165818603026e-06, "loss": 0.37281044, "memory(GiB)": 34.88, "step": 80500, "train_speed(iter/s)": 0.41041 }, { "acc": 0.9317873, "epoch": 2.1797579400536105, "grad_norm": 8.565619468688965, "learning_rate": 4.6710998361717206e-06, "loss": 0.3254261, "memory(GiB)": 34.88, "step": 80505, "train_speed(iter/s)": 0.410412 }, { "acc": 0.89934072, "epoch": 2.1798933203368263, "grad_norm": 14.537199974060059, "learning_rate": 4.670541490439568e-06, "loss": 0.59113374, "memory(GiB)": 34.88, "step": 80510, "train_speed(iter/s)": 0.410413 }, { "acc": 0.91648436, "epoch": 2.1800287006200416, "grad_norm": 7.107328414916992, "learning_rate": 4.6699831488407925e-06, "loss": 0.46049309, "memory(GiB)": 34.88, "step": 80515, "train_speed(iter/s)": 0.410415 }, { "acc": 0.92519846, "epoch": 2.1801640809032574, "grad_norm": 12.442619323730469, "learning_rate": 4.669424811382389e-06, "loss": 0.4483449, "memory(GiB)": 34.88, "step": 80520, "train_speed(iter/s)": 0.410416 }, { "acc": 0.91925621, "epoch": 2.1802994611864728, "grad_norm": 7.0512495040893555, "learning_rate": 4.668866478071354e-06, "loss": 0.46913567, "memory(GiB)": 34.88, "step": 80525, "train_speed(iter/s)": 0.410418 }, { "acc": 0.91221972, "epoch": 2.1804348414696886, "grad_norm": 26.927326202392578, "learning_rate": 4.668308148914679e-06, "loss": 0.53365812, "memory(GiB)": 34.88, "step": 80530, "train_speed(iter/s)": 0.410419 }, { "acc": 0.93170109, "epoch": 2.180570221752904, "grad_norm": 9.495412826538086, "learning_rate": 4.667749823919362e-06, "loss": 0.36881037, "memory(GiB)": 34.88, "step": 80535, "train_speed(iter/s)": 0.41042 }, { "acc": 0.9293642, "epoch": 2.1807056020361193, "grad_norm": 6.339000701904297, "learning_rate": 4.667191503092393e-06, "loss": 0.36409976, "memory(GiB)": 34.88, "step": 80540, "train_speed(iter/s)": 0.410422 }, { "acc": 0.91334839, "epoch": 2.180840982319335, "grad_norm": 7.026427268981934, "learning_rate": 4.66663318644077e-06, "loss": 0.50340042, "memory(GiB)": 34.88, "step": 80545, "train_speed(iter/s)": 0.410423 }, { "acc": 0.91701345, "epoch": 2.1809763626025505, "grad_norm": 6.422005653381348, "learning_rate": 4.666074873971484e-06, "loss": 0.41842799, "memory(GiB)": 34.88, "step": 80550, "train_speed(iter/s)": 0.410425 }, { "acc": 0.91868238, "epoch": 2.1811117428857663, "grad_norm": 10.5442533493042, "learning_rate": 4.66551656569153e-06, "loss": 0.54118977, "memory(GiB)": 34.88, "step": 80555, "train_speed(iter/s)": 0.410426 }, { "acc": 0.90914383, "epoch": 2.1812471231689816, "grad_norm": 6.126565456390381, "learning_rate": 4.664958261607901e-06, "loss": 0.59848895, "memory(GiB)": 34.88, "step": 80560, "train_speed(iter/s)": 0.410428 }, { "acc": 0.92568073, "epoch": 2.1813825034521974, "grad_norm": 11.028103828430176, "learning_rate": 4.664399961727591e-06, "loss": 0.48492756, "memory(GiB)": 34.88, "step": 80565, "train_speed(iter/s)": 0.410429 }, { "acc": 0.92939501, "epoch": 2.1815178837354128, "grad_norm": 7.373507022857666, "learning_rate": 4.663841666057595e-06, "loss": 0.39213834, "memory(GiB)": 34.88, "step": 80570, "train_speed(iter/s)": 0.41043 }, { "acc": 0.9131218, "epoch": 2.181653264018628, "grad_norm": 4.136754512786865, "learning_rate": 4.663283374604906e-06, "loss": 0.46546001, "memory(GiB)": 34.88, "step": 80575, "train_speed(iter/s)": 0.410431 }, { "acc": 0.92906837, "epoch": 2.181788644301844, "grad_norm": 5.430534839630127, "learning_rate": 4.662725087376518e-06, "loss": 0.39895558, "memory(GiB)": 34.88, "step": 80580, "train_speed(iter/s)": 0.410433 }, { "acc": 0.91332483, "epoch": 2.1819240245850593, "grad_norm": 10.055266380310059, "learning_rate": 4.662166804379425e-06, "loss": 0.49577966, "memory(GiB)": 34.88, "step": 80585, "train_speed(iter/s)": 0.410434 }, { "acc": 0.92817745, "epoch": 2.182059404868275, "grad_norm": 9.0669527053833, "learning_rate": 4.66160852562062e-06, "loss": 0.37833931, "memory(GiB)": 34.88, "step": 80590, "train_speed(iter/s)": 0.410436 }, { "acc": 0.92805157, "epoch": 2.1821947851514905, "grad_norm": 6.306207656860352, "learning_rate": 4.661050251107097e-06, "loss": 0.32023187, "memory(GiB)": 34.88, "step": 80595, "train_speed(iter/s)": 0.410437 }, { "acc": 0.92238998, "epoch": 2.1823301654347063, "grad_norm": 13.570318222045898, "learning_rate": 4.6604919808458495e-06, "loss": 0.47060485, "memory(GiB)": 34.88, "step": 80600, "train_speed(iter/s)": 0.410439 }, { "acc": 0.91586685, "epoch": 2.1824655457179216, "grad_norm": 4.312826156616211, "learning_rate": 4.659933714843869e-06, "loss": 0.45144286, "memory(GiB)": 34.88, "step": 80605, "train_speed(iter/s)": 0.41044 }, { "acc": 0.93691854, "epoch": 2.182600926001137, "grad_norm": 14.410955429077148, "learning_rate": 4.6593754531081505e-06, "loss": 0.35951135, "memory(GiB)": 34.88, "step": 80610, "train_speed(iter/s)": 0.410442 }, { "acc": 0.9247509, "epoch": 2.182736306284353, "grad_norm": 5.844623565673828, "learning_rate": 4.658817195645688e-06, "loss": 0.42434173, "memory(GiB)": 34.88, "step": 80615, "train_speed(iter/s)": 0.410443 }, { "acc": 0.91183376, "epoch": 2.182871686567568, "grad_norm": 10.521252632141113, "learning_rate": 4.658258942463473e-06, "loss": 0.50849476, "memory(GiB)": 34.88, "step": 80620, "train_speed(iter/s)": 0.410445 }, { "acc": 0.90592327, "epoch": 2.183007066850784, "grad_norm": 5.399693965911865, "learning_rate": 4.657700693568502e-06, "loss": 0.55350943, "memory(GiB)": 34.88, "step": 80625, "train_speed(iter/s)": 0.410446 }, { "acc": 0.90386581, "epoch": 2.1831424471339993, "grad_norm": 10.240729331970215, "learning_rate": 4.657142448967764e-06, "loss": 0.55522389, "memory(GiB)": 34.88, "step": 80630, "train_speed(iter/s)": 0.410447 }, { "acc": 0.91711931, "epoch": 2.183277827417215, "grad_norm": 23.18355941772461, "learning_rate": 4.6565842086682555e-06, "loss": 0.51765251, "memory(GiB)": 34.88, "step": 80635, "train_speed(iter/s)": 0.410449 }, { "acc": 0.92038136, "epoch": 2.1834132077004305, "grad_norm": 7.790699481964111, "learning_rate": 4.656025972676967e-06, "loss": 0.37977114, "memory(GiB)": 34.88, "step": 80640, "train_speed(iter/s)": 0.41045 }, { "acc": 0.91578598, "epoch": 2.1835485879836463, "grad_norm": 5.547852993011475, "learning_rate": 4.655467741000892e-06, "loss": 0.4094368, "memory(GiB)": 34.88, "step": 80645, "train_speed(iter/s)": 0.410452 }, { "acc": 0.92413492, "epoch": 2.1836839682668616, "grad_norm": 9.027459144592285, "learning_rate": 4.654909513647025e-06, "loss": 0.44719553, "memory(GiB)": 34.88, "step": 80650, "train_speed(iter/s)": 0.410453 }, { "acc": 0.92919598, "epoch": 2.183819348550077, "grad_norm": 13.944836616516113, "learning_rate": 4.6543512906223574e-06, "loss": 0.44982686, "memory(GiB)": 34.88, "step": 80655, "train_speed(iter/s)": 0.410454 }, { "acc": 0.89572945, "epoch": 2.183954728833293, "grad_norm": 8.080153465270996, "learning_rate": 4.653793071933883e-06, "loss": 0.71256733, "memory(GiB)": 34.88, "step": 80660, "train_speed(iter/s)": 0.410455 }, { "acc": 0.88994322, "epoch": 2.184090109116508, "grad_norm": 6.082553863525391, "learning_rate": 4.653234857588594e-06, "loss": 0.69325056, "memory(GiB)": 34.88, "step": 80665, "train_speed(iter/s)": 0.410457 }, { "acc": 0.92123985, "epoch": 2.184225489399724, "grad_norm": 6.93927526473999, "learning_rate": 4.652676647593483e-06, "loss": 0.51134501, "memory(GiB)": 34.88, "step": 80670, "train_speed(iter/s)": 0.410458 }, { "acc": 0.9236784, "epoch": 2.1843608696829393, "grad_norm": 10.273932456970215, "learning_rate": 4.652118441955544e-06, "loss": 0.48398952, "memory(GiB)": 34.88, "step": 80675, "train_speed(iter/s)": 0.41046 }, { "acc": 0.91373253, "epoch": 2.184496249966155, "grad_norm": 20.78706169128418, "learning_rate": 4.651560240681767e-06, "loss": 0.42071195, "memory(GiB)": 34.88, "step": 80680, "train_speed(iter/s)": 0.410461 }, { "acc": 0.90628624, "epoch": 2.1846316302493705, "grad_norm": 5.943722248077393, "learning_rate": 4.651002043779147e-06, "loss": 0.56396565, "memory(GiB)": 34.88, "step": 80685, "train_speed(iter/s)": 0.410463 }, { "acc": 0.91421261, "epoch": 2.1847670105325863, "grad_norm": 7.246055603027344, "learning_rate": 4.650443851254676e-06, "loss": 0.53182936, "memory(GiB)": 34.88, "step": 80690, "train_speed(iter/s)": 0.410464 }, { "acc": 0.91129551, "epoch": 2.1849023908158016, "grad_norm": 7.423525333404541, "learning_rate": 4.649885663115346e-06, "loss": 0.50766582, "memory(GiB)": 34.88, "step": 80695, "train_speed(iter/s)": 0.410465 }, { "acc": 0.92573853, "epoch": 2.185037771099017, "grad_norm": 8.670125007629395, "learning_rate": 4.649327479368148e-06, "loss": 0.39115901, "memory(GiB)": 34.88, "step": 80700, "train_speed(iter/s)": 0.410467 }, { "acc": 0.9118845, "epoch": 2.185173151382233, "grad_norm": 7.0561933517456055, "learning_rate": 4.648769300020078e-06, "loss": 0.51206598, "memory(GiB)": 34.88, "step": 80705, "train_speed(iter/s)": 0.410468 }, { "acc": 0.91185036, "epoch": 2.185308531665448, "grad_norm": 13.337985038757324, "learning_rate": 4.6482111250781255e-06, "loss": 0.53007226, "memory(GiB)": 34.88, "step": 80710, "train_speed(iter/s)": 0.41047 }, { "acc": 0.93488331, "epoch": 2.185443911948664, "grad_norm": 4.713595390319824, "learning_rate": 4.647652954549283e-06, "loss": 0.386691, "memory(GiB)": 34.88, "step": 80715, "train_speed(iter/s)": 0.410471 }, { "acc": 0.91176338, "epoch": 2.1855792922318793, "grad_norm": 9.070640563964844, "learning_rate": 4.6470947884405445e-06, "loss": 0.49212303, "memory(GiB)": 34.88, "step": 80720, "train_speed(iter/s)": 0.410472 }, { "acc": 0.92716007, "epoch": 2.185714672515095, "grad_norm": 11.811625480651855, "learning_rate": 4.646536626758898e-06, "loss": 0.42043114, "memory(GiB)": 34.88, "step": 80725, "train_speed(iter/s)": 0.410474 }, { "acc": 0.90159969, "epoch": 2.1858500527983105, "grad_norm": 90.45244598388672, "learning_rate": 4.645978469511341e-06, "loss": 0.51758685, "memory(GiB)": 34.88, "step": 80730, "train_speed(iter/s)": 0.410475 }, { "acc": 0.91435347, "epoch": 2.185985433081526, "grad_norm": 12.18337345123291, "learning_rate": 4.645420316704861e-06, "loss": 0.47866135, "memory(GiB)": 34.88, "step": 80735, "train_speed(iter/s)": 0.410476 }, { "acc": 0.93182755, "epoch": 2.1861208133647416, "grad_norm": 27.720352172851562, "learning_rate": 4.644862168346453e-06, "loss": 0.39437594, "memory(GiB)": 34.88, "step": 80740, "train_speed(iter/s)": 0.410478 }, { "acc": 0.92144737, "epoch": 2.186256193647957, "grad_norm": 5.378882884979248, "learning_rate": 4.644304024443107e-06, "loss": 0.35533762, "memory(GiB)": 34.88, "step": 80745, "train_speed(iter/s)": 0.410479 }, { "acc": 0.92230577, "epoch": 2.186391573931173, "grad_norm": 10.544206619262695, "learning_rate": 4.643745885001816e-06, "loss": 0.48410597, "memory(GiB)": 34.88, "step": 80750, "train_speed(iter/s)": 0.41048 }, { "acc": 0.92735691, "epoch": 2.186526954214388, "grad_norm": 5.433672904968262, "learning_rate": 4.64318775002957e-06, "loss": 0.41108284, "memory(GiB)": 34.88, "step": 80755, "train_speed(iter/s)": 0.410482 }, { "acc": 0.91496391, "epoch": 2.186662334497604, "grad_norm": 10.598705291748047, "learning_rate": 4.642629619533363e-06, "loss": 0.5621685, "memory(GiB)": 34.88, "step": 80760, "train_speed(iter/s)": 0.410483 }, { "acc": 0.91536007, "epoch": 2.1867977147808193, "grad_norm": 26.444419860839844, "learning_rate": 4.6420714935201865e-06, "loss": 0.5119278, "memory(GiB)": 34.88, "step": 80765, "train_speed(iter/s)": 0.410485 }, { "acc": 0.90949564, "epoch": 2.1869330950640347, "grad_norm": 7.284717559814453, "learning_rate": 4.64151337199703e-06, "loss": 0.49112349, "memory(GiB)": 34.88, "step": 80770, "train_speed(iter/s)": 0.410486 }, { "acc": 0.92048626, "epoch": 2.1870684753472505, "grad_norm": 8.226622581481934, "learning_rate": 4.640955254970888e-06, "loss": 0.4181705, "memory(GiB)": 34.88, "step": 80775, "train_speed(iter/s)": 0.410487 }, { "acc": 0.92701206, "epoch": 2.187203855630466, "grad_norm": 3.6266872882843018, "learning_rate": 4.6403971424487476e-06, "loss": 0.3584507, "memory(GiB)": 34.88, "step": 80780, "train_speed(iter/s)": 0.410489 }, { "acc": 0.93091984, "epoch": 2.1873392359136816, "grad_norm": 9.692612648010254, "learning_rate": 4.639839034437606e-06, "loss": 0.41135845, "memory(GiB)": 34.88, "step": 80785, "train_speed(iter/s)": 0.41049 }, { "acc": 0.90750999, "epoch": 2.187474616196897, "grad_norm": 9.673538208007812, "learning_rate": 4.63928093094445e-06, "loss": 0.52890658, "memory(GiB)": 34.88, "step": 80790, "train_speed(iter/s)": 0.410492 }, { "acc": 0.93013649, "epoch": 2.187609996480113, "grad_norm": 9.26118278503418, "learning_rate": 4.638722831976272e-06, "loss": 0.36148472, "memory(GiB)": 34.88, "step": 80795, "train_speed(iter/s)": 0.410493 }, { "acc": 0.91623096, "epoch": 2.187745376763328, "grad_norm": 23.323650360107422, "learning_rate": 4.638164737540066e-06, "loss": 0.46802545, "memory(GiB)": 34.88, "step": 80800, "train_speed(iter/s)": 0.410494 }, { "acc": 0.9268672, "epoch": 2.187880757046544, "grad_norm": 7.168575286865234, "learning_rate": 4.6376066476428204e-06, "loss": 0.39539752, "memory(GiB)": 34.88, "step": 80805, "train_speed(iter/s)": 0.410496 }, { "acc": 0.92952757, "epoch": 2.1880161373297593, "grad_norm": 10.206439018249512, "learning_rate": 4.637048562291528e-06, "loss": 0.40508237, "memory(GiB)": 34.88, "step": 80810, "train_speed(iter/s)": 0.410497 }, { "acc": 0.91982212, "epoch": 2.1881515176129747, "grad_norm": 9.484100341796875, "learning_rate": 4.636490481493177e-06, "loss": 0.45453596, "memory(GiB)": 34.88, "step": 80815, "train_speed(iter/s)": 0.410498 }, { "acc": 0.93200722, "epoch": 2.1882868978961905, "grad_norm": 30.86281967163086, "learning_rate": 4.635932405254763e-06, "loss": 0.41104636, "memory(GiB)": 34.88, "step": 80820, "train_speed(iter/s)": 0.4105 }, { "acc": 0.91305866, "epoch": 2.188422278179406, "grad_norm": 8.286643028259277, "learning_rate": 4.635374333583272e-06, "loss": 0.47803288, "memory(GiB)": 34.88, "step": 80825, "train_speed(iter/s)": 0.410501 }, { "acc": 0.9175355, "epoch": 2.1885576584626216, "grad_norm": 5.757458686828613, "learning_rate": 4.634816266485697e-06, "loss": 0.45917158, "memory(GiB)": 34.88, "step": 80830, "train_speed(iter/s)": 0.410503 }, { "acc": 0.89118137, "epoch": 2.188693038745837, "grad_norm": 11.0047607421875, "learning_rate": 4.634258203969032e-06, "loss": 0.68321838, "memory(GiB)": 34.88, "step": 80835, "train_speed(iter/s)": 0.410504 }, { "acc": 0.90194626, "epoch": 2.188828419029053, "grad_norm": 7.780618190765381, "learning_rate": 4.633700146040263e-06, "loss": 0.53330026, "memory(GiB)": 34.88, "step": 80840, "train_speed(iter/s)": 0.410505 }, { "acc": 0.91004372, "epoch": 2.188963799312268, "grad_norm": 11.896357536315918, "learning_rate": 4.633142092706386e-06, "loss": 0.50651479, "memory(GiB)": 34.88, "step": 80845, "train_speed(iter/s)": 0.410506 }, { "acc": 0.91097965, "epoch": 2.189099179595484, "grad_norm": 6.9239420890808105, "learning_rate": 4.632584043974385e-06, "loss": 0.5107049, "memory(GiB)": 34.88, "step": 80850, "train_speed(iter/s)": 0.410508 }, { "acc": 0.90730553, "epoch": 2.1892345598786993, "grad_norm": 8.608879089355469, "learning_rate": 4.632025999851257e-06, "loss": 0.51936393, "memory(GiB)": 34.88, "step": 80855, "train_speed(iter/s)": 0.410509 }, { "acc": 0.92691908, "epoch": 2.1893699401619147, "grad_norm": 8.548532485961914, "learning_rate": 4.631467960343988e-06, "loss": 0.35851293, "memory(GiB)": 34.88, "step": 80860, "train_speed(iter/s)": 0.410511 }, { "acc": 0.89751854, "epoch": 2.1895053204451305, "grad_norm": 11.054707527160645, "learning_rate": 4.630909925459572e-06, "loss": 0.60447512, "memory(GiB)": 34.88, "step": 80865, "train_speed(iter/s)": 0.410512 }, { "acc": 0.90982704, "epoch": 2.189640700728346, "grad_norm": 112.95791625976562, "learning_rate": 4.630351895204996e-06, "loss": 0.53192749, "memory(GiB)": 34.88, "step": 80870, "train_speed(iter/s)": 0.410513 }, { "acc": 0.92852869, "epoch": 2.1897760810115616, "grad_norm": 13.307469367980957, "learning_rate": 4.629793869587253e-06, "loss": 0.38708518, "memory(GiB)": 34.88, "step": 80875, "train_speed(iter/s)": 0.410515 }, { "acc": 0.92100868, "epoch": 2.189911461294777, "grad_norm": 5.810297012329102, "learning_rate": 4.629235848613334e-06, "loss": 0.47639689, "memory(GiB)": 34.88, "step": 80880, "train_speed(iter/s)": 0.410516 }, { "acc": 0.91847935, "epoch": 2.190046841577993, "grad_norm": 10.06324291229248, "learning_rate": 4.628677832290227e-06, "loss": 0.44802175, "memory(GiB)": 34.88, "step": 80885, "train_speed(iter/s)": 0.410518 }, { "acc": 0.91582661, "epoch": 2.190182221861208, "grad_norm": 10.459242820739746, "learning_rate": 4.628119820624924e-06, "loss": 0.46617894, "memory(GiB)": 34.88, "step": 80890, "train_speed(iter/s)": 0.410519 }, { "acc": 0.92712708, "epoch": 2.1903176021444235, "grad_norm": 5.9206109046936035, "learning_rate": 4.627561813624414e-06, "loss": 0.45522261, "memory(GiB)": 34.88, "step": 80895, "train_speed(iter/s)": 0.410521 }, { "acc": 0.90128498, "epoch": 2.1904529824276393, "grad_norm": 6.849043369293213, "learning_rate": 4.627003811295688e-06, "loss": 0.59883738, "memory(GiB)": 34.88, "step": 80900, "train_speed(iter/s)": 0.410522 }, { "acc": 0.91661129, "epoch": 2.1905883627108547, "grad_norm": 10.374344825744629, "learning_rate": 4.626445813645734e-06, "loss": 0.47534437, "memory(GiB)": 34.88, "step": 80905, "train_speed(iter/s)": 0.410523 }, { "acc": 0.92222424, "epoch": 2.1907237429940705, "grad_norm": 11.73442554473877, "learning_rate": 4.625887820681544e-06, "loss": 0.43186612, "memory(GiB)": 34.88, "step": 80910, "train_speed(iter/s)": 0.410525 }, { "acc": 0.90399199, "epoch": 2.190859123277286, "grad_norm": 41.77125549316406, "learning_rate": 4.62532983241011e-06, "loss": 0.58873272, "memory(GiB)": 34.88, "step": 80915, "train_speed(iter/s)": 0.410526 }, { "acc": 0.93565569, "epoch": 2.1909945035605016, "grad_norm": 22.75139045715332, "learning_rate": 4.6247718488384175e-06, "loss": 0.36398044, "memory(GiB)": 34.88, "step": 80920, "train_speed(iter/s)": 0.410528 }, { "acc": 0.91207495, "epoch": 2.191129883843717, "grad_norm": 7.167462348937988, "learning_rate": 4.624213869973459e-06, "loss": 0.48343239, "memory(GiB)": 34.88, "step": 80925, "train_speed(iter/s)": 0.410529 }, { "acc": 0.9399869, "epoch": 2.1912652641269323, "grad_norm": 3.9122445583343506, "learning_rate": 4.623655895822225e-06, "loss": 0.30522733, "memory(GiB)": 34.88, "step": 80930, "train_speed(iter/s)": 0.41053 }, { "acc": 0.94316511, "epoch": 2.191400644410148, "grad_norm": 6.181196212768555, "learning_rate": 4.623097926391703e-06, "loss": 0.2854876, "memory(GiB)": 34.88, "step": 80935, "train_speed(iter/s)": 0.410532 }, { "acc": 0.91858692, "epoch": 2.1915360246933635, "grad_norm": 16.027263641357422, "learning_rate": 4.622539961688882e-06, "loss": 0.38609462, "memory(GiB)": 34.88, "step": 80940, "train_speed(iter/s)": 0.410533 }, { "acc": 0.93847818, "epoch": 2.1916714049765793, "grad_norm": 7.840813159942627, "learning_rate": 4.621982001720754e-06, "loss": 0.30259857, "memory(GiB)": 34.88, "step": 80945, "train_speed(iter/s)": 0.410535 }, { "acc": 0.9155798, "epoch": 2.1918067852597947, "grad_norm": 10.193039894104004, "learning_rate": 4.621424046494309e-06, "loss": 0.41446033, "memory(GiB)": 34.88, "step": 80950, "train_speed(iter/s)": 0.410536 }, { "acc": 0.91528282, "epoch": 2.1919421655430105, "grad_norm": 15.644050598144531, "learning_rate": 4.620866096016533e-06, "loss": 0.46625013, "memory(GiB)": 34.88, "step": 80955, "train_speed(iter/s)": 0.410537 }, { "acc": 0.92388706, "epoch": 2.192077545826226, "grad_norm": 14.968132972717285, "learning_rate": 4.62030815029442e-06, "loss": 0.38058491, "memory(GiB)": 34.88, "step": 80960, "train_speed(iter/s)": 0.410539 }, { "acc": 0.91393862, "epoch": 2.1922129261094416, "grad_norm": 21.160184860229492, "learning_rate": 4.619750209334955e-06, "loss": 0.51388741, "memory(GiB)": 34.88, "step": 80965, "train_speed(iter/s)": 0.41054 }, { "acc": 0.91089401, "epoch": 2.192348306392657, "grad_norm": 11.689563751220703, "learning_rate": 4.6191922731451315e-06, "loss": 0.53156686, "memory(GiB)": 34.88, "step": 80970, "train_speed(iter/s)": 0.410541 }, { "acc": 0.92328434, "epoch": 2.1924836866758723, "grad_norm": 9.728452682495117, "learning_rate": 4.618634341731935e-06, "loss": 0.48520188, "memory(GiB)": 34.88, "step": 80975, "train_speed(iter/s)": 0.410543 }, { "acc": 0.90973616, "epoch": 2.192619066959088, "grad_norm": 6.924734115600586, "learning_rate": 4.618076415102356e-06, "loss": 0.51593461, "memory(GiB)": 34.88, "step": 80980, "train_speed(iter/s)": 0.410544 }, { "acc": 0.91768875, "epoch": 2.1927544472423035, "grad_norm": 7.155054092407227, "learning_rate": 4.617518493263385e-06, "loss": 0.49535422, "memory(GiB)": 34.88, "step": 80985, "train_speed(iter/s)": 0.410546 }, { "acc": 0.91892891, "epoch": 2.1928898275255193, "grad_norm": 6.755396842956543, "learning_rate": 4.616960576222008e-06, "loss": 0.46962485, "memory(GiB)": 34.88, "step": 80990, "train_speed(iter/s)": 0.410547 }, { "acc": 0.9213748, "epoch": 2.1930252078087347, "grad_norm": 7.031832218170166, "learning_rate": 4.6164026639852175e-06, "loss": 0.42812524, "memory(GiB)": 34.88, "step": 80995, "train_speed(iter/s)": 0.410548 }, { "acc": 0.91463604, "epoch": 2.1931605880919505, "grad_norm": 8.710580825805664, "learning_rate": 4.615844756560001e-06, "loss": 0.40529308, "memory(GiB)": 34.88, "step": 81000, "train_speed(iter/s)": 0.41055 }, { "acc": 0.91325951, "epoch": 2.193295968375166, "grad_norm": 24.597923278808594, "learning_rate": 4.615286853953348e-06, "loss": 0.4862658, "memory(GiB)": 34.88, "step": 81005, "train_speed(iter/s)": 0.410551 }, { "acc": 0.90218611, "epoch": 2.193431348658381, "grad_norm": 8.966626167297363, "learning_rate": 4.614728956172244e-06, "loss": 0.57644048, "memory(GiB)": 34.88, "step": 81010, "train_speed(iter/s)": 0.410553 }, { "acc": 0.90053825, "epoch": 2.193566728941597, "grad_norm": 8.671263694763184, "learning_rate": 4.614171063223682e-06, "loss": 0.5123867, "memory(GiB)": 34.88, "step": 81015, "train_speed(iter/s)": 0.410554 }, { "acc": 0.93284645, "epoch": 2.1937021092248123, "grad_norm": 7.403128623962402, "learning_rate": 4.6136131751146495e-06, "loss": 0.35386639, "memory(GiB)": 34.88, "step": 81020, "train_speed(iter/s)": 0.410555 }, { "acc": 0.92406387, "epoch": 2.193837489508028, "grad_norm": 4.072897911071777, "learning_rate": 4.613055291852133e-06, "loss": 0.45929937, "memory(GiB)": 34.88, "step": 81025, "train_speed(iter/s)": 0.410557 }, { "acc": 0.91493263, "epoch": 2.1939728697912435, "grad_norm": 6.9672417640686035, "learning_rate": 4.612497413443125e-06, "loss": 0.43042641, "memory(GiB)": 34.88, "step": 81030, "train_speed(iter/s)": 0.410558 }, { "acc": 0.92664709, "epoch": 2.1941082500744593, "grad_norm": 3.6582884788513184, "learning_rate": 4.61193953989461e-06, "loss": 0.39298561, "memory(GiB)": 34.88, "step": 81035, "train_speed(iter/s)": 0.41056 }, { "acc": 0.92595482, "epoch": 2.1942436303576747, "grad_norm": 14.00373363494873, "learning_rate": 4.61138167121358e-06, "loss": 0.3873992, "memory(GiB)": 34.88, "step": 81040, "train_speed(iter/s)": 0.410561 }, { "acc": 0.90434341, "epoch": 2.1943790106408905, "grad_norm": 7.902142524719238, "learning_rate": 4.61082380740702e-06, "loss": 0.58060713, "memory(GiB)": 34.88, "step": 81045, "train_speed(iter/s)": 0.410562 }, { "acc": 0.92210379, "epoch": 2.194514390924106, "grad_norm": 13.537578582763672, "learning_rate": 4.610265948481921e-06, "loss": 0.43967552, "memory(GiB)": 34.88, "step": 81050, "train_speed(iter/s)": 0.410564 }, { "acc": 0.90450745, "epoch": 2.194649771207321, "grad_norm": 27.475337982177734, "learning_rate": 4.609708094445271e-06, "loss": 0.60467744, "memory(GiB)": 34.88, "step": 81055, "train_speed(iter/s)": 0.410565 }, { "acc": 0.9060051, "epoch": 2.194785151490537, "grad_norm": 7.277047157287598, "learning_rate": 4.609150245304056e-06, "loss": 0.49221153, "memory(GiB)": 34.88, "step": 81060, "train_speed(iter/s)": 0.410567 }, { "acc": 0.9172472, "epoch": 2.1949205317737523, "grad_norm": 5.9797234535217285, "learning_rate": 4.6085924010652665e-06, "loss": 0.54127922, "memory(GiB)": 34.88, "step": 81065, "train_speed(iter/s)": 0.410568 }, { "acc": 0.93510685, "epoch": 2.195055912056968, "grad_norm": 7.6450581550598145, "learning_rate": 4.608034561735889e-06, "loss": 0.35616477, "memory(GiB)": 34.88, "step": 81070, "train_speed(iter/s)": 0.41057 }, { "acc": 0.90961828, "epoch": 2.1951912923401835, "grad_norm": 28.72452735900879, "learning_rate": 4.607476727322913e-06, "loss": 0.44446745, "memory(GiB)": 34.88, "step": 81075, "train_speed(iter/s)": 0.410571 }, { "acc": 0.92280531, "epoch": 2.1953266726233993, "grad_norm": 21.177675247192383, "learning_rate": 4.606918897833325e-06, "loss": 0.51332788, "memory(GiB)": 34.88, "step": 81080, "train_speed(iter/s)": 0.410572 }, { "acc": 0.94065332, "epoch": 2.1954620529066147, "grad_norm": 4.642030239105225, "learning_rate": 4.6063610732741145e-06, "loss": 0.29061515, "memory(GiB)": 34.88, "step": 81085, "train_speed(iter/s)": 0.410574 }, { "acc": 0.93664951, "epoch": 2.19559743318983, "grad_norm": 6.866283416748047, "learning_rate": 4.605803253652268e-06, "loss": 0.35004964, "memory(GiB)": 34.88, "step": 81090, "train_speed(iter/s)": 0.410575 }, { "acc": 0.91586266, "epoch": 2.195732813473046, "grad_norm": 6.552346706390381, "learning_rate": 4.605245438974774e-06, "loss": 0.42177486, "memory(GiB)": 34.88, "step": 81095, "train_speed(iter/s)": 0.410577 }, { "acc": 0.93017874, "epoch": 2.195868193756261, "grad_norm": 8.984231948852539, "learning_rate": 4.604687629248621e-06, "loss": 0.34025803, "memory(GiB)": 34.88, "step": 81100, "train_speed(iter/s)": 0.410578 }, { "acc": 0.92911053, "epoch": 2.196003574039477, "grad_norm": 7.120288848876953, "learning_rate": 4.604129824480795e-06, "loss": 0.42525969, "memory(GiB)": 34.88, "step": 81105, "train_speed(iter/s)": 0.410579 }, { "acc": 0.91780396, "epoch": 2.1961389543226923, "grad_norm": 6.419589042663574, "learning_rate": 4.603572024678284e-06, "loss": 0.39319839, "memory(GiB)": 34.88, "step": 81110, "train_speed(iter/s)": 0.410581 }, { "acc": 0.9360857, "epoch": 2.196274334605908, "grad_norm": 4.604062080383301, "learning_rate": 4.603014229848075e-06, "loss": 0.35153837, "memory(GiB)": 34.88, "step": 81115, "train_speed(iter/s)": 0.410582 }, { "acc": 0.92416382, "epoch": 2.1964097148891235, "grad_norm": 10.470962524414062, "learning_rate": 4.602456439997159e-06, "loss": 0.46204267, "memory(GiB)": 34.88, "step": 81120, "train_speed(iter/s)": 0.410584 }, { "acc": 0.94168549, "epoch": 2.196545095172339, "grad_norm": 8.991952896118164, "learning_rate": 4.601898655132518e-06, "loss": 0.27960029, "memory(GiB)": 34.88, "step": 81125, "train_speed(iter/s)": 0.410585 }, { "acc": 0.92137699, "epoch": 2.1966804754555547, "grad_norm": 6.067183971405029, "learning_rate": 4.601340875261144e-06, "loss": 0.50871572, "memory(GiB)": 34.88, "step": 81130, "train_speed(iter/s)": 0.410586 }, { "acc": 0.92336922, "epoch": 2.19681585573877, "grad_norm": 12.172719955444336, "learning_rate": 4.600783100390021e-06, "loss": 0.44320965, "memory(GiB)": 34.88, "step": 81135, "train_speed(iter/s)": 0.410588 }, { "acc": 0.91247292, "epoch": 2.196951236021986, "grad_norm": 14.86977481842041, "learning_rate": 4.6002253305261385e-06, "loss": 0.44753075, "memory(GiB)": 34.88, "step": 81140, "train_speed(iter/s)": 0.410589 }, { "acc": 0.92491093, "epoch": 2.197086616305201, "grad_norm": 12.067452430725098, "learning_rate": 4.599667565676483e-06, "loss": 0.38458476, "memory(GiB)": 34.88, "step": 81145, "train_speed(iter/s)": 0.41059 }, { "acc": 0.9068903, "epoch": 2.197221996588417, "grad_norm": 9.001871109008789, "learning_rate": 4.5991098058480405e-06, "loss": 0.57840519, "memory(GiB)": 34.88, "step": 81150, "train_speed(iter/s)": 0.410592 }, { "acc": 0.93245087, "epoch": 2.1973573768716324, "grad_norm": 9.123124122619629, "learning_rate": 4.5985520510478e-06, "loss": 0.37575436, "memory(GiB)": 34.88, "step": 81155, "train_speed(iter/s)": 0.410593 }, { "acc": 0.92756557, "epoch": 2.197492757154848, "grad_norm": 9.47604751586914, "learning_rate": 4.597994301282746e-06, "loss": 0.40417562, "memory(GiB)": 34.88, "step": 81160, "train_speed(iter/s)": 0.410595 }, { "acc": 0.91497059, "epoch": 2.1976281374380635, "grad_norm": 11.978837966918945, "learning_rate": 4.597436556559868e-06, "loss": 0.43948021, "memory(GiB)": 34.88, "step": 81165, "train_speed(iter/s)": 0.410596 }, { "acc": 0.91951628, "epoch": 2.197763517721279, "grad_norm": 17.394330978393555, "learning_rate": 4.596878816886151e-06, "loss": 0.48118086, "memory(GiB)": 34.88, "step": 81170, "train_speed(iter/s)": 0.410598 }, { "acc": 0.93109875, "epoch": 2.1978988980044947, "grad_norm": 5.531324863433838, "learning_rate": 4.596321082268581e-06, "loss": 0.3322679, "memory(GiB)": 34.88, "step": 81175, "train_speed(iter/s)": 0.410599 }, { "acc": 0.91495953, "epoch": 2.19803427828771, "grad_norm": 8.613259315490723, "learning_rate": 4.59576335271415e-06, "loss": 0.38562598, "memory(GiB)": 34.88, "step": 81180, "train_speed(iter/s)": 0.4106 }, { "acc": 0.91354666, "epoch": 2.198169658570926, "grad_norm": 9.074675559997559, "learning_rate": 4.5952056282298375e-06, "loss": 0.50900002, "memory(GiB)": 34.88, "step": 81185, "train_speed(iter/s)": 0.410602 }, { "acc": 0.91929474, "epoch": 2.198305038854141, "grad_norm": 5.365999221801758, "learning_rate": 4.594647908822635e-06, "loss": 0.38621879, "memory(GiB)": 34.88, "step": 81190, "train_speed(iter/s)": 0.410603 }, { "acc": 0.90496216, "epoch": 2.198440419137357, "grad_norm": 7.120272159576416, "learning_rate": 4.594090194499526e-06, "loss": 0.51460457, "memory(GiB)": 34.88, "step": 81195, "train_speed(iter/s)": 0.410604 }, { "acc": 0.93070259, "epoch": 2.1985757994205724, "grad_norm": 4.867336273193359, "learning_rate": 4.5935324852675e-06, "loss": 0.40498948, "memory(GiB)": 34.88, "step": 81200, "train_speed(iter/s)": 0.410606 }, { "acc": 0.93478565, "epoch": 2.198711179703788, "grad_norm": 3.139528512954712, "learning_rate": 4.5929747811335405e-06, "loss": 0.28432109, "memory(GiB)": 34.88, "step": 81205, "train_speed(iter/s)": 0.410607 }, { "acc": 0.91221542, "epoch": 2.1988465599870035, "grad_norm": 13.181655883789062, "learning_rate": 4.592417082104635e-06, "loss": 0.50212269, "memory(GiB)": 34.88, "step": 81210, "train_speed(iter/s)": 0.410609 }, { "acc": 0.92062988, "epoch": 2.198981940270219, "grad_norm": 10.282176971435547, "learning_rate": 4.591859388187771e-06, "loss": 0.42951469, "memory(GiB)": 34.88, "step": 81215, "train_speed(iter/s)": 0.41061 }, { "acc": 0.92670927, "epoch": 2.1991173205534347, "grad_norm": 8.201048851013184, "learning_rate": 4.591301699389934e-06, "loss": 0.40531669, "memory(GiB)": 34.88, "step": 81220, "train_speed(iter/s)": 0.410611 }, { "acc": 0.93360023, "epoch": 2.19925270083665, "grad_norm": 7.100986480712891, "learning_rate": 4.5907440157181095e-06, "loss": 0.37233429, "memory(GiB)": 34.88, "step": 81225, "train_speed(iter/s)": 0.410613 }, { "acc": 0.92142162, "epoch": 2.199388081119866, "grad_norm": 8.514617919921875, "learning_rate": 4.5901863371792826e-06, "loss": 0.45786533, "memory(GiB)": 34.88, "step": 81230, "train_speed(iter/s)": 0.410614 }, { "acc": 0.90916414, "epoch": 2.199523461403081, "grad_norm": 6.544040679931641, "learning_rate": 4.589628663780442e-06, "loss": 0.51582475, "memory(GiB)": 34.88, "step": 81235, "train_speed(iter/s)": 0.410616 }, { "acc": 0.93563967, "epoch": 2.199658841686297, "grad_norm": 8.339847564697266, "learning_rate": 4.589070995528571e-06, "loss": 0.3763957, "memory(GiB)": 34.88, "step": 81240, "train_speed(iter/s)": 0.410617 }, { "acc": 0.92236071, "epoch": 2.1997942219695124, "grad_norm": 2.951289415359497, "learning_rate": 4.588513332430658e-06, "loss": 0.33855715, "memory(GiB)": 34.88, "step": 81245, "train_speed(iter/s)": 0.410618 }, { "acc": 0.92619429, "epoch": 2.1999296022527277, "grad_norm": 5.696974754333496, "learning_rate": 4.587955674493687e-06, "loss": 0.38948133, "memory(GiB)": 34.88, "step": 81250, "train_speed(iter/s)": 0.410619 }, { "acc": 0.93143673, "epoch": 2.2000649825359435, "grad_norm": 7.960155963897705, "learning_rate": 4.587398021724643e-06, "loss": 0.41105852, "memory(GiB)": 34.88, "step": 81255, "train_speed(iter/s)": 0.410621 }, { "acc": 0.94102926, "epoch": 2.200200362819159, "grad_norm": 11.560725212097168, "learning_rate": 4.586840374130516e-06, "loss": 0.27053251, "memory(GiB)": 34.88, "step": 81260, "train_speed(iter/s)": 0.410622 }, { "acc": 0.91809006, "epoch": 2.2003357431023747, "grad_norm": 11.732076644897461, "learning_rate": 4.586282731718287e-06, "loss": 0.48778653, "memory(GiB)": 34.88, "step": 81265, "train_speed(iter/s)": 0.410624 }, { "acc": 0.91280451, "epoch": 2.20047112338559, "grad_norm": 13.396069526672363, "learning_rate": 4.585725094494944e-06, "loss": 0.516642, "memory(GiB)": 34.88, "step": 81270, "train_speed(iter/s)": 0.410625 }, { "acc": 0.92388401, "epoch": 2.200606503668806, "grad_norm": 8.203922271728516, "learning_rate": 4.58516746246747e-06, "loss": 0.42332935, "memory(GiB)": 34.88, "step": 81275, "train_speed(iter/s)": 0.410627 }, { "acc": 0.91756449, "epoch": 2.200741883952021, "grad_norm": 10.776674270629883, "learning_rate": 4.584609835642853e-06, "loss": 0.39549921, "memory(GiB)": 34.88, "step": 81280, "train_speed(iter/s)": 0.410628 }, { "acc": 0.90825825, "epoch": 2.2008772642352366, "grad_norm": 4.375088691711426, "learning_rate": 4.584052214028078e-06, "loss": 0.42745161, "memory(GiB)": 34.88, "step": 81285, "train_speed(iter/s)": 0.410629 }, { "acc": 0.90790787, "epoch": 2.2010126445184524, "grad_norm": 7.79921293258667, "learning_rate": 4.583494597630129e-06, "loss": 0.52510281, "memory(GiB)": 34.88, "step": 81290, "train_speed(iter/s)": 0.410631 }, { "acc": 0.93814831, "epoch": 2.2011480248016677, "grad_norm": 8.290478706359863, "learning_rate": 4.582936986455993e-06, "loss": 0.34185438, "memory(GiB)": 34.88, "step": 81295, "train_speed(iter/s)": 0.410632 }, { "acc": 0.93745308, "epoch": 2.2012834050848835, "grad_norm": 23.868471145629883, "learning_rate": 4.582379380512653e-06, "loss": 0.32971227, "memory(GiB)": 34.88, "step": 81300, "train_speed(iter/s)": 0.410633 }, { "acc": 0.91019306, "epoch": 2.201418785368099, "grad_norm": 12.78443717956543, "learning_rate": 4.581821779807098e-06, "loss": 0.5737278, "memory(GiB)": 34.88, "step": 81305, "train_speed(iter/s)": 0.410635 }, { "acc": 0.90625038, "epoch": 2.2015541656513147, "grad_norm": 6.362057209014893, "learning_rate": 4.581264184346308e-06, "loss": 0.53491926, "memory(GiB)": 34.88, "step": 81310, "train_speed(iter/s)": 0.410636 }, { "acc": 0.92697182, "epoch": 2.20168954593453, "grad_norm": 5.059720516204834, "learning_rate": 4.580706594137271e-06, "loss": 0.48481131, "memory(GiB)": 34.88, "step": 81315, "train_speed(iter/s)": 0.410637 }, { "acc": 0.90498104, "epoch": 2.201824926217746, "grad_norm": 15.075094223022461, "learning_rate": 4.580149009186971e-06, "loss": 0.59902596, "memory(GiB)": 34.88, "step": 81320, "train_speed(iter/s)": 0.410639 }, { "acc": 0.91391716, "epoch": 2.201960306500961, "grad_norm": 7.891247749328613, "learning_rate": 4.5795914295023925e-06, "loss": 0.44416208, "memory(GiB)": 34.88, "step": 81325, "train_speed(iter/s)": 0.41064 }, { "acc": 0.93147526, "epoch": 2.2020956867841766, "grad_norm": 5.973071575164795, "learning_rate": 4.579033855090523e-06, "loss": 0.35923514, "memory(GiB)": 34.88, "step": 81330, "train_speed(iter/s)": 0.410642 }, { "acc": 0.93153696, "epoch": 2.2022310670673924, "grad_norm": 6.756353855133057, "learning_rate": 4.578476285958343e-06, "loss": 0.38443618, "memory(GiB)": 34.88, "step": 81335, "train_speed(iter/s)": 0.410643 }, { "acc": 0.93564339, "epoch": 2.2023664473506077, "grad_norm": 7.224756717681885, "learning_rate": 4.577918722112841e-06, "loss": 0.30606589, "memory(GiB)": 34.88, "step": 81340, "train_speed(iter/s)": 0.410644 }, { "acc": 0.92679996, "epoch": 2.2025018276338235, "grad_norm": 13.356302261352539, "learning_rate": 4.577361163560999e-06, "loss": 0.41064038, "memory(GiB)": 34.88, "step": 81345, "train_speed(iter/s)": 0.410645 }, { "acc": 0.91326694, "epoch": 2.202637207917039, "grad_norm": 3.542172908782959, "learning_rate": 4.576803610309803e-06, "loss": 0.54115157, "memory(GiB)": 34.88, "step": 81350, "train_speed(iter/s)": 0.410647 }, { "acc": 0.92207069, "epoch": 2.2027725882002547, "grad_norm": 5.749911785125732, "learning_rate": 4.576246062366236e-06, "loss": 0.40206904, "memory(GiB)": 34.88, "step": 81355, "train_speed(iter/s)": 0.410648 }, { "acc": 0.91712399, "epoch": 2.20290796848347, "grad_norm": 7.033833026885986, "learning_rate": 4.575688519737283e-06, "loss": 0.40426826, "memory(GiB)": 34.88, "step": 81360, "train_speed(iter/s)": 0.41065 }, { "acc": 0.92456074, "epoch": 2.203043348766686, "grad_norm": 9.642656326293945, "learning_rate": 4.57513098242993e-06, "loss": 0.39493287, "memory(GiB)": 34.88, "step": 81365, "train_speed(iter/s)": 0.410651 }, { "acc": 0.92720909, "epoch": 2.203178729049901, "grad_norm": 8.20571231842041, "learning_rate": 4.574573450451158e-06, "loss": 0.40814762, "memory(GiB)": 34.88, "step": 81370, "train_speed(iter/s)": 0.410652 }, { "acc": 0.92832031, "epoch": 2.2033141093331166, "grad_norm": 7.640102863311768, "learning_rate": 4.574015923807954e-06, "loss": 0.42254505, "memory(GiB)": 34.88, "step": 81375, "train_speed(iter/s)": 0.410654 }, { "acc": 0.93457479, "epoch": 2.2034494896163324, "grad_norm": 9.554539680480957, "learning_rate": 4.573458402507301e-06, "loss": 0.33326235, "memory(GiB)": 34.88, "step": 81380, "train_speed(iter/s)": 0.410655 }, { "acc": 0.92271214, "epoch": 2.2035848698995477, "grad_norm": 10.20068073272705, "learning_rate": 4.572900886556184e-06, "loss": 0.46935058, "memory(GiB)": 34.88, "step": 81385, "train_speed(iter/s)": 0.410657 }, { "acc": 0.92550755, "epoch": 2.2037202501827635, "grad_norm": 4.4939398765563965, "learning_rate": 4.572343375961585e-06, "loss": 0.4704051, "memory(GiB)": 34.88, "step": 81390, "train_speed(iter/s)": 0.410658 }, { "acc": 0.92985926, "epoch": 2.203855630465979, "grad_norm": 7.0135908126831055, "learning_rate": 4.5717858707304895e-06, "loss": 0.4198823, "memory(GiB)": 34.88, "step": 81395, "train_speed(iter/s)": 0.410659 }, { "acc": 0.91617279, "epoch": 2.2039910107491947, "grad_norm": 13.17548656463623, "learning_rate": 4.571228370869881e-06, "loss": 0.48376303, "memory(GiB)": 34.88, "step": 81400, "train_speed(iter/s)": 0.410661 }, { "acc": 0.93191643, "epoch": 2.20412639103241, "grad_norm": 7.8641228675842285, "learning_rate": 4.5706708763867425e-06, "loss": 0.36013093, "memory(GiB)": 34.88, "step": 81405, "train_speed(iter/s)": 0.410662 }, { "acc": 0.92563009, "epoch": 2.2042617713156254, "grad_norm": 13.982422828674316, "learning_rate": 4.570113387288059e-06, "loss": 0.42328949, "memory(GiB)": 34.88, "step": 81410, "train_speed(iter/s)": 0.410663 }, { "acc": 0.90267305, "epoch": 2.204397151598841, "grad_norm": 2.8824515342712402, "learning_rate": 4.569555903580813e-06, "loss": 0.53860741, "memory(GiB)": 34.88, "step": 81415, "train_speed(iter/s)": 0.410665 }, { "acc": 0.92790442, "epoch": 2.2045325318820566, "grad_norm": 8.398565292358398, "learning_rate": 4.568998425271989e-06, "loss": 0.3230957, "memory(GiB)": 34.88, "step": 81420, "train_speed(iter/s)": 0.410666 }, { "acc": 0.92779732, "epoch": 2.2046679121652724, "grad_norm": 9.1099214553833, "learning_rate": 4.56844095236857e-06, "loss": 0.38621249, "memory(GiB)": 34.88, "step": 81425, "train_speed(iter/s)": 0.410668 }, { "acc": 0.91711426, "epoch": 2.2048032924484877, "grad_norm": 13.450096130371094, "learning_rate": 4.56788348487754e-06, "loss": 0.44063864, "memory(GiB)": 34.88, "step": 81430, "train_speed(iter/s)": 0.410669 }, { "acc": 0.9127429, "epoch": 2.2049386727317035, "grad_norm": 12.507558822631836, "learning_rate": 4.567326022805882e-06, "loss": 0.43646126, "memory(GiB)": 34.88, "step": 81435, "train_speed(iter/s)": 0.41067 }, { "acc": 0.92983112, "epoch": 2.205074053014919, "grad_norm": 13.29312801361084, "learning_rate": 4.5667685661605786e-06, "loss": 0.37273324, "memory(GiB)": 34.88, "step": 81440, "train_speed(iter/s)": 0.410672 }, { "acc": 0.90857601, "epoch": 2.2052094332981342, "grad_norm": 9.78022575378418, "learning_rate": 4.566211114948614e-06, "loss": 0.43943119, "memory(GiB)": 34.88, "step": 81445, "train_speed(iter/s)": 0.410673 }, { "acc": 0.92226839, "epoch": 2.20534481358135, "grad_norm": 5.357179641723633, "learning_rate": 4.565653669176971e-06, "loss": 0.46631498, "memory(GiB)": 34.88, "step": 81450, "train_speed(iter/s)": 0.410674 }, { "acc": 0.93883314, "epoch": 2.2054801938645654, "grad_norm": 4.433380126953125, "learning_rate": 4.565096228852633e-06, "loss": 0.37670615, "memory(GiB)": 34.88, "step": 81455, "train_speed(iter/s)": 0.410676 }, { "acc": 0.9251956, "epoch": 2.205615574147781, "grad_norm": 23.998167037963867, "learning_rate": 4.564538793982582e-06, "loss": 0.44136467, "memory(GiB)": 34.88, "step": 81460, "train_speed(iter/s)": 0.410677 }, { "acc": 0.9449069, "epoch": 2.2057509544309966, "grad_norm": 6.007851600646973, "learning_rate": 4.5639813645738026e-06, "loss": 0.33987374, "memory(GiB)": 34.88, "step": 81465, "train_speed(iter/s)": 0.410678 }, { "acc": 0.91396828, "epoch": 2.2058863347142124, "grad_norm": 8.969194412231445, "learning_rate": 4.563423940633277e-06, "loss": 0.45166144, "memory(GiB)": 34.88, "step": 81470, "train_speed(iter/s)": 0.41068 }, { "acc": 0.93431797, "epoch": 2.2060217149974277, "grad_norm": 4.707570552825928, "learning_rate": 4.562866522167987e-06, "loss": 0.33417234, "memory(GiB)": 34.88, "step": 81475, "train_speed(iter/s)": 0.410681 }, { "acc": 0.92466021, "epoch": 2.2061570952806435, "grad_norm": 4.6505889892578125, "learning_rate": 4.562309109184917e-06, "loss": 0.37759714, "memory(GiB)": 34.88, "step": 81480, "train_speed(iter/s)": 0.410682 }, { "acc": 0.93787842, "epoch": 2.206292475563859, "grad_norm": 8.675127983093262, "learning_rate": 4.5617517016910486e-06, "loss": 0.39888506, "memory(GiB)": 34.88, "step": 81485, "train_speed(iter/s)": 0.410684 }, { "acc": 0.90292263, "epoch": 2.2064278558470742, "grad_norm": 17.475383758544922, "learning_rate": 4.561194299693365e-06, "loss": 0.55910163, "memory(GiB)": 34.88, "step": 81490, "train_speed(iter/s)": 0.410685 }, { "acc": 0.93237171, "epoch": 2.20656323613029, "grad_norm": 3.330418825149536, "learning_rate": 4.5606369031988475e-06, "loss": 0.43023973, "memory(GiB)": 34.88, "step": 81495, "train_speed(iter/s)": 0.410686 }, { "acc": 0.91978893, "epoch": 2.2066986164135054, "grad_norm": 4.686428070068359, "learning_rate": 4.5600795122144814e-06, "loss": 0.42292328, "memory(GiB)": 34.88, "step": 81500, "train_speed(iter/s)": 0.410688 }, { "acc": 0.92810173, "epoch": 2.206833996696721, "grad_norm": 8.095029830932617, "learning_rate": 4.559522126747246e-06, "loss": 0.35133588, "memory(GiB)": 34.88, "step": 81505, "train_speed(iter/s)": 0.410689 }, { "acc": 0.9161129, "epoch": 2.2069693769799366, "grad_norm": 7.620870590209961, "learning_rate": 4.5589647468041245e-06, "loss": 0.467134, "memory(GiB)": 34.88, "step": 81510, "train_speed(iter/s)": 0.41069 }, { "acc": 0.89767361, "epoch": 2.2071047572631524, "grad_norm": 19.89534568786621, "learning_rate": 4.558407372392102e-06, "loss": 0.52031388, "memory(GiB)": 34.88, "step": 81515, "train_speed(iter/s)": 0.410692 }, { "acc": 0.91646128, "epoch": 2.2072401375463677, "grad_norm": 6.408435344696045, "learning_rate": 4.557850003518156e-06, "loss": 0.45597744, "memory(GiB)": 34.88, "step": 81520, "train_speed(iter/s)": 0.410693 }, { "acc": 0.9368372, "epoch": 2.2073755178295835, "grad_norm": 9.651718139648438, "learning_rate": 4.557292640189273e-06, "loss": 0.35172024, "memory(GiB)": 34.88, "step": 81525, "train_speed(iter/s)": 0.410694 }, { "acc": 0.93114529, "epoch": 2.207510898112799, "grad_norm": 4.704885482788086, "learning_rate": 4.556735282412432e-06, "loss": 0.378128, "memory(GiB)": 34.88, "step": 81530, "train_speed(iter/s)": 0.410696 }, { "acc": 0.91213627, "epoch": 2.2076462783960142, "grad_norm": 9.957785606384277, "learning_rate": 4.556177930194617e-06, "loss": 0.55249672, "memory(GiB)": 34.88, "step": 81535, "train_speed(iter/s)": 0.410697 }, { "acc": 0.92264977, "epoch": 2.20778165867923, "grad_norm": 5.54663610458374, "learning_rate": 4.5556205835428065e-06, "loss": 0.38651104, "memory(GiB)": 34.88, "step": 81540, "train_speed(iter/s)": 0.410699 }, { "acc": 0.91799908, "epoch": 2.2079170389624454, "grad_norm": 7.660892009735107, "learning_rate": 4.555063242463986e-06, "loss": 0.49761863, "memory(GiB)": 34.88, "step": 81545, "train_speed(iter/s)": 0.4107 }, { "acc": 0.91647062, "epoch": 2.208052419245661, "grad_norm": 9.925381660461426, "learning_rate": 4.5545059069651375e-06, "loss": 0.44965138, "memory(GiB)": 34.88, "step": 81550, "train_speed(iter/s)": 0.410701 }, { "acc": 0.91021051, "epoch": 2.2081877995288766, "grad_norm": 9.771199226379395, "learning_rate": 4.5539485770532395e-06, "loss": 0.56505551, "memory(GiB)": 34.88, "step": 81555, "train_speed(iter/s)": 0.410703 }, { "acc": 0.92350082, "epoch": 2.2083231798120924, "grad_norm": 3.8112897872924805, "learning_rate": 4.553391252735278e-06, "loss": 0.41867876, "memory(GiB)": 34.88, "step": 81560, "train_speed(iter/s)": 0.410704 }, { "acc": 0.9314621, "epoch": 2.2084585600953077, "grad_norm": 9.77207088470459, "learning_rate": 4.552833934018231e-06, "loss": 0.40219498, "memory(GiB)": 34.88, "step": 81565, "train_speed(iter/s)": 0.410705 }, { "acc": 0.92920151, "epoch": 2.208593940378523, "grad_norm": 19.495201110839844, "learning_rate": 4.5522766209090806e-06, "loss": 0.49022431, "memory(GiB)": 34.88, "step": 81570, "train_speed(iter/s)": 0.410707 }, { "acc": 0.90429707, "epoch": 2.208729320661739, "grad_norm": 5.58779239654541, "learning_rate": 4.551719313414809e-06, "loss": 0.59771662, "memory(GiB)": 34.88, "step": 81575, "train_speed(iter/s)": 0.410708 }, { "acc": 0.93214798, "epoch": 2.2088647009449542, "grad_norm": 6.4789605140686035, "learning_rate": 4.551162011542397e-06, "loss": 0.36135855, "memory(GiB)": 34.88, "step": 81580, "train_speed(iter/s)": 0.410709 }, { "acc": 0.92685108, "epoch": 2.20900008122817, "grad_norm": 4.00557804107666, "learning_rate": 4.550604715298827e-06, "loss": 0.46215153, "memory(GiB)": 34.88, "step": 81585, "train_speed(iter/s)": 0.410711 }, { "acc": 0.94570236, "epoch": 2.2091354615113854, "grad_norm": 7.074129581451416, "learning_rate": 4.550047424691079e-06, "loss": 0.22986009, "memory(GiB)": 34.88, "step": 81590, "train_speed(iter/s)": 0.410712 }, { "acc": 0.92542763, "epoch": 2.209270841794601, "grad_norm": 13.532299995422363, "learning_rate": 4.549490139726135e-06, "loss": 0.37020826, "memory(GiB)": 34.88, "step": 81595, "train_speed(iter/s)": 0.410714 }, { "acc": 0.91368914, "epoch": 2.2094062220778166, "grad_norm": 13.780094146728516, "learning_rate": 4.548932860410975e-06, "loss": 0.49169054, "memory(GiB)": 34.88, "step": 81600, "train_speed(iter/s)": 0.410715 }, { "acc": 0.9294426, "epoch": 2.209541602361032, "grad_norm": 9.825288772583008, "learning_rate": 4.5483755867525825e-06, "loss": 0.3551034, "memory(GiB)": 34.88, "step": 81605, "train_speed(iter/s)": 0.410717 }, { "acc": 0.91429262, "epoch": 2.2096769826442477, "grad_norm": 10.279513359069824, "learning_rate": 4.547818318757935e-06, "loss": 0.43602667, "memory(GiB)": 34.88, "step": 81610, "train_speed(iter/s)": 0.410718 }, { "acc": 0.92541895, "epoch": 2.209812362927463, "grad_norm": 10.814703941345215, "learning_rate": 4.547261056434016e-06, "loss": 0.46661968, "memory(GiB)": 34.88, "step": 81615, "train_speed(iter/s)": 0.410719 }, { "acc": 0.90811329, "epoch": 2.209947743210679, "grad_norm": 9.212753295898438, "learning_rate": 4.546703799787804e-06, "loss": 0.45881929, "memory(GiB)": 34.88, "step": 81620, "train_speed(iter/s)": 0.410721 }, { "acc": 0.93114347, "epoch": 2.2100831234938942, "grad_norm": 25.06145477294922, "learning_rate": 4.546146548826282e-06, "loss": 0.39459152, "memory(GiB)": 34.88, "step": 81625, "train_speed(iter/s)": 0.410722 }, { "acc": 0.93325796, "epoch": 2.21021850377711, "grad_norm": 7.123772144317627, "learning_rate": 4.54558930355643e-06, "loss": 0.3485693, "memory(GiB)": 34.88, "step": 81630, "train_speed(iter/s)": 0.410723 }, { "acc": 0.92874308, "epoch": 2.2103538840603254, "grad_norm": 10.764883995056152, "learning_rate": 4.545032063985228e-06, "loss": 0.39698267, "memory(GiB)": 34.88, "step": 81635, "train_speed(iter/s)": 0.410725 }, { "acc": 0.92551489, "epoch": 2.210489264343541, "grad_norm": 7.674159049987793, "learning_rate": 4.544474830119659e-06, "loss": 0.44499846, "memory(GiB)": 34.88, "step": 81640, "train_speed(iter/s)": 0.410726 }, { "acc": 0.93561869, "epoch": 2.2106246446267566, "grad_norm": 11.891376495361328, "learning_rate": 4.543917601966699e-06, "loss": 0.42694941, "memory(GiB)": 34.88, "step": 81645, "train_speed(iter/s)": 0.410728 }, { "acc": 0.92696333, "epoch": 2.210760024909972, "grad_norm": 9.70711612701416, "learning_rate": 4.543360379533331e-06, "loss": 0.35799623, "memory(GiB)": 34.88, "step": 81650, "train_speed(iter/s)": 0.410729 }, { "acc": 0.91549673, "epoch": 2.2108954051931877, "grad_norm": 14.95984172821045, "learning_rate": 4.542803162826535e-06, "loss": 0.40576005, "memory(GiB)": 34.88, "step": 81655, "train_speed(iter/s)": 0.41073 }, { "acc": 0.94416628, "epoch": 2.211030785476403, "grad_norm": 10.295909881591797, "learning_rate": 4.542245951853291e-06, "loss": 0.27413001, "memory(GiB)": 34.88, "step": 81660, "train_speed(iter/s)": 0.410732 }, { "acc": 0.91961899, "epoch": 2.211166165759619, "grad_norm": 8.287222862243652, "learning_rate": 4.541688746620579e-06, "loss": 0.39620202, "memory(GiB)": 34.88, "step": 81665, "train_speed(iter/s)": 0.410733 }, { "acc": 0.92394638, "epoch": 2.2113015460428342, "grad_norm": 12.427824020385742, "learning_rate": 4.54113154713538e-06, "loss": 0.47642212, "memory(GiB)": 34.88, "step": 81670, "train_speed(iter/s)": 0.410735 }, { "acc": 0.92338924, "epoch": 2.21143692632605, "grad_norm": 11.564990997314453, "learning_rate": 4.540574353404675e-06, "loss": 0.37422805, "memory(GiB)": 34.88, "step": 81675, "train_speed(iter/s)": 0.410736 }, { "acc": 0.93393135, "epoch": 2.2115723066092654, "grad_norm": 7.22210168838501, "learning_rate": 4.540017165435441e-06, "loss": 0.34672904, "memory(GiB)": 34.88, "step": 81680, "train_speed(iter/s)": 0.410737 }, { "acc": 0.90544434, "epoch": 2.211707686892481, "grad_norm": 14.203598022460938, "learning_rate": 4.539459983234661e-06, "loss": 0.57058554, "memory(GiB)": 34.88, "step": 81685, "train_speed(iter/s)": 0.410739 }, { "acc": 0.93047638, "epoch": 2.2118430671756966, "grad_norm": 9.534438133239746, "learning_rate": 4.538902806809311e-06, "loss": 0.3827816, "memory(GiB)": 34.88, "step": 81690, "train_speed(iter/s)": 0.41074 }, { "acc": 0.92304611, "epoch": 2.211978447458912, "grad_norm": 19.676776885986328, "learning_rate": 4.5383456361663736e-06, "loss": 0.47721701, "memory(GiB)": 34.88, "step": 81695, "train_speed(iter/s)": 0.410741 }, { "acc": 0.92198429, "epoch": 2.2121138277421277, "grad_norm": 6.659937858581543, "learning_rate": 4.537788471312829e-06, "loss": 0.44133139, "memory(GiB)": 34.88, "step": 81700, "train_speed(iter/s)": 0.410743 }, { "acc": 0.92174282, "epoch": 2.212249208025343, "grad_norm": 4.739798069000244, "learning_rate": 4.537231312255654e-06, "loss": 0.43648825, "memory(GiB)": 34.88, "step": 81705, "train_speed(iter/s)": 0.410744 }, { "acc": 0.93297462, "epoch": 2.212384588308559, "grad_norm": 5.742198467254639, "learning_rate": 4.53667415900183e-06, "loss": 0.38557677, "memory(GiB)": 34.88, "step": 81710, "train_speed(iter/s)": 0.410746 }, { "acc": 0.92629194, "epoch": 2.2125199685917742, "grad_norm": 9.061169624328613, "learning_rate": 4.536117011558335e-06, "loss": 0.42265663, "memory(GiB)": 34.88, "step": 81715, "train_speed(iter/s)": 0.410747 }, { "acc": 0.93650627, "epoch": 2.21265534887499, "grad_norm": 7.754662036895752, "learning_rate": 4.5355598699321515e-06, "loss": 0.30045223, "memory(GiB)": 34.88, "step": 81720, "train_speed(iter/s)": 0.410748 }, { "acc": 0.91859608, "epoch": 2.2127907291582054, "grad_norm": 70.03031921386719, "learning_rate": 4.535002734130256e-06, "loss": 0.45300145, "memory(GiB)": 34.88, "step": 81725, "train_speed(iter/s)": 0.41075 }, { "acc": 0.92790728, "epoch": 2.2129261094414208, "grad_norm": 5.793501853942871, "learning_rate": 4.534445604159628e-06, "loss": 0.36696815, "memory(GiB)": 34.88, "step": 81730, "train_speed(iter/s)": 0.410751 }, { "acc": 0.92005806, "epoch": 2.2130614897246366, "grad_norm": 9.21097469329834, "learning_rate": 4.533888480027248e-06, "loss": 0.47337303, "memory(GiB)": 34.88, "step": 81735, "train_speed(iter/s)": 0.410752 }, { "acc": 0.92598696, "epoch": 2.213196870007852, "grad_norm": 5.056396007537842, "learning_rate": 4.5333313617400925e-06, "loss": 0.33213966, "memory(GiB)": 34.88, "step": 81740, "train_speed(iter/s)": 0.410754 }, { "acc": 0.9192234, "epoch": 2.2133322502910677, "grad_norm": 4.947527885437012, "learning_rate": 4.532774249305144e-06, "loss": 0.50485697, "memory(GiB)": 34.88, "step": 81745, "train_speed(iter/s)": 0.410755 }, { "acc": 0.93094082, "epoch": 2.213467630574283, "grad_norm": 5.342669486999512, "learning_rate": 4.5322171427293785e-06, "loss": 0.39469965, "memory(GiB)": 34.88, "step": 81750, "train_speed(iter/s)": 0.410756 }, { "acc": 0.90510921, "epoch": 2.213603010857499, "grad_norm": 8.83288288116455, "learning_rate": 4.531660042019776e-06, "loss": 0.4543107, "memory(GiB)": 34.88, "step": 81755, "train_speed(iter/s)": 0.410757 }, { "acc": 0.92410669, "epoch": 2.2137383911407142, "grad_norm": 8.713014602661133, "learning_rate": 4.531102947183315e-06, "loss": 0.45256348, "memory(GiB)": 34.88, "step": 81760, "train_speed(iter/s)": 0.410759 }, { "acc": 0.92069483, "epoch": 2.2138737714239296, "grad_norm": 15.879621505737305, "learning_rate": 4.530545858226976e-06, "loss": 0.48147225, "memory(GiB)": 34.88, "step": 81765, "train_speed(iter/s)": 0.41076 }, { "acc": 0.93742914, "epoch": 2.2140091517071454, "grad_norm": 4.1609206199646, "learning_rate": 4.529988775157736e-06, "loss": 0.35274935, "memory(GiB)": 34.88, "step": 81770, "train_speed(iter/s)": 0.410762 }, { "acc": 0.93608894, "epoch": 2.2141445319903608, "grad_norm": 6.9936113357543945, "learning_rate": 4.529431697982571e-06, "loss": 0.29698985, "memory(GiB)": 34.88, "step": 81775, "train_speed(iter/s)": 0.410763 }, { "acc": 0.9342371, "epoch": 2.2142799122735766, "grad_norm": 12.647478103637695, "learning_rate": 4.528874626708465e-06, "loss": 0.38190506, "memory(GiB)": 34.88, "step": 81780, "train_speed(iter/s)": 0.410765 }, { "acc": 0.9120594, "epoch": 2.214415292556792, "grad_norm": 9.765406608581543, "learning_rate": 4.528317561342392e-06, "loss": 0.46004009, "memory(GiB)": 34.88, "step": 81785, "train_speed(iter/s)": 0.410766 }, { "acc": 0.9270195, "epoch": 2.2145506728400077, "grad_norm": 5.455493450164795, "learning_rate": 4.527760501891333e-06, "loss": 0.42277746, "memory(GiB)": 34.88, "step": 81790, "train_speed(iter/s)": 0.410767 }, { "acc": 0.9375864, "epoch": 2.214686053123223, "grad_norm": 6.996997356414795, "learning_rate": 4.5272034483622636e-06, "loss": 0.28592765, "memory(GiB)": 34.88, "step": 81795, "train_speed(iter/s)": 0.410769 }, { "acc": 0.92343912, "epoch": 2.214821433406439, "grad_norm": 9.077375411987305, "learning_rate": 4.526646400762164e-06, "loss": 0.45945473, "memory(GiB)": 34.88, "step": 81800, "train_speed(iter/s)": 0.41077 }, { "acc": 0.93642788, "epoch": 2.2149568136896542, "grad_norm": 5.2292585372924805, "learning_rate": 4.5260893590980125e-06, "loss": 0.3569479, "memory(GiB)": 34.88, "step": 81805, "train_speed(iter/s)": 0.410771 }, { "acc": 0.92091665, "epoch": 2.2150921939728696, "grad_norm": 9.91434383392334, "learning_rate": 4.525532323376787e-06, "loss": 0.41155901, "memory(GiB)": 34.88, "step": 81810, "train_speed(iter/s)": 0.410772 }, { "acc": 0.91247005, "epoch": 2.2152275742560854, "grad_norm": 8.66685962677002, "learning_rate": 4.524975293605464e-06, "loss": 0.50309467, "memory(GiB)": 34.88, "step": 81815, "train_speed(iter/s)": 0.410774 }, { "acc": 0.91107225, "epoch": 2.2153629545393008, "grad_norm": 7.33657693862915, "learning_rate": 4.5244182697910215e-06, "loss": 0.45845408, "memory(GiB)": 34.88, "step": 81820, "train_speed(iter/s)": 0.410775 }, { "acc": 0.93950386, "epoch": 2.2154983348225166, "grad_norm": 4.747570514678955, "learning_rate": 4.5238612519404405e-06, "loss": 0.28691278, "memory(GiB)": 34.88, "step": 81825, "train_speed(iter/s)": 0.410776 }, { "acc": 0.92179766, "epoch": 2.215633715105732, "grad_norm": 9.900022506713867, "learning_rate": 4.523304240060694e-06, "loss": 0.50693855, "memory(GiB)": 34.88, "step": 81830, "train_speed(iter/s)": 0.410778 }, { "acc": 0.93491173, "epoch": 2.2157690953889477, "grad_norm": 19.942781448364258, "learning_rate": 4.5227472341587636e-06, "loss": 0.37705603, "memory(GiB)": 34.88, "step": 81835, "train_speed(iter/s)": 0.410779 }, { "acc": 0.9290575, "epoch": 2.215904475672163, "grad_norm": 8.51286506652832, "learning_rate": 4.522190234241624e-06, "loss": 0.35283124, "memory(GiB)": 34.88, "step": 81840, "train_speed(iter/s)": 0.41078 }, { "acc": 0.92982149, "epoch": 2.216039855955379, "grad_norm": 5.625714302062988, "learning_rate": 4.521633240316255e-06, "loss": 0.38532941, "memory(GiB)": 34.88, "step": 81845, "train_speed(iter/s)": 0.410782 }, { "acc": 0.92726612, "epoch": 2.2161752362385942, "grad_norm": 6.030319690704346, "learning_rate": 4.521076252389635e-06, "loss": 0.36995919, "memory(GiB)": 34.88, "step": 81850, "train_speed(iter/s)": 0.410783 }, { "acc": 0.91418972, "epoch": 2.2163106165218096, "grad_norm": 12.800236701965332, "learning_rate": 4.520519270468737e-06, "loss": 0.46213999, "memory(GiB)": 34.88, "step": 81855, "train_speed(iter/s)": 0.410784 }, { "acc": 0.92145653, "epoch": 2.2164459968050254, "grad_norm": 8.886927604675293, "learning_rate": 4.519962294560542e-06, "loss": 0.44154267, "memory(GiB)": 34.88, "step": 81860, "train_speed(iter/s)": 0.410786 }, { "acc": 0.94101467, "epoch": 2.2165813770882408, "grad_norm": 4.609020233154297, "learning_rate": 4.519405324672025e-06, "loss": 0.3516304, "memory(GiB)": 34.88, "step": 81865, "train_speed(iter/s)": 0.410787 }, { "acc": 0.93320618, "epoch": 2.2167167573714566, "grad_norm": 7.293737411499023, "learning_rate": 4.518848360810165e-06, "loss": 0.33659639, "memory(GiB)": 34.88, "step": 81870, "train_speed(iter/s)": 0.410789 }, { "acc": 0.92446156, "epoch": 2.216852137654672, "grad_norm": 7.442777633666992, "learning_rate": 4.518291402981937e-06, "loss": 0.41968803, "memory(GiB)": 34.88, "step": 81875, "train_speed(iter/s)": 0.410789 }, { "acc": 0.93391476, "epoch": 2.2169875179378877, "grad_norm": 5.949503421783447, "learning_rate": 4.517734451194321e-06, "loss": 0.32307758, "memory(GiB)": 34.88, "step": 81880, "train_speed(iter/s)": 0.410791 }, { "acc": 0.92664566, "epoch": 2.217122898221103, "grad_norm": 12.936113357543945, "learning_rate": 4.51717750545429e-06, "loss": 0.43974791, "memory(GiB)": 34.88, "step": 81885, "train_speed(iter/s)": 0.410792 }, { "acc": 0.93171921, "epoch": 2.2172582785043184, "grad_norm": 9.281825065612793, "learning_rate": 4.516620565768824e-06, "loss": 0.34601474, "memory(GiB)": 34.88, "step": 81890, "train_speed(iter/s)": 0.410793 }, { "acc": 0.92739725, "epoch": 2.2173936587875342, "grad_norm": 9.91813850402832, "learning_rate": 4.516063632144901e-06, "loss": 0.34412456, "memory(GiB)": 34.88, "step": 81895, "train_speed(iter/s)": 0.410794 }, { "acc": 0.93549442, "epoch": 2.2175290390707496, "grad_norm": 6.84026575088501, "learning_rate": 4.5155067045894926e-06, "loss": 0.28044314, "memory(GiB)": 34.88, "step": 81900, "train_speed(iter/s)": 0.410796 }, { "acc": 0.90571489, "epoch": 2.2176644193539654, "grad_norm": 8.916987419128418, "learning_rate": 4.514949783109579e-06, "loss": 0.56404839, "memory(GiB)": 34.88, "step": 81905, "train_speed(iter/s)": 0.410797 }, { "acc": 0.90935211, "epoch": 2.2177997996371808, "grad_norm": 19.822233200073242, "learning_rate": 4.514392867712135e-06, "loss": 0.479563, "memory(GiB)": 34.88, "step": 81910, "train_speed(iter/s)": 0.410798 }, { "acc": 0.93377333, "epoch": 2.2179351799203966, "grad_norm": 4.114435195922852, "learning_rate": 4.51383595840414e-06, "loss": 0.38968256, "memory(GiB)": 34.88, "step": 81915, "train_speed(iter/s)": 0.4108 }, { "acc": 0.92515488, "epoch": 2.218070560203612, "grad_norm": 10.149352073669434, "learning_rate": 4.513279055192567e-06, "loss": 0.43199644, "memory(GiB)": 34.88, "step": 81920, "train_speed(iter/s)": 0.410801 }, { "acc": 0.92917023, "epoch": 2.2182059404868273, "grad_norm": 7.772859573364258, "learning_rate": 4.512722158084393e-06, "loss": 0.35379806, "memory(GiB)": 34.88, "step": 81925, "train_speed(iter/s)": 0.410802 }, { "acc": 0.91601734, "epoch": 2.218341320770043, "grad_norm": 9.765463829040527, "learning_rate": 4.512165267086597e-06, "loss": 0.45841894, "memory(GiB)": 34.88, "step": 81930, "train_speed(iter/s)": 0.410804 }, { "acc": 0.92968349, "epoch": 2.2184767010532584, "grad_norm": 3.3098371028900146, "learning_rate": 4.511608382206152e-06, "loss": 0.35700922, "memory(GiB)": 34.88, "step": 81935, "train_speed(iter/s)": 0.410805 }, { "acc": 0.9197073, "epoch": 2.2186120813364743, "grad_norm": 5.627410411834717, "learning_rate": 4.511051503450036e-06, "loss": 0.42115631, "memory(GiB)": 34.88, "step": 81940, "train_speed(iter/s)": 0.410806 }, { "acc": 0.92846489, "epoch": 2.2187474616196896, "grad_norm": 12.092618942260742, "learning_rate": 4.510494630825223e-06, "loss": 0.34995875, "memory(GiB)": 34.88, "step": 81945, "train_speed(iter/s)": 0.410807 }, { "acc": 0.92047739, "epoch": 2.2188828419029054, "grad_norm": 8.90281867980957, "learning_rate": 4.509937764338691e-06, "loss": 0.42577782, "memory(GiB)": 34.88, "step": 81950, "train_speed(iter/s)": 0.410808 }, { "acc": 0.92156887, "epoch": 2.2190182221861208, "grad_norm": 8.20595645904541, "learning_rate": 4.509380903997415e-06, "loss": 0.49060163, "memory(GiB)": 34.88, "step": 81955, "train_speed(iter/s)": 0.410809 }, { "acc": 0.90756359, "epoch": 2.2191536024693366, "grad_norm": 9.859073638916016, "learning_rate": 4.508824049808369e-06, "loss": 0.62323895, "memory(GiB)": 34.88, "step": 81960, "train_speed(iter/s)": 0.410811 }, { "acc": 0.92352257, "epoch": 2.219288982752552, "grad_norm": 19.93820571899414, "learning_rate": 4.508267201778533e-06, "loss": 0.42950897, "memory(GiB)": 34.88, "step": 81965, "train_speed(iter/s)": 0.410812 }, { "acc": 0.90893974, "epoch": 2.2194243630357673, "grad_norm": 8.83040714263916, "learning_rate": 4.507710359914878e-06, "loss": 0.59433537, "memory(GiB)": 34.88, "step": 81970, "train_speed(iter/s)": 0.410813 }, { "acc": 0.91854525, "epoch": 2.219559743318983, "grad_norm": 6.4661126136779785, "learning_rate": 4.5071535242243835e-06, "loss": 0.39196084, "memory(GiB)": 34.88, "step": 81975, "train_speed(iter/s)": 0.410815 }, { "acc": 0.90100174, "epoch": 2.2196951236021984, "grad_norm": 10.34648323059082, "learning_rate": 4.506596694714022e-06, "loss": 0.49067421, "memory(GiB)": 34.88, "step": 81980, "train_speed(iter/s)": 0.410816 }, { "acc": 0.93098392, "epoch": 2.2198305038854143, "grad_norm": 17.1361083984375, "learning_rate": 4.50603987139077e-06, "loss": 0.5057435, "memory(GiB)": 34.88, "step": 81985, "train_speed(iter/s)": 0.410817 }, { "acc": 0.92746534, "epoch": 2.2199658841686296, "grad_norm": 4.72264289855957, "learning_rate": 4.505483054261602e-06, "loss": 0.39669197, "memory(GiB)": 34.88, "step": 81990, "train_speed(iter/s)": 0.410818 }, { "acc": 0.92208023, "epoch": 2.2201012644518454, "grad_norm": 14.01099967956543, "learning_rate": 4.504926243333493e-06, "loss": 0.42398276, "memory(GiB)": 34.88, "step": 81995, "train_speed(iter/s)": 0.410819 }, { "acc": 0.92470379, "epoch": 2.2202366447350608, "grad_norm": 8.639450073242188, "learning_rate": 4.504369438613422e-06, "loss": 0.40778627, "memory(GiB)": 34.88, "step": 82000, "train_speed(iter/s)": 0.410821 }, { "acc": 0.91662312, "epoch": 2.2203720250182766, "grad_norm": 25.45908546447754, "learning_rate": 4.5038126401083585e-06, "loss": 0.40403891, "memory(GiB)": 34.88, "step": 82005, "train_speed(iter/s)": 0.410822 }, { "acc": 0.9296319, "epoch": 2.220507405301492, "grad_norm": 31.48735237121582, "learning_rate": 4.503255847825282e-06, "loss": 0.42789006, "memory(GiB)": 34.88, "step": 82010, "train_speed(iter/s)": 0.410823 }, { "acc": 0.92245188, "epoch": 2.2206427855847073, "grad_norm": 4.582164764404297, "learning_rate": 4.502699061771165e-06, "loss": 0.35650077, "memory(GiB)": 34.88, "step": 82015, "train_speed(iter/s)": 0.410824 }, { "acc": 0.94301701, "epoch": 2.220778165867923, "grad_norm": 9.105093955993652, "learning_rate": 4.502142281952984e-06, "loss": 0.34075894, "memory(GiB)": 34.88, "step": 82020, "train_speed(iter/s)": 0.410825 }, { "acc": 0.92044334, "epoch": 2.2209135461511385, "grad_norm": 6.781698226928711, "learning_rate": 4.501585508377711e-06, "loss": 0.381075, "memory(GiB)": 34.88, "step": 82025, "train_speed(iter/s)": 0.410827 }, { "acc": 0.92430201, "epoch": 2.2210489264343543, "grad_norm": 10.202824592590332, "learning_rate": 4.501028741052323e-06, "loss": 0.47959175, "memory(GiB)": 34.88, "step": 82030, "train_speed(iter/s)": 0.410828 }, { "acc": 0.92753963, "epoch": 2.2211843067175696, "grad_norm": 13.045872688293457, "learning_rate": 4.5004719799837945e-06, "loss": 0.43430405, "memory(GiB)": 34.88, "step": 82035, "train_speed(iter/s)": 0.41083 }, { "acc": 0.90563822, "epoch": 2.2213196870007854, "grad_norm": 17.84762954711914, "learning_rate": 4.499915225179099e-06, "loss": 0.4722147, "memory(GiB)": 34.88, "step": 82040, "train_speed(iter/s)": 0.410831 }, { "acc": 0.92653475, "epoch": 2.2214550672840008, "grad_norm": 6.4485392570495605, "learning_rate": 4.4993584766452115e-06, "loss": 0.43305106, "memory(GiB)": 34.88, "step": 82045, "train_speed(iter/s)": 0.410833 }, { "acc": 0.92645702, "epoch": 2.221590447567216, "grad_norm": 36.03982925415039, "learning_rate": 4.4988017343891055e-06, "loss": 0.44498253, "memory(GiB)": 34.88, "step": 82050, "train_speed(iter/s)": 0.410834 }, { "acc": 0.91412106, "epoch": 2.221725827850432, "grad_norm": 8.256382942199707, "learning_rate": 4.498244998417757e-06, "loss": 0.49058409, "memory(GiB)": 34.88, "step": 82055, "train_speed(iter/s)": 0.410836 }, { "acc": 0.9150713, "epoch": 2.2218612081336473, "grad_norm": 11.073162078857422, "learning_rate": 4.4976882687381405e-06, "loss": 0.51910028, "memory(GiB)": 34.88, "step": 82060, "train_speed(iter/s)": 0.410837 }, { "acc": 0.92317715, "epoch": 2.221996588416863, "grad_norm": 2.520916700363159, "learning_rate": 4.497131545357226e-06, "loss": 0.43476639, "memory(GiB)": 34.88, "step": 82065, "train_speed(iter/s)": 0.410838 }, { "acc": 0.92327709, "epoch": 2.2221319687000785, "grad_norm": 4.804592609405518, "learning_rate": 4.496574828281993e-06, "loss": 0.38902884, "memory(GiB)": 34.88, "step": 82070, "train_speed(iter/s)": 0.410839 }, { "acc": 0.93918591, "epoch": 2.2222673489832943, "grad_norm": 11.622465133666992, "learning_rate": 4.496018117519412e-06, "loss": 0.36187358, "memory(GiB)": 34.88, "step": 82075, "train_speed(iter/s)": 0.410841 }, { "acc": 0.90929508, "epoch": 2.2224027292665096, "grad_norm": 7.031175136566162, "learning_rate": 4.4954614130764575e-06, "loss": 0.45758047, "memory(GiB)": 34.88, "step": 82080, "train_speed(iter/s)": 0.410842 }, { "acc": 0.91667061, "epoch": 2.222538109549725, "grad_norm": 5.731003761291504, "learning_rate": 4.494904714960104e-06, "loss": 0.48951735, "memory(GiB)": 34.88, "step": 82085, "train_speed(iter/s)": 0.410843 }, { "acc": 0.91352367, "epoch": 2.2226734898329408, "grad_norm": 8.005125045776367, "learning_rate": 4.494348023177326e-06, "loss": 0.49711137, "memory(GiB)": 34.88, "step": 82090, "train_speed(iter/s)": 0.410845 }, { "acc": 0.9146677, "epoch": 2.222808870116156, "grad_norm": 10.529634475708008, "learning_rate": 4.4937913377350945e-06, "loss": 0.48824735, "memory(GiB)": 34.88, "step": 82095, "train_speed(iter/s)": 0.410846 }, { "acc": 0.9137126, "epoch": 2.222944250399372, "grad_norm": 14.819525718688965, "learning_rate": 4.4932346586403855e-06, "loss": 0.49188175, "memory(GiB)": 34.88, "step": 82100, "train_speed(iter/s)": 0.410847 }, { "acc": 0.92110777, "epoch": 2.2230796306825873, "grad_norm": 5.880244731903076, "learning_rate": 4.492677985900173e-06, "loss": 0.35126057, "memory(GiB)": 34.88, "step": 82105, "train_speed(iter/s)": 0.410849 }, { "acc": 0.91792116, "epoch": 2.223215010965803, "grad_norm": 5.0135111808776855, "learning_rate": 4.492121319521427e-06, "loss": 0.4572197, "memory(GiB)": 34.88, "step": 82110, "train_speed(iter/s)": 0.41085 }, { "acc": 0.92724228, "epoch": 2.2233503912490185, "grad_norm": 10.253029823303223, "learning_rate": 4.491564659511124e-06, "loss": 0.40130453, "memory(GiB)": 34.88, "step": 82115, "train_speed(iter/s)": 0.410851 }, { "acc": 0.91351986, "epoch": 2.223485771532234, "grad_norm": 7.614315986633301, "learning_rate": 4.491008005876235e-06, "loss": 0.47741957, "memory(GiB)": 34.88, "step": 82120, "train_speed(iter/s)": 0.410852 }, { "acc": 0.92996187, "epoch": 2.2236211518154496, "grad_norm": 10.223336219787598, "learning_rate": 4.490451358623736e-06, "loss": 0.40808206, "memory(GiB)": 34.88, "step": 82125, "train_speed(iter/s)": 0.410854 }, { "acc": 0.91670475, "epoch": 2.223756532098665, "grad_norm": 4.847789287567139, "learning_rate": 4.489894717760597e-06, "loss": 0.36379771, "memory(GiB)": 34.88, "step": 82130, "train_speed(iter/s)": 0.410855 }, { "acc": 0.91041613, "epoch": 2.2238919123818808, "grad_norm": 8.551715850830078, "learning_rate": 4.489338083293794e-06, "loss": 0.48375044, "memory(GiB)": 34.88, "step": 82135, "train_speed(iter/s)": 0.410856 }, { "acc": 0.92198439, "epoch": 2.224027292665096, "grad_norm": 13.042777061462402, "learning_rate": 4.488781455230297e-06, "loss": 0.48207822, "memory(GiB)": 34.88, "step": 82140, "train_speed(iter/s)": 0.410857 }, { "acc": 0.92421627, "epoch": 2.224162672948312, "grad_norm": 6.1474761962890625, "learning_rate": 4.4882248335770804e-06, "loss": 0.42734399, "memory(GiB)": 34.88, "step": 82145, "train_speed(iter/s)": 0.410859 }, { "acc": 0.91828842, "epoch": 2.2242980532315273, "grad_norm": 7.3103203773498535, "learning_rate": 4.487668218341119e-06, "loss": 0.47556028, "memory(GiB)": 34.88, "step": 82150, "train_speed(iter/s)": 0.41086 }, { "acc": 0.93623581, "epoch": 2.224433433514743, "grad_norm": 5.04566764831543, "learning_rate": 4.487111609529381e-06, "loss": 0.29948254, "memory(GiB)": 34.88, "step": 82155, "train_speed(iter/s)": 0.410862 }, { "acc": 0.91545296, "epoch": 2.2245688137979585, "grad_norm": 8.773289680480957, "learning_rate": 4.486555007148843e-06, "loss": 0.49065504, "memory(GiB)": 34.88, "step": 82160, "train_speed(iter/s)": 0.410863 }, { "acc": 0.90977869, "epoch": 2.224704194081174, "grad_norm": 6.83109188079834, "learning_rate": 4.485998411206474e-06, "loss": 0.47156453, "memory(GiB)": 34.88, "step": 82165, "train_speed(iter/s)": 0.410864 }, { "acc": 0.9238904, "epoch": 2.2248395743643896, "grad_norm": 5.688207626342773, "learning_rate": 4.48544182170925e-06, "loss": 0.42886915, "memory(GiB)": 34.88, "step": 82170, "train_speed(iter/s)": 0.410865 }, { "acc": 0.90532026, "epoch": 2.224974954647605, "grad_norm": 13.202686309814453, "learning_rate": 4.484885238664141e-06, "loss": 0.57691927, "memory(GiB)": 34.88, "step": 82175, "train_speed(iter/s)": 0.410866 }, { "acc": 0.9187499, "epoch": 2.2251103349308208, "grad_norm": 6.9333577156066895, "learning_rate": 4.4843286620781206e-06, "loss": 0.46186247, "memory(GiB)": 34.88, "step": 82180, "train_speed(iter/s)": 0.410868 }, { "acc": 0.92810326, "epoch": 2.225245715214036, "grad_norm": 8.30029582977295, "learning_rate": 4.483772091958161e-06, "loss": 0.39216847, "memory(GiB)": 34.88, "step": 82185, "train_speed(iter/s)": 0.410869 }, { "acc": 0.91937771, "epoch": 2.225381095497252, "grad_norm": 6.210842132568359, "learning_rate": 4.483215528311232e-06, "loss": 0.36178579, "memory(GiB)": 34.88, "step": 82190, "train_speed(iter/s)": 0.41087 }, { "acc": 0.93521376, "epoch": 2.2255164757804673, "grad_norm": 6.977855205535889, "learning_rate": 4.482658971144309e-06, "loss": 0.33950024, "memory(GiB)": 34.88, "step": 82195, "train_speed(iter/s)": 0.410872 }, { "acc": 0.93141079, "epoch": 2.225651856063683, "grad_norm": 4.48344612121582, "learning_rate": 4.482102420464362e-06, "loss": 0.37313399, "memory(GiB)": 34.88, "step": 82200, "train_speed(iter/s)": 0.410873 }, { "acc": 0.92953949, "epoch": 2.2257872363468985, "grad_norm": 7.893222808837891, "learning_rate": 4.481545876278364e-06, "loss": 0.34993119, "memory(GiB)": 34.88, "step": 82205, "train_speed(iter/s)": 0.410874 }, { "acc": 0.92906666, "epoch": 2.225922616630114, "grad_norm": 7.58915376663208, "learning_rate": 4.480989338593284e-06, "loss": 0.39978192, "memory(GiB)": 34.88, "step": 82210, "train_speed(iter/s)": 0.410876 }, { "acc": 0.91142979, "epoch": 2.2260579969133296, "grad_norm": 5.648275375366211, "learning_rate": 4.480432807416099e-06, "loss": 0.44849606, "memory(GiB)": 34.88, "step": 82215, "train_speed(iter/s)": 0.410877 }, { "acc": 0.91751289, "epoch": 2.226193377196545, "grad_norm": 7.388442516326904, "learning_rate": 4.479876282753775e-06, "loss": 0.45457659, "memory(GiB)": 34.88, "step": 82220, "train_speed(iter/s)": 0.410878 }, { "acc": 0.92601719, "epoch": 2.226328757479761, "grad_norm": 5.911510467529297, "learning_rate": 4.4793197646132855e-06, "loss": 0.39495542, "memory(GiB)": 34.88, "step": 82225, "train_speed(iter/s)": 0.41088 }, { "acc": 0.91217899, "epoch": 2.226464137762976, "grad_norm": 5.51942253112793, "learning_rate": 4.478763253001606e-06, "loss": 0.50725732, "memory(GiB)": 34.88, "step": 82230, "train_speed(iter/s)": 0.410881 }, { "acc": 0.9287818, "epoch": 2.226599518046192, "grad_norm": 4.2015275955200195, "learning_rate": 4.478206747925702e-06, "loss": 0.41616879, "memory(GiB)": 34.88, "step": 82235, "train_speed(iter/s)": 0.410882 }, { "acc": 0.90414467, "epoch": 2.2267348983294073, "grad_norm": 8.466815948486328, "learning_rate": 4.477650249392548e-06, "loss": 0.55331511, "memory(GiB)": 34.88, "step": 82240, "train_speed(iter/s)": 0.410883 }, { "acc": 0.9049509, "epoch": 2.2268702786126227, "grad_norm": 5.976645469665527, "learning_rate": 4.4770937574091135e-06, "loss": 0.58517542, "memory(GiB)": 34.88, "step": 82245, "train_speed(iter/s)": 0.410885 }, { "acc": 0.92393856, "epoch": 2.2270056588958385, "grad_norm": 8.089459419250488, "learning_rate": 4.4765372719823715e-06, "loss": 0.47965832, "memory(GiB)": 34.88, "step": 82250, "train_speed(iter/s)": 0.410886 }, { "acc": 0.93257303, "epoch": 2.227141039179054, "grad_norm": 7.03320837020874, "learning_rate": 4.475980793119292e-06, "loss": 0.34713366, "memory(GiB)": 34.88, "step": 82255, "train_speed(iter/s)": 0.410888 }, { "acc": 0.9225563, "epoch": 2.2272764194622696, "grad_norm": 8.55269718170166, "learning_rate": 4.475424320826846e-06, "loss": 0.45235472, "memory(GiB)": 34.88, "step": 82260, "train_speed(iter/s)": 0.410889 }, { "acc": 0.92066784, "epoch": 2.227411799745485, "grad_norm": 7.282350540161133, "learning_rate": 4.474867855112006e-06, "loss": 0.43185844, "memory(GiB)": 34.88, "step": 82265, "train_speed(iter/s)": 0.41089 }, { "acc": 0.93459263, "epoch": 2.227547180028701, "grad_norm": 6.860389232635498, "learning_rate": 4.4743113959817404e-06, "loss": 0.37159538, "memory(GiB)": 34.88, "step": 82270, "train_speed(iter/s)": 0.410891 }, { "acc": 0.93307781, "epoch": 2.227682560311916, "grad_norm": 5.438589096069336, "learning_rate": 4.473754943443022e-06, "loss": 0.38763804, "memory(GiB)": 34.88, "step": 82275, "train_speed(iter/s)": 0.410893 }, { "acc": 0.92715149, "epoch": 2.2278179405951315, "grad_norm": 8.98814582824707, "learning_rate": 4.473198497502819e-06, "loss": 0.4314395, "memory(GiB)": 34.88, "step": 82280, "train_speed(iter/s)": 0.410894 }, { "acc": 0.92941751, "epoch": 2.2279533208783473, "grad_norm": 5.006512641906738, "learning_rate": 4.472642058168105e-06, "loss": 0.39545021, "memory(GiB)": 34.88, "step": 82285, "train_speed(iter/s)": 0.410895 }, { "acc": 0.91055222, "epoch": 2.2280887011615627, "grad_norm": 7.955480098724365, "learning_rate": 4.472085625445847e-06, "loss": 0.41460271, "memory(GiB)": 34.88, "step": 82290, "train_speed(iter/s)": 0.410897 }, { "acc": 0.92432117, "epoch": 2.2282240814447785, "grad_norm": 7.814107894897461, "learning_rate": 4.471529199343018e-06, "loss": 0.45663867, "memory(GiB)": 34.88, "step": 82295, "train_speed(iter/s)": 0.410898 }, { "acc": 0.92360764, "epoch": 2.228359461727994, "grad_norm": 5.558949947357178, "learning_rate": 4.470972779866589e-06, "loss": 0.38615346, "memory(GiB)": 34.88, "step": 82300, "train_speed(iter/s)": 0.410899 }, { "acc": 0.91647615, "epoch": 2.2284948420112096, "grad_norm": 15.37148380279541, "learning_rate": 4.470416367023527e-06, "loss": 0.49631457, "memory(GiB)": 34.88, "step": 82305, "train_speed(iter/s)": 0.410901 }, { "acc": 0.91043243, "epoch": 2.228630222294425, "grad_norm": 5.96169900894165, "learning_rate": 4.469859960820807e-06, "loss": 0.48814363, "memory(GiB)": 34.88, "step": 82310, "train_speed(iter/s)": 0.410902 }, { "acc": 0.89935446, "epoch": 2.228765602577641, "grad_norm": 6.98161506652832, "learning_rate": 4.469303561265394e-06, "loss": 0.5144846, "memory(GiB)": 34.88, "step": 82315, "train_speed(iter/s)": 0.410904 }, { "acc": 0.91814775, "epoch": 2.228900982860856, "grad_norm": 5.560911178588867, "learning_rate": 4.4687471683642615e-06, "loss": 0.45823712, "memory(GiB)": 34.88, "step": 82320, "train_speed(iter/s)": 0.410905 }, { "acc": 0.93687687, "epoch": 2.2290363631440715, "grad_norm": 3.4360828399658203, "learning_rate": 4.4681907821243754e-06, "loss": 0.35269544, "memory(GiB)": 34.88, "step": 82325, "train_speed(iter/s)": 0.410906 }, { "acc": 0.9337038, "epoch": 2.2291717434272873, "grad_norm": 6.521669864654541, "learning_rate": 4.4676344025527105e-06, "loss": 0.36522219, "memory(GiB)": 34.88, "step": 82330, "train_speed(iter/s)": 0.410908 }, { "acc": 0.91443129, "epoch": 2.2293071237105027, "grad_norm": 8.73333740234375, "learning_rate": 4.467078029656234e-06, "loss": 0.4698123, "memory(GiB)": 34.88, "step": 82335, "train_speed(iter/s)": 0.410909 }, { "acc": 0.91380005, "epoch": 2.2294425039937185, "grad_norm": 18.997207641601562, "learning_rate": 4.466521663441915e-06, "loss": 0.46454492, "memory(GiB)": 34.88, "step": 82340, "train_speed(iter/s)": 0.41091 }, { "acc": 0.93498592, "epoch": 2.229577884276934, "grad_norm": 13.479571342468262, "learning_rate": 4.465965303916725e-06, "loss": 0.352195, "memory(GiB)": 34.88, "step": 82345, "train_speed(iter/s)": 0.410912 }, { "acc": 0.9144125, "epoch": 2.2297132645601496, "grad_norm": 10.815037727355957, "learning_rate": 4.465408951087632e-06, "loss": 0.43608961, "memory(GiB)": 34.88, "step": 82350, "train_speed(iter/s)": 0.410913 }, { "acc": 0.92239714, "epoch": 2.229848644843365, "grad_norm": 4.946183681488037, "learning_rate": 4.4648526049616075e-06, "loss": 0.34341564, "memory(GiB)": 34.88, "step": 82355, "train_speed(iter/s)": 0.410914 }, { "acc": 0.91912327, "epoch": 2.229984025126581, "grad_norm": 5.05511474609375, "learning_rate": 4.464296265545617e-06, "loss": 0.46885481, "memory(GiB)": 34.88, "step": 82360, "train_speed(iter/s)": 0.410916 }, { "acc": 0.93337383, "epoch": 2.230119405409796, "grad_norm": 7.37878942489624, "learning_rate": 4.463739932846633e-06, "loss": 0.36569982, "memory(GiB)": 34.88, "step": 82365, "train_speed(iter/s)": 0.410917 }, { "acc": 0.91032228, "epoch": 2.2302547856930115, "grad_norm": 21.072158813476562, "learning_rate": 4.463183606871623e-06, "loss": 0.50843349, "memory(GiB)": 34.88, "step": 82370, "train_speed(iter/s)": 0.410918 }, { "acc": 0.92340889, "epoch": 2.2303901659762273, "grad_norm": 5.878529071807861, "learning_rate": 4.4626272876275555e-06, "loss": 0.41595039, "memory(GiB)": 34.88, "step": 82375, "train_speed(iter/s)": 0.410919 }, { "acc": 0.92875175, "epoch": 2.2305255462594427, "grad_norm": 9.295400619506836, "learning_rate": 4.462070975121402e-06, "loss": 0.40609884, "memory(GiB)": 34.88, "step": 82380, "train_speed(iter/s)": 0.410921 }, { "acc": 0.91730356, "epoch": 2.2306609265426585, "grad_norm": 13.983956336975098, "learning_rate": 4.46151466936013e-06, "loss": 0.44477024, "memory(GiB)": 34.88, "step": 82385, "train_speed(iter/s)": 0.410922 }, { "acc": 0.9235198, "epoch": 2.230796306825874, "grad_norm": 6.293712615966797, "learning_rate": 4.460958370350709e-06, "loss": 0.3945488, "memory(GiB)": 34.88, "step": 82390, "train_speed(iter/s)": 0.410923 }, { "acc": 0.907798, "epoch": 2.2309316871090896, "grad_norm": 18.921384811401367, "learning_rate": 4.460402078100106e-06, "loss": 0.5003778, "memory(GiB)": 34.88, "step": 82395, "train_speed(iter/s)": 0.410925 }, { "acc": 0.93949699, "epoch": 2.231067067392305, "grad_norm": 3.0944106578826904, "learning_rate": 4.4598457926152925e-06, "loss": 0.27671363, "memory(GiB)": 34.88, "step": 82400, "train_speed(iter/s)": 0.410926 }, { "acc": 0.90078669, "epoch": 2.2312024476755203, "grad_norm": 10.959786415100098, "learning_rate": 4.459289513903233e-06, "loss": 0.53888116, "memory(GiB)": 34.88, "step": 82405, "train_speed(iter/s)": 0.410928 }, { "acc": 0.90493431, "epoch": 2.231337827958736, "grad_norm": 9.932648658752441, "learning_rate": 4.458733241970899e-06, "loss": 0.56479454, "memory(GiB)": 34.88, "step": 82410, "train_speed(iter/s)": 0.410929 }, { "acc": 0.9234127, "epoch": 2.2314732082419515, "grad_norm": 14.826800346374512, "learning_rate": 4.4581769768252585e-06, "loss": 0.4283741, "memory(GiB)": 34.88, "step": 82415, "train_speed(iter/s)": 0.410931 }, { "acc": 0.91462727, "epoch": 2.2316085885251673, "grad_norm": 14.95166301727295, "learning_rate": 4.457620718473279e-06, "loss": 0.44516659, "memory(GiB)": 34.88, "step": 82420, "train_speed(iter/s)": 0.410932 }, { "acc": 0.93877621, "epoch": 2.2317439688083827, "grad_norm": 5.325925827026367, "learning_rate": 4.457064466921931e-06, "loss": 0.37708259, "memory(GiB)": 34.88, "step": 82425, "train_speed(iter/s)": 0.410933 }, { "acc": 0.92780523, "epoch": 2.2318793490915985, "grad_norm": 7.046332836151123, "learning_rate": 4.456508222178178e-06, "loss": 0.38078361, "memory(GiB)": 34.88, "step": 82430, "train_speed(iter/s)": 0.410935 }, { "acc": 0.9436491, "epoch": 2.232014729374814, "grad_norm": 3.743792772293091, "learning_rate": 4.4559519842489935e-06, "loss": 0.2820636, "memory(GiB)": 34.88, "step": 82435, "train_speed(iter/s)": 0.410936 }, { "acc": 0.92004642, "epoch": 2.232150109658029, "grad_norm": 9.811445236206055, "learning_rate": 4.455395753141343e-06, "loss": 0.54085202, "memory(GiB)": 34.88, "step": 82440, "train_speed(iter/s)": 0.410938 }, { "acc": 0.91080208, "epoch": 2.232285489941245, "grad_norm": 12.510893821716309, "learning_rate": 4.454839528862193e-06, "loss": 0.53170342, "memory(GiB)": 34.88, "step": 82445, "train_speed(iter/s)": 0.410939 }, { "acc": 0.91834526, "epoch": 2.2324208702244603, "grad_norm": 4.228555202484131, "learning_rate": 4.454283311418514e-06, "loss": 0.42002592, "memory(GiB)": 34.88, "step": 82450, "train_speed(iter/s)": 0.41094 }, { "acc": 0.91674175, "epoch": 2.232556250507676, "grad_norm": 18.014375686645508, "learning_rate": 4.45372710081727e-06, "loss": 0.49208603, "memory(GiB)": 34.88, "step": 82455, "train_speed(iter/s)": 0.410942 }, { "acc": 0.91236019, "epoch": 2.2326916307908915, "grad_norm": 10.570287704467773, "learning_rate": 4.453170897065433e-06, "loss": 0.49396572, "memory(GiB)": 34.88, "step": 82460, "train_speed(iter/s)": 0.410943 }, { "acc": 0.93197193, "epoch": 2.2328270110741073, "grad_norm": 7.899987697601318, "learning_rate": 4.452614700169966e-06, "loss": 0.42226562, "memory(GiB)": 34.88, "step": 82465, "train_speed(iter/s)": 0.410944 }, { "acc": 0.91278229, "epoch": 2.2329623913573227, "grad_norm": 10.49050235748291, "learning_rate": 4.4520585101378425e-06, "loss": 0.51545773, "memory(GiB)": 34.88, "step": 82470, "train_speed(iter/s)": 0.410945 }, { "acc": 0.90210133, "epoch": 2.2330977716405385, "grad_norm": 10.440970420837402, "learning_rate": 4.451502326976024e-06, "loss": 0.56898317, "memory(GiB)": 34.88, "step": 82475, "train_speed(iter/s)": 0.410947 }, { "acc": 0.92542763, "epoch": 2.233233151923754, "grad_norm": 4.810869216918945, "learning_rate": 4.450946150691481e-06, "loss": 0.40726099, "memory(GiB)": 34.88, "step": 82480, "train_speed(iter/s)": 0.410948 }, { "acc": 0.91927891, "epoch": 2.233368532206969, "grad_norm": 8.742987632751465, "learning_rate": 4.450389981291181e-06, "loss": 0.38949809, "memory(GiB)": 34.88, "step": 82485, "train_speed(iter/s)": 0.410949 }, { "acc": 0.91466713, "epoch": 2.233503912490185, "grad_norm": 10.47356128692627, "learning_rate": 4.449833818782087e-06, "loss": 0.51580615, "memory(GiB)": 34.88, "step": 82490, "train_speed(iter/s)": 0.410951 }, { "acc": 0.90721855, "epoch": 2.2336392927734003, "grad_norm": 29.295303344726562, "learning_rate": 4.449277663171172e-06, "loss": 0.53017831, "memory(GiB)": 34.88, "step": 82495, "train_speed(iter/s)": 0.410952 }, { "acc": 0.93746643, "epoch": 2.233774673056616, "grad_norm": 9.27782917022705, "learning_rate": 4.448721514465397e-06, "loss": 0.38447404, "memory(GiB)": 34.88, "step": 82500, "train_speed(iter/s)": 0.410954 }, { "acc": 0.92623281, "epoch": 2.2339100533398315, "grad_norm": 9.048773765563965, "learning_rate": 4.448165372671734e-06, "loss": 0.45724077, "memory(GiB)": 34.88, "step": 82505, "train_speed(iter/s)": 0.410955 }, { "acc": 0.90535126, "epoch": 2.2340454336230473, "grad_norm": 10.025725364685059, "learning_rate": 4.447609237797146e-06, "loss": 0.513451, "memory(GiB)": 34.88, "step": 82510, "train_speed(iter/s)": 0.410956 }, { "acc": 0.91481848, "epoch": 2.2341808139062627, "grad_norm": 7.882936477661133, "learning_rate": 4.447053109848603e-06, "loss": 0.5184258, "memory(GiB)": 34.88, "step": 82515, "train_speed(iter/s)": 0.410958 }, { "acc": 0.91210155, "epoch": 2.2343161941894785, "grad_norm": 19.876562118530273, "learning_rate": 4.446496988833069e-06, "loss": 0.50497046, "memory(GiB)": 34.88, "step": 82520, "train_speed(iter/s)": 0.410959 }, { "acc": 0.9141881, "epoch": 2.234451574472694, "grad_norm": 18.150678634643555, "learning_rate": 4.445940874757511e-06, "loss": 0.49758177, "memory(GiB)": 34.88, "step": 82525, "train_speed(iter/s)": 0.410961 }, { "acc": 0.91826639, "epoch": 2.234586954755909, "grad_norm": 6.226418495178223, "learning_rate": 4.445384767628896e-06, "loss": 0.3925215, "memory(GiB)": 34.88, "step": 82530, "train_speed(iter/s)": 0.410962 }, { "acc": 0.90681324, "epoch": 2.234722335039125, "grad_norm": 8.548375129699707, "learning_rate": 4.444828667454189e-06, "loss": 0.50446239, "memory(GiB)": 34.88, "step": 82535, "train_speed(iter/s)": 0.410963 }, { "acc": 0.91993313, "epoch": 2.2348577153223403, "grad_norm": 9.221150398254395, "learning_rate": 4.444272574240359e-06, "loss": 0.45020661, "memory(GiB)": 34.88, "step": 82540, "train_speed(iter/s)": 0.410965 }, { "acc": 0.91698885, "epoch": 2.234993095605556, "grad_norm": 15.108710289001465, "learning_rate": 4.44371648799437e-06, "loss": 0.42573137, "memory(GiB)": 34.88, "step": 82545, "train_speed(iter/s)": 0.410966 }, { "acc": 0.93895454, "epoch": 2.2351284758887715, "grad_norm": 5.25301456451416, "learning_rate": 4.443160408723189e-06, "loss": 0.37403698, "memory(GiB)": 34.88, "step": 82550, "train_speed(iter/s)": 0.410967 }, { "acc": 0.92877417, "epoch": 2.2352638561719873, "grad_norm": 15.28900146484375, "learning_rate": 4.44260433643378e-06, "loss": 0.41788931, "memory(GiB)": 34.88, "step": 82555, "train_speed(iter/s)": 0.410969 }, { "acc": 0.93256416, "epoch": 2.2353992364552027, "grad_norm": 6.915231227874756, "learning_rate": 4.442048271133111e-06, "loss": 0.32038121, "memory(GiB)": 34.88, "step": 82560, "train_speed(iter/s)": 0.41097 }, { "acc": 0.92247639, "epoch": 2.235534616738418, "grad_norm": 9.246369361877441, "learning_rate": 4.441492212828148e-06, "loss": 0.44345946, "memory(GiB)": 34.88, "step": 82565, "train_speed(iter/s)": 0.410972 }, { "acc": 0.92293892, "epoch": 2.235669997021634, "grad_norm": 6.846866607666016, "learning_rate": 4.440936161525856e-06, "loss": 0.47304816, "memory(GiB)": 34.88, "step": 82570, "train_speed(iter/s)": 0.410973 }, { "acc": 0.9178771, "epoch": 2.235805377304849, "grad_norm": 9.25247573852539, "learning_rate": 4.4403801172332005e-06, "loss": 0.43636804, "memory(GiB)": 34.88, "step": 82575, "train_speed(iter/s)": 0.410974 }, { "acc": 0.91726933, "epoch": 2.235940757588065, "grad_norm": 7.551126480102539, "learning_rate": 4.439824079957146e-06, "loss": 0.38205638, "memory(GiB)": 34.88, "step": 82580, "train_speed(iter/s)": 0.410976 }, { "acc": 0.91784487, "epoch": 2.2360761378712803, "grad_norm": 10.071751594543457, "learning_rate": 4.4392680497046616e-06, "loss": 0.48486319, "memory(GiB)": 34.88, "step": 82585, "train_speed(iter/s)": 0.410977 }, { "acc": 0.93022251, "epoch": 2.236211518154496, "grad_norm": 17.559551239013672, "learning_rate": 4.438712026482707e-06, "loss": 0.36440873, "memory(GiB)": 34.88, "step": 82590, "train_speed(iter/s)": 0.410978 }, { "acc": 0.9021328, "epoch": 2.2363468984377115, "grad_norm": 6.755965232849121, "learning_rate": 4.438156010298253e-06, "loss": 0.47241449, "memory(GiB)": 34.88, "step": 82595, "train_speed(iter/s)": 0.410979 }, { "acc": 0.93517971, "epoch": 2.236482278720927, "grad_norm": 9.274999618530273, "learning_rate": 4.437600001158261e-06, "loss": 0.43161263, "memory(GiB)": 34.88, "step": 82600, "train_speed(iter/s)": 0.410981 }, { "acc": 0.90744495, "epoch": 2.2366176590041427, "grad_norm": 8.03695011138916, "learning_rate": 4.4370439990697e-06, "loss": 0.49291339, "memory(GiB)": 34.88, "step": 82605, "train_speed(iter/s)": 0.410982 }, { "acc": 0.92786102, "epoch": 2.236753039287358, "grad_norm": 6.696671009063721, "learning_rate": 4.436488004039532e-06, "loss": 0.39031363, "memory(GiB)": 34.88, "step": 82610, "train_speed(iter/s)": 0.410983 }, { "acc": 0.92646675, "epoch": 2.236888419570574, "grad_norm": 3.257908344268799, "learning_rate": 4.43593201607472e-06, "loss": 0.35427527, "memory(GiB)": 34.88, "step": 82615, "train_speed(iter/s)": 0.410985 }, { "acc": 0.91899357, "epoch": 2.237023799853789, "grad_norm": 15.567931175231934, "learning_rate": 4.435376035182233e-06, "loss": 0.47244902, "memory(GiB)": 34.88, "step": 82620, "train_speed(iter/s)": 0.410986 }, { "acc": 0.91514664, "epoch": 2.237159180137005, "grad_norm": 19.60824203491211, "learning_rate": 4.434820061369034e-06, "loss": 0.54282894, "memory(GiB)": 34.88, "step": 82625, "train_speed(iter/s)": 0.410988 }, { "acc": 0.92406769, "epoch": 2.2372945604202203, "grad_norm": 4.769900798797607, "learning_rate": 4.434264094642088e-06, "loss": 0.41108594, "memory(GiB)": 34.88, "step": 82630, "train_speed(iter/s)": 0.410989 }, { "acc": 0.94341555, "epoch": 2.237429940703436, "grad_norm": 9.910197257995605, "learning_rate": 4.4337081350083585e-06, "loss": 0.29078598, "memory(GiB)": 34.88, "step": 82635, "train_speed(iter/s)": 0.41099 }, { "acc": 0.92395582, "epoch": 2.2375653209866515, "grad_norm": 12.59339714050293, "learning_rate": 4.433152182474812e-06, "loss": 0.45149708, "memory(GiB)": 34.88, "step": 82640, "train_speed(iter/s)": 0.410992 }, { "acc": 0.89924221, "epoch": 2.237700701269867, "grad_norm": 5.0312089920043945, "learning_rate": 4.432596237048411e-06, "loss": 0.62303782, "memory(GiB)": 34.88, "step": 82645, "train_speed(iter/s)": 0.410993 }, { "acc": 0.91968117, "epoch": 2.2378360815530827, "grad_norm": 3.431990146636963, "learning_rate": 4.432040298736121e-06, "loss": 0.37719331, "memory(GiB)": 34.88, "step": 82650, "train_speed(iter/s)": 0.410994 }, { "acc": 0.93301716, "epoch": 2.237971461836298, "grad_norm": 6.799955368041992, "learning_rate": 4.431484367544906e-06, "loss": 0.3351687, "memory(GiB)": 34.88, "step": 82655, "train_speed(iter/s)": 0.410996 }, { "acc": 0.93152819, "epoch": 2.238106842119514, "grad_norm": 5.717663764953613, "learning_rate": 4.430928443481729e-06, "loss": 0.36983507, "memory(GiB)": 34.88, "step": 82660, "train_speed(iter/s)": 0.410997 }, { "acc": 0.9196805, "epoch": 2.238242222402729, "grad_norm": 5.751988410949707, "learning_rate": 4.430372526553556e-06, "loss": 0.48005838, "memory(GiB)": 34.88, "step": 82665, "train_speed(iter/s)": 0.410998 }, { "acc": 0.9230607, "epoch": 2.238377602685945, "grad_norm": 11.615063667297363, "learning_rate": 4.429816616767349e-06, "loss": 0.38126485, "memory(GiB)": 34.88, "step": 82670, "train_speed(iter/s)": 0.411 }, { "acc": 0.93499365, "epoch": 2.2385129829691603, "grad_norm": 17.033580780029297, "learning_rate": 4.429260714130073e-06, "loss": 0.34209991, "memory(GiB)": 34.88, "step": 82675, "train_speed(iter/s)": 0.411001 }, { "acc": 0.93029003, "epoch": 2.238648363252376, "grad_norm": 10.668442726135254, "learning_rate": 4.428704818648692e-06, "loss": 0.34745977, "memory(GiB)": 34.88, "step": 82680, "train_speed(iter/s)": 0.411002 }, { "acc": 0.9386343, "epoch": 2.2387837435355915, "grad_norm": 7.2059807777404785, "learning_rate": 4.428148930330168e-06, "loss": 0.34116421, "memory(GiB)": 34.88, "step": 82685, "train_speed(iter/s)": 0.411004 }, { "acc": 0.91905499, "epoch": 2.238919123818807, "grad_norm": 9.532475471496582, "learning_rate": 4.427593049181468e-06, "loss": 0.50474205, "memory(GiB)": 34.88, "step": 82690, "train_speed(iter/s)": 0.411005 }, { "acc": 0.93749142, "epoch": 2.2390545041020227, "grad_norm": 8.837590217590332, "learning_rate": 4.427037175209552e-06, "loss": 0.31443462, "memory(GiB)": 34.88, "step": 82695, "train_speed(iter/s)": 0.411007 }, { "acc": 0.93231668, "epoch": 2.239189884385238, "grad_norm": 9.707733154296875, "learning_rate": 4.426481308421386e-06, "loss": 0.40404296, "memory(GiB)": 34.88, "step": 82700, "train_speed(iter/s)": 0.411008 }, { "acc": 0.9399437, "epoch": 2.239325264668454, "grad_norm": 10.161911964416504, "learning_rate": 4.425925448823929e-06, "loss": 0.38927765, "memory(GiB)": 34.88, "step": 82705, "train_speed(iter/s)": 0.411009 }, { "acc": 0.92714424, "epoch": 2.239460644951669, "grad_norm": 6.609464645385742, "learning_rate": 4.425369596424149e-06, "loss": 0.41337242, "memory(GiB)": 34.88, "step": 82710, "train_speed(iter/s)": 0.41101 }, { "acc": 0.93062496, "epoch": 2.239596025234885, "grad_norm": 13.611429214477539, "learning_rate": 4.424813751229008e-06, "loss": 0.41867113, "memory(GiB)": 34.88, "step": 82715, "train_speed(iter/s)": 0.411012 }, { "acc": 0.93996954, "epoch": 2.2397314055181003, "grad_norm": 5.574280261993408, "learning_rate": 4.424257913245467e-06, "loss": 0.33355217, "memory(GiB)": 34.88, "step": 82720, "train_speed(iter/s)": 0.411013 }, { "acc": 0.92048426, "epoch": 2.2398667858013157, "grad_norm": 6.704672336578369, "learning_rate": 4.423702082480492e-06, "loss": 0.46421576, "memory(GiB)": 34.88, "step": 82725, "train_speed(iter/s)": 0.411015 }, { "acc": 0.94015589, "epoch": 2.2400021660845315, "grad_norm": 25.28123664855957, "learning_rate": 4.423146258941044e-06, "loss": 0.33617356, "memory(GiB)": 34.88, "step": 82730, "train_speed(iter/s)": 0.411016 }, { "acc": 0.92757797, "epoch": 2.240137546367747, "grad_norm": 8.760738372802734, "learning_rate": 4.422590442634087e-06, "loss": 0.36971059, "memory(GiB)": 34.88, "step": 82735, "train_speed(iter/s)": 0.411017 }, { "acc": 0.92171898, "epoch": 2.2402729266509627, "grad_norm": 10.50013542175293, "learning_rate": 4.422034633566581e-06, "loss": 0.43056054, "memory(GiB)": 34.88, "step": 82740, "train_speed(iter/s)": 0.411019 }, { "acc": 0.91632881, "epoch": 2.240408306934178, "grad_norm": 4.958316802978516, "learning_rate": 4.4214788317454905e-06, "loss": 0.48393192, "memory(GiB)": 34.88, "step": 82745, "train_speed(iter/s)": 0.41102 }, { "acc": 0.92653847, "epoch": 2.240543687217394, "grad_norm": 5.364532470703125, "learning_rate": 4.420923037177779e-06, "loss": 0.3544987, "memory(GiB)": 34.88, "step": 82750, "train_speed(iter/s)": 0.411021 }, { "acc": 0.92938118, "epoch": 2.240679067500609, "grad_norm": 11.31584644317627, "learning_rate": 4.420367249870406e-06, "loss": 0.35044489, "memory(GiB)": 34.88, "step": 82755, "train_speed(iter/s)": 0.411023 }, { "acc": 0.93050642, "epoch": 2.2408144477838245, "grad_norm": 7.498960494995117, "learning_rate": 4.4198114698303375e-06, "loss": 0.30377827, "memory(GiB)": 34.88, "step": 82760, "train_speed(iter/s)": 0.411024 }, { "acc": 0.92568111, "epoch": 2.2409498280670404, "grad_norm": 12.460972785949707, "learning_rate": 4.419255697064534e-06, "loss": 0.41720543, "memory(GiB)": 34.88, "step": 82765, "train_speed(iter/s)": 0.411025 }, { "acc": 0.94111977, "epoch": 2.2410852083502557, "grad_norm": 14.016722679138184, "learning_rate": 4.418699931579957e-06, "loss": 0.37171731, "memory(GiB)": 34.88, "step": 82770, "train_speed(iter/s)": 0.411026 }, { "acc": 0.92745171, "epoch": 2.2412205886334715, "grad_norm": 12.138622283935547, "learning_rate": 4.41814417338357e-06, "loss": 0.31434717, "memory(GiB)": 34.88, "step": 82775, "train_speed(iter/s)": 0.411028 }, { "acc": 0.92077103, "epoch": 2.241355968916687, "grad_norm": 10.530192375183105, "learning_rate": 4.417588422482332e-06, "loss": 0.41090717, "memory(GiB)": 34.88, "step": 82780, "train_speed(iter/s)": 0.411029 }, { "acc": 0.91630764, "epoch": 2.2414913491999027, "grad_norm": 10.531831741333008, "learning_rate": 4.4170326788832084e-06, "loss": 0.46855807, "memory(GiB)": 34.88, "step": 82785, "train_speed(iter/s)": 0.41103 }, { "acc": 0.89982395, "epoch": 2.241626729483118, "grad_norm": 11.939262390136719, "learning_rate": 4.416476942593159e-06, "loss": 0.53074293, "memory(GiB)": 34.88, "step": 82790, "train_speed(iter/s)": 0.411031 }, { "acc": 0.92785797, "epoch": 2.241762109766334, "grad_norm": 5.944411754608154, "learning_rate": 4.415921213619146e-06, "loss": 0.3480423, "memory(GiB)": 34.88, "step": 82795, "train_speed(iter/s)": 0.411033 }, { "acc": 0.91993389, "epoch": 2.241897490049549, "grad_norm": 12.29091739654541, "learning_rate": 4.415365491968131e-06, "loss": 0.41144857, "memory(GiB)": 34.88, "step": 82800, "train_speed(iter/s)": 0.411034 }, { "acc": 0.91251602, "epoch": 2.2420328703327645, "grad_norm": 7.3873701095581055, "learning_rate": 4.4148097776470754e-06, "loss": 0.49253292, "memory(GiB)": 34.88, "step": 82805, "train_speed(iter/s)": 0.411035 }, { "acc": 0.91814041, "epoch": 2.2421682506159804, "grad_norm": 4.672815322875977, "learning_rate": 4.41425407066294e-06, "loss": 0.42997169, "memory(GiB)": 34.88, "step": 82810, "train_speed(iter/s)": 0.411037 }, { "acc": 0.93878212, "epoch": 2.2423036308991957, "grad_norm": 3.496131181716919, "learning_rate": 4.4136983710226885e-06, "loss": 0.33775563, "memory(GiB)": 34.88, "step": 82815, "train_speed(iter/s)": 0.411038 }, { "acc": 0.90385532, "epoch": 2.2424390111824115, "grad_norm": 25.42024803161621, "learning_rate": 4.413142678733281e-06, "loss": 0.45839281, "memory(GiB)": 34.88, "step": 82820, "train_speed(iter/s)": 0.411039 }, { "acc": 0.93199272, "epoch": 2.242574391465627, "grad_norm": 6.378608226776123, "learning_rate": 4.412586993801676e-06, "loss": 0.36946328, "memory(GiB)": 34.88, "step": 82825, "train_speed(iter/s)": 0.411041 }, { "acc": 0.9199934, "epoch": 2.2427097717488427, "grad_norm": 12.541157722473145, "learning_rate": 4.412031316234838e-06, "loss": 0.41621237, "memory(GiB)": 34.88, "step": 82830, "train_speed(iter/s)": 0.411042 }, { "acc": 0.9058095, "epoch": 2.242845152032058, "grad_norm": 6.714906692504883, "learning_rate": 4.411475646039725e-06, "loss": 0.46366777, "memory(GiB)": 34.88, "step": 82835, "train_speed(iter/s)": 0.411043 }, { "acc": 0.93964958, "epoch": 2.242980532315274, "grad_norm": 12.086024284362793, "learning_rate": 4.410919983223301e-06, "loss": 0.33838537, "memory(GiB)": 34.88, "step": 82840, "train_speed(iter/s)": 0.411045 }, { "acc": 0.9135663, "epoch": 2.243115912598489, "grad_norm": 8.37239933013916, "learning_rate": 4.410364327792525e-06, "loss": 0.47286272, "memory(GiB)": 34.88, "step": 82845, "train_speed(iter/s)": 0.411046 }, { "acc": 0.9081851, "epoch": 2.2432512928817046, "grad_norm": 46.394351959228516, "learning_rate": 4.409808679754358e-06, "loss": 0.54554367, "memory(GiB)": 34.88, "step": 82850, "train_speed(iter/s)": 0.411047 }, { "acc": 0.92586393, "epoch": 2.2433866731649204, "grad_norm": 10.751797676086426, "learning_rate": 4.4092530391157605e-06, "loss": 0.41608963, "memory(GiB)": 34.88, "step": 82855, "train_speed(iter/s)": 0.411048 }, { "acc": 0.9462101, "epoch": 2.2435220534481357, "grad_norm": 6.202708721160889, "learning_rate": 4.408697405883692e-06, "loss": 0.32218029, "memory(GiB)": 34.88, "step": 82860, "train_speed(iter/s)": 0.41105 }, { "acc": 0.91955204, "epoch": 2.2436574337313515, "grad_norm": 8.050169944763184, "learning_rate": 4.408141780065115e-06, "loss": 0.34301052, "memory(GiB)": 34.88, "step": 82865, "train_speed(iter/s)": 0.411051 }, { "acc": 0.93572845, "epoch": 2.243792814014567, "grad_norm": 5.0704731941223145, "learning_rate": 4.407586161666988e-06, "loss": 0.33060808, "memory(GiB)": 34.88, "step": 82870, "train_speed(iter/s)": 0.411052 }, { "acc": 0.915623, "epoch": 2.2439281942977827, "grad_norm": 22.179912567138672, "learning_rate": 4.4070305506962725e-06, "loss": 0.54867153, "memory(GiB)": 34.88, "step": 82875, "train_speed(iter/s)": 0.411053 }, { "acc": 0.90607595, "epoch": 2.244063574580998, "grad_norm": 7.477663993835449, "learning_rate": 4.4064749471599265e-06, "loss": 0.59720359, "memory(GiB)": 34.88, "step": 82880, "train_speed(iter/s)": 0.411055 }, { "acc": 0.93589945, "epoch": 2.2441989548642134, "grad_norm": 9.751168251037598, "learning_rate": 4.405919351064913e-06, "loss": 0.34307866, "memory(GiB)": 34.88, "step": 82885, "train_speed(iter/s)": 0.411056 }, { "acc": 0.92643414, "epoch": 2.244334335147429, "grad_norm": 5.5154924392700195, "learning_rate": 4.40536376241819e-06, "loss": 0.42201385, "memory(GiB)": 34.88, "step": 82890, "train_speed(iter/s)": 0.411057 }, { "acc": 0.91762505, "epoch": 2.2444697154306446, "grad_norm": 6.1244049072265625, "learning_rate": 4.404808181226717e-06, "loss": 0.4297596, "memory(GiB)": 34.88, "step": 82895, "train_speed(iter/s)": 0.411059 }, { "acc": 0.93329468, "epoch": 2.2446050957138604, "grad_norm": 8.952876091003418, "learning_rate": 4.404252607497457e-06, "loss": 0.36833339, "memory(GiB)": 34.88, "step": 82900, "train_speed(iter/s)": 0.41106 }, { "acc": 0.9173254, "epoch": 2.2447404759970757, "grad_norm": 7.883129596710205, "learning_rate": 4.403697041237365e-06, "loss": 0.45215473, "memory(GiB)": 34.88, "step": 82905, "train_speed(iter/s)": 0.411061 }, { "acc": 0.93073673, "epoch": 2.2448758562802915, "grad_norm": 10.51243782043457, "learning_rate": 4.403141482453405e-06, "loss": 0.30267861, "memory(GiB)": 34.88, "step": 82910, "train_speed(iter/s)": 0.411062 }, { "acc": 0.92051754, "epoch": 2.245011236563507, "grad_norm": 20.981212615966797, "learning_rate": 4.402585931152532e-06, "loss": 0.48048744, "memory(GiB)": 34.88, "step": 82915, "train_speed(iter/s)": 0.411064 }, { "acc": 0.92511692, "epoch": 2.2451466168467222, "grad_norm": 9.962798118591309, "learning_rate": 4.402030387341709e-06, "loss": 0.41278839, "memory(GiB)": 34.88, "step": 82920, "train_speed(iter/s)": 0.411065 }, { "acc": 0.91620531, "epoch": 2.245281997129938, "grad_norm": 10.56201171875, "learning_rate": 4.4014748510278924e-06, "loss": 0.43207989, "memory(GiB)": 34.88, "step": 82925, "train_speed(iter/s)": 0.411066 }, { "acc": 0.92560349, "epoch": 2.2454173774131534, "grad_norm": 8.524900436401367, "learning_rate": 4.400919322218044e-06, "loss": 0.47693858, "memory(GiB)": 34.88, "step": 82930, "train_speed(iter/s)": 0.411068 }, { "acc": 0.92688532, "epoch": 2.245552757696369, "grad_norm": 10.793909072875977, "learning_rate": 4.400363800919122e-06, "loss": 0.37976437, "memory(GiB)": 34.88, "step": 82935, "train_speed(iter/s)": 0.411069 }, { "acc": 0.93077297, "epoch": 2.2456881379795846, "grad_norm": 12.464564323425293, "learning_rate": 4.399808287138084e-06, "loss": 0.34170809, "memory(GiB)": 34.88, "step": 82940, "train_speed(iter/s)": 0.41107 }, { "acc": 0.93457603, "epoch": 2.2458235182628004, "grad_norm": 3.5985774993896484, "learning_rate": 4.3992527808818915e-06, "loss": 0.37168081, "memory(GiB)": 34.88, "step": 82945, "train_speed(iter/s)": 0.411071 }, { "acc": 0.92524567, "epoch": 2.2459588985460157, "grad_norm": 6.475371837615967, "learning_rate": 4.3986972821575e-06, "loss": 0.42978182, "memory(GiB)": 34.88, "step": 82950, "train_speed(iter/s)": 0.411073 }, { "acc": 0.93363667, "epoch": 2.2460942788292315, "grad_norm": 7.528588771820068, "learning_rate": 4.398141790971872e-06, "loss": 0.39778476, "memory(GiB)": 34.88, "step": 82955, "train_speed(iter/s)": 0.411074 }, { "acc": 0.92889576, "epoch": 2.246229659112447, "grad_norm": 6.872812271118164, "learning_rate": 4.397586307331963e-06, "loss": 0.36197796, "memory(GiB)": 34.88, "step": 82960, "train_speed(iter/s)": 0.411075 }, { "acc": 0.9211813, "epoch": 2.2463650393956622, "grad_norm": 15.43470287322998, "learning_rate": 4.397030831244733e-06, "loss": 0.4524229, "memory(GiB)": 34.88, "step": 82965, "train_speed(iter/s)": 0.411077 }, { "acc": 0.92476425, "epoch": 2.246500419678878, "grad_norm": 3.7426507472991943, "learning_rate": 4.39647536271714e-06, "loss": 0.44967704, "memory(GiB)": 34.88, "step": 82970, "train_speed(iter/s)": 0.411078 }, { "acc": 0.91430225, "epoch": 2.2466357999620934, "grad_norm": 9.748392105102539, "learning_rate": 4.395919901756143e-06, "loss": 0.47079678, "memory(GiB)": 34.88, "step": 82975, "train_speed(iter/s)": 0.411079 }, { "acc": 0.9119051, "epoch": 2.246771180245309, "grad_norm": 10.419803619384766, "learning_rate": 4.3953644483687e-06, "loss": 0.45621629, "memory(GiB)": 34.88, "step": 82980, "train_speed(iter/s)": 0.411081 }, { "acc": 0.92411633, "epoch": 2.2469065605285246, "grad_norm": 5.715958595275879, "learning_rate": 4.3948090025617684e-06, "loss": 0.37771177, "memory(GiB)": 34.88, "step": 82985, "train_speed(iter/s)": 0.411082 }, { "acc": 0.93087749, "epoch": 2.2470419408117404, "grad_norm": 9.471136093139648, "learning_rate": 4.3942535643423065e-06, "loss": 0.34812593, "memory(GiB)": 34.88, "step": 82990, "train_speed(iter/s)": 0.411084 }, { "acc": 0.92604475, "epoch": 2.2471773210949557, "grad_norm": 5.330924987792969, "learning_rate": 4.3936981337172724e-06, "loss": 0.36260219, "memory(GiB)": 34.88, "step": 82995, "train_speed(iter/s)": 0.411085 }, { "acc": 0.9211772, "epoch": 2.2473127013781715, "grad_norm": 13.443857192993164, "learning_rate": 4.393142710693626e-06, "loss": 0.39921238, "memory(GiB)": 34.88, "step": 83000, "train_speed(iter/s)": 0.411086 }, { "acc": 0.92424622, "epoch": 2.247448081661387, "grad_norm": 18.6680850982666, "learning_rate": 4.392587295278319e-06, "loss": 0.39248259, "memory(GiB)": 34.88, "step": 83005, "train_speed(iter/s)": 0.411088 }, { "acc": 0.92538223, "epoch": 2.2475834619446022, "grad_norm": 5.385828018188477, "learning_rate": 4.392031887478316e-06, "loss": 0.40929551, "memory(GiB)": 34.88, "step": 83010, "train_speed(iter/s)": 0.411089 }, { "acc": 0.91360979, "epoch": 2.247718842227818, "grad_norm": 10.669763565063477, "learning_rate": 4.3914764873005715e-06, "loss": 0.50393267, "memory(GiB)": 34.88, "step": 83015, "train_speed(iter/s)": 0.41109 }, { "acc": 0.93905506, "epoch": 2.2478542225110334, "grad_norm": 5.533145904541016, "learning_rate": 4.390921094752043e-06, "loss": 0.39662118, "memory(GiB)": 34.88, "step": 83020, "train_speed(iter/s)": 0.411092 }, { "acc": 0.90341597, "epoch": 2.247989602794249, "grad_norm": 11.929401397705078, "learning_rate": 4.390365709839689e-06, "loss": 0.65749722, "memory(GiB)": 34.88, "step": 83025, "train_speed(iter/s)": 0.411093 }, { "acc": 0.91006889, "epoch": 2.2481249830774646, "grad_norm": 11.944417953491211, "learning_rate": 4.3898103325704645e-06, "loss": 0.43948269, "memory(GiB)": 34.88, "step": 83030, "train_speed(iter/s)": 0.411094 }, { "acc": 0.93351517, "epoch": 2.2482603633606804, "grad_norm": 4.633838653564453, "learning_rate": 4.389254962951329e-06, "loss": 0.376986, "memory(GiB)": 34.88, "step": 83035, "train_speed(iter/s)": 0.411096 }, { "acc": 0.91474762, "epoch": 2.2483957436438957, "grad_norm": 6.405524253845215, "learning_rate": 4.388699600989238e-06, "loss": 0.55944171, "memory(GiB)": 34.88, "step": 83040, "train_speed(iter/s)": 0.411097 }, { "acc": 0.9166338, "epoch": 2.248531123927111, "grad_norm": 9.581442832946777, "learning_rate": 4.3881442466911486e-06, "loss": 0.39150066, "memory(GiB)": 34.88, "step": 83045, "train_speed(iter/s)": 0.411098 }, { "acc": 0.93333931, "epoch": 2.248666504210327, "grad_norm": 14.182465553283691, "learning_rate": 4.38758890006402e-06, "loss": 0.3965014, "memory(GiB)": 34.88, "step": 83050, "train_speed(iter/s)": 0.411099 }, { "acc": 0.92940083, "epoch": 2.2488018844935422, "grad_norm": 11.31686019897461, "learning_rate": 4.387033561114806e-06, "loss": 0.40826836, "memory(GiB)": 34.88, "step": 83055, "train_speed(iter/s)": 0.4111 }, { "acc": 0.9009222, "epoch": 2.248937264776758, "grad_norm": 6.110938549041748, "learning_rate": 4.386478229850465e-06, "loss": 0.53391523, "memory(GiB)": 34.88, "step": 83060, "train_speed(iter/s)": 0.411102 }, { "acc": 0.91885815, "epoch": 2.2490726450599734, "grad_norm": 12.409886360168457, "learning_rate": 4.385922906277954e-06, "loss": 0.41713767, "memory(GiB)": 34.88, "step": 83065, "train_speed(iter/s)": 0.411103 }, { "acc": 0.92964096, "epoch": 2.249208025343189, "grad_norm": 18.653108596801758, "learning_rate": 4.385367590404229e-06, "loss": 0.38157723, "memory(GiB)": 34.88, "step": 83070, "train_speed(iter/s)": 0.411105 }, { "acc": 0.92966766, "epoch": 2.2493434056264046, "grad_norm": 8.367267608642578, "learning_rate": 4.3848122822362445e-06, "loss": 0.35935163, "memory(GiB)": 34.88, "step": 83075, "train_speed(iter/s)": 0.411106 }, { "acc": 0.94219151, "epoch": 2.24947878590962, "grad_norm": 3.642040252685547, "learning_rate": 4.384256981780959e-06, "loss": 0.32389112, "memory(GiB)": 34.88, "step": 83080, "train_speed(iter/s)": 0.411107 }, { "acc": 0.92863731, "epoch": 2.2496141661928357, "grad_norm": 10.164958000183105, "learning_rate": 4.383701689045328e-06, "loss": 0.3673522, "memory(GiB)": 34.88, "step": 83085, "train_speed(iter/s)": 0.411109 }, { "acc": 0.91966457, "epoch": 2.249749546476051, "grad_norm": 8.573655128479004, "learning_rate": 4.383146404036308e-06, "loss": 0.42979584, "memory(GiB)": 34.88, "step": 83090, "train_speed(iter/s)": 0.41111 }, { "acc": 0.94310246, "epoch": 2.249884926759267, "grad_norm": 4.259994029998779, "learning_rate": 4.382591126760855e-06, "loss": 0.28834109, "memory(GiB)": 34.88, "step": 83095, "train_speed(iter/s)": 0.411111 }, { "acc": 0.92661715, "epoch": 2.2500203070424822, "grad_norm": 10.22899055480957, "learning_rate": 4.3820358572259245e-06, "loss": 0.40150299, "memory(GiB)": 34.88, "step": 83100, "train_speed(iter/s)": 0.411113 }, { "acc": 0.92703161, "epoch": 2.250155687325698, "grad_norm": 8.348555564880371, "learning_rate": 4.3814805954384746e-06, "loss": 0.420645, "memory(GiB)": 34.88, "step": 83105, "train_speed(iter/s)": 0.411114 }, { "acc": 0.93027649, "epoch": 2.2502910676089134, "grad_norm": 6.783127307891846, "learning_rate": 4.380925341405458e-06, "loss": 0.35484035, "memory(GiB)": 34.88, "step": 83110, "train_speed(iter/s)": 0.411116 }, { "acc": 0.92468834, "epoch": 2.2504264478921288, "grad_norm": 8.810896873474121, "learning_rate": 4.380370095133831e-06, "loss": 0.44217958, "memory(GiB)": 34.88, "step": 83115, "train_speed(iter/s)": 0.411117 }, { "acc": 0.92415428, "epoch": 2.2505618281753446, "grad_norm": 22.053142547607422, "learning_rate": 4.37981485663055e-06, "loss": 0.41393633, "memory(GiB)": 34.88, "step": 83120, "train_speed(iter/s)": 0.411119 }, { "acc": 0.92622166, "epoch": 2.25069720845856, "grad_norm": 6.570776462554932, "learning_rate": 4.379259625902569e-06, "loss": 0.41729784, "memory(GiB)": 34.88, "step": 83125, "train_speed(iter/s)": 0.41112 }, { "acc": 0.91532784, "epoch": 2.2508325887417757, "grad_norm": 8.893178939819336, "learning_rate": 4.3787044029568455e-06, "loss": 0.46060133, "memory(GiB)": 34.88, "step": 83130, "train_speed(iter/s)": 0.411121 }, { "acc": 0.93486252, "epoch": 2.250967969024991, "grad_norm": 7.997967720031738, "learning_rate": 4.378149187800332e-06, "loss": 0.3654603, "memory(GiB)": 34.88, "step": 83135, "train_speed(iter/s)": 0.411122 }, { "acc": 0.91493587, "epoch": 2.251103349308207, "grad_norm": 7.647066593170166, "learning_rate": 4.377593980439988e-06, "loss": 0.48549576, "memory(GiB)": 34.88, "step": 83140, "train_speed(iter/s)": 0.411124 }, { "acc": 0.92497921, "epoch": 2.2512387295914222, "grad_norm": 9.545475959777832, "learning_rate": 4.3770387808827644e-06, "loss": 0.48356285, "memory(GiB)": 34.88, "step": 83145, "train_speed(iter/s)": 0.411125 }, { "acc": 0.93432751, "epoch": 2.251374109874638, "grad_norm": 6.1652655601501465, "learning_rate": 4.3764835891356185e-06, "loss": 0.37095194, "memory(GiB)": 34.88, "step": 83150, "train_speed(iter/s)": 0.411126 }, { "acc": 0.93118916, "epoch": 2.2515094901578534, "grad_norm": 7.234628677368164, "learning_rate": 4.375928405205503e-06, "loss": 0.35012038, "memory(GiB)": 34.88, "step": 83155, "train_speed(iter/s)": 0.411127 }, { "acc": 0.90608711, "epoch": 2.251644870441069, "grad_norm": 14.48877239227295, "learning_rate": 4.375373229099375e-06, "loss": 0.57985086, "memory(GiB)": 34.88, "step": 83160, "train_speed(iter/s)": 0.411128 }, { "acc": 0.91549358, "epoch": 2.2517802507242846, "grad_norm": 76.2633056640625, "learning_rate": 4.374818060824188e-06, "loss": 0.45801215, "memory(GiB)": 34.88, "step": 83165, "train_speed(iter/s)": 0.41113 }, { "acc": 0.94087162, "epoch": 2.2519156310075, "grad_norm": 5.85650634765625, "learning_rate": 4.374262900386896e-06, "loss": 0.35727208, "memory(GiB)": 34.88, "step": 83170, "train_speed(iter/s)": 0.411131 }, { "acc": 0.9255785, "epoch": 2.2520510112907157, "grad_norm": 5.596423625946045, "learning_rate": 4.3737077477944555e-06, "loss": 0.44380751, "memory(GiB)": 34.88, "step": 83175, "train_speed(iter/s)": 0.411132 }, { "acc": 0.92103539, "epoch": 2.252186391573931, "grad_norm": 8.515499114990234, "learning_rate": 4.373152603053819e-06, "loss": 0.39111476, "memory(GiB)": 34.88, "step": 83180, "train_speed(iter/s)": 0.411133 }, { "acc": 0.92511854, "epoch": 2.252321771857147, "grad_norm": 14.274771690368652, "learning_rate": 4.372597466171942e-06, "loss": 0.44374175, "memory(GiB)": 34.88, "step": 83185, "train_speed(iter/s)": 0.411135 }, { "acc": 0.93423347, "epoch": 2.2524571521403622, "grad_norm": 4.353723049163818, "learning_rate": 4.3720423371557765e-06, "loss": 0.31463323, "memory(GiB)": 34.88, "step": 83190, "train_speed(iter/s)": 0.411136 }, { "acc": 0.92836285, "epoch": 2.252592532423578, "grad_norm": 5.239387035369873, "learning_rate": 4.371487216012279e-06, "loss": 0.39785156, "memory(GiB)": 34.88, "step": 83195, "train_speed(iter/s)": 0.411137 }, { "acc": 0.92076225, "epoch": 2.2527279127067934, "grad_norm": 6.67195463180542, "learning_rate": 4.370932102748403e-06, "loss": 0.44591408, "memory(GiB)": 34.88, "step": 83200, "train_speed(iter/s)": 0.411138 }, { "acc": 0.92667561, "epoch": 2.2528632929900088, "grad_norm": 13.129742622375488, "learning_rate": 4.370376997371102e-06, "loss": 0.37151303, "memory(GiB)": 34.88, "step": 83205, "train_speed(iter/s)": 0.41114 }, { "acc": 0.91501713, "epoch": 2.2529986732732246, "grad_norm": 5.621225357055664, "learning_rate": 4.36982189988733e-06, "loss": 0.40965986, "memory(GiB)": 34.88, "step": 83210, "train_speed(iter/s)": 0.411141 }, { "acc": 0.90772991, "epoch": 2.25313405355644, "grad_norm": 10.048991203308105, "learning_rate": 4.369266810304039e-06, "loss": 0.51492491, "memory(GiB)": 34.88, "step": 83215, "train_speed(iter/s)": 0.411142 }, { "acc": 0.9329895, "epoch": 2.2532694338396557, "grad_norm": 9.319089889526367, "learning_rate": 4.368711728628186e-06, "loss": 0.40011621, "memory(GiB)": 34.88, "step": 83220, "train_speed(iter/s)": 0.411143 }, { "acc": 0.90662193, "epoch": 2.253404814122871, "grad_norm": 11.064453125, "learning_rate": 4.368156654866721e-06, "loss": 0.54480863, "memory(GiB)": 34.88, "step": 83225, "train_speed(iter/s)": 0.411145 }, { "acc": 0.89896002, "epoch": 2.253540194406087, "grad_norm": 9.00970458984375, "learning_rate": 4.3676015890266e-06, "loss": 0.58734818, "memory(GiB)": 34.88, "step": 83230, "train_speed(iter/s)": 0.411146 }, { "acc": 0.93649549, "epoch": 2.2536755746893022, "grad_norm": 6.761488914489746, "learning_rate": 4.367046531114776e-06, "loss": 0.34163642, "memory(GiB)": 34.88, "step": 83235, "train_speed(iter/s)": 0.411147 }, { "acc": 0.92801762, "epoch": 2.2538109549725176, "grad_norm": 3.8714966773986816, "learning_rate": 4.366491481138199e-06, "loss": 0.48309917, "memory(GiB)": 34.88, "step": 83240, "train_speed(iter/s)": 0.411148 }, { "acc": 0.93522797, "epoch": 2.2539463352557334, "grad_norm": 8.210948944091797, "learning_rate": 4.3659364391038265e-06, "loss": 0.31336122, "memory(GiB)": 34.88, "step": 83245, "train_speed(iter/s)": 0.41115 }, { "acc": 0.92553062, "epoch": 2.2540817155389488, "grad_norm": 7.521623134613037, "learning_rate": 4.3653814050186076e-06, "loss": 0.38661931, "memory(GiB)": 34.88, "step": 83250, "train_speed(iter/s)": 0.411151 }, { "acc": 0.9244421, "epoch": 2.2542170958221646, "grad_norm": 7.180239200592041, "learning_rate": 4.3648263788895e-06, "loss": 0.49682298, "memory(GiB)": 34.88, "step": 83255, "train_speed(iter/s)": 0.411152 }, { "acc": 0.92725039, "epoch": 2.25435247610538, "grad_norm": 8.426023483276367, "learning_rate": 4.36427136072345e-06, "loss": 0.44287505, "memory(GiB)": 34.88, "step": 83260, "train_speed(iter/s)": 0.411153 }, { "acc": 0.93531475, "epoch": 2.2544878563885957, "grad_norm": 7.628508567810059, "learning_rate": 4.3637163505274166e-06, "loss": 0.33885858, "memory(GiB)": 34.88, "step": 83265, "train_speed(iter/s)": 0.411155 }, { "acc": 0.92480507, "epoch": 2.254623236671811, "grad_norm": 9.571878433227539, "learning_rate": 4.363161348308349e-06, "loss": 0.40238018, "memory(GiB)": 34.88, "step": 83270, "train_speed(iter/s)": 0.411156 }, { "acc": 0.91982727, "epoch": 2.2547586169550264, "grad_norm": 10.847332000732422, "learning_rate": 4.3626063540732e-06, "loss": 0.46133595, "memory(GiB)": 34.88, "step": 83275, "train_speed(iter/s)": 0.411157 }, { "acc": 0.92154875, "epoch": 2.2548939972382422, "grad_norm": 2.4399590492248535, "learning_rate": 4.362051367828923e-06, "loss": 0.43010273, "memory(GiB)": 34.88, "step": 83280, "train_speed(iter/s)": 0.411159 }, { "acc": 0.92008553, "epoch": 2.2550293775214576, "grad_norm": 11.785202026367188, "learning_rate": 4.361496389582469e-06, "loss": 0.47797174, "memory(GiB)": 34.88, "step": 83285, "train_speed(iter/s)": 0.41116 }, { "acc": 0.90574322, "epoch": 2.2551647578046734, "grad_norm": 20.209505081176758, "learning_rate": 4.3609414193407915e-06, "loss": 0.5165884, "memory(GiB)": 34.88, "step": 83290, "train_speed(iter/s)": 0.411161 }, { "acc": 0.94332991, "epoch": 2.2553001380878888, "grad_norm": 13.592499732971191, "learning_rate": 4.360386457110841e-06, "loss": 0.30975182, "memory(GiB)": 34.88, "step": 83295, "train_speed(iter/s)": 0.411163 }, { "acc": 0.93113174, "epoch": 2.2554355183711046, "grad_norm": 7.938328742980957, "learning_rate": 4.359831502899572e-06, "loss": 0.37950077, "memory(GiB)": 34.88, "step": 83300, "train_speed(iter/s)": 0.411164 }, { "acc": 0.91555614, "epoch": 2.25557089865432, "grad_norm": 8.195066452026367, "learning_rate": 4.359276556713934e-06, "loss": 0.43014984, "memory(GiB)": 34.88, "step": 83305, "train_speed(iter/s)": 0.411165 }, { "acc": 0.91648312, "epoch": 2.2557062789375357, "grad_norm": 9.242599487304688, "learning_rate": 4.358721618560879e-06, "loss": 0.52949286, "memory(GiB)": 34.88, "step": 83310, "train_speed(iter/s)": 0.411167 }, { "acc": 0.91967068, "epoch": 2.255841659220751, "grad_norm": 16.43433380126953, "learning_rate": 4.358166688447359e-06, "loss": 0.45050092, "memory(GiB)": 34.88, "step": 83315, "train_speed(iter/s)": 0.411168 }, { "acc": 0.92222004, "epoch": 2.255977039503967, "grad_norm": 8.650933265686035, "learning_rate": 4.357611766380328e-06, "loss": 0.46151762, "memory(GiB)": 34.88, "step": 83320, "train_speed(iter/s)": 0.411169 }, { "acc": 0.92313461, "epoch": 2.2561124197871822, "grad_norm": 10.546239852905273, "learning_rate": 4.357056852366735e-06, "loss": 0.41231928, "memory(GiB)": 34.88, "step": 83325, "train_speed(iter/s)": 0.411171 }, { "acc": 0.93655529, "epoch": 2.2562478000703976, "grad_norm": 3.861734628677368, "learning_rate": 4.35650194641353e-06, "loss": 0.3196053, "memory(GiB)": 34.88, "step": 83330, "train_speed(iter/s)": 0.411172 }, { "acc": 0.92378922, "epoch": 2.2563831803536134, "grad_norm": 8.306951522827148, "learning_rate": 4.355947048527667e-06, "loss": 0.34844267, "memory(GiB)": 34.88, "step": 83335, "train_speed(iter/s)": 0.411173 }, { "acc": 0.92710056, "epoch": 2.2565185606368288, "grad_norm": 21.47507095336914, "learning_rate": 4.3553921587160965e-06, "loss": 0.38533907, "memory(GiB)": 34.88, "step": 83340, "train_speed(iter/s)": 0.411175 }, { "acc": 0.93370361, "epoch": 2.2566539409200446, "grad_norm": 7.020769119262695, "learning_rate": 4.354837276985768e-06, "loss": 0.29755001, "memory(GiB)": 34.88, "step": 83345, "train_speed(iter/s)": 0.411176 }, { "acc": 0.91012115, "epoch": 2.25678932120326, "grad_norm": 7.088672161102295, "learning_rate": 4.354282403343636e-06, "loss": 0.44841871, "memory(GiB)": 34.88, "step": 83350, "train_speed(iter/s)": 0.411177 }, { "acc": 0.92696972, "epoch": 2.2569247014864757, "grad_norm": 12.890856742858887, "learning_rate": 4.353727537796647e-06, "loss": 0.47715368, "memory(GiB)": 34.88, "step": 83355, "train_speed(iter/s)": 0.411179 }, { "acc": 0.9317421, "epoch": 2.257060081769691, "grad_norm": 13.663185119628906, "learning_rate": 4.353172680351757e-06, "loss": 0.4166008, "memory(GiB)": 34.88, "step": 83360, "train_speed(iter/s)": 0.41118 }, { "acc": 0.91659632, "epoch": 2.2571954620529064, "grad_norm": 7.973715782165527, "learning_rate": 4.352617831015911e-06, "loss": 0.49021921, "memory(GiB)": 34.88, "step": 83365, "train_speed(iter/s)": 0.411181 }, { "acc": 0.93231745, "epoch": 2.2573308423361222, "grad_norm": 6.451838493347168, "learning_rate": 4.352062989796063e-06, "loss": 0.37494397, "memory(GiB)": 34.88, "step": 83370, "train_speed(iter/s)": 0.411182 }, { "acc": 0.91595383, "epoch": 2.2574662226193376, "grad_norm": 7.816139221191406, "learning_rate": 4.351508156699163e-06, "loss": 0.4547718, "memory(GiB)": 34.88, "step": 83375, "train_speed(iter/s)": 0.411184 }, { "acc": 0.92400551, "epoch": 2.2576016029025534, "grad_norm": 12.242517471313477, "learning_rate": 4.3509533317321615e-06, "loss": 0.49742308, "memory(GiB)": 34.88, "step": 83380, "train_speed(iter/s)": 0.411185 }, { "acc": 0.91859798, "epoch": 2.2577369831857688, "grad_norm": 9.87203598022461, "learning_rate": 4.350398514902007e-06, "loss": 0.47676935, "memory(GiB)": 34.88, "step": 83385, "train_speed(iter/s)": 0.411186 }, { "acc": 0.91105862, "epoch": 2.2578723634689846, "grad_norm": 9.67543888092041, "learning_rate": 4.349843706215651e-06, "loss": 0.49678001, "memory(GiB)": 34.88, "step": 83390, "train_speed(iter/s)": 0.411188 }, { "acc": 0.92233257, "epoch": 2.2580077437522, "grad_norm": 14.262094497680664, "learning_rate": 4.349288905680045e-06, "loss": 0.45813589, "memory(GiB)": 34.88, "step": 83395, "train_speed(iter/s)": 0.411189 }, { "acc": 0.91928158, "epoch": 2.2581431240354153, "grad_norm": 8.243867874145508, "learning_rate": 4.3487341133021374e-06, "loss": 0.46908364, "memory(GiB)": 34.88, "step": 83400, "train_speed(iter/s)": 0.41119 }, { "acc": 0.9137743, "epoch": 2.258278504318631, "grad_norm": 12.807812690734863, "learning_rate": 4.348179329088879e-06, "loss": 0.52381701, "memory(GiB)": 34.88, "step": 83405, "train_speed(iter/s)": 0.411192 }, { "acc": 0.89491034, "epoch": 2.2584138846018464, "grad_norm": 23.820758819580078, "learning_rate": 4.347624553047216e-06, "loss": 0.60113468, "memory(GiB)": 34.88, "step": 83410, "train_speed(iter/s)": 0.411193 }, { "acc": 0.91849632, "epoch": 2.2585492648850622, "grad_norm": 14.099701881408691, "learning_rate": 4.3470697851841046e-06, "loss": 0.4854166, "memory(GiB)": 34.88, "step": 83415, "train_speed(iter/s)": 0.411194 }, { "acc": 0.9159152, "epoch": 2.2586846451682776, "grad_norm": 11.650018692016602, "learning_rate": 4.346515025506488e-06, "loss": 0.50122032, "memory(GiB)": 34.88, "step": 83420, "train_speed(iter/s)": 0.411196 }, { "acc": 0.93981304, "epoch": 2.2588200254514934, "grad_norm": 8.344722747802734, "learning_rate": 4.345960274021317e-06, "loss": 0.35564985, "memory(GiB)": 34.88, "step": 83425, "train_speed(iter/s)": 0.411197 }, { "acc": 0.92918863, "epoch": 2.2589554057347088, "grad_norm": 10.352771759033203, "learning_rate": 4.345405530735546e-06, "loss": 0.30782342, "memory(GiB)": 34.88, "step": 83430, "train_speed(iter/s)": 0.411198 }, { "acc": 0.92704182, "epoch": 2.259090786017924, "grad_norm": 8.186991691589355, "learning_rate": 4.344850795656118e-06, "loss": 0.4230083, "memory(GiB)": 34.88, "step": 83435, "train_speed(iter/s)": 0.4112 }, { "acc": 0.92877884, "epoch": 2.25922616630114, "grad_norm": 10.181312561035156, "learning_rate": 4.344296068789985e-06, "loss": 0.41123428, "memory(GiB)": 34.88, "step": 83440, "train_speed(iter/s)": 0.411201 }, { "acc": 0.93894367, "epoch": 2.2593615465843553, "grad_norm": 16.42183494567871, "learning_rate": 4.343741350144097e-06, "loss": 0.39946523, "memory(GiB)": 34.88, "step": 83445, "train_speed(iter/s)": 0.411202 }, { "acc": 0.94542961, "epoch": 2.259496926867571, "grad_norm": 4.853211402893066, "learning_rate": 4.3431866397254e-06, "loss": 0.34050608, "memory(GiB)": 34.88, "step": 83450, "train_speed(iter/s)": 0.411203 }, { "acc": 0.93085709, "epoch": 2.2596323071507864, "grad_norm": 7.839773654937744, "learning_rate": 4.342631937540846e-06, "loss": 0.3985405, "memory(GiB)": 34.88, "step": 83455, "train_speed(iter/s)": 0.411205 }, { "acc": 0.94229527, "epoch": 2.2597676874340022, "grad_norm": 3.715312957763672, "learning_rate": 4.34207724359738e-06, "loss": 0.31741748, "memory(GiB)": 34.88, "step": 83460, "train_speed(iter/s)": 0.411206 }, { "acc": 0.91721077, "epoch": 2.2599030677172176, "grad_norm": 5.056064605712891, "learning_rate": 4.341522557901955e-06, "loss": 0.38586183, "memory(GiB)": 34.88, "step": 83465, "train_speed(iter/s)": 0.411208 }, { "acc": 0.92034941, "epoch": 2.2600384480004334, "grad_norm": 7.194250106811523, "learning_rate": 4.340967880461515e-06, "loss": 0.43276291, "memory(GiB)": 34.88, "step": 83470, "train_speed(iter/s)": 0.411209 }, { "acc": 0.9294281, "epoch": 2.2601738282836488, "grad_norm": 5.617040634155273, "learning_rate": 4.340413211283013e-06, "loss": 0.4255127, "memory(GiB)": 34.88, "step": 83475, "train_speed(iter/s)": 0.41121 }, { "acc": 0.9181179, "epoch": 2.2603092085668646, "grad_norm": 7.578713417053223, "learning_rate": 4.339858550373394e-06, "loss": 0.51494713, "memory(GiB)": 34.88, "step": 83480, "train_speed(iter/s)": 0.411212 }, { "acc": 0.93147945, "epoch": 2.26044458885008, "grad_norm": 3.697265386581421, "learning_rate": 4.339303897739609e-06, "loss": 0.42898932, "memory(GiB)": 34.88, "step": 83485, "train_speed(iter/s)": 0.411213 }, { "acc": 0.91358232, "epoch": 2.2605799691332953, "grad_norm": 4.911769866943359, "learning_rate": 4.338749253388602e-06, "loss": 0.53631554, "memory(GiB)": 34.88, "step": 83490, "train_speed(iter/s)": 0.411214 }, { "acc": 0.91287613, "epoch": 2.260715349416511, "grad_norm": 3.0004935264587402, "learning_rate": 4.3381946173273245e-06, "loss": 0.4977037, "memory(GiB)": 34.88, "step": 83495, "train_speed(iter/s)": 0.411216 }, { "acc": 0.92580919, "epoch": 2.2608507296997264, "grad_norm": 11.482258796691895, "learning_rate": 4.337639989562724e-06, "loss": 0.45243778, "memory(GiB)": 34.88, "step": 83500, "train_speed(iter/s)": 0.411217 }, { "acc": 0.9296443, "epoch": 2.2609861099829422, "grad_norm": 6.607451438903809, "learning_rate": 4.337085370101746e-06, "loss": 0.38582931, "memory(GiB)": 34.88, "step": 83505, "train_speed(iter/s)": 0.411218 }, { "acc": 0.91523561, "epoch": 2.2611214902661576, "grad_norm": 7.184787273406982, "learning_rate": 4.3365307589513415e-06, "loss": 0.46078739, "memory(GiB)": 34.88, "step": 83510, "train_speed(iter/s)": 0.411219 }, { "acc": 0.91280861, "epoch": 2.2612568705493734, "grad_norm": 10.951634407043457, "learning_rate": 4.335976156118454e-06, "loss": 0.48988934, "memory(GiB)": 34.88, "step": 83515, "train_speed(iter/s)": 0.411221 }, { "acc": 0.92223997, "epoch": 2.2613922508325888, "grad_norm": 7.948851108551025, "learning_rate": 4.335421561610036e-06, "loss": 0.40152836, "memory(GiB)": 34.88, "step": 83520, "train_speed(iter/s)": 0.411222 }, { "acc": 0.92582541, "epoch": 2.261527631115804, "grad_norm": 7.793793201446533, "learning_rate": 4.334866975433031e-06, "loss": 0.39002063, "memory(GiB)": 34.88, "step": 83525, "train_speed(iter/s)": 0.411223 }, { "acc": 0.91237736, "epoch": 2.26166301139902, "grad_norm": 14.018857955932617, "learning_rate": 4.3343123975943886e-06, "loss": 0.54364743, "memory(GiB)": 34.88, "step": 83530, "train_speed(iter/s)": 0.411225 }, { "acc": 0.92189503, "epoch": 2.2617983916822353, "grad_norm": 5.112648963928223, "learning_rate": 4.333757828101054e-06, "loss": 0.40219808, "memory(GiB)": 34.88, "step": 83535, "train_speed(iter/s)": 0.411226 }, { "acc": 0.9247447, "epoch": 2.261933771965451, "grad_norm": 6.789604187011719, "learning_rate": 4.333203266959976e-06, "loss": 0.39203956, "memory(GiB)": 34.88, "step": 83540, "train_speed(iter/s)": 0.411227 }, { "acc": 0.9202858, "epoch": 2.2620691522486664, "grad_norm": 6.7819647789001465, "learning_rate": 4.332648714178101e-06, "loss": 0.41282663, "memory(GiB)": 34.88, "step": 83545, "train_speed(iter/s)": 0.411229 }, { "acc": 0.93949242, "epoch": 2.2622045325318823, "grad_norm": 5.038455009460449, "learning_rate": 4.3320941697623745e-06, "loss": 0.31220927, "memory(GiB)": 34.88, "step": 83550, "train_speed(iter/s)": 0.41123 }, { "acc": 0.91310863, "epoch": 2.2623399128150976, "grad_norm": 6.592873573303223, "learning_rate": 4.331539633719746e-06, "loss": 0.44631138, "memory(GiB)": 34.88, "step": 83555, "train_speed(iter/s)": 0.411231 }, { "acc": 0.92833004, "epoch": 2.262475293098313, "grad_norm": 6.470881462097168, "learning_rate": 4.330985106057159e-06, "loss": 0.40736809, "memory(GiB)": 34.88, "step": 83560, "train_speed(iter/s)": 0.411233 }, { "acc": 0.91777544, "epoch": 2.2626106733815288, "grad_norm": 8.75652027130127, "learning_rate": 4.330430586781563e-06, "loss": 0.40991335, "memory(GiB)": 34.88, "step": 83565, "train_speed(iter/s)": 0.411234 }, { "acc": 0.91993952, "epoch": 2.262746053664744, "grad_norm": 4.976400852203369, "learning_rate": 4.329876075899903e-06, "loss": 0.44633055, "memory(GiB)": 34.88, "step": 83570, "train_speed(iter/s)": 0.411236 }, { "acc": 0.91744471, "epoch": 2.26288143394796, "grad_norm": 8.711267471313477, "learning_rate": 4.329321573419124e-06, "loss": 0.42111506, "memory(GiB)": 34.88, "step": 83575, "train_speed(iter/s)": 0.411237 }, { "acc": 0.93277559, "epoch": 2.2630168142311753, "grad_norm": 5.6519575119018555, "learning_rate": 4.328767079346175e-06, "loss": 0.42285986, "memory(GiB)": 34.88, "step": 83580, "train_speed(iter/s)": 0.411238 }, { "acc": 0.91916199, "epoch": 2.263152194514391, "grad_norm": 17.468050003051758, "learning_rate": 4.328212593688e-06, "loss": 0.43271551, "memory(GiB)": 34.88, "step": 83585, "train_speed(iter/s)": 0.41124 }, { "acc": 0.93763056, "epoch": 2.2632875747976064, "grad_norm": 6.33966064453125, "learning_rate": 4.327658116451546e-06, "loss": 0.29094825, "memory(GiB)": 34.88, "step": 83590, "train_speed(iter/s)": 0.411241 }, { "acc": 0.91400127, "epoch": 2.263422955080822, "grad_norm": 4.696037292480469, "learning_rate": 4.327103647643758e-06, "loss": 0.44968061, "memory(GiB)": 34.88, "step": 83595, "train_speed(iter/s)": 0.411242 }, { "acc": 0.91488037, "epoch": 2.2635583353640376, "grad_norm": 6.800835609436035, "learning_rate": 4.326549187271584e-06, "loss": 0.50220566, "memory(GiB)": 34.88, "step": 83600, "train_speed(iter/s)": 0.411244 }, { "acc": 0.92492695, "epoch": 2.263693715647253, "grad_norm": 5.294393539428711, "learning_rate": 4.325994735341967e-06, "loss": 0.39648829, "memory(GiB)": 34.88, "step": 83605, "train_speed(iter/s)": 0.411245 }, { "acc": 0.92848396, "epoch": 2.2638290959304688, "grad_norm": 7.096703052520752, "learning_rate": 4.325440291861855e-06, "loss": 0.35622821, "memory(GiB)": 34.88, "step": 83610, "train_speed(iter/s)": 0.411246 }, { "acc": 0.91624136, "epoch": 2.263964476213684, "grad_norm": 3.8773739337921143, "learning_rate": 4.324885856838193e-06, "loss": 0.39132555, "memory(GiB)": 34.88, "step": 83615, "train_speed(iter/s)": 0.411247 }, { "acc": 0.92291107, "epoch": 2.2640998564969, "grad_norm": 13.61266040802002, "learning_rate": 4.3243314302779235e-06, "loss": 0.38494945, "memory(GiB)": 34.88, "step": 83620, "train_speed(iter/s)": 0.411249 }, { "acc": 0.92191563, "epoch": 2.2642352367801153, "grad_norm": 7.517676830291748, "learning_rate": 4.323777012187995e-06, "loss": 0.39654851, "memory(GiB)": 34.88, "step": 83625, "train_speed(iter/s)": 0.41125 }, { "acc": 0.91789742, "epoch": 2.2643706170633306, "grad_norm": 7.661649227142334, "learning_rate": 4.323222602575351e-06, "loss": 0.43324823, "memory(GiB)": 34.88, "step": 83630, "train_speed(iter/s)": 0.411252 }, { "acc": 0.89301758, "epoch": 2.2645059973465465, "grad_norm": 13.424772262573242, "learning_rate": 4.322668201446939e-06, "loss": 0.69323764, "memory(GiB)": 34.88, "step": 83635, "train_speed(iter/s)": 0.411253 }, { "acc": 0.91929264, "epoch": 2.264641377629762, "grad_norm": 23.912729263305664, "learning_rate": 4.322113808809701e-06, "loss": 0.46960568, "memory(GiB)": 34.88, "step": 83640, "train_speed(iter/s)": 0.411254 }, { "acc": 0.936022, "epoch": 2.2647767579129776, "grad_norm": 7.713219165802002, "learning_rate": 4.321559424670582e-06, "loss": 0.40429845, "memory(GiB)": 34.88, "step": 83645, "train_speed(iter/s)": 0.411256 }, { "acc": 0.915662, "epoch": 2.264912138196193, "grad_norm": 7.733994960784912, "learning_rate": 4.32100504903653e-06, "loss": 0.48092451, "memory(GiB)": 34.88, "step": 83650, "train_speed(iter/s)": 0.411257 }, { "acc": 0.90198164, "epoch": 2.2650475184794088, "grad_norm": 8.748180389404297, "learning_rate": 4.320450681914487e-06, "loss": 0.56314807, "memory(GiB)": 34.88, "step": 83655, "train_speed(iter/s)": 0.411258 }, { "acc": 0.93555927, "epoch": 2.265182898762624, "grad_norm": 3.7021875381469727, "learning_rate": 4.3198963233113975e-06, "loss": 0.38757954, "memory(GiB)": 34.88, "step": 83660, "train_speed(iter/s)": 0.41126 }, { "acc": 0.93053036, "epoch": 2.26531827904584, "grad_norm": 6.6174092292785645, "learning_rate": 4.3193419732342054e-06, "loss": 0.43260145, "memory(GiB)": 34.88, "step": 83665, "train_speed(iter/s)": 0.411261 }, { "acc": 0.91807041, "epoch": 2.2654536593290553, "grad_norm": 14.168952941894531, "learning_rate": 4.318787631689858e-06, "loss": 0.45106759, "memory(GiB)": 34.88, "step": 83670, "train_speed(iter/s)": 0.411262 }, { "acc": 0.91930771, "epoch": 2.265589039612271, "grad_norm": 4.299767971038818, "learning_rate": 4.318233298685296e-06, "loss": 0.45811033, "memory(GiB)": 34.88, "step": 83675, "train_speed(iter/s)": 0.411263 }, { "acc": 0.94647865, "epoch": 2.2657244198954865, "grad_norm": 2.8930888175964355, "learning_rate": 4.317678974227467e-06, "loss": 0.2612658, "memory(GiB)": 34.88, "step": 83680, "train_speed(iter/s)": 0.411265 }, { "acc": 0.9218852, "epoch": 2.265859800178702, "grad_norm": 12.883030891418457, "learning_rate": 4.317124658323311e-06, "loss": 0.40280828, "memory(GiB)": 34.88, "step": 83685, "train_speed(iter/s)": 0.411266 }, { "acc": 0.93258295, "epoch": 2.2659951804619176, "grad_norm": 13.370387077331543, "learning_rate": 4.316570350979774e-06, "loss": 0.35404212, "memory(GiB)": 34.88, "step": 83690, "train_speed(iter/s)": 0.411267 }, { "acc": 0.92526503, "epoch": 2.266130560745133, "grad_norm": 8.726016998291016, "learning_rate": 4.316016052203802e-06, "loss": 0.35942173, "memory(GiB)": 34.88, "step": 83695, "train_speed(iter/s)": 0.411268 }, { "acc": 0.92943439, "epoch": 2.2662659410283488, "grad_norm": 14.979750633239746, "learning_rate": 4.315461762002335e-06, "loss": 0.39969006, "memory(GiB)": 34.88, "step": 83700, "train_speed(iter/s)": 0.41127 }, { "acc": 0.91743288, "epoch": 2.266401321311564, "grad_norm": 8.32375717163086, "learning_rate": 4.3149074803823185e-06, "loss": 0.40990248, "memory(GiB)": 34.88, "step": 83705, "train_speed(iter/s)": 0.411271 }, { "acc": 0.92049294, "epoch": 2.26653670159478, "grad_norm": 6.372313976287842, "learning_rate": 4.3143532073506944e-06, "loss": 0.42202277, "memory(GiB)": 34.88, "step": 83710, "train_speed(iter/s)": 0.411272 }, { "acc": 0.92287903, "epoch": 2.2666720818779953, "grad_norm": 13.82384967803955, "learning_rate": 4.313798942914408e-06, "loss": 0.38512816, "memory(GiB)": 34.88, "step": 83715, "train_speed(iter/s)": 0.411274 }, { "acc": 0.92480268, "epoch": 2.2668074621612107, "grad_norm": 4.4971184730529785, "learning_rate": 4.313244687080401e-06, "loss": 0.42069149, "memory(GiB)": 34.88, "step": 83720, "train_speed(iter/s)": 0.411275 }, { "acc": 0.92212086, "epoch": 2.2669428424444265, "grad_norm": 10.189651489257812, "learning_rate": 4.312690439855618e-06, "loss": 0.42082949, "memory(GiB)": 34.88, "step": 83725, "train_speed(iter/s)": 0.411276 }, { "acc": 0.92350159, "epoch": 2.267078222727642, "grad_norm": 6.070523738861084, "learning_rate": 4.312136201247001e-06, "loss": 0.43863835, "memory(GiB)": 34.88, "step": 83730, "train_speed(iter/s)": 0.411278 }, { "acc": 0.91552238, "epoch": 2.2672136030108576, "grad_norm": 11.148717880249023, "learning_rate": 4.311581971261493e-06, "loss": 0.4972023, "memory(GiB)": 34.88, "step": 83735, "train_speed(iter/s)": 0.411278 }, { "acc": 0.9287672, "epoch": 2.267348983294073, "grad_norm": 12.223735809326172, "learning_rate": 4.311027749906039e-06, "loss": 0.37929838, "memory(GiB)": 34.88, "step": 83740, "train_speed(iter/s)": 0.41128 }, { "acc": 0.94640608, "epoch": 2.2674843635772888, "grad_norm": 4.6930084228515625, "learning_rate": 4.310473537187577e-06, "loss": 0.35032964, "memory(GiB)": 34.88, "step": 83745, "train_speed(iter/s)": 0.411281 }, { "acc": 0.92430391, "epoch": 2.267619743860504, "grad_norm": 9.242321014404297, "learning_rate": 4.309919333113054e-06, "loss": 0.36406307, "memory(GiB)": 34.88, "step": 83750, "train_speed(iter/s)": 0.411283 }, { "acc": 0.92402697, "epoch": 2.2677551241437195, "grad_norm": 9.353076934814453, "learning_rate": 4.30936513768941e-06, "loss": 0.36681652, "memory(GiB)": 34.88, "step": 83755, "train_speed(iter/s)": 0.411284 }, { "acc": 0.91288071, "epoch": 2.2678905044269353, "grad_norm": 8.779363632202148, "learning_rate": 4.308810950923587e-06, "loss": 0.46444426, "memory(GiB)": 34.88, "step": 83760, "train_speed(iter/s)": 0.411285 }, { "acc": 0.90788546, "epoch": 2.2680258847101507, "grad_norm": 12.028615951538086, "learning_rate": 4.308256772822531e-06, "loss": 0.52351398, "memory(GiB)": 34.88, "step": 83765, "train_speed(iter/s)": 0.411286 }, { "acc": 0.92327518, "epoch": 2.2681612649933665, "grad_norm": 5.560236930847168, "learning_rate": 4.3077026033931805e-06, "loss": 0.34115205, "memory(GiB)": 34.88, "step": 83770, "train_speed(iter/s)": 0.411288 }, { "acc": 0.92120476, "epoch": 2.268296645276582, "grad_norm": 19.18600845336914, "learning_rate": 4.3071484426424796e-06, "loss": 0.4393466, "memory(GiB)": 34.88, "step": 83775, "train_speed(iter/s)": 0.411289 }, { "acc": 0.92126255, "epoch": 2.2684320255597976, "grad_norm": 10.609040260314941, "learning_rate": 4.306594290577369e-06, "loss": 0.49699049, "memory(GiB)": 34.88, "step": 83780, "train_speed(iter/s)": 0.41129 }, { "acc": 0.91668644, "epoch": 2.268567405843013, "grad_norm": 5.576919078826904, "learning_rate": 4.306040147204791e-06, "loss": 0.41173429, "memory(GiB)": 34.88, "step": 83785, "train_speed(iter/s)": 0.411292 }, { "acc": 0.91695337, "epoch": 2.2687027861262283, "grad_norm": 5.243932247161865, "learning_rate": 4.305486012531686e-06, "loss": 0.42529068, "memory(GiB)": 34.88, "step": 83790, "train_speed(iter/s)": 0.411293 }, { "acc": 0.91541004, "epoch": 2.268838166409444, "grad_norm": 13.284061431884766, "learning_rate": 4.304931886564997e-06, "loss": 0.42386703, "memory(GiB)": 34.88, "step": 83795, "train_speed(iter/s)": 0.411294 }, { "acc": 0.91854382, "epoch": 2.2689735466926595, "grad_norm": 9.425520896911621, "learning_rate": 4.3043777693116674e-06, "loss": 0.52522917, "memory(GiB)": 34.88, "step": 83800, "train_speed(iter/s)": 0.411296 }, { "acc": 0.90649166, "epoch": 2.2691089269758753, "grad_norm": 17.722627639770508, "learning_rate": 4.303823660778635e-06, "loss": 0.59183979, "memory(GiB)": 34.88, "step": 83805, "train_speed(iter/s)": 0.411297 }, { "acc": 0.92801123, "epoch": 2.2692443072590907, "grad_norm": 8.753405570983887, "learning_rate": 4.303269560972843e-06, "loss": 0.4030282, "memory(GiB)": 34.88, "step": 83810, "train_speed(iter/s)": 0.411298 }, { "acc": 0.93072453, "epoch": 2.2693796875423065, "grad_norm": 5.052095413208008, "learning_rate": 4.302715469901232e-06, "loss": 0.37886624, "memory(GiB)": 34.88, "step": 83815, "train_speed(iter/s)": 0.4113 }, { "acc": 0.91969042, "epoch": 2.269515067825522, "grad_norm": 14.536700248718262, "learning_rate": 4.302161387570746e-06, "loss": 0.46228437, "memory(GiB)": 34.88, "step": 83820, "train_speed(iter/s)": 0.411301 }, { "acc": 0.92587452, "epoch": 2.2696504481087376, "grad_norm": 7.490090370178223, "learning_rate": 4.30160731398832e-06, "loss": 0.37224908, "memory(GiB)": 34.88, "step": 83825, "train_speed(iter/s)": 0.411302 }, { "acc": 0.92401943, "epoch": 2.269785828391953, "grad_norm": 14.165809631347656, "learning_rate": 4.3010532491608985e-06, "loss": 0.44402056, "memory(GiB)": 34.88, "step": 83830, "train_speed(iter/s)": 0.411304 }, { "acc": 0.92951393, "epoch": 2.2699212086751688, "grad_norm": 7.852156639099121, "learning_rate": 4.300499193095424e-06, "loss": 0.38564069, "memory(GiB)": 34.88, "step": 83835, "train_speed(iter/s)": 0.411305 }, { "acc": 0.92411995, "epoch": 2.270056588958384, "grad_norm": 4.997243881225586, "learning_rate": 4.299945145798832e-06, "loss": 0.45824757, "memory(GiB)": 34.88, "step": 83840, "train_speed(iter/s)": 0.411306 }, { "acc": 0.92602444, "epoch": 2.2701919692415995, "grad_norm": 8.67737865447998, "learning_rate": 4.299391107278069e-06, "loss": 0.37886121, "memory(GiB)": 34.88, "step": 83845, "train_speed(iter/s)": 0.411308 }, { "acc": 0.93637629, "epoch": 2.2703273495248153, "grad_norm": 5.274353981018066, "learning_rate": 4.2988370775400716e-06, "loss": 0.38521085, "memory(GiB)": 34.88, "step": 83850, "train_speed(iter/s)": 0.411309 }, { "acc": 0.9214838, "epoch": 2.2704627298080307, "grad_norm": 5.099438190460205, "learning_rate": 4.2982830565917825e-06, "loss": 0.46497388, "memory(GiB)": 34.88, "step": 83855, "train_speed(iter/s)": 0.41131 }, { "acc": 0.93559828, "epoch": 2.2705981100912465, "grad_norm": 7.713232517242432, "learning_rate": 4.297729044440137e-06, "loss": 0.37934008, "memory(GiB)": 34.88, "step": 83860, "train_speed(iter/s)": 0.411312 }, { "acc": 0.93044014, "epoch": 2.270733490374462, "grad_norm": 5.432325839996338, "learning_rate": 4.297175041092083e-06, "loss": 0.4144309, "memory(GiB)": 34.88, "step": 83865, "train_speed(iter/s)": 0.411313 }, { "acc": 0.91767864, "epoch": 2.2708688706576776, "grad_norm": 5.575343132019043, "learning_rate": 4.296621046554553e-06, "loss": 0.45763826, "memory(GiB)": 34.88, "step": 83870, "train_speed(iter/s)": 0.411314 }, { "acc": 0.92334862, "epoch": 2.271004250940893, "grad_norm": 8.468826293945312, "learning_rate": 4.296067060834491e-06, "loss": 0.42233191, "memory(GiB)": 34.88, "step": 83875, "train_speed(iter/s)": 0.411316 }, { "acc": 0.91145525, "epoch": 2.2711396312241083, "grad_norm": 13.825773239135742, "learning_rate": 4.295513083938836e-06, "loss": 0.51088386, "memory(GiB)": 34.88, "step": 83880, "train_speed(iter/s)": 0.411317 }, { "acc": 0.9237525, "epoch": 2.271275011507324, "grad_norm": 8.788395881652832, "learning_rate": 4.2949591158745255e-06, "loss": 0.30048318, "memory(GiB)": 34.88, "step": 83885, "train_speed(iter/s)": 0.411318 }, { "acc": 0.91905136, "epoch": 2.2714103917905395, "grad_norm": 15.935322761535645, "learning_rate": 4.294405156648503e-06, "loss": 0.40890608, "memory(GiB)": 34.88, "step": 83890, "train_speed(iter/s)": 0.41132 }, { "acc": 0.92563543, "epoch": 2.2715457720737553, "grad_norm": 20.38251495361328, "learning_rate": 4.2938512062677055e-06, "loss": 0.36956272, "memory(GiB)": 34.88, "step": 83895, "train_speed(iter/s)": 0.411321 }, { "acc": 0.92865381, "epoch": 2.2716811523569707, "grad_norm": 8.117260932922363, "learning_rate": 4.293297264739074e-06, "loss": 0.39710331, "memory(GiB)": 34.88, "step": 83900, "train_speed(iter/s)": 0.411322 }, { "acc": 0.92722197, "epoch": 2.2718165326401865, "grad_norm": 6.599062442779541, "learning_rate": 4.292743332069545e-06, "loss": 0.37977798, "memory(GiB)": 34.88, "step": 83905, "train_speed(iter/s)": 0.411323 }, { "acc": 0.91582546, "epoch": 2.271951912923402, "grad_norm": 29.16401481628418, "learning_rate": 4.292189408266058e-06, "loss": 0.49495363, "memory(GiB)": 34.88, "step": 83910, "train_speed(iter/s)": 0.411325 }, { "acc": 0.90513096, "epoch": 2.272087293206617, "grad_norm": 8.329154968261719, "learning_rate": 4.291635493335555e-06, "loss": 0.59255624, "memory(GiB)": 34.88, "step": 83915, "train_speed(iter/s)": 0.411326 }, { "acc": 0.94320641, "epoch": 2.272222673489833, "grad_norm": 6.247265815734863, "learning_rate": 4.291081587284971e-06, "loss": 0.34100008, "memory(GiB)": 34.88, "step": 83920, "train_speed(iter/s)": 0.411327 }, { "acc": 0.92321959, "epoch": 2.2723580537730483, "grad_norm": 7.247901439666748, "learning_rate": 4.290527690121249e-06, "loss": 0.45337229, "memory(GiB)": 34.88, "step": 83925, "train_speed(iter/s)": 0.411328 }, { "acc": 0.92238703, "epoch": 2.272493434056264, "grad_norm": 3.0126993656158447, "learning_rate": 4.289973801851323e-06, "loss": 0.45968118, "memory(GiB)": 34.88, "step": 83930, "train_speed(iter/s)": 0.41133 }, { "acc": 0.92366505, "epoch": 2.2726288143394795, "grad_norm": 12.352130889892578, "learning_rate": 4.2894199224821346e-06, "loss": 0.39268789, "memory(GiB)": 34.88, "step": 83935, "train_speed(iter/s)": 0.411331 }, { "acc": 0.92384977, "epoch": 2.2727641946226953, "grad_norm": 12.351106643676758, "learning_rate": 4.288866052020621e-06, "loss": 0.42031932, "memory(GiB)": 34.88, "step": 83940, "train_speed(iter/s)": 0.411333 }, { "acc": 0.92597904, "epoch": 2.2728995749059107, "grad_norm": 46.12057876586914, "learning_rate": 4.2883121904737214e-06, "loss": 0.42926226, "memory(GiB)": 34.88, "step": 83945, "train_speed(iter/s)": 0.411334 }, { "acc": 0.91171207, "epoch": 2.273034955189126, "grad_norm": 8.954415321350098, "learning_rate": 4.2877583378483744e-06, "loss": 0.48843956, "memory(GiB)": 34.88, "step": 83950, "train_speed(iter/s)": 0.411335 }, { "acc": 0.93424129, "epoch": 2.273170335472342, "grad_norm": 7.286215782165527, "learning_rate": 4.287204494151515e-06, "loss": 0.39102683, "memory(GiB)": 34.88, "step": 83955, "train_speed(iter/s)": 0.411336 }, { "acc": 0.92055397, "epoch": 2.273305715755557, "grad_norm": 8.793149948120117, "learning_rate": 4.2866506593900864e-06, "loss": 0.46322088, "memory(GiB)": 34.88, "step": 83960, "train_speed(iter/s)": 0.411338 }, { "acc": 0.92790174, "epoch": 2.273441096038773, "grad_norm": 6.239081859588623, "learning_rate": 4.2860968335710215e-06, "loss": 0.40212145, "memory(GiB)": 34.88, "step": 83965, "train_speed(iter/s)": 0.411339 }, { "acc": 0.92548122, "epoch": 2.2735764763219883, "grad_norm": 11.775338172912598, "learning_rate": 4.28554301670126e-06, "loss": 0.49831982, "memory(GiB)": 34.88, "step": 83970, "train_speed(iter/s)": 0.41134 }, { "acc": 0.92041225, "epoch": 2.273711856605204, "grad_norm": 3.797466993331909, "learning_rate": 4.28498920878774e-06, "loss": 0.47708807, "memory(GiB)": 34.88, "step": 83975, "train_speed(iter/s)": 0.411342 }, { "acc": 0.92233963, "epoch": 2.2738472368884195, "grad_norm": 6.144777297973633, "learning_rate": 4.2844354098374e-06, "loss": 0.45326567, "memory(GiB)": 34.88, "step": 83980, "train_speed(iter/s)": 0.411343 }, { "acc": 0.92236958, "epoch": 2.2739826171716353, "grad_norm": 10.541848182678223, "learning_rate": 4.283881619857174e-06, "loss": 0.39619346, "memory(GiB)": 34.88, "step": 83985, "train_speed(iter/s)": 0.411344 }, { "acc": 0.90339298, "epoch": 2.2741179974548507, "grad_norm": 3.778613805770874, "learning_rate": 4.283327838854002e-06, "loss": 0.51244965, "memory(GiB)": 34.88, "step": 83990, "train_speed(iter/s)": 0.411346 }, { "acc": 0.94533634, "epoch": 2.2742533777380665, "grad_norm": 11.098854064941406, "learning_rate": 4.282774066834822e-06, "loss": 0.3302634, "memory(GiB)": 34.88, "step": 83995, "train_speed(iter/s)": 0.411347 }, { "acc": 0.91314259, "epoch": 2.274388758021282, "grad_norm": 8.447147369384766, "learning_rate": 4.2822203038065675e-06, "loss": 0.50533247, "memory(GiB)": 34.88, "step": 84000, "train_speed(iter/s)": 0.411348 }, { "acc": 0.93245878, "epoch": 2.274524138304497, "grad_norm": 5.0342278480529785, "learning_rate": 4.281666549776179e-06, "loss": 0.34895821, "memory(GiB)": 34.88, "step": 84005, "train_speed(iter/s)": 0.41135 }, { "acc": 0.91452065, "epoch": 2.274659518587713, "grad_norm": 7.3295369148254395, "learning_rate": 4.2811128047505905e-06, "loss": 0.52023296, "memory(GiB)": 34.88, "step": 84010, "train_speed(iter/s)": 0.41135 }, { "acc": 0.90712872, "epoch": 2.2747948988709283, "grad_norm": 9.978039741516113, "learning_rate": 4.280559068736742e-06, "loss": 0.50905132, "memory(GiB)": 34.88, "step": 84015, "train_speed(iter/s)": 0.411351 }, { "acc": 0.90992937, "epoch": 2.274930279154144, "grad_norm": 6.8954691886901855, "learning_rate": 4.2800053417415675e-06, "loss": 0.5397913, "memory(GiB)": 34.88, "step": 84020, "train_speed(iter/s)": 0.411353 }, { "acc": 0.91702347, "epoch": 2.2750656594373595, "grad_norm": 6.675645351409912, "learning_rate": 4.2794516237720034e-06, "loss": 0.45706258, "memory(GiB)": 34.88, "step": 84025, "train_speed(iter/s)": 0.411354 }, { "acc": 0.93220053, "epoch": 2.2752010397205753, "grad_norm": 11.673722267150879, "learning_rate": 4.2788979148349895e-06, "loss": 0.41229048, "memory(GiB)": 34.88, "step": 84030, "train_speed(iter/s)": 0.411355 }, { "acc": 0.94017735, "epoch": 2.2753364200037907, "grad_norm": 9.113734245300293, "learning_rate": 4.278344214937458e-06, "loss": 0.3148742, "memory(GiB)": 34.88, "step": 84035, "train_speed(iter/s)": 0.411357 }, { "acc": 0.93294334, "epoch": 2.275471800287006, "grad_norm": 7.26718282699585, "learning_rate": 4.277790524086347e-06, "loss": 0.3403203, "memory(GiB)": 34.88, "step": 84040, "train_speed(iter/s)": 0.411358 }, { "acc": 0.93519115, "epoch": 2.275607180570222, "grad_norm": 7.988828659057617, "learning_rate": 4.277236842288592e-06, "loss": 0.35575256, "memory(GiB)": 34.88, "step": 84045, "train_speed(iter/s)": 0.411359 }, { "acc": 0.91534481, "epoch": 2.275742560853437, "grad_norm": 8.475044250488281, "learning_rate": 4.27668316955113e-06, "loss": 0.44051585, "memory(GiB)": 34.88, "step": 84050, "train_speed(iter/s)": 0.411361 }, { "acc": 0.91698627, "epoch": 2.275877941136653, "grad_norm": 15.986557960510254, "learning_rate": 4.276129505880894e-06, "loss": 0.48384042, "memory(GiB)": 34.88, "step": 84055, "train_speed(iter/s)": 0.411362 }, { "acc": 0.91428204, "epoch": 2.2760133214198683, "grad_norm": 10.204231262207031, "learning_rate": 4.275575851284824e-06, "loss": 0.51744623, "memory(GiB)": 34.88, "step": 84060, "train_speed(iter/s)": 0.411363 }, { "acc": 0.92283115, "epoch": 2.276148701703084, "grad_norm": 9.823586463928223, "learning_rate": 4.2750222057698535e-06, "loss": 0.37786055, "memory(GiB)": 34.88, "step": 84065, "train_speed(iter/s)": 0.411364 }, { "acc": 0.90548029, "epoch": 2.2762840819862995, "grad_norm": 8.327417373657227, "learning_rate": 4.274468569342916e-06, "loss": 0.5409831, "memory(GiB)": 34.88, "step": 84070, "train_speed(iter/s)": 0.411366 }, { "acc": 0.90896206, "epoch": 2.276419462269515, "grad_norm": 19.13500213623047, "learning_rate": 4.273914942010952e-06, "loss": 0.50347958, "memory(GiB)": 34.88, "step": 84075, "train_speed(iter/s)": 0.411367 }, { "acc": 0.93749781, "epoch": 2.2765548425527307, "grad_norm": 7.401317119598389, "learning_rate": 4.2733613237808916e-06, "loss": 0.3357517, "memory(GiB)": 34.88, "step": 84080, "train_speed(iter/s)": 0.411369 }, { "acc": 0.9170063, "epoch": 2.276690222835946, "grad_norm": 11.875189781188965, "learning_rate": 4.2728077146596725e-06, "loss": 0.45994244, "memory(GiB)": 34.88, "step": 84085, "train_speed(iter/s)": 0.41137 }, { "acc": 0.93218746, "epoch": 2.276825603119162, "grad_norm": 8.282612800598145, "learning_rate": 4.272254114654228e-06, "loss": 0.36238601, "memory(GiB)": 34.88, "step": 84090, "train_speed(iter/s)": 0.411371 }, { "acc": 0.93591213, "epoch": 2.276960983402377, "grad_norm": 7.778720855712891, "learning_rate": 4.271700523771494e-06, "loss": 0.31779311, "memory(GiB)": 34.88, "step": 84095, "train_speed(iter/s)": 0.411373 }, { "acc": 0.92537594, "epoch": 2.277096363685593, "grad_norm": 6.273628234863281, "learning_rate": 4.271146942018408e-06, "loss": 0.39528701, "memory(GiB)": 34.88, "step": 84100, "train_speed(iter/s)": 0.411374 }, { "acc": 0.92284212, "epoch": 2.2772317439688083, "grad_norm": 7.362995624542236, "learning_rate": 4.2705933694018996e-06, "loss": 0.48057613, "memory(GiB)": 34.88, "step": 84105, "train_speed(iter/s)": 0.411375 }, { "acc": 0.92144299, "epoch": 2.2773671242520237, "grad_norm": 4.474291801452637, "learning_rate": 4.270039805928907e-06, "loss": 0.45657911, "memory(GiB)": 34.88, "step": 84110, "train_speed(iter/s)": 0.411376 }, { "acc": 0.92502527, "epoch": 2.2775025045352395, "grad_norm": 17.251440048217773, "learning_rate": 4.269486251606365e-06, "loss": 0.45836935, "memory(GiB)": 34.88, "step": 84115, "train_speed(iter/s)": 0.411378 }, { "acc": 0.89800415, "epoch": 2.277637884818455, "grad_norm": 9.892667770385742, "learning_rate": 4.268932706441206e-06, "loss": 0.61146517, "memory(GiB)": 34.88, "step": 84120, "train_speed(iter/s)": 0.411379 }, { "acc": 0.92093086, "epoch": 2.2777732651016707, "grad_norm": 9.71729564666748, "learning_rate": 4.268379170440364e-06, "loss": 0.39711444, "memory(GiB)": 34.88, "step": 84125, "train_speed(iter/s)": 0.41138 }, { "acc": 0.91745634, "epoch": 2.277908645384886, "grad_norm": 7.383866786956787, "learning_rate": 4.267825643610774e-06, "loss": 0.4555665, "memory(GiB)": 34.88, "step": 84130, "train_speed(iter/s)": 0.411382 }, { "acc": 0.92388954, "epoch": 2.278044025668102, "grad_norm": 3.2626302242279053, "learning_rate": 4.26727212595937e-06, "loss": 0.41300421, "memory(GiB)": 34.88, "step": 84135, "train_speed(iter/s)": 0.411383 }, { "acc": 0.92916012, "epoch": 2.278179405951317, "grad_norm": 7.078282833099365, "learning_rate": 4.266718617493086e-06, "loss": 0.42555809, "memory(GiB)": 34.88, "step": 84140, "train_speed(iter/s)": 0.411384 }, { "acc": 0.92295952, "epoch": 2.278314786234533, "grad_norm": 10.198253631591797, "learning_rate": 4.266165118218856e-06, "loss": 0.44305763, "memory(GiB)": 34.88, "step": 84145, "train_speed(iter/s)": 0.411385 }, { "acc": 0.93320179, "epoch": 2.2784501665177483, "grad_norm": 10.959997177124023, "learning_rate": 4.265611628143613e-06, "loss": 0.31165786, "memory(GiB)": 34.88, "step": 84150, "train_speed(iter/s)": 0.411387 }, { "acc": 0.92505245, "epoch": 2.278585546800964, "grad_norm": 16.49116325378418, "learning_rate": 4.265058147274293e-06, "loss": 0.42650118, "memory(GiB)": 34.88, "step": 84155, "train_speed(iter/s)": 0.411388 }, { "acc": 0.93223953, "epoch": 2.2787209270841795, "grad_norm": 8.083699226379395, "learning_rate": 4.264504675617825e-06, "loss": 0.34053862, "memory(GiB)": 34.88, "step": 84160, "train_speed(iter/s)": 0.411389 }, { "acc": 0.91639223, "epoch": 2.278856307367395, "grad_norm": 14.622639656066895, "learning_rate": 4.263951213181146e-06, "loss": 0.56247778, "memory(GiB)": 34.88, "step": 84165, "train_speed(iter/s)": 0.41139 }, { "acc": 0.90672798, "epoch": 2.2789916876506107, "grad_norm": 8.580355644226074, "learning_rate": 4.2633977599711865e-06, "loss": 0.48365159, "memory(GiB)": 34.88, "step": 84170, "train_speed(iter/s)": 0.411392 }, { "acc": 0.92100048, "epoch": 2.279127067933826, "grad_norm": 10.183135032653809, "learning_rate": 4.262844315994881e-06, "loss": 0.4740016, "memory(GiB)": 34.88, "step": 84175, "train_speed(iter/s)": 0.411393 }, { "acc": 0.92466526, "epoch": 2.279262448217042, "grad_norm": 7.851081848144531, "learning_rate": 4.262290881259163e-06, "loss": 0.37122822, "memory(GiB)": 34.88, "step": 84180, "train_speed(iter/s)": 0.411394 }, { "acc": 0.91937866, "epoch": 2.279397828500257, "grad_norm": 14.710617065429688, "learning_rate": 4.2617374557709645e-06, "loss": 0.43136864, "memory(GiB)": 34.88, "step": 84185, "train_speed(iter/s)": 0.411396 }, { "acc": 0.91142063, "epoch": 2.279533208783473, "grad_norm": 8.947775840759277, "learning_rate": 4.261184039537219e-06, "loss": 0.43874512, "memory(GiB)": 34.88, "step": 84190, "train_speed(iter/s)": 0.411396 }, { "acc": 0.92213631, "epoch": 2.2796685890666883, "grad_norm": 5.498154640197754, "learning_rate": 4.260630632564858e-06, "loss": 0.36912482, "memory(GiB)": 34.88, "step": 84195, "train_speed(iter/s)": 0.411397 }, { "acc": 0.92634907, "epoch": 2.2798039693499037, "grad_norm": 6.3927693367004395, "learning_rate": 4.260077234860817e-06, "loss": 0.4098268, "memory(GiB)": 34.88, "step": 84200, "train_speed(iter/s)": 0.411399 }, { "acc": 0.91916847, "epoch": 2.2799393496331195, "grad_norm": 7.793777942657471, "learning_rate": 4.259523846432024e-06, "loss": 0.45065837, "memory(GiB)": 34.88, "step": 84205, "train_speed(iter/s)": 0.4114 }, { "acc": 0.92969532, "epoch": 2.280074729916335, "grad_norm": 6.901451587677002, "learning_rate": 4.258970467285412e-06, "loss": 0.35764804, "memory(GiB)": 34.88, "step": 84210, "train_speed(iter/s)": 0.411401 }, { "acc": 0.93352261, "epoch": 2.2802101101995507, "grad_norm": 6.021812915802002, "learning_rate": 4.258417097427917e-06, "loss": 0.3081275, "memory(GiB)": 34.88, "step": 84215, "train_speed(iter/s)": 0.411402 }, { "acc": 0.9312871, "epoch": 2.280345490482766, "grad_norm": 21.232633590698242, "learning_rate": 4.2578637368664665e-06, "loss": 0.4559751, "memory(GiB)": 34.88, "step": 84220, "train_speed(iter/s)": 0.411404 }, { "acc": 0.93186531, "epoch": 2.280480870765982, "grad_norm": 6.081386089324951, "learning_rate": 4.257310385607997e-06, "loss": 0.35506837, "memory(GiB)": 34.88, "step": 84225, "train_speed(iter/s)": 0.411405 }, { "acc": 0.94992323, "epoch": 2.280616251049197, "grad_norm": 6.910171031951904, "learning_rate": 4.256757043659435e-06, "loss": 0.25513158, "memory(GiB)": 34.88, "step": 84230, "train_speed(iter/s)": 0.411406 }, { "acc": 0.91204014, "epoch": 2.2807516313324125, "grad_norm": 11.475637435913086, "learning_rate": 4.256203711027718e-06, "loss": 0.52294827, "memory(GiB)": 34.88, "step": 84235, "train_speed(iter/s)": 0.411407 }, { "acc": 0.92066879, "epoch": 2.2808870116156283, "grad_norm": 8.309212684631348, "learning_rate": 4.2556503877197725e-06, "loss": 0.37008848, "memory(GiB)": 34.88, "step": 84240, "train_speed(iter/s)": 0.411409 }, { "acc": 0.92162685, "epoch": 2.2810223918988437, "grad_norm": 3.60764479637146, "learning_rate": 4.255097073742532e-06, "loss": 0.45359492, "memory(GiB)": 34.88, "step": 84245, "train_speed(iter/s)": 0.41141 }, { "acc": 0.91733932, "epoch": 2.2811577721820595, "grad_norm": 14.499088287353516, "learning_rate": 4.254543769102929e-06, "loss": 0.4833251, "memory(GiB)": 34.88, "step": 84250, "train_speed(iter/s)": 0.411411 }, { "acc": 0.92581482, "epoch": 2.281293152465275, "grad_norm": 6.043933868408203, "learning_rate": 4.2539904738078915e-06, "loss": 0.33452153, "memory(GiB)": 34.88, "step": 84255, "train_speed(iter/s)": 0.411412 }, { "acc": 0.9080246, "epoch": 2.2814285327484907, "grad_norm": 4.056192874908447, "learning_rate": 4.253437187864355e-06, "loss": 0.50113935, "memory(GiB)": 34.88, "step": 84260, "train_speed(iter/s)": 0.411413 }, { "acc": 0.91762848, "epoch": 2.281563913031706, "grad_norm": 20.087495803833008, "learning_rate": 4.252883911279247e-06, "loss": 0.46495175, "memory(GiB)": 34.88, "step": 84265, "train_speed(iter/s)": 0.411415 }, { "acc": 0.91988316, "epoch": 2.2816992933149214, "grad_norm": 8.16920280456543, "learning_rate": 4.2523306440595e-06, "loss": 0.41703348, "memory(GiB)": 34.88, "step": 84270, "train_speed(iter/s)": 0.411416 }, { "acc": 0.91203022, "epoch": 2.281834673598137, "grad_norm": 9.437492370605469, "learning_rate": 4.251777386212044e-06, "loss": 0.54387779, "memory(GiB)": 34.88, "step": 84275, "train_speed(iter/s)": 0.411417 }, { "acc": 0.92164078, "epoch": 2.2819700538813525, "grad_norm": 12.563080787658691, "learning_rate": 4.251224137743811e-06, "loss": 0.35723839, "memory(GiB)": 34.88, "step": 84280, "train_speed(iter/s)": 0.411418 }, { "acc": 0.94655447, "epoch": 2.2821054341645683, "grad_norm": 9.903804779052734, "learning_rate": 4.250670898661729e-06, "loss": 0.30249977, "memory(GiB)": 34.88, "step": 84285, "train_speed(iter/s)": 0.41142 }, { "acc": 0.92069368, "epoch": 2.2822408144477837, "grad_norm": 7.154399871826172, "learning_rate": 4.25011766897273e-06, "loss": 0.42146301, "memory(GiB)": 34.88, "step": 84290, "train_speed(iter/s)": 0.411421 }, { "acc": 0.925667, "epoch": 2.2823761947309995, "grad_norm": 6.331545829772949, "learning_rate": 4.249564448683746e-06, "loss": 0.41269474, "memory(GiB)": 34.88, "step": 84295, "train_speed(iter/s)": 0.411422 }, { "acc": 0.91914873, "epoch": 2.282511575014215, "grad_norm": 10.576717376708984, "learning_rate": 4.2490112378017034e-06, "loss": 0.45448961, "memory(GiB)": 34.88, "step": 84300, "train_speed(iter/s)": 0.411423 }, { "acc": 0.93766556, "epoch": 2.2826469552974307, "grad_norm": 14.313356399536133, "learning_rate": 4.248458036333536e-06, "loss": 0.37149291, "memory(GiB)": 34.88, "step": 84305, "train_speed(iter/s)": 0.411425 }, { "acc": 0.92156811, "epoch": 2.282782335580646, "grad_norm": 5.911921501159668, "learning_rate": 4.24790484428617e-06, "loss": 0.41658964, "memory(GiB)": 34.88, "step": 84310, "train_speed(iter/s)": 0.411426 }, { "acc": 0.9070035, "epoch": 2.282917715863862, "grad_norm": 13.102824211120605, "learning_rate": 4.247351661666538e-06, "loss": 0.51225238, "memory(GiB)": 34.88, "step": 84315, "train_speed(iter/s)": 0.411427 }, { "acc": 0.93014812, "epoch": 2.283053096147077, "grad_norm": 16.447263717651367, "learning_rate": 4.246798488481569e-06, "loss": 0.37730422, "memory(GiB)": 34.88, "step": 84320, "train_speed(iter/s)": 0.411428 }, { "acc": 0.93675499, "epoch": 2.2831884764302925, "grad_norm": 4.144443035125732, "learning_rate": 4.246245324738192e-06, "loss": 0.30565042, "memory(GiB)": 34.88, "step": 84325, "train_speed(iter/s)": 0.411429 }, { "acc": 0.91546669, "epoch": 2.2833238567135083, "grad_norm": 10.558870315551758, "learning_rate": 4.245692170443339e-06, "loss": 0.46379876, "memory(GiB)": 34.88, "step": 84330, "train_speed(iter/s)": 0.41143 }, { "acc": 0.92634106, "epoch": 2.2834592369967237, "grad_norm": 8.152414321899414, "learning_rate": 4.245139025603934e-06, "loss": 0.50371132, "memory(GiB)": 34.88, "step": 84335, "train_speed(iter/s)": 0.411432 }, { "acc": 0.91690216, "epoch": 2.2835946172799395, "grad_norm": 6.834778785705566, "learning_rate": 4.244585890226912e-06, "loss": 0.46522093, "memory(GiB)": 34.88, "step": 84340, "train_speed(iter/s)": 0.411433 }, { "acc": 0.91563377, "epoch": 2.283729997563155, "grad_norm": 9.41450309753418, "learning_rate": 4.244032764319198e-06, "loss": 0.49382763, "memory(GiB)": 34.88, "step": 84345, "train_speed(iter/s)": 0.411434 }, { "acc": 0.93353281, "epoch": 2.2838653778463707, "grad_norm": 9.502838134765625, "learning_rate": 4.243479647887724e-06, "loss": 0.41359897, "memory(GiB)": 34.88, "step": 84350, "train_speed(iter/s)": 0.411435 }, { "acc": 0.91605387, "epoch": 2.284000758129586, "grad_norm": 11.050212860107422, "learning_rate": 4.242926540939416e-06, "loss": 0.49219618, "memory(GiB)": 34.88, "step": 84355, "train_speed(iter/s)": 0.411437 }, { "acc": 0.92456207, "epoch": 2.2841361384128014, "grad_norm": 4.314446449279785, "learning_rate": 4.242373443481204e-06, "loss": 0.40315299, "memory(GiB)": 34.88, "step": 84360, "train_speed(iter/s)": 0.411438 }, { "acc": 0.91245308, "epoch": 2.284271518696017, "grad_norm": 10.614901542663574, "learning_rate": 4.241820355520019e-06, "loss": 0.51135426, "memory(GiB)": 34.88, "step": 84365, "train_speed(iter/s)": 0.411439 }, { "acc": 0.90583591, "epoch": 2.2844068989792325, "grad_norm": 12.140721321105957, "learning_rate": 4.2412672770627865e-06, "loss": 0.5816638, "memory(GiB)": 34.88, "step": 84370, "train_speed(iter/s)": 0.41144 }, { "acc": 0.91045628, "epoch": 2.2845422792624483, "grad_norm": 8.185766220092773, "learning_rate": 4.2407142081164355e-06, "loss": 0.46210594, "memory(GiB)": 34.88, "step": 84375, "train_speed(iter/s)": 0.411441 }, { "acc": 0.91641436, "epoch": 2.2846776595456637, "grad_norm": 12.237702369689941, "learning_rate": 4.240161148687894e-06, "loss": 0.45056505, "memory(GiB)": 34.88, "step": 84380, "train_speed(iter/s)": 0.411442 }, { "acc": 0.94213514, "epoch": 2.2848130398288795, "grad_norm": 4.703129291534424, "learning_rate": 4.239608098784091e-06, "loss": 0.2820045, "memory(GiB)": 34.88, "step": 84385, "train_speed(iter/s)": 0.411444 }, { "acc": 0.93643456, "epoch": 2.284948420112095, "grad_norm": 19.975635528564453, "learning_rate": 4.239055058411954e-06, "loss": 0.36926904, "memory(GiB)": 34.88, "step": 84390, "train_speed(iter/s)": 0.411444 }, { "acc": 0.9230547, "epoch": 2.2850838003953102, "grad_norm": 9.058921813964844, "learning_rate": 4.238502027578411e-06, "loss": 0.42878666, "memory(GiB)": 34.88, "step": 84395, "train_speed(iter/s)": 0.411445 }, { "acc": 0.91572685, "epoch": 2.285219180678526, "grad_norm": 20.437110900878906, "learning_rate": 4.237949006290391e-06, "loss": 0.40189772, "memory(GiB)": 34.88, "step": 84400, "train_speed(iter/s)": 0.411447 }, { "acc": 0.9351922, "epoch": 2.2853545609617414, "grad_norm": 6.9601640701293945, "learning_rate": 4.237395994554818e-06, "loss": 0.34842956, "memory(GiB)": 34.88, "step": 84405, "train_speed(iter/s)": 0.411448 }, { "acc": 0.90025129, "epoch": 2.285489941244957, "grad_norm": 8.312572479248047, "learning_rate": 4.236842992378626e-06, "loss": 0.56320505, "memory(GiB)": 34.88, "step": 84410, "train_speed(iter/s)": 0.411449 }, { "acc": 0.92708931, "epoch": 2.2856253215281725, "grad_norm": 12.174713134765625, "learning_rate": 4.236289999768736e-06, "loss": 0.44054489, "memory(GiB)": 34.88, "step": 84415, "train_speed(iter/s)": 0.411451 }, { "acc": 0.90643349, "epoch": 2.2857607018113884, "grad_norm": 22.661712646484375, "learning_rate": 4.23573701673208e-06, "loss": 0.58889289, "memory(GiB)": 34.88, "step": 84420, "train_speed(iter/s)": 0.411452 }, { "acc": 0.93386421, "epoch": 2.2858960820946037, "grad_norm": 7.492886543273926, "learning_rate": 4.235184043275581e-06, "loss": 0.34372513, "memory(GiB)": 34.88, "step": 84425, "train_speed(iter/s)": 0.411453 }, { "acc": 0.93350658, "epoch": 2.286031462377819, "grad_norm": 11.849140167236328, "learning_rate": 4.23463107940617e-06, "loss": 0.35912476, "memory(GiB)": 34.88, "step": 84430, "train_speed(iter/s)": 0.411454 }, { "acc": 0.92970457, "epoch": 2.286166842661035, "grad_norm": 11.915319442749023, "learning_rate": 4.23407812513077e-06, "loss": 0.35723696, "memory(GiB)": 34.88, "step": 84435, "train_speed(iter/s)": 0.411455 }, { "acc": 0.94106693, "epoch": 2.2863022229442502, "grad_norm": 4.483527660369873, "learning_rate": 4.233525180456311e-06, "loss": 0.30754669, "memory(GiB)": 34.88, "step": 84440, "train_speed(iter/s)": 0.411456 }, { "acc": 0.93287888, "epoch": 2.286437603227466, "grad_norm": 7.34861946105957, "learning_rate": 4.232972245389718e-06, "loss": 0.33795018, "memory(GiB)": 34.88, "step": 84445, "train_speed(iter/s)": 0.411458 }, { "acc": 0.91445713, "epoch": 2.2865729835106814, "grad_norm": 5.722442150115967, "learning_rate": 4.23241931993792e-06, "loss": 0.48056059, "memory(GiB)": 34.88, "step": 84450, "train_speed(iter/s)": 0.411459 }, { "acc": 0.90677633, "epoch": 2.286708363793897, "grad_norm": 10.384778022766113, "learning_rate": 4.2318664041078415e-06, "loss": 0.58663702, "memory(GiB)": 34.88, "step": 84455, "train_speed(iter/s)": 0.41146 }, { "acc": 0.93355751, "epoch": 2.2868437440771126, "grad_norm": 4.471191883087158, "learning_rate": 4.231313497906408e-06, "loss": 0.35919516, "memory(GiB)": 34.88, "step": 84460, "train_speed(iter/s)": 0.411462 }, { "acc": 0.93699026, "epoch": 2.2869791243603284, "grad_norm": 5.372714042663574, "learning_rate": 4.230760601340548e-06, "loss": 0.2889009, "memory(GiB)": 34.88, "step": 84465, "train_speed(iter/s)": 0.411463 }, { "acc": 0.91565075, "epoch": 2.2871145046435437, "grad_norm": 6.139115333557129, "learning_rate": 4.230207714417185e-06, "loss": 0.40739822, "memory(GiB)": 34.88, "step": 84470, "train_speed(iter/s)": 0.411464 }, { "acc": 0.92252665, "epoch": 2.2872498849267595, "grad_norm": 6.761527061462402, "learning_rate": 4.2296548371432476e-06, "loss": 0.4186964, "memory(GiB)": 34.88, "step": 84475, "train_speed(iter/s)": 0.411465 }, { "acc": 0.93687544, "epoch": 2.287385265209975, "grad_norm": 12.580523490905762, "learning_rate": 4.22910196952566e-06, "loss": 0.34795241, "memory(GiB)": 34.88, "step": 84480, "train_speed(iter/s)": 0.411467 }, { "acc": 0.95082645, "epoch": 2.2875206454931902, "grad_norm": 7.700351715087891, "learning_rate": 4.228549111571349e-06, "loss": 0.26678948, "memory(GiB)": 34.88, "step": 84485, "train_speed(iter/s)": 0.411468 }, { "acc": 0.93935509, "epoch": 2.287656025776406, "grad_norm": 6.549174785614014, "learning_rate": 4.227996263287241e-06, "loss": 0.29853292, "memory(GiB)": 34.88, "step": 84490, "train_speed(iter/s)": 0.411469 }, { "acc": 0.92314625, "epoch": 2.2877914060596214, "grad_norm": 5.822781085968018, "learning_rate": 4.2274434246802584e-06, "loss": 0.44165993, "memory(GiB)": 34.88, "step": 84495, "train_speed(iter/s)": 0.41147 }, { "acc": 0.91607523, "epoch": 2.287926786342837, "grad_norm": 8.00771427154541, "learning_rate": 4.22689059575733e-06, "loss": 0.45908356, "memory(GiB)": 34.88, "step": 84500, "train_speed(iter/s)": 0.411471 }, { "acc": 0.92369719, "epoch": 2.2880621666260526, "grad_norm": 17.549890518188477, "learning_rate": 4.226337776525378e-06, "loss": 0.41171675, "memory(GiB)": 34.88, "step": 84505, "train_speed(iter/s)": 0.411472 }, { "acc": 0.92627163, "epoch": 2.2881975469092684, "grad_norm": 7.473493576049805, "learning_rate": 4.2257849669913286e-06, "loss": 0.37371626, "memory(GiB)": 34.88, "step": 84510, "train_speed(iter/s)": 0.411474 }, { "acc": 0.93052635, "epoch": 2.2883329271924837, "grad_norm": 6.117426872253418, "learning_rate": 4.225232167162109e-06, "loss": 0.37382236, "memory(GiB)": 34.88, "step": 84515, "train_speed(iter/s)": 0.411475 }, { "acc": 0.93192568, "epoch": 2.288468307475699, "grad_norm": 10.839350700378418, "learning_rate": 4.224679377044642e-06, "loss": 0.40522738, "memory(GiB)": 34.88, "step": 84520, "train_speed(iter/s)": 0.411476 }, { "acc": 0.93461685, "epoch": 2.288603687758915, "grad_norm": 4.099759578704834, "learning_rate": 4.224126596645852e-06, "loss": 0.34098668, "memory(GiB)": 34.88, "step": 84525, "train_speed(iter/s)": 0.411477 }, { "acc": 0.92445803, "epoch": 2.2887390680421302, "grad_norm": 12.530097961425781, "learning_rate": 4.223573825972665e-06, "loss": 0.36724145, "memory(GiB)": 34.88, "step": 84530, "train_speed(iter/s)": 0.411479 }, { "acc": 0.93980846, "epoch": 2.288874448325346, "grad_norm": 5.8004984855651855, "learning_rate": 4.223021065032006e-06, "loss": 0.34525735, "memory(GiB)": 34.88, "step": 84535, "train_speed(iter/s)": 0.41148 }, { "acc": 0.91094894, "epoch": 2.2890098286085614, "grad_norm": 9.41021728515625, "learning_rate": 4.2224683138307965e-06, "loss": 0.50444355, "memory(GiB)": 34.88, "step": 84540, "train_speed(iter/s)": 0.411481 }, { "acc": 0.935672, "epoch": 2.289145208891777, "grad_norm": 5.987573623657227, "learning_rate": 4.221915572375963e-06, "loss": 0.36954794, "memory(GiB)": 34.88, "step": 84545, "train_speed(iter/s)": 0.411483 }, { "acc": 0.91584044, "epoch": 2.2892805891749926, "grad_norm": 8.135313987731934, "learning_rate": 4.22136284067443e-06, "loss": 0.49513912, "memory(GiB)": 34.88, "step": 84550, "train_speed(iter/s)": 0.411484 }, { "acc": 0.92983875, "epoch": 2.289415969458208, "grad_norm": 9.199522018432617, "learning_rate": 4.2208101187331205e-06, "loss": 0.383321, "memory(GiB)": 34.88, "step": 84555, "train_speed(iter/s)": 0.411485 }, { "acc": 0.9210146, "epoch": 2.2895513497414237, "grad_norm": 6.822688579559326, "learning_rate": 4.22025740655896e-06, "loss": 0.46527052, "memory(GiB)": 34.88, "step": 84560, "train_speed(iter/s)": 0.411487 }, { "acc": 0.94051809, "epoch": 2.289686730024639, "grad_norm": 5.8925557136535645, "learning_rate": 4.219704704158869e-06, "loss": 0.2704577, "memory(GiB)": 34.88, "step": 84565, "train_speed(iter/s)": 0.411488 }, { "acc": 0.92673368, "epoch": 2.289822110307855, "grad_norm": 6.877954959869385, "learning_rate": 4.219152011539776e-06, "loss": 0.3944401, "memory(GiB)": 34.88, "step": 84570, "train_speed(iter/s)": 0.411489 }, { "acc": 0.92373943, "epoch": 2.2899574905910702, "grad_norm": 6.555420875549316, "learning_rate": 4.218599328708601e-06, "loss": 0.3816608, "memory(GiB)": 34.88, "step": 84575, "train_speed(iter/s)": 0.41149 }, { "acc": 0.92260208, "epoch": 2.290092870874286, "grad_norm": 8.458306312561035, "learning_rate": 4.218046655672268e-06, "loss": 0.37664185, "memory(GiB)": 34.88, "step": 84580, "train_speed(iter/s)": 0.411492 }, { "acc": 0.93818684, "epoch": 2.2902282511575014, "grad_norm": 13.688680648803711, "learning_rate": 4.2174939924377e-06, "loss": 0.35380685, "memory(GiB)": 34.88, "step": 84585, "train_speed(iter/s)": 0.411493 }, { "acc": 0.91917028, "epoch": 2.2903636314407168, "grad_norm": 15.235040664672852, "learning_rate": 4.2169413390118216e-06, "loss": 0.38062315, "memory(GiB)": 34.88, "step": 84590, "train_speed(iter/s)": 0.411494 }, { "acc": 0.92486763, "epoch": 2.2904990117239326, "grad_norm": 7.956745147705078, "learning_rate": 4.216388695401555e-06, "loss": 0.38849652, "memory(GiB)": 34.88, "step": 84595, "train_speed(iter/s)": 0.411496 }, { "acc": 0.9309166, "epoch": 2.290634392007148, "grad_norm": 6.0565314292907715, "learning_rate": 4.215836061613823e-06, "loss": 0.35420971, "memory(GiB)": 34.88, "step": 84600, "train_speed(iter/s)": 0.411497 }, { "acc": 0.93571501, "epoch": 2.2907697722903637, "grad_norm": 1.9244880676269531, "learning_rate": 4.215283437655549e-06, "loss": 0.38044403, "memory(GiB)": 34.88, "step": 84605, "train_speed(iter/s)": 0.411498 }, { "acc": 0.9408164, "epoch": 2.290905152573579, "grad_norm": 13.314477920532227, "learning_rate": 4.214730823533655e-06, "loss": 0.29980035, "memory(GiB)": 34.88, "step": 84610, "train_speed(iter/s)": 0.411499 }, { "acc": 0.92977123, "epoch": 2.291040532856795, "grad_norm": 5.688919544219971, "learning_rate": 4.2141782192550654e-06, "loss": 0.33785157, "memory(GiB)": 34.88, "step": 84615, "train_speed(iter/s)": 0.411501 }, { "acc": 0.92311649, "epoch": 2.2911759131400102, "grad_norm": 7.360840320587158, "learning_rate": 4.2136256248267e-06, "loss": 0.4084383, "memory(GiB)": 34.88, "step": 84620, "train_speed(iter/s)": 0.411502 }, { "acc": 0.91444235, "epoch": 2.2913112934232256, "grad_norm": 36.95454025268555, "learning_rate": 4.213073040255481e-06, "loss": 0.47688513, "memory(GiB)": 34.88, "step": 84625, "train_speed(iter/s)": 0.411503 }, { "acc": 0.91030884, "epoch": 2.2914466737064414, "grad_norm": 8.885848999023438, "learning_rate": 4.212520465548335e-06, "loss": 0.44875565, "memory(GiB)": 34.88, "step": 84630, "train_speed(iter/s)": 0.411505 }, { "acc": 0.91943073, "epoch": 2.2915820539896568, "grad_norm": 14.820568084716797, "learning_rate": 4.211967900712179e-06, "loss": 0.45989022, "memory(GiB)": 34.88, "step": 84635, "train_speed(iter/s)": 0.411506 }, { "acc": 0.9193819, "epoch": 2.2917174342728726, "grad_norm": 6.561489582061768, "learning_rate": 4.211415345753937e-06, "loss": 0.45856776, "memory(GiB)": 34.88, "step": 84640, "train_speed(iter/s)": 0.411507 }, { "acc": 0.92275534, "epoch": 2.291852814556088, "grad_norm": 5.092500686645508, "learning_rate": 4.210862800680531e-06, "loss": 0.44402037, "memory(GiB)": 34.88, "step": 84645, "train_speed(iter/s)": 0.411508 }, { "acc": 0.91870594, "epoch": 2.2919881948393037, "grad_norm": 9.86113452911377, "learning_rate": 4.210310265498883e-06, "loss": 0.43962998, "memory(GiB)": 34.88, "step": 84650, "train_speed(iter/s)": 0.41151 }, { "acc": 0.9260807, "epoch": 2.292123575122519, "grad_norm": 12.646749496459961, "learning_rate": 4.209757740215914e-06, "loss": 0.41144552, "memory(GiB)": 34.88, "step": 84655, "train_speed(iter/s)": 0.411511 }, { "acc": 0.92329235, "epoch": 2.292258955405735, "grad_norm": 9.277838706970215, "learning_rate": 4.209205224838546e-06, "loss": 0.44484234, "memory(GiB)": 34.88, "step": 84660, "train_speed(iter/s)": 0.411512 }, { "acc": 0.92932739, "epoch": 2.2923943356889502, "grad_norm": 13.726629257202148, "learning_rate": 4.2086527193737e-06, "loss": 0.39169149, "memory(GiB)": 34.88, "step": 84665, "train_speed(iter/s)": 0.411513 }, { "acc": 0.90980663, "epoch": 2.292529715972166, "grad_norm": 9.421092987060547, "learning_rate": 4.208100223828297e-06, "loss": 0.47790422, "memory(GiB)": 34.88, "step": 84670, "train_speed(iter/s)": 0.411515 }, { "acc": 0.91581173, "epoch": 2.2926650962553814, "grad_norm": 20.401155471801758, "learning_rate": 4.207547738209258e-06, "loss": 0.43790326, "memory(GiB)": 34.88, "step": 84675, "train_speed(iter/s)": 0.411516 }, { "acc": 0.90268536, "epoch": 2.2928004765385968, "grad_norm": 8.360457420349121, "learning_rate": 4.206995262523504e-06, "loss": 0.53643723, "memory(GiB)": 34.88, "step": 84680, "train_speed(iter/s)": 0.411517 }, { "acc": 0.91207256, "epoch": 2.2929358568218126, "grad_norm": 13.721303939819336, "learning_rate": 4.2064427967779565e-06, "loss": 0.52572412, "memory(GiB)": 34.88, "step": 84685, "train_speed(iter/s)": 0.411518 }, { "acc": 0.929848, "epoch": 2.293071237105028, "grad_norm": 9.692172050476074, "learning_rate": 4.205890340979535e-06, "loss": 0.43286104, "memory(GiB)": 34.88, "step": 84690, "train_speed(iter/s)": 0.41152 }, { "acc": 0.92142515, "epoch": 2.2932066173882437, "grad_norm": 9.462574005126953, "learning_rate": 4.205337895135162e-06, "loss": 0.42318211, "memory(GiB)": 34.88, "step": 84695, "train_speed(iter/s)": 0.411521 }, { "acc": 0.91953468, "epoch": 2.293341997671459, "grad_norm": 11.302641868591309, "learning_rate": 4.2047854592517575e-06, "loss": 0.35420761, "memory(GiB)": 34.88, "step": 84700, "train_speed(iter/s)": 0.411522 }, { "acc": 0.92627258, "epoch": 2.293477377954675, "grad_norm": 3.2770721912384033, "learning_rate": 4.204233033336238e-06, "loss": 0.39641929, "memory(GiB)": 34.88, "step": 84705, "train_speed(iter/s)": 0.411523 }, { "acc": 0.9398407, "epoch": 2.2936127582378902, "grad_norm": 5.007907867431641, "learning_rate": 4.20368061739553e-06, "loss": 0.31750607, "memory(GiB)": 34.88, "step": 84710, "train_speed(iter/s)": 0.411524 }, { "acc": 0.92648325, "epoch": 2.2937481385211056, "grad_norm": 6.238964080810547, "learning_rate": 4.203128211436549e-06, "loss": 0.43998485, "memory(GiB)": 34.88, "step": 84715, "train_speed(iter/s)": 0.411526 }, { "acc": 0.92975225, "epoch": 2.2938835188043214, "grad_norm": 9.96360969543457, "learning_rate": 4.202575815466217e-06, "loss": 0.38578904, "memory(GiB)": 34.88, "step": 84720, "train_speed(iter/s)": 0.411527 }, { "acc": 0.92152929, "epoch": 2.2940188990875368, "grad_norm": 5.59400749206543, "learning_rate": 4.202023429491453e-06, "loss": 0.40884843, "memory(GiB)": 34.88, "step": 84725, "train_speed(iter/s)": 0.411528 }, { "acc": 0.9385747, "epoch": 2.2941542793707526, "grad_norm": 10.138155937194824, "learning_rate": 4.201471053519178e-06, "loss": 0.33264713, "memory(GiB)": 34.88, "step": 84730, "train_speed(iter/s)": 0.411529 }, { "acc": 0.91977139, "epoch": 2.294289659653968, "grad_norm": 10.577874183654785, "learning_rate": 4.20091868755631e-06, "loss": 0.49464417, "memory(GiB)": 34.88, "step": 84735, "train_speed(iter/s)": 0.411531 }, { "acc": 0.92674379, "epoch": 2.2944250399371837, "grad_norm": 8.668452262878418, "learning_rate": 4.2003663316097684e-06, "loss": 0.4056241, "memory(GiB)": 34.88, "step": 84740, "train_speed(iter/s)": 0.411532 }, { "acc": 0.9393898, "epoch": 2.294560420220399, "grad_norm": 4.775858402252197, "learning_rate": 4.199813985686475e-06, "loss": 0.34418182, "memory(GiB)": 34.88, "step": 84745, "train_speed(iter/s)": 0.411533 }, { "acc": 0.91841354, "epoch": 2.2946958005036144, "grad_norm": 18.523906707763672, "learning_rate": 4.199261649793345e-06, "loss": 0.4805356, "memory(GiB)": 34.88, "step": 84750, "train_speed(iter/s)": 0.411534 }, { "acc": 0.94212093, "epoch": 2.2948311807868302, "grad_norm": 12.802483558654785, "learning_rate": 4.198709323937302e-06, "loss": 0.30921407, "memory(GiB)": 34.88, "step": 84755, "train_speed(iter/s)": 0.411536 }, { "acc": 0.95632048, "epoch": 2.2949665610700456, "grad_norm": 9.148458480834961, "learning_rate": 4.1981570081252616e-06, "loss": 0.20325775, "memory(GiB)": 34.88, "step": 84760, "train_speed(iter/s)": 0.411537 }, { "acc": 0.92468529, "epoch": 2.2951019413532614, "grad_norm": 6.383551597595215, "learning_rate": 4.1976047023641434e-06, "loss": 0.50229845, "memory(GiB)": 34.88, "step": 84765, "train_speed(iter/s)": 0.411538 }, { "acc": 0.93933372, "epoch": 2.2952373216364768, "grad_norm": 8.567100524902344, "learning_rate": 4.197052406660867e-06, "loss": 0.27055192, "memory(GiB)": 34.88, "step": 84770, "train_speed(iter/s)": 0.411539 }, { "acc": 0.90743961, "epoch": 2.2953727019196926, "grad_norm": 7.51589298248291, "learning_rate": 4.196500121022349e-06, "loss": 0.47271252, "memory(GiB)": 34.88, "step": 84775, "train_speed(iter/s)": 0.411541 }, { "acc": 0.93731012, "epoch": 2.295508082202908, "grad_norm": 6.002624034881592, "learning_rate": 4.195947845455512e-06, "loss": 0.34565976, "memory(GiB)": 34.88, "step": 84780, "train_speed(iter/s)": 0.411542 }, { "acc": 0.90917797, "epoch": 2.2956434624861233, "grad_norm": 12.409343719482422, "learning_rate": 4.195395579967271e-06, "loss": 0.47982531, "memory(GiB)": 34.88, "step": 84785, "train_speed(iter/s)": 0.411543 }, { "acc": 0.93941679, "epoch": 2.295778842769339, "grad_norm": 4.710396766662598, "learning_rate": 4.194843324564545e-06, "loss": 0.33950362, "memory(GiB)": 34.88, "step": 84790, "train_speed(iter/s)": 0.411544 }, { "acc": 0.91179266, "epoch": 2.2959142230525544, "grad_norm": 7.401089668273926, "learning_rate": 4.194291079254251e-06, "loss": 0.48213558, "memory(GiB)": 34.88, "step": 84795, "train_speed(iter/s)": 0.411546 }, { "acc": 0.92094698, "epoch": 2.2960496033357702, "grad_norm": 8.047118186950684, "learning_rate": 4.193738844043309e-06, "loss": 0.44564409, "memory(GiB)": 34.88, "step": 84800, "train_speed(iter/s)": 0.411547 }, { "acc": 0.90922813, "epoch": 2.2961849836189856, "grad_norm": 8.036642074584961, "learning_rate": 4.193186618938634e-06, "loss": 0.50139251, "memory(GiB)": 34.88, "step": 84805, "train_speed(iter/s)": 0.411548 }, { "acc": 0.91304255, "epoch": 2.2963203639022014, "grad_norm": 5.81002140045166, "learning_rate": 4.192634403947145e-06, "loss": 0.44988899, "memory(GiB)": 34.88, "step": 84810, "train_speed(iter/s)": 0.411549 }, { "acc": 0.9388938, "epoch": 2.2964557441854168, "grad_norm": 7.948357582092285, "learning_rate": 4.192082199075762e-06, "loss": 0.36245601, "memory(GiB)": 34.88, "step": 84815, "train_speed(iter/s)": 0.411551 }, { "acc": 0.92991657, "epoch": 2.2965911244686326, "grad_norm": 6.161441802978516, "learning_rate": 4.1915300043314e-06, "loss": 0.40498128, "memory(GiB)": 34.88, "step": 84820, "train_speed(iter/s)": 0.411552 }, { "acc": 0.93245049, "epoch": 2.296726504751848, "grad_norm": 21.516780853271484, "learning_rate": 4.190977819720978e-06, "loss": 0.42780361, "memory(GiB)": 34.88, "step": 84825, "train_speed(iter/s)": 0.411553 }, { "acc": 0.91444683, "epoch": 2.2968618850350637, "grad_norm": 8.966154098510742, "learning_rate": 4.190425645251411e-06, "loss": 0.42360678, "memory(GiB)": 34.88, "step": 84830, "train_speed(iter/s)": 0.411554 }, { "acc": 0.92669992, "epoch": 2.296997265318279, "grad_norm": 4.946771144866943, "learning_rate": 4.189873480929617e-06, "loss": 0.35500221, "memory(GiB)": 34.88, "step": 84835, "train_speed(iter/s)": 0.411556 }, { "acc": 0.91041412, "epoch": 2.2971326456014944, "grad_norm": 19.378173828125, "learning_rate": 4.189321326762513e-06, "loss": 0.51750417, "memory(GiB)": 34.88, "step": 84840, "train_speed(iter/s)": 0.411557 }, { "acc": 0.91096649, "epoch": 2.2972680258847102, "grad_norm": 9.748554229736328, "learning_rate": 4.188769182757015e-06, "loss": 0.52840776, "memory(GiB)": 34.88, "step": 84845, "train_speed(iter/s)": 0.411558 }, { "acc": 0.94511967, "epoch": 2.2974034061679256, "grad_norm": 23.98978614807129, "learning_rate": 4.188217048920042e-06, "loss": 0.31630478, "memory(GiB)": 34.88, "step": 84850, "train_speed(iter/s)": 0.411559 }, { "acc": 0.92746525, "epoch": 2.2975387864511414, "grad_norm": 6.313076496124268, "learning_rate": 4.187664925258508e-06, "loss": 0.37607927, "memory(GiB)": 34.88, "step": 84855, "train_speed(iter/s)": 0.411561 }, { "acc": 0.92794209, "epoch": 2.2976741667343568, "grad_norm": 6.031290054321289, "learning_rate": 4.1871128117793306e-06, "loss": 0.35319815, "memory(GiB)": 34.88, "step": 84860, "train_speed(iter/s)": 0.411562 }, { "acc": 0.9265008, "epoch": 2.2978095470175726, "grad_norm": 4.626092910766602, "learning_rate": 4.186560708489427e-06, "loss": 0.39132075, "memory(GiB)": 34.88, "step": 84865, "train_speed(iter/s)": 0.411563 }, { "acc": 0.91993542, "epoch": 2.297944927300788, "grad_norm": 8.689655303955078, "learning_rate": 4.186008615395713e-06, "loss": 0.35748122, "memory(GiB)": 34.88, "step": 84870, "train_speed(iter/s)": 0.411565 }, { "acc": 0.92621737, "epoch": 2.2980803075840033, "grad_norm": 4.732414722442627, "learning_rate": 4.185456532505102e-06, "loss": 0.42205777, "memory(GiB)": 34.88, "step": 84875, "train_speed(iter/s)": 0.411566 }, { "acc": 0.9377861, "epoch": 2.298215687867219, "grad_norm": 2.3941915035247803, "learning_rate": 4.184904459824513e-06, "loss": 0.31092973, "memory(GiB)": 34.88, "step": 84880, "train_speed(iter/s)": 0.411567 }, { "acc": 0.91779346, "epoch": 2.2983510681504344, "grad_norm": 6.0248703956604, "learning_rate": 4.1843523973608616e-06, "loss": 0.46751189, "memory(GiB)": 34.88, "step": 84885, "train_speed(iter/s)": 0.411569 }, { "acc": 0.95181179, "epoch": 2.2984864484336502, "grad_norm": 5.096108436584473, "learning_rate": 4.18380034512106e-06, "loss": 0.23194785, "memory(GiB)": 34.88, "step": 84890, "train_speed(iter/s)": 0.41157 }, { "acc": 0.93005753, "epoch": 2.2986218287168656, "grad_norm": 12.853240013122559, "learning_rate": 4.183248303112029e-06, "loss": 0.34287341, "memory(GiB)": 34.88, "step": 84895, "train_speed(iter/s)": 0.411571 }, { "acc": 0.91221819, "epoch": 2.2987572090000814, "grad_norm": 11.626544952392578, "learning_rate": 4.1826962713406795e-06, "loss": 0.58114967, "memory(GiB)": 34.88, "step": 84900, "train_speed(iter/s)": 0.411572 }, { "acc": 0.91553183, "epoch": 2.2988925892832968, "grad_norm": 6.170465469360352, "learning_rate": 4.18214424981393e-06, "loss": 0.470471, "memory(GiB)": 34.88, "step": 84905, "train_speed(iter/s)": 0.411574 }, { "acc": 0.92806072, "epoch": 2.299027969566512, "grad_norm": 5.293057441711426, "learning_rate": 4.181592238538695e-06, "loss": 0.39370384, "memory(GiB)": 34.88, "step": 84910, "train_speed(iter/s)": 0.411575 }, { "acc": 0.9338624, "epoch": 2.299163349849728, "grad_norm": 9.23561954498291, "learning_rate": 4.181040237521888e-06, "loss": 0.35519028, "memory(GiB)": 34.88, "step": 84915, "train_speed(iter/s)": 0.411576 }, { "acc": 0.94097548, "epoch": 2.2992987301329433, "grad_norm": 6.02609920501709, "learning_rate": 4.180488246770425e-06, "loss": 0.33491912, "memory(GiB)": 34.88, "step": 84920, "train_speed(iter/s)": 0.411577 }, { "acc": 0.91963673, "epoch": 2.299434110416159, "grad_norm": 18.625736236572266, "learning_rate": 4.179936266291219e-06, "loss": 0.39848762, "memory(GiB)": 34.88, "step": 84925, "train_speed(iter/s)": 0.411579 }, { "acc": 0.91959953, "epoch": 2.2995694906993744, "grad_norm": 34.666019439697266, "learning_rate": 4.179384296091188e-06, "loss": 0.46939077, "memory(GiB)": 34.88, "step": 84930, "train_speed(iter/s)": 0.41158 }, { "acc": 0.9239255, "epoch": 2.2997048709825902, "grad_norm": 4.778918743133545, "learning_rate": 4.1788323361772434e-06, "loss": 0.38990455, "memory(GiB)": 34.88, "step": 84935, "train_speed(iter/s)": 0.411581 }, { "acc": 0.92413359, "epoch": 2.2998402512658056, "grad_norm": 25.72207260131836, "learning_rate": 4.178280386556302e-06, "loss": 0.40550389, "memory(GiB)": 34.88, "step": 84940, "train_speed(iter/s)": 0.411582 }, { "acc": 0.91151514, "epoch": 2.299975631549021, "grad_norm": 27.084806442260742, "learning_rate": 4.1777284472352755e-06, "loss": 0.50640383, "memory(GiB)": 34.88, "step": 84945, "train_speed(iter/s)": 0.411584 }, { "acc": 0.92894545, "epoch": 2.3001110118322368, "grad_norm": 7.753818988800049, "learning_rate": 4.177176518221081e-06, "loss": 0.43141046, "memory(GiB)": 34.88, "step": 84950, "train_speed(iter/s)": 0.411585 }, { "acc": 0.90610027, "epoch": 2.300246392115452, "grad_norm": 25.48175048828125, "learning_rate": 4.176624599520631e-06, "loss": 0.54157143, "memory(GiB)": 34.88, "step": 84955, "train_speed(iter/s)": 0.411586 }, { "acc": 0.91883545, "epoch": 2.300381772398668, "grad_norm": 8.746437072753906, "learning_rate": 4.176072691140838e-06, "loss": 0.42020597, "memory(GiB)": 34.88, "step": 84960, "train_speed(iter/s)": 0.411587 }, { "acc": 0.92127419, "epoch": 2.3005171526818833, "grad_norm": 7.944967746734619, "learning_rate": 4.175520793088618e-06, "loss": 0.42230334, "memory(GiB)": 34.88, "step": 84965, "train_speed(iter/s)": 0.411588 }, { "acc": 0.9160553, "epoch": 2.300652532965099, "grad_norm": 5.233234882354736, "learning_rate": 4.174968905370883e-06, "loss": 0.42212286, "memory(GiB)": 34.88, "step": 84970, "train_speed(iter/s)": 0.411589 }, { "acc": 0.92728634, "epoch": 2.3007879132483144, "grad_norm": 7.818029403686523, "learning_rate": 4.174417027994547e-06, "loss": 0.4454145, "memory(GiB)": 34.88, "step": 84975, "train_speed(iter/s)": 0.411591 }, { "acc": 0.92958717, "epoch": 2.3009232935315302, "grad_norm": 7.241765975952148, "learning_rate": 4.173865160966524e-06, "loss": 0.39656768, "memory(GiB)": 34.88, "step": 84980, "train_speed(iter/s)": 0.411592 }, { "acc": 0.94075546, "epoch": 2.3010586738147456, "grad_norm": 9.056093215942383, "learning_rate": 4.173313304293727e-06, "loss": 0.33500185, "memory(GiB)": 34.88, "step": 84985, "train_speed(iter/s)": 0.411593 }, { "acc": 0.92822247, "epoch": 2.3011940540979614, "grad_norm": 10.231521606445312, "learning_rate": 4.172761457983068e-06, "loss": 0.4202702, "memory(GiB)": 34.88, "step": 84990, "train_speed(iter/s)": 0.411594 }, { "acc": 0.94477358, "epoch": 2.3013294343811768, "grad_norm": 11.635603904724121, "learning_rate": 4.172209622041461e-06, "loss": 0.29861624, "memory(GiB)": 34.88, "step": 84995, "train_speed(iter/s)": 0.411596 }, { "acc": 0.92979984, "epoch": 2.301464814664392, "grad_norm": 5.249851226806641, "learning_rate": 4.17165779647582e-06, "loss": 0.37679336, "memory(GiB)": 34.88, "step": 85000, "train_speed(iter/s)": 0.411597 }, { "acc": 0.91805096, "epoch": 2.301600194947608, "grad_norm": 9.964049339294434, "learning_rate": 4.171105981293054e-06, "loss": 0.44434471, "memory(GiB)": 34.88, "step": 85005, "train_speed(iter/s)": 0.411598 }, { "acc": 0.9231966, "epoch": 2.3017355752308233, "grad_norm": 10.2454252243042, "learning_rate": 4.170554176500079e-06, "loss": 0.4191165, "memory(GiB)": 34.88, "step": 85010, "train_speed(iter/s)": 0.4116 }, { "acc": 0.91573353, "epoch": 2.301870955514039, "grad_norm": 10.785750389099121, "learning_rate": 4.170002382103806e-06, "loss": 0.50625906, "memory(GiB)": 34.88, "step": 85015, "train_speed(iter/s)": 0.411601 }, { "acc": 0.92858572, "epoch": 2.3020063357972544, "grad_norm": 9.18071174621582, "learning_rate": 4.169450598111148e-06, "loss": 0.38855314, "memory(GiB)": 34.88, "step": 85020, "train_speed(iter/s)": 0.411602 }, { "acc": 0.916189, "epoch": 2.3021417160804702, "grad_norm": 13.067848205566406, "learning_rate": 4.168898824529016e-06, "loss": 0.49948096, "memory(GiB)": 34.88, "step": 85025, "train_speed(iter/s)": 0.411604 }, { "acc": 0.92930927, "epoch": 2.3022770963636856, "grad_norm": 4.297738552093506, "learning_rate": 4.168347061364323e-06, "loss": 0.40954995, "memory(GiB)": 34.88, "step": 85030, "train_speed(iter/s)": 0.411605 }, { "acc": 0.92361965, "epoch": 2.302412476646901, "grad_norm": 7.376601219177246, "learning_rate": 4.167795308623982e-06, "loss": 0.37878108, "memory(GiB)": 34.88, "step": 85035, "train_speed(iter/s)": 0.411606 }, { "acc": 0.93554239, "epoch": 2.3025478569301168, "grad_norm": 9.38565444946289, "learning_rate": 4.167243566314902e-06, "loss": 0.29366448, "memory(GiB)": 34.88, "step": 85040, "train_speed(iter/s)": 0.411608 }, { "acc": 0.91045351, "epoch": 2.302683237213332, "grad_norm": 82.85748291015625, "learning_rate": 4.166691834443997e-06, "loss": 0.49767451, "memory(GiB)": 34.88, "step": 85045, "train_speed(iter/s)": 0.411609 }, { "acc": 0.92663975, "epoch": 2.302818617496548, "grad_norm": 4.708950519561768, "learning_rate": 4.166140113018176e-06, "loss": 0.3688066, "memory(GiB)": 34.88, "step": 85050, "train_speed(iter/s)": 0.41161 }, { "acc": 0.92958879, "epoch": 2.3029539977797633, "grad_norm": 19.07329559326172, "learning_rate": 4.1655884020443545e-06, "loss": 0.4852397, "memory(GiB)": 34.88, "step": 85055, "train_speed(iter/s)": 0.411612 }, { "acc": 0.91431007, "epoch": 2.303089378062979, "grad_norm": 23.18054962158203, "learning_rate": 4.16503670152944e-06, "loss": 0.47198653, "memory(GiB)": 34.88, "step": 85060, "train_speed(iter/s)": 0.411613 }, { "acc": 0.9249197, "epoch": 2.3032247583461944, "grad_norm": 8.964149475097656, "learning_rate": 4.164485011480345e-06, "loss": 0.39654622, "memory(GiB)": 34.88, "step": 85065, "train_speed(iter/s)": 0.411614 }, { "acc": 0.92729664, "epoch": 2.30336013862941, "grad_norm": 15.251029014587402, "learning_rate": 4.163933331903981e-06, "loss": 0.41324501, "memory(GiB)": 34.88, "step": 85070, "train_speed(iter/s)": 0.411615 }, { "acc": 0.92648163, "epoch": 2.3034955189126256, "grad_norm": 5.847185134887695, "learning_rate": 4.163381662807257e-06, "loss": 0.4216095, "memory(GiB)": 34.88, "step": 85075, "train_speed(iter/s)": 0.411617 }, { "acc": 0.9226902, "epoch": 2.303630899195841, "grad_norm": 7.495336532592773, "learning_rate": 4.162830004197089e-06, "loss": 0.34597797, "memory(GiB)": 34.88, "step": 85080, "train_speed(iter/s)": 0.411618 }, { "acc": 0.92354927, "epoch": 2.3037662794790568, "grad_norm": 9.604104042053223, "learning_rate": 4.16227835608038e-06, "loss": 0.45005512, "memory(GiB)": 34.88, "step": 85085, "train_speed(iter/s)": 0.411619 }, { "acc": 0.92096024, "epoch": 2.303901659762272, "grad_norm": 6.522234916687012, "learning_rate": 4.161726718464045e-06, "loss": 0.38824129, "memory(GiB)": 34.88, "step": 85090, "train_speed(iter/s)": 0.41162 }, { "acc": 0.92854414, "epoch": 2.304037040045488, "grad_norm": 5.269369602203369, "learning_rate": 4.161175091354994e-06, "loss": 0.40766344, "memory(GiB)": 34.88, "step": 85095, "train_speed(iter/s)": 0.411622 }, { "acc": 0.91468906, "epoch": 2.3041724203287033, "grad_norm": 21.14751625061035, "learning_rate": 4.160623474760137e-06, "loss": 0.47405405, "memory(GiB)": 34.88, "step": 85100, "train_speed(iter/s)": 0.411623 }, { "acc": 0.93280354, "epoch": 2.3043078006119186, "grad_norm": 5.088217258453369, "learning_rate": 4.160071868686382e-06, "loss": 0.3304764, "memory(GiB)": 34.88, "step": 85105, "train_speed(iter/s)": 0.411624 }, { "acc": 0.91598883, "epoch": 2.3044431808951344, "grad_norm": 8.272408485412598, "learning_rate": 4.159520273140642e-06, "loss": 0.4107378, "memory(GiB)": 34.88, "step": 85110, "train_speed(iter/s)": 0.411625 }, { "acc": 0.94008694, "epoch": 2.30457856117835, "grad_norm": 6.024895191192627, "learning_rate": 4.158968688129827e-06, "loss": 0.29062531, "memory(GiB)": 34.88, "step": 85115, "train_speed(iter/s)": 0.411626 }, { "acc": 0.90830593, "epoch": 2.3047139414615656, "grad_norm": 14.891338348388672, "learning_rate": 4.158417113660844e-06, "loss": 0.56967745, "memory(GiB)": 34.88, "step": 85120, "train_speed(iter/s)": 0.411628 }, { "acc": 0.90998077, "epoch": 2.304849321744781, "grad_norm": 10.192655563354492, "learning_rate": 4.157865549740604e-06, "loss": 0.45838776, "memory(GiB)": 34.88, "step": 85125, "train_speed(iter/s)": 0.411629 }, { "acc": 0.94045086, "epoch": 2.3049847020279968, "grad_norm": 30.096946716308594, "learning_rate": 4.157313996376015e-06, "loss": 0.39925551, "memory(GiB)": 34.88, "step": 85130, "train_speed(iter/s)": 0.41163 }, { "acc": 0.92175713, "epoch": 2.305120082311212, "grad_norm": 7.825564384460449, "learning_rate": 4.15676245357399e-06, "loss": 0.47631974, "memory(GiB)": 34.88, "step": 85135, "train_speed(iter/s)": 0.411632 }, { "acc": 0.91196136, "epoch": 2.305255462594428, "grad_norm": 20.416879653930664, "learning_rate": 4.156210921341433e-06, "loss": 0.51571088, "memory(GiB)": 34.88, "step": 85140, "train_speed(iter/s)": 0.411633 }, { "acc": 0.92383671, "epoch": 2.3053908428776433, "grad_norm": 14.440406799316406, "learning_rate": 4.155659399685257e-06, "loss": 0.44790754, "memory(GiB)": 34.88, "step": 85145, "train_speed(iter/s)": 0.411634 }, { "acc": 0.9015995, "epoch": 2.305526223160859, "grad_norm": 6.093429088592529, "learning_rate": 4.15510788861237e-06, "loss": 0.58334475, "memory(GiB)": 34.88, "step": 85150, "train_speed(iter/s)": 0.411636 }, { "acc": 0.93920822, "epoch": 2.3056616034440744, "grad_norm": 6.301050186157227, "learning_rate": 4.15455638812968e-06, "loss": 0.36779456, "memory(GiB)": 34.88, "step": 85155, "train_speed(iter/s)": 0.411637 }, { "acc": 0.91995611, "epoch": 2.30579698372729, "grad_norm": 10.852754592895508, "learning_rate": 4.154004898244096e-06, "loss": 0.37943187, "memory(GiB)": 34.88, "step": 85160, "train_speed(iter/s)": 0.411638 }, { "acc": 0.9232584, "epoch": 2.3059323640105056, "grad_norm": 7.805123329162598, "learning_rate": 4.153453418962528e-06, "loss": 0.41535559, "memory(GiB)": 34.88, "step": 85165, "train_speed(iter/s)": 0.411639 }, { "acc": 0.92644529, "epoch": 2.306067744293721, "grad_norm": 10.360610961914062, "learning_rate": 4.152901950291882e-06, "loss": 0.43856645, "memory(GiB)": 34.88, "step": 85170, "train_speed(iter/s)": 0.411641 }, { "acc": 0.9354166, "epoch": 2.3062031245769368, "grad_norm": 5.083759784698486, "learning_rate": 4.1523504922390655e-06, "loss": 0.34981947, "memory(GiB)": 34.88, "step": 85175, "train_speed(iter/s)": 0.411642 }, { "acc": 0.92059813, "epoch": 2.306338504860152, "grad_norm": 7.58599853515625, "learning_rate": 4.15179904481099e-06, "loss": 0.38268828, "memory(GiB)": 34.88, "step": 85180, "train_speed(iter/s)": 0.411643 }, { "acc": 0.9175436, "epoch": 2.306473885143368, "grad_norm": 8.780712127685547, "learning_rate": 4.1512476080145606e-06, "loss": 0.46921864, "memory(GiB)": 34.88, "step": 85185, "train_speed(iter/s)": 0.411645 }, { "acc": 0.91938877, "epoch": 2.3066092654265833, "grad_norm": 8.65446662902832, "learning_rate": 4.150696181856685e-06, "loss": 0.43380208, "memory(GiB)": 34.88, "step": 85190, "train_speed(iter/s)": 0.411646 }, { "acc": 0.91433229, "epoch": 2.3067446457097986, "grad_norm": 10.078232765197754, "learning_rate": 4.150144766344274e-06, "loss": 0.49140015, "memory(GiB)": 34.88, "step": 85195, "train_speed(iter/s)": 0.411647 }, { "acc": 0.92913542, "epoch": 2.3068800259930144, "grad_norm": 9.90274429321289, "learning_rate": 4.1495933614842335e-06, "loss": 0.39350662, "memory(GiB)": 34.88, "step": 85200, "train_speed(iter/s)": 0.411648 }, { "acc": 0.92723837, "epoch": 2.30701540627623, "grad_norm": 13.477804183959961, "learning_rate": 4.149041967283471e-06, "loss": 0.42483888, "memory(GiB)": 34.88, "step": 85205, "train_speed(iter/s)": 0.41165 }, { "acc": 0.91582975, "epoch": 2.3071507865594456, "grad_norm": 8.325577735900879, "learning_rate": 4.148490583748892e-06, "loss": 0.49609098, "memory(GiB)": 34.88, "step": 85210, "train_speed(iter/s)": 0.411651 }, { "acc": 0.92658281, "epoch": 2.307286166842661, "grad_norm": 7.8013691902160645, "learning_rate": 4.147939210887406e-06, "loss": 0.36493886, "memory(GiB)": 34.88, "step": 85215, "train_speed(iter/s)": 0.411652 }, { "acc": 0.92073383, "epoch": 2.3074215471258768, "grad_norm": 8.118782043457031, "learning_rate": 4.147387848705918e-06, "loss": 0.40425496, "memory(GiB)": 34.88, "step": 85220, "train_speed(iter/s)": 0.411653 }, { "acc": 0.93390589, "epoch": 2.307556927409092, "grad_norm": 8.018899917602539, "learning_rate": 4.146836497211337e-06, "loss": 0.31287384, "memory(GiB)": 34.88, "step": 85225, "train_speed(iter/s)": 0.411655 }, { "acc": 0.92052631, "epoch": 2.3076923076923075, "grad_norm": 7.4131927490234375, "learning_rate": 4.146285156410569e-06, "loss": 0.45111008, "memory(GiB)": 34.88, "step": 85230, "train_speed(iter/s)": 0.411656 }, { "acc": 0.9343071, "epoch": 2.3078276879755233, "grad_norm": 11.404579162597656, "learning_rate": 4.145733826310519e-06, "loss": 0.37111065, "memory(GiB)": 34.88, "step": 85235, "train_speed(iter/s)": 0.411657 }, { "acc": 0.92944298, "epoch": 2.3079630682587386, "grad_norm": 6.106385231018066, "learning_rate": 4.145182506918098e-06, "loss": 0.39527123, "memory(GiB)": 34.88, "step": 85240, "train_speed(iter/s)": 0.411659 }, { "acc": 0.91336479, "epoch": 2.3080984485419545, "grad_norm": 9.324631690979004, "learning_rate": 4.1446311982402075e-06, "loss": 0.45206761, "memory(GiB)": 34.88, "step": 85245, "train_speed(iter/s)": 0.41166 }, { "acc": 0.93638, "epoch": 2.30823382882517, "grad_norm": 8.452093124389648, "learning_rate": 4.144079900283755e-06, "loss": 0.32289646, "memory(GiB)": 34.88, "step": 85250, "train_speed(iter/s)": 0.411661 }, { "acc": 0.91981983, "epoch": 2.3083692091083856, "grad_norm": 13.49689769744873, "learning_rate": 4.143528613055647e-06, "loss": 0.44896812, "memory(GiB)": 34.88, "step": 85255, "train_speed(iter/s)": 0.411663 }, { "acc": 0.93650179, "epoch": 2.308504589391601, "grad_norm": 6.535211563110352, "learning_rate": 4.14297733656279e-06, "loss": 0.37191224, "memory(GiB)": 34.88, "step": 85260, "train_speed(iter/s)": 0.411664 }, { "acc": 0.93948517, "epoch": 2.3086399696748163, "grad_norm": 6.059697151184082, "learning_rate": 4.14242607081209e-06, "loss": 0.32723193, "memory(GiB)": 34.88, "step": 85265, "train_speed(iter/s)": 0.411665 }, { "acc": 0.92838879, "epoch": 2.308775349958032, "grad_norm": 7.245423316955566, "learning_rate": 4.141874815810451e-06, "loss": 0.40753527, "memory(GiB)": 34.88, "step": 85270, "train_speed(iter/s)": 0.411666 }, { "acc": 0.91930656, "epoch": 2.3089107302412475, "grad_norm": 12.880609512329102, "learning_rate": 4.14132357156478e-06, "loss": 0.43973465, "memory(GiB)": 34.88, "step": 85275, "train_speed(iter/s)": 0.411668 }, { "acc": 0.93586731, "epoch": 2.3090461105244633, "grad_norm": 11.660765647888184, "learning_rate": 4.140772338081982e-06, "loss": 0.35887547, "memory(GiB)": 34.88, "step": 85280, "train_speed(iter/s)": 0.411669 }, { "acc": 0.90271873, "epoch": 2.3091814908076786, "grad_norm": 10.608149528503418, "learning_rate": 4.1402211153689635e-06, "loss": 0.5665657, "memory(GiB)": 34.88, "step": 85285, "train_speed(iter/s)": 0.41167 }, { "acc": 0.92937164, "epoch": 2.3093168710908945, "grad_norm": 9.010822296142578, "learning_rate": 4.139669903432627e-06, "loss": 0.31566877, "memory(GiB)": 34.88, "step": 85290, "train_speed(iter/s)": 0.411671 }, { "acc": 0.90965805, "epoch": 2.30945225137411, "grad_norm": 8.532560348510742, "learning_rate": 4.139118702279879e-06, "loss": 0.50571828, "memory(GiB)": 34.88, "step": 85295, "train_speed(iter/s)": 0.411673 }, { "acc": 0.91948195, "epoch": 2.3095876316573256, "grad_norm": 17.266332626342773, "learning_rate": 4.1385675119176264e-06, "loss": 0.45462627, "memory(GiB)": 34.88, "step": 85300, "train_speed(iter/s)": 0.411674 }, { "acc": 0.92248669, "epoch": 2.309723011940541, "grad_norm": 10.391103744506836, "learning_rate": 4.13801633235277e-06, "loss": 0.48040071, "memory(GiB)": 34.88, "step": 85305, "train_speed(iter/s)": 0.411675 }, { "acc": 0.93418217, "epoch": 2.3098583922237568, "grad_norm": 7.219261169433594, "learning_rate": 4.137465163592216e-06, "loss": 0.37177882, "memory(GiB)": 34.88, "step": 85310, "train_speed(iter/s)": 0.411676 }, { "acc": 0.94625931, "epoch": 2.309993772506972, "grad_norm": 6.0137553215026855, "learning_rate": 4.13691400564287e-06, "loss": 0.2641587, "memory(GiB)": 34.88, "step": 85315, "train_speed(iter/s)": 0.411678 }, { "acc": 0.92392893, "epoch": 2.3101291527901875, "grad_norm": 9.291922569274902, "learning_rate": 4.136362858511636e-06, "loss": 0.44839864, "memory(GiB)": 34.88, "step": 85320, "train_speed(iter/s)": 0.411679 }, { "acc": 0.92335949, "epoch": 2.3102645330734033, "grad_norm": 11.244711875915527, "learning_rate": 4.135811722205417e-06, "loss": 0.37080421, "memory(GiB)": 34.88, "step": 85325, "train_speed(iter/s)": 0.41168 }, { "acc": 0.92947502, "epoch": 2.3103999133566187, "grad_norm": 5.57470703125, "learning_rate": 4.135260596731118e-06, "loss": 0.38828225, "memory(GiB)": 34.88, "step": 85330, "train_speed(iter/s)": 0.411682 }, { "acc": 0.93391247, "epoch": 2.3105352936398345, "grad_norm": 9.298327445983887, "learning_rate": 4.134709482095643e-06, "loss": 0.36940987, "memory(GiB)": 34.88, "step": 85335, "train_speed(iter/s)": 0.411683 }, { "acc": 0.93102646, "epoch": 2.31067067392305, "grad_norm": 10.797952651977539, "learning_rate": 4.134158378305895e-06, "loss": 0.3925931, "memory(GiB)": 34.88, "step": 85340, "train_speed(iter/s)": 0.411684 }, { "acc": 0.91683903, "epoch": 2.3108060542062656, "grad_norm": 5.164192199707031, "learning_rate": 4.133607285368778e-06, "loss": 0.40887713, "memory(GiB)": 34.88, "step": 85345, "train_speed(iter/s)": 0.411685 }, { "acc": 0.93030434, "epoch": 2.310941434489481, "grad_norm": 5.3721723556518555, "learning_rate": 4.1330562032911954e-06, "loss": 0.39771788, "memory(GiB)": 34.88, "step": 85350, "train_speed(iter/s)": 0.411687 }, { "acc": 0.93775368, "epoch": 2.3110768147726963, "grad_norm": 8.264022827148438, "learning_rate": 4.1325051320800525e-06, "loss": 0.36218143, "memory(GiB)": 34.88, "step": 85355, "train_speed(iter/s)": 0.411688 }, { "acc": 0.93471422, "epoch": 2.311212195055912, "grad_norm": 16.62466812133789, "learning_rate": 4.131954071742249e-06, "loss": 0.37820954, "memory(GiB)": 34.88, "step": 85360, "train_speed(iter/s)": 0.411689 }, { "acc": 0.93075285, "epoch": 2.3113475753391275, "grad_norm": 5.080904006958008, "learning_rate": 4.131403022284692e-06, "loss": 0.32813754, "memory(GiB)": 34.88, "step": 85365, "train_speed(iter/s)": 0.41169 }, { "acc": 0.91737223, "epoch": 2.3114829556223433, "grad_norm": 8.7158784866333, "learning_rate": 4.1308519837142806e-06, "loss": 0.42809763, "memory(GiB)": 34.88, "step": 85370, "train_speed(iter/s)": 0.411692 }, { "acc": 0.92585478, "epoch": 2.3116183359055587, "grad_norm": 13.082219123840332, "learning_rate": 4.1303009560379206e-06, "loss": 0.40102348, "memory(GiB)": 34.88, "step": 85375, "train_speed(iter/s)": 0.411693 }, { "acc": 0.92614527, "epoch": 2.3117537161887745, "grad_norm": 14.190223693847656, "learning_rate": 4.129749939262513e-06, "loss": 0.35912139, "memory(GiB)": 34.88, "step": 85380, "train_speed(iter/s)": 0.411694 }, { "acc": 0.92669573, "epoch": 2.31188909647199, "grad_norm": 113.04576110839844, "learning_rate": 4.129198933394961e-06, "loss": 0.38836124, "memory(GiB)": 34.88, "step": 85385, "train_speed(iter/s)": 0.411696 }, { "acc": 0.91945515, "epoch": 2.312024476755205, "grad_norm": 8.397929191589355, "learning_rate": 4.128647938442167e-06, "loss": 0.5152885, "memory(GiB)": 34.88, "step": 85390, "train_speed(iter/s)": 0.411697 }, { "acc": 0.91989908, "epoch": 2.312159857038421, "grad_norm": 22.42082977294922, "learning_rate": 4.128096954411033e-06, "loss": 0.47036848, "memory(GiB)": 34.88, "step": 85395, "train_speed(iter/s)": 0.411698 }, { "acc": 0.93229275, "epoch": 2.3122952373216363, "grad_norm": 11.199590682983398, "learning_rate": 4.127545981308462e-06, "loss": 0.3503706, "memory(GiB)": 34.88, "step": 85400, "train_speed(iter/s)": 0.411699 }, { "acc": 0.93772926, "epoch": 2.312430617604852, "grad_norm": 7.732422828674316, "learning_rate": 4.126995019141355e-06, "loss": 0.32358773, "memory(GiB)": 34.88, "step": 85405, "train_speed(iter/s)": 0.411701 }, { "acc": 0.92218666, "epoch": 2.3125659978880675, "grad_norm": 8.833404541015625, "learning_rate": 4.126444067916613e-06, "loss": 0.40179358, "memory(GiB)": 34.88, "step": 85410, "train_speed(iter/s)": 0.411702 }, { "acc": 0.9157177, "epoch": 2.3127013781712833, "grad_norm": 8.499777793884277, "learning_rate": 4.125893127641142e-06, "loss": 0.50321178, "memory(GiB)": 34.88, "step": 85415, "train_speed(iter/s)": 0.411703 }, { "acc": 0.91618309, "epoch": 2.3128367584544987, "grad_norm": 12.367995262145996, "learning_rate": 4.125342198321839e-06, "loss": 0.54902439, "memory(GiB)": 34.88, "step": 85420, "train_speed(iter/s)": 0.411704 }, { "acc": 0.91782379, "epoch": 2.312972138737714, "grad_norm": 9.054960250854492, "learning_rate": 4.124791279965608e-06, "loss": 0.47345376, "memory(GiB)": 34.88, "step": 85425, "train_speed(iter/s)": 0.411706 }, { "acc": 0.94930305, "epoch": 2.31310751902093, "grad_norm": 5.884036540985107, "learning_rate": 4.124240372579348e-06, "loss": 0.27556143, "memory(GiB)": 34.88, "step": 85430, "train_speed(iter/s)": 0.411707 }, { "acc": 0.92529945, "epoch": 2.313242899304145, "grad_norm": 16.698450088500977, "learning_rate": 4.1236894761699635e-06, "loss": 0.42484412, "memory(GiB)": 34.88, "step": 85435, "train_speed(iter/s)": 0.411708 }, { "acc": 0.92036648, "epoch": 2.313378279587361, "grad_norm": 7.833452224731445, "learning_rate": 4.123138590744351e-06, "loss": 0.45893407, "memory(GiB)": 34.88, "step": 85440, "train_speed(iter/s)": 0.411709 }, { "acc": 0.91249332, "epoch": 2.3135136598705763, "grad_norm": 7.5728654861450195, "learning_rate": 4.122587716309417e-06, "loss": 0.46496878, "memory(GiB)": 34.88, "step": 85445, "train_speed(iter/s)": 0.411711 }, { "acc": 0.92682457, "epoch": 2.313649040153792, "grad_norm": 7.133080959320068, "learning_rate": 4.122036852872058e-06, "loss": 0.41587248, "memory(GiB)": 34.88, "step": 85450, "train_speed(iter/s)": 0.411712 }, { "acc": 0.90346403, "epoch": 2.3137844204370075, "grad_norm": 8.279434204101562, "learning_rate": 4.121486000439177e-06, "loss": 0.56987886, "memory(GiB)": 34.88, "step": 85455, "train_speed(iter/s)": 0.411713 }, { "acc": 0.92943707, "epoch": 2.3139198007202233, "grad_norm": 4.948093891143799, "learning_rate": 4.120935159017673e-06, "loss": 0.40114164, "memory(GiB)": 34.88, "step": 85460, "train_speed(iter/s)": 0.411715 }, { "acc": 0.93496513, "epoch": 2.3140551810034387, "grad_norm": 6.418630123138428, "learning_rate": 4.1203843286144465e-06, "loss": 0.39740882, "memory(GiB)": 34.88, "step": 85465, "train_speed(iter/s)": 0.411716 }, { "acc": 0.90561562, "epoch": 2.3141905612866545, "grad_norm": 14.707534790039062, "learning_rate": 4.119833509236399e-06, "loss": 0.56233578, "memory(GiB)": 34.88, "step": 85470, "train_speed(iter/s)": 0.411717 }, { "acc": 0.91428413, "epoch": 2.31432594156987, "grad_norm": 10.545452117919922, "learning_rate": 4.119282700890429e-06, "loss": 0.50749474, "memory(GiB)": 34.88, "step": 85475, "train_speed(iter/s)": 0.411718 }, { "acc": 0.91479092, "epoch": 2.314461321853085, "grad_norm": 13.88390064239502, "learning_rate": 4.118731903583438e-06, "loss": 0.51961689, "memory(GiB)": 34.88, "step": 85480, "train_speed(iter/s)": 0.411719 }, { "acc": 0.92161779, "epoch": 2.314596702136301, "grad_norm": 9.150254249572754, "learning_rate": 4.1181811173223255e-06, "loss": 0.42064195, "memory(GiB)": 34.88, "step": 85485, "train_speed(iter/s)": 0.411721 }, { "acc": 0.92990532, "epoch": 2.3147320824195163, "grad_norm": 3.3023738861083984, "learning_rate": 4.117630342113988e-06, "loss": 0.42005224, "memory(GiB)": 34.88, "step": 85490, "train_speed(iter/s)": 0.411722 }, { "acc": 0.94212952, "epoch": 2.314867462702732, "grad_norm": 7.519350528717041, "learning_rate": 4.117079577965331e-06, "loss": 0.44084511, "memory(GiB)": 34.88, "step": 85495, "train_speed(iter/s)": 0.411723 }, { "acc": 0.90671043, "epoch": 2.3150028429859475, "grad_norm": 5.179728984832764, "learning_rate": 4.11652882488325e-06, "loss": 0.54776497, "memory(GiB)": 34.88, "step": 85500, "train_speed(iter/s)": 0.411724 }, { "acc": 0.92682266, "epoch": 2.3151382232691633, "grad_norm": 12.192005157470703, "learning_rate": 4.115978082874645e-06, "loss": 0.356303, "memory(GiB)": 34.88, "step": 85505, "train_speed(iter/s)": 0.411725 }, { "acc": 0.91391182, "epoch": 2.3152736035523787, "grad_norm": 27.041336059570312, "learning_rate": 4.115427351946415e-06, "loss": 0.45334001, "memory(GiB)": 34.88, "step": 85510, "train_speed(iter/s)": 0.411727 }, { "acc": 0.92848387, "epoch": 2.315408983835594, "grad_norm": 5.706024646759033, "learning_rate": 4.114876632105458e-06, "loss": 0.36761005, "memory(GiB)": 34.88, "step": 85515, "train_speed(iter/s)": 0.411728 }, { "acc": 0.93878994, "epoch": 2.31554436411881, "grad_norm": 8.501852989196777, "learning_rate": 4.114325923358674e-06, "loss": 0.33008614, "memory(GiB)": 34.88, "step": 85520, "train_speed(iter/s)": 0.411729 }, { "acc": 0.93691444, "epoch": 2.315679744402025, "grad_norm": 5.9189043045043945, "learning_rate": 4.113775225712962e-06, "loss": 0.37968547, "memory(GiB)": 34.88, "step": 85525, "train_speed(iter/s)": 0.411731 }, { "acc": 0.90643806, "epoch": 2.315815124685241, "grad_norm": 12.00385856628418, "learning_rate": 4.1132245391752215e-06, "loss": 0.5398222, "memory(GiB)": 34.88, "step": 85530, "train_speed(iter/s)": 0.411732 }, { "acc": 0.92284889, "epoch": 2.3159505049684563, "grad_norm": 10.905304908752441, "learning_rate": 4.1126738637523476e-06, "loss": 0.44701142, "memory(GiB)": 34.88, "step": 85535, "train_speed(iter/s)": 0.411733 }, { "acc": 0.92583447, "epoch": 2.316085885251672, "grad_norm": 16.537761688232422, "learning_rate": 4.112123199451244e-06, "loss": 0.42336206, "memory(GiB)": 34.88, "step": 85540, "train_speed(iter/s)": 0.411734 }, { "acc": 0.92788677, "epoch": 2.3162212655348875, "grad_norm": 6.533122539520264, "learning_rate": 4.111572546278802e-06, "loss": 0.39155598, "memory(GiB)": 34.88, "step": 85545, "train_speed(iter/s)": 0.411735 }, { "acc": 0.90922279, "epoch": 2.316356645818103, "grad_norm": 10.891585350036621, "learning_rate": 4.111021904241925e-06, "loss": 0.53208237, "memory(GiB)": 34.88, "step": 85550, "train_speed(iter/s)": 0.411737 }, { "acc": 0.9032609, "epoch": 2.3164920261013187, "grad_norm": 17.312793731689453, "learning_rate": 4.110471273347508e-06, "loss": 0.57615108, "memory(GiB)": 34.88, "step": 85555, "train_speed(iter/s)": 0.411738 }, { "acc": 0.9195384, "epoch": 2.316627406384534, "grad_norm": 8.207383155822754, "learning_rate": 4.109920653602449e-06, "loss": 0.47065563, "memory(GiB)": 34.88, "step": 85560, "train_speed(iter/s)": 0.411739 }, { "acc": 0.92374191, "epoch": 2.31676278666775, "grad_norm": 8.415708541870117, "learning_rate": 4.109370045013648e-06, "loss": 0.40595365, "memory(GiB)": 34.88, "step": 85565, "train_speed(iter/s)": 0.411741 }, { "acc": 0.93436365, "epoch": 2.316898166950965, "grad_norm": 3.546297073364258, "learning_rate": 4.1088194475879996e-06, "loss": 0.31338243, "memory(GiB)": 34.88, "step": 85570, "train_speed(iter/s)": 0.411742 }, { "acc": 0.93601017, "epoch": 2.317033547234181, "grad_norm": 5.853313446044922, "learning_rate": 4.1082688613324025e-06, "loss": 0.37042472, "memory(GiB)": 34.88, "step": 85575, "train_speed(iter/s)": 0.411743 }, { "acc": 0.92659693, "epoch": 2.3171689275173963, "grad_norm": 6.341254234313965, "learning_rate": 4.107718286253754e-06, "loss": 0.38161316, "memory(GiB)": 34.88, "step": 85580, "train_speed(iter/s)": 0.411744 }, { "acc": 0.92871037, "epoch": 2.3173043078006117, "grad_norm": 4.874318599700928, "learning_rate": 4.107167722358952e-06, "loss": 0.35107613, "memory(GiB)": 34.88, "step": 85585, "train_speed(iter/s)": 0.411745 }, { "acc": 0.92232685, "epoch": 2.3174396880838275, "grad_norm": 11.629829406738281, "learning_rate": 4.10661716965489e-06, "loss": 0.48216782, "memory(GiB)": 34.88, "step": 85590, "train_speed(iter/s)": 0.411747 }, { "acc": 0.92216511, "epoch": 2.317575068367043, "grad_norm": 9.439262390136719, "learning_rate": 4.106066628148467e-06, "loss": 0.44023805, "memory(GiB)": 34.88, "step": 85595, "train_speed(iter/s)": 0.411748 }, { "acc": 0.91286001, "epoch": 2.3177104486502587, "grad_norm": 9.689525604248047, "learning_rate": 4.105516097846581e-06, "loss": 0.49584894, "memory(GiB)": 34.88, "step": 85600, "train_speed(iter/s)": 0.411749 }, { "acc": 0.92106762, "epoch": 2.317845828933474, "grad_norm": 13.670647621154785, "learning_rate": 4.104965578756126e-06, "loss": 0.51123934, "memory(GiB)": 34.88, "step": 85605, "train_speed(iter/s)": 0.41175 }, { "acc": 0.91140995, "epoch": 2.31798120921669, "grad_norm": 12.190812110900879, "learning_rate": 4.104415070884e-06, "loss": 0.55011311, "memory(GiB)": 34.88, "step": 85610, "train_speed(iter/s)": 0.411752 }, { "acc": 0.93129015, "epoch": 2.318116589499905, "grad_norm": 9.047553062438965, "learning_rate": 4.103864574237097e-06, "loss": 0.40053692, "memory(GiB)": 34.88, "step": 85615, "train_speed(iter/s)": 0.411753 }, { "acc": 0.94079065, "epoch": 2.318251969783121, "grad_norm": 7.665124893188477, "learning_rate": 4.103314088822316e-06, "loss": 0.33909044, "memory(GiB)": 34.88, "step": 85620, "train_speed(iter/s)": 0.411754 }, { "acc": 0.93037214, "epoch": 2.3183873500663363, "grad_norm": 8.548373222351074, "learning_rate": 4.102763614646552e-06, "loss": 0.35293772, "memory(GiB)": 34.88, "step": 85625, "train_speed(iter/s)": 0.411755 }, { "acc": 0.91971769, "epoch": 2.3185227303495517, "grad_norm": 14.211687088012695, "learning_rate": 4.102213151716699e-06, "loss": 0.45311375, "memory(GiB)": 34.88, "step": 85630, "train_speed(iter/s)": 0.411757 }, { "acc": 0.91244373, "epoch": 2.3186581106327675, "grad_norm": 8.737370491027832, "learning_rate": 4.1016627000396545e-06, "loss": 0.45559034, "memory(GiB)": 34.88, "step": 85635, "train_speed(iter/s)": 0.411758 }, { "acc": 0.91280575, "epoch": 2.318793490915983, "grad_norm": 31.740503311157227, "learning_rate": 4.1011122596223135e-06, "loss": 0.57896419, "memory(GiB)": 34.88, "step": 85640, "train_speed(iter/s)": 0.411759 }, { "acc": 0.93117867, "epoch": 2.3189288711991987, "grad_norm": 9.772510528564453, "learning_rate": 4.100561830471571e-06, "loss": 0.3502059, "memory(GiB)": 34.88, "step": 85645, "train_speed(iter/s)": 0.41176 }, { "acc": 0.92878208, "epoch": 2.319064251482414, "grad_norm": 6.520536422729492, "learning_rate": 4.100011412594322e-06, "loss": 0.36357222, "memory(GiB)": 34.88, "step": 85650, "train_speed(iter/s)": 0.411762 }, { "acc": 0.92095585, "epoch": 2.31919963176563, "grad_norm": 10.097199440002441, "learning_rate": 4.099461005997463e-06, "loss": 0.43989162, "memory(GiB)": 34.88, "step": 85655, "train_speed(iter/s)": 0.411763 }, { "acc": 0.91360836, "epoch": 2.319335012048845, "grad_norm": 12.262011528015137, "learning_rate": 4.098910610687887e-06, "loss": 0.4583643, "memory(GiB)": 34.88, "step": 85660, "train_speed(iter/s)": 0.411764 }, { "acc": 0.93248701, "epoch": 2.319470392332061, "grad_norm": 9.021764755249023, "learning_rate": 4.098360226672492e-06, "loss": 0.3482492, "memory(GiB)": 34.88, "step": 85665, "train_speed(iter/s)": 0.411765 }, { "acc": 0.92534275, "epoch": 2.3196057726152763, "grad_norm": 9.33549690246582, "learning_rate": 4.097809853958168e-06, "loss": 0.45727725, "memory(GiB)": 34.88, "step": 85670, "train_speed(iter/s)": 0.411767 }, { "acc": 0.91923113, "epoch": 2.3197411528984917, "grad_norm": 13.80993938446045, "learning_rate": 4.097259492551812e-06, "loss": 0.45881557, "memory(GiB)": 34.88, "step": 85675, "train_speed(iter/s)": 0.411768 }, { "acc": 0.94363184, "epoch": 2.3198765331817075, "grad_norm": 3.2669901847839355, "learning_rate": 4.096709142460319e-06, "loss": 0.30260811, "memory(GiB)": 34.88, "step": 85680, "train_speed(iter/s)": 0.411769 }, { "acc": 0.9336916, "epoch": 2.320011913464923, "grad_norm": 11.734541893005371, "learning_rate": 4.096158803690581e-06, "loss": 0.40284424, "memory(GiB)": 34.88, "step": 85685, "train_speed(iter/s)": 0.411771 }, { "acc": 0.92687654, "epoch": 2.3201472937481387, "grad_norm": 10.141958236694336, "learning_rate": 4.095608476249495e-06, "loss": 0.43811784, "memory(GiB)": 34.88, "step": 85690, "train_speed(iter/s)": 0.411772 }, { "acc": 0.93453417, "epoch": 2.320282674031354, "grad_norm": 3.7074437141418457, "learning_rate": 4.095058160143953e-06, "loss": 0.34743981, "memory(GiB)": 34.88, "step": 85695, "train_speed(iter/s)": 0.411773 }, { "acc": 0.93163328, "epoch": 2.32041805431457, "grad_norm": 15.682829856872559, "learning_rate": 4.0945078553808495e-06, "loss": 0.3454936, "memory(GiB)": 34.88, "step": 85700, "train_speed(iter/s)": 0.411774 }, { "acc": 0.90522346, "epoch": 2.320553434597785, "grad_norm": 11.284927368164062, "learning_rate": 4.093957561967078e-06, "loss": 0.48991494, "memory(GiB)": 34.88, "step": 85705, "train_speed(iter/s)": 0.411775 }, { "acc": 0.9370554, "epoch": 2.3206888148810005, "grad_norm": 7.341073513031006, "learning_rate": 4.093407279909532e-06, "loss": 0.3462441, "memory(GiB)": 34.88, "step": 85710, "train_speed(iter/s)": 0.411777 }, { "acc": 0.93538761, "epoch": 2.3208241951642163, "grad_norm": 21.59330940246582, "learning_rate": 4.092857009215104e-06, "loss": 0.38688478, "memory(GiB)": 34.88, "step": 85715, "train_speed(iter/s)": 0.411778 }, { "acc": 0.92065239, "epoch": 2.3209595754474317, "grad_norm": 9.53286361694336, "learning_rate": 4.092306749890688e-06, "loss": 0.48910437, "memory(GiB)": 34.88, "step": 85720, "train_speed(iter/s)": 0.411779 }, { "acc": 0.92495766, "epoch": 2.3210949557306475, "grad_norm": 5.016517162322998, "learning_rate": 4.091756501943178e-06, "loss": 0.41474552, "memory(GiB)": 34.88, "step": 85725, "train_speed(iter/s)": 0.41178 }, { "acc": 0.93262463, "epoch": 2.321230336013863, "grad_norm": 6.884593486785889, "learning_rate": 4.091206265379464e-06, "loss": 0.35046663, "memory(GiB)": 34.88, "step": 85730, "train_speed(iter/s)": 0.411782 }, { "acc": 0.93604221, "epoch": 2.3213657162970787, "grad_norm": 10.958597183227539, "learning_rate": 4.0906560402064425e-06, "loss": 0.35511365, "memory(GiB)": 34.88, "step": 85735, "train_speed(iter/s)": 0.411783 }, { "acc": 0.93948441, "epoch": 2.321501096580294, "grad_norm": 9.653075218200684, "learning_rate": 4.090105826431003e-06, "loss": 0.32353058, "memory(GiB)": 34.88, "step": 85740, "train_speed(iter/s)": 0.411784 }, { "acc": 0.93639441, "epoch": 2.3216364768635094, "grad_norm": 5.189187049865723, "learning_rate": 4.089555624060042e-06, "loss": 0.2439579, "memory(GiB)": 34.88, "step": 85745, "train_speed(iter/s)": 0.411786 }, { "acc": 0.93497868, "epoch": 2.321771857146725, "grad_norm": 5.790107727050781, "learning_rate": 4.089005433100448e-06, "loss": 0.38236635, "memory(GiB)": 34.88, "step": 85750, "train_speed(iter/s)": 0.411787 }, { "acc": 0.93033485, "epoch": 2.3219072374299405, "grad_norm": 11.115533828735352, "learning_rate": 4.088455253559113e-06, "loss": 0.4458497, "memory(GiB)": 34.88, "step": 85755, "train_speed(iter/s)": 0.411788 }, { "acc": 0.91442986, "epoch": 2.3220426177131563, "grad_norm": 8.124197959899902, "learning_rate": 4.087905085442932e-06, "loss": 0.49525185, "memory(GiB)": 34.88, "step": 85760, "train_speed(iter/s)": 0.41179 }, { "acc": 0.92092209, "epoch": 2.3221779979963717, "grad_norm": 11.760643005371094, "learning_rate": 4.087354928758796e-06, "loss": 0.42120914, "memory(GiB)": 34.88, "step": 85765, "train_speed(iter/s)": 0.411791 }, { "acc": 0.91090622, "epoch": 2.3223133782795875, "grad_norm": 21.872407913208008, "learning_rate": 4.086804783513596e-06, "loss": 0.55825348, "memory(GiB)": 34.88, "step": 85770, "train_speed(iter/s)": 0.411792 }, { "acc": 0.91269159, "epoch": 2.322448758562803, "grad_norm": 6.596986293792725, "learning_rate": 4.086254649714223e-06, "loss": 0.57936497, "memory(GiB)": 34.88, "step": 85775, "train_speed(iter/s)": 0.411793 }, { "acc": 0.9284502, "epoch": 2.322584138846018, "grad_norm": 6.027282238006592, "learning_rate": 4.085704527367571e-06, "loss": 0.44368949, "memory(GiB)": 34.88, "step": 85780, "train_speed(iter/s)": 0.411794 }, { "acc": 0.9230135, "epoch": 2.322719519129234, "grad_norm": 11.87414836883545, "learning_rate": 4.085154416480529e-06, "loss": 0.43525867, "memory(GiB)": 34.88, "step": 85785, "train_speed(iter/s)": 0.411795 }, { "acc": 0.92226677, "epoch": 2.3228548994124494, "grad_norm": 6.920089244842529, "learning_rate": 4.0846043170599895e-06, "loss": 0.49921017, "memory(GiB)": 34.88, "step": 85790, "train_speed(iter/s)": 0.411797 }, { "acc": 0.93258181, "epoch": 2.322990279695665, "grad_norm": 10.589364051818848, "learning_rate": 4.084054229112843e-06, "loss": 0.38133965, "memory(GiB)": 34.88, "step": 85795, "train_speed(iter/s)": 0.411798 }, { "acc": 0.9266593, "epoch": 2.3231256599788805, "grad_norm": 8.109728813171387, "learning_rate": 4.08350415264598e-06, "loss": 0.41487465, "memory(GiB)": 34.88, "step": 85800, "train_speed(iter/s)": 0.411799 }, { "acc": 0.93225117, "epoch": 2.3232610402620963, "grad_norm": 18.788881301879883, "learning_rate": 4.082954087666294e-06, "loss": 0.38972764, "memory(GiB)": 34.88, "step": 85805, "train_speed(iter/s)": 0.4118 }, { "acc": 0.92710876, "epoch": 2.3233964205453117, "grad_norm": 7.129342555999756, "learning_rate": 4.0824040341806705e-06, "loss": 0.42783818, "memory(GiB)": 34.88, "step": 85810, "train_speed(iter/s)": 0.411801 }, { "acc": 0.92578354, "epoch": 2.3235318008285275, "grad_norm": 15.818949699401855, "learning_rate": 4.081853992196006e-06, "loss": 0.410604, "memory(GiB)": 34.88, "step": 85815, "train_speed(iter/s)": 0.411803 }, { "acc": 0.92835732, "epoch": 2.323667181111743, "grad_norm": 17.02079200744629, "learning_rate": 4.081303961719185e-06, "loss": 0.35798335, "memory(GiB)": 34.88, "step": 85820, "train_speed(iter/s)": 0.411804 }, { "acc": 0.93876095, "epoch": 2.3238025613949587, "grad_norm": 5.802842617034912, "learning_rate": 4.080753942757101e-06, "loss": 0.36777866, "memory(GiB)": 34.88, "step": 85825, "train_speed(iter/s)": 0.411805 }, { "acc": 0.92634735, "epoch": 2.323937941678174, "grad_norm": 3.7925331592559814, "learning_rate": 4.080203935316645e-06, "loss": 0.38495166, "memory(GiB)": 34.88, "step": 85830, "train_speed(iter/s)": 0.411806 }, { "acc": 0.92307587, "epoch": 2.3240733219613894, "grad_norm": 7.014369964599609, "learning_rate": 4.0796539394047055e-06, "loss": 0.36784189, "memory(GiB)": 34.88, "step": 85835, "train_speed(iter/s)": 0.411808 }, { "acc": 0.9308548, "epoch": 2.324208702244605, "grad_norm": 11.362273216247559, "learning_rate": 4.079103955028173e-06, "loss": 0.40151739, "memory(GiB)": 34.88, "step": 85840, "train_speed(iter/s)": 0.411809 }, { "acc": 0.92751274, "epoch": 2.3243440825278205, "grad_norm": 9.686564445495605, "learning_rate": 4.0785539821939345e-06, "loss": 0.42805781, "memory(GiB)": 34.88, "step": 85845, "train_speed(iter/s)": 0.41181 }, { "acc": 0.94230785, "epoch": 2.3244794628110363, "grad_norm": 14.661422729492188, "learning_rate": 4.078004020908884e-06, "loss": 0.33009853, "memory(GiB)": 34.88, "step": 85850, "train_speed(iter/s)": 0.411811 }, { "acc": 0.92710228, "epoch": 2.3246148430942517, "grad_norm": 7.9249043464660645, "learning_rate": 4.077454071179906e-06, "loss": 0.41053567, "memory(GiB)": 34.88, "step": 85855, "train_speed(iter/s)": 0.411813 }, { "acc": 0.93919392, "epoch": 2.3247502233774675, "grad_norm": 4.55299186706543, "learning_rate": 4.076904133013892e-06, "loss": 0.37102017, "memory(GiB)": 34.88, "step": 85860, "train_speed(iter/s)": 0.411814 }, { "acc": 0.9261816, "epoch": 2.324885603660683, "grad_norm": 5.4056291580200195, "learning_rate": 4.076354206417733e-06, "loss": 0.43199863, "memory(GiB)": 34.88, "step": 85865, "train_speed(iter/s)": 0.411815 }, { "acc": 0.9296011, "epoch": 2.3250209839438982, "grad_norm": 9.376423835754395, "learning_rate": 4.075804291398314e-06, "loss": 0.33148017, "memory(GiB)": 34.88, "step": 85870, "train_speed(iter/s)": 0.411816 }, { "acc": 0.91901369, "epoch": 2.325156364227114, "grad_norm": 9.739480018615723, "learning_rate": 4.075254387962528e-06, "loss": 0.43853202, "memory(GiB)": 34.88, "step": 85875, "train_speed(iter/s)": 0.411818 }, { "acc": 0.93666162, "epoch": 2.3252917445103294, "grad_norm": 5.850388526916504, "learning_rate": 4.07470449611726e-06, "loss": 0.38735313, "memory(GiB)": 34.88, "step": 85880, "train_speed(iter/s)": 0.411819 }, { "acc": 0.92104359, "epoch": 2.325427124793545, "grad_norm": 9.396214485168457, "learning_rate": 4.074154615869401e-06, "loss": 0.42265525, "memory(GiB)": 34.88, "step": 85885, "train_speed(iter/s)": 0.41182 }, { "acc": 0.92635231, "epoch": 2.3255625050767605, "grad_norm": 8.013975143432617, "learning_rate": 4.0736047472258366e-06, "loss": 0.44074879, "memory(GiB)": 34.88, "step": 85890, "train_speed(iter/s)": 0.411821 }, { "acc": 0.92892361, "epoch": 2.3256978853599763, "grad_norm": 6.539740562438965, "learning_rate": 4.073054890193456e-06, "loss": 0.42078815, "memory(GiB)": 34.88, "step": 85895, "train_speed(iter/s)": 0.411822 }, { "acc": 0.92896109, "epoch": 2.3258332656431917, "grad_norm": 11.533677101135254, "learning_rate": 4.07250504477915e-06, "loss": 0.40555353, "memory(GiB)": 34.88, "step": 85900, "train_speed(iter/s)": 0.411824 }, { "acc": 0.92446861, "epoch": 2.325968645926407, "grad_norm": 7.221227645874023, "learning_rate": 4.071955210989802e-06, "loss": 0.41513863, "memory(GiB)": 34.88, "step": 85905, "train_speed(iter/s)": 0.411825 }, { "acc": 0.94018841, "epoch": 2.326104026209623, "grad_norm": 6.208047389984131, "learning_rate": 4.071405388832305e-06, "loss": 0.26135108, "memory(GiB)": 34.88, "step": 85910, "train_speed(iter/s)": 0.411826 }, { "acc": 0.94693432, "epoch": 2.3262394064928382, "grad_norm": 4.285454273223877, "learning_rate": 4.070855578313541e-06, "loss": 0.32006226, "memory(GiB)": 34.88, "step": 85915, "train_speed(iter/s)": 0.411828 }, { "acc": 0.92547379, "epoch": 2.326374786776054, "grad_norm": 7.544677734375, "learning_rate": 4.0703057794404035e-06, "loss": 0.39911342, "memory(GiB)": 34.88, "step": 85920, "train_speed(iter/s)": 0.411829 }, { "acc": 0.93216228, "epoch": 2.3265101670592694, "grad_norm": 7.4898505210876465, "learning_rate": 4.069755992219773e-06, "loss": 0.3182514, "memory(GiB)": 34.88, "step": 85925, "train_speed(iter/s)": 0.41183 }, { "acc": 0.92733841, "epoch": 2.326645547342485, "grad_norm": 6.884003639221191, "learning_rate": 4.0692062166585415e-06, "loss": 0.37358937, "memory(GiB)": 34.88, "step": 85930, "train_speed(iter/s)": 0.411831 }, { "acc": 0.93560591, "epoch": 2.3267809276257005, "grad_norm": 10.05125904083252, "learning_rate": 4.068656452763594e-06, "loss": 0.35716958, "memory(GiB)": 34.88, "step": 85935, "train_speed(iter/s)": 0.411832 }, { "acc": 0.93740177, "epoch": 2.326916307908916, "grad_norm": 5.32228946685791, "learning_rate": 4.0681067005418176e-06, "loss": 0.30887468, "memory(GiB)": 34.88, "step": 85940, "train_speed(iter/s)": 0.411834 }, { "acc": 0.94212646, "epoch": 2.3270516881921317, "grad_norm": 7.016190528869629, "learning_rate": 4.067556960000101e-06, "loss": 0.29606295, "memory(GiB)": 34.88, "step": 85945, "train_speed(iter/s)": 0.411835 }, { "acc": 0.92564602, "epoch": 2.327187068475347, "grad_norm": 4.324991703033447, "learning_rate": 4.067007231145329e-06, "loss": 0.34780285, "memory(GiB)": 34.88, "step": 85950, "train_speed(iter/s)": 0.411836 }, { "acc": 0.91870918, "epoch": 2.327322448758563, "grad_norm": 8.858871459960938, "learning_rate": 4.066457513984387e-06, "loss": 0.43904662, "memory(GiB)": 34.88, "step": 85955, "train_speed(iter/s)": 0.411837 }, { "acc": 0.9136466, "epoch": 2.3274578290417782, "grad_norm": 27.549245834350586, "learning_rate": 4.065907808524164e-06, "loss": 0.47866783, "memory(GiB)": 34.88, "step": 85960, "train_speed(iter/s)": 0.411839 }, { "acc": 0.94161758, "epoch": 2.327593209324994, "grad_norm": 9.483611106872559, "learning_rate": 4.065358114771544e-06, "loss": 0.30863757, "memory(GiB)": 34.88, "step": 85965, "train_speed(iter/s)": 0.41184 }, { "acc": 0.92593384, "epoch": 2.3277285896082094, "grad_norm": 8.92988395690918, "learning_rate": 4.064808432733413e-06, "loss": 0.44337611, "memory(GiB)": 34.88, "step": 85970, "train_speed(iter/s)": 0.411841 }, { "acc": 0.932619, "epoch": 2.327863969891425, "grad_norm": 9.22409439086914, "learning_rate": 4.064258762416658e-06, "loss": 0.39856329, "memory(GiB)": 34.88, "step": 85975, "train_speed(iter/s)": 0.411842 }, { "acc": 0.93043451, "epoch": 2.3279993501746405, "grad_norm": 9.295344352722168, "learning_rate": 4.063709103828165e-06, "loss": 0.38335168, "memory(GiB)": 34.88, "step": 85980, "train_speed(iter/s)": 0.411844 }, { "acc": 0.9373044, "epoch": 2.3281347304578563, "grad_norm": 6.142341613769531, "learning_rate": 4.063159456974817e-06, "loss": 0.30401695, "memory(GiB)": 34.88, "step": 85985, "train_speed(iter/s)": 0.411845 }, { "acc": 0.92705278, "epoch": 2.3282701107410717, "grad_norm": 4.915158748626709, "learning_rate": 4.062609821863503e-06, "loss": 0.36905384, "memory(GiB)": 34.88, "step": 85990, "train_speed(iter/s)": 0.411846 }, { "acc": 0.93371153, "epoch": 2.328405491024287, "grad_norm": 10.600566864013672, "learning_rate": 4.062060198501106e-06, "loss": 0.41598568, "memory(GiB)": 34.88, "step": 85995, "train_speed(iter/s)": 0.411848 }, { "acc": 0.90930405, "epoch": 2.328540871307503, "grad_norm": 13.37328815460205, "learning_rate": 4.061510586894512e-06, "loss": 0.55298948, "memory(GiB)": 34.88, "step": 86000, "train_speed(iter/s)": 0.411849 }, { "acc": 0.95692787, "epoch": 2.3286762515907182, "grad_norm": 7.091309070587158, "learning_rate": 4.060960987050604e-06, "loss": 0.26889863, "memory(GiB)": 34.88, "step": 86005, "train_speed(iter/s)": 0.41185 }, { "acc": 0.92104206, "epoch": 2.328811631873934, "grad_norm": 11.566428184509277, "learning_rate": 4.060411398976269e-06, "loss": 0.48604975, "memory(GiB)": 34.88, "step": 86010, "train_speed(iter/s)": 0.411851 }, { "acc": 0.93447399, "epoch": 2.3289470121571494, "grad_norm": 11.852972984313965, "learning_rate": 4.059861822678392e-06, "loss": 0.30354855, "memory(GiB)": 34.88, "step": 86015, "train_speed(iter/s)": 0.411853 }, { "acc": 0.91244011, "epoch": 2.329082392440365, "grad_norm": 28.98282241821289, "learning_rate": 4.059312258163854e-06, "loss": 0.48615599, "memory(GiB)": 34.88, "step": 86020, "train_speed(iter/s)": 0.411854 }, { "acc": 0.92107468, "epoch": 2.3292177727235805, "grad_norm": 8.480048179626465, "learning_rate": 4.058762705439544e-06, "loss": 0.42539892, "memory(GiB)": 34.88, "step": 86025, "train_speed(iter/s)": 0.411855 }, { "acc": 0.92957592, "epoch": 2.329353153006796, "grad_norm": 9.5521879196167, "learning_rate": 4.0582131645123426e-06, "loss": 0.44313393, "memory(GiB)": 34.88, "step": 86030, "train_speed(iter/s)": 0.411856 }, { "acc": 0.91015406, "epoch": 2.3294885332900117, "grad_norm": 6.955806255340576, "learning_rate": 4.0576636353891376e-06, "loss": 0.45716453, "memory(GiB)": 34.88, "step": 86035, "train_speed(iter/s)": 0.411858 }, { "acc": 0.91949863, "epoch": 2.329623913573227, "grad_norm": 12.171628952026367, "learning_rate": 4.057114118076808e-06, "loss": 0.41574945, "memory(GiB)": 34.88, "step": 86040, "train_speed(iter/s)": 0.411859 }, { "acc": 0.90750923, "epoch": 2.329759293856443, "grad_norm": 8.762601852416992, "learning_rate": 4.056564612582243e-06, "loss": 0.54221764, "memory(GiB)": 34.88, "step": 86045, "train_speed(iter/s)": 0.41186 }, { "acc": 0.90433922, "epoch": 2.3298946741396582, "grad_norm": 9.552912712097168, "learning_rate": 4.056015118912323e-06, "loss": 0.40885286, "memory(GiB)": 34.88, "step": 86050, "train_speed(iter/s)": 0.411861 }, { "acc": 0.93782835, "epoch": 2.330030054422874, "grad_norm": 19.272287368774414, "learning_rate": 4.055465637073931e-06, "loss": 0.30570393, "memory(GiB)": 34.88, "step": 86055, "train_speed(iter/s)": 0.411862 }, { "acc": 0.92982101, "epoch": 2.3301654347060894, "grad_norm": 9.168180465698242, "learning_rate": 4.0549161670739526e-06, "loss": 0.41626234, "memory(GiB)": 34.88, "step": 86060, "train_speed(iter/s)": 0.411864 }, { "acc": 0.90934381, "epoch": 2.3303008149893047, "grad_norm": 5.587912559509277, "learning_rate": 4.0543667089192675e-06, "loss": 0.48106737, "memory(GiB)": 34.88, "step": 86065, "train_speed(iter/s)": 0.411865 }, { "acc": 0.92739697, "epoch": 2.3304361952725205, "grad_norm": 6.708451271057129, "learning_rate": 4.053817262616763e-06, "loss": 0.4113111, "memory(GiB)": 34.88, "step": 86070, "train_speed(iter/s)": 0.411866 }, { "acc": 0.92156096, "epoch": 2.330571575555736, "grad_norm": 9.074682235717773, "learning_rate": 4.0532678281733194e-06, "loss": 0.38707142, "memory(GiB)": 34.88, "step": 86075, "train_speed(iter/s)": 0.411867 }, { "acc": 0.92731857, "epoch": 2.3307069558389517, "grad_norm": 3.5261874198913574, "learning_rate": 4.052718405595821e-06, "loss": 0.38074749, "memory(GiB)": 34.88, "step": 86080, "train_speed(iter/s)": 0.411868 }, { "acc": 0.91718597, "epoch": 2.330842336122167, "grad_norm": 12.679524421691895, "learning_rate": 4.052168994891149e-06, "loss": 0.47694445, "memory(GiB)": 34.88, "step": 86085, "train_speed(iter/s)": 0.41187 }, { "acc": 0.93929167, "epoch": 2.330977716405383, "grad_norm": 5.716095447540283, "learning_rate": 4.051619596066185e-06, "loss": 0.32509551, "memory(GiB)": 34.88, "step": 86090, "train_speed(iter/s)": 0.411871 }, { "acc": 0.92721815, "epoch": 2.3311130966885982, "grad_norm": 7.83855676651001, "learning_rate": 4.051070209127815e-06, "loss": 0.36666384, "memory(GiB)": 34.88, "step": 86095, "train_speed(iter/s)": 0.411872 }, { "acc": 0.93186016, "epoch": 2.3312484769718136, "grad_norm": 20.80072593688965, "learning_rate": 4.0505208340829165e-06, "loss": 0.29986558, "memory(GiB)": 34.88, "step": 86100, "train_speed(iter/s)": 0.411873 }, { "acc": 0.93016415, "epoch": 2.3313838572550294, "grad_norm": 6.0869317054748535, "learning_rate": 4.0499714709383755e-06, "loss": 0.41917543, "memory(GiB)": 34.88, "step": 86105, "train_speed(iter/s)": 0.411875 }, { "acc": 0.93336105, "epoch": 2.3315192375382447, "grad_norm": 4.566110610961914, "learning_rate": 4.04942211970107e-06, "loss": 0.30268993, "memory(GiB)": 34.88, "step": 86110, "train_speed(iter/s)": 0.411876 }, { "acc": 0.91654644, "epoch": 2.3316546178214606, "grad_norm": 13.399148941040039, "learning_rate": 4.048872780377885e-06, "loss": 0.53682241, "memory(GiB)": 34.88, "step": 86115, "train_speed(iter/s)": 0.411877 }, { "acc": 0.9264307, "epoch": 2.331789998104676, "grad_norm": 12.30327320098877, "learning_rate": 4.048323452975701e-06, "loss": 0.382517, "memory(GiB)": 34.88, "step": 86120, "train_speed(iter/s)": 0.411878 }, { "acc": 0.91526356, "epoch": 2.3319253783878917, "grad_norm": 8.709846496582031, "learning_rate": 4.0477741375014e-06, "loss": 0.42403975, "memory(GiB)": 34.88, "step": 86125, "train_speed(iter/s)": 0.411879 }, { "acc": 0.93041096, "epoch": 2.332060758671107, "grad_norm": 6.923874855041504, "learning_rate": 4.0472248339618616e-06, "loss": 0.40106554, "memory(GiB)": 34.88, "step": 86130, "train_speed(iter/s)": 0.411881 }, { "acc": 0.92121334, "epoch": 2.332196138954323, "grad_norm": 12.326948165893555, "learning_rate": 4.046675542363968e-06, "loss": 0.44358253, "memory(GiB)": 34.88, "step": 86135, "train_speed(iter/s)": 0.411882 }, { "acc": 0.93982, "epoch": 2.3323315192375382, "grad_norm": 11.590673446655273, "learning_rate": 4.0461262627146e-06, "loss": 0.30053225, "memory(GiB)": 34.88, "step": 86140, "train_speed(iter/s)": 0.411883 }, { "acc": 0.91848431, "epoch": 2.332466899520754, "grad_norm": 15.06766128540039, "learning_rate": 4.045576995020638e-06, "loss": 0.46543536, "memory(GiB)": 34.88, "step": 86145, "train_speed(iter/s)": 0.411884 }, { "acc": 0.92652264, "epoch": 2.3326022798039694, "grad_norm": 12.202570915222168, "learning_rate": 4.045027739288963e-06, "loss": 0.3764255, "memory(GiB)": 34.88, "step": 86150, "train_speed(iter/s)": 0.411886 }, { "acc": 0.94029293, "epoch": 2.3327376600871848, "grad_norm": 5.383037567138672, "learning_rate": 4.044478495526456e-06, "loss": 0.27004218, "memory(GiB)": 34.88, "step": 86155, "train_speed(iter/s)": 0.411887 }, { "acc": 0.92025185, "epoch": 2.3328730403704006, "grad_norm": 3.92488694190979, "learning_rate": 4.043929263739996e-06, "loss": 0.43096943, "memory(GiB)": 34.88, "step": 86160, "train_speed(iter/s)": 0.411888 }, { "acc": 0.94579401, "epoch": 2.333008420653616, "grad_norm": 5.467409133911133, "learning_rate": 4.043380043936466e-06, "loss": 0.28626971, "memory(GiB)": 34.88, "step": 86165, "train_speed(iter/s)": 0.411889 }, { "acc": 0.91506042, "epoch": 2.3331438009368317, "grad_norm": 12.412510871887207, "learning_rate": 4.042830836122744e-06, "loss": 0.43466458, "memory(GiB)": 34.88, "step": 86170, "train_speed(iter/s)": 0.411891 }, { "acc": 0.91748991, "epoch": 2.333279181220047, "grad_norm": 12.848881721496582, "learning_rate": 4.042281640305709e-06, "loss": 0.44014583, "memory(GiB)": 34.88, "step": 86175, "train_speed(iter/s)": 0.411892 }, { "acc": 0.93492746, "epoch": 2.333414561503263, "grad_norm": 10.217290878295898, "learning_rate": 4.041732456492242e-06, "loss": 0.42193475, "memory(GiB)": 34.88, "step": 86180, "train_speed(iter/s)": 0.411893 }, { "acc": 0.92267084, "epoch": 2.3335499417864782, "grad_norm": 16.245033264160156, "learning_rate": 4.0411832846892225e-06, "loss": 0.41052103, "memory(GiB)": 34.88, "step": 86185, "train_speed(iter/s)": 0.411894 }, { "acc": 0.90905094, "epoch": 2.3336853220696936, "grad_norm": 12.44223690032959, "learning_rate": 4.04063412490353e-06, "loss": 0.56561222, "memory(GiB)": 34.88, "step": 86190, "train_speed(iter/s)": 0.411895 }, { "acc": 0.93154535, "epoch": 2.3338207023529094, "grad_norm": 13.106682777404785, "learning_rate": 4.040084977142044e-06, "loss": 0.36899245, "memory(GiB)": 34.88, "step": 86195, "train_speed(iter/s)": 0.411897 }, { "acc": 0.91571407, "epoch": 2.3339560826361248, "grad_norm": 10.701624870300293, "learning_rate": 4.039535841411643e-06, "loss": 0.5048418, "memory(GiB)": 34.88, "step": 86200, "train_speed(iter/s)": 0.411898 }, { "acc": 0.90608797, "epoch": 2.3340914629193406, "grad_norm": 13.496644973754883, "learning_rate": 4.038986717719205e-06, "loss": 0.51871386, "memory(GiB)": 34.88, "step": 86205, "train_speed(iter/s)": 0.411899 }, { "acc": 0.92417831, "epoch": 2.334226843202556, "grad_norm": 5.945897579193115, "learning_rate": 4.038437606071614e-06, "loss": 0.41738777, "memory(GiB)": 34.88, "step": 86210, "train_speed(iter/s)": 0.4119 }, { "acc": 0.92208843, "epoch": 2.3343622234857717, "grad_norm": 4.328736782073975, "learning_rate": 4.0378885064757415e-06, "loss": 0.38691192, "memory(GiB)": 34.88, "step": 86215, "train_speed(iter/s)": 0.411901 }, { "acc": 0.91390533, "epoch": 2.334497603768987, "grad_norm": 5.690059661865234, "learning_rate": 4.037339418938471e-06, "loss": 0.52379808, "memory(GiB)": 34.88, "step": 86220, "train_speed(iter/s)": 0.411902 }, { "acc": 0.91149235, "epoch": 2.3346329840522024, "grad_norm": 7.209787845611572, "learning_rate": 4.036790343466678e-06, "loss": 0.44871392, "memory(GiB)": 34.88, "step": 86225, "train_speed(iter/s)": 0.411904 }, { "acc": 0.92460537, "epoch": 2.3347683643354182, "grad_norm": 7.035459995269775, "learning_rate": 4.036241280067244e-06, "loss": 0.44414864, "memory(GiB)": 34.88, "step": 86230, "train_speed(iter/s)": 0.411905 }, { "acc": 0.92336483, "epoch": 2.3349037446186336, "grad_norm": 7.579524040222168, "learning_rate": 4.0356922287470445e-06, "loss": 0.49054451, "memory(GiB)": 34.88, "step": 86235, "train_speed(iter/s)": 0.411906 }, { "acc": 0.92072029, "epoch": 2.3350391249018494, "grad_norm": 9.746760368347168, "learning_rate": 4.035143189512958e-06, "loss": 0.41393528, "memory(GiB)": 34.88, "step": 86240, "train_speed(iter/s)": 0.411907 }, { "acc": 0.91688471, "epoch": 2.3351745051850648, "grad_norm": 4.016243934631348, "learning_rate": 4.034594162371862e-06, "loss": 0.5585804, "memory(GiB)": 34.88, "step": 86245, "train_speed(iter/s)": 0.411909 }, { "acc": 0.92424154, "epoch": 2.3353098854682806, "grad_norm": 8.869606971740723, "learning_rate": 4.0340451473306345e-06, "loss": 0.402144, "memory(GiB)": 34.88, "step": 86250, "train_speed(iter/s)": 0.41191 }, { "acc": 0.93578949, "epoch": 2.335445265751496, "grad_norm": 4.6954569816589355, "learning_rate": 4.033496144396156e-06, "loss": 0.32202382, "memory(GiB)": 34.88, "step": 86255, "train_speed(iter/s)": 0.411911 }, { "acc": 0.92314339, "epoch": 2.3355806460347113, "grad_norm": 4.307572364807129, "learning_rate": 4.032947153575297e-06, "loss": 0.3544281, "memory(GiB)": 34.88, "step": 86260, "train_speed(iter/s)": 0.411913 }, { "acc": 0.92350569, "epoch": 2.335716026317927, "grad_norm": 9.547191619873047, "learning_rate": 4.032398174874941e-06, "loss": 0.44704704, "memory(GiB)": 34.88, "step": 86265, "train_speed(iter/s)": 0.411914 }, { "acc": 0.92982368, "epoch": 2.3358514066011424, "grad_norm": 8.478973388671875, "learning_rate": 4.031849208301962e-06, "loss": 0.34131365, "memory(GiB)": 34.88, "step": 86270, "train_speed(iter/s)": 0.411915 }, { "acc": 0.91971817, "epoch": 2.3359867868843582, "grad_norm": 13.54876708984375, "learning_rate": 4.0313002538632365e-06, "loss": 0.50037632, "memory(GiB)": 34.88, "step": 86275, "train_speed(iter/s)": 0.411916 }, { "acc": 0.93232479, "epoch": 2.3361221671675736, "grad_norm": 4.507480621337891, "learning_rate": 4.030751311565644e-06, "loss": 0.33892226, "memory(GiB)": 34.88, "step": 86280, "train_speed(iter/s)": 0.411918 }, { "acc": 0.92841234, "epoch": 2.3362575474507894, "grad_norm": 8.895820617675781, "learning_rate": 4.030202381416058e-06, "loss": 0.46680489, "memory(GiB)": 34.88, "step": 86285, "train_speed(iter/s)": 0.411919 }, { "acc": 0.93839016, "epoch": 2.3363929277340048, "grad_norm": 6.09735107421875, "learning_rate": 4.029653463421358e-06, "loss": 0.36290708, "memory(GiB)": 34.88, "step": 86290, "train_speed(iter/s)": 0.41192 }, { "acc": 0.92397671, "epoch": 2.3365283080172206, "grad_norm": 4.920802593231201, "learning_rate": 4.029104557588418e-06, "loss": 0.43990273, "memory(GiB)": 34.88, "step": 86295, "train_speed(iter/s)": 0.411921 }, { "acc": 0.92725019, "epoch": 2.336663688300436, "grad_norm": 10.930792808532715, "learning_rate": 4.028555663924114e-06, "loss": 0.42635908, "memory(GiB)": 34.88, "step": 86300, "train_speed(iter/s)": 0.411922 }, { "acc": 0.92654552, "epoch": 2.3367990685836517, "grad_norm": 7.60607385635376, "learning_rate": 4.028006782435323e-06, "loss": 0.41094832, "memory(GiB)": 34.88, "step": 86305, "train_speed(iter/s)": 0.411923 }, { "acc": 0.92270279, "epoch": 2.336934448866867, "grad_norm": 8.725643157958984, "learning_rate": 4.02745791312892e-06, "loss": 0.4781621, "memory(GiB)": 34.88, "step": 86310, "train_speed(iter/s)": 0.411925 }, { "acc": 0.93448801, "epoch": 2.3370698291500824, "grad_norm": 4.056188583374023, "learning_rate": 4.026909056011783e-06, "loss": 0.32652466, "memory(GiB)": 34.88, "step": 86315, "train_speed(iter/s)": 0.411926 }, { "acc": 0.90883713, "epoch": 2.3372052094332982, "grad_norm": 9.986228942871094, "learning_rate": 4.026360211090783e-06, "loss": 0.54232006, "memory(GiB)": 34.88, "step": 86320, "train_speed(iter/s)": 0.411927 }, { "acc": 0.93820801, "epoch": 2.3373405897165136, "grad_norm": 6.0596232414245605, "learning_rate": 4.0258113783728e-06, "loss": 0.33246992, "memory(GiB)": 34.88, "step": 86325, "train_speed(iter/s)": 0.411928 }, { "acc": 0.93239889, "epoch": 2.3374759699997294, "grad_norm": 7.457967758178711, "learning_rate": 4.025262557864706e-06, "loss": 0.40475368, "memory(GiB)": 34.88, "step": 86330, "train_speed(iter/s)": 0.41193 }, { "acc": 0.91804104, "epoch": 2.3376113502829448, "grad_norm": 2.9418458938598633, "learning_rate": 4.02471374957338e-06, "loss": 0.41723723, "memory(GiB)": 34.88, "step": 86335, "train_speed(iter/s)": 0.411931 }, { "acc": 0.91541939, "epoch": 2.3377467305661606, "grad_norm": 7.0378875732421875, "learning_rate": 4.024164953505691e-06, "loss": 0.45090342, "memory(GiB)": 34.88, "step": 86340, "train_speed(iter/s)": 0.411932 }, { "acc": 0.92564487, "epoch": 2.337882110849376, "grad_norm": 9.240801811218262, "learning_rate": 4.023616169668518e-06, "loss": 0.38936024, "memory(GiB)": 34.88, "step": 86345, "train_speed(iter/s)": 0.411933 }, { "acc": 0.92017536, "epoch": 2.3380174911325913, "grad_norm": 10.980874061584473, "learning_rate": 4.023067398068734e-06, "loss": 0.44613886, "memory(GiB)": 34.88, "step": 86350, "train_speed(iter/s)": 0.411934 }, { "acc": 0.94490795, "epoch": 2.338152871415807, "grad_norm": 7.214897155761719, "learning_rate": 4.0225186387132145e-06, "loss": 0.30542064, "memory(GiB)": 34.88, "step": 86355, "train_speed(iter/s)": 0.411936 }, { "acc": 0.92292032, "epoch": 2.3382882516990224, "grad_norm": 10.639105796813965, "learning_rate": 4.021969891608834e-06, "loss": 0.47502856, "memory(GiB)": 34.88, "step": 86360, "train_speed(iter/s)": 0.411937 }, { "acc": 0.91948776, "epoch": 2.3384236319822382, "grad_norm": 8.045641899108887, "learning_rate": 4.021421156762465e-06, "loss": 0.44087386, "memory(GiB)": 34.88, "step": 86365, "train_speed(iter/s)": 0.411938 }, { "acc": 0.91961756, "epoch": 2.3385590122654536, "grad_norm": 7.616573333740234, "learning_rate": 4.0208724341809835e-06, "loss": 0.47288904, "memory(GiB)": 34.88, "step": 86370, "train_speed(iter/s)": 0.411939 }, { "acc": 0.92758589, "epoch": 2.3386943925486694, "grad_norm": 8.477099418640137, "learning_rate": 4.020323723871261e-06, "loss": 0.45856843, "memory(GiB)": 34.88, "step": 86375, "train_speed(iter/s)": 0.411941 }, { "acc": 0.93744183, "epoch": 2.3388297728318848, "grad_norm": 12.734382629394531, "learning_rate": 4.019775025840173e-06, "loss": 0.42884936, "memory(GiB)": 34.88, "step": 86380, "train_speed(iter/s)": 0.411942 }, { "acc": 0.94277353, "epoch": 2.3389651531151, "grad_norm": 3.0524134635925293, "learning_rate": 4.019226340094593e-06, "loss": 0.2983139, "memory(GiB)": 34.88, "step": 86385, "train_speed(iter/s)": 0.411943 }, { "acc": 0.93198452, "epoch": 2.339100533398316, "grad_norm": 23.111560821533203, "learning_rate": 4.018677666641391e-06, "loss": 0.41087437, "memory(GiB)": 34.88, "step": 86390, "train_speed(iter/s)": 0.411944 }, { "acc": 0.92103376, "epoch": 2.3392359136815313, "grad_norm": 8.931356430053711, "learning_rate": 4.018129005487446e-06, "loss": 0.44259009, "memory(GiB)": 34.88, "step": 86395, "train_speed(iter/s)": 0.411945 }, { "acc": 0.94414749, "epoch": 2.339371293964747, "grad_norm": 4.100259780883789, "learning_rate": 4.017580356639627e-06, "loss": 0.36707678, "memory(GiB)": 34.88, "step": 86400, "train_speed(iter/s)": 0.411947 }, { "acc": 0.93533783, "epoch": 2.3395066742479624, "grad_norm": 4.708144187927246, "learning_rate": 4.017031720104809e-06, "loss": 0.39816771, "memory(GiB)": 34.88, "step": 86405, "train_speed(iter/s)": 0.411947 }, { "acc": 0.90246983, "epoch": 2.3396420545311782, "grad_norm": 8.75815486907959, "learning_rate": 4.016483095889862e-06, "loss": 0.53336067, "memory(GiB)": 34.88, "step": 86410, "train_speed(iter/s)": 0.411949 }, { "acc": 0.91226597, "epoch": 2.3397774348143936, "grad_norm": 9.710713386535645, "learning_rate": 4.015934484001661e-06, "loss": 0.48861132, "memory(GiB)": 34.88, "step": 86415, "train_speed(iter/s)": 0.41195 }, { "acc": 0.94130154, "epoch": 2.339912815097609, "grad_norm": 10.402941703796387, "learning_rate": 4.015385884447079e-06, "loss": 0.2945493, "memory(GiB)": 34.88, "step": 86420, "train_speed(iter/s)": 0.411951 }, { "acc": 0.94078226, "epoch": 2.3400481953808248, "grad_norm": 13.553646087646484, "learning_rate": 4.014837297232985e-06, "loss": 0.3710187, "memory(GiB)": 34.88, "step": 86425, "train_speed(iter/s)": 0.411952 }, { "acc": 0.93393307, "epoch": 2.34018357566404, "grad_norm": 16.4633731842041, "learning_rate": 4.0142887223662556e-06, "loss": 0.38502116, "memory(GiB)": 34.88, "step": 86430, "train_speed(iter/s)": 0.411953 }, { "acc": 0.92731934, "epoch": 2.340318955947256, "grad_norm": 4.585398197174072, "learning_rate": 4.0137401598537574e-06, "loss": 0.44247375, "memory(GiB)": 34.88, "step": 86435, "train_speed(iter/s)": 0.411954 }, { "acc": 0.92200623, "epoch": 2.3404543362304713, "grad_norm": 14.41181468963623, "learning_rate": 4.013191609702367e-06, "loss": 0.44113865, "memory(GiB)": 34.88, "step": 86440, "train_speed(iter/s)": 0.411956 }, { "acc": 0.92631302, "epoch": 2.340589716513687, "grad_norm": 8.811348915100098, "learning_rate": 4.012643071918953e-06, "loss": 0.42018499, "memory(GiB)": 34.88, "step": 86445, "train_speed(iter/s)": 0.411957 }, { "acc": 0.93398628, "epoch": 2.3407250967969024, "grad_norm": 10.133666038513184, "learning_rate": 4.01209454651039e-06, "loss": 0.36700869, "memory(GiB)": 34.88, "step": 86450, "train_speed(iter/s)": 0.411958 }, { "acc": 0.92987576, "epoch": 2.3408604770801182, "grad_norm": 6.613771915435791, "learning_rate": 4.011546033483546e-06, "loss": 0.34466593, "memory(GiB)": 34.88, "step": 86455, "train_speed(iter/s)": 0.411959 }, { "acc": 0.91228561, "epoch": 2.3409958573633336, "grad_norm": 24.802324295043945, "learning_rate": 4.010997532845294e-06, "loss": 0.43726978, "memory(GiB)": 34.88, "step": 86460, "train_speed(iter/s)": 0.411961 }, { "acc": 0.92856541, "epoch": 2.3411312376465494, "grad_norm": 6.200396537780762, "learning_rate": 4.010449044602507e-06, "loss": 0.35412793, "memory(GiB)": 34.88, "step": 86465, "train_speed(iter/s)": 0.411962 }, { "acc": 0.929984, "epoch": 2.3412666179297648, "grad_norm": 8.245441436767578, "learning_rate": 4.00990056876205e-06, "loss": 0.40426764, "memory(GiB)": 34.88, "step": 86470, "train_speed(iter/s)": 0.411963 }, { "acc": 0.93230782, "epoch": 2.34140199821298, "grad_norm": 8.932828903198242, "learning_rate": 4.0093521053308006e-06, "loss": 0.36278872, "memory(GiB)": 34.88, "step": 86475, "train_speed(iter/s)": 0.411964 }, { "acc": 0.92619143, "epoch": 2.341537378496196, "grad_norm": 6.029951095581055, "learning_rate": 4.0088036543156235e-06, "loss": 0.37594075, "memory(GiB)": 34.88, "step": 86480, "train_speed(iter/s)": 0.411965 }, { "acc": 0.91562881, "epoch": 2.3416727587794113, "grad_norm": 6.070553302764893, "learning_rate": 4.008255215723394e-06, "loss": 0.43455811, "memory(GiB)": 34.88, "step": 86485, "train_speed(iter/s)": 0.411967 }, { "acc": 0.94184322, "epoch": 2.341808139062627, "grad_norm": 8.451655387878418, "learning_rate": 4.007706789560979e-06, "loss": 0.317941, "memory(GiB)": 34.88, "step": 86490, "train_speed(iter/s)": 0.411968 }, { "acc": 0.91141243, "epoch": 2.3419435193458424, "grad_norm": 4.433671474456787, "learning_rate": 4.00715837583525e-06, "loss": 0.46414723, "memory(GiB)": 34.88, "step": 86495, "train_speed(iter/s)": 0.411969 }, { "acc": 0.92325115, "epoch": 2.3420788996290582, "grad_norm": 11.350536346435547, "learning_rate": 4.006609974553076e-06, "loss": 0.39856186, "memory(GiB)": 34.88, "step": 86500, "train_speed(iter/s)": 0.41197 }, { "acc": 0.90246925, "epoch": 2.3422142799122736, "grad_norm": 6.357539176940918, "learning_rate": 4.0060615857213285e-06, "loss": 0.52418962, "memory(GiB)": 34.88, "step": 86505, "train_speed(iter/s)": 0.411972 }, { "acc": 0.9233778, "epoch": 2.342349660195489, "grad_norm": 8.367894172668457, "learning_rate": 4.005513209346876e-06, "loss": 0.39878039, "memory(GiB)": 34.88, "step": 86510, "train_speed(iter/s)": 0.411973 }, { "acc": 0.93696842, "epoch": 2.3424850404787048, "grad_norm": 10.25460147857666, "learning_rate": 4.004964845436589e-06, "loss": 0.37192307, "memory(GiB)": 34.88, "step": 86515, "train_speed(iter/s)": 0.411974 }, { "acc": 0.91344795, "epoch": 2.34262042076192, "grad_norm": 5.703423500061035, "learning_rate": 4.004416493997335e-06, "loss": 0.42514653, "memory(GiB)": 34.88, "step": 86520, "train_speed(iter/s)": 0.411975 }, { "acc": 0.91903572, "epoch": 2.342755801045136, "grad_norm": 6.686334609985352, "learning_rate": 4.003868155035983e-06, "loss": 0.40667481, "memory(GiB)": 34.88, "step": 86525, "train_speed(iter/s)": 0.411976 }, { "acc": 0.9219451, "epoch": 2.3428911813283513, "grad_norm": 10.281234741210938, "learning_rate": 4.003319828559406e-06, "loss": 0.47237234, "memory(GiB)": 34.88, "step": 86530, "train_speed(iter/s)": 0.411977 }, { "acc": 0.92754726, "epoch": 2.343026561611567, "grad_norm": 9.479324340820312, "learning_rate": 4.002771514574468e-06, "loss": 0.38641238, "memory(GiB)": 34.88, "step": 86535, "train_speed(iter/s)": 0.411978 }, { "acc": 0.91886292, "epoch": 2.3431619418947824, "grad_norm": 14.333858489990234, "learning_rate": 4.00222321308804e-06, "loss": 0.51264963, "memory(GiB)": 34.88, "step": 86540, "train_speed(iter/s)": 0.411979 }, { "acc": 0.91889763, "epoch": 2.343297322177998, "grad_norm": 7.207271575927734, "learning_rate": 4.001674924106992e-06, "loss": 0.43207941, "memory(GiB)": 34.88, "step": 86545, "train_speed(iter/s)": 0.411981 }, { "acc": 0.91669493, "epoch": 2.3434327024612136, "grad_norm": 6.586143493652344, "learning_rate": 4.00112664763819e-06, "loss": 0.45038967, "memory(GiB)": 34.88, "step": 86550, "train_speed(iter/s)": 0.411982 }, { "acc": 0.92651529, "epoch": 2.343568082744429, "grad_norm": 12.852444648742676, "learning_rate": 4.0005783836885034e-06, "loss": 0.44165826, "memory(GiB)": 34.88, "step": 86555, "train_speed(iter/s)": 0.411983 }, { "acc": 0.93123569, "epoch": 2.3437034630276448, "grad_norm": 4.583801746368408, "learning_rate": 4.000030132264798e-06, "loss": 0.39239309, "memory(GiB)": 34.88, "step": 86560, "train_speed(iter/s)": 0.411984 }, { "acc": 0.91853704, "epoch": 2.34383884331086, "grad_norm": 9.410087585449219, "learning_rate": 3.999481893373945e-06, "loss": 0.41575699, "memory(GiB)": 34.88, "step": 86565, "train_speed(iter/s)": 0.411986 }, { "acc": 0.93040895, "epoch": 2.343974223594076, "grad_norm": 7.392806053161621, "learning_rate": 3.998933667022811e-06, "loss": 0.3870189, "memory(GiB)": 34.88, "step": 86570, "train_speed(iter/s)": 0.411986 }, { "acc": 0.92290611, "epoch": 2.3441096038772913, "grad_norm": 5.136490345001221, "learning_rate": 3.998385453218262e-06, "loss": 0.44722672, "memory(GiB)": 34.88, "step": 86575, "train_speed(iter/s)": 0.411987 }, { "acc": 0.92922211, "epoch": 2.3442449841605066, "grad_norm": 6.839222431182861, "learning_rate": 3.997837251967168e-06, "loss": 0.36267786, "memory(GiB)": 34.88, "step": 86580, "train_speed(iter/s)": 0.411989 }, { "acc": 0.9249402, "epoch": 2.3443803644437224, "grad_norm": 13.063308715820312, "learning_rate": 3.997289063276395e-06, "loss": 0.40200477, "memory(GiB)": 34.88, "step": 86585, "train_speed(iter/s)": 0.41199 }, { "acc": 0.92940063, "epoch": 2.344515744726938, "grad_norm": 12.960539817810059, "learning_rate": 3.9967408871528115e-06, "loss": 0.45093174, "memory(GiB)": 34.88, "step": 86590, "train_speed(iter/s)": 0.411991 }, { "acc": 0.94139824, "epoch": 2.3446511250101536, "grad_norm": 7.178346157073975, "learning_rate": 3.996192723603282e-06, "loss": 0.31935728, "memory(GiB)": 34.88, "step": 86595, "train_speed(iter/s)": 0.411993 }, { "acc": 0.93527489, "epoch": 2.344786505293369, "grad_norm": 7.1408185958862305, "learning_rate": 3.995644572634675e-06, "loss": 0.33875742, "memory(GiB)": 34.88, "step": 86600, "train_speed(iter/s)": 0.411994 }, { "acc": 0.92875643, "epoch": 2.3449218855765848, "grad_norm": 10.616904258728027, "learning_rate": 3.995096434253855e-06, "loss": 0.3826488, "memory(GiB)": 34.88, "step": 86605, "train_speed(iter/s)": 0.411994 }, { "acc": 0.92064152, "epoch": 2.3450572658598, "grad_norm": 8.970046043395996, "learning_rate": 3.994548308467692e-06, "loss": 0.42682514, "memory(GiB)": 34.88, "step": 86610, "train_speed(iter/s)": 0.411996 }, { "acc": 0.9314786, "epoch": 2.345192646143016, "grad_norm": 8.729475021362305, "learning_rate": 3.994000195283051e-06, "loss": 0.34604783, "memory(GiB)": 34.88, "step": 86615, "train_speed(iter/s)": 0.411997 }, { "acc": 0.90509825, "epoch": 2.3453280264262313, "grad_norm": 7.7723517417907715, "learning_rate": 3.9934520947067965e-06, "loss": 0.47124481, "memory(GiB)": 34.88, "step": 86620, "train_speed(iter/s)": 0.411997 }, { "acc": 0.90129223, "epoch": 2.345463406709447, "grad_norm": 7.019114017486572, "learning_rate": 3.992904006745796e-06, "loss": 0.5815681, "memory(GiB)": 34.88, "step": 86625, "train_speed(iter/s)": 0.411999 }, { "acc": 0.91815701, "epoch": 2.3455987869926624, "grad_norm": 10.95539665222168, "learning_rate": 3.992355931406917e-06, "loss": 0.40876455, "memory(GiB)": 34.88, "step": 86630, "train_speed(iter/s)": 0.412 }, { "acc": 0.92741938, "epoch": 2.345734167275878, "grad_norm": 11.75303840637207, "learning_rate": 3.991807868697023e-06, "loss": 0.45192566, "memory(GiB)": 34.88, "step": 86635, "train_speed(iter/s)": 0.412001 }, { "acc": 0.92071342, "epoch": 2.3458695475590936, "grad_norm": 9.80403995513916, "learning_rate": 3.991259818622978e-06, "loss": 0.41448979, "memory(GiB)": 34.88, "step": 86640, "train_speed(iter/s)": 0.412002 }, { "acc": 0.91734896, "epoch": 2.346004927842309, "grad_norm": 32.07851791381836, "learning_rate": 3.9907117811916506e-06, "loss": 0.4807755, "memory(GiB)": 34.88, "step": 86645, "train_speed(iter/s)": 0.412004 }, { "acc": 0.94472237, "epoch": 2.3461403081255248, "grad_norm": 9.626757621765137, "learning_rate": 3.990163756409905e-06, "loss": 0.34356449, "memory(GiB)": 34.88, "step": 86650, "train_speed(iter/s)": 0.412005 }, { "acc": 0.9380497, "epoch": 2.34627568840874, "grad_norm": 20.39128303527832, "learning_rate": 3.989615744284606e-06, "loss": 0.41694002, "memory(GiB)": 34.88, "step": 86655, "train_speed(iter/s)": 0.412006 }, { "acc": 0.93571978, "epoch": 2.346411068691956, "grad_norm": 6.128800392150879, "learning_rate": 3.989067744822619e-06, "loss": 0.37291217, "memory(GiB)": 34.88, "step": 86660, "train_speed(iter/s)": 0.412007 }, { "acc": 0.93774281, "epoch": 2.3465464489751713, "grad_norm": 3.6966843605041504, "learning_rate": 3.988519758030807e-06, "loss": 0.33086882, "memory(GiB)": 34.88, "step": 86665, "train_speed(iter/s)": 0.412008 }, { "acc": 0.91755142, "epoch": 2.3466818292583866, "grad_norm": 5.782377243041992, "learning_rate": 3.9879717839160395e-06, "loss": 0.49521809, "memory(GiB)": 34.88, "step": 86670, "train_speed(iter/s)": 0.412009 }, { "acc": 0.92562351, "epoch": 2.3468172095416024, "grad_norm": 7.338794708251953, "learning_rate": 3.987423822485175e-06, "loss": 0.42639589, "memory(GiB)": 34.88, "step": 86675, "train_speed(iter/s)": 0.41201 }, { "acc": 0.92016525, "epoch": 2.346952589824818, "grad_norm": 6.358279705047607, "learning_rate": 3.986875873745081e-06, "loss": 0.45757513, "memory(GiB)": 34.88, "step": 86680, "train_speed(iter/s)": 0.412012 }, { "acc": 0.93874464, "epoch": 2.3470879701080336, "grad_norm": 3.499969959259033, "learning_rate": 3.98632793770262e-06, "loss": 0.3424221, "memory(GiB)": 34.88, "step": 86685, "train_speed(iter/s)": 0.412013 }, { "acc": 0.9338418, "epoch": 2.347223350391249, "grad_norm": 8.671948432922363, "learning_rate": 3.985780014364657e-06, "loss": 0.40191011, "memory(GiB)": 34.88, "step": 86690, "train_speed(iter/s)": 0.412014 }, { "acc": 0.92803764, "epoch": 2.3473587306744648, "grad_norm": 7.199356555938721, "learning_rate": 3.985232103738057e-06, "loss": 0.45346994, "memory(GiB)": 34.88, "step": 86695, "train_speed(iter/s)": 0.412015 }, { "acc": 0.93336468, "epoch": 2.34749411095768, "grad_norm": 6.521695137023926, "learning_rate": 3.98468420582968e-06, "loss": 0.36973209, "memory(GiB)": 34.88, "step": 86700, "train_speed(iter/s)": 0.412016 }, { "acc": 0.91332779, "epoch": 2.3476294912408955, "grad_norm": 14.77981185913086, "learning_rate": 3.984136320646395e-06, "loss": 0.50763407, "memory(GiB)": 34.88, "step": 86705, "train_speed(iter/s)": 0.412018 }, { "acc": 0.93837032, "epoch": 2.3477648715241113, "grad_norm": 7.016881942749023, "learning_rate": 3.98358844819506e-06, "loss": 0.31190088, "memory(GiB)": 34.88, "step": 86710, "train_speed(iter/s)": 0.412019 }, { "acc": 0.93149853, "epoch": 2.3479002518073266, "grad_norm": 8.306965827941895, "learning_rate": 3.983040588482542e-06, "loss": 0.34793272, "memory(GiB)": 34.88, "step": 86715, "train_speed(iter/s)": 0.41202 }, { "acc": 0.93093557, "epoch": 2.3480356320905424, "grad_norm": 10.45337200164795, "learning_rate": 3.982492741515702e-06, "loss": 0.37415648, "memory(GiB)": 34.88, "step": 86720, "train_speed(iter/s)": 0.412021 }, { "acc": 0.94157372, "epoch": 2.348171012373758, "grad_norm": 8.149757385253906, "learning_rate": 3.981944907301402e-06, "loss": 0.34182034, "memory(GiB)": 34.88, "step": 86725, "train_speed(iter/s)": 0.412022 }, { "acc": 0.91821709, "epoch": 2.3483063926569736, "grad_norm": 4.651970863342285, "learning_rate": 3.981397085846507e-06, "loss": 0.40254674, "memory(GiB)": 34.88, "step": 86730, "train_speed(iter/s)": 0.412024 }, { "acc": 0.94658127, "epoch": 2.348441772940189, "grad_norm": 8.401610374450684, "learning_rate": 3.9808492771578785e-06, "loss": 0.30218472, "memory(GiB)": 34.88, "step": 86735, "train_speed(iter/s)": 0.412025 }, { "acc": 0.93569107, "epoch": 2.3485771532234043, "grad_norm": 8.997416496276855, "learning_rate": 3.980301481242378e-06, "loss": 0.35509415, "memory(GiB)": 34.88, "step": 86740, "train_speed(iter/s)": 0.412025 }, { "acc": 0.91790562, "epoch": 2.34871253350662, "grad_norm": 6.54166316986084, "learning_rate": 3.979753698106869e-06, "loss": 0.40491457, "memory(GiB)": 34.88, "step": 86745, "train_speed(iter/s)": 0.412027 }, { "acc": 0.91142845, "epoch": 2.3488479137898355, "grad_norm": 9.64816951751709, "learning_rate": 3.979205927758215e-06, "loss": 0.42937193, "memory(GiB)": 34.88, "step": 86750, "train_speed(iter/s)": 0.412028 }, { "acc": 0.92741079, "epoch": 2.3489832940730513, "grad_norm": 10.485669136047363, "learning_rate": 3.978658170203275e-06, "loss": 0.42574549, "memory(GiB)": 34.88, "step": 86755, "train_speed(iter/s)": 0.412029 }, { "acc": 0.91801329, "epoch": 2.3491186743562666, "grad_norm": 5.488868713378906, "learning_rate": 3.9781104254489115e-06, "loss": 0.50967789, "memory(GiB)": 34.88, "step": 86760, "train_speed(iter/s)": 0.412029 }, { "acc": 0.92257881, "epoch": 2.3492540546394824, "grad_norm": 5.253168106079102, "learning_rate": 3.977562693501986e-06, "loss": 0.40833063, "memory(GiB)": 34.88, "step": 86765, "train_speed(iter/s)": 0.412031 }, { "acc": 0.91796074, "epoch": 2.349389434922698, "grad_norm": 10.371623992919922, "learning_rate": 3.977014974369361e-06, "loss": 0.44535933, "memory(GiB)": 34.88, "step": 86770, "train_speed(iter/s)": 0.412032 }, { "acc": 0.92465601, "epoch": 2.349524815205913, "grad_norm": 5.909769058227539, "learning_rate": 3.9764672680578975e-06, "loss": 0.37023611, "memory(GiB)": 34.88, "step": 86775, "train_speed(iter/s)": 0.412033 }, { "acc": 0.92546158, "epoch": 2.349660195489129, "grad_norm": 5.6606292724609375, "learning_rate": 3.975919574574455e-06, "loss": 0.39294195, "memory(GiB)": 34.88, "step": 86780, "train_speed(iter/s)": 0.412035 }, { "acc": 0.92409306, "epoch": 2.3497955757723443, "grad_norm": 8.870529174804688, "learning_rate": 3.9753718939258965e-06, "loss": 0.42774487, "memory(GiB)": 34.88, "step": 86785, "train_speed(iter/s)": 0.412036 }, { "acc": 0.94304714, "epoch": 2.34993095605556, "grad_norm": 4.156755447387695, "learning_rate": 3.974824226119081e-06, "loss": 0.33826392, "memory(GiB)": 34.88, "step": 86790, "train_speed(iter/s)": 0.412037 }, { "acc": 0.92802896, "epoch": 2.3500663363387755, "grad_norm": 8.497102737426758, "learning_rate": 3.974276571160872e-06, "loss": 0.40676203, "memory(GiB)": 34.88, "step": 86795, "train_speed(iter/s)": 0.412038 }, { "acc": 0.92863731, "epoch": 2.3502017166219913, "grad_norm": 4.983396530151367, "learning_rate": 3.973728929058127e-06, "loss": 0.40961785, "memory(GiB)": 34.88, "step": 86800, "train_speed(iter/s)": 0.412039 }, { "acc": 0.9273674, "epoch": 2.3503370969052066, "grad_norm": 7.421241760253906, "learning_rate": 3.973181299817706e-06, "loss": 0.39838409, "memory(GiB)": 34.88, "step": 86805, "train_speed(iter/s)": 0.412041 }, { "acc": 0.9307744, "epoch": 2.3504724771884224, "grad_norm": 6.200963020324707, "learning_rate": 3.972633683446472e-06, "loss": 0.41391821, "memory(GiB)": 34.88, "step": 86810, "train_speed(iter/s)": 0.412042 }, { "acc": 0.93376875, "epoch": 2.350607857471638, "grad_norm": 6.118529319763184, "learning_rate": 3.972086079951282e-06, "loss": 0.35976009, "memory(GiB)": 34.88, "step": 86815, "train_speed(iter/s)": 0.412043 }, { "acc": 0.92568245, "epoch": 2.3507432377548536, "grad_norm": 8.428348541259766, "learning_rate": 3.971538489338998e-06, "loss": 0.4518301, "memory(GiB)": 34.88, "step": 86820, "train_speed(iter/s)": 0.412044 }, { "acc": 0.90314178, "epoch": 2.350878618038069, "grad_norm": 11.16923999786377, "learning_rate": 3.970990911616478e-06, "loss": 0.55259876, "memory(GiB)": 34.88, "step": 86825, "train_speed(iter/s)": 0.412045 }, { "acc": 0.92488146, "epoch": 2.3510139983212843, "grad_norm": 5.2206645011901855, "learning_rate": 3.970443346790584e-06, "loss": 0.42715735, "memory(GiB)": 34.88, "step": 86830, "train_speed(iter/s)": 0.412046 }, { "acc": 0.93260918, "epoch": 2.3511493786045, "grad_norm": 2.9628970623016357, "learning_rate": 3.969895794868172e-06, "loss": 0.38695195, "memory(GiB)": 34.88, "step": 86835, "train_speed(iter/s)": 0.412048 }, { "acc": 0.92649755, "epoch": 2.3512847588877155, "grad_norm": 12.10562801361084, "learning_rate": 3.969348255856104e-06, "loss": 0.40198584, "memory(GiB)": 34.88, "step": 86840, "train_speed(iter/s)": 0.412049 }, { "acc": 0.9377924, "epoch": 2.3514201391709313, "grad_norm": 4.895345687866211, "learning_rate": 3.968800729761238e-06, "loss": 0.26504242, "memory(GiB)": 34.88, "step": 86845, "train_speed(iter/s)": 0.41205 }, { "acc": 0.91760349, "epoch": 2.3515555194541466, "grad_norm": 11.194830894470215, "learning_rate": 3.9682532165904306e-06, "loss": 0.44798446, "memory(GiB)": 34.88, "step": 86850, "train_speed(iter/s)": 0.412051 }, { "acc": 0.9123724, "epoch": 2.3516908997373625, "grad_norm": 9.031000137329102, "learning_rate": 3.967705716350545e-06, "loss": 0.4571537, "memory(GiB)": 34.88, "step": 86855, "train_speed(iter/s)": 0.412053 }, { "acc": 0.92392349, "epoch": 2.351826280020578, "grad_norm": 6.976497650146484, "learning_rate": 3.967158229048435e-06, "loss": 0.3881979, "memory(GiB)": 34.88, "step": 86860, "train_speed(iter/s)": 0.412054 }, { "acc": 0.92127609, "epoch": 2.351961660303793, "grad_norm": 10.345653533935547, "learning_rate": 3.966610754690963e-06, "loss": 0.43117094, "memory(GiB)": 34.88, "step": 86865, "train_speed(iter/s)": 0.412055 }, { "acc": 0.92073708, "epoch": 2.352097040587009, "grad_norm": 6.818457126617432, "learning_rate": 3.966063293284984e-06, "loss": 0.44589334, "memory(GiB)": 34.88, "step": 86870, "train_speed(iter/s)": 0.412056 }, { "acc": 0.91861858, "epoch": 2.3522324208702243, "grad_norm": 14.880082130432129, "learning_rate": 3.965515844837358e-06, "loss": 0.46859598, "memory(GiB)": 34.88, "step": 86875, "train_speed(iter/s)": 0.412057 }, { "acc": 0.92763157, "epoch": 2.35236780115344, "grad_norm": 7.42929744720459, "learning_rate": 3.9649684093549435e-06, "loss": 0.40893459, "memory(GiB)": 34.88, "step": 86880, "train_speed(iter/s)": 0.412059 }, { "acc": 0.92021465, "epoch": 2.3525031814366555, "grad_norm": 11.53894329071045, "learning_rate": 3.9644209868445946e-06, "loss": 0.40269594, "memory(GiB)": 34.88, "step": 86885, "train_speed(iter/s)": 0.41206 }, { "acc": 0.9363184, "epoch": 2.3526385617198713, "grad_norm": 18.382627487182617, "learning_rate": 3.963873577313173e-06, "loss": 0.35821693, "memory(GiB)": 34.88, "step": 86890, "train_speed(iter/s)": 0.412061 }, { "acc": 0.91821804, "epoch": 2.3527739420030866, "grad_norm": 16.183677673339844, "learning_rate": 3.963326180767534e-06, "loss": 0.45402851, "memory(GiB)": 34.88, "step": 86895, "train_speed(iter/s)": 0.412062 }, { "acc": 0.92711124, "epoch": 2.352909322286302, "grad_norm": 12.246789932250977, "learning_rate": 3.9627787972145344e-06, "loss": 0.47924027, "memory(GiB)": 34.88, "step": 86900, "train_speed(iter/s)": 0.412063 }, { "acc": 0.94175262, "epoch": 2.353044702569518, "grad_norm": 6.766268253326416, "learning_rate": 3.9622314266610315e-06, "loss": 0.27993658, "memory(GiB)": 34.88, "step": 86905, "train_speed(iter/s)": 0.412064 }, { "acc": 0.91712646, "epoch": 2.353180082852733, "grad_norm": 10.770389556884766, "learning_rate": 3.961684069113884e-06, "loss": 0.39831595, "memory(GiB)": 34.88, "step": 86910, "train_speed(iter/s)": 0.412065 }, { "acc": 0.91925335, "epoch": 2.353315463135949, "grad_norm": 7.461445331573486, "learning_rate": 3.961136724579947e-06, "loss": 0.53374681, "memory(GiB)": 34.88, "step": 86915, "train_speed(iter/s)": 0.412066 }, { "acc": 0.92645645, "epoch": 2.3534508434191643, "grad_norm": 6.770612716674805, "learning_rate": 3.960589393066077e-06, "loss": 0.38477409, "memory(GiB)": 34.88, "step": 86920, "train_speed(iter/s)": 0.412068 }, { "acc": 0.92489967, "epoch": 2.35358622370238, "grad_norm": 18.599750518798828, "learning_rate": 3.960042074579133e-06, "loss": 0.46517882, "memory(GiB)": 34.88, "step": 86925, "train_speed(iter/s)": 0.412069 }, { "acc": 0.93685169, "epoch": 2.3537216039855955, "grad_norm": 6.410057544708252, "learning_rate": 3.9594947691259664e-06, "loss": 0.36699345, "memory(GiB)": 34.88, "step": 86930, "train_speed(iter/s)": 0.41207 }, { "acc": 0.9353632, "epoch": 2.353856984268811, "grad_norm": 16.826927185058594, "learning_rate": 3.9589474767134365e-06, "loss": 0.37910717, "memory(GiB)": 34.88, "step": 86935, "train_speed(iter/s)": 0.412071 }, { "acc": 0.92458477, "epoch": 2.3539923645520267, "grad_norm": 6.058166027069092, "learning_rate": 3.958400197348399e-06, "loss": 0.44520445, "memory(GiB)": 34.88, "step": 86940, "train_speed(iter/s)": 0.412073 }, { "acc": 0.93740768, "epoch": 2.354127744835242, "grad_norm": 5.358365058898926, "learning_rate": 3.957852931037709e-06, "loss": 0.35866888, "memory(GiB)": 34.88, "step": 86945, "train_speed(iter/s)": 0.412074 }, { "acc": 0.92370987, "epoch": 2.354263125118458, "grad_norm": 4.221251487731934, "learning_rate": 3.957305677788222e-06, "loss": 0.38682463, "memory(GiB)": 34.88, "step": 86950, "train_speed(iter/s)": 0.412075 }, { "acc": 0.90711832, "epoch": 2.354398505401673, "grad_norm": 15.841954231262207, "learning_rate": 3.956758437606793e-06, "loss": 0.56653204, "memory(GiB)": 34.88, "step": 86955, "train_speed(iter/s)": 0.412076 }, { "acc": 0.92477417, "epoch": 2.354533885684889, "grad_norm": 11.871540069580078, "learning_rate": 3.95621121050028e-06, "loss": 0.49162569, "memory(GiB)": 34.88, "step": 86960, "train_speed(iter/s)": 0.412077 }, { "acc": 0.93570881, "epoch": 2.3546692659681043, "grad_norm": 3.862366199493408, "learning_rate": 3.955663996475536e-06, "loss": 0.3276566, "memory(GiB)": 34.88, "step": 86965, "train_speed(iter/s)": 0.412079 }, { "acc": 0.92528706, "epoch": 2.35480464625132, "grad_norm": 6.407648086547852, "learning_rate": 3.955116795539416e-06, "loss": 0.44017901, "memory(GiB)": 34.88, "step": 86970, "train_speed(iter/s)": 0.41208 }, { "acc": 0.93114414, "epoch": 2.3549400265345355, "grad_norm": 6.400175094604492, "learning_rate": 3.954569607698773e-06, "loss": 0.39554253, "memory(GiB)": 34.88, "step": 86975, "train_speed(iter/s)": 0.412081 }, { "acc": 0.92591991, "epoch": 2.3550754068177513, "grad_norm": 10.71644401550293, "learning_rate": 3.954022432960464e-06, "loss": 0.37656279, "memory(GiB)": 34.88, "step": 86980, "train_speed(iter/s)": 0.412082 }, { "acc": 0.92389412, "epoch": 2.3552107871009667, "grad_norm": 7.406026363372803, "learning_rate": 3.953475271331343e-06, "loss": 0.38875551, "memory(GiB)": 34.88, "step": 86985, "train_speed(iter/s)": 0.412084 }, { "acc": 0.93088036, "epoch": 2.355346167384182, "grad_norm": 16.59610366821289, "learning_rate": 3.9529281228182635e-06, "loss": 0.44824491, "memory(GiB)": 34.88, "step": 86990, "train_speed(iter/s)": 0.412085 }, { "acc": 0.91682911, "epoch": 2.355481547667398, "grad_norm": 6.10072660446167, "learning_rate": 3.952380987428082e-06, "loss": 0.44687262, "memory(GiB)": 34.88, "step": 86995, "train_speed(iter/s)": 0.412086 }, { "acc": 0.92421455, "epoch": 2.355616927950613, "grad_norm": 10.52387523651123, "learning_rate": 3.951833865167649e-06, "loss": 0.40826488, "memory(GiB)": 34.88, "step": 87000, "train_speed(iter/s)": 0.412087 }, { "acc": 0.91505032, "epoch": 2.355752308233829, "grad_norm": 4.459901809692383, "learning_rate": 3.951286756043822e-06, "loss": 0.43354311, "memory(GiB)": 34.88, "step": 87005, "train_speed(iter/s)": 0.412089 }, { "acc": 0.92891731, "epoch": 2.3558876885170443, "grad_norm": 9.285451889038086, "learning_rate": 3.950739660063451e-06, "loss": 0.41385441, "memory(GiB)": 34.88, "step": 87010, "train_speed(iter/s)": 0.41209 }, { "acc": 0.91981754, "epoch": 2.35602306880026, "grad_norm": 5.928840160369873, "learning_rate": 3.950192577233393e-06, "loss": 0.36429405, "memory(GiB)": 34.88, "step": 87015, "train_speed(iter/s)": 0.412091 }, { "acc": 0.93485241, "epoch": 2.3561584490834755, "grad_norm": 85.71302795410156, "learning_rate": 3.949645507560496e-06, "loss": 0.35846922, "memory(GiB)": 34.88, "step": 87020, "train_speed(iter/s)": 0.412092 }, { "acc": 0.9193284, "epoch": 2.356293829366691, "grad_norm": 10.246170043945312, "learning_rate": 3.949098451051618e-06, "loss": 0.46027737, "memory(GiB)": 34.88, "step": 87025, "train_speed(iter/s)": 0.412093 }, { "acc": 0.92404671, "epoch": 2.3564292096499067, "grad_norm": 12.294221878051758, "learning_rate": 3.948551407713611e-06, "loss": 0.45542688, "memory(GiB)": 34.88, "step": 87030, "train_speed(iter/s)": 0.412094 }, { "acc": 0.92587748, "epoch": 2.356564589933122, "grad_norm": 9.026134490966797, "learning_rate": 3.948004377553326e-06, "loss": 0.42500544, "memory(GiB)": 34.88, "step": 87035, "train_speed(iter/s)": 0.412095 }, { "acc": 0.92826309, "epoch": 2.356699970216338, "grad_norm": 12.442935943603516, "learning_rate": 3.947457360577618e-06, "loss": 0.35410931, "memory(GiB)": 34.88, "step": 87040, "train_speed(iter/s)": 0.412097 }, { "acc": 0.93050442, "epoch": 2.356835350499553, "grad_norm": 5.622409820556641, "learning_rate": 3.946910356793338e-06, "loss": 0.36940947, "memory(GiB)": 34.88, "step": 87045, "train_speed(iter/s)": 0.412098 }, { "acc": 0.9204689, "epoch": 2.356970730782769, "grad_norm": 3.6219186782836914, "learning_rate": 3.94636336620734e-06, "loss": 0.38107176, "memory(GiB)": 34.88, "step": 87050, "train_speed(iter/s)": 0.412099 }, { "acc": 0.91274633, "epoch": 2.3571061110659843, "grad_norm": 7.207374095916748, "learning_rate": 3.945816388826472e-06, "loss": 0.49074879, "memory(GiB)": 34.88, "step": 87055, "train_speed(iter/s)": 0.4121 }, { "acc": 0.91923046, "epoch": 2.3572414913491997, "grad_norm": 8.503669738769531, "learning_rate": 3.945269424657589e-06, "loss": 0.37221467, "memory(GiB)": 34.88, "step": 87060, "train_speed(iter/s)": 0.412101 }, { "acc": 0.92308559, "epoch": 2.3573768716324155, "grad_norm": 5.761883735656738, "learning_rate": 3.944722473707545e-06, "loss": 0.43482499, "memory(GiB)": 34.88, "step": 87065, "train_speed(iter/s)": 0.412103 }, { "acc": 0.93707886, "epoch": 2.357512251915631, "grad_norm": 6.146855354309082, "learning_rate": 3.944175535983186e-06, "loss": 0.34731126, "memory(GiB)": 34.88, "step": 87070, "train_speed(iter/s)": 0.412104 }, { "acc": 0.91753769, "epoch": 2.3576476321988467, "grad_norm": 10.110581398010254, "learning_rate": 3.943628611491368e-06, "loss": 0.39619403, "memory(GiB)": 34.88, "step": 87075, "train_speed(iter/s)": 0.412105 }, { "acc": 0.93002949, "epoch": 2.357783012482062, "grad_norm": 4.127682209014893, "learning_rate": 3.943081700238942e-06, "loss": 0.35795317, "memory(GiB)": 34.88, "step": 87080, "train_speed(iter/s)": 0.412107 }, { "acc": 0.93931561, "epoch": 2.357918392765278, "grad_norm": 10.186700820922852, "learning_rate": 3.942534802232757e-06, "loss": 0.27632585, "memory(GiB)": 34.88, "step": 87085, "train_speed(iter/s)": 0.412108 }, { "acc": 0.94187431, "epoch": 2.358053773048493, "grad_norm": 4.393811225891113, "learning_rate": 3.941987917479665e-06, "loss": 0.34962358, "memory(GiB)": 34.88, "step": 87090, "train_speed(iter/s)": 0.412109 }, { "acc": 0.92824955, "epoch": 2.3581891533317085, "grad_norm": 3.7333767414093018, "learning_rate": 3.941441045986516e-06, "loss": 0.38232627, "memory(GiB)": 34.88, "step": 87095, "train_speed(iter/s)": 0.41211 }, { "acc": 0.92481022, "epoch": 2.3583245336149243, "grad_norm": 5.728460788726807, "learning_rate": 3.940894187760163e-06, "loss": 0.40662889, "memory(GiB)": 34.88, "step": 87100, "train_speed(iter/s)": 0.412112 }, { "acc": 0.9376543, "epoch": 2.3584599138981397, "grad_norm": 4.561769008636475, "learning_rate": 3.940347342807454e-06, "loss": 0.32610917, "memory(GiB)": 34.88, "step": 87105, "train_speed(iter/s)": 0.412113 }, { "acc": 0.92675648, "epoch": 2.3585952941813555, "grad_norm": 7.17717981338501, "learning_rate": 3.93980051113524e-06, "loss": 0.37162228, "memory(GiB)": 34.88, "step": 87110, "train_speed(iter/s)": 0.412114 }, { "acc": 0.91590147, "epoch": 2.358730674464571, "grad_norm": 7.642755031585693, "learning_rate": 3.939253692750372e-06, "loss": 0.41953559, "memory(GiB)": 34.88, "step": 87115, "train_speed(iter/s)": 0.412115 }, { "acc": 0.92848539, "epoch": 2.3588660547477867, "grad_norm": 5.2866692543029785, "learning_rate": 3.938706887659699e-06, "loss": 0.36673493, "memory(GiB)": 34.88, "step": 87120, "train_speed(iter/s)": 0.412116 }, { "acc": 0.91872215, "epoch": 2.359001435031002, "grad_norm": 12.378252029418945, "learning_rate": 3.938160095870071e-06, "loss": 0.4541584, "memory(GiB)": 34.88, "step": 87125, "train_speed(iter/s)": 0.412117 }, { "acc": 0.922995, "epoch": 2.359136815314218, "grad_norm": 22.206497192382812, "learning_rate": 3.937613317388338e-06, "loss": 0.42764459, "memory(GiB)": 34.88, "step": 87130, "train_speed(iter/s)": 0.412119 }, { "acc": 0.93498526, "epoch": 2.359272195597433, "grad_norm": 6.792474746704102, "learning_rate": 3.937066552221351e-06, "loss": 0.35166137, "memory(GiB)": 34.88, "step": 87135, "train_speed(iter/s)": 0.41212 }, { "acc": 0.92591782, "epoch": 2.359407575880649, "grad_norm": 8.100677490234375, "learning_rate": 3.936519800375954e-06, "loss": 0.39409311, "memory(GiB)": 34.88, "step": 87140, "train_speed(iter/s)": 0.412121 }, { "acc": 0.92970095, "epoch": 2.3595429561638643, "grad_norm": 7.687887668609619, "learning_rate": 3.935973061859003e-06, "loss": 0.37254539, "memory(GiB)": 34.88, "step": 87145, "train_speed(iter/s)": 0.412122 }, { "acc": 0.93673697, "epoch": 2.3596783364470797, "grad_norm": 5.481945991516113, "learning_rate": 3.935426336677341e-06, "loss": 0.3831594, "memory(GiB)": 34.88, "step": 87150, "train_speed(iter/s)": 0.412124 }, { "acc": 0.9260952, "epoch": 2.3598137167302955, "grad_norm": 9.105923652648926, "learning_rate": 3.9348796248378215e-06, "loss": 0.40881281, "memory(GiB)": 34.88, "step": 87155, "train_speed(iter/s)": 0.412125 }, { "acc": 0.91303768, "epoch": 2.359949097013511, "grad_norm": 7.427087306976318, "learning_rate": 3.934332926347289e-06, "loss": 0.47240295, "memory(GiB)": 34.88, "step": 87160, "train_speed(iter/s)": 0.412126 }, { "acc": 0.91151314, "epoch": 2.3600844772967267, "grad_norm": 8.90107536315918, "learning_rate": 3.933786241212597e-06, "loss": 0.49105978, "memory(GiB)": 34.88, "step": 87165, "train_speed(iter/s)": 0.412127 }, { "acc": 0.93068733, "epoch": 2.360219857579942, "grad_norm": 6.224165916442871, "learning_rate": 3.933239569440588e-06, "loss": 0.38007109, "memory(GiB)": 34.88, "step": 87170, "train_speed(iter/s)": 0.412129 }, { "acc": 0.91424541, "epoch": 2.360355237863158, "grad_norm": 5.790152072906494, "learning_rate": 3.9326929110381155e-06, "loss": 0.45672092, "memory(GiB)": 34.88, "step": 87175, "train_speed(iter/s)": 0.41213 }, { "acc": 0.93035736, "epoch": 2.360490618146373, "grad_norm": 4.13518762588501, "learning_rate": 3.932146266012024e-06, "loss": 0.43200665, "memory(GiB)": 34.88, "step": 87180, "train_speed(iter/s)": 0.412131 }, { "acc": 0.9221487, "epoch": 2.3606259984295885, "grad_norm": 9.041359901428223, "learning_rate": 3.931599634369162e-06, "loss": 0.45533915, "memory(GiB)": 34.88, "step": 87185, "train_speed(iter/s)": 0.412132 }, { "acc": 0.92664452, "epoch": 2.3607613787128043, "grad_norm": 11.670759201049805, "learning_rate": 3.931053016116378e-06, "loss": 0.38362486, "memory(GiB)": 34.88, "step": 87190, "train_speed(iter/s)": 0.412133 }, { "acc": 0.91656303, "epoch": 2.3608967589960197, "grad_norm": 7.4221320152282715, "learning_rate": 3.930506411260518e-06, "loss": 0.42524233, "memory(GiB)": 34.88, "step": 87195, "train_speed(iter/s)": 0.412135 }, { "acc": 0.92558908, "epoch": 2.3610321392792355, "grad_norm": 9.541142463684082, "learning_rate": 3.9299598198084325e-06, "loss": 0.42283406, "memory(GiB)": 34.88, "step": 87200, "train_speed(iter/s)": 0.412136 }, { "acc": 0.908638, "epoch": 2.361167519562451, "grad_norm": 6.990450382232666, "learning_rate": 3.929413241766964e-06, "loss": 0.50328379, "memory(GiB)": 34.88, "step": 87205, "train_speed(iter/s)": 0.412137 }, { "acc": 0.92109766, "epoch": 2.3613028998456667, "grad_norm": 2.583756923675537, "learning_rate": 3.928866677142962e-06, "loss": 0.40822802, "memory(GiB)": 34.88, "step": 87210, "train_speed(iter/s)": 0.412138 }, { "acc": 0.92532663, "epoch": 2.361438280128882, "grad_norm": 7.3462347984313965, "learning_rate": 3.9283201259432756e-06, "loss": 0.43142633, "memory(GiB)": 34.88, "step": 87215, "train_speed(iter/s)": 0.412139 }, { "acc": 0.9245389, "epoch": 2.3615736604120974, "grad_norm": 10.46621036529541, "learning_rate": 3.927773588174746e-06, "loss": 0.35497828, "memory(GiB)": 34.88, "step": 87220, "train_speed(iter/s)": 0.41214 }, { "acc": 0.93834581, "epoch": 2.361709040695313, "grad_norm": 7.581181526184082, "learning_rate": 3.927227063844224e-06, "loss": 0.32734053, "memory(GiB)": 34.88, "step": 87225, "train_speed(iter/s)": 0.412142 }, { "acc": 0.92162266, "epoch": 2.3618444209785285, "grad_norm": 6.52647066116333, "learning_rate": 3.926680552958554e-06, "loss": 0.47150731, "memory(GiB)": 34.88, "step": 87230, "train_speed(iter/s)": 0.412143 }, { "acc": 0.92177143, "epoch": 2.3619798012617443, "grad_norm": 7.931183338165283, "learning_rate": 3.9261340555245836e-06, "loss": 0.45119715, "memory(GiB)": 34.88, "step": 87235, "train_speed(iter/s)": 0.412144 }, { "acc": 0.92714863, "epoch": 2.3621151815449597, "grad_norm": 7.164088249206543, "learning_rate": 3.925587571549156e-06, "loss": 0.46516342, "memory(GiB)": 34.88, "step": 87240, "train_speed(iter/s)": 0.412145 }, { "acc": 0.91181402, "epoch": 2.3622505618281755, "grad_norm": 16.833152770996094, "learning_rate": 3.925041101039121e-06, "loss": 0.41730037, "memory(GiB)": 34.88, "step": 87245, "train_speed(iter/s)": 0.412147 }, { "acc": 0.93429871, "epoch": 2.362385942111391, "grad_norm": 5.8565545082092285, "learning_rate": 3.92449464400132e-06, "loss": 0.31041307, "memory(GiB)": 34.88, "step": 87250, "train_speed(iter/s)": 0.412148 }, { "acc": 0.91536465, "epoch": 2.362521322394606, "grad_norm": 16.936084747314453, "learning_rate": 3.9239482004426e-06, "loss": 0.47621155, "memory(GiB)": 34.88, "step": 87255, "train_speed(iter/s)": 0.412149 }, { "acc": 0.92456398, "epoch": 2.362656702677822, "grad_norm": 8.227575302124023, "learning_rate": 3.923401770369809e-06, "loss": 0.33086114, "memory(GiB)": 34.88, "step": 87260, "train_speed(iter/s)": 0.41215 }, { "acc": 0.92746887, "epoch": 2.3627920829610374, "grad_norm": 12.25597095489502, "learning_rate": 3.922855353789789e-06, "loss": 0.41123934, "memory(GiB)": 34.88, "step": 87265, "train_speed(iter/s)": 0.412151 }, { "acc": 0.93757963, "epoch": 2.362927463244253, "grad_norm": 8.443302154541016, "learning_rate": 3.922308950709386e-06, "loss": 0.33714619, "memory(GiB)": 34.88, "step": 87270, "train_speed(iter/s)": 0.412153 }, { "acc": 0.93575325, "epoch": 2.3630628435274685, "grad_norm": 6.6922993659973145, "learning_rate": 3.9217625611354434e-06, "loss": 0.33825507, "memory(GiB)": 34.88, "step": 87275, "train_speed(iter/s)": 0.412154 }, { "acc": 0.90858421, "epoch": 2.3631982238106843, "grad_norm": 14.133440017700195, "learning_rate": 3.921216185074808e-06, "loss": 0.4438693, "memory(GiB)": 34.88, "step": 87280, "train_speed(iter/s)": 0.412155 }, { "acc": 0.93514633, "epoch": 2.3633336040938997, "grad_norm": 4.862850666046143, "learning_rate": 3.920669822534322e-06, "loss": 0.32448204, "memory(GiB)": 34.88, "step": 87285, "train_speed(iter/s)": 0.412156 }, { "acc": 0.93303461, "epoch": 2.3634689843771155, "grad_norm": 6.264799118041992, "learning_rate": 3.920123473520832e-06, "loss": 0.41784086, "memory(GiB)": 34.88, "step": 87290, "train_speed(iter/s)": 0.412157 }, { "acc": 0.9423152, "epoch": 2.363604364660331, "grad_norm": 12.980528831481934, "learning_rate": 3.919577138041181e-06, "loss": 0.29392128, "memory(GiB)": 34.88, "step": 87295, "train_speed(iter/s)": 0.412159 }, { "acc": 0.93172922, "epoch": 2.3637397449435467, "grad_norm": 5.965627193450928, "learning_rate": 3.919030816102213e-06, "loss": 0.46148214, "memory(GiB)": 34.88, "step": 87300, "train_speed(iter/s)": 0.41216 }, { "acc": 0.92738638, "epoch": 2.363875125226762, "grad_norm": 4.68330192565918, "learning_rate": 3.91848450771077e-06, "loss": 0.38456225, "memory(GiB)": 34.88, "step": 87305, "train_speed(iter/s)": 0.412161 }, { "acc": 0.94171391, "epoch": 2.3640105055099774, "grad_norm": 3.966942548751831, "learning_rate": 3.917938212873699e-06, "loss": 0.31857924, "memory(GiB)": 34.88, "step": 87310, "train_speed(iter/s)": 0.412162 }, { "acc": 0.92245121, "epoch": 2.364145885793193, "grad_norm": 9.538678169250488, "learning_rate": 3.9173919315978405e-06, "loss": 0.38682508, "memory(GiB)": 34.88, "step": 87315, "train_speed(iter/s)": 0.412164 }, { "acc": 0.92240677, "epoch": 2.3642812660764085, "grad_norm": 8.694391250610352, "learning_rate": 3.916845663890039e-06, "loss": 0.39250326, "memory(GiB)": 34.88, "step": 87320, "train_speed(iter/s)": 0.412165 }, { "acc": 0.92334824, "epoch": 2.3644166463596243, "grad_norm": 12.112032890319824, "learning_rate": 3.9162994097571365e-06, "loss": 0.41000805, "memory(GiB)": 34.88, "step": 87325, "train_speed(iter/s)": 0.412166 }, { "acc": 0.91417408, "epoch": 2.3645520266428397, "grad_norm": 10.719168663024902, "learning_rate": 3.915753169205979e-06, "loss": 0.44210253, "memory(GiB)": 34.88, "step": 87330, "train_speed(iter/s)": 0.412167 }, { "acc": 0.9259861, "epoch": 2.3646874069260555, "grad_norm": 5.207922458648682, "learning_rate": 3.915206942243405e-06, "loss": 0.45485048, "memory(GiB)": 34.88, "step": 87335, "train_speed(iter/s)": 0.412168 }, { "acc": 0.92421989, "epoch": 2.364822787209271, "grad_norm": 15.838690757751465, "learning_rate": 3.9146607288762605e-06, "loss": 0.45838261, "memory(GiB)": 34.88, "step": 87340, "train_speed(iter/s)": 0.412169 }, { "acc": 0.92689648, "epoch": 2.364958167492486, "grad_norm": 8.366844177246094, "learning_rate": 3.914114529111387e-06, "loss": 0.37373972, "memory(GiB)": 34.88, "step": 87345, "train_speed(iter/s)": 0.412171 }, { "acc": 0.93542128, "epoch": 2.365093547775702, "grad_norm": 12.504225730895996, "learning_rate": 3.913568342955626e-06, "loss": 0.31598854, "memory(GiB)": 34.88, "step": 87350, "train_speed(iter/s)": 0.412172 }, { "acc": 0.91178808, "epoch": 2.3652289280589174, "grad_norm": 6.6341071128845215, "learning_rate": 3.913022170415819e-06, "loss": 0.52643366, "memory(GiB)": 34.88, "step": 87355, "train_speed(iter/s)": 0.412173 }, { "acc": 0.93139029, "epoch": 2.365364308342133, "grad_norm": 30.738536834716797, "learning_rate": 3.912476011498808e-06, "loss": 0.43572245, "memory(GiB)": 34.88, "step": 87360, "train_speed(iter/s)": 0.412175 }, { "acc": 0.91765156, "epoch": 2.3654996886253485, "grad_norm": 5.446393013000488, "learning_rate": 3.9119298662114375e-06, "loss": 0.46334133, "memory(GiB)": 34.88, "step": 87365, "train_speed(iter/s)": 0.412176 }, { "acc": 0.94051313, "epoch": 2.3656350689085643, "grad_norm": 10.960962295532227, "learning_rate": 3.911383734560545e-06, "loss": 0.38727121, "memory(GiB)": 34.88, "step": 87370, "train_speed(iter/s)": 0.412177 }, { "acc": 0.92173157, "epoch": 2.3657704491917797, "grad_norm": 7.333976745605469, "learning_rate": 3.910837616552975e-06, "loss": 0.50395098, "memory(GiB)": 34.88, "step": 87375, "train_speed(iter/s)": 0.412178 }, { "acc": 0.91828337, "epoch": 2.365905829474995, "grad_norm": 7.702314376831055, "learning_rate": 3.910291512195567e-06, "loss": 0.53255839, "memory(GiB)": 34.88, "step": 87380, "train_speed(iter/s)": 0.41218 }, { "acc": 0.92709017, "epoch": 2.366041209758211, "grad_norm": 14.580920219421387, "learning_rate": 3.909745421495164e-06, "loss": 0.4187808, "memory(GiB)": 34.88, "step": 87385, "train_speed(iter/s)": 0.412181 }, { "acc": 0.92009926, "epoch": 2.366176590041426, "grad_norm": 7.344513416290283, "learning_rate": 3.909199344458604e-06, "loss": 0.41410112, "memory(GiB)": 34.88, "step": 87390, "train_speed(iter/s)": 0.412182 }, { "acc": 0.92154007, "epoch": 2.366311970324642, "grad_norm": 8.914076805114746, "learning_rate": 3.908653281092729e-06, "loss": 0.41100283, "memory(GiB)": 34.88, "step": 87395, "train_speed(iter/s)": 0.412183 }, { "acc": 0.92631702, "epoch": 2.3664473506078574, "grad_norm": 5.7136359214782715, "learning_rate": 3.90810723140438e-06, "loss": 0.43096371, "memory(GiB)": 34.88, "step": 87400, "train_speed(iter/s)": 0.412184 }, { "acc": 0.92212543, "epoch": 2.366582730891073, "grad_norm": 4.242010593414307, "learning_rate": 3.907561195400396e-06, "loss": 0.3432405, "memory(GiB)": 34.88, "step": 87405, "train_speed(iter/s)": 0.412185 }, { "acc": 0.92678413, "epoch": 2.3667181111742885, "grad_norm": 6.207354545593262, "learning_rate": 3.90701517308762e-06, "loss": 0.37677479, "memory(GiB)": 34.88, "step": 87410, "train_speed(iter/s)": 0.412187 }, { "acc": 0.93088436, "epoch": 2.366853491457504, "grad_norm": 7.761782646179199, "learning_rate": 3.906469164472889e-06, "loss": 0.39292088, "memory(GiB)": 34.88, "step": 87415, "train_speed(iter/s)": 0.412188 }, { "acc": 0.93020782, "epoch": 2.3669888717407197, "grad_norm": 8.731854438781738, "learning_rate": 3.9059231695630455e-06, "loss": 0.33192296, "memory(GiB)": 34.88, "step": 87420, "train_speed(iter/s)": 0.412189 }, { "acc": 0.91074276, "epoch": 2.367124252023935, "grad_norm": 12.497385025024414, "learning_rate": 3.905377188364926e-06, "loss": 0.44832163, "memory(GiB)": 34.88, "step": 87425, "train_speed(iter/s)": 0.412191 }, { "acc": 0.9260601, "epoch": 2.367259632307151, "grad_norm": 3.7456915378570557, "learning_rate": 3.904831220885373e-06, "loss": 0.43404179, "memory(GiB)": 34.88, "step": 87430, "train_speed(iter/s)": 0.412192 }, { "acc": 0.92194748, "epoch": 2.367395012590366, "grad_norm": 5.392545223236084, "learning_rate": 3.904285267131222e-06, "loss": 0.41851387, "memory(GiB)": 34.88, "step": 87435, "train_speed(iter/s)": 0.412193 }, { "acc": 0.92428808, "epoch": 2.367530392873582, "grad_norm": 10.335578918457031, "learning_rate": 3.903739327109316e-06, "loss": 0.36988671, "memory(GiB)": 34.88, "step": 87440, "train_speed(iter/s)": 0.412194 }, { "acc": 0.93722563, "epoch": 2.3676657731567974, "grad_norm": 8.890660285949707, "learning_rate": 3.903193400826492e-06, "loss": 0.38022361, "memory(GiB)": 34.88, "step": 87445, "train_speed(iter/s)": 0.412195 }, { "acc": 0.92256947, "epoch": 2.367801153440013, "grad_norm": 10.25907039642334, "learning_rate": 3.90264748828959e-06, "loss": 0.43313951, "memory(GiB)": 34.88, "step": 87450, "train_speed(iter/s)": 0.412196 }, { "acc": 0.90961142, "epoch": 2.3679365337232285, "grad_norm": 10.126959800720215, "learning_rate": 3.902101589505448e-06, "loss": 0.55524039, "memory(GiB)": 34.88, "step": 87455, "train_speed(iter/s)": 0.412198 }, { "acc": 0.92032509, "epoch": 2.3680719140064443, "grad_norm": 8.323519706726074, "learning_rate": 3.901555704480904e-06, "loss": 0.38035364, "memory(GiB)": 34.88, "step": 87460, "train_speed(iter/s)": 0.412199 }, { "acc": 0.91409092, "epoch": 2.3682072942896597, "grad_norm": 6.4153876304626465, "learning_rate": 3.9010098332227985e-06, "loss": 0.49440393, "memory(GiB)": 34.88, "step": 87465, "train_speed(iter/s)": 0.4122 }, { "acc": 0.94532957, "epoch": 2.368342674572875, "grad_norm": 9.758700370788574, "learning_rate": 3.900463975737967e-06, "loss": 0.38292637, "memory(GiB)": 34.88, "step": 87470, "train_speed(iter/s)": 0.412201 }, { "acc": 0.93030033, "epoch": 2.368478054856091, "grad_norm": 8.129900932312012, "learning_rate": 3.899918132033247e-06, "loss": 0.40502348, "memory(GiB)": 34.88, "step": 87475, "train_speed(iter/s)": 0.412202 }, { "acc": 0.91709957, "epoch": 2.3686134351393062, "grad_norm": 5.606233596801758, "learning_rate": 3.8993723021154785e-06, "loss": 0.42338552, "memory(GiB)": 34.88, "step": 87480, "train_speed(iter/s)": 0.412203 }, { "acc": 0.92127514, "epoch": 2.368748815422522, "grad_norm": 13.628562927246094, "learning_rate": 3.898826485991497e-06, "loss": 0.4407145, "memory(GiB)": 34.88, "step": 87485, "train_speed(iter/s)": 0.412204 }, { "acc": 0.92397165, "epoch": 2.3688841957057374, "grad_norm": 12.65317440032959, "learning_rate": 3.898280683668143e-06, "loss": 0.39563785, "memory(GiB)": 34.88, "step": 87490, "train_speed(iter/s)": 0.412206 }, { "acc": 0.93786163, "epoch": 2.369019575988953, "grad_norm": 9.087986946105957, "learning_rate": 3.89773489515225e-06, "loss": 0.36369855, "memory(GiB)": 34.88, "step": 87495, "train_speed(iter/s)": 0.412207 }, { "acc": 0.93260117, "epoch": 2.3691549562721685, "grad_norm": 5.746912002563477, "learning_rate": 3.897189120450658e-06, "loss": 0.32289298, "memory(GiB)": 34.88, "step": 87500, "train_speed(iter/s)": 0.412208 }, { "acc": 0.91751385, "epoch": 2.369290336555384, "grad_norm": 5.1724772453308105, "learning_rate": 3.896643359570202e-06, "loss": 0.47883143, "memory(GiB)": 34.88, "step": 87505, "train_speed(iter/s)": 0.412209 }, { "acc": 0.91667843, "epoch": 2.3694257168385997, "grad_norm": 5.421269416809082, "learning_rate": 3.89609761251772e-06, "loss": 0.44272437, "memory(GiB)": 34.88, "step": 87510, "train_speed(iter/s)": 0.412211 }, { "acc": 0.90127525, "epoch": 2.369561097121815, "grad_norm": 8.035161972045898, "learning_rate": 3.895551879300048e-06, "loss": 0.5281703, "memory(GiB)": 34.88, "step": 87515, "train_speed(iter/s)": 0.412212 }, { "acc": 0.93683386, "epoch": 2.369696477405031, "grad_norm": 7.010959625244141, "learning_rate": 3.895006159924022e-06, "loss": 0.36397634, "memory(GiB)": 34.88, "step": 87520, "train_speed(iter/s)": 0.412213 }, { "acc": 0.93827362, "epoch": 2.3698318576882462, "grad_norm": 7.047490119934082, "learning_rate": 3.89446045439648e-06, "loss": 0.31860876, "memory(GiB)": 34.88, "step": 87525, "train_speed(iter/s)": 0.412214 }, { "acc": 0.91767159, "epoch": 2.369967237971462, "grad_norm": 6.402816295623779, "learning_rate": 3.893914762724255e-06, "loss": 0.46312561, "memory(GiB)": 34.88, "step": 87530, "train_speed(iter/s)": 0.412216 }, { "acc": 0.93943758, "epoch": 2.3701026182546774, "grad_norm": 11.88092041015625, "learning_rate": 3.893369084914186e-06, "loss": 0.38228068, "memory(GiB)": 34.88, "step": 87535, "train_speed(iter/s)": 0.412217 }, { "acc": 0.93063269, "epoch": 2.3702379985378927, "grad_norm": 10.617192268371582, "learning_rate": 3.892823420973105e-06, "loss": 0.40781479, "memory(GiB)": 34.88, "step": 87540, "train_speed(iter/s)": 0.412218 }, { "acc": 0.92685709, "epoch": 2.3703733788211085, "grad_norm": 5.064770221710205, "learning_rate": 3.892277770907852e-06, "loss": 0.31690502, "memory(GiB)": 34.88, "step": 87545, "train_speed(iter/s)": 0.412219 }, { "acc": 0.92152262, "epoch": 2.370508759104324, "grad_norm": 10.562774658203125, "learning_rate": 3.891732134725259e-06, "loss": 0.43986459, "memory(GiB)": 34.88, "step": 87550, "train_speed(iter/s)": 0.41222 }, { "acc": 0.93296013, "epoch": 2.3706441393875397, "grad_norm": 5.116175174713135, "learning_rate": 3.8911865124321625e-06, "loss": 0.34011796, "memory(GiB)": 34.88, "step": 87555, "train_speed(iter/s)": 0.412221 }, { "acc": 0.93373623, "epoch": 2.370779519670755, "grad_norm": 3.1108295917510986, "learning_rate": 3.890640904035397e-06, "loss": 0.31082911, "memory(GiB)": 34.88, "step": 87560, "train_speed(iter/s)": 0.412223 }, { "acc": 0.90524054, "epoch": 2.370914899953971, "grad_norm": 42.2445068359375, "learning_rate": 3.890095309541798e-06, "loss": 0.5460433, "memory(GiB)": 34.88, "step": 87565, "train_speed(iter/s)": 0.412224 }, { "acc": 0.92543497, "epoch": 2.3710502802371862, "grad_norm": 24.73545265197754, "learning_rate": 3.8895497289582e-06, "loss": 0.39765487, "memory(GiB)": 34.88, "step": 87570, "train_speed(iter/s)": 0.412225 }, { "acc": 0.94334087, "epoch": 2.3711856605204016, "grad_norm": 4.747035026550293, "learning_rate": 3.889004162291436e-06, "loss": 0.32540197, "memory(GiB)": 34.88, "step": 87575, "train_speed(iter/s)": 0.412226 }, { "acc": 0.92602291, "epoch": 2.3713210408036174, "grad_norm": 9.659734725952148, "learning_rate": 3.888458609548342e-06, "loss": 0.36981823, "memory(GiB)": 34.88, "step": 87580, "train_speed(iter/s)": 0.412227 }, { "acc": 0.92106438, "epoch": 2.3714564210868327, "grad_norm": 18.91372299194336, "learning_rate": 3.887913070735751e-06, "loss": 0.43504558, "memory(GiB)": 34.88, "step": 87585, "train_speed(iter/s)": 0.412229 }, { "acc": 0.92849512, "epoch": 2.3715918013700485, "grad_norm": 6.691670894622803, "learning_rate": 3.887367545860498e-06, "loss": 0.42930117, "memory(GiB)": 34.88, "step": 87590, "train_speed(iter/s)": 0.41223 }, { "acc": 0.9238451, "epoch": 2.371727181653264, "grad_norm": 26.588239669799805, "learning_rate": 3.886822034929418e-06, "loss": 0.37959838, "memory(GiB)": 34.88, "step": 87595, "train_speed(iter/s)": 0.412231 }, { "acc": 0.92136383, "epoch": 2.3718625619364797, "grad_norm": 7.2556352615356445, "learning_rate": 3.88627653794934e-06, "loss": 0.44584465, "memory(GiB)": 34.88, "step": 87600, "train_speed(iter/s)": 0.412232 }, { "acc": 0.92471619, "epoch": 2.371997942219695, "grad_norm": 7.10421895980835, "learning_rate": 3.885731054927103e-06, "loss": 0.40523872, "memory(GiB)": 34.88, "step": 87605, "train_speed(iter/s)": 0.412234 }, { "acc": 0.9175745, "epoch": 2.372133322502911, "grad_norm": 14.75439167022705, "learning_rate": 3.885185585869535e-06, "loss": 0.45764112, "memory(GiB)": 34.88, "step": 87610, "train_speed(iter/s)": 0.412235 }, { "acc": 0.92550192, "epoch": 2.3722687027861262, "grad_norm": 4.231979846954346, "learning_rate": 3.884640130783473e-06, "loss": 0.45086126, "memory(GiB)": 34.88, "step": 87615, "train_speed(iter/s)": 0.412236 }, { "acc": 0.92533979, "epoch": 2.372404083069342, "grad_norm": 6.478976726531982, "learning_rate": 3.884094689675748e-06, "loss": 0.45167694, "memory(GiB)": 34.88, "step": 87620, "train_speed(iter/s)": 0.412237 }, { "acc": 0.93276501, "epoch": 2.3725394633525574, "grad_norm": 13.056126594543457, "learning_rate": 3.8835492625531925e-06, "loss": 0.36552103, "memory(GiB)": 34.88, "step": 87625, "train_speed(iter/s)": 0.412238 }, { "acc": 0.92741785, "epoch": 2.3726748436357727, "grad_norm": 7.952868938446045, "learning_rate": 3.883003849422641e-06, "loss": 0.42828426, "memory(GiB)": 34.88, "step": 87630, "train_speed(iter/s)": 0.41224 }, { "acc": 0.92179203, "epoch": 2.3728102239189885, "grad_norm": 8.18447208404541, "learning_rate": 3.882458450290925e-06, "loss": 0.49840846, "memory(GiB)": 34.88, "step": 87635, "train_speed(iter/s)": 0.412241 }, { "acc": 0.92202053, "epoch": 2.372945604202204, "grad_norm": 14.489428520202637, "learning_rate": 3.881913065164877e-06, "loss": 0.46158142, "memory(GiB)": 34.88, "step": 87640, "train_speed(iter/s)": 0.412242 }, { "acc": 0.93120461, "epoch": 2.3730809844854197, "grad_norm": 5.4040656089782715, "learning_rate": 3.8813676940513265e-06, "loss": 0.37254913, "memory(GiB)": 34.88, "step": 87645, "train_speed(iter/s)": 0.412243 }, { "acc": 0.93013897, "epoch": 2.373216364768635, "grad_norm": 5.633750915527344, "learning_rate": 3.880822336957108e-06, "loss": 0.40292988, "memory(GiB)": 34.88, "step": 87650, "train_speed(iter/s)": 0.412245 }, { "acc": 0.91385593, "epoch": 2.373351745051851, "grad_norm": 4.163500785827637, "learning_rate": 3.880276993889052e-06, "loss": 0.4879611, "memory(GiB)": 34.88, "step": 87655, "train_speed(iter/s)": 0.412246 }, { "acc": 0.93076382, "epoch": 2.3734871253350662, "grad_norm": 5.800814151763916, "learning_rate": 3.8797316648539905e-06, "loss": 0.37875905, "memory(GiB)": 34.88, "step": 87660, "train_speed(iter/s)": 0.412247 }, { "acc": 0.92596741, "epoch": 2.3736225056182816, "grad_norm": 5.565979480743408, "learning_rate": 3.8791863498587554e-06, "loss": 0.38028197, "memory(GiB)": 34.88, "step": 87665, "train_speed(iter/s)": 0.412248 }, { "acc": 0.93513498, "epoch": 2.3737578859014974, "grad_norm": 4.695101737976074, "learning_rate": 3.878641048910176e-06, "loss": 0.39924603, "memory(GiB)": 34.88, "step": 87670, "train_speed(iter/s)": 0.412249 }, { "acc": 0.93058462, "epoch": 2.3738932661847127, "grad_norm": 11.524808883666992, "learning_rate": 3.8780957620150855e-06, "loss": 0.41689639, "memory(GiB)": 34.88, "step": 87675, "train_speed(iter/s)": 0.41225 }, { "acc": 0.92786045, "epoch": 2.3740286464679285, "grad_norm": 7.14307165145874, "learning_rate": 3.877550489180313e-06, "loss": 0.35665984, "memory(GiB)": 34.88, "step": 87680, "train_speed(iter/s)": 0.412252 }, { "acc": 0.9142025, "epoch": 2.374164026751144, "grad_norm": 6.831945896148682, "learning_rate": 3.8770052304126906e-06, "loss": 0.60283318, "memory(GiB)": 34.88, "step": 87685, "train_speed(iter/s)": 0.412253 }, { "acc": 0.9263031, "epoch": 2.3742994070343597, "grad_norm": 7.8369669914245605, "learning_rate": 3.876459985719046e-06, "loss": 0.4180541, "memory(GiB)": 34.88, "step": 87690, "train_speed(iter/s)": 0.412254 }, { "acc": 0.94028769, "epoch": 2.374434787317575, "grad_norm": 2.1146481037139893, "learning_rate": 3.8759147551062136e-06, "loss": 0.36010551, "memory(GiB)": 34.88, "step": 87695, "train_speed(iter/s)": 0.412255 }, { "acc": 0.91962643, "epoch": 2.3745701676007904, "grad_norm": 10.423402786254883, "learning_rate": 3.875369538581019e-06, "loss": 0.45904899, "memory(GiB)": 34.88, "step": 87700, "train_speed(iter/s)": 0.412257 }, { "acc": 0.9337698, "epoch": 2.3747055478840062, "grad_norm": 5.263685703277588, "learning_rate": 3.874824336150296e-06, "loss": 0.32746129, "memory(GiB)": 34.88, "step": 87705, "train_speed(iter/s)": 0.412258 }, { "acc": 0.93073349, "epoch": 2.3748409281672216, "grad_norm": 4.899001121520996, "learning_rate": 3.8742791478208725e-06, "loss": 0.32722569, "memory(GiB)": 34.88, "step": 87710, "train_speed(iter/s)": 0.412259 }, { "acc": 0.93256264, "epoch": 2.3749763084504374, "grad_norm": 10.9264497756958, "learning_rate": 3.873733973599577e-06, "loss": 0.3946085, "memory(GiB)": 34.88, "step": 87715, "train_speed(iter/s)": 0.41226 }, { "acc": 0.92692013, "epoch": 2.3751116887336527, "grad_norm": 6.866735458374023, "learning_rate": 3.873188813493242e-06, "loss": 0.40120554, "memory(GiB)": 34.88, "step": 87720, "train_speed(iter/s)": 0.412262 }, { "acc": 0.91450224, "epoch": 2.3752470690168686, "grad_norm": 9.74403190612793, "learning_rate": 3.872643667508693e-06, "loss": 0.50432463, "memory(GiB)": 34.88, "step": 87725, "train_speed(iter/s)": 0.412263 }, { "acc": 0.91740551, "epoch": 2.375382449300084, "grad_norm": 4.319198131561279, "learning_rate": 3.872098535652762e-06, "loss": 0.43062344, "memory(GiB)": 34.88, "step": 87730, "train_speed(iter/s)": 0.412264 }, { "acc": 0.90764103, "epoch": 2.3755178295832993, "grad_norm": 9.423735618591309, "learning_rate": 3.871553417932275e-06, "loss": 0.52793493, "memory(GiB)": 34.88, "step": 87735, "train_speed(iter/s)": 0.412265 }, { "acc": 0.94068832, "epoch": 2.375653209866515, "grad_norm": 3.9534285068511963, "learning_rate": 3.871008314354063e-06, "loss": 0.38518858, "memory(GiB)": 34.88, "step": 87740, "train_speed(iter/s)": 0.412267 }, { "acc": 0.93641911, "epoch": 2.3757885901497304, "grad_norm": 6.175446510314941, "learning_rate": 3.870463224924955e-06, "loss": 0.34334087, "memory(GiB)": 34.88, "step": 87745, "train_speed(iter/s)": 0.412268 }, { "acc": 0.9320591, "epoch": 2.3759239704329462, "grad_norm": 8.44091796875, "learning_rate": 3.869918149651776e-06, "loss": 0.43959551, "memory(GiB)": 34.88, "step": 87750, "train_speed(iter/s)": 0.412269 }, { "acc": 0.91482868, "epoch": 2.3760593507161616, "grad_norm": 10.199763298034668, "learning_rate": 3.869373088541359e-06, "loss": 0.58136001, "memory(GiB)": 34.88, "step": 87755, "train_speed(iter/s)": 0.41227 }, { "acc": 0.9307663, "epoch": 2.3761947309993774, "grad_norm": 9.001466751098633, "learning_rate": 3.868828041600528e-06, "loss": 0.47440605, "memory(GiB)": 34.88, "step": 87760, "train_speed(iter/s)": 0.412271 }, { "acc": 0.92216282, "epoch": 2.3763301112825927, "grad_norm": 4.294707298278809, "learning_rate": 3.868283008836111e-06, "loss": 0.40171733, "memory(GiB)": 34.88, "step": 87765, "train_speed(iter/s)": 0.412272 }, { "acc": 0.93002825, "epoch": 2.376465491565808, "grad_norm": 4.660351276397705, "learning_rate": 3.867737990254938e-06, "loss": 0.3957134, "memory(GiB)": 34.88, "step": 87770, "train_speed(iter/s)": 0.412273 }, { "acc": 0.94294357, "epoch": 2.376600871849024, "grad_norm": 5.427504062652588, "learning_rate": 3.867192985863833e-06, "loss": 0.30326128, "memory(GiB)": 34.88, "step": 87775, "train_speed(iter/s)": 0.412275 }, { "acc": 0.93338461, "epoch": 2.3767362521322393, "grad_norm": 5.232324600219727, "learning_rate": 3.866647995669627e-06, "loss": 0.41141319, "memory(GiB)": 34.88, "step": 87780, "train_speed(iter/s)": 0.412276 }, { "acc": 0.94083385, "epoch": 2.376871632415455, "grad_norm": 4.308553218841553, "learning_rate": 3.866103019679145e-06, "loss": 0.291995, "memory(GiB)": 34.88, "step": 87785, "train_speed(iter/s)": 0.412277 }, { "acc": 0.93670549, "epoch": 2.3770070126986704, "grad_norm": 10.747136116027832, "learning_rate": 3.865558057899215e-06, "loss": 0.42876062, "memory(GiB)": 34.88, "step": 87790, "train_speed(iter/s)": 0.412278 }, { "acc": 0.93058491, "epoch": 2.3771423929818862, "grad_norm": 7.135274410247803, "learning_rate": 3.8650131103366615e-06, "loss": 0.44050627, "memory(GiB)": 34.88, "step": 87795, "train_speed(iter/s)": 0.412279 }, { "acc": 0.92692871, "epoch": 2.3772777732651016, "grad_norm": 6.856821537017822, "learning_rate": 3.8644681769983145e-06, "loss": 0.4229744, "memory(GiB)": 34.88, "step": 87800, "train_speed(iter/s)": 0.41228 }, { "acc": 0.91613302, "epoch": 2.3774131535483174, "grad_norm": 8.080512046813965, "learning_rate": 3.863923257890998e-06, "loss": 0.52004013, "memory(GiB)": 34.88, "step": 87805, "train_speed(iter/s)": 0.412282 }, { "acc": 0.92570982, "epoch": 2.3775485338315328, "grad_norm": 12.7193603515625, "learning_rate": 3.8633783530215365e-06, "loss": 0.40151424, "memory(GiB)": 34.88, "step": 87810, "train_speed(iter/s)": 0.412283 }, { "acc": 0.93217907, "epoch": 2.3776839141147486, "grad_norm": 4.7564377784729, "learning_rate": 3.86283346239676e-06, "loss": 0.35464497, "memory(GiB)": 34.88, "step": 87815, "train_speed(iter/s)": 0.412284 }, { "acc": 0.92390957, "epoch": 2.377819294397964, "grad_norm": 6.537553310394287, "learning_rate": 3.862288586023491e-06, "loss": 0.47210841, "memory(GiB)": 34.88, "step": 87820, "train_speed(iter/s)": 0.412285 }, { "acc": 0.92754745, "epoch": 2.3779546746811793, "grad_norm": 9.83264446258545, "learning_rate": 3.861743723908557e-06, "loss": 0.3887018, "memory(GiB)": 34.88, "step": 87825, "train_speed(iter/s)": 0.412286 }, { "acc": 0.92798138, "epoch": 2.378090054964395, "grad_norm": 6.12949275970459, "learning_rate": 3.861198876058783e-06, "loss": 0.40441389, "memory(GiB)": 34.88, "step": 87830, "train_speed(iter/s)": 0.412288 }, { "acc": 0.93304996, "epoch": 2.3782254352476104, "grad_norm": 11.754555702209473, "learning_rate": 3.860654042480995e-06, "loss": 0.35882869, "memory(GiB)": 34.88, "step": 87835, "train_speed(iter/s)": 0.412289 }, { "acc": 0.9205966, "epoch": 2.3783608155308262, "grad_norm": 27.821918487548828, "learning_rate": 3.860109223182017e-06, "loss": 0.4272881, "memory(GiB)": 34.88, "step": 87840, "train_speed(iter/s)": 0.41229 }, { "acc": 0.91853085, "epoch": 2.3784961958140416, "grad_norm": 9.181031227111816, "learning_rate": 3.8595644181686745e-06, "loss": 0.48960676, "memory(GiB)": 34.88, "step": 87845, "train_speed(iter/s)": 0.412291 }, { "acc": 0.92465477, "epoch": 2.3786315760972574, "grad_norm": 6.836771488189697, "learning_rate": 3.859019627447792e-06, "loss": 0.42989807, "memory(GiB)": 34.88, "step": 87850, "train_speed(iter/s)": 0.412292 }, { "acc": 0.92861691, "epoch": 2.3787669563804728, "grad_norm": 7.3487725257873535, "learning_rate": 3.8584748510261936e-06, "loss": 0.33802152, "memory(GiB)": 34.88, "step": 87855, "train_speed(iter/s)": 0.412293 }, { "acc": 0.93381243, "epoch": 2.378902336663688, "grad_norm": 7.856532573699951, "learning_rate": 3.857930088910706e-06, "loss": 0.44509735, "memory(GiB)": 34.88, "step": 87860, "train_speed(iter/s)": 0.412294 }, { "acc": 0.93332644, "epoch": 2.379037716946904, "grad_norm": 6.963032245635986, "learning_rate": 3.857385341108151e-06, "loss": 0.32866573, "memory(GiB)": 34.88, "step": 87865, "train_speed(iter/s)": 0.412295 }, { "acc": 0.93970881, "epoch": 2.3791730972301193, "grad_norm": 6.099133014678955, "learning_rate": 3.856840607625352e-06, "loss": 0.32129583, "memory(GiB)": 34.88, "step": 87870, "train_speed(iter/s)": 0.412296 }, { "acc": 0.94500227, "epoch": 2.379308477513335, "grad_norm": 8.58577823638916, "learning_rate": 3.8562958884691344e-06, "loss": 0.25973477, "memory(GiB)": 34.88, "step": 87875, "train_speed(iter/s)": 0.412297 }, { "acc": 0.92990665, "epoch": 2.3794438577965504, "grad_norm": 16.670127868652344, "learning_rate": 3.855751183646323e-06, "loss": 0.38718102, "memory(GiB)": 34.88, "step": 87880, "train_speed(iter/s)": 0.412298 }, { "acc": 0.90515213, "epoch": 2.3795792380797662, "grad_norm": 11.05044937133789, "learning_rate": 3.855206493163739e-06, "loss": 0.564252, "memory(GiB)": 34.88, "step": 87885, "train_speed(iter/s)": 0.4123 }, { "acc": 0.91368446, "epoch": 2.3797146183629816, "grad_norm": 7.022197723388672, "learning_rate": 3.854661817028207e-06, "loss": 0.42815905, "memory(GiB)": 34.88, "step": 87890, "train_speed(iter/s)": 0.412301 }, { "acc": 0.91436224, "epoch": 2.379849998646197, "grad_norm": 8.639379501342773, "learning_rate": 3.85411715524655e-06, "loss": 0.52755203, "memory(GiB)": 34.88, "step": 87895, "train_speed(iter/s)": 0.412302 }, { "acc": 0.91991386, "epoch": 2.3799853789294128, "grad_norm": 7.192159652709961, "learning_rate": 3.85357250782559e-06, "loss": 0.49236078, "memory(GiB)": 34.88, "step": 87900, "train_speed(iter/s)": 0.412303 }, { "acc": 0.92965851, "epoch": 2.380120759212628, "grad_norm": 28.788808822631836, "learning_rate": 3.853027874772151e-06, "loss": 0.3537668, "memory(GiB)": 34.88, "step": 87905, "train_speed(iter/s)": 0.412304 }, { "acc": 0.94061384, "epoch": 2.380256139495844, "grad_norm": 4.2774882316589355, "learning_rate": 3.852483256093054e-06, "loss": 0.26840761, "memory(GiB)": 34.88, "step": 87910, "train_speed(iter/s)": 0.412305 }, { "acc": 0.94194431, "epoch": 2.3803915197790593, "grad_norm": 5.082289218902588, "learning_rate": 3.851938651795125e-06, "loss": 0.29353118, "memory(GiB)": 34.88, "step": 87915, "train_speed(iter/s)": 0.412307 }, { "acc": 0.93369236, "epoch": 2.380526900062275, "grad_norm": 7.104426383972168, "learning_rate": 3.851394061885182e-06, "loss": 0.4047606, "memory(GiB)": 34.88, "step": 87920, "train_speed(iter/s)": 0.412308 }, { "acc": 0.91385765, "epoch": 2.3806622803454904, "grad_norm": 11.137173652648926, "learning_rate": 3.850849486370048e-06, "loss": 0.53944778, "memory(GiB)": 34.88, "step": 87925, "train_speed(iter/s)": 0.412309 }, { "acc": 0.92343397, "epoch": 2.380797660628706, "grad_norm": 9.543523788452148, "learning_rate": 3.850304925256549e-06, "loss": 0.42861209, "memory(GiB)": 34.88, "step": 87930, "train_speed(iter/s)": 0.41231 }, { "acc": 0.93245583, "epoch": 2.3809330409119216, "grad_norm": 18.05474281311035, "learning_rate": 3.8497603785515015e-06, "loss": 0.36053655, "memory(GiB)": 34.88, "step": 87935, "train_speed(iter/s)": 0.412311 }, { "acc": 0.94431801, "epoch": 2.381068421195137, "grad_norm": 6.8946027755737305, "learning_rate": 3.849215846261729e-06, "loss": 0.30321639, "memory(GiB)": 34.88, "step": 87940, "train_speed(iter/s)": 0.412312 }, { "acc": 0.90869474, "epoch": 2.3812038014783528, "grad_norm": 4.284924030303955, "learning_rate": 3.848671328394052e-06, "loss": 0.46126571, "memory(GiB)": 34.88, "step": 87945, "train_speed(iter/s)": 0.412314 }, { "acc": 0.93004713, "epoch": 2.381339181761568, "grad_norm": 11.970687866210938, "learning_rate": 3.848126824955294e-06, "loss": 0.4191926, "memory(GiB)": 34.88, "step": 87950, "train_speed(iter/s)": 0.412314 }, { "acc": 0.92209244, "epoch": 2.381474562044784, "grad_norm": 6.811137676239014, "learning_rate": 3.847582335952273e-06, "loss": 0.42449589, "memory(GiB)": 34.88, "step": 87955, "train_speed(iter/s)": 0.412316 }, { "acc": 0.92986145, "epoch": 2.3816099423279993, "grad_norm": 56.06676483154297, "learning_rate": 3.847037861391811e-06, "loss": 0.43232427, "memory(GiB)": 34.88, "step": 87960, "train_speed(iter/s)": 0.412317 }, { "acc": 0.9089674, "epoch": 2.381745322611215, "grad_norm": 9.864705085754395, "learning_rate": 3.84649340128073e-06, "loss": 0.56590562, "memory(GiB)": 34.88, "step": 87965, "train_speed(iter/s)": 0.412318 }, { "acc": 0.92585487, "epoch": 2.3818807028944304, "grad_norm": 4.490232467651367, "learning_rate": 3.84594895562585e-06, "loss": 0.43588939, "memory(GiB)": 34.88, "step": 87970, "train_speed(iter/s)": 0.412319 }, { "acc": 0.93016558, "epoch": 2.3820160831776462, "grad_norm": 5.219801902770996, "learning_rate": 3.84540452443399e-06, "loss": 0.3388567, "memory(GiB)": 34.88, "step": 87975, "train_speed(iter/s)": 0.412321 }, { "acc": 0.9197319, "epoch": 2.3821514634608616, "grad_norm": 8.697186470031738, "learning_rate": 3.84486010771197e-06, "loss": 0.40566683, "memory(GiB)": 34.88, "step": 87980, "train_speed(iter/s)": 0.412322 }, { "acc": 0.92584467, "epoch": 2.382286843744077, "grad_norm": 6.7551140785217285, "learning_rate": 3.8443157054666104e-06, "loss": 0.38246846, "memory(GiB)": 34.88, "step": 87985, "train_speed(iter/s)": 0.412323 }, { "acc": 0.93528252, "epoch": 2.3824222240272928, "grad_norm": 8.45412540435791, "learning_rate": 3.843771317704731e-06, "loss": 0.39172122, "memory(GiB)": 34.88, "step": 87990, "train_speed(iter/s)": 0.412325 }, { "acc": 0.9467474, "epoch": 2.382557604310508, "grad_norm": 3.2619574069976807, "learning_rate": 3.8432269444331525e-06, "loss": 0.2998105, "memory(GiB)": 34.88, "step": 87995, "train_speed(iter/s)": 0.412326 }, { "acc": 0.92114792, "epoch": 2.382692984593724, "grad_norm": 6.957308769226074, "learning_rate": 3.84268258565869e-06, "loss": 0.41659193, "memory(GiB)": 34.88, "step": 88000, "train_speed(iter/s)": 0.412327 }, { "acc": 0.94019642, "epoch": 2.3828283648769393, "grad_norm": 6.36598014831543, "learning_rate": 3.842138241388168e-06, "loss": 0.35754061, "memory(GiB)": 34.88, "step": 88005, "train_speed(iter/s)": 0.412328 }, { "acc": 0.92192078, "epoch": 2.382963745160155, "grad_norm": 7.935429096221924, "learning_rate": 3.841593911628404e-06, "loss": 0.43982782, "memory(GiB)": 34.88, "step": 88010, "train_speed(iter/s)": 0.412329 }, { "acc": 0.91652527, "epoch": 2.3830991254433704, "grad_norm": 10.802909851074219, "learning_rate": 3.841049596386214e-06, "loss": 0.4610877, "memory(GiB)": 34.88, "step": 88015, "train_speed(iter/s)": 0.41233 }, { "acc": 0.90929337, "epoch": 2.383234505726586, "grad_norm": 27.33589744567871, "learning_rate": 3.840505295668421e-06, "loss": 0.49532175, "memory(GiB)": 34.88, "step": 88020, "train_speed(iter/s)": 0.412332 }, { "acc": 0.92619247, "epoch": 2.3833698860098016, "grad_norm": 6.579528331756592, "learning_rate": 3.839961009481839e-06, "loss": 0.39796944, "memory(GiB)": 34.88, "step": 88025, "train_speed(iter/s)": 0.412333 }, { "acc": 0.93070097, "epoch": 2.383505266293017, "grad_norm": 6.818907737731934, "learning_rate": 3.839416737833289e-06, "loss": 0.31323359, "memory(GiB)": 34.88, "step": 88030, "train_speed(iter/s)": 0.412334 }, { "acc": 0.92921677, "epoch": 2.3836406465762328, "grad_norm": 6.67853307723999, "learning_rate": 3.838872480729588e-06, "loss": 0.37503476, "memory(GiB)": 34.88, "step": 88035, "train_speed(iter/s)": 0.412335 }, { "acc": 0.93635502, "epoch": 2.383776026859448, "grad_norm": 10.573799133300781, "learning_rate": 3.838328238177554e-06, "loss": 0.3054322, "memory(GiB)": 34.88, "step": 88040, "train_speed(iter/s)": 0.412336 }, { "acc": 0.9293993, "epoch": 2.383911407142664, "grad_norm": 7.5695929527282715, "learning_rate": 3.837784010184006e-06, "loss": 0.40737309, "memory(GiB)": 34.88, "step": 88045, "train_speed(iter/s)": 0.412338 }, { "acc": 0.92620525, "epoch": 2.3840467874258793, "grad_norm": 6.544119358062744, "learning_rate": 3.8372397967557595e-06, "loss": 0.38072264, "memory(GiB)": 34.88, "step": 88050, "train_speed(iter/s)": 0.412339 }, { "acc": 0.92875271, "epoch": 2.3841821677090946, "grad_norm": 6.183566093444824, "learning_rate": 3.8366955978996354e-06, "loss": 0.37655282, "memory(GiB)": 34.88, "step": 88055, "train_speed(iter/s)": 0.41234 }, { "acc": 0.92537003, "epoch": 2.3843175479923104, "grad_norm": 17.848047256469727, "learning_rate": 3.836151413622446e-06, "loss": 0.45112772, "memory(GiB)": 34.88, "step": 88060, "train_speed(iter/s)": 0.412341 }, { "acc": 0.93069057, "epoch": 2.384452928275526, "grad_norm": 4.698173999786377, "learning_rate": 3.835607243931011e-06, "loss": 0.38971567, "memory(GiB)": 34.88, "step": 88065, "train_speed(iter/s)": 0.412342 }, { "acc": 0.92922382, "epoch": 2.3845883085587416, "grad_norm": 4.997311592102051, "learning_rate": 3.835063088832146e-06, "loss": 0.40705872, "memory(GiB)": 34.88, "step": 88070, "train_speed(iter/s)": 0.412343 }, { "acc": 0.92944775, "epoch": 2.384723688841957, "grad_norm": 18.767194747924805, "learning_rate": 3.834518948332668e-06, "loss": 0.43723669, "memory(GiB)": 34.88, "step": 88075, "train_speed(iter/s)": 0.412345 }, { "acc": 0.93390474, "epoch": 2.3848590691251728, "grad_norm": 5.170001029968262, "learning_rate": 3.833974822439395e-06, "loss": 0.38751383, "memory(GiB)": 34.88, "step": 88080, "train_speed(iter/s)": 0.412346 }, { "acc": 0.93165703, "epoch": 2.384994449408388, "grad_norm": 6.807178974151611, "learning_rate": 3.833430711159142e-06, "loss": 0.36829324, "memory(GiB)": 34.88, "step": 88085, "train_speed(iter/s)": 0.412347 }, { "acc": 0.91434278, "epoch": 2.3851298296916035, "grad_norm": 9.6618013381958, "learning_rate": 3.8328866144987255e-06, "loss": 0.46741915, "memory(GiB)": 34.88, "step": 88090, "train_speed(iter/s)": 0.412348 }, { "acc": 0.91918392, "epoch": 2.3852652099748193, "grad_norm": 7.193467617034912, "learning_rate": 3.83234253246496e-06, "loss": 0.43278751, "memory(GiB)": 34.88, "step": 88095, "train_speed(iter/s)": 0.412349 }, { "acc": 0.93173456, "epoch": 2.3854005902580346, "grad_norm": 5.203069686889648, "learning_rate": 3.831798465064664e-06, "loss": 0.35710647, "memory(GiB)": 34.88, "step": 88100, "train_speed(iter/s)": 0.41235 }, { "acc": 0.92323341, "epoch": 2.3855359705412504, "grad_norm": 5.494447708129883, "learning_rate": 3.83125441230465e-06, "loss": 0.40723238, "memory(GiB)": 34.88, "step": 88105, "train_speed(iter/s)": 0.412352 }, { "acc": 0.9263896, "epoch": 2.385671350824466, "grad_norm": 10.091010093688965, "learning_rate": 3.8307103741917345e-06, "loss": 0.38752654, "memory(GiB)": 34.88, "step": 88110, "train_speed(iter/s)": 0.412353 }, { "acc": 0.94009819, "epoch": 2.3858067311076816, "grad_norm": 11.571088790893555, "learning_rate": 3.830166350732734e-06, "loss": 0.33692727, "memory(GiB)": 34.88, "step": 88115, "train_speed(iter/s)": 0.412354 }, { "acc": 0.91825418, "epoch": 2.385942111390897, "grad_norm": 7.867942810058594, "learning_rate": 3.82962234193446e-06, "loss": 0.5100605, "memory(GiB)": 34.88, "step": 88120, "train_speed(iter/s)": 0.412355 }, { "acc": 0.93353729, "epoch": 2.3860774916741128, "grad_norm": 5.1303911209106445, "learning_rate": 3.829078347803731e-06, "loss": 0.36330509, "memory(GiB)": 34.88, "step": 88125, "train_speed(iter/s)": 0.412356 }, { "acc": 0.93802757, "epoch": 2.386212871957328, "grad_norm": 6.059051990509033, "learning_rate": 3.828534368347358e-06, "loss": 0.32952271, "memory(GiB)": 34.88, "step": 88130, "train_speed(iter/s)": 0.412358 }, { "acc": 0.92062922, "epoch": 2.386348252240544, "grad_norm": 4.301563262939453, "learning_rate": 3.82799040357216e-06, "loss": 0.3956965, "memory(GiB)": 34.88, "step": 88135, "train_speed(iter/s)": 0.412359 }, { "acc": 0.91571169, "epoch": 2.3864836325237593, "grad_norm": 8.048304557800293, "learning_rate": 3.827446453484947e-06, "loss": 0.54324341, "memory(GiB)": 34.88, "step": 88140, "train_speed(iter/s)": 0.41236 }, { "acc": 0.91806688, "epoch": 2.3866190128069746, "grad_norm": 6.220882415771484, "learning_rate": 3.826902518092535e-06, "loss": 0.47843304, "memory(GiB)": 34.88, "step": 88145, "train_speed(iter/s)": 0.412361 }, { "acc": 0.92661963, "epoch": 2.3867543930901904, "grad_norm": 7.256386756896973, "learning_rate": 3.826358597401738e-06, "loss": 0.45577545, "memory(GiB)": 34.88, "step": 88150, "train_speed(iter/s)": 0.412362 }, { "acc": 0.92249622, "epoch": 2.386889773373406, "grad_norm": 7.276519298553467, "learning_rate": 3.825814691419369e-06, "loss": 0.48195271, "memory(GiB)": 34.88, "step": 88155, "train_speed(iter/s)": 0.412363 }, { "acc": 0.93500614, "epoch": 2.3870251536566216, "grad_norm": 3.3475818634033203, "learning_rate": 3.825270800152243e-06, "loss": 0.34920304, "memory(GiB)": 34.88, "step": 88160, "train_speed(iter/s)": 0.412364 }, { "acc": 0.93604088, "epoch": 2.387160533939837, "grad_norm": 8.602143287658691, "learning_rate": 3.82472692360717e-06, "loss": 0.315343, "memory(GiB)": 34.88, "step": 88165, "train_speed(iter/s)": 0.412366 }, { "acc": 0.93239822, "epoch": 2.3872959142230528, "grad_norm": 5.403038024902344, "learning_rate": 3.824183061790966e-06, "loss": 0.35122364, "memory(GiB)": 34.88, "step": 88170, "train_speed(iter/s)": 0.412367 }, { "acc": 0.92656746, "epoch": 2.387431294506268, "grad_norm": 9.930964469909668, "learning_rate": 3.823639214710443e-06, "loss": 0.39996386, "memory(GiB)": 34.88, "step": 88175, "train_speed(iter/s)": 0.412368 }, { "acc": 0.9172019, "epoch": 2.3875666747894835, "grad_norm": 11.628381729125977, "learning_rate": 3.823095382372416e-06, "loss": 0.44697723, "memory(GiB)": 34.88, "step": 88180, "train_speed(iter/s)": 0.412369 }, { "acc": 0.94151535, "epoch": 2.3877020550726993, "grad_norm": 11.874483108520508, "learning_rate": 3.822551564783692e-06, "loss": 0.35883489, "memory(GiB)": 34.88, "step": 88185, "train_speed(iter/s)": 0.412371 }, { "acc": 0.92541924, "epoch": 2.3878374353559146, "grad_norm": 45.26652145385742, "learning_rate": 3.822007761951088e-06, "loss": 0.52658157, "memory(GiB)": 34.88, "step": 88190, "train_speed(iter/s)": 0.412372 }, { "acc": 0.91830187, "epoch": 2.3879728156391304, "grad_norm": 9.937442779541016, "learning_rate": 3.821463973881416e-06, "loss": 0.39749703, "memory(GiB)": 34.88, "step": 88195, "train_speed(iter/s)": 0.412373 }, { "acc": 0.93320217, "epoch": 2.388108195922346, "grad_norm": 9.444025039672852, "learning_rate": 3.820920200581485e-06, "loss": 0.33510675, "memory(GiB)": 34.88, "step": 88200, "train_speed(iter/s)": 0.412374 }, { "acc": 0.9239934, "epoch": 2.3882435762055616, "grad_norm": 5.875157833099365, "learning_rate": 3.820376442058111e-06, "loss": 0.4228138, "memory(GiB)": 34.88, "step": 88205, "train_speed(iter/s)": 0.412375 }, { "acc": 0.93477612, "epoch": 2.388378956488777, "grad_norm": 3.836287260055542, "learning_rate": 3.8198326983181024e-06, "loss": 0.405899, "memory(GiB)": 34.88, "step": 88210, "train_speed(iter/s)": 0.412377 }, { "acc": 0.91514244, "epoch": 2.3885143367719923, "grad_norm": 13.521745681762695, "learning_rate": 3.819288969368272e-06, "loss": 0.49149261, "memory(GiB)": 34.88, "step": 88215, "train_speed(iter/s)": 0.412378 }, { "acc": 0.94992104, "epoch": 2.388649717055208, "grad_norm": 6.331371307373047, "learning_rate": 3.818745255215431e-06, "loss": 0.26244922, "memory(GiB)": 34.88, "step": 88220, "train_speed(iter/s)": 0.412379 }, { "acc": 0.90652723, "epoch": 2.3887850973384235, "grad_norm": 11.553555488586426, "learning_rate": 3.81820155586639e-06, "loss": 0.46549854, "memory(GiB)": 34.88, "step": 88225, "train_speed(iter/s)": 0.41238 }, { "acc": 0.91933212, "epoch": 2.3889204776216393, "grad_norm": 10.78231430053711, "learning_rate": 3.817657871327962e-06, "loss": 0.43129973, "memory(GiB)": 34.88, "step": 88230, "train_speed(iter/s)": 0.412381 }, { "acc": 0.94341717, "epoch": 2.3890558579048546, "grad_norm": 6.637536525726318, "learning_rate": 3.817114201606953e-06, "loss": 0.31242878, "memory(GiB)": 34.88, "step": 88235, "train_speed(iter/s)": 0.412382 }, { "acc": 0.92102699, "epoch": 2.3891912381880704, "grad_norm": 24.342376708984375, "learning_rate": 3.816570546710179e-06, "loss": 0.40939703, "memory(GiB)": 34.88, "step": 88240, "train_speed(iter/s)": 0.412383 }, { "acc": 0.91956873, "epoch": 2.389326618471286, "grad_norm": 8.936984062194824, "learning_rate": 3.816026906644446e-06, "loss": 0.53001609, "memory(GiB)": 34.88, "step": 88245, "train_speed(iter/s)": 0.412385 }, { "acc": 0.91724701, "epoch": 2.389461998754501, "grad_norm": 4.528936386108398, "learning_rate": 3.8154832814165665e-06, "loss": 0.46780934, "memory(GiB)": 34.88, "step": 88250, "train_speed(iter/s)": 0.412385 }, { "acc": 0.89271469, "epoch": 2.389597379037717, "grad_norm": 24.60126304626465, "learning_rate": 3.81493967103335e-06, "loss": 0.64463863, "memory(GiB)": 34.88, "step": 88255, "train_speed(iter/s)": 0.412386 }, { "acc": 0.92494812, "epoch": 2.3897327593209323, "grad_norm": 9.590812683105469, "learning_rate": 3.8143960755016063e-06, "loss": 0.40244656, "memory(GiB)": 34.88, "step": 88260, "train_speed(iter/s)": 0.412388 }, { "acc": 0.92434311, "epoch": 2.389868139604148, "grad_norm": 9.675573348999023, "learning_rate": 3.8138524948281457e-06, "loss": 0.36785481, "memory(GiB)": 34.88, "step": 88265, "train_speed(iter/s)": 0.412389 }, { "acc": 0.92671375, "epoch": 2.3900035198873635, "grad_norm": 1.9988977909088135, "learning_rate": 3.8133089290197758e-06, "loss": 0.40900731, "memory(GiB)": 34.88, "step": 88270, "train_speed(iter/s)": 0.41239 }, { "acc": 0.93687172, "epoch": 2.3901389001705793, "grad_norm": 6.551021575927734, "learning_rate": 3.8127653780833075e-06, "loss": 0.28704329, "memory(GiB)": 34.88, "step": 88275, "train_speed(iter/s)": 0.412391 }, { "acc": 0.90973921, "epoch": 2.3902742804537946, "grad_norm": 12.269123077392578, "learning_rate": 3.8122218420255486e-06, "loss": 0.52623496, "memory(GiB)": 34.88, "step": 88280, "train_speed(iter/s)": 0.412392 }, { "acc": 0.93358431, "epoch": 2.3904096607370104, "grad_norm": 5.1725945472717285, "learning_rate": 3.8116783208533095e-06, "loss": 0.40775242, "memory(GiB)": 34.88, "step": 88285, "train_speed(iter/s)": 0.412393 }, { "acc": 0.92533083, "epoch": 2.390545041020226, "grad_norm": 13.211758613586426, "learning_rate": 3.811134814573397e-06, "loss": 0.49857178, "memory(GiB)": 34.88, "step": 88290, "train_speed(iter/s)": 0.412394 }, { "acc": 0.94305744, "epoch": 2.3906804213034416, "grad_norm": 6.6746110916137695, "learning_rate": 3.810591323192621e-06, "loss": 0.32398875, "memory(GiB)": 34.88, "step": 88295, "train_speed(iter/s)": 0.412396 }, { "acc": 0.92111073, "epoch": 2.390815801586657, "grad_norm": 11.088809967041016, "learning_rate": 3.8100478467177885e-06, "loss": 0.49536948, "memory(GiB)": 34.88, "step": 88300, "train_speed(iter/s)": 0.412397 }, { "acc": 0.92002182, "epoch": 2.3909511818698723, "grad_norm": 9.892637252807617, "learning_rate": 3.809504385155709e-06, "loss": 0.47686148, "memory(GiB)": 34.88, "step": 88305, "train_speed(iter/s)": 0.412398 }, { "acc": 0.92640848, "epoch": 2.391086562153088, "grad_norm": 9.385174751281738, "learning_rate": 3.8089609385131917e-06, "loss": 0.45675073, "memory(GiB)": 34.88, "step": 88310, "train_speed(iter/s)": 0.412399 }, { "acc": 0.93939838, "epoch": 2.3912219424363035, "grad_norm": 27.976058959960938, "learning_rate": 3.8084175067970402e-06, "loss": 0.31215725, "memory(GiB)": 34.88, "step": 88315, "train_speed(iter/s)": 0.4124 }, { "acc": 0.92622004, "epoch": 2.3913573227195193, "grad_norm": 5.352383613586426, "learning_rate": 3.8078740900140652e-06, "loss": 0.47687187, "memory(GiB)": 34.88, "step": 88320, "train_speed(iter/s)": 0.412401 }, { "acc": 0.93111305, "epoch": 2.3914927030027346, "grad_norm": 13.725007057189941, "learning_rate": 3.807330688171073e-06, "loss": 0.41395783, "memory(GiB)": 34.88, "step": 88325, "train_speed(iter/s)": 0.412403 }, { "acc": 0.92480927, "epoch": 2.3916280832859504, "grad_norm": 6.664933204650879, "learning_rate": 3.806787301274871e-06, "loss": 0.41149907, "memory(GiB)": 34.88, "step": 88330, "train_speed(iter/s)": 0.412404 }, { "acc": 0.91666412, "epoch": 2.391763463569166, "grad_norm": 5.601443290710449, "learning_rate": 3.806243929332266e-06, "loss": 0.43651943, "memory(GiB)": 34.88, "step": 88335, "train_speed(iter/s)": 0.412405 }, { "acc": 0.91174498, "epoch": 2.391898843852381, "grad_norm": 5.628684043884277, "learning_rate": 3.8057005723500645e-06, "loss": 0.53268385, "memory(GiB)": 34.88, "step": 88340, "train_speed(iter/s)": 0.412406 }, { "acc": 0.91021299, "epoch": 2.392034224135597, "grad_norm": 7.033640384674072, "learning_rate": 3.8051572303350753e-06, "loss": 0.43575315, "memory(GiB)": 34.88, "step": 88345, "train_speed(iter/s)": 0.412408 }, { "acc": 0.93304539, "epoch": 2.3921696044188123, "grad_norm": 7.327761650085449, "learning_rate": 3.8046139032941026e-06, "loss": 0.39550877, "memory(GiB)": 34.88, "step": 88350, "train_speed(iter/s)": 0.412409 }, { "acc": 0.94030418, "epoch": 2.392304984702028, "grad_norm": 12.073944091796875, "learning_rate": 3.8040705912339533e-06, "loss": 0.31037393, "memory(GiB)": 34.88, "step": 88355, "train_speed(iter/s)": 0.41241 }, { "acc": 0.9236824, "epoch": 2.3924403649852435, "grad_norm": 9.88831901550293, "learning_rate": 3.803527294161433e-06, "loss": 0.47830586, "memory(GiB)": 34.88, "step": 88360, "train_speed(iter/s)": 0.412411 }, { "acc": 0.92908506, "epoch": 2.3925757452684593, "grad_norm": 7.155850410461426, "learning_rate": 3.8029840120833483e-06, "loss": 0.33185079, "memory(GiB)": 34.88, "step": 88365, "train_speed(iter/s)": 0.412412 }, { "acc": 0.92144175, "epoch": 2.3927111255516746, "grad_norm": 15.809365272521973, "learning_rate": 3.802440745006503e-06, "loss": 0.40639324, "memory(GiB)": 34.88, "step": 88370, "train_speed(iter/s)": 0.412414 }, { "acc": 0.90550718, "epoch": 2.39284650583489, "grad_norm": 18.358802795410156, "learning_rate": 3.8018974929377038e-06, "loss": 0.55179434, "memory(GiB)": 34.88, "step": 88375, "train_speed(iter/s)": 0.412415 }, { "acc": 0.92687035, "epoch": 2.392981886118106, "grad_norm": 6.45642614364624, "learning_rate": 3.801354255883758e-06, "loss": 0.40341392, "memory(GiB)": 34.88, "step": 88380, "train_speed(iter/s)": 0.412416 }, { "acc": 0.92631607, "epoch": 2.393117266401321, "grad_norm": 12.914433479309082, "learning_rate": 3.8008110338514686e-06, "loss": 0.38433104, "memory(GiB)": 34.88, "step": 88385, "train_speed(iter/s)": 0.412417 }, { "acc": 0.922332, "epoch": 2.393252646684537, "grad_norm": 11.561383247375488, "learning_rate": 3.800267826847642e-06, "loss": 0.46057796, "memory(GiB)": 34.88, "step": 88390, "train_speed(iter/s)": 0.412418 }, { "acc": 0.9241684, "epoch": 2.3933880269677523, "grad_norm": 9.337055206298828, "learning_rate": 3.7997246348790796e-06, "loss": 0.52750769, "memory(GiB)": 34.88, "step": 88395, "train_speed(iter/s)": 0.41242 }, { "acc": 0.92986383, "epoch": 2.393523407250968, "grad_norm": 7.6692047119140625, "learning_rate": 3.7991814579525897e-06, "loss": 0.38122163, "memory(GiB)": 34.88, "step": 88400, "train_speed(iter/s)": 0.412421 }, { "acc": 0.94047709, "epoch": 2.3936587875341835, "grad_norm": 8.467957496643066, "learning_rate": 3.7986382960749742e-06, "loss": 0.34961782, "memory(GiB)": 34.88, "step": 88405, "train_speed(iter/s)": 0.412422 }, { "acc": 0.93501215, "epoch": 2.393794167817399, "grad_norm": 7.451853275299072, "learning_rate": 3.7980951492530377e-06, "loss": 0.37805896, "memory(GiB)": 34.88, "step": 88410, "train_speed(iter/s)": 0.412423 }, { "acc": 0.94320641, "epoch": 2.3939295481006146, "grad_norm": 6.10719633102417, "learning_rate": 3.7975520174935858e-06, "loss": 0.30319314, "memory(GiB)": 34.88, "step": 88415, "train_speed(iter/s)": 0.412424 }, { "acc": 0.92497759, "epoch": 2.39406492838383, "grad_norm": 4.196075439453125, "learning_rate": 3.7970089008034204e-06, "loss": 0.42764616, "memory(GiB)": 34.88, "step": 88420, "train_speed(iter/s)": 0.412425 }, { "acc": 0.93448181, "epoch": 2.394200308667046, "grad_norm": 23.942598342895508, "learning_rate": 3.7964657991893467e-06, "loss": 0.3698627, "memory(GiB)": 34.88, "step": 88425, "train_speed(iter/s)": 0.412427 }, { "acc": 0.9284668, "epoch": 2.394335688950261, "grad_norm": 10.135690689086914, "learning_rate": 3.795922712658167e-06, "loss": 0.33317277, "memory(GiB)": 34.88, "step": 88430, "train_speed(iter/s)": 0.412428 }, { "acc": 0.94161634, "epoch": 2.394471069233477, "grad_norm": 5.071187973022461, "learning_rate": 3.7953796412166853e-06, "loss": 0.32916379, "memory(GiB)": 34.88, "step": 88435, "train_speed(iter/s)": 0.412429 }, { "acc": 0.92474308, "epoch": 2.3946064495166923, "grad_norm": 11.556550979614258, "learning_rate": 3.7948365848717034e-06, "loss": 0.43267279, "memory(GiB)": 34.88, "step": 88440, "train_speed(iter/s)": 0.41243 }, { "acc": 0.92053566, "epoch": 2.394741829799908, "grad_norm": 13.881246566772461, "learning_rate": 3.7942935436300238e-06, "loss": 0.43374944, "memory(GiB)": 34.88, "step": 88445, "train_speed(iter/s)": 0.412431 }, { "acc": 0.92457104, "epoch": 2.3948772100831235, "grad_norm": 17.15825653076172, "learning_rate": 3.793750517498452e-06, "loss": 0.4379879, "memory(GiB)": 34.88, "step": 88450, "train_speed(iter/s)": 0.412433 }, { "acc": 0.92971516, "epoch": 2.3950125903663393, "grad_norm": 4.32830810546875, "learning_rate": 3.7932075064837882e-06, "loss": 0.35065742, "memory(GiB)": 34.88, "step": 88455, "train_speed(iter/s)": 0.412434 }, { "acc": 0.91438065, "epoch": 2.3951479706495546, "grad_norm": 19.21987533569336, "learning_rate": 3.7926645105928357e-06, "loss": 0.4223979, "memory(GiB)": 34.88, "step": 88460, "train_speed(iter/s)": 0.412435 }, { "acc": 0.90198021, "epoch": 2.39528335093277, "grad_norm": 9.323974609375, "learning_rate": 3.792121529832395e-06, "loss": 0.64970121, "memory(GiB)": 34.88, "step": 88465, "train_speed(iter/s)": 0.412436 }, { "acc": 0.91347675, "epoch": 2.395418731215986, "grad_norm": 15.701343536376953, "learning_rate": 3.79157856420927e-06, "loss": 0.48140106, "memory(GiB)": 34.88, "step": 88470, "train_speed(iter/s)": 0.412437 }, { "acc": 0.93776741, "epoch": 2.395554111499201, "grad_norm": 4.075773239135742, "learning_rate": 3.791035613730262e-06, "loss": 0.38795352, "memory(GiB)": 34.88, "step": 88475, "train_speed(iter/s)": 0.412438 }, { "acc": 0.91904945, "epoch": 2.395689491782417, "grad_norm": 7.7806901931762695, "learning_rate": 3.7904926784021715e-06, "loss": 0.46732578, "memory(GiB)": 34.88, "step": 88480, "train_speed(iter/s)": 0.41244 }, { "acc": 0.92357616, "epoch": 2.3958248720656323, "grad_norm": 5.507839679718018, "learning_rate": 3.7899497582317997e-06, "loss": 0.43260307, "memory(GiB)": 34.88, "step": 88485, "train_speed(iter/s)": 0.412441 }, { "acc": 0.90974874, "epoch": 2.395960252348848, "grad_norm": 20.464941024780273, "learning_rate": 3.789406853225948e-06, "loss": 0.55266666, "memory(GiB)": 34.88, "step": 88490, "train_speed(iter/s)": 0.412442 }, { "acc": 0.92023973, "epoch": 2.3960956326320635, "grad_norm": 31.543582916259766, "learning_rate": 3.788863963391419e-06, "loss": 0.46734352, "memory(GiB)": 34.88, "step": 88495, "train_speed(iter/s)": 0.412443 }, { "acc": 0.9449748, "epoch": 2.396231012915279, "grad_norm": 3.8090832233428955, "learning_rate": 3.788321088735011e-06, "loss": 0.34526403, "memory(GiB)": 34.88, "step": 88500, "train_speed(iter/s)": 0.412444 }, { "acc": 0.93760643, "epoch": 2.3963663931984946, "grad_norm": 5.921145915985107, "learning_rate": 3.7877782292635277e-06, "loss": 0.32153692, "memory(GiB)": 34.88, "step": 88505, "train_speed(iter/s)": 0.412445 }, { "acc": 0.92748585, "epoch": 2.39650177348171, "grad_norm": 7.838169574737549, "learning_rate": 3.787235384983766e-06, "loss": 0.38511341, "memory(GiB)": 34.88, "step": 88510, "train_speed(iter/s)": 0.412447 }, { "acc": 0.94389086, "epoch": 2.396637153764926, "grad_norm": 7.2209625244140625, "learning_rate": 3.7866925559025296e-06, "loss": 0.36566954, "memory(GiB)": 34.88, "step": 88515, "train_speed(iter/s)": 0.412448 }, { "acc": 0.92247543, "epoch": 2.396772534048141, "grad_norm": 8.92017936706543, "learning_rate": 3.786149742026614e-06, "loss": 0.41737552, "memory(GiB)": 34.88, "step": 88520, "train_speed(iter/s)": 0.412449 }, { "acc": 0.93611126, "epoch": 2.396907914331357, "grad_norm": 8.442325592041016, "learning_rate": 3.7856069433628224e-06, "loss": 0.3719918, "memory(GiB)": 34.88, "step": 88525, "train_speed(iter/s)": 0.41245 }, { "acc": 0.93076115, "epoch": 2.3970432946145723, "grad_norm": 6.642587661743164, "learning_rate": 3.7850641599179543e-06, "loss": 0.44090681, "memory(GiB)": 34.88, "step": 88530, "train_speed(iter/s)": 0.412451 }, { "acc": 0.92186985, "epoch": 2.3971786748977877, "grad_norm": 8.569331169128418, "learning_rate": 3.784521391698807e-06, "loss": 0.43945627, "memory(GiB)": 34.88, "step": 88535, "train_speed(iter/s)": 0.412452 }, { "acc": 0.9279109, "epoch": 2.3973140551810035, "grad_norm": 6.010834217071533, "learning_rate": 3.7839786387121835e-06, "loss": 0.35020523, "memory(GiB)": 34.88, "step": 88540, "train_speed(iter/s)": 0.412453 }, { "acc": 0.93168831, "epoch": 2.397449435464219, "grad_norm": 6.8293538093566895, "learning_rate": 3.783435900964878e-06, "loss": 0.40923781, "memory(GiB)": 34.88, "step": 88545, "train_speed(iter/s)": 0.412454 }, { "acc": 0.93833752, "epoch": 2.3975848157474347, "grad_norm": 5.180393695831299, "learning_rate": 3.7828931784636936e-06, "loss": 0.31597946, "memory(GiB)": 34.88, "step": 88550, "train_speed(iter/s)": 0.412456 }, { "acc": 0.9099988, "epoch": 2.39772019603065, "grad_norm": 6.794543743133545, "learning_rate": 3.7823504712154263e-06, "loss": 0.42963281, "memory(GiB)": 34.88, "step": 88555, "train_speed(iter/s)": 0.412457 }, { "acc": 0.91492157, "epoch": 2.397855576313866, "grad_norm": 5.79580545425415, "learning_rate": 3.7818077792268764e-06, "loss": 0.48122931, "memory(GiB)": 34.88, "step": 88560, "train_speed(iter/s)": 0.412458 }, { "acc": 0.92270203, "epoch": 2.397990956597081, "grad_norm": 4.310218811035156, "learning_rate": 3.7812651025048413e-06, "loss": 0.43643913, "memory(GiB)": 34.88, "step": 88565, "train_speed(iter/s)": 0.412459 }, { "acc": 0.92030392, "epoch": 2.3981263368802965, "grad_norm": 4.957256317138672, "learning_rate": 3.780722441056118e-06, "loss": 0.40534096, "memory(GiB)": 34.88, "step": 88570, "train_speed(iter/s)": 0.41246 }, { "acc": 0.90463896, "epoch": 2.3982617171635123, "grad_norm": 6.8540940284729, "learning_rate": 3.7801797948875067e-06, "loss": 0.53949294, "memory(GiB)": 34.88, "step": 88575, "train_speed(iter/s)": 0.412462 }, { "acc": 0.91927795, "epoch": 2.3983970974467277, "grad_norm": 5.7823262214660645, "learning_rate": 3.779637164005802e-06, "loss": 0.47327394, "memory(GiB)": 34.88, "step": 88580, "train_speed(iter/s)": 0.412463 }, { "acc": 0.92808285, "epoch": 2.3985324777299435, "grad_norm": 9.691486358642578, "learning_rate": 3.779094548417806e-06, "loss": 0.42519827, "memory(GiB)": 34.88, "step": 88585, "train_speed(iter/s)": 0.412464 }, { "acc": 0.92892189, "epoch": 2.398667858013159, "grad_norm": 10.956981658935547, "learning_rate": 3.778551948130311e-06, "loss": 0.45462713, "memory(GiB)": 34.88, "step": 88590, "train_speed(iter/s)": 0.412465 }, { "acc": 0.92983093, "epoch": 2.3988032382963747, "grad_norm": 11.38196086883545, "learning_rate": 3.7780093631501174e-06, "loss": 0.4458951, "memory(GiB)": 34.88, "step": 88595, "train_speed(iter/s)": 0.412466 }, { "acc": 0.91981297, "epoch": 2.39893861857959, "grad_norm": 23.376678466796875, "learning_rate": 3.777466793484022e-06, "loss": 0.40894136, "memory(GiB)": 34.88, "step": 88600, "train_speed(iter/s)": 0.412467 }, { "acc": 0.92880583, "epoch": 2.399073998862806, "grad_norm": 5.500094890594482, "learning_rate": 3.7769242391388195e-06, "loss": 0.35627575, "memory(GiB)": 34.88, "step": 88605, "train_speed(iter/s)": 0.412468 }, { "acc": 0.93157253, "epoch": 2.399209379146021, "grad_norm": 6.957505702972412, "learning_rate": 3.7763817001213087e-06, "loss": 0.3864464, "memory(GiB)": 34.88, "step": 88610, "train_speed(iter/s)": 0.41247 }, { "acc": 0.92437267, "epoch": 2.399344759429237, "grad_norm": 7.035554885864258, "learning_rate": 3.7758391764382834e-06, "loss": 0.43688002, "memory(GiB)": 34.88, "step": 88615, "train_speed(iter/s)": 0.41247 }, { "acc": 0.93813972, "epoch": 2.3994801397124523, "grad_norm": 6.201934337615967, "learning_rate": 3.7752966680965434e-06, "loss": 0.29497461, "memory(GiB)": 34.88, "step": 88620, "train_speed(iter/s)": 0.412471 }, { "acc": 0.93446293, "epoch": 2.3996155199956677, "grad_norm": 9.56997299194336, "learning_rate": 3.7747541751028804e-06, "loss": 0.34078937, "memory(GiB)": 34.88, "step": 88625, "train_speed(iter/s)": 0.412473 }, { "acc": 0.9038167, "epoch": 2.3997509002788835, "grad_norm": 25.989900588989258, "learning_rate": 3.7742116974640946e-06, "loss": 0.5819912, "memory(GiB)": 34.88, "step": 88630, "train_speed(iter/s)": 0.412474 }, { "acc": 0.94552088, "epoch": 2.399886280562099, "grad_norm": 8.762991905212402, "learning_rate": 3.773669235186978e-06, "loss": 0.31426787, "memory(GiB)": 34.88, "step": 88635, "train_speed(iter/s)": 0.412475 }, { "acc": 0.91706142, "epoch": 2.4000216608453147, "grad_norm": 21.34674835205078, "learning_rate": 3.773126788278328e-06, "loss": 0.51564212, "memory(GiB)": 34.88, "step": 88640, "train_speed(iter/s)": 0.412476 }, { "acc": 0.93407669, "epoch": 2.40015704112853, "grad_norm": 5.752325534820557, "learning_rate": 3.7725843567449406e-06, "loss": 0.39465868, "memory(GiB)": 34.88, "step": 88645, "train_speed(iter/s)": 0.412478 }, { "acc": 0.93375607, "epoch": 2.400292421411746, "grad_norm": 8.796613693237305, "learning_rate": 3.7720419405936076e-06, "loss": 0.3620677, "memory(GiB)": 34.88, "step": 88650, "train_speed(iter/s)": 0.412479 }, { "acc": 0.92712173, "epoch": 2.400427801694961, "grad_norm": 6.73508358001709, "learning_rate": 3.771499539831126e-06, "loss": 0.43321924, "memory(GiB)": 34.88, "step": 88655, "train_speed(iter/s)": 0.41248 }, { "acc": 0.9253005, "epoch": 2.4005631819781765, "grad_norm": 11.460591316223145, "learning_rate": 3.7709571544642907e-06, "loss": 0.41940589, "memory(GiB)": 34.88, "step": 88660, "train_speed(iter/s)": 0.412481 }, { "acc": 0.92316313, "epoch": 2.4006985622613923, "grad_norm": 4.23921012878418, "learning_rate": 3.7704147844998958e-06, "loss": 0.46513014, "memory(GiB)": 34.88, "step": 88665, "train_speed(iter/s)": 0.412482 }, { "acc": 0.93468971, "epoch": 2.4008339425446077, "grad_norm": 22.73699378967285, "learning_rate": 3.7698724299447343e-06, "loss": 0.37619705, "memory(GiB)": 34.88, "step": 88670, "train_speed(iter/s)": 0.412484 }, { "acc": 0.91615028, "epoch": 2.4009693228278235, "grad_norm": 9.250284194946289, "learning_rate": 3.7693300908056013e-06, "loss": 0.46749506, "memory(GiB)": 34.88, "step": 88675, "train_speed(iter/s)": 0.412485 }, { "acc": 0.9203783, "epoch": 2.401104703111039, "grad_norm": 8.319334983825684, "learning_rate": 3.7687877670892913e-06, "loss": 0.40783901, "memory(GiB)": 34.88, "step": 88680, "train_speed(iter/s)": 0.412486 }, { "acc": 0.93932972, "epoch": 2.4012400833942547, "grad_norm": 5.9943623542785645, "learning_rate": 3.768245458802598e-06, "loss": 0.31479988, "memory(GiB)": 34.88, "step": 88685, "train_speed(iter/s)": 0.412487 }, { "acc": 0.92707443, "epoch": 2.40137546367747, "grad_norm": 7.6576738357543945, "learning_rate": 3.7677031659523135e-06, "loss": 0.41534033, "memory(GiB)": 34.88, "step": 88690, "train_speed(iter/s)": 0.412488 }, { "acc": 0.93794212, "epoch": 2.4015108439606854, "grad_norm": 3.6320736408233643, "learning_rate": 3.767160888545231e-06, "loss": 0.32724366, "memory(GiB)": 34.88, "step": 88695, "train_speed(iter/s)": 0.41249 }, { "acc": 0.92951269, "epoch": 2.401646224243901, "grad_norm": 4.6213483810424805, "learning_rate": 3.7666186265881455e-06, "loss": 0.35293703, "memory(GiB)": 34.88, "step": 88700, "train_speed(iter/s)": 0.412491 }, { "acc": 0.93626041, "epoch": 2.4017816045271165, "grad_norm": 5.285248756408691, "learning_rate": 3.766076380087848e-06, "loss": 0.30187769, "memory(GiB)": 34.88, "step": 88705, "train_speed(iter/s)": 0.412492 }, { "acc": 0.94257946, "epoch": 2.4019169848103323, "grad_norm": 10.16701602935791, "learning_rate": 3.7655341490511323e-06, "loss": 0.29308136, "memory(GiB)": 34.88, "step": 88710, "train_speed(iter/s)": 0.412493 }, { "acc": 0.91831379, "epoch": 2.4020523650935477, "grad_norm": 8.930654525756836, "learning_rate": 3.7649919334847902e-06, "loss": 0.43238411, "memory(GiB)": 34.88, "step": 88715, "train_speed(iter/s)": 0.412495 }, { "acc": 0.92831593, "epoch": 2.4021877453767635, "grad_norm": 20.154245376586914, "learning_rate": 3.764449733395615e-06, "loss": 0.41193228, "memory(GiB)": 34.88, "step": 88720, "train_speed(iter/s)": 0.412495 }, { "acc": 0.93509121, "epoch": 2.402323125659979, "grad_norm": 9.0217866897583, "learning_rate": 3.7639075487903994e-06, "loss": 0.37665253, "memory(GiB)": 34.88, "step": 88725, "train_speed(iter/s)": 0.412497 }, { "acc": 0.90422173, "epoch": 2.402458505943194, "grad_norm": 8.561417579650879, "learning_rate": 3.7633653796759324e-06, "loss": 0.58467245, "memory(GiB)": 34.88, "step": 88730, "train_speed(iter/s)": 0.412498 }, { "acc": 0.92597485, "epoch": 2.40259388622641, "grad_norm": 13.233855247497559, "learning_rate": 3.7628232260590077e-06, "loss": 0.37345877, "memory(GiB)": 34.88, "step": 88735, "train_speed(iter/s)": 0.412499 }, { "acc": 0.92149076, "epoch": 2.4027292665096254, "grad_norm": 8.519060134887695, "learning_rate": 3.7622810879464167e-06, "loss": 0.46549754, "memory(GiB)": 34.88, "step": 88740, "train_speed(iter/s)": 0.4125 }, { "acc": 0.92068367, "epoch": 2.402864646792841, "grad_norm": 36.89570999145508, "learning_rate": 3.7617389653449513e-06, "loss": 0.45747547, "memory(GiB)": 34.88, "step": 88745, "train_speed(iter/s)": 0.412501 }, { "acc": 0.9280612, "epoch": 2.4030000270760565, "grad_norm": 7.504661560058594, "learning_rate": 3.761196858261401e-06, "loss": 0.40669155, "memory(GiB)": 34.88, "step": 88750, "train_speed(iter/s)": 0.412502 }, { "acc": 0.92346334, "epoch": 2.4031354073592723, "grad_norm": 6.180585861206055, "learning_rate": 3.760654766702558e-06, "loss": 0.42898402, "memory(GiB)": 34.88, "step": 88755, "train_speed(iter/s)": 0.412503 }, { "acc": 0.92059679, "epoch": 2.4032707876424877, "grad_norm": 18.24372673034668, "learning_rate": 3.7601126906752135e-06, "loss": 0.50255589, "memory(GiB)": 34.88, "step": 88760, "train_speed(iter/s)": 0.412505 }, { "acc": 0.90475883, "epoch": 2.403406167925703, "grad_norm": 9.563217163085938, "learning_rate": 3.7595706301861558e-06, "loss": 0.5667819, "memory(GiB)": 34.88, "step": 88765, "train_speed(iter/s)": 0.412506 }, { "acc": 0.93036966, "epoch": 2.403541548208919, "grad_norm": 6.82822322845459, "learning_rate": 3.75902858524218e-06, "loss": 0.42806664, "memory(GiB)": 34.88, "step": 88770, "train_speed(iter/s)": 0.412507 }, { "acc": 0.92709656, "epoch": 2.403676928492134, "grad_norm": 5.963789463043213, "learning_rate": 3.7584865558500705e-06, "loss": 0.45571532, "memory(GiB)": 34.88, "step": 88775, "train_speed(iter/s)": 0.412508 }, { "acc": 0.93124504, "epoch": 2.40381230877535, "grad_norm": 2.3102266788482666, "learning_rate": 3.757944542016621e-06, "loss": 0.2912375, "memory(GiB)": 34.88, "step": 88780, "train_speed(iter/s)": 0.412509 }, { "acc": 0.92782307, "epoch": 2.4039476890585654, "grad_norm": 6.770261287689209, "learning_rate": 3.7574025437486194e-06, "loss": 0.3329566, "memory(GiB)": 34.88, "step": 88785, "train_speed(iter/s)": 0.41251 }, { "acc": 0.92600851, "epoch": 2.404083069341781, "grad_norm": 5.066823959350586, "learning_rate": 3.7568605610528565e-06, "loss": 0.38642087, "memory(GiB)": 34.88, "step": 88790, "train_speed(iter/s)": 0.412511 }, { "acc": 0.9211277, "epoch": 2.4042184496249965, "grad_norm": 2.4474809169769287, "learning_rate": 3.7563185939361213e-06, "loss": 0.4517499, "memory(GiB)": 34.88, "step": 88795, "train_speed(iter/s)": 0.412512 }, { "acc": 0.94646549, "epoch": 2.4043538299082123, "grad_norm": 5.302805423736572, "learning_rate": 3.7557766424052026e-06, "loss": 0.3186676, "memory(GiB)": 34.88, "step": 88800, "train_speed(iter/s)": 0.412513 }, { "acc": 0.9291522, "epoch": 2.4044892101914277, "grad_norm": 9.747148513793945, "learning_rate": 3.7552347064668913e-06, "loss": 0.37211022, "memory(GiB)": 34.88, "step": 88805, "train_speed(iter/s)": 0.412514 }, { "acc": 0.9230051, "epoch": 2.4046245904746435, "grad_norm": 18.312868118286133, "learning_rate": 3.754692786127974e-06, "loss": 0.48511305, "memory(GiB)": 34.88, "step": 88810, "train_speed(iter/s)": 0.412516 }, { "acc": 0.93323431, "epoch": 2.404759970757859, "grad_norm": 6.9013824462890625, "learning_rate": 3.754150881395241e-06, "loss": 0.39735377, "memory(GiB)": 34.88, "step": 88815, "train_speed(iter/s)": 0.412517 }, { "acc": 0.91979427, "epoch": 2.404895351041074, "grad_norm": 5.1493239402771, "learning_rate": 3.7536089922754775e-06, "loss": 0.46842947, "memory(GiB)": 34.88, "step": 88820, "train_speed(iter/s)": 0.412518 }, { "acc": 0.91885328, "epoch": 2.40503073132429, "grad_norm": 6.9652099609375, "learning_rate": 3.7530671187754752e-06, "loss": 0.52438807, "memory(GiB)": 34.88, "step": 88825, "train_speed(iter/s)": 0.412519 }, { "acc": 0.93320904, "epoch": 2.4051661116075054, "grad_norm": 4.65382719039917, "learning_rate": 3.752525260902021e-06, "loss": 0.4565217, "memory(GiB)": 34.88, "step": 88830, "train_speed(iter/s)": 0.41252 }, { "acc": 0.94099007, "epoch": 2.405301491890721, "grad_norm": 5.793479919433594, "learning_rate": 3.7519834186619025e-06, "loss": 0.31638265, "memory(GiB)": 34.88, "step": 88835, "train_speed(iter/s)": 0.412521 }, { "acc": 0.92608643, "epoch": 2.4054368721739365, "grad_norm": 8.109326362609863, "learning_rate": 3.7514415920619074e-06, "loss": 0.40362291, "memory(GiB)": 34.88, "step": 88840, "train_speed(iter/s)": 0.412522 }, { "acc": 0.90812302, "epoch": 2.4055722524571523, "grad_norm": 10.584911346435547, "learning_rate": 3.7508997811088233e-06, "loss": 0.57697272, "memory(GiB)": 34.88, "step": 88845, "train_speed(iter/s)": 0.412523 }, { "acc": 0.92302704, "epoch": 2.4057076327403677, "grad_norm": 2.4458677768707275, "learning_rate": 3.750357985809438e-06, "loss": 0.4407918, "memory(GiB)": 34.88, "step": 88850, "train_speed(iter/s)": 0.412525 }, { "acc": 0.93435135, "epoch": 2.405843013023583, "grad_norm": 6.1321120262146, "learning_rate": 3.749816206170538e-06, "loss": 0.37734609, "memory(GiB)": 34.88, "step": 88855, "train_speed(iter/s)": 0.412526 }, { "acc": 0.93659191, "epoch": 2.405978393306799, "grad_norm": 5.610252857208252, "learning_rate": 3.7492744421989097e-06, "loss": 0.31197705, "memory(GiB)": 34.88, "step": 88860, "train_speed(iter/s)": 0.412527 }, { "acc": 0.95105381, "epoch": 2.406113773590014, "grad_norm": 7.78173828125, "learning_rate": 3.74873269390134e-06, "loss": 0.29356771, "memory(GiB)": 34.88, "step": 88865, "train_speed(iter/s)": 0.412528 }, { "acc": 0.92326727, "epoch": 2.40624915387323, "grad_norm": 5.794167995452881, "learning_rate": 3.748190961284615e-06, "loss": 0.39010985, "memory(GiB)": 34.88, "step": 88870, "train_speed(iter/s)": 0.412529 }, { "acc": 0.9249754, "epoch": 2.4063845341564454, "grad_norm": 10.923638343811035, "learning_rate": 3.747649244355523e-06, "loss": 0.44485807, "memory(GiB)": 34.88, "step": 88875, "train_speed(iter/s)": 0.41253 }, { "acc": 0.91828232, "epoch": 2.406519914439661, "grad_norm": 8.212756156921387, "learning_rate": 3.7471075431208466e-06, "loss": 0.50716491, "memory(GiB)": 34.88, "step": 88880, "train_speed(iter/s)": 0.412531 }, { "acc": 0.92964497, "epoch": 2.4066552947228765, "grad_norm": 6.715543270111084, "learning_rate": 3.746565857587376e-06, "loss": 0.40309629, "memory(GiB)": 34.88, "step": 88885, "train_speed(iter/s)": 0.412532 }, { "acc": 0.92546425, "epoch": 2.406790675006092, "grad_norm": 13.792609214782715, "learning_rate": 3.7460241877618926e-06, "loss": 0.39306951, "memory(GiB)": 34.88, "step": 88890, "train_speed(iter/s)": 0.412533 }, { "acc": 0.9279932, "epoch": 2.4069260552893077, "grad_norm": 8.600881576538086, "learning_rate": 3.745482533651184e-06, "loss": 0.4108922, "memory(GiB)": 34.88, "step": 88895, "train_speed(iter/s)": 0.412535 }, { "acc": 0.92720737, "epoch": 2.407061435572523, "grad_norm": 5.697867393493652, "learning_rate": 3.744940895262037e-06, "loss": 0.45324774, "memory(GiB)": 34.88, "step": 88900, "train_speed(iter/s)": 0.412536 }, { "acc": 0.9437149, "epoch": 2.407196815855739, "grad_norm": 8.46313190460205, "learning_rate": 3.7443992726012325e-06, "loss": 0.34261773, "memory(GiB)": 34.88, "step": 88905, "train_speed(iter/s)": 0.412537 }, { "acc": 0.92732792, "epoch": 2.407332196138954, "grad_norm": 7.632421493530273, "learning_rate": 3.74385766567556e-06, "loss": 0.40337391, "memory(GiB)": 34.88, "step": 88910, "train_speed(iter/s)": 0.412538 }, { "acc": 0.93651543, "epoch": 2.40746757642217, "grad_norm": 6.8306708335876465, "learning_rate": 3.7433160744918e-06, "loss": 0.36771748, "memory(GiB)": 34.88, "step": 88915, "train_speed(iter/s)": 0.412539 }, { "acc": 0.94002514, "epoch": 2.4076029567053854, "grad_norm": 6.622898101806641, "learning_rate": 3.742774499056741e-06, "loss": 0.32298973, "memory(GiB)": 34.88, "step": 88920, "train_speed(iter/s)": 0.412541 }, { "acc": 0.93463106, "epoch": 2.4077383369886007, "grad_norm": 27.548810958862305, "learning_rate": 3.7422329393771635e-06, "loss": 0.37240448, "memory(GiB)": 34.88, "step": 88925, "train_speed(iter/s)": 0.412542 }, { "acc": 0.93584547, "epoch": 2.4078737172718165, "grad_norm": 7.269273281097412, "learning_rate": 3.741691395459855e-06, "loss": 0.41987257, "memory(GiB)": 34.88, "step": 88930, "train_speed(iter/s)": 0.412543 }, { "acc": 0.92698021, "epoch": 2.408009097555032, "grad_norm": 8.142836570739746, "learning_rate": 3.7411498673115974e-06, "loss": 0.4578052, "memory(GiB)": 34.88, "step": 88935, "train_speed(iter/s)": 0.412544 }, { "acc": 0.93262482, "epoch": 2.4081444778382477, "grad_norm": 5.750482559204102, "learning_rate": 3.7406083549391743e-06, "loss": 0.34461503, "memory(GiB)": 34.88, "step": 88940, "train_speed(iter/s)": 0.412545 }, { "acc": 0.91921616, "epoch": 2.408279858121463, "grad_norm": 7.933731555938721, "learning_rate": 3.7400668583493704e-06, "loss": 0.48202381, "memory(GiB)": 34.88, "step": 88945, "train_speed(iter/s)": 0.412546 }, { "acc": 0.93348255, "epoch": 2.408415238404679, "grad_norm": 5.305638313293457, "learning_rate": 3.7395253775489675e-06, "loss": 0.41192665, "memory(GiB)": 34.88, "step": 88950, "train_speed(iter/s)": 0.412547 }, { "acc": 0.92528543, "epoch": 2.408550618687894, "grad_norm": 7.2360358238220215, "learning_rate": 3.7389839125447504e-06, "loss": 0.4468523, "memory(GiB)": 34.88, "step": 88955, "train_speed(iter/s)": 0.412549 }, { "acc": 0.94142351, "epoch": 2.40868599897111, "grad_norm": 3.5324923992156982, "learning_rate": 3.7384424633434994e-06, "loss": 0.36644466, "memory(GiB)": 34.88, "step": 88960, "train_speed(iter/s)": 0.41255 }, { "acc": 0.92353706, "epoch": 2.4088213792543254, "grad_norm": 7.577129364013672, "learning_rate": 3.737901029952002e-06, "loss": 0.39765975, "memory(GiB)": 34.88, "step": 88965, "train_speed(iter/s)": 0.412551 }, { "acc": 0.90736074, "epoch": 2.408956759537541, "grad_norm": 7.64234733581543, "learning_rate": 3.7373596123770356e-06, "loss": 0.48367214, "memory(GiB)": 34.88, "step": 88970, "train_speed(iter/s)": 0.412552 }, { "acc": 0.92125769, "epoch": 2.4090921398207565, "grad_norm": 11.060354232788086, "learning_rate": 3.736818210625384e-06, "loss": 0.43492794, "memory(GiB)": 34.88, "step": 88975, "train_speed(iter/s)": 0.412553 }, { "acc": 0.93446331, "epoch": 2.409227520103972, "grad_norm": 9.398262977600098, "learning_rate": 3.7362768247038326e-06, "loss": 0.39679518, "memory(GiB)": 34.88, "step": 88980, "train_speed(iter/s)": 0.412554 }, { "acc": 0.91971493, "epoch": 2.4093629003871877, "grad_norm": 10.115952491760254, "learning_rate": 3.735735454619159e-06, "loss": 0.46810808, "memory(GiB)": 34.88, "step": 88985, "train_speed(iter/s)": 0.412555 }, { "acc": 0.93029613, "epoch": 2.409498280670403, "grad_norm": 6.641233444213867, "learning_rate": 3.735194100378147e-06, "loss": 0.38223126, "memory(GiB)": 34.88, "step": 88990, "train_speed(iter/s)": 0.412556 }, { "acc": 0.92300968, "epoch": 2.409633660953619, "grad_norm": 6.51123046875, "learning_rate": 3.734652761987576e-06, "loss": 0.42664571, "memory(GiB)": 34.88, "step": 88995, "train_speed(iter/s)": 0.412557 }, { "acc": 0.93817492, "epoch": 2.409769041236834, "grad_norm": 4.686562538146973, "learning_rate": 3.7341114394542316e-06, "loss": 0.34795287, "memory(GiB)": 34.88, "step": 89000, "train_speed(iter/s)": 0.412558 }, { "acc": 0.92880535, "epoch": 2.40990442152005, "grad_norm": 13.26459789276123, "learning_rate": 3.7335701327848906e-06, "loss": 0.43176746, "memory(GiB)": 34.88, "step": 89005, "train_speed(iter/s)": 0.412559 }, { "acc": 0.92431049, "epoch": 2.4100398018032654, "grad_norm": 6.49289608001709, "learning_rate": 3.7330288419863364e-06, "loss": 0.37131205, "memory(GiB)": 34.88, "step": 89010, "train_speed(iter/s)": 0.41256 }, { "acc": 0.92824211, "epoch": 2.4101751820864807, "grad_norm": 7.393249988555908, "learning_rate": 3.7324875670653483e-06, "loss": 0.41283884, "memory(GiB)": 34.88, "step": 89015, "train_speed(iter/s)": 0.412561 }, { "acc": 0.95632801, "epoch": 2.4103105623696965, "grad_norm": 5.975154399871826, "learning_rate": 3.731946308028709e-06, "loss": 0.27963293, "memory(GiB)": 34.88, "step": 89020, "train_speed(iter/s)": 0.412562 }, { "acc": 0.94166946, "epoch": 2.410445942652912, "grad_norm": 6.303924083709717, "learning_rate": 3.7314050648831967e-06, "loss": 0.38926923, "memory(GiB)": 34.88, "step": 89025, "train_speed(iter/s)": 0.412564 }, { "acc": 0.93304005, "epoch": 2.4105813229361277, "grad_norm": 3.700446128845215, "learning_rate": 3.730863837635592e-06, "loss": 0.46210012, "memory(GiB)": 34.88, "step": 89030, "train_speed(iter/s)": 0.412565 }, { "acc": 0.9357954, "epoch": 2.410716703219343, "grad_norm": 7.123783588409424, "learning_rate": 3.7303226262926755e-06, "loss": 0.37377949, "memory(GiB)": 34.88, "step": 89035, "train_speed(iter/s)": 0.412565 }, { "acc": 0.94048986, "epoch": 2.410852083502559, "grad_norm": 7.286820888519287, "learning_rate": 3.7297814308612263e-06, "loss": 0.29716949, "memory(GiB)": 34.88, "step": 89040, "train_speed(iter/s)": 0.412567 }, { "acc": 0.93393269, "epoch": 2.410987463785774, "grad_norm": 17.433883666992188, "learning_rate": 3.7292402513480236e-06, "loss": 0.35111566, "memory(GiB)": 34.88, "step": 89045, "train_speed(iter/s)": 0.412568 }, { "acc": 0.93088264, "epoch": 2.4111228440689896, "grad_norm": 13.85035514831543, "learning_rate": 3.728699087759847e-06, "loss": 0.34997754, "memory(GiB)": 34.88, "step": 89050, "train_speed(iter/s)": 0.412569 }, { "acc": 0.93137646, "epoch": 2.4112582243522054, "grad_norm": 6.233245372772217, "learning_rate": 3.7281579401034766e-06, "loss": 0.37636724, "memory(GiB)": 34.88, "step": 89055, "train_speed(iter/s)": 0.41257 }, { "acc": 0.92406445, "epoch": 2.4113936046354207, "grad_norm": 4.070669174194336, "learning_rate": 3.7276168083856923e-06, "loss": 0.44568844, "memory(GiB)": 34.88, "step": 89060, "train_speed(iter/s)": 0.412571 }, { "acc": 0.93608274, "epoch": 2.4115289849186365, "grad_norm": 13.293899536132812, "learning_rate": 3.7270756926132686e-06, "loss": 0.40957479, "memory(GiB)": 34.88, "step": 89065, "train_speed(iter/s)": 0.412572 }, { "acc": 0.92519608, "epoch": 2.411664365201852, "grad_norm": 8.638026237487793, "learning_rate": 3.726534592792988e-06, "loss": 0.44501424, "memory(GiB)": 34.88, "step": 89070, "train_speed(iter/s)": 0.412573 }, { "acc": 0.92137203, "epoch": 2.4117997454850677, "grad_norm": 8.930789947509766, "learning_rate": 3.7259935089316267e-06, "loss": 0.4936265, "memory(GiB)": 34.88, "step": 89075, "train_speed(iter/s)": 0.412574 }, { "acc": 0.93517132, "epoch": 2.411935125768283, "grad_norm": 7.782547473907471, "learning_rate": 3.7254524410359645e-06, "loss": 0.34477599, "memory(GiB)": 34.88, "step": 89080, "train_speed(iter/s)": 0.412575 }, { "acc": 0.93052664, "epoch": 2.4120705060514984, "grad_norm": 6.143227577209473, "learning_rate": 3.7249113891127767e-06, "loss": 0.36562552, "memory(GiB)": 34.88, "step": 89085, "train_speed(iter/s)": 0.412577 }, { "acc": 0.9085947, "epoch": 2.4122058863347142, "grad_norm": 11.251493453979492, "learning_rate": 3.724370353168844e-06, "loss": 0.51019402, "memory(GiB)": 34.88, "step": 89090, "train_speed(iter/s)": 0.412578 }, { "acc": 0.91312103, "epoch": 2.4123412666179296, "grad_norm": 5.672067642211914, "learning_rate": 3.7238293332109428e-06, "loss": 0.4318306, "memory(GiB)": 34.88, "step": 89095, "train_speed(iter/s)": 0.412578 }, { "acc": 0.9334053, "epoch": 2.4124766469011454, "grad_norm": 13.510271072387695, "learning_rate": 3.72328832924585e-06, "loss": 0.42891979, "memory(GiB)": 34.88, "step": 89100, "train_speed(iter/s)": 0.41258 }, { "acc": 0.92344866, "epoch": 2.4126120271843607, "grad_norm": 8.09703540802002, "learning_rate": 3.7227473412803442e-06, "loss": 0.44956613, "memory(GiB)": 34.88, "step": 89105, "train_speed(iter/s)": 0.412581 }, { "acc": 0.92277861, "epoch": 2.4127474074675765, "grad_norm": 8.838062286376953, "learning_rate": 3.7222063693212007e-06, "loss": 0.39729693, "memory(GiB)": 34.88, "step": 89110, "train_speed(iter/s)": 0.412582 }, { "acc": 0.92915974, "epoch": 2.412882787750792, "grad_norm": 9.134602546691895, "learning_rate": 3.7216654133751963e-06, "loss": 0.42687416, "memory(GiB)": 34.88, "step": 89115, "train_speed(iter/s)": 0.412583 }, { "acc": 0.90470066, "epoch": 2.4130181680340077, "grad_norm": 5.625476837158203, "learning_rate": 3.7211244734491074e-06, "loss": 0.55082474, "memory(GiB)": 34.88, "step": 89120, "train_speed(iter/s)": 0.412584 }, { "acc": 0.91910543, "epoch": 2.413153548317223, "grad_norm": 5.354822635650635, "learning_rate": 3.720583549549711e-06, "loss": 0.43006916, "memory(GiB)": 34.88, "step": 89125, "train_speed(iter/s)": 0.412585 }, { "acc": 0.92492905, "epoch": 2.413288928600439, "grad_norm": 8.107270240783691, "learning_rate": 3.7200426416837847e-06, "loss": 0.39971452, "memory(GiB)": 34.88, "step": 89130, "train_speed(iter/s)": 0.412586 }, { "acc": 0.92164068, "epoch": 2.4134243088836542, "grad_norm": 6.971579551696777, "learning_rate": 3.7195017498581016e-06, "loss": 0.43324265, "memory(GiB)": 34.88, "step": 89135, "train_speed(iter/s)": 0.412587 }, { "acc": 0.92481594, "epoch": 2.4135596891668696, "grad_norm": 9.969058990478516, "learning_rate": 3.71896087407944e-06, "loss": 0.40650158, "memory(GiB)": 34.88, "step": 89140, "train_speed(iter/s)": 0.412588 }, { "acc": 0.91128139, "epoch": 2.4136950694500854, "grad_norm": 10.064725875854492, "learning_rate": 3.7184200143545733e-06, "loss": 0.54159689, "memory(GiB)": 34.88, "step": 89145, "train_speed(iter/s)": 0.412589 }, { "acc": 0.93551159, "epoch": 2.4138304497333007, "grad_norm": 12.237929344177246, "learning_rate": 3.7178791706902777e-06, "loss": 0.39143312, "memory(GiB)": 34.88, "step": 89150, "train_speed(iter/s)": 0.412591 }, { "acc": 0.92745638, "epoch": 2.4139658300165165, "grad_norm": 6.393999099731445, "learning_rate": 3.717338343093328e-06, "loss": 0.41909409, "memory(GiB)": 34.88, "step": 89155, "train_speed(iter/s)": 0.412592 }, { "acc": 0.93219223, "epoch": 2.414101210299732, "grad_norm": 13.560531616210938, "learning_rate": 3.716797531570499e-06, "loss": 0.38958931, "memory(GiB)": 34.88, "step": 89160, "train_speed(iter/s)": 0.412593 }, { "acc": 0.93513327, "epoch": 2.4142365905829477, "grad_norm": 17.290237426757812, "learning_rate": 3.716256736128568e-06, "loss": 0.36851697, "memory(GiB)": 34.88, "step": 89165, "train_speed(iter/s)": 0.412594 }, { "acc": 0.93358669, "epoch": 2.414371970866163, "grad_norm": 8.546756744384766, "learning_rate": 3.715715956774305e-06, "loss": 0.38060746, "memory(GiB)": 34.88, "step": 89170, "train_speed(iter/s)": 0.412595 }, { "acc": 0.91603622, "epoch": 2.4145073511493784, "grad_norm": 7.572719573974609, "learning_rate": 3.715175193514489e-06, "loss": 0.48370695, "memory(GiB)": 34.88, "step": 89175, "train_speed(iter/s)": 0.412596 }, { "acc": 0.93929958, "epoch": 2.4146427314325942, "grad_norm": 5.404228210449219, "learning_rate": 3.7146344463558896e-06, "loss": 0.31923685, "memory(GiB)": 34.88, "step": 89180, "train_speed(iter/s)": 0.412597 }, { "acc": 0.93184738, "epoch": 2.4147781117158096, "grad_norm": 8.277005195617676, "learning_rate": 3.7140937153052846e-06, "loss": 0.38587742, "memory(GiB)": 34.88, "step": 89185, "train_speed(iter/s)": 0.412598 }, { "acc": 0.93516846, "epoch": 2.4149134919990254, "grad_norm": 13.769356727600098, "learning_rate": 3.7135530003694466e-06, "loss": 0.36365168, "memory(GiB)": 34.88, "step": 89190, "train_speed(iter/s)": 0.412599 }, { "acc": 0.91986694, "epoch": 2.4150488722822407, "grad_norm": 21.020240783691406, "learning_rate": 3.7130123015551466e-06, "loss": 0.43868647, "memory(GiB)": 34.88, "step": 89195, "train_speed(iter/s)": 0.412601 }, { "acc": 0.90657396, "epoch": 2.4151842525654565, "grad_norm": 12.663283348083496, "learning_rate": 3.712471618869162e-06, "loss": 0.53428192, "memory(GiB)": 34.88, "step": 89200, "train_speed(iter/s)": 0.412602 }, { "acc": 0.9116785, "epoch": 2.415319632848672, "grad_norm": 21.730915069580078, "learning_rate": 3.7119309523182617e-06, "loss": 0.4639998, "memory(GiB)": 34.88, "step": 89205, "train_speed(iter/s)": 0.412603 }, { "acc": 0.92093134, "epoch": 2.4154550131318873, "grad_norm": 8.021519660949707, "learning_rate": 3.711390301909223e-06, "loss": 0.47403917, "memory(GiB)": 34.88, "step": 89210, "train_speed(iter/s)": 0.412604 }, { "acc": 0.93434887, "epoch": 2.415590393415103, "grad_norm": 7.822514533996582, "learning_rate": 3.710849667648814e-06, "loss": 0.35101414, "memory(GiB)": 34.88, "step": 89215, "train_speed(iter/s)": 0.412605 }, { "acc": 0.91811962, "epoch": 2.4157257736983184, "grad_norm": 8.421948432922363, "learning_rate": 3.7103090495438116e-06, "loss": 0.50855694, "memory(GiB)": 34.88, "step": 89220, "train_speed(iter/s)": 0.412606 }, { "acc": 0.94206619, "epoch": 2.4158611539815342, "grad_norm": 6.208402156829834, "learning_rate": 3.7097684476009847e-06, "loss": 0.3018178, "memory(GiB)": 34.88, "step": 89225, "train_speed(iter/s)": 0.412608 }, { "acc": 0.92161503, "epoch": 2.4159965342647496, "grad_norm": 9.986635208129883, "learning_rate": 3.7092278618271088e-06, "loss": 0.39726985, "memory(GiB)": 34.88, "step": 89230, "train_speed(iter/s)": 0.412609 }, { "acc": 0.90412197, "epoch": 2.4161319145479654, "grad_norm": 6.461872100830078, "learning_rate": 3.708687292228952e-06, "loss": 0.48613091, "memory(GiB)": 34.88, "step": 89235, "train_speed(iter/s)": 0.41261 }, { "acc": 0.92909288, "epoch": 2.4162672948311807, "grad_norm": 5.025426864624023, "learning_rate": 3.7081467388132868e-06, "loss": 0.45578556, "memory(GiB)": 34.88, "step": 89240, "train_speed(iter/s)": 0.412611 }, { "acc": 0.93880625, "epoch": 2.416402675114396, "grad_norm": 12.885276794433594, "learning_rate": 3.707606201586888e-06, "loss": 0.39801314, "memory(GiB)": 34.88, "step": 89245, "train_speed(iter/s)": 0.412612 }, { "acc": 0.92520866, "epoch": 2.416538055397612, "grad_norm": 7.432967662811279, "learning_rate": 3.7070656805565223e-06, "loss": 0.38563666, "memory(GiB)": 34.88, "step": 89250, "train_speed(iter/s)": 0.412613 }, { "acc": 0.93290796, "epoch": 2.4166734356808273, "grad_norm": 16.88930320739746, "learning_rate": 3.7065251757289654e-06, "loss": 0.41082735, "memory(GiB)": 34.88, "step": 89255, "train_speed(iter/s)": 0.412614 }, { "acc": 0.93543072, "epoch": 2.416808815964043, "grad_norm": 5.694427013397217, "learning_rate": 3.705984687110984e-06, "loss": 0.38314867, "memory(GiB)": 34.88, "step": 89260, "train_speed(iter/s)": 0.412616 }, { "acc": 0.93603964, "epoch": 2.4169441962472584, "grad_norm": 5.435322284698486, "learning_rate": 3.705444214709352e-06, "loss": 0.39544599, "memory(GiB)": 34.88, "step": 89265, "train_speed(iter/s)": 0.412617 }, { "acc": 0.929739, "epoch": 2.4170795765304742, "grad_norm": 8.790057182312012, "learning_rate": 3.704903758530838e-06, "loss": 0.373686, "memory(GiB)": 34.88, "step": 89270, "train_speed(iter/s)": 0.412618 }, { "acc": 0.92344494, "epoch": 2.4172149568136896, "grad_norm": 7.681795120239258, "learning_rate": 3.704363318582212e-06, "loss": 0.44394231, "memory(GiB)": 34.88, "step": 89275, "train_speed(iter/s)": 0.412619 }, { "acc": 0.91550789, "epoch": 2.4173503370969054, "grad_norm": 17.667882919311523, "learning_rate": 3.7038228948702453e-06, "loss": 0.48229542, "memory(GiB)": 34.88, "step": 89280, "train_speed(iter/s)": 0.41262 }, { "acc": 0.93409138, "epoch": 2.4174857173801207, "grad_norm": 5.517354965209961, "learning_rate": 3.7032824874017078e-06, "loss": 0.39468379, "memory(GiB)": 34.88, "step": 89285, "train_speed(iter/s)": 0.412621 }, { "acc": 0.89613743, "epoch": 2.4176210976633365, "grad_norm": 13.363228797912598, "learning_rate": 3.702742096183368e-06, "loss": 0.56999626, "memory(GiB)": 34.88, "step": 89290, "train_speed(iter/s)": 0.412622 }, { "acc": 0.92564163, "epoch": 2.417756477946552, "grad_norm": 7.569206237792969, "learning_rate": 3.7022017212219964e-06, "loss": 0.39434624, "memory(GiB)": 34.88, "step": 89295, "train_speed(iter/s)": 0.412623 }, { "acc": 0.92114191, "epoch": 2.4178918582297673, "grad_norm": 16.536128997802734, "learning_rate": 3.701661362524362e-06, "loss": 0.45476699, "memory(GiB)": 34.88, "step": 89300, "train_speed(iter/s)": 0.412625 }, { "acc": 0.92923603, "epoch": 2.418027238512983, "grad_norm": 5.454092502593994, "learning_rate": 3.701121020097233e-06, "loss": 0.44809446, "memory(GiB)": 34.88, "step": 89305, "train_speed(iter/s)": 0.412626 }, { "acc": 0.92110252, "epoch": 2.4181626187961984, "grad_norm": 12.691855430603027, "learning_rate": 3.70058069394738e-06, "loss": 0.48340549, "memory(GiB)": 34.88, "step": 89310, "train_speed(iter/s)": 0.412627 }, { "acc": 0.93550701, "epoch": 2.4182979990794142, "grad_norm": 17.594438552856445, "learning_rate": 3.7000403840815706e-06, "loss": 0.34893169, "memory(GiB)": 34.88, "step": 89315, "train_speed(iter/s)": 0.412628 }, { "acc": 0.93144913, "epoch": 2.4184333793626296, "grad_norm": 8.94347858428955, "learning_rate": 3.6995000905065727e-06, "loss": 0.38133273, "memory(GiB)": 34.88, "step": 89320, "train_speed(iter/s)": 0.412629 }, { "acc": 0.91520128, "epoch": 2.4185687596458454, "grad_norm": 24.366775512695312, "learning_rate": 3.6989598132291553e-06, "loss": 0.42375641, "memory(GiB)": 34.88, "step": 89325, "train_speed(iter/s)": 0.41263 }, { "acc": 0.91721058, "epoch": 2.4187041399290607, "grad_norm": 10.791460037231445, "learning_rate": 3.6984195522560862e-06, "loss": 0.43931379, "memory(GiB)": 34.88, "step": 89330, "train_speed(iter/s)": 0.412631 }, { "acc": 0.92072544, "epoch": 2.418839520212276, "grad_norm": 7.916556358337402, "learning_rate": 3.6978793075941333e-06, "loss": 0.48771596, "memory(GiB)": 34.88, "step": 89335, "train_speed(iter/s)": 0.412633 }, { "acc": 0.93350763, "epoch": 2.418974900495492, "grad_norm": 5.235055446624756, "learning_rate": 3.697339079250064e-06, "loss": 0.3664844, "memory(GiB)": 34.88, "step": 89340, "train_speed(iter/s)": 0.412634 }, { "acc": 0.93096781, "epoch": 2.4191102807787073, "grad_norm": 8.769858360290527, "learning_rate": 3.6967988672306464e-06, "loss": 0.35309119, "memory(GiB)": 34.88, "step": 89345, "train_speed(iter/s)": 0.412635 }, { "acc": 0.933813, "epoch": 2.419245661061923, "grad_norm": 9.839349746704102, "learning_rate": 3.6962586715426463e-06, "loss": 0.40855412, "memory(GiB)": 34.88, "step": 89350, "train_speed(iter/s)": 0.412636 }, { "acc": 0.92112045, "epoch": 2.4193810413451384, "grad_norm": 12.800579071044922, "learning_rate": 3.6957184921928328e-06, "loss": 0.47314444, "memory(GiB)": 34.88, "step": 89355, "train_speed(iter/s)": 0.412637 }, { "acc": 0.91512375, "epoch": 2.4195164216283542, "grad_norm": 8.027225494384766, "learning_rate": 3.695178329187972e-06, "loss": 0.4734498, "memory(GiB)": 34.88, "step": 89360, "train_speed(iter/s)": 0.412639 }, { "acc": 0.93910427, "epoch": 2.4196518019115696, "grad_norm": 8.668010711669922, "learning_rate": 3.6946381825348293e-06, "loss": 0.36756592, "memory(GiB)": 34.88, "step": 89365, "train_speed(iter/s)": 0.41264 }, { "acc": 0.9282053, "epoch": 2.419787182194785, "grad_norm": 9.362144470214844, "learning_rate": 3.694098052240172e-06, "loss": 0.35771174, "memory(GiB)": 34.88, "step": 89370, "train_speed(iter/s)": 0.412641 }, { "acc": 0.94527912, "epoch": 2.4199225624780007, "grad_norm": 4.696038246154785, "learning_rate": 3.6935579383107656e-06, "loss": 0.30521498, "memory(GiB)": 34.88, "step": 89375, "train_speed(iter/s)": 0.412642 }, { "acc": 0.91724386, "epoch": 2.420057942761216, "grad_norm": 9.47859001159668, "learning_rate": 3.6930178407533773e-06, "loss": 0.42959251, "memory(GiB)": 34.88, "step": 89380, "train_speed(iter/s)": 0.412643 }, { "acc": 0.95864716, "epoch": 2.420193323044432, "grad_norm": 5.24558687210083, "learning_rate": 3.6924777595747713e-06, "loss": 0.19411943, "memory(GiB)": 34.88, "step": 89385, "train_speed(iter/s)": 0.412645 }, { "acc": 0.93225822, "epoch": 2.4203287033276473, "grad_norm": 7.7527852058410645, "learning_rate": 3.6919376947817148e-06, "loss": 0.42087564, "memory(GiB)": 34.88, "step": 89390, "train_speed(iter/s)": 0.412646 }, { "acc": 0.92581148, "epoch": 2.420464083610863, "grad_norm": 3.521240711212158, "learning_rate": 3.691397646380973e-06, "loss": 0.42001863, "memory(GiB)": 34.88, "step": 89395, "train_speed(iter/s)": 0.412647 }, { "acc": 0.93551817, "epoch": 2.4205994638940784, "grad_norm": 5.817928314208984, "learning_rate": 3.6908576143793105e-06, "loss": 0.26682644, "memory(GiB)": 34.88, "step": 89400, "train_speed(iter/s)": 0.412648 }, { "acc": 0.91273499, "epoch": 2.420734844177294, "grad_norm": 10.882843017578125, "learning_rate": 3.6903175987834933e-06, "loss": 0.46708336, "memory(GiB)": 34.88, "step": 89405, "train_speed(iter/s)": 0.412649 }, { "acc": 0.91035805, "epoch": 2.4208702244605096, "grad_norm": 18.598691940307617, "learning_rate": 3.689777599600283e-06, "loss": 0.55737114, "memory(GiB)": 34.88, "step": 89410, "train_speed(iter/s)": 0.41265 }, { "acc": 0.93297787, "epoch": 2.421005604743725, "grad_norm": 4.482077121734619, "learning_rate": 3.6892376168364474e-06, "loss": 0.34568777, "memory(GiB)": 34.88, "step": 89415, "train_speed(iter/s)": 0.412652 }, { "acc": 0.94320784, "epoch": 2.4211409850269408, "grad_norm": 9.425439834594727, "learning_rate": 3.688697650498749e-06, "loss": 0.34405518, "memory(GiB)": 34.88, "step": 89420, "train_speed(iter/s)": 0.412653 }, { "acc": 0.92893944, "epoch": 2.421276365310156, "grad_norm": 4.442495822906494, "learning_rate": 3.688157700593952e-06, "loss": 0.43699408, "memory(GiB)": 34.88, "step": 89425, "train_speed(iter/s)": 0.412654 }, { "acc": 0.91774426, "epoch": 2.421411745593372, "grad_norm": 14.860824584960938, "learning_rate": 3.6876177671288234e-06, "loss": 0.48263688, "memory(GiB)": 34.88, "step": 89430, "train_speed(iter/s)": 0.412655 }, { "acc": 0.93248234, "epoch": 2.4215471258765873, "grad_norm": 6.73497200012207, "learning_rate": 3.687077850110123e-06, "loss": 0.45835743, "memory(GiB)": 34.88, "step": 89435, "train_speed(iter/s)": 0.412656 }, { "acc": 0.93177376, "epoch": 2.421682506159803, "grad_norm": 10.199644088745117, "learning_rate": 3.686537949544618e-06, "loss": 0.37384195, "memory(GiB)": 34.88, "step": 89440, "train_speed(iter/s)": 0.412658 }, { "acc": 0.93196478, "epoch": 2.4218178864430184, "grad_norm": 5.901559352874756, "learning_rate": 3.6859980654390676e-06, "loss": 0.40275497, "memory(GiB)": 34.88, "step": 89445, "train_speed(iter/s)": 0.412659 }, { "acc": 0.93015985, "epoch": 2.4219532667262342, "grad_norm": 5.75370454788208, "learning_rate": 3.6854581978002373e-06, "loss": 0.36567073, "memory(GiB)": 34.88, "step": 89450, "train_speed(iter/s)": 0.41266 }, { "acc": 0.92341194, "epoch": 2.4220886470094496, "grad_norm": 5.560887336730957, "learning_rate": 3.684918346634889e-06, "loss": 0.40434661, "memory(GiB)": 34.88, "step": 89455, "train_speed(iter/s)": 0.412661 }, { "acc": 0.93186626, "epoch": 2.422224027292665, "grad_norm": 6.790907382965088, "learning_rate": 3.6843785119497856e-06, "loss": 0.37975945, "memory(GiB)": 34.88, "step": 89460, "train_speed(iter/s)": 0.412662 }, { "acc": 0.93143911, "epoch": 2.4223594075758808, "grad_norm": 2.2467153072357178, "learning_rate": 3.6838386937516916e-06, "loss": 0.37356889, "memory(GiB)": 34.88, "step": 89465, "train_speed(iter/s)": 0.412663 }, { "acc": 0.9184948, "epoch": 2.422494787859096, "grad_norm": 6.328736782073975, "learning_rate": 3.683298892047367e-06, "loss": 0.39680536, "memory(GiB)": 34.88, "step": 89470, "train_speed(iter/s)": 0.412664 }, { "acc": 0.91770782, "epoch": 2.422630168142312, "grad_norm": 11.02974796295166, "learning_rate": 3.6827591068435747e-06, "loss": 0.51654143, "memory(GiB)": 34.88, "step": 89475, "train_speed(iter/s)": 0.412665 }, { "acc": 0.92730942, "epoch": 2.4227655484255273, "grad_norm": 8.515929222106934, "learning_rate": 3.6822193381470772e-06, "loss": 0.42681522, "memory(GiB)": 34.88, "step": 89480, "train_speed(iter/s)": 0.412667 }, { "acc": 0.92438145, "epoch": 2.422900928708743, "grad_norm": 24.601524353027344, "learning_rate": 3.6816795859646352e-06, "loss": 0.41072698, "memory(GiB)": 34.88, "step": 89485, "train_speed(iter/s)": 0.412668 }, { "acc": 0.91877537, "epoch": 2.4230363089919584, "grad_norm": 9.052451133728027, "learning_rate": 3.6811398503030092e-06, "loss": 0.40144796, "memory(GiB)": 34.88, "step": 89490, "train_speed(iter/s)": 0.412669 }, { "acc": 0.92848892, "epoch": 2.423171689275174, "grad_norm": 6.512171745300293, "learning_rate": 3.6806001311689633e-06, "loss": 0.41665068, "memory(GiB)": 34.88, "step": 89495, "train_speed(iter/s)": 0.41267 }, { "acc": 0.9316288, "epoch": 2.4233070695583896, "grad_norm": 8.90905475616455, "learning_rate": 3.6800604285692554e-06, "loss": 0.32858906, "memory(GiB)": 34.88, "step": 89500, "train_speed(iter/s)": 0.412671 }, { "acc": 0.92960987, "epoch": 2.423442449841605, "grad_norm": 6.644044399261475, "learning_rate": 3.679520742510647e-06, "loss": 0.38673408, "memory(GiB)": 34.88, "step": 89505, "train_speed(iter/s)": 0.412672 }, { "acc": 0.93749809, "epoch": 2.4235778301248208, "grad_norm": 7.668903350830078, "learning_rate": 3.6789810729999014e-06, "loss": 0.37304039, "memory(GiB)": 34.88, "step": 89510, "train_speed(iter/s)": 0.412674 }, { "acc": 0.95343189, "epoch": 2.423713210408036, "grad_norm": 6.163556098937988, "learning_rate": 3.6784414200437756e-06, "loss": 0.25302646, "memory(GiB)": 34.88, "step": 89515, "train_speed(iter/s)": 0.412675 }, { "acc": 0.92727947, "epoch": 2.423848590691252, "grad_norm": 8.942948341369629, "learning_rate": 3.6779017836490333e-06, "loss": 0.40199242, "memory(GiB)": 34.88, "step": 89520, "train_speed(iter/s)": 0.412675 }, { "acc": 0.92637291, "epoch": 2.4239839709744673, "grad_norm": 7.72473669052124, "learning_rate": 3.6773621638224315e-06, "loss": 0.36853366, "memory(GiB)": 34.88, "step": 89525, "train_speed(iter/s)": 0.412677 }, { "acc": 0.91650448, "epoch": 2.4241193512576826, "grad_norm": 8.596553802490234, "learning_rate": 3.676822560570732e-06, "loss": 0.49833765, "memory(GiB)": 34.88, "step": 89530, "train_speed(iter/s)": 0.412678 }, { "acc": 0.92222958, "epoch": 2.4242547315408984, "grad_norm": 6.189203262329102, "learning_rate": 3.676282973900692e-06, "loss": 0.38407764, "memory(GiB)": 34.88, "step": 89535, "train_speed(iter/s)": 0.412679 }, { "acc": 0.9445343, "epoch": 2.424390111824114, "grad_norm": 8.00263786315918, "learning_rate": 3.675743403819072e-06, "loss": 0.31043305, "memory(GiB)": 34.88, "step": 89540, "train_speed(iter/s)": 0.41268 }, { "acc": 0.90577526, "epoch": 2.4245254921073296, "grad_norm": 20.431629180908203, "learning_rate": 3.675203850332633e-06, "loss": 0.55947318, "memory(GiB)": 34.88, "step": 89545, "train_speed(iter/s)": 0.412682 }, { "acc": 0.94606285, "epoch": 2.424660872390545, "grad_norm": 5.442381381988525, "learning_rate": 3.67466431344813e-06, "loss": 0.2661531, "memory(GiB)": 34.88, "step": 89550, "train_speed(iter/s)": 0.412683 }, { "acc": 0.93181744, "epoch": 2.4247962526737608, "grad_norm": 3.5240702629089355, "learning_rate": 3.6741247931723273e-06, "loss": 0.36942084, "memory(GiB)": 34.88, "step": 89555, "train_speed(iter/s)": 0.412684 }, { "acc": 0.9390317, "epoch": 2.424931632956976, "grad_norm": 5.368627548217773, "learning_rate": 3.673585289511979e-06, "loss": 0.32046008, "memory(GiB)": 34.88, "step": 89560, "train_speed(iter/s)": 0.412685 }, { "acc": 0.91798353, "epoch": 2.4250670132401915, "grad_norm": 7.919571876525879, "learning_rate": 3.6730458024738465e-06, "loss": 0.4323307, "memory(GiB)": 34.88, "step": 89565, "train_speed(iter/s)": 0.412686 }, { "acc": 0.91664019, "epoch": 2.4252023935234073, "grad_norm": 5.8529229164123535, "learning_rate": 3.6725063320646838e-06, "loss": 0.48455977, "memory(GiB)": 34.88, "step": 89570, "train_speed(iter/s)": 0.412687 }, { "acc": 0.92344627, "epoch": 2.4253377738066226, "grad_norm": 7.4006242752075195, "learning_rate": 3.671966878291252e-06, "loss": 0.42721086, "memory(GiB)": 34.88, "step": 89575, "train_speed(iter/s)": 0.412688 }, { "acc": 0.91471558, "epoch": 2.4254731540898384, "grad_norm": 28.800434112548828, "learning_rate": 3.6714274411603086e-06, "loss": 0.43846149, "memory(GiB)": 34.88, "step": 89580, "train_speed(iter/s)": 0.412689 }, { "acc": 0.93220577, "epoch": 2.425608534373054, "grad_norm": 5.795347213745117, "learning_rate": 3.67088802067861e-06, "loss": 0.34809043, "memory(GiB)": 34.88, "step": 89585, "train_speed(iter/s)": 0.41269 }, { "acc": 0.90469322, "epoch": 2.4257439146562696, "grad_norm": 8.659513473510742, "learning_rate": 3.6703486168529147e-06, "loss": 0.50957494, "memory(GiB)": 34.88, "step": 89590, "train_speed(iter/s)": 0.412692 }, { "acc": 0.9101263, "epoch": 2.425879294939485, "grad_norm": 5.573963165283203, "learning_rate": 3.6698092296899786e-06, "loss": 0.48763413, "memory(GiB)": 34.88, "step": 89595, "train_speed(iter/s)": 0.412693 }, { "acc": 0.93140764, "epoch": 2.4260146752227008, "grad_norm": 7.723804950714111, "learning_rate": 3.6692698591965597e-06, "loss": 0.36820624, "memory(GiB)": 34.88, "step": 89600, "train_speed(iter/s)": 0.412694 }, { "acc": 0.9182765, "epoch": 2.426150055505916, "grad_norm": 7.974100112915039, "learning_rate": 3.668730505379414e-06, "loss": 0.48039055, "memory(GiB)": 34.88, "step": 89605, "train_speed(iter/s)": 0.412695 }, { "acc": 0.93251896, "epoch": 2.426285435789132, "grad_norm": 8.606022834777832, "learning_rate": 3.6681911682452987e-06, "loss": 0.3698523, "memory(GiB)": 34.88, "step": 89610, "train_speed(iter/s)": 0.412697 }, { "acc": 0.9271225, "epoch": 2.4264208160723473, "grad_norm": 11.295682907104492, "learning_rate": 3.667651847800969e-06, "loss": 0.42027278, "memory(GiB)": 34.88, "step": 89615, "train_speed(iter/s)": 0.412698 }, { "acc": 0.93116474, "epoch": 2.4265561963555626, "grad_norm": 7.183095932006836, "learning_rate": 3.667112544053181e-06, "loss": 0.37709208, "memory(GiB)": 34.88, "step": 89620, "train_speed(iter/s)": 0.412699 }, { "acc": 0.92520943, "epoch": 2.4266915766387784, "grad_norm": 11.292325973510742, "learning_rate": 3.666573257008692e-06, "loss": 0.40503893, "memory(GiB)": 34.88, "step": 89625, "train_speed(iter/s)": 0.4127 }, { "acc": 0.93289499, "epoch": 2.426826956921994, "grad_norm": 8.393000602722168, "learning_rate": 3.6660339866742556e-06, "loss": 0.44203091, "memory(GiB)": 34.88, "step": 89630, "train_speed(iter/s)": 0.412701 }, { "acc": 0.90862141, "epoch": 2.4269623372052096, "grad_norm": 3.311802625656128, "learning_rate": 3.6654947330566294e-06, "loss": 0.49425726, "memory(GiB)": 34.88, "step": 89635, "train_speed(iter/s)": 0.412703 }, { "acc": 0.91776066, "epoch": 2.427097717488425, "grad_norm": 6.538247585296631, "learning_rate": 3.6649554961625655e-06, "loss": 0.4235177, "memory(GiB)": 34.88, "step": 89640, "train_speed(iter/s)": 0.412704 }, { "acc": 0.93813505, "epoch": 2.4272330977716408, "grad_norm": 14.987137794494629, "learning_rate": 3.6644162759988232e-06, "loss": 0.36594443, "memory(GiB)": 34.88, "step": 89645, "train_speed(iter/s)": 0.412705 }, { "acc": 0.92060108, "epoch": 2.427368478054856, "grad_norm": 10.102340698242188, "learning_rate": 3.663877072572154e-06, "loss": 0.50747538, "memory(GiB)": 34.88, "step": 89650, "train_speed(iter/s)": 0.412706 }, { "acc": 0.94305449, "epoch": 2.4275038583380715, "grad_norm": 5.80803918838501, "learning_rate": 3.6633378858893133e-06, "loss": 0.33196154, "memory(GiB)": 34.88, "step": 89655, "train_speed(iter/s)": 0.412707 }, { "acc": 0.91923409, "epoch": 2.4276392386212873, "grad_norm": 9.034849166870117, "learning_rate": 3.6627987159570553e-06, "loss": 0.46982045, "memory(GiB)": 34.88, "step": 89660, "train_speed(iter/s)": 0.412708 }, { "acc": 0.94275007, "epoch": 2.4277746189045026, "grad_norm": 4.930065631866455, "learning_rate": 3.6622595627821343e-06, "loss": 0.29090643, "memory(GiB)": 34.88, "step": 89665, "train_speed(iter/s)": 0.412709 }, { "acc": 0.91642809, "epoch": 2.4279099991877184, "grad_norm": 4.746387481689453, "learning_rate": 3.6617204263713056e-06, "loss": 0.48472219, "memory(GiB)": 34.88, "step": 89670, "train_speed(iter/s)": 0.41271 }, { "acc": 0.94436169, "epoch": 2.428045379470934, "grad_norm": 5.416011333465576, "learning_rate": 3.66118130673132e-06, "loss": 0.33933342, "memory(GiB)": 34.88, "step": 89675, "train_speed(iter/s)": 0.412711 }, { "acc": 0.93462658, "epoch": 2.4281807597541496, "grad_norm": 6.353601932525635, "learning_rate": 3.6606422038689347e-06, "loss": 0.34079177, "memory(GiB)": 34.88, "step": 89680, "train_speed(iter/s)": 0.412713 }, { "acc": 0.91829224, "epoch": 2.428316140037365, "grad_norm": 6.104523658752441, "learning_rate": 3.6601031177908997e-06, "loss": 0.43015127, "memory(GiB)": 34.88, "step": 89685, "train_speed(iter/s)": 0.412714 }, { "acc": 0.91105289, "epoch": 2.4284515203205803, "grad_norm": 7.604935646057129, "learning_rate": 3.6595640485039703e-06, "loss": 0.45796947, "memory(GiB)": 34.88, "step": 89690, "train_speed(iter/s)": 0.412715 }, { "acc": 0.94317522, "epoch": 2.428586900603796, "grad_norm": 11.935592651367188, "learning_rate": 3.659024996014899e-06, "loss": 0.32618871, "memory(GiB)": 34.88, "step": 89695, "train_speed(iter/s)": 0.412716 }, { "acc": 0.92651405, "epoch": 2.4287222808870115, "grad_norm": 7.090348243713379, "learning_rate": 3.658485960330437e-06, "loss": 0.38546777, "memory(GiB)": 34.88, "step": 89700, "train_speed(iter/s)": 0.412717 }, { "acc": 0.92795658, "epoch": 2.4288576611702273, "grad_norm": 7.509278774261475, "learning_rate": 3.6579469414573386e-06, "loss": 0.45621967, "memory(GiB)": 34.88, "step": 89705, "train_speed(iter/s)": 0.412719 }, { "acc": 0.92653961, "epoch": 2.4289930414534426, "grad_norm": 9.500497817993164, "learning_rate": 3.6574079394023546e-06, "loss": 0.4216495, "memory(GiB)": 34.88, "step": 89710, "train_speed(iter/s)": 0.41272 }, { "acc": 0.93497639, "epoch": 2.4291284217366584, "grad_norm": 10.005599021911621, "learning_rate": 3.6568689541722386e-06, "loss": 0.35846696, "memory(GiB)": 34.88, "step": 89715, "train_speed(iter/s)": 0.412721 }, { "acc": 0.93723249, "epoch": 2.429263802019874, "grad_norm": 12.50369930267334, "learning_rate": 3.656329985773741e-06, "loss": 0.35643277, "memory(GiB)": 34.88, "step": 89720, "train_speed(iter/s)": 0.412722 }, { "acc": 0.93087749, "epoch": 2.429399182303089, "grad_norm": 8.513294219970703, "learning_rate": 3.6557910342136138e-06, "loss": 0.35875239, "memory(GiB)": 34.88, "step": 89725, "train_speed(iter/s)": 0.412723 }, { "acc": 0.93635101, "epoch": 2.429534562586305, "grad_norm": 5.793500900268555, "learning_rate": 3.65525209949861e-06, "loss": 0.3533947, "memory(GiB)": 34.88, "step": 89730, "train_speed(iter/s)": 0.412724 }, { "acc": 0.92985363, "epoch": 2.4296699428695203, "grad_norm": 6.445102214813232, "learning_rate": 3.65471318163548e-06, "loss": 0.35716367, "memory(GiB)": 34.88, "step": 89735, "train_speed(iter/s)": 0.412726 }, { "acc": 0.93930035, "epoch": 2.429805323152736, "grad_norm": 5.2378830909729, "learning_rate": 3.654174280630973e-06, "loss": 0.29585657, "memory(GiB)": 34.88, "step": 89740, "train_speed(iter/s)": 0.412727 }, { "acc": 0.92122126, "epoch": 2.4299407034359515, "grad_norm": 6.884607791900635, "learning_rate": 3.653635396491842e-06, "loss": 0.46366315, "memory(GiB)": 34.88, "step": 89745, "train_speed(iter/s)": 0.412728 }, { "acc": 0.91261797, "epoch": 2.4300760837191673, "grad_norm": 53.84190368652344, "learning_rate": 3.653096529224837e-06, "loss": 0.42549829, "memory(GiB)": 34.88, "step": 89750, "train_speed(iter/s)": 0.412729 }, { "acc": 0.92887135, "epoch": 2.4302114640023826, "grad_norm": 8.318546295166016, "learning_rate": 3.6525576788367063e-06, "loss": 0.42616205, "memory(GiB)": 34.88, "step": 89755, "train_speed(iter/s)": 0.41273 }, { "acc": 0.92660713, "epoch": 2.430346844285598, "grad_norm": 5.9993815422058105, "learning_rate": 3.6520188453342036e-06, "loss": 0.35698938, "memory(GiB)": 34.88, "step": 89760, "train_speed(iter/s)": 0.412731 }, { "acc": 0.93317986, "epoch": 2.430482224568814, "grad_norm": 6.361743450164795, "learning_rate": 3.651480028724076e-06, "loss": 0.38534448, "memory(GiB)": 34.88, "step": 89765, "train_speed(iter/s)": 0.412732 }, { "acc": 0.92059832, "epoch": 2.430617604852029, "grad_norm": 9.307856559753418, "learning_rate": 3.650941229013074e-06, "loss": 0.44674015, "memory(GiB)": 34.88, "step": 89770, "train_speed(iter/s)": 0.412733 }, { "acc": 0.92614326, "epoch": 2.430752985135245, "grad_norm": 8.933309555053711, "learning_rate": 3.650402446207951e-06, "loss": 0.41251488, "memory(GiB)": 34.88, "step": 89775, "train_speed(iter/s)": 0.412734 }, { "acc": 0.93454809, "epoch": 2.4308883654184603, "grad_norm": 5.9917378425598145, "learning_rate": 3.6498636803154486e-06, "loss": 0.3904963, "memory(GiB)": 34.88, "step": 89780, "train_speed(iter/s)": 0.412735 }, { "acc": 0.92045364, "epoch": 2.431023745701676, "grad_norm": 6.267750263214111, "learning_rate": 3.6493249313423223e-06, "loss": 0.45252829, "memory(GiB)": 34.88, "step": 89785, "train_speed(iter/s)": 0.412736 }, { "acc": 0.93926086, "epoch": 2.4311591259848915, "grad_norm": 10.86823558807373, "learning_rate": 3.648786199295318e-06, "loss": 0.40771461, "memory(GiB)": 34.88, "step": 89790, "train_speed(iter/s)": 0.412737 }, { "acc": 0.927841, "epoch": 2.4312945062681073, "grad_norm": 7.256955623626709, "learning_rate": 3.6482474841811854e-06, "loss": 0.39310191, "memory(GiB)": 34.88, "step": 89795, "train_speed(iter/s)": 0.412739 }, { "acc": 0.91393032, "epoch": 2.4314298865513226, "grad_norm": 12.417004585266113, "learning_rate": 3.6477087860066722e-06, "loss": 0.50316105, "memory(GiB)": 34.88, "step": 89800, "train_speed(iter/s)": 0.41274 }, { "acc": 0.93661032, "epoch": 2.4315652668345384, "grad_norm": 9.074899673461914, "learning_rate": 3.6471701047785274e-06, "loss": 0.35153871, "memory(GiB)": 34.88, "step": 89805, "train_speed(iter/s)": 0.412741 }, { "acc": 0.91533318, "epoch": 2.431700647117754, "grad_norm": 7.944098949432373, "learning_rate": 3.6466314405034993e-06, "loss": 0.44492226, "memory(GiB)": 34.88, "step": 89810, "train_speed(iter/s)": 0.412742 }, { "acc": 0.93343172, "epoch": 2.431836027400969, "grad_norm": 5.872401714324951, "learning_rate": 3.6460927931883354e-06, "loss": 0.35628359, "memory(GiB)": 34.88, "step": 89815, "train_speed(iter/s)": 0.412743 }, { "acc": 0.92537212, "epoch": 2.431971407684185, "grad_norm": 10.837718963623047, "learning_rate": 3.6455541628397834e-06, "loss": 0.40094967, "memory(GiB)": 34.88, "step": 89820, "train_speed(iter/s)": 0.412744 }, { "acc": 0.9374526, "epoch": 2.4321067879674003, "grad_norm": 8.975034713745117, "learning_rate": 3.6450155494645893e-06, "loss": 0.32758322, "memory(GiB)": 34.88, "step": 89825, "train_speed(iter/s)": 0.412746 }, { "acc": 0.9322464, "epoch": 2.432242168250616, "grad_norm": 10.126913070678711, "learning_rate": 3.644476953069503e-06, "loss": 0.41837606, "memory(GiB)": 34.88, "step": 89830, "train_speed(iter/s)": 0.412747 }, { "acc": 0.92799006, "epoch": 2.4323775485338315, "grad_norm": 16.60479736328125, "learning_rate": 3.643938373661268e-06, "loss": 0.44103847, "memory(GiB)": 34.88, "step": 89835, "train_speed(iter/s)": 0.412748 }, { "acc": 0.92927914, "epoch": 2.4325129288170473, "grad_norm": 11.587271690368652, "learning_rate": 3.643399811246634e-06, "loss": 0.38440039, "memory(GiB)": 34.88, "step": 89840, "train_speed(iter/s)": 0.412749 }, { "acc": 0.9472249, "epoch": 2.4326483091002626, "grad_norm": 6.876954078674316, "learning_rate": 3.6428612658323465e-06, "loss": 0.23414993, "memory(GiB)": 34.88, "step": 89845, "train_speed(iter/s)": 0.41275 }, { "acc": 0.94048233, "epoch": 2.432783689383478, "grad_norm": 11.907576560974121, "learning_rate": 3.6423227374251517e-06, "loss": 0.31014705, "memory(GiB)": 34.88, "step": 89850, "train_speed(iter/s)": 0.412751 }, { "acc": 0.93048277, "epoch": 2.432919069666694, "grad_norm": 14.435059547424316, "learning_rate": 3.6417842260317965e-06, "loss": 0.40563359, "memory(GiB)": 34.88, "step": 89855, "train_speed(iter/s)": 0.412753 }, { "acc": 0.9197073, "epoch": 2.433054449949909, "grad_norm": 8.62008285522461, "learning_rate": 3.6412457316590267e-06, "loss": 0.4906951, "memory(GiB)": 34.88, "step": 89860, "train_speed(iter/s)": 0.412754 }, { "acc": 0.93104248, "epoch": 2.433189830233125, "grad_norm": 5.479617595672607, "learning_rate": 3.6407072543135873e-06, "loss": 0.3486932, "memory(GiB)": 34.88, "step": 89865, "train_speed(iter/s)": 0.412755 }, { "acc": 0.92591124, "epoch": 2.4333252105163403, "grad_norm": 8.198338508605957, "learning_rate": 3.640168794002223e-06, "loss": 0.43724966, "memory(GiB)": 34.88, "step": 89870, "train_speed(iter/s)": 0.412756 }, { "acc": 0.92719793, "epoch": 2.433460590799556, "grad_norm": 6.838664531707764, "learning_rate": 3.63963035073168e-06, "loss": 0.39598932, "memory(GiB)": 34.88, "step": 89875, "train_speed(iter/s)": 0.412757 }, { "acc": 0.94442673, "epoch": 2.4335959710827715, "grad_norm": 5.339790344238281, "learning_rate": 3.6390919245087042e-06, "loss": 0.28846242, "memory(GiB)": 34.88, "step": 89880, "train_speed(iter/s)": 0.412758 }, { "acc": 0.91166611, "epoch": 2.433731351365987, "grad_norm": 10.192429542541504, "learning_rate": 3.638553515340039e-06, "loss": 0.5334868, "memory(GiB)": 34.88, "step": 89885, "train_speed(iter/s)": 0.412759 }, { "acc": 0.92069359, "epoch": 2.4338667316492026, "grad_norm": 6.513328552246094, "learning_rate": 3.6380151232324314e-06, "loss": 0.41879311, "memory(GiB)": 34.88, "step": 89890, "train_speed(iter/s)": 0.41276 }, { "acc": 0.91406679, "epoch": 2.434002111932418, "grad_norm": 7.423910140991211, "learning_rate": 3.637476748192622e-06, "loss": 0.43973322, "memory(GiB)": 34.88, "step": 89895, "train_speed(iter/s)": 0.412761 }, { "acc": 0.9326335, "epoch": 2.434137492215634, "grad_norm": 5.027122974395752, "learning_rate": 3.63693839022736e-06, "loss": 0.37169664, "memory(GiB)": 34.88, "step": 89900, "train_speed(iter/s)": 0.412762 }, { "acc": 0.92285786, "epoch": 2.434272872498849, "grad_norm": 13.09925651550293, "learning_rate": 3.6364000493433843e-06, "loss": 0.47807565, "memory(GiB)": 34.88, "step": 89905, "train_speed(iter/s)": 0.412764 }, { "acc": 0.92358875, "epoch": 2.434408252782065, "grad_norm": 9.953764915466309, "learning_rate": 3.635861725547441e-06, "loss": 0.37915797, "memory(GiB)": 34.88, "step": 89910, "train_speed(iter/s)": 0.412765 }, { "acc": 0.92954807, "epoch": 2.4345436330652803, "grad_norm": 10.648492813110352, "learning_rate": 3.635323418846274e-06, "loss": 0.32637076, "memory(GiB)": 34.88, "step": 89915, "train_speed(iter/s)": 0.412766 }, { "acc": 0.91913013, "epoch": 2.4346790133484957, "grad_norm": 10.963719367980957, "learning_rate": 3.6347851292466256e-06, "loss": 0.45836029, "memory(GiB)": 34.88, "step": 89920, "train_speed(iter/s)": 0.412767 }, { "acc": 0.94343452, "epoch": 2.4348143936317115, "grad_norm": 11.995784759521484, "learning_rate": 3.634246856755241e-06, "loss": 0.39020793, "memory(GiB)": 34.88, "step": 89925, "train_speed(iter/s)": 0.412768 }, { "acc": 0.94205198, "epoch": 2.434949773914927, "grad_norm": 14.893688201904297, "learning_rate": 3.6337086013788603e-06, "loss": 0.36324456, "memory(GiB)": 34.88, "step": 89930, "train_speed(iter/s)": 0.41277 }, { "acc": 0.93869209, "epoch": 2.4350851541981426, "grad_norm": 3.4694664478302, "learning_rate": 3.6331703631242284e-06, "loss": 0.33205643, "memory(GiB)": 34.88, "step": 89935, "train_speed(iter/s)": 0.412771 }, { "acc": 0.92231541, "epoch": 2.435220534481358, "grad_norm": 4.631935119628906, "learning_rate": 3.632632141998087e-06, "loss": 0.43542552, "memory(GiB)": 34.88, "step": 89940, "train_speed(iter/s)": 0.412772 }, { "acc": 0.92535877, "epoch": 2.435355914764574, "grad_norm": 6.544983863830566, "learning_rate": 3.632093938007179e-06, "loss": 0.41582499, "memory(GiB)": 34.88, "step": 89945, "train_speed(iter/s)": 0.412773 }, { "acc": 0.93529444, "epoch": 2.435491295047789, "grad_norm": 10.54753589630127, "learning_rate": 3.631555751158245e-06, "loss": 0.39779346, "memory(GiB)": 34.88, "step": 89950, "train_speed(iter/s)": 0.412774 }, { "acc": 0.93274384, "epoch": 2.435626675331005, "grad_norm": 7.164830207824707, "learning_rate": 3.6310175814580273e-06, "loss": 0.45370512, "memory(GiB)": 34.88, "step": 89955, "train_speed(iter/s)": 0.412775 }, { "acc": 0.92169046, "epoch": 2.4357620556142203, "grad_norm": 4.859896659851074, "learning_rate": 3.6304794289132685e-06, "loss": 0.48415079, "memory(GiB)": 34.88, "step": 89960, "train_speed(iter/s)": 0.412777 }, { "acc": 0.94246464, "epoch": 2.435897435897436, "grad_norm": 9.486190795898438, "learning_rate": 3.6299412935307093e-06, "loss": 0.37242653, "memory(GiB)": 34.88, "step": 89965, "train_speed(iter/s)": 0.412778 }, { "acc": 0.9410203, "epoch": 2.4360328161806515, "grad_norm": 21.528087615966797, "learning_rate": 3.629403175317091e-06, "loss": 0.32123256, "memory(GiB)": 34.88, "step": 89970, "train_speed(iter/s)": 0.412779 }, { "acc": 0.91216908, "epoch": 2.436168196463867, "grad_norm": 7.6606974601745605, "learning_rate": 3.6288650742791544e-06, "loss": 0.49165592, "memory(GiB)": 34.88, "step": 89975, "train_speed(iter/s)": 0.41278 }, { "acc": 0.90591393, "epoch": 2.4363035767470826, "grad_norm": 11.103894233703613, "learning_rate": 3.6283269904236413e-06, "loss": 0.48588095, "memory(GiB)": 34.88, "step": 89980, "train_speed(iter/s)": 0.412781 }, { "acc": 0.90987844, "epoch": 2.436438957030298, "grad_norm": 6.031266212463379, "learning_rate": 3.6277889237572915e-06, "loss": 0.5158215, "memory(GiB)": 34.88, "step": 89985, "train_speed(iter/s)": 0.412783 }, { "acc": 0.93865824, "epoch": 2.436574337313514, "grad_norm": 13.884876251220703, "learning_rate": 3.6272508742868446e-06, "loss": 0.33903272, "memory(GiB)": 34.88, "step": 89990, "train_speed(iter/s)": 0.412784 }, { "acc": 0.93377028, "epoch": 2.436709717596729, "grad_norm": 9.468371391296387, "learning_rate": 3.626712842019042e-06, "loss": 0.34588087, "memory(GiB)": 34.88, "step": 89995, "train_speed(iter/s)": 0.412785 }, { "acc": 0.92422676, "epoch": 2.436845097879945, "grad_norm": 5.916086673736572, "learning_rate": 3.626174826960622e-06, "loss": 0.36366167, "memory(GiB)": 34.88, "step": 90000, "train_speed(iter/s)": 0.412786 }, { "epoch": 2.436845097879945, "eval_acc": 0.6181440760302847, "eval_loss": 1.1477999687194824, "eval_runtime": 1295.5444, "eval_samples_per_second": 66.617, "eval_steps_per_second": 2.083, "step": 90000 }, { "acc": 0.92426701, "epoch": 2.4369804781631603, "grad_norm": 4.855507850646973, "learning_rate": 3.6256368291183264e-06, "loss": 0.44069629, "memory(GiB)": 34.88, "step": 90005, "train_speed(iter/s)": 0.410305 }, { "acc": 0.93176517, "epoch": 2.4371158584463757, "grad_norm": 8.386252403259277, "learning_rate": 3.6250988484988925e-06, "loss": 0.36133301, "memory(GiB)": 34.88, "step": 90010, "train_speed(iter/s)": 0.410306 }, { "acc": 0.93497105, "epoch": 2.4372512387295915, "grad_norm": 17.081815719604492, "learning_rate": 3.6245608851090617e-06, "loss": 0.3179172, "memory(GiB)": 34.88, "step": 90015, "train_speed(iter/s)": 0.410307 }, { "acc": 0.92941322, "epoch": 2.437386619012807, "grad_norm": 6.923687934875488, "learning_rate": 3.624022938955571e-06, "loss": 0.44762583, "memory(GiB)": 34.88, "step": 90020, "train_speed(iter/s)": 0.410308 }, { "acc": 0.91219769, "epoch": 2.4375219992960226, "grad_norm": 14.432945251464844, "learning_rate": 3.6234850100451614e-06, "loss": 0.55808334, "memory(GiB)": 34.88, "step": 90025, "train_speed(iter/s)": 0.41031 }, { "acc": 0.95269508, "epoch": 2.437657379579238, "grad_norm": 12.90511703491211, "learning_rate": 3.622947098384571e-06, "loss": 0.28820391, "memory(GiB)": 34.88, "step": 90030, "train_speed(iter/s)": 0.410311 }, { "acc": 0.93571911, "epoch": 2.437792759862454, "grad_norm": 4.242061138153076, "learning_rate": 3.6224092039805354e-06, "loss": 0.30295925, "memory(GiB)": 34.88, "step": 90035, "train_speed(iter/s)": 0.410312 }, { "acc": 0.90484352, "epoch": 2.437928140145669, "grad_norm": 10.021385192871094, "learning_rate": 3.621871326839797e-06, "loss": 0.47261105, "memory(GiB)": 34.88, "step": 90040, "train_speed(iter/s)": 0.410313 }, { "acc": 0.92392311, "epoch": 2.4380635204288845, "grad_norm": 12.802257537841797, "learning_rate": 3.6213334669690904e-06, "loss": 0.47115145, "memory(GiB)": 34.88, "step": 90045, "train_speed(iter/s)": 0.410315 }, { "acc": 0.92455502, "epoch": 2.4381989007121003, "grad_norm": 11.37770938873291, "learning_rate": 3.620795624375155e-06, "loss": 0.48314958, "memory(GiB)": 34.88, "step": 90050, "train_speed(iter/s)": 0.410316 }, { "acc": 0.91905766, "epoch": 2.4383342809953157, "grad_norm": 7.366897106170654, "learning_rate": 3.6202577990647275e-06, "loss": 0.44353065, "memory(GiB)": 34.88, "step": 90055, "train_speed(iter/s)": 0.410317 }, { "acc": 0.92190523, "epoch": 2.4384696612785315, "grad_norm": 6.165576457977295, "learning_rate": 3.6197199910445468e-06, "loss": 0.4589498, "memory(GiB)": 34.88, "step": 90060, "train_speed(iter/s)": 0.410318 }, { "acc": 0.92075996, "epoch": 2.438605041561747, "grad_norm": 5.883736610412598, "learning_rate": 3.619182200321348e-06, "loss": 0.49251132, "memory(GiB)": 34.88, "step": 90065, "train_speed(iter/s)": 0.41032 }, { "acc": 0.9410388, "epoch": 2.4387404218449626, "grad_norm": 8.065546035766602, "learning_rate": 3.61864442690187e-06, "loss": 0.31164546, "memory(GiB)": 34.88, "step": 90070, "train_speed(iter/s)": 0.410321 }, { "acc": 0.94422665, "epoch": 2.438875802128178, "grad_norm": 5.278353214263916, "learning_rate": 3.6181066707928487e-06, "loss": 0.31222742, "memory(GiB)": 34.88, "step": 90075, "train_speed(iter/s)": 0.410322 }, { "acc": 0.94994946, "epoch": 2.4390111824113934, "grad_norm": 5.687537670135498, "learning_rate": 3.617568932001019e-06, "loss": 0.27055411, "memory(GiB)": 34.88, "step": 90080, "train_speed(iter/s)": 0.410323 }, { "acc": 0.92803802, "epoch": 2.439146562694609, "grad_norm": 8.219531059265137, "learning_rate": 3.6170312105331185e-06, "loss": 0.4389317, "memory(GiB)": 34.88, "step": 90085, "train_speed(iter/s)": 0.410324 }, { "acc": 0.92407684, "epoch": 2.4392819429778245, "grad_norm": 18.492292404174805, "learning_rate": 3.6164935063958833e-06, "loss": 0.39720235, "memory(GiB)": 34.88, "step": 90090, "train_speed(iter/s)": 0.410326 }, { "acc": 0.93542862, "epoch": 2.4394173232610403, "grad_norm": 5.08945369720459, "learning_rate": 3.615955819596049e-06, "loss": 0.38312321, "memory(GiB)": 34.88, "step": 90095, "train_speed(iter/s)": 0.410327 }, { "acc": 0.93762913, "epoch": 2.4395527035442557, "grad_norm": 11.207841873168945, "learning_rate": 3.6154181501403505e-06, "loss": 0.33232844, "memory(GiB)": 34.88, "step": 90100, "train_speed(iter/s)": 0.410328 }, { "acc": 0.94683361, "epoch": 2.4396880838274715, "grad_norm": 6.913859844207764, "learning_rate": 3.614880498035524e-06, "loss": 0.28634973, "memory(GiB)": 34.88, "step": 90105, "train_speed(iter/s)": 0.410329 }, { "acc": 0.92136021, "epoch": 2.439823464110687, "grad_norm": 5.996057510375977, "learning_rate": 3.6143428632883067e-06, "loss": 0.40969419, "memory(GiB)": 34.88, "step": 90110, "train_speed(iter/s)": 0.41033 }, { "acc": 0.92441826, "epoch": 2.4399588443939026, "grad_norm": 8.561576843261719, "learning_rate": 3.613805245905429e-06, "loss": 0.42535286, "memory(GiB)": 34.88, "step": 90115, "train_speed(iter/s)": 0.410331 }, { "acc": 0.93436537, "epoch": 2.440094224677118, "grad_norm": 9.654501914978027, "learning_rate": 3.6132676458936295e-06, "loss": 0.42114062, "memory(GiB)": 34.88, "step": 90120, "train_speed(iter/s)": 0.410333 }, { "acc": 0.93658848, "epoch": 2.440229604960334, "grad_norm": 7.857222557067871, "learning_rate": 3.6127300632596398e-06, "loss": 0.36429477, "memory(GiB)": 34.88, "step": 90125, "train_speed(iter/s)": 0.410334 }, { "acc": 0.93355598, "epoch": 2.440364985243549, "grad_norm": 9.640424728393555, "learning_rate": 3.612192498010196e-06, "loss": 0.41763916, "memory(GiB)": 34.88, "step": 90130, "train_speed(iter/s)": 0.410335 }, { "acc": 0.93620005, "epoch": 2.4405003655267645, "grad_norm": 8.021474838256836, "learning_rate": 3.6116549501520313e-06, "loss": 0.33707213, "memory(GiB)": 34.88, "step": 90135, "train_speed(iter/s)": 0.410336 }, { "acc": 0.93361034, "epoch": 2.4406357458099803, "grad_norm": 4.952791690826416, "learning_rate": 3.61111741969188e-06, "loss": 0.35397127, "memory(GiB)": 34.88, "step": 90140, "train_speed(iter/s)": 0.410338 }, { "acc": 0.93707838, "epoch": 2.4407711260931957, "grad_norm": 8.087687492370605, "learning_rate": 3.610579906636476e-06, "loss": 0.35154955, "memory(GiB)": 34.88, "step": 90145, "train_speed(iter/s)": 0.410339 }, { "acc": 0.91470842, "epoch": 2.4409065063764115, "grad_norm": 25.134166717529297, "learning_rate": 3.610042410992553e-06, "loss": 0.49889917, "memory(GiB)": 34.88, "step": 90150, "train_speed(iter/s)": 0.41034 }, { "acc": 0.94096918, "epoch": 2.441041886659627, "grad_norm": 2.4301018714904785, "learning_rate": 3.6095049327668435e-06, "loss": 0.28208048, "memory(GiB)": 34.88, "step": 90155, "train_speed(iter/s)": 0.410341 }, { "acc": 0.92647476, "epoch": 2.4411772669428426, "grad_norm": 14.444692611694336, "learning_rate": 3.6089674719660796e-06, "loss": 0.40471005, "memory(GiB)": 34.88, "step": 90160, "train_speed(iter/s)": 0.410342 }, { "acc": 0.91790476, "epoch": 2.441312647226058, "grad_norm": 7.2348127365112305, "learning_rate": 3.6084300285969957e-06, "loss": 0.45977077, "memory(GiB)": 34.88, "step": 90165, "train_speed(iter/s)": 0.410344 }, { "acc": 0.9239193, "epoch": 2.4414480275092734, "grad_norm": 5.68870735168457, "learning_rate": 3.6078926026663226e-06, "loss": 0.42044477, "memory(GiB)": 34.88, "step": 90170, "train_speed(iter/s)": 0.410345 }, { "acc": 0.91963596, "epoch": 2.441583407792489, "grad_norm": 11.300575256347656, "learning_rate": 3.607355194180793e-06, "loss": 0.508319, "memory(GiB)": 34.88, "step": 90175, "train_speed(iter/s)": 0.410346 }, { "acc": 0.91973839, "epoch": 2.4417187880757045, "grad_norm": 4.894684791564941, "learning_rate": 3.6068178031471413e-06, "loss": 0.47933078, "memory(GiB)": 34.88, "step": 90180, "train_speed(iter/s)": 0.410347 }, { "acc": 0.91605949, "epoch": 2.4418541683589203, "grad_norm": 9.867431640625, "learning_rate": 3.6062804295720972e-06, "loss": 0.44199429, "memory(GiB)": 34.88, "step": 90185, "train_speed(iter/s)": 0.410348 }, { "acc": 0.92193403, "epoch": 2.4419895486421357, "grad_norm": 6.368067741394043, "learning_rate": 3.605743073462393e-06, "loss": 0.40551443, "memory(GiB)": 34.88, "step": 90190, "train_speed(iter/s)": 0.41035 }, { "acc": 0.93303356, "epoch": 2.4421249289253515, "grad_norm": 6.222632884979248, "learning_rate": 3.6052057348247603e-06, "loss": 0.31890397, "memory(GiB)": 34.88, "step": 90195, "train_speed(iter/s)": 0.410351 }, { "acc": 0.9473011, "epoch": 2.442260309208567, "grad_norm": 5.473329067230225, "learning_rate": 3.6046684136659306e-06, "loss": 0.26155639, "memory(GiB)": 34.88, "step": 90200, "train_speed(iter/s)": 0.410352 }, { "acc": 0.93718309, "epoch": 2.442395689491782, "grad_norm": 3.233684778213501, "learning_rate": 3.6041311099926324e-06, "loss": 0.33007479, "memory(GiB)": 34.88, "step": 90205, "train_speed(iter/s)": 0.410353 }, { "acc": 0.92810631, "epoch": 2.442531069774998, "grad_norm": 14.110462188720703, "learning_rate": 3.603593823811598e-06, "loss": 0.42724338, "memory(GiB)": 34.88, "step": 90210, "train_speed(iter/s)": 0.410354 }, { "acc": 0.91891594, "epoch": 2.4426664500582134, "grad_norm": 10.80335521697998, "learning_rate": 3.6030565551295604e-06, "loss": 0.4937726, "memory(GiB)": 34.88, "step": 90215, "train_speed(iter/s)": 0.410356 }, { "acc": 0.92201519, "epoch": 2.442801830341429, "grad_norm": 3.9399971961975098, "learning_rate": 3.6025193039532467e-06, "loss": 0.35072834, "memory(GiB)": 34.88, "step": 90220, "train_speed(iter/s)": 0.410357 }, { "acc": 0.93041172, "epoch": 2.4429372106246445, "grad_norm": 5.9638519287109375, "learning_rate": 3.60198207028939e-06, "loss": 0.37971222, "memory(GiB)": 34.88, "step": 90225, "train_speed(iter/s)": 0.410358 }, { "acc": 0.93858109, "epoch": 2.4430725909078603, "grad_norm": 8.611322402954102, "learning_rate": 3.6014448541447162e-06, "loss": 0.3632098, "memory(GiB)": 34.88, "step": 90230, "train_speed(iter/s)": 0.410359 }, { "acc": 0.9394968, "epoch": 2.4432079711910757, "grad_norm": 7.584293365478516, "learning_rate": 3.6009076555259605e-06, "loss": 0.30085635, "memory(GiB)": 34.88, "step": 90235, "train_speed(iter/s)": 0.410361 }, { "acc": 0.93154964, "epoch": 2.443343351474291, "grad_norm": 4.481986999511719, "learning_rate": 3.6003704744398453e-06, "loss": 0.45484953, "memory(GiB)": 34.88, "step": 90240, "train_speed(iter/s)": 0.410362 }, { "acc": 0.92329903, "epoch": 2.443478731757507, "grad_norm": 10.778301239013672, "learning_rate": 3.599833310893106e-06, "loss": 0.42326961, "memory(GiB)": 34.88, "step": 90245, "train_speed(iter/s)": 0.410363 }, { "acc": 0.9155138, "epoch": 2.443614112040722, "grad_norm": 12.13817310333252, "learning_rate": 3.599296164892468e-06, "loss": 0.47803822, "memory(GiB)": 34.88, "step": 90250, "train_speed(iter/s)": 0.410364 }, { "acc": 0.94339981, "epoch": 2.443749492323938, "grad_norm": 5.216062545776367, "learning_rate": 3.5987590364446608e-06, "loss": 0.34104218, "memory(GiB)": 34.88, "step": 90255, "train_speed(iter/s)": 0.410365 }, { "acc": 0.93592682, "epoch": 2.4438848726071534, "grad_norm": 6.27710485458374, "learning_rate": 3.5982219255564155e-06, "loss": 0.32805305, "memory(GiB)": 34.88, "step": 90260, "train_speed(iter/s)": 0.410367 }, { "acc": 0.93523779, "epoch": 2.444020252890369, "grad_norm": 8.332247734069824, "learning_rate": 3.597684832234457e-06, "loss": 0.34287686, "memory(GiB)": 34.88, "step": 90265, "train_speed(iter/s)": 0.410368 }, { "acc": 0.92893972, "epoch": 2.4441556331735845, "grad_norm": 6.195473670959473, "learning_rate": 3.597147756485516e-06, "loss": 0.32168531, "memory(GiB)": 34.88, "step": 90270, "train_speed(iter/s)": 0.410369 }, { "acc": 0.91386833, "epoch": 2.4442910134568003, "grad_norm": 4.841277122497559, "learning_rate": 3.596610698316319e-06, "loss": 0.52065039, "memory(GiB)": 34.88, "step": 90275, "train_speed(iter/s)": 0.41037 }, { "acc": 0.89951572, "epoch": 2.4444263937400157, "grad_norm": 20.170021057128906, "learning_rate": 3.596073657733595e-06, "loss": 0.61592202, "memory(GiB)": 34.88, "step": 90280, "train_speed(iter/s)": 0.410371 }, { "acc": 0.91105747, "epoch": 2.4445617740232315, "grad_norm": 5.8329548835754395, "learning_rate": 3.595536634744071e-06, "loss": 0.50570121, "memory(GiB)": 34.88, "step": 90285, "train_speed(iter/s)": 0.410373 }, { "acc": 0.94448986, "epoch": 2.444697154306447, "grad_norm": 3.4333953857421875, "learning_rate": 3.5949996293544707e-06, "loss": 0.32150731, "memory(GiB)": 34.88, "step": 90290, "train_speed(iter/s)": 0.410374 }, { "acc": 0.92766552, "epoch": 2.444832534589662, "grad_norm": 6.562255382537842, "learning_rate": 3.594462641571528e-06, "loss": 0.37910511, "memory(GiB)": 34.88, "step": 90295, "train_speed(iter/s)": 0.410375 }, { "acc": 0.94194527, "epoch": 2.444967914872878, "grad_norm": 6.752261161804199, "learning_rate": 3.5939256714019634e-06, "loss": 0.29317126, "memory(GiB)": 34.88, "step": 90300, "train_speed(iter/s)": 0.410377 }, { "acc": 0.92698574, "epoch": 2.4451032951560934, "grad_norm": 10.455939292907715, "learning_rate": 3.5933887188525075e-06, "loss": 0.37331009, "memory(GiB)": 34.88, "step": 90305, "train_speed(iter/s)": 0.410378 }, { "acc": 0.92668991, "epoch": 2.445238675439309, "grad_norm": 4.019781112670898, "learning_rate": 3.5928517839298847e-06, "loss": 0.44943848, "memory(GiB)": 34.88, "step": 90310, "train_speed(iter/s)": 0.410379 }, { "acc": 0.94140759, "epoch": 2.4453740557225245, "grad_norm": 8.476317405700684, "learning_rate": 3.592314866640823e-06, "loss": 0.28921659, "memory(GiB)": 34.88, "step": 90315, "train_speed(iter/s)": 0.41038 }, { "acc": 0.92624645, "epoch": 2.4455094360057403, "grad_norm": 12.429166793823242, "learning_rate": 3.5917779669920467e-06, "loss": 0.37477388, "memory(GiB)": 34.88, "step": 90320, "train_speed(iter/s)": 0.410381 }, { "acc": 0.92231102, "epoch": 2.4456448162889557, "grad_norm": 7.100866794586182, "learning_rate": 3.591241084990282e-06, "loss": 0.34652598, "memory(GiB)": 34.88, "step": 90325, "train_speed(iter/s)": 0.410383 }, { "acc": 0.92611895, "epoch": 2.445780196572171, "grad_norm": 9.88509464263916, "learning_rate": 3.5907042206422537e-06, "loss": 0.41059113, "memory(GiB)": 34.88, "step": 90330, "train_speed(iter/s)": 0.410384 }, { "acc": 0.94020939, "epoch": 2.445915576855387, "grad_norm": 8.912724494934082, "learning_rate": 3.5901673739546884e-06, "loss": 0.33327363, "memory(GiB)": 34.88, "step": 90335, "train_speed(iter/s)": 0.410385 }, { "acc": 0.93077269, "epoch": 2.446050957138602, "grad_norm": 9.709943771362305, "learning_rate": 3.589630544934311e-06, "loss": 0.37047367, "memory(GiB)": 34.88, "step": 90340, "train_speed(iter/s)": 0.410387 }, { "acc": 0.92899113, "epoch": 2.446186337421818, "grad_norm": 10.72834587097168, "learning_rate": 3.5890937335878444e-06, "loss": 0.37026591, "memory(GiB)": 34.88, "step": 90345, "train_speed(iter/s)": 0.410388 }, { "acc": 0.94333715, "epoch": 2.4463217177050334, "grad_norm": 11.178058624267578, "learning_rate": 3.588556939922016e-06, "loss": 0.36695516, "memory(GiB)": 34.88, "step": 90350, "train_speed(iter/s)": 0.410389 }, { "acc": 0.91772776, "epoch": 2.446457097988249, "grad_norm": 15.565813064575195, "learning_rate": 3.588020163943548e-06, "loss": 0.48440065, "memory(GiB)": 34.88, "step": 90355, "train_speed(iter/s)": 0.41039 }, { "acc": 0.9320508, "epoch": 2.4465924782714645, "grad_norm": 13.266840934753418, "learning_rate": 3.5874834056591663e-06, "loss": 0.38134327, "memory(GiB)": 34.88, "step": 90360, "train_speed(iter/s)": 0.410392 }, { "acc": 0.92116032, "epoch": 2.44672785855468, "grad_norm": 6.123811721801758, "learning_rate": 3.586946665075594e-06, "loss": 0.44631963, "memory(GiB)": 34.88, "step": 90365, "train_speed(iter/s)": 0.410393 }, { "acc": 0.92980223, "epoch": 2.4468632388378957, "grad_norm": 23.115835189819336, "learning_rate": 3.586409942199554e-06, "loss": 0.34510832, "memory(GiB)": 34.88, "step": 90370, "train_speed(iter/s)": 0.410394 }, { "acc": 0.94249887, "epoch": 2.446998619121111, "grad_norm": 3.8533987998962402, "learning_rate": 3.585873237037772e-06, "loss": 0.26038601, "memory(GiB)": 34.88, "step": 90375, "train_speed(iter/s)": 0.410395 }, { "acc": 0.93947334, "epoch": 2.447133999404327, "grad_norm": 6.136346817016602, "learning_rate": 3.585336549596968e-06, "loss": 0.34302907, "memory(GiB)": 34.88, "step": 90380, "train_speed(iter/s)": 0.410397 }, { "acc": 0.92635155, "epoch": 2.447269379687542, "grad_norm": 6.3827714920043945, "learning_rate": 3.584799879883869e-06, "loss": 0.36316242, "memory(GiB)": 34.88, "step": 90385, "train_speed(iter/s)": 0.410398 }, { "acc": 0.91570473, "epoch": 2.447404759970758, "grad_norm": 8.444169044494629, "learning_rate": 3.5842632279051943e-06, "loss": 0.5139535, "memory(GiB)": 34.88, "step": 90390, "train_speed(iter/s)": 0.410399 }, { "acc": 0.93994846, "epoch": 2.4475401402539734, "grad_norm": 6.084094524383545, "learning_rate": 3.583726593667669e-06, "loss": 0.32041798, "memory(GiB)": 34.88, "step": 90395, "train_speed(iter/s)": 0.4104 }, { "acc": 0.92639685, "epoch": 2.4476755205371887, "grad_norm": 8.17023754119873, "learning_rate": 3.583189977178014e-06, "loss": 0.39839873, "memory(GiB)": 34.88, "step": 90400, "train_speed(iter/s)": 0.410401 }, { "acc": 0.93768921, "epoch": 2.4478109008204045, "grad_norm": 8.92282485961914, "learning_rate": 3.582653378442952e-06, "loss": 0.3164402, "memory(GiB)": 34.88, "step": 90405, "train_speed(iter/s)": 0.410403 }, { "acc": 0.92555237, "epoch": 2.44794628110362, "grad_norm": 11.174205780029297, "learning_rate": 3.5821167974692065e-06, "loss": 0.42890596, "memory(GiB)": 34.88, "step": 90410, "train_speed(iter/s)": 0.410404 }, { "acc": 0.92792978, "epoch": 2.4480816613868357, "grad_norm": 9.063485145568848, "learning_rate": 3.581580234263496e-06, "loss": 0.46154132, "memory(GiB)": 34.88, "step": 90415, "train_speed(iter/s)": 0.410405 }, { "acc": 0.92659264, "epoch": 2.448217041670051, "grad_norm": 7.941083908081055, "learning_rate": 3.5810436888325446e-06, "loss": 0.36893721, "memory(GiB)": 34.88, "step": 90420, "train_speed(iter/s)": 0.410406 }, { "acc": 0.91369801, "epoch": 2.448352421953267, "grad_norm": 7.425836563110352, "learning_rate": 3.580507161183072e-06, "loss": 0.48088846, "memory(GiB)": 34.88, "step": 90425, "train_speed(iter/s)": 0.410408 }, { "acc": 0.92707796, "epoch": 2.448487802236482, "grad_norm": 7.200060844421387, "learning_rate": 3.579970651321801e-06, "loss": 0.42171564, "memory(GiB)": 34.88, "step": 90430, "train_speed(iter/s)": 0.410409 }, { "acc": 0.93674545, "epoch": 2.448623182519698, "grad_norm": 7.171557426452637, "learning_rate": 3.57943415925545e-06, "loss": 0.35900135, "memory(GiB)": 34.88, "step": 90435, "train_speed(iter/s)": 0.41041 }, { "acc": 0.92586155, "epoch": 2.4487585628029134, "grad_norm": 8.467283248901367, "learning_rate": 3.578897684990742e-06, "loss": 0.45768852, "memory(GiB)": 34.88, "step": 90440, "train_speed(iter/s)": 0.410411 }, { "acc": 0.92477579, "epoch": 2.448893943086129, "grad_norm": 8.230910301208496, "learning_rate": 3.578361228534397e-06, "loss": 0.46032004, "memory(GiB)": 34.88, "step": 90445, "train_speed(iter/s)": 0.410413 }, { "acc": 0.91170206, "epoch": 2.4490293233693445, "grad_norm": 6.430938720703125, "learning_rate": 3.577824789893134e-06, "loss": 0.54297242, "memory(GiB)": 34.88, "step": 90450, "train_speed(iter/s)": 0.410414 }, { "acc": 0.95569839, "epoch": 2.44916470365256, "grad_norm": 4.725410461425781, "learning_rate": 3.5772883690736737e-06, "loss": 0.22986963, "memory(GiB)": 34.88, "step": 90455, "train_speed(iter/s)": 0.410415 }, { "acc": 0.91409216, "epoch": 2.4493000839357757, "grad_norm": 13.044299125671387, "learning_rate": 3.576751966082734e-06, "loss": 0.5086967, "memory(GiB)": 34.88, "step": 90460, "train_speed(iter/s)": 0.410416 }, { "acc": 0.91553135, "epoch": 2.449435464218991, "grad_norm": 7.672000408172607, "learning_rate": 3.576215580927039e-06, "loss": 0.48579693, "memory(GiB)": 34.88, "step": 90465, "train_speed(iter/s)": 0.410418 }, { "acc": 0.93274002, "epoch": 2.449570844502207, "grad_norm": 4.685550689697266, "learning_rate": 3.575679213613303e-06, "loss": 0.39108155, "memory(GiB)": 34.88, "step": 90470, "train_speed(iter/s)": 0.410419 }, { "acc": 0.94060068, "epoch": 2.449706224785422, "grad_norm": 12.319270133972168, "learning_rate": 3.5751428641482472e-06, "loss": 0.36408052, "memory(GiB)": 34.88, "step": 90475, "train_speed(iter/s)": 0.41042 }, { "acc": 0.93270988, "epoch": 2.449841605068638, "grad_norm": 6.0697340965271, "learning_rate": 3.574606532538591e-06, "loss": 0.39284911, "memory(GiB)": 34.88, "step": 90480, "train_speed(iter/s)": 0.410421 }, { "acc": 0.90792351, "epoch": 2.4499769853518534, "grad_norm": 13.204452514648438, "learning_rate": 3.574070218791052e-06, "loss": 0.49238276, "memory(GiB)": 34.88, "step": 90485, "train_speed(iter/s)": 0.410423 }, { "acc": 0.93377247, "epoch": 2.4501123656350687, "grad_norm": 3.863513231277466, "learning_rate": 3.5735339229123504e-06, "loss": 0.32148862, "memory(GiB)": 34.88, "step": 90490, "train_speed(iter/s)": 0.410424 }, { "acc": 0.92790871, "epoch": 2.4502477459182845, "grad_norm": 10.547056198120117, "learning_rate": 3.5729976449092008e-06, "loss": 0.43078346, "memory(GiB)": 34.88, "step": 90495, "train_speed(iter/s)": 0.410425 }, { "acc": 0.910748, "epoch": 2.4503831262015, "grad_norm": 12.211722373962402, "learning_rate": 3.572461384788325e-06, "loss": 0.53134956, "memory(GiB)": 34.88, "step": 90500, "train_speed(iter/s)": 0.410426 }, { "acc": 0.92443924, "epoch": 2.4505185064847157, "grad_norm": 10.724225044250488, "learning_rate": 3.5719251425564375e-06, "loss": 0.44485321, "memory(GiB)": 34.88, "step": 90505, "train_speed(iter/s)": 0.410427 }, { "acc": 0.9351944, "epoch": 2.450653886767931, "grad_norm": 4.104083061218262, "learning_rate": 3.571388918220257e-06, "loss": 0.38186965, "memory(GiB)": 34.88, "step": 90510, "train_speed(iter/s)": 0.410429 }, { "acc": 0.93124762, "epoch": 2.450789267051147, "grad_norm": 7.329432010650635, "learning_rate": 3.5708527117865023e-06, "loss": 0.45136676, "memory(GiB)": 34.88, "step": 90515, "train_speed(iter/s)": 0.41043 }, { "acc": 0.92052479, "epoch": 2.450924647334362, "grad_norm": 6.025228977203369, "learning_rate": 3.570316523261888e-06, "loss": 0.36653361, "memory(GiB)": 34.88, "step": 90520, "train_speed(iter/s)": 0.410431 }, { "acc": 0.92539253, "epoch": 2.4510600276175776, "grad_norm": 9.009380340576172, "learning_rate": 3.569780352653133e-06, "loss": 0.43807545, "memory(GiB)": 34.88, "step": 90525, "train_speed(iter/s)": 0.410432 }, { "acc": 0.94072609, "epoch": 2.4511954079007934, "grad_norm": 8.124451637268066, "learning_rate": 3.5692441999669537e-06, "loss": 0.25496533, "memory(GiB)": 34.88, "step": 90530, "train_speed(iter/s)": 0.410433 }, { "acc": 0.93732824, "epoch": 2.4513307881840087, "grad_norm": 8.725078582763672, "learning_rate": 3.5687080652100648e-06, "loss": 0.33076622, "memory(GiB)": 34.88, "step": 90535, "train_speed(iter/s)": 0.410434 }, { "acc": 0.91783619, "epoch": 2.4514661684672245, "grad_norm": 17.288677215576172, "learning_rate": 3.5681719483891823e-06, "loss": 0.52309561, "memory(GiB)": 34.88, "step": 90540, "train_speed(iter/s)": 0.410436 }, { "acc": 0.94028616, "epoch": 2.45160154875044, "grad_norm": 5.203907012939453, "learning_rate": 3.5676358495110254e-06, "loss": 0.34309549, "memory(GiB)": 34.88, "step": 90545, "train_speed(iter/s)": 0.410437 }, { "acc": 0.92073536, "epoch": 2.4517369290336557, "grad_norm": 12.948066711425781, "learning_rate": 3.5670997685823056e-06, "loss": 0.49022198, "memory(GiB)": 34.88, "step": 90550, "train_speed(iter/s)": 0.410438 }, { "acc": 0.92945261, "epoch": 2.451872309316871, "grad_norm": 13.163853645324707, "learning_rate": 3.5665637056097394e-06, "loss": 0.42912455, "memory(GiB)": 34.88, "step": 90555, "train_speed(iter/s)": 0.41044 }, { "acc": 0.94166622, "epoch": 2.4520076896000864, "grad_norm": 14.460972785949707, "learning_rate": 3.566027660600046e-06, "loss": 0.31027455, "memory(GiB)": 34.88, "step": 90560, "train_speed(iter/s)": 0.410441 }, { "acc": 0.91168928, "epoch": 2.452143069883302, "grad_norm": 8.089995384216309, "learning_rate": 3.565491633559935e-06, "loss": 0.48867517, "memory(GiB)": 34.88, "step": 90565, "train_speed(iter/s)": 0.410442 }, { "acc": 0.92372875, "epoch": 2.4522784501665176, "grad_norm": 6.827120304107666, "learning_rate": 3.564955624496126e-06, "loss": 0.42545161, "memory(GiB)": 34.88, "step": 90570, "train_speed(iter/s)": 0.410443 }, { "acc": 0.94069462, "epoch": 2.4524138304497334, "grad_norm": 6.121854782104492, "learning_rate": 3.5644196334153287e-06, "loss": 0.3357667, "memory(GiB)": 34.88, "step": 90575, "train_speed(iter/s)": 0.410445 }, { "acc": 0.9439847, "epoch": 2.4525492107329487, "grad_norm": 4.228302478790283, "learning_rate": 3.5638836603242615e-06, "loss": 0.23391843, "memory(GiB)": 34.88, "step": 90580, "train_speed(iter/s)": 0.410446 }, { "acc": 0.92718296, "epoch": 2.4526845910161645, "grad_norm": 7.9893364906311035, "learning_rate": 3.563347705229635e-06, "loss": 0.3561821, "memory(GiB)": 34.88, "step": 90585, "train_speed(iter/s)": 0.410447 }, { "acc": 0.92667255, "epoch": 2.45281997129938, "grad_norm": 15.953497886657715, "learning_rate": 3.562811768138166e-06, "loss": 0.44682002, "memory(GiB)": 34.88, "step": 90590, "train_speed(iter/s)": 0.410449 }, { "acc": 0.91557388, "epoch": 2.4529553515825957, "grad_norm": 11.280400276184082, "learning_rate": 3.562275849056568e-06, "loss": 0.47272339, "memory(GiB)": 34.88, "step": 90595, "train_speed(iter/s)": 0.41045 }, { "acc": 0.93967094, "epoch": 2.453090731865811, "grad_norm": 11.48592472076416, "learning_rate": 3.561739947991552e-06, "loss": 0.35791223, "memory(GiB)": 34.88, "step": 90600, "train_speed(iter/s)": 0.410451 }, { "acc": 0.94659042, "epoch": 2.453226112149027, "grad_norm": 6.840329647064209, "learning_rate": 3.561204064949834e-06, "loss": 0.29065366, "memory(GiB)": 34.88, "step": 90605, "train_speed(iter/s)": 0.410452 }, { "acc": 0.94549646, "epoch": 2.453361492432242, "grad_norm": 7.059516906738281, "learning_rate": 3.560668199938125e-06, "loss": 0.33216479, "memory(GiB)": 34.88, "step": 90610, "train_speed(iter/s)": 0.410453 }, { "acc": 0.92469139, "epoch": 2.4534968727154576, "grad_norm": 8.763603210449219, "learning_rate": 3.560132352963141e-06, "loss": 0.48401461, "memory(GiB)": 34.88, "step": 90615, "train_speed(iter/s)": 0.410454 }, { "acc": 0.91217613, "epoch": 2.4536322529986734, "grad_norm": 10.219427108764648, "learning_rate": 3.5595965240315895e-06, "loss": 0.50302801, "memory(GiB)": 34.88, "step": 90620, "train_speed(iter/s)": 0.410456 }, { "acc": 0.92327023, "epoch": 2.4537676332818887, "grad_norm": 4.120699882507324, "learning_rate": 3.5590607131501853e-06, "loss": 0.40493608, "memory(GiB)": 34.88, "step": 90625, "train_speed(iter/s)": 0.410457 }, { "acc": 0.91336689, "epoch": 2.4539030135651045, "grad_norm": 11.457137107849121, "learning_rate": 3.5585249203256423e-06, "loss": 0.50184851, "memory(GiB)": 34.88, "step": 90630, "train_speed(iter/s)": 0.410458 }, { "acc": 0.92231932, "epoch": 2.45403839384832, "grad_norm": 5.951014995574951, "learning_rate": 3.5579891455646694e-06, "loss": 0.40740724, "memory(GiB)": 34.88, "step": 90635, "train_speed(iter/s)": 0.41046 }, { "acc": 0.93678417, "epoch": 2.4541737741315357, "grad_norm": 12.73261547088623, "learning_rate": 3.5574533888739793e-06, "loss": 0.27137289, "memory(GiB)": 34.88, "step": 90640, "train_speed(iter/s)": 0.410461 }, { "acc": 0.93926945, "epoch": 2.454309154414751, "grad_norm": 8.023821830749512, "learning_rate": 3.5569176502602835e-06, "loss": 0.33284614, "memory(GiB)": 34.88, "step": 90645, "train_speed(iter/s)": 0.410462 }, { "acc": 0.92538471, "epoch": 2.4544445346979664, "grad_norm": 5.83418083190918, "learning_rate": 3.556381929730295e-06, "loss": 0.39604073, "memory(GiB)": 34.88, "step": 90650, "train_speed(iter/s)": 0.410463 }, { "acc": 0.91850157, "epoch": 2.454579914981182, "grad_norm": 9.602956771850586, "learning_rate": 3.5558462272907224e-06, "loss": 0.47244081, "memory(GiB)": 34.88, "step": 90655, "train_speed(iter/s)": 0.410465 }, { "acc": 0.9146512, "epoch": 2.4547152952643976, "grad_norm": 10.0066556930542, "learning_rate": 3.555310542948276e-06, "loss": 0.57385817, "memory(GiB)": 34.88, "step": 90660, "train_speed(iter/s)": 0.410466 }, { "acc": 0.92502632, "epoch": 2.4548506755476134, "grad_norm": 8.740792274475098, "learning_rate": 3.554774876709668e-06, "loss": 0.42264838, "memory(GiB)": 34.88, "step": 90665, "train_speed(iter/s)": 0.410467 }, { "acc": 0.92684135, "epoch": 2.4549860558308287, "grad_norm": 8.466360092163086, "learning_rate": 3.5542392285816073e-06, "loss": 0.33118324, "memory(GiB)": 34.88, "step": 90670, "train_speed(iter/s)": 0.410468 }, { "acc": 0.93814812, "epoch": 2.4551214361140445, "grad_norm": 13.519498825073242, "learning_rate": 3.553703598570806e-06, "loss": 0.39525509, "memory(GiB)": 34.88, "step": 90675, "train_speed(iter/s)": 0.410469 }, { "acc": 0.9343749, "epoch": 2.45525681639726, "grad_norm": 5.586278438568115, "learning_rate": 3.5531679866839713e-06, "loss": 0.35296423, "memory(GiB)": 34.88, "step": 90680, "train_speed(iter/s)": 0.410471 }, { "acc": 0.92729969, "epoch": 2.4553921966804753, "grad_norm": 14.221099853515625, "learning_rate": 3.552632392927815e-06, "loss": 0.3525095, "memory(GiB)": 34.88, "step": 90685, "train_speed(iter/s)": 0.410472 }, { "acc": 0.92404556, "epoch": 2.455527576963691, "grad_norm": 9.412914276123047, "learning_rate": 3.5520968173090447e-06, "loss": 0.44807367, "memory(GiB)": 34.88, "step": 90690, "train_speed(iter/s)": 0.410473 }, { "acc": 0.93279057, "epoch": 2.4556629572469064, "grad_norm": 5.129547119140625, "learning_rate": 3.551561259834371e-06, "loss": 0.37349758, "memory(GiB)": 34.88, "step": 90695, "train_speed(iter/s)": 0.410474 }, { "acc": 0.93304958, "epoch": 2.455798337530122, "grad_norm": 14.541205406188965, "learning_rate": 3.5510257205105026e-06, "loss": 0.33800802, "memory(GiB)": 34.88, "step": 90700, "train_speed(iter/s)": 0.410475 }, { "acc": 0.92738609, "epoch": 2.4559337178133376, "grad_norm": 7.426215648651123, "learning_rate": 3.550490199344146e-06, "loss": 0.3892467, "memory(GiB)": 34.88, "step": 90705, "train_speed(iter/s)": 0.410477 }, { "acc": 0.91062975, "epoch": 2.4560690980965534, "grad_norm": 5.530211448669434, "learning_rate": 3.5499546963420136e-06, "loss": 0.53442125, "memory(GiB)": 34.88, "step": 90710, "train_speed(iter/s)": 0.410478 }, { "acc": 0.93308029, "epoch": 2.4562044783797687, "grad_norm": 11.16589641571045, "learning_rate": 3.54941921151081e-06, "loss": 0.39393468, "memory(GiB)": 34.88, "step": 90715, "train_speed(iter/s)": 0.410479 }, { "acc": 0.91343231, "epoch": 2.456339858662984, "grad_norm": 14.199166297912598, "learning_rate": 3.548883744857245e-06, "loss": 0.45486751, "memory(GiB)": 34.88, "step": 90720, "train_speed(iter/s)": 0.41048 }, { "acc": 0.93586884, "epoch": 2.4564752389462, "grad_norm": 11.419567108154297, "learning_rate": 3.548348296388026e-06, "loss": 0.3427269, "memory(GiB)": 34.88, "step": 90725, "train_speed(iter/s)": 0.410481 }, { "acc": 0.93559647, "epoch": 2.4566106192294153, "grad_norm": 4.9905781745910645, "learning_rate": 3.5478128661098606e-06, "loss": 0.38521385, "memory(GiB)": 34.88, "step": 90730, "train_speed(iter/s)": 0.410482 }, { "acc": 0.9420145, "epoch": 2.456745999512631, "grad_norm": 8.028576850891113, "learning_rate": 3.5472774540294556e-06, "loss": 0.24578717, "memory(GiB)": 34.88, "step": 90735, "train_speed(iter/s)": 0.410484 }, { "acc": 0.92187843, "epoch": 2.4568813797958464, "grad_norm": 28.443405151367188, "learning_rate": 3.5467420601535196e-06, "loss": 0.41052046, "memory(GiB)": 34.88, "step": 90740, "train_speed(iter/s)": 0.410485 }, { "acc": 0.92227001, "epoch": 2.4570167600790622, "grad_norm": 12.937938690185547, "learning_rate": 3.5462066844887585e-06, "loss": 0.43505478, "memory(GiB)": 34.88, "step": 90745, "train_speed(iter/s)": 0.410486 }, { "acc": 0.92709961, "epoch": 2.4571521403622776, "grad_norm": 5.638624668121338, "learning_rate": 3.545671327041877e-06, "loss": 0.38002186, "memory(GiB)": 34.88, "step": 90750, "train_speed(iter/s)": 0.410487 }, { "acc": 0.9119585, "epoch": 2.4572875206454934, "grad_norm": 5.828993320465088, "learning_rate": 3.5451359878195856e-06, "loss": 0.49965878, "memory(GiB)": 34.88, "step": 90755, "train_speed(iter/s)": 0.410489 }, { "acc": 0.93681431, "epoch": 2.4574229009287087, "grad_norm": 15.103001594543457, "learning_rate": 3.5446006668285867e-06, "loss": 0.31424866, "memory(GiB)": 34.88, "step": 90760, "train_speed(iter/s)": 0.41049 }, { "acc": 0.92477617, "epoch": 2.4575582812119245, "grad_norm": 6.8337602615356445, "learning_rate": 3.5440653640755886e-06, "loss": 0.4091011, "memory(GiB)": 34.88, "step": 90765, "train_speed(iter/s)": 0.410491 }, { "acc": 0.91261444, "epoch": 2.45769366149514, "grad_norm": 7.895533561706543, "learning_rate": 3.5435300795672956e-06, "loss": 0.50795259, "memory(GiB)": 34.88, "step": 90770, "train_speed(iter/s)": 0.410493 }, { "acc": 0.92606373, "epoch": 2.4578290417783553, "grad_norm": 6.336913585662842, "learning_rate": 3.5429948133104138e-06, "loss": 0.45707111, "memory(GiB)": 34.88, "step": 90775, "train_speed(iter/s)": 0.410494 }, { "acc": 0.92800045, "epoch": 2.457964422061571, "grad_norm": 11.148271560668945, "learning_rate": 3.54245956531165e-06, "loss": 0.36578686, "memory(GiB)": 34.88, "step": 90780, "train_speed(iter/s)": 0.410495 }, { "acc": 0.92302208, "epoch": 2.4580998023447864, "grad_norm": 8.242741584777832, "learning_rate": 3.5419243355777055e-06, "loss": 0.40622621, "memory(GiB)": 34.88, "step": 90785, "train_speed(iter/s)": 0.410496 }, { "acc": 0.93042498, "epoch": 2.4582351826280022, "grad_norm": 5.990338325500488, "learning_rate": 3.5413891241152893e-06, "loss": 0.43248477, "memory(GiB)": 34.88, "step": 90790, "train_speed(iter/s)": 0.410498 }, { "acc": 0.92243786, "epoch": 2.4583705629112176, "grad_norm": 5.9781036376953125, "learning_rate": 3.540853930931102e-06, "loss": 0.40608807, "memory(GiB)": 34.88, "step": 90795, "train_speed(iter/s)": 0.410499 }, { "acc": 0.9379591, "epoch": 2.4585059431944334, "grad_norm": 5.057893753051758, "learning_rate": 3.5403187560318515e-06, "loss": 0.26911829, "memory(GiB)": 34.88, "step": 90800, "train_speed(iter/s)": 0.4105 }, { "acc": 0.93153057, "epoch": 2.4586413234776487, "grad_norm": 7.483545780181885, "learning_rate": 3.5397835994242393e-06, "loss": 0.34505014, "memory(GiB)": 34.88, "step": 90805, "train_speed(iter/s)": 0.410501 }, { "acc": 0.94095917, "epoch": 2.458776703760864, "grad_norm": 7.300746917724609, "learning_rate": 3.5392484611149706e-06, "loss": 0.34788661, "memory(GiB)": 34.88, "step": 90810, "train_speed(iter/s)": 0.410502 }, { "acc": 0.9251688, "epoch": 2.45891208404408, "grad_norm": 9.968740463256836, "learning_rate": 3.538713341110749e-06, "loss": 0.41892462, "memory(GiB)": 34.88, "step": 90815, "train_speed(iter/s)": 0.410503 }, { "acc": 0.93701229, "epoch": 2.4590474643272953, "grad_norm": 5.6331562995910645, "learning_rate": 3.538178239418277e-06, "loss": 0.34401908, "memory(GiB)": 34.88, "step": 90820, "train_speed(iter/s)": 0.410504 }, { "acc": 0.92518072, "epoch": 2.459182844610511, "grad_norm": 3.0566089153289795, "learning_rate": 3.5376431560442597e-06, "loss": 0.42663059, "memory(GiB)": 34.88, "step": 90825, "train_speed(iter/s)": 0.410506 }, { "acc": 0.92223063, "epoch": 2.4593182248937264, "grad_norm": 85.77934265136719, "learning_rate": 3.5371080909953983e-06, "loss": 0.45622373, "memory(GiB)": 34.88, "step": 90830, "train_speed(iter/s)": 0.410507 }, { "acc": 0.93455772, "epoch": 2.4594536051769422, "grad_norm": 9.04086971282959, "learning_rate": 3.5365730442783954e-06, "loss": 0.33331075, "memory(GiB)": 34.88, "step": 90835, "train_speed(iter/s)": 0.410508 }, { "acc": 0.9278121, "epoch": 2.4595889854601576, "grad_norm": 8.023636817932129, "learning_rate": 3.536038015899955e-06, "loss": 0.4360456, "memory(GiB)": 34.88, "step": 90840, "train_speed(iter/s)": 0.41051 }, { "acc": 0.94085827, "epoch": 2.459724365743373, "grad_norm": 5.7043538093566895, "learning_rate": 3.5355030058667795e-06, "loss": 0.33541934, "memory(GiB)": 34.88, "step": 90845, "train_speed(iter/s)": 0.410511 }, { "acc": 0.91875515, "epoch": 2.4598597460265887, "grad_norm": 14.649072647094727, "learning_rate": 3.534968014185568e-06, "loss": 0.47821541, "memory(GiB)": 34.88, "step": 90850, "train_speed(iter/s)": 0.410512 }, { "acc": 0.93300371, "epoch": 2.459995126309804, "grad_norm": 9.839162826538086, "learning_rate": 3.534433040863026e-06, "loss": 0.38182707, "memory(GiB)": 34.88, "step": 90855, "train_speed(iter/s)": 0.410513 }, { "acc": 0.93755627, "epoch": 2.46013050659302, "grad_norm": 14.863210678100586, "learning_rate": 3.5338980859058525e-06, "loss": 0.37845855, "memory(GiB)": 34.88, "step": 90860, "train_speed(iter/s)": 0.410515 }, { "acc": 0.90866699, "epoch": 2.4602658868762353, "grad_norm": 15.680312156677246, "learning_rate": 3.533363149320751e-06, "loss": 0.59293947, "memory(GiB)": 34.88, "step": 90865, "train_speed(iter/s)": 0.410516 }, { "acc": 0.9124979, "epoch": 2.460401267159451, "grad_norm": 9.282231330871582, "learning_rate": 3.53282823111442e-06, "loss": 0.48194489, "memory(GiB)": 34.88, "step": 90870, "train_speed(iter/s)": 0.410517 }, { "acc": 0.95692253, "epoch": 2.4605366474426664, "grad_norm": 3.9269111156463623, "learning_rate": 3.5322933312935635e-06, "loss": 0.25685456, "memory(GiB)": 34.88, "step": 90875, "train_speed(iter/s)": 0.410518 }, { "acc": 0.92596178, "epoch": 2.460672027725882, "grad_norm": 5.244848728179932, "learning_rate": 3.531758449864879e-06, "loss": 0.4182725, "memory(GiB)": 34.88, "step": 90880, "train_speed(iter/s)": 0.410519 }, { "acc": 0.92731924, "epoch": 2.4608074080090976, "grad_norm": 6.771943092346191, "learning_rate": 3.531223586835069e-06, "loss": 0.3326088, "memory(GiB)": 34.88, "step": 90885, "train_speed(iter/s)": 0.41052 }, { "acc": 0.92139702, "epoch": 2.460942788292313, "grad_norm": 3.8571527004241943, "learning_rate": 3.5306887422108345e-06, "loss": 0.45948305, "memory(GiB)": 34.88, "step": 90890, "train_speed(iter/s)": 0.410522 }, { "acc": 0.92981148, "epoch": 2.4610781685755287, "grad_norm": 3.569620132446289, "learning_rate": 3.530153915998873e-06, "loss": 0.4059742, "memory(GiB)": 34.88, "step": 90895, "train_speed(iter/s)": 0.410523 }, { "acc": 0.92545986, "epoch": 2.461213548858744, "grad_norm": 11.862507820129395, "learning_rate": 3.5296191082058852e-06, "loss": 0.4293149, "memory(GiB)": 34.88, "step": 90900, "train_speed(iter/s)": 0.410524 }, { "acc": 0.92208719, "epoch": 2.46134892914196, "grad_norm": 7.161750316619873, "learning_rate": 3.529084318838571e-06, "loss": 0.46864552, "memory(GiB)": 34.88, "step": 90905, "train_speed(iter/s)": 0.410525 }, { "acc": 0.92671452, "epoch": 2.4614843094251753, "grad_norm": 9.757932662963867, "learning_rate": 3.5285495479036304e-06, "loss": 0.41754141, "memory(GiB)": 34.88, "step": 90910, "train_speed(iter/s)": 0.410526 }, { "acc": 0.93911552, "epoch": 2.4616196897083906, "grad_norm": 2.9943246841430664, "learning_rate": 3.52801479540776e-06, "loss": 0.33901818, "memory(GiB)": 34.88, "step": 90915, "train_speed(iter/s)": 0.410527 }, { "acc": 0.92199183, "epoch": 2.4617550699916064, "grad_norm": 24.51819610595703, "learning_rate": 3.5274800613576615e-06, "loss": 0.51464634, "memory(GiB)": 34.88, "step": 90920, "train_speed(iter/s)": 0.410528 }, { "acc": 0.91745949, "epoch": 2.461890450274822, "grad_norm": 8.111902236938477, "learning_rate": 3.5269453457600305e-06, "loss": 0.4377883, "memory(GiB)": 34.88, "step": 90925, "train_speed(iter/s)": 0.41053 }, { "acc": 0.91771622, "epoch": 2.4620258305580376, "grad_norm": 7.6435394287109375, "learning_rate": 3.52641064862157e-06, "loss": 0.43002367, "memory(GiB)": 34.88, "step": 90930, "train_speed(iter/s)": 0.410531 }, { "acc": 0.91433983, "epoch": 2.462161210841253, "grad_norm": 6.716241836547852, "learning_rate": 3.5258759699489737e-06, "loss": 0.47416744, "memory(GiB)": 34.88, "step": 90935, "train_speed(iter/s)": 0.410532 }, { "acc": 0.93346291, "epoch": 2.4622965911244687, "grad_norm": 8.702993392944336, "learning_rate": 3.5253413097489404e-06, "loss": 0.36814096, "memory(GiB)": 34.88, "step": 90940, "train_speed(iter/s)": 0.410533 }, { "acc": 0.94054632, "epoch": 2.462431971407684, "grad_norm": 10.3601713180542, "learning_rate": 3.524806668028169e-06, "loss": 0.36743288, "memory(GiB)": 34.88, "step": 90945, "train_speed(iter/s)": 0.410534 }, { "acc": 0.93689537, "epoch": 2.4625673516909, "grad_norm": 11.668381690979004, "learning_rate": 3.524272044793356e-06, "loss": 0.35068746, "memory(GiB)": 34.88, "step": 90950, "train_speed(iter/s)": 0.410535 }, { "acc": 0.9409914, "epoch": 2.4627027319741153, "grad_norm": 14.121346473693848, "learning_rate": 3.523737440051199e-06, "loss": 0.35985448, "memory(GiB)": 34.88, "step": 90955, "train_speed(iter/s)": 0.410536 }, { "acc": 0.93329124, "epoch": 2.462838112257331, "grad_norm": 6.795791149139404, "learning_rate": 3.5232028538083943e-06, "loss": 0.36661191, "memory(GiB)": 34.88, "step": 90960, "train_speed(iter/s)": 0.410537 }, { "acc": 0.94663372, "epoch": 2.4629734925405464, "grad_norm": 4.283698081970215, "learning_rate": 3.5226682860716405e-06, "loss": 0.30355639, "memory(GiB)": 34.88, "step": 90965, "train_speed(iter/s)": 0.410538 }, { "acc": 0.92946215, "epoch": 2.463108872823762, "grad_norm": 12.77998161315918, "learning_rate": 3.5221337368476328e-06, "loss": 0.37550228, "memory(GiB)": 34.88, "step": 90970, "train_speed(iter/s)": 0.410539 }, { "acc": 0.92974339, "epoch": 2.4632442531069776, "grad_norm": 6.508213520050049, "learning_rate": 3.5215992061430666e-06, "loss": 0.40423794, "memory(GiB)": 34.88, "step": 90975, "train_speed(iter/s)": 0.410541 }, { "acc": 0.91542282, "epoch": 2.463379633390193, "grad_norm": 10.752851486206055, "learning_rate": 3.52106469396464e-06, "loss": 0.51274948, "memory(GiB)": 34.88, "step": 90980, "train_speed(iter/s)": 0.410542 }, { "acc": 0.93202496, "epoch": 2.4635150136734087, "grad_norm": 4.343653678894043, "learning_rate": 3.5205302003190463e-06, "loss": 0.39790933, "memory(GiB)": 34.88, "step": 90985, "train_speed(iter/s)": 0.410543 }, { "acc": 0.92020826, "epoch": 2.463650393956624, "grad_norm": 6.4132161140441895, "learning_rate": 3.519995725212983e-06, "loss": 0.43258524, "memory(GiB)": 34.88, "step": 90990, "train_speed(iter/s)": 0.410544 }, { "acc": 0.93805752, "epoch": 2.46378577423984, "grad_norm": 7.260659217834473, "learning_rate": 3.5194612686531454e-06, "loss": 0.36257679, "memory(GiB)": 34.88, "step": 90995, "train_speed(iter/s)": 0.410545 }, { "acc": 0.92661762, "epoch": 2.4639211545230553, "grad_norm": 12.100250244140625, "learning_rate": 3.5189268306462277e-06, "loss": 0.41893387, "memory(GiB)": 34.88, "step": 91000, "train_speed(iter/s)": 0.410547 }, { "acc": 0.93267441, "epoch": 2.4640565348062706, "grad_norm": 12.13026237487793, "learning_rate": 3.5183924111989237e-06, "loss": 0.39649396, "memory(GiB)": 34.88, "step": 91005, "train_speed(iter/s)": 0.410548 }, { "acc": 0.92122536, "epoch": 2.4641919150894864, "grad_norm": 7.074516773223877, "learning_rate": 3.517858010317933e-06, "loss": 0.47685761, "memory(GiB)": 34.88, "step": 91010, "train_speed(iter/s)": 0.410549 }, { "acc": 0.92581625, "epoch": 2.464327295372702, "grad_norm": 7.302781581878662, "learning_rate": 3.517323628009945e-06, "loss": 0.44643536, "memory(GiB)": 34.88, "step": 91015, "train_speed(iter/s)": 0.41055 }, { "acc": 0.9430521, "epoch": 2.4644626756559176, "grad_norm": 5.627129554748535, "learning_rate": 3.516789264281655e-06, "loss": 0.3390233, "memory(GiB)": 34.88, "step": 91020, "train_speed(iter/s)": 0.410551 }, { "acc": 0.92923489, "epoch": 2.464598055939133, "grad_norm": 8.613974571228027, "learning_rate": 3.516254919139758e-06, "loss": 0.40998707, "memory(GiB)": 34.88, "step": 91025, "train_speed(iter/s)": 0.410552 }, { "acc": 0.91612768, "epoch": 2.4647334362223488, "grad_norm": 7.025635719299316, "learning_rate": 3.5157205925909467e-06, "loss": 0.45204959, "memory(GiB)": 34.88, "step": 91030, "train_speed(iter/s)": 0.410553 }, { "acc": 0.94143991, "epoch": 2.464868816505564, "grad_norm": 5.092621326446533, "learning_rate": 3.515186284641916e-06, "loss": 0.26177311, "memory(GiB)": 34.88, "step": 91035, "train_speed(iter/s)": 0.410554 }, { "acc": 0.93374748, "epoch": 2.4650041967887795, "grad_norm": 6.033807754516602, "learning_rate": 3.514651995299356e-06, "loss": 0.403479, "memory(GiB)": 34.88, "step": 91040, "train_speed(iter/s)": 0.410555 }, { "acc": 0.91770802, "epoch": 2.4651395770719953, "grad_norm": 8.021147727966309, "learning_rate": 3.514117724569965e-06, "loss": 0.59411879, "memory(GiB)": 34.88, "step": 91045, "train_speed(iter/s)": 0.410556 }, { "acc": 0.94135046, "epoch": 2.4652749573552106, "grad_norm": 9.529130935668945, "learning_rate": 3.5135834724604288e-06, "loss": 0.33139486, "memory(GiB)": 34.88, "step": 91050, "train_speed(iter/s)": 0.410557 }, { "acc": 0.93836994, "epoch": 2.4654103376384264, "grad_norm": 4.70843505859375, "learning_rate": 3.513049238977448e-06, "loss": 0.31153619, "memory(GiB)": 34.88, "step": 91055, "train_speed(iter/s)": 0.410558 }, { "acc": 0.92686405, "epoch": 2.465545717921642, "grad_norm": 6.729470729827881, "learning_rate": 3.5125150241277102e-06, "loss": 0.43668141, "memory(GiB)": 34.88, "step": 91060, "train_speed(iter/s)": 0.410559 }, { "acc": 0.92101145, "epoch": 2.4656810982048576, "grad_norm": 5.929110527038574, "learning_rate": 3.5119808279179063e-06, "loss": 0.44680505, "memory(GiB)": 34.88, "step": 91065, "train_speed(iter/s)": 0.410561 }, { "acc": 0.94630642, "epoch": 2.465816478488073, "grad_norm": 4.810600757598877, "learning_rate": 3.511446650354731e-06, "loss": 0.35567961, "memory(GiB)": 34.88, "step": 91070, "train_speed(iter/s)": 0.410562 }, { "acc": 0.92760658, "epoch": 2.4659518587712883, "grad_norm": 10.238811492919922, "learning_rate": 3.510912491444874e-06, "loss": 0.42693634, "memory(GiB)": 34.88, "step": 91075, "train_speed(iter/s)": 0.410563 }, { "acc": 0.95013723, "epoch": 2.466087239054504, "grad_norm": 4.272700786590576, "learning_rate": 3.510378351195029e-06, "loss": 0.24905229, "memory(GiB)": 34.88, "step": 91080, "train_speed(iter/s)": 0.410564 }, { "acc": 0.93923702, "epoch": 2.4662226193377195, "grad_norm": 4.0136590003967285, "learning_rate": 3.5098442296118836e-06, "loss": 0.27520299, "memory(GiB)": 34.88, "step": 91085, "train_speed(iter/s)": 0.410565 }, { "acc": 0.93063622, "epoch": 2.4663579996209353, "grad_norm": 6.2516655921936035, "learning_rate": 3.5093101267021332e-06, "loss": 0.36432419, "memory(GiB)": 34.88, "step": 91090, "train_speed(iter/s)": 0.410566 }, { "acc": 0.93225594, "epoch": 2.4664933799041506, "grad_norm": 8.237312316894531, "learning_rate": 3.508776042472466e-06, "loss": 0.4440093, "memory(GiB)": 34.88, "step": 91095, "train_speed(iter/s)": 0.410567 }, { "acc": 0.93059702, "epoch": 2.4666287601873664, "grad_norm": 16.980134963989258, "learning_rate": 3.508241976929571e-06, "loss": 0.44442186, "memory(GiB)": 34.88, "step": 91100, "train_speed(iter/s)": 0.410568 }, { "acc": 0.93587418, "epoch": 2.466764140470582, "grad_norm": 3.977510690689087, "learning_rate": 3.507707930080141e-06, "loss": 0.37190826, "memory(GiB)": 34.88, "step": 91105, "train_speed(iter/s)": 0.41057 }, { "acc": 0.93162899, "epoch": 2.4668995207537976, "grad_norm": 5.925973892211914, "learning_rate": 3.5071739019308635e-06, "loss": 0.35313158, "memory(GiB)": 34.88, "step": 91110, "train_speed(iter/s)": 0.410571 }, { "acc": 0.91998005, "epoch": 2.467034901037013, "grad_norm": 9.326211929321289, "learning_rate": 3.506639892488432e-06, "loss": 0.49766583, "memory(GiB)": 34.88, "step": 91115, "train_speed(iter/s)": 0.410572 }, { "acc": 0.93449793, "epoch": 2.4671702813202288, "grad_norm": 6.759767055511475, "learning_rate": 3.506105901759531e-06, "loss": 0.35294318, "memory(GiB)": 34.88, "step": 91120, "train_speed(iter/s)": 0.410573 }, { "acc": 0.91965828, "epoch": 2.467305661603444, "grad_norm": 12.051887512207031, "learning_rate": 3.5055719297508562e-06, "loss": 0.41800222, "memory(GiB)": 34.88, "step": 91125, "train_speed(iter/s)": 0.410574 }, { "acc": 0.92264633, "epoch": 2.4674410418866595, "grad_norm": 10.032994270324707, "learning_rate": 3.5050379764690894e-06, "loss": 0.47132516, "memory(GiB)": 34.88, "step": 91130, "train_speed(iter/s)": 0.410575 }, { "acc": 0.91988926, "epoch": 2.4675764221698753, "grad_norm": 4.906108856201172, "learning_rate": 3.504504041920927e-06, "loss": 0.43532581, "memory(GiB)": 34.88, "step": 91135, "train_speed(iter/s)": 0.410576 }, { "acc": 0.94358215, "epoch": 2.4677118024530906, "grad_norm": 3.7496485710144043, "learning_rate": 3.5039701261130515e-06, "loss": 0.27297361, "memory(GiB)": 34.88, "step": 91140, "train_speed(iter/s)": 0.410578 }, { "acc": 0.94804497, "epoch": 2.4678471827363064, "grad_norm": 9.457931518554688, "learning_rate": 3.5034362290521525e-06, "loss": 0.29320154, "memory(GiB)": 34.88, "step": 91145, "train_speed(iter/s)": 0.410579 }, { "acc": 0.93432121, "epoch": 2.467982563019522, "grad_norm": 4.943167209625244, "learning_rate": 3.50290235074492e-06, "loss": 0.38592973, "memory(GiB)": 34.88, "step": 91150, "train_speed(iter/s)": 0.41058 }, { "acc": 0.92200232, "epoch": 2.4681179433027376, "grad_norm": 8.135936737060547, "learning_rate": 3.5023684911980387e-06, "loss": 0.47890949, "memory(GiB)": 34.88, "step": 91155, "train_speed(iter/s)": 0.410581 }, { "acc": 0.93868828, "epoch": 2.468253323585953, "grad_norm": 6.546831130981445, "learning_rate": 3.501834650418202e-06, "loss": 0.30283694, "memory(GiB)": 34.88, "step": 91160, "train_speed(iter/s)": 0.410582 }, { "acc": 0.93620625, "epoch": 2.4683887038691683, "grad_norm": 9.446598052978516, "learning_rate": 3.5013008284120903e-06, "loss": 0.38966742, "memory(GiB)": 34.88, "step": 91165, "train_speed(iter/s)": 0.410583 }, { "acc": 0.93579884, "epoch": 2.468524084152384, "grad_norm": 8.893399238586426, "learning_rate": 3.500767025186397e-06, "loss": 0.34918203, "memory(GiB)": 34.88, "step": 91170, "train_speed(iter/s)": 0.410585 }, { "acc": 0.92085304, "epoch": 2.4686594644355995, "grad_norm": 11.748893737792969, "learning_rate": 3.500233240747803e-06, "loss": 0.51405025, "memory(GiB)": 34.88, "step": 91175, "train_speed(iter/s)": 0.410586 }, { "acc": 0.92465048, "epoch": 2.4687948447188153, "grad_norm": 8.161964416503906, "learning_rate": 3.499699475103002e-06, "loss": 0.43158002, "memory(GiB)": 34.88, "step": 91180, "train_speed(iter/s)": 0.410587 }, { "acc": 0.92370338, "epoch": 2.4689302250020306, "grad_norm": 5.300545692443848, "learning_rate": 3.4991657282586755e-06, "loss": 0.43958864, "memory(GiB)": 34.88, "step": 91185, "train_speed(iter/s)": 0.410588 }, { "acc": 0.93332796, "epoch": 2.4690656052852464, "grad_norm": 7.724186897277832, "learning_rate": 3.498632000221508e-06, "loss": 0.36421196, "memory(GiB)": 34.88, "step": 91190, "train_speed(iter/s)": 0.410589 }, { "acc": 0.92935133, "epoch": 2.469200985568462, "grad_norm": 9.430770874023438, "learning_rate": 3.4980982909981927e-06, "loss": 0.35132647, "memory(GiB)": 34.88, "step": 91195, "train_speed(iter/s)": 0.41059 }, { "acc": 0.91649952, "epoch": 2.469336365851677, "grad_norm": 8.192939758300781, "learning_rate": 3.4975646005954067e-06, "loss": 0.46123128, "memory(GiB)": 34.88, "step": 91200, "train_speed(iter/s)": 0.410591 }, { "acc": 0.9384016, "epoch": 2.469471746134893, "grad_norm": 4.883908748626709, "learning_rate": 3.497030929019843e-06, "loss": 0.32316244, "memory(GiB)": 34.88, "step": 91205, "train_speed(iter/s)": 0.410592 }, { "acc": 0.93814831, "epoch": 2.4696071264181083, "grad_norm": 4.06879997253418, "learning_rate": 3.496497276278181e-06, "loss": 0.33228168, "memory(GiB)": 34.88, "step": 91210, "train_speed(iter/s)": 0.410594 }, { "acc": 0.9355154, "epoch": 2.469742506701324, "grad_norm": 3.6306400299072266, "learning_rate": 3.4959636423771115e-06, "loss": 0.37609966, "memory(GiB)": 34.88, "step": 91215, "train_speed(iter/s)": 0.410595 }, { "acc": 0.94608631, "epoch": 2.4698778869845395, "grad_norm": 5.917346000671387, "learning_rate": 3.4954300273233157e-06, "loss": 0.28376379, "memory(GiB)": 34.88, "step": 91220, "train_speed(iter/s)": 0.410596 }, { "acc": 0.9145503, "epoch": 2.4700132672677553, "grad_norm": 8.533490180969238, "learning_rate": 3.4948964311234767e-06, "loss": 0.50317678, "memory(GiB)": 34.88, "step": 91225, "train_speed(iter/s)": 0.410597 }, { "acc": 0.93206711, "epoch": 2.4701486475509706, "grad_norm": 12.320222854614258, "learning_rate": 3.4943628537842832e-06, "loss": 0.39311483, "memory(GiB)": 34.88, "step": 91230, "train_speed(iter/s)": 0.410598 }, { "acc": 0.91426573, "epoch": 2.470284027834186, "grad_norm": 9.230563163757324, "learning_rate": 3.493829295312414e-06, "loss": 0.46709185, "memory(GiB)": 34.88, "step": 91235, "train_speed(iter/s)": 0.410599 }, { "acc": 0.92974205, "epoch": 2.470419408117402, "grad_norm": 4.497313976287842, "learning_rate": 3.4932957557145604e-06, "loss": 0.37715516, "memory(GiB)": 34.88, "step": 91240, "train_speed(iter/s)": 0.4106 }, { "acc": 0.92917995, "epoch": 2.470554788400617, "grad_norm": 9.779256820678711, "learning_rate": 3.4927622349973972e-06, "loss": 0.40051298, "memory(GiB)": 34.88, "step": 91245, "train_speed(iter/s)": 0.410602 }, { "acc": 0.92816372, "epoch": 2.470690168683833, "grad_norm": 7.59457540512085, "learning_rate": 3.4922287331676153e-06, "loss": 0.43143492, "memory(GiB)": 34.88, "step": 91250, "train_speed(iter/s)": 0.410603 }, { "acc": 0.91662121, "epoch": 2.4708255489670483, "grad_norm": 6.526491165161133, "learning_rate": 3.4916952502318925e-06, "loss": 0.47573166, "memory(GiB)": 34.88, "step": 91255, "train_speed(iter/s)": 0.410604 }, { "acc": 0.92583618, "epoch": 2.470960929250264, "grad_norm": 6.809638023376465, "learning_rate": 3.491161786196915e-06, "loss": 0.41372042, "memory(GiB)": 34.88, "step": 91260, "train_speed(iter/s)": 0.410605 }, { "acc": 0.90874538, "epoch": 2.4710963095334795, "grad_norm": 7.553389549255371, "learning_rate": 3.490628341069366e-06, "loss": 0.43865128, "memory(GiB)": 34.88, "step": 91265, "train_speed(iter/s)": 0.410607 }, { "acc": 0.94639606, "epoch": 2.4712316898166953, "grad_norm": 5.000129699707031, "learning_rate": 3.4900949148559236e-06, "loss": 0.29205208, "memory(GiB)": 34.88, "step": 91270, "train_speed(iter/s)": 0.410608 }, { "acc": 0.95502481, "epoch": 2.4713670700999106, "grad_norm": 4.750421524047852, "learning_rate": 3.4895615075632755e-06, "loss": 0.23293486, "memory(GiB)": 34.88, "step": 91275, "train_speed(iter/s)": 0.410609 }, { "acc": 0.92990742, "epoch": 2.4715024503831264, "grad_norm": 7.849782466888428, "learning_rate": 3.4890281191980977e-06, "loss": 0.42189493, "memory(GiB)": 34.88, "step": 91280, "train_speed(iter/s)": 0.41061 }, { "acc": 0.94168577, "epoch": 2.471637830666342, "grad_norm": 12.747133255004883, "learning_rate": 3.4884947497670794e-06, "loss": 0.34200482, "memory(GiB)": 34.88, "step": 91285, "train_speed(iter/s)": 0.410612 }, { "acc": 0.93142357, "epoch": 2.471773210949557, "grad_norm": 9.771417617797852, "learning_rate": 3.4879613992768944e-06, "loss": 0.4113555, "memory(GiB)": 34.88, "step": 91290, "train_speed(iter/s)": 0.410613 }, { "acc": 0.92182484, "epoch": 2.471908591232773, "grad_norm": 14.834108352661133, "learning_rate": 3.4874280677342303e-06, "loss": 0.43633184, "memory(GiB)": 34.88, "step": 91295, "train_speed(iter/s)": 0.410614 }, { "acc": 0.92451916, "epoch": 2.4720439715159883, "grad_norm": 70.18718719482422, "learning_rate": 3.4868947551457622e-06, "loss": 0.38980737, "memory(GiB)": 34.88, "step": 91300, "train_speed(iter/s)": 0.410615 }, { "acc": 0.94769163, "epoch": 2.472179351799204, "grad_norm": 13.654854774475098, "learning_rate": 3.4863614615181767e-06, "loss": 0.31483021, "memory(GiB)": 34.88, "step": 91305, "train_speed(iter/s)": 0.410616 }, { "acc": 0.94126568, "epoch": 2.4723147320824195, "grad_norm": 5.576210021972656, "learning_rate": 3.485828186858152e-06, "loss": 0.29880083, "memory(GiB)": 34.88, "step": 91310, "train_speed(iter/s)": 0.410617 }, { "acc": 0.92749577, "epoch": 2.4724501123656353, "grad_norm": 5.732360363006592, "learning_rate": 3.485294931172365e-06, "loss": 0.39665246, "memory(GiB)": 34.88, "step": 91315, "train_speed(iter/s)": 0.410619 }, { "acc": 0.92572556, "epoch": 2.4725854926488506, "grad_norm": 8.281233787536621, "learning_rate": 3.4847616944675023e-06, "loss": 0.38479486, "memory(GiB)": 34.88, "step": 91320, "train_speed(iter/s)": 0.41062 }, { "acc": 0.91924534, "epoch": 2.472720872932066, "grad_norm": 9.084809303283691, "learning_rate": 3.484228476750237e-06, "loss": 0.45635653, "memory(GiB)": 34.88, "step": 91325, "train_speed(iter/s)": 0.410621 }, { "acc": 0.91991825, "epoch": 2.472856253215282, "grad_norm": 7.99263858795166, "learning_rate": 3.483695278027255e-06, "loss": 0.48447976, "memory(GiB)": 34.88, "step": 91330, "train_speed(iter/s)": 0.410622 }, { "acc": 0.92604227, "epoch": 2.472991633498497, "grad_norm": 11.789566993713379, "learning_rate": 3.4831620983052308e-06, "loss": 0.4121675, "memory(GiB)": 34.88, "step": 91335, "train_speed(iter/s)": 0.410623 }, { "acc": 0.92961521, "epoch": 2.473127013781713, "grad_norm": 11.860380172729492, "learning_rate": 3.4826289375908455e-06, "loss": 0.45506039, "memory(GiB)": 34.88, "step": 91340, "train_speed(iter/s)": 0.410625 }, { "acc": 0.91484871, "epoch": 2.4732623940649283, "grad_norm": 7.000067710876465, "learning_rate": 3.4820957958907803e-06, "loss": 0.45992651, "memory(GiB)": 34.88, "step": 91345, "train_speed(iter/s)": 0.410626 }, { "acc": 0.93434563, "epoch": 2.473397774348144, "grad_norm": 5.419975280761719, "learning_rate": 3.481562673211708e-06, "loss": 0.36494467, "memory(GiB)": 34.88, "step": 91350, "train_speed(iter/s)": 0.410627 }, { "acc": 0.92779312, "epoch": 2.4735331546313595, "grad_norm": 8.416313171386719, "learning_rate": 3.4810295695603125e-06, "loss": 0.36272347, "memory(GiB)": 34.88, "step": 91355, "train_speed(iter/s)": 0.410629 }, { "acc": 0.92638121, "epoch": 2.473668534914575, "grad_norm": 3.7119622230529785, "learning_rate": 3.480496484943268e-06, "loss": 0.38496583, "memory(GiB)": 34.88, "step": 91360, "train_speed(iter/s)": 0.41063 }, { "acc": 0.91912727, "epoch": 2.4738039151977906, "grad_norm": 7.11863374710083, "learning_rate": 3.479963419367257e-06, "loss": 0.37801654, "memory(GiB)": 34.88, "step": 91365, "train_speed(iter/s)": 0.410631 }, { "acc": 0.9275157, "epoch": 2.473939295481006, "grad_norm": 9.868087768554688, "learning_rate": 3.479430372838951e-06, "loss": 0.40139437, "memory(GiB)": 34.88, "step": 91370, "train_speed(iter/s)": 0.410632 }, { "acc": 0.93531075, "epoch": 2.474074675764222, "grad_norm": 6.483443737030029, "learning_rate": 3.478897345365034e-06, "loss": 0.31574521, "memory(GiB)": 34.88, "step": 91375, "train_speed(iter/s)": 0.410634 }, { "acc": 0.91869144, "epoch": 2.474210056047437, "grad_norm": 11.719121932983398, "learning_rate": 3.4783643369521784e-06, "loss": 0.49749684, "memory(GiB)": 34.88, "step": 91380, "train_speed(iter/s)": 0.410635 }, { "acc": 0.92704868, "epoch": 2.474345436330653, "grad_norm": 9.726730346679688, "learning_rate": 3.4778313476070646e-06, "loss": 0.35319693, "memory(GiB)": 34.88, "step": 91385, "train_speed(iter/s)": 0.410636 }, { "acc": 0.93590651, "epoch": 2.4744808166138683, "grad_norm": 10.497118949890137, "learning_rate": 3.4772983773363687e-06, "loss": 0.33537714, "memory(GiB)": 34.88, "step": 91390, "train_speed(iter/s)": 0.410637 }, { "acc": 0.92053137, "epoch": 2.4746161968970837, "grad_norm": 5.7002458572387695, "learning_rate": 3.4767654261467633e-06, "loss": 0.40835433, "memory(GiB)": 34.88, "step": 91395, "train_speed(iter/s)": 0.410639 }, { "acc": 0.9261632, "epoch": 2.4747515771802995, "grad_norm": 8.016756057739258, "learning_rate": 3.47623249404493e-06, "loss": 0.47653222, "memory(GiB)": 34.88, "step": 91400, "train_speed(iter/s)": 0.41064 }, { "acc": 0.93621264, "epoch": 2.474886957463515, "grad_norm": 2.9357476234436035, "learning_rate": 3.4756995810375393e-06, "loss": 0.29825122, "memory(GiB)": 34.88, "step": 91405, "train_speed(iter/s)": 0.410641 }, { "acc": 0.9281436, "epoch": 2.4750223377467306, "grad_norm": 22.016124725341797, "learning_rate": 3.4751666871312716e-06, "loss": 0.414677, "memory(GiB)": 34.88, "step": 91410, "train_speed(iter/s)": 0.410642 }, { "acc": 0.9402174, "epoch": 2.475157718029946, "grad_norm": 7.880192279815674, "learning_rate": 3.474633812332801e-06, "loss": 0.32210336, "memory(GiB)": 34.88, "step": 91415, "train_speed(iter/s)": 0.410644 }, { "acc": 0.92732201, "epoch": 2.475293098313162, "grad_norm": 3.8373944759368896, "learning_rate": 3.474100956648804e-06, "loss": 0.42818942, "memory(GiB)": 34.88, "step": 91420, "train_speed(iter/s)": 0.410645 }, { "acc": 0.93483057, "epoch": 2.475428478596377, "grad_norm": 18.7748966217041, "learning_rate": 3.4735681200859523e-06, "loss": 0.43386922, "memory(GiB)": 34.88, "step": 91425, "train_speed(iter/s)": 0.410646 }, { "acc": 0.92229347, "epoch": 2.475563858879593, "grad_norm": 10.839486122131348, "learning_rate": 3.4730353026509245e-06, "loss": 0.43408856, "memory(GiB)": 34.88, "step": 91430, "train_speed(iter/s)": 0.410648 }, { "acc": 0.92280874, "epoch": 2.4756992391628083, "grad_norm": 8.802489280700684, "learning_rate": 3.4725025043503946e-06, "loss": 0.41349039, "memory(GiB)": 34.88, "step": 91435, "train_speed(iter/s)": 0.410649 }, { "acc": 0.93173647, "epoch": 2.475834619446024, "grad_norm": 3.111907958984375, "learning_rate": 3.471969725191032e-06, "loss": 0.3483943, "memory(GiB)": 34.88, "step": 91440, "train_speed(iter/s)": 0.41065 }, { "acc": 0.94793072, "epoch": 2.4759699997292395, "grad_norm": 15.981169700622559, "learning_rate": 3.4714369651795173e-06, "loss": 0.27080245, "memory(GiB)": 34.88, "step": 91445, "train_speed(iter/s)": 0.410651 }, { "acc": 0.94932213, "epoch": 2.476105380012455, "grad_norm": 6.027307987213135, "learning_rate": 3.47090422432252e-06, "loss": 0.3323818, "memory(GiB)": 34.88, "step": 91450, "train_speed(iter/s)": 0.410652 }, { "acc": 0.94363022, "epoch": 2.4762407602956706, "grad_norm": 5.983506679534912, "learning_rate": 3.4703715026267176e-06, "loss": 0.28439527, "memory(GiB)": 34.88, "step": 91455, "train_speed(iter/s)": 0.410653 }, { "acc": 0.94741383, "epoch": 2.476376140578886, "grad_norm": 17.726003646850586, "learning_rate": 3.4698388000987796e-06, "loss": 0.30619113, "memory(GiB)": 34.88, "step": 91460, "train_speed(iter/s)": 0.410655 }, { "acc": 0.95009289, "epoch": 2.476511520862102, "grad_norm": 3.7823808193206787, "learning_rate": 3.4693061167453824e-06, "loss": 0.28477333, "memory(GiB)": 34.88, "step": 91465, "train_speed(iter/s)": 0.410656 }, { "acc": 0.92300711, "epoch": 2.476646901145317, "grad_norm": 5.314088821411133, "learning_rate": 3.468773452573196e-06, "loss": 0.41749125, "memory(GiB)": 34.88, "step": 91470, "train_speed(iter/s)": 0.410657 }, { "acc": 0.93698177, "epoch": 2.476782281428533, "grad_norm": 3.529759645462036, "learning_rate": 3.4682408075888946e-06, "loss": 0.38375378, "memory(GiB)": 34.88, "step": 91475, "train_speed(iter/s)": 0.410658 }, { "acc": 0.94711523, "epoch": 2.4769176617117483, "grad_norm": 5.392853736877441, "learning_rate": 3.4677081817991533e-06, "loss": 0.28448584, "memory(GiB)": 34.88, "step": 91480, "train_speed(iter/s)": 0.41066 }, { "acc": 0.93105354, "epoch": 2.4770530419949637, "grad_norm": 7.342728614807129, "learning_rate": 3.467175575210637e-06, "loss": 0.33649282, "memory(GiB)": 34.88, "step": 91485, "train_speed(iter/s)": 0.410661 }, { "acc": 0.91843853, "epoch": 2.4771884222781795, "grad_norm": 7.326469421386719, "learning_rate": 3.466642987830025e-06, "loss": 0.45643954, "memory(GiB)": 34.88, "step": 91490, "train_speed(iter/s)": 0.410662 }, { "acc": 0.91575356, "epoch": 2.477323802561395, "grad_norm": 5.811651706695557, "learning_rate": 3.466110419663984e-06, "loss": 0.45707703, "memory(GiB)": 34.88, "step": 91495, "train_speed(iter/s)": 0.410664 }, { "acc": 0.92217445, "epoch": 2.4774591828446106, "grad_norm": 6.003875255584717, "learning_rate": 3.4655778707191893e-06, "loss": 0.41672091, "memory(GiB)": 34.88, "step": 91500, "train_speed(iter/s)": 0.410665 }, { "acc": 0.93667011, "epoch": 2.477594563127826, "grad_norm": 10.404607772827148, "learning_rate": 3.4650453410023093e-06, "loss": 0.38021703, "memory(GiB)": 34.88, "step": 91505, "train_speed(iter/s)": 0.410666 }, { "acc": 0.93189449, "epoch": 2.477729943411042, "grad_norm": 8.319780349731445, "learning_rate": 3.4645128305200174e-06, "loss": 0.41870356, "memory(GiB)": 34.88, "step": 91510, "train_speed(iter/s)": 0.410667 }, { "acc": 0.93228264, "epoch": 2.477865323694257, "grad_norm": 10.001348495483398, "learning_rate": 3.463980339278984e-06, "loss": 0.3416811, "memory(GiB)": 34.88, "step": 91515, "train_speed(iter/s)": 0.410668 }, { "acc": 0.93541946, "epoch": 2.4780007039774725, "grad_norm": 8.221545219421387, "learning_rate": 3.4634478672858747e-06, "loss": 0.34206889, "memory(GiB)": 34.88, "step": 91520, "train_speed(iter/s)": 0.410669 }, { "acc": 0.91408148, "epoch": 2.4781360842606883, "grad_norm": 24.542123794555664, "learning_rate": 3.462915414547366e-06, "loss": 0.46892657, "memory(GiB)": 34.88, "step": 91525, "train_speed(iter/s)": 0.41067 }, { "acc": 0.92428379, "epoch": 2.4782714645439037, "grad_norm": 9.082674026489258, "learning_rate": 3.4623829810701247e-06, "loss": 0.40820036, "memory(GiB)": 34.88, "step": 91530, "train_speed(iter/s)": 0.410672 }, { "acc": 0.92013578, "epoch": 2.4784068448271195, "grad_norm": 8.246078491210938, "learning_rate": 3.4618505668608234e-06, "loss": 0.39304867, "memory(GiB)": 34.88, "step": 91535, "train_speed(iter/s)": 0.410673 }, { "acc": 0.92547112, "epoch": 2.478542225110335, "grad_norm": 7.202933311462402, "learning_rate": 3.4613181719261274e-06, "loss": 0.40421247, "memory(GiB)": 34.88, "step": 91540, "train_speed(iter/s)": 0.410674 }, { "acc": 0.90998907, "epoch": 2.4786776053935506, "grad_norm": 8.081371307373047, "learning_rate": 3.46078579627271e-06, "loss": 0.4681366, "memory(GiB)": 34.88, "step": 91545, "train_speed(iter/s)": 0.410676 }, { "acc": 0.92394114, "epoch": 2.478812985676766, "grad_norm": 9.923432350158691, "learning_rate": 3.4602534399072374e-06, "loss": 0.41068149, "memory(GiB)": 34.88, "step": 91550, "train_speed(iter/s)": 0.410677 }, { "acc": 0.92091846, "epoch": 2.4789483659599814, "grad_norm": 8.263263702392578, "learning_rate": 3.4597211028363804e-06, "loss": 0.44338484, "memory(GiB)": 34.88, "step": 91555, "train_speed(iter/s)": 0.410678 }, { "acc": 0.91414928, "epoch": 2.479083746243197, "grad_norm": 8.193243026733398, "learning_rate": 3.4591887850668065e-06, "loss": 0.43300405, "memory(GiB)": 34.88, "step": 91560, "train_speed(iter/s)": 0.410679 }, { "acc": 0.94665699, "epoch": 2.4792191265264125, "grad_norm": 4.470613479614258, "learning_rate": 3.458656486605184e-06, "loss": 0.27919068, "memory(GiB)": 34.88, "step": 91565, "train_speed(iter/s)": 0.410681 }, { "acc": 0.93288574, "epoch": 2.4793545068096283, "grad_norm": 4.22045373916626, "learning_rate": 3.458124207458181e-06, "loss": 0.37109232, "memory(GiB)": 34.88, "step": 91570, "train_speed(iter/s)": 0.410682 }, { "acc": 0.93048096, "epoch": 2.4794898870928437, "grad_norm": 8.988109588623047, "learning_rate": 3.4575919476324654e-06, "loss": 0.42540793, "memory(GiB)": 34.88, "step": 91575, "train_speed(iter/s)": 0.410683 }, { "acc": 0.92051897, "epoch": 2.4796252673760595, "grad_norm": 4.339661598205566, "learning_rate": 3.457059707134706e-06, "loss": 0.46906533, "memory(GiB)": 34.88, "step": 91580, "train_speed(iter/s)": 0.410684 }, { "acc": 0.91918879, "epoch": 2.479760647659275, "grad_norm": 9.302417755126953, "learning_rate": 3.456527485971568e-06, "loss": 0.44835024, "memory(GiB)": 34.88, "step": 91585, "train_speed(iter/s)": 0.410685 }, { "acc": 0.9451231, "epoch": 2.4798960279424906, "grad_norm": 9.917303085327148, "learning_rate": 3.4559952841497206e-06, "loss": 0.33948255, "memory(GiB)": 34.88, "step": 91590, "train_speed(iter/s)": 0.410686 }, { "acc": 0.9191988, "epoch": 2.480031408225706, "grad_norm": 4.962021827697754, "learning_rate": 3.455463101675829e-06, "loss": 0.43077803, "memory(GiB)": 34.88, "step": 91595, "train_speed(iter/s)": 0.410688 }, { "acc": 0.94013901, "epoch": 2.480166788508922, "grad_norm": 3.2009286880493164, "learning_rate": 3.4549309385565603e-06, "loss": 0.32866278, "memory(GiB)": 34.88, "step": 91600, "train_speed(iter/s)": 0.410689 }, { "acc": 0.93643799, "epoch": 2.480302168792137, "grad_norm": 15.053582191467285, "learning_rate": 3.454398794798583e-06, "loss": 0.35787215, "memory(GiB)": 34.88, "step": 91605, "train_speed(iter/s)": 0.41069 }, { "acc": 0.93425026, "epoch": 2.4804375490753525, "grad_norm": 5.656283378601074, "learning_rate": 3.4538666704085592e-06, "loss": 0.32565212, "memory(GiB)": 34.88, "step": 91610, "train_speed(iter/s)": 0.410691 }, { "acc": 0.93011875, "epoch": 2.4805729293585683, "grad_norm": 7.931985378265381, "learning_rate": 3.4533345653931583e-06, "loss": 0.37986324, "memory(GiB)": 34.88, "step": 91615, "train_speed(iter/s)": 0.410693 }, { "acc": 0.94349613, "epoch": 2.4807083096417837, "grad_norm": 4.613867282867432, "learning_rate": 3.4528024797590436e-06, "loss": 0.30929899, "memory(GiB)": 34.88, "step": 91620, "train_speed(iter/s)": 0.410694 }, { "acc": 0.92935715, "epoch": 2.4808436899249995, "grad_norm": 7.954525947570801, "learning_rate": 3.452270413512883e-06, "loss": 0.32728691, "memory(GiB)": 34.88, "step": 91625, "train_speed(iter/s)": 0.410695 }, { "acc": 0.94472313, "epoch": 2.480979070208215, "grad_norm": 8.354353904724121, "learning_rate": 3.4517383666613387e-06, "loss": 0.27748523, "memory(GiB)": 34.88, "step": 91630, "train_speed(iter/s)": 0.410696 }, { "acc": 0.91412973, "epoch": 2.4811144504914306, "grad_norm": 13.934338569641113, "learning_rate": 3.451206339211078e-06, "loss": 0.48537488, "memory(GiB)": 34.88, "step": 91635, "train_speed(iter/s)": 0.410698 }, { "acc": 0.91948891, "epoch": 2.481249830774646, "grad_norm": 5.854734897613525, "learning_rate": 3.4506743311687645e-06, "loss": 0.48647332, "memory(GiB)": 34.88, "step": 91640, "train_speed(iter/s)": 0.410699 }, { "acc": 0.92721987, "epoch": 2.4813852110578614, "grad_norm": 10.549790382385254, "learning_rate": 3.450142342541063e-06, "loss": 0.39286828, "memory(GiB)": 34.88, "step": 91645, "train_speed(iter/s)": 0.4107 }, { "acc": 0.94584904, "epoch": 2.481520591341077, "grad_norm": 4.893763542175293, "learning_rate": 3.449610373334638e-06, "loss": 0.31475935, "memory(GiB)": 34.88, "step": 91650, "train_speed(iter/s)": 0.410701 }, { "acc": 0.93903484, "epoch": 2.4816559716242925, "grad_norm": 6.279306411743164, "learning_rate": 3.4490784235561516e-06, "loss": 0.31891475, "memory(GiB)": 34.88, "step": 91655, "train_speed(iter/s)": 0.410703 }, { "acc": 0.92567739, "epoch": 2.4817913519075083, "grad_norm": 6.598715305328369, "learning_rate": 3.4485464932122716e-06, "loss": 0.35945306, "memory(GiB)": 34.88, "step": 91660, "train_speed(iter/s)": 0.410704 }, { "acc": 0.92315044, "epoch": 2.4819267321907237, "grad_norm": 5.210383415222168, "learning_rate": 3.4480145823096558e-06, "loss": 0.43688674, "memory(GiB)": 34.88, "step": 91665, "train_speed(iter/s)": 0.410705 }, { "acc": 0.91392393, "epoch": 2.4820621124739395, "grad_norm": 6.85453987121582, "learning_rate": 3.4474826908549723e-06, "loss": 0.55024328, "memory(GiB)": 34.88, "step": 91670, "train_speed(iter/s)": 0.410706 }, { "acc": 0.91765938, "epoch": 2.482197492757155, "grad_norm": 7.354465961456299, "learning_rate": 3.446950818854881e-06, "loss": 0.46900625, "memory(GiB)": 34.88, "step": 91675, "train_speed(iter/s)": 0.410707 }, { "acc": 0.90909271, "epoch": 2.48233287304037, "grad_norm": 3.749837636947632, "learning_rate": 3.446418966316048e-06, "loss": 0.46747675, "memory(GiB)": 34.88, "step": 91680, "train_speed(iter/s)": 0.410708 }, { "acc": 0.94092321, "epoch": 2.482468253323586, "grad_norm": 8.466039657592773, "learning_rate": 3.4458871332451327e-06, "loss": 0.32895846, "memory(GiB)": 34.88, "step": 91685, "train_speed(iter/s)": 0.41071 }, { "acc": 0.90555725, "epoch": 2.4826036336068014, "grad_norm": 15.202317237854004, "learning_rate": 3.4453553196487975e-06, "loss": 0.48056011, "memory(GiB)": 34.88, "step": 91690, "train_speed(iter/s)": 0.410711 }, { "acc": 0.93000326, "epoch": 2.482739013890017, "grad_norm": 5.247762680053711, "learning_rate": 3.4448235255337063e-06, "loss": 0.4665432, "memory(GiB)": 34.88, "step": 91695, "train_speed(iter/s)": 0.410712 }, { "acc": 0.93690014, "epoch": 2.4828743941732325, "grad_norm": 8.680163383483887, "learning_rate": 3.4442917509065194e-06, "loss": 0.3504468, "memory(GiB)": 34.88, "step": 91700, "train_speed(iter/s)": 0.410713 }, { "acc": 0.92498636, "epoch": 2.4830097744564483, "grad_norm": 7.32316255569458, "learning_rate": 3.4437599957738995e-06, "loss": 0.44204364, "memory(GiB)": 34.88, "step": 91705, "train_speed(iter/s)": 0.410714 }, { "acc": 0.93204107, "epoch": 2.4831451547396637, "grad_norm": 11.30143928527832, "learning_rate": 3.443228260142506e-06, "loss": 0.38098135, "memory(GiB)": 34.88, "step": 91710, "train_speed(iter/s)": 0.410716 }, { "acc": 0.92419081, "epoch": 2.483280535022879, "grad_norm": 5.397745609283447, "learning_rate": 3.4426965440190017e-06, "loss": 0.42528696, "memory(GiB)": 34.88, "step": 91715, "train_speed(iter/s)": 0.410717 }, { "acc": 0.92144623, "epoch": 2.483415915306095, "grad_norm": 6.0441741943359375, "learning_rate": 3.4421648474100456e-06, "loss": 0.45944343, "memory(GiB)": 34.88, "step": 91720, "train_speed(iter/s)": 0.410718 }, { "acc": 0.92372236, "epoch": 2.48355129558931, "grad_norm": 12.188824653625488, "learning_rate": 3.441633170322302e-06, "loss": 0.4853857, "memory(GiB)": 34.88, "step": 91725, "train_speed(iter/s)": 0.410719 }, { "acc": 0.934795, "epoch": 2.483686675872526, "grad_norm": 5.906579494476318, "learning_rate": 3.4411015127624274e-06, "loss": 0.3229131, "memory(GiB)": 34.88, "step": 91730, "train_speed(iter/s)": 0.41072 }, { "acc": 0.95123577, "epoch": 2.4838220561557414, "grad_norm": 8.201922416687012, "learning_rate": 3.4405698747370825e-06, "loss": 0.22564888, "memory(GiB)": 34.88, "step": 91735, "train_speed(iter/s)": 0.410722 }, { "acc": 0.91095304, "epoch": 2.483957436438957, "grad_norm": 8.836580276489258, "learning_rate": 3.4400382562529282e-06, "loss": 0.56989632, "memory(GiB)": 34.88, "step": 91740, "train_speed(iter/s)": 0.410723 }, { "acc": 0.92394676, "epoch": 2.4840928167221725, "grad_norm": 8.707757949829102, "learning_rate": 3.4395066573166224e-06, "loss": 0.42940555, "memory(GiB)": 34.88, "step": 91745, "train_speed(iter/s)": 0.410724 }, { "acc": 0.92483072, "epoch": 2.4842281970053883, "grad_norm": 5.500741958618164, "learning_rate": 3.4389750779348268e-06, "loss": 0.41160283, "memory(GiB)": 34.88, "step": 91750, "train_speed(iter/s)": 0.410725 }, { "acc": 0.93682766, "epoch": 2.4843635772886037, "grad_norm": 10.023859024047852, "learning_rate": 3.4384435181141975e-06, "loss": 0.33292513, "memory(GiB)": 34.88, "step": 91755, "train_speed(iter/s)": 0.410726 }, { "acc": 0.91761999, "epoch": 2.4844989575718195, "grad_norm": 12.602356910705566, "learning_rate": 3.4379119778613976e-06, "loss": 0.46849394, "memory(GiB)": 34.88, "step": 91760, "train_speed(iter/s)": 0.410728 }, { "acc": 0.92447348, "epoch": 2.484634337855035, "grad_norm": 9.761223793029785, "learning_rate": 3.4373804571830833e-06, "loss": 0.40595303, "memory(GiB)": 34.88, "step": 91765, "train_speed(iter/s)": 0.410729 }, { "acc": 0.9305933, "epoch": 2.48476971813825, "grad_norm": 6.295718193054199, "learning_rate": 3.4368489560859103e-06, "loss": 0.39921806, "memory(GiB)": 34.88, "step": 91770, "train_speed(iter/s)": 0.41073 }, { "acc": 0.94082527, "epoch": 2.484905098421466, "grad_norm": 8.238088607788086, "learning_rate": 3.436317474576541e-06, "loss": 0.29027004, "memory(GiB)": 34.88, "step": 91775, "train_speed(iter/s)": 0.410731 }, { "acc": 0.94849186, "epoch": 2.4850404787046814, "grad_norm": 9.003278732299805, "learning_rate": 3.4357860126616304e-06, "loss": 0.28788176, "memory(GiB)": 34.88, "step": 91780, "train_speed(iter/s)": 0.410733 }, { "acc": 0.93946934, "epoch": 2.485175858987897, "grad_norm": 12.744536399841309, "learning_rate": 3.435254570347838e-06, "loss": 0.38154523, "memory(GiB)": 34.88, "step": 91785, "train_speed(iter/s)": 0.410734 }, { "acc": 0.91856289, "epoch": 2.4853112392711125, "grad_norm": 10.538320541381836, "learning_rate": 3.4347231476418186e-06, "loss": 0.4419632, "memory(GiB)": 34.88, "step": 91790, "train_speed(iter/s)": 0.410735 }, { "acc": 0.93507185, "epoch": 2.4854466195543283, "grad_norm": 4.856647491455078, "learning_rate": 3.4341917445502345e-06, "loss": 0.38590713, "memory(GiB)": 34.88, "step": 91795, "train_speed(iter/s)": 0.410736 }, { "acc": 0.93206882, "epoch": 2.4855819998375437, "grad_norm": 10.252869606018066, "learning_rate": 3.433660361079735e-06, "loss": 0.34900784, "memory(GiB)": 34.88, "step": 91800, "train_speed(iter/s)": 0.410737 }, { "acc": 0.92065754, "epoch": 2.485717380120759, "grad_norm": 7.773053169250488, "learning_rate": 3.4331289972369836e-06, "loss": 0.38849003, "memory(GiB)": 34.88, "step": 91805, "train_speed(iter/s)": 0.410738 }, { "acc": 0.93811569, "epoch": 2.485852760403975, "grad_norm": 2.8060710430145264, "learning_rate": 3.4325976530286338e-06, "loss": 0.38639946, "memory(GiB)": 34.88, "step": 91810, "train_speed(iter/s)": 0.410739 }, { "acc": 0.91973, "epoch": 2.48598814068719, "grad_norm": 11.733784675598145, "learning_rate": 3.4320663284613403e-06, "loss": 0.51538877, "memory(GiB)": 34.88, "step": 91815, "train_speed(iter/s)": 0.410741 }, { "acc": 0.91843729, "epoch": 2.486123520970406, "grad_norm": 7.627865791320801, "learning_rate": 3.4315350235417616e-06, "loss": 0.49334822, "memory(GiB)": 34.88, "step": 91820, "train_speed(iter/s)": 0.410742 }, { "acc": 0.9322897, "epoch": 2.4862589012536214, "grad_norm": 7.334921836853027, "learning_rate": 3.4310037382765517e-06, "loss": 0.3228415, "memory(GiB)": 34.88, "step": 91825, "train_speed(iter/s)": 0.410743 }, { "acc": 0.93183994, "epoch": 2.486394281536837, "grad_norm": 16.429834365844727, "learning_rate": 3.4304724726723664e-06, "loss": 0.41564445, "memory(GiB)": 34.88, "step": 91830, "train_speed(iter/s)": 0.410744 }, { "acc": 0.92229939, "epoch": 2.4865296618200525, "grad_norm": 7.499748706817627, "learning_rate": 3.4299412267358605e-06, "loss": 0.42585526, "memory(GiB)": 34.88, "step": 91835, "train_speed(iter/s)": 0.410746 }, { "acc": 0.9271904, "epoch": 2.486665042103268, "grad_norm": 8.34585189819336, "learning_rate": 3.4294100004736913e-06, "loss": 0.44668207, "memory(GiB)": 34.88, "step": 91840, "train_speed(iter/s)": 0.410747 }, { "acc": 0.90885973, "epoch": 2.4868004223864837, "grad_norm": 12.777667999267578, "learning_rate": 3.428878793892509e-06, "loss": 0.55015039, "memory(GiB)": 34.88, "step": 91845, "train_speed(iter/s)": 0.410748 }, { "acc": 0.92543888, "epoch": 2.486935802669699, "grad_norm": 10.350720405578613, "learning_rate": 3.4283476069989737e-06, "loss": 0.40619206, "memory(GiB)": 34.88, "step": 91850, "train_speed(iter/s)": 0.410749 }, { "acc": 0.92099972, "epoch": 2.487071182952915, "grad_norm": 9.364048957824707, "learning_rate": 3.4278164397997343e-06, "loss": 0.47299566, "memory(GiB)": 34.88, "step": 91855, "train_speed(iter/s)": 0.410751 }, { "acc": 0.92217693, "epoch": 2.48720656323613, "grad_norm": 20.01729393005371, "learning_rate": 3.427285292301447e-06, "loss": 0.4608808, "memory(GiB)": 34.88, "step": 91860, "train_speed(iter/s)": 0.410752 }, { "acc": 0.93006821, "epoch": 2.487341943519346, "grad_norm": 11.264753341674805, "learning_rate": 3.4267541645107657e-06, "loss": 0.43741908, "memory(GiB)": 34.88, "step": 91865, "train_speed(iter/s)": 0.410753 }, { "acc": 0.92041245, "epoch": 2.4874773238025614, "grad_norm": 11.377359390258789, "learning_rate": 3.4262230564343417e-06, "loss": 0.39624698, "memory(GiB)": 34.88, "step": 91870, "train_speed(iter/s)": 0.410754 }, { "acc": 0.92262135, "epoch": 2.4876127040857767, "grad_norm": 9.363088607788086, "learning_rate": 3.4256919680788327e-06, "loss": 0.40310802, "memory(GiB)": 34.88, "step": 91875, "train_speed(iter/s)": 0.410755 }, { "acc": 0.93612232, "epoch": 2.4877480843689925, "grad_norm": 6.13600492477417, "learning_rate": 3.425160899450886e-06, "loss": 0.32802052, "memory(GiB)": 34.88, "step": 91880, "train_speed(iter/s)": 0.410757 }, { "acc": 0.9383461, "epoch": 2.487883464652208, "grad_norm": 6.495283126831055, "learning_rate": 3.42462985055716e-06, "loss": 0.25549989, "memory(GiB)": 34.88, "step": 91885, "train_speed(iter/s)": 0.410758 }, { "acc": 0.92974892, "epoch": 2.4880188449354237, "grad_norm": 7.136729717254639, "learning_rate": 3.424098821404303e-06, "loss": 0.34147921, "memory(GiB)": 34.88, "step": 91890, "train_speed(iter/s)": 0.410759 }, { "acc": 0.94380808, "epoch": 2.488154225218639, "grad_norm": 5.37205696105957, "learning_rate": 3.4235678119989673e-06, "loss": 0.28788395, "memory(GiB)": 34.88, "step": 91895, "train_speed(iter/s)": 0.41076 }, { "acc": 0.93309383, "epoch": 2.488289605501855, "grad_norm": 5.579254627227783, "learning_rate": 3.423036822347807e-06, "loss": 0.35735896, "memory(GiB)": 34.88, "step": 91900, "train_speed(iter/s)": 0.410761 }, { "acc": 0.93438644, "epoch": 2.48842498578507, "grad_norm": 6.878359794616699, "learning_rate": 3.422505852457471e-06, "loss": 0.37501495, "memory(GiB)": 34.88, "step": 91905, "train_speed(iter/s)": 0.410763 }, { "acc": 0.93248539, "epoch": 2.4885603660682856, "grad_norm": 7.569511890411377, "learning_rate": 3.4219749023346155e-06, "loss": 0.40837545, "memory(GiB)": 34.88, "step": 91910, "train_speed(iter/s)": 0.410764 }, { "acc": 0.93948536, "epoch": 2.4886957463515014, "grad_norm": 7.558499813079834, "learning_rate": 3.421443971985885e-06, "loss": 0.34752872, "memory(GiB)": 34.88, "step": 91915, "train_speed(iter/s)": 0.410765 }, { "acc": 0.93129025, "epoch": 2.4888311266347167, "grad_norm": 5.866647243499756, "learning_rate": 3.4209130614179375e-06, "loss": 0.30406275, "memory(GiB)": 34.88, "step": 91920, "train_speed(iter/s)": 0.410766 }, { "acc": 0.94678955, "epoch": 2.4889665069179325, "grad_norm": 7.91473388671875, "learning_rate": 3.4203821706374175e-06, "loss": 0.31339307, "memory(GiB)": 34.88, "step": 91925, "train_speed(iter/s)": 0.410768 }, { "acc": 0.94216089, "epoch": 2.489101887201148, "grad_norm": 8.209867477416992, "learning_rate": 3.419851299650981e-06, "loss": 0.34544806, "memory(GiB)": 34.88, "step": 91930, "train_speed(iter/s)": 0.410769 }, { "acc": 0.93428173, "epoch": 2.4892372674843637, "grad_norm": 9.886237144470215, "learning_rate": 3.419320448465275e-06, "loss": 0.35856211, "memory(GiB)": 34.88, "step": 91935, "train_speed(iter/s)": 0.41077 }, { "acc": 0.93872328, "epoch": 2.489372647767579, "grad_norm": 13.894619941711426, "learning_rate": 3.4187896170869468e-06, "loss": 0.34391127, "memory(GiB)": 34.88, "step": 91940, "train_speed(iter/s)": 0.410771 }, { "acc": 0.91940536, "epoch": 2.489508028050795, "grad_norm": 11.75777530670166, "learning_rate": 3.4182588055226527e-06, "loss": 0.46808195, "memory(GiB)": 34.88, "step": 91945, "train_speed(iter/s)": 0.410772 }, { "acc": 0.93496647, "epoch": 2.48964340833401, "grad_norm": 6.564286708831787, "learning_rate": 3.4177280137790355e-06, "loss": 0.40215955, "memory(GiB)": 34.88, "step": 91950, "train_speed(iter/s)": 0.410773 }, { "acc": 0.92780542, "epoch": 2.489778788617226, "grad_norm": 5.837798118591309, "learning_rate": 3.4171972418627506e-06, "loss": 0.4176228, "memory(GiB)": 34.88, "step": 91955, "train_speed(iter/s)": 0.410775 }, { "acc": 0.92677374, "epoch": 2.4899141689004414, "grad_norm": 17.709074020385742, "learning_rate": 3.4166664897804415e-06, "loss": 0.46846304, "memory(GiB)": 34.88, "step": 91960, "train_speed(iter/s)": 0.410776 }, { "acc": 0.92837906, "epoch": 2.4900495491836567, "grad_norm": 8.188998222351074, "learning_rate": 3.4161357575387623e-06, "loss": 0.39489205, "memory(GiB)": 34.88, "step": 91965, "train_speed(iter/s)": 0.410777 }, { "acc": 0.94339905, "epoch": 2.4901849294668725, "grad_norm": 7.193375110626221, "learning_rate": 3.415605045144355e-06, "loss": 0.29112687, "memory(GiB)": 34.88, "step": 91970, "train_speed(iter/s)": 0.410778 }, { "acc": 0.94546824, "epoch": 2.490320309750088, "grad_norm": 4.397403240203857, "learning_rate": 3.4150743526038734e-06, "loss": 0.32176676, "memory(GiB)": 34.88, "step": 91975, "train_speed(iter/s)": 0.410779 }, { "acc": 0.93427887, "epoch": 2.4904556900333037, "grad_norm": 8.172209739685059, "learning_rate": 3.4145436799239645e-06, "loss": 0.4553484, "memory(GiB)": 34.88, "step": 91980, "train_speed(iter/s)": 0.410781 }, { "acc": 0.92043858, "epoch": 2.490591070316519, "grad_norm": 18.151432037353516, "learning_rate": 3.4140130271112715e-06, "loss": 0.36869123, "memory(GiB)": 34.88, "step": 91985, "train_speed(iter/s)": 0.410782 }, { "acc": 0.94828777, "epoch": 2.490726450599735, "grad_norm": 4.913707733154297, "learning_rate": 3.4134823941724486e-06, "loss": 0.3205133, "memory(GiB)": 34.88, "step": 91990, "train_speed(iter/s)": 0.410783 }, { "acc": 0.91237535, "epoch": 2.49086183088295, "grad_norm": 4.365383625030518, "learning_rate": 3.4129517811141373e-06, "loss": 0.42967129, "memory(GiB)": 34.88, "step": 91995, "train_speed(iter/s)": 0.410784 }, { "acc": 0.94135075, "epoch": 2.4909972111661656, "grad_norm": 4.706893444061279, "learning_rate": 3.412421187942989e-06, "loss": 0.31587439, "memory(GiB)": 34.88, "step": 92000, "train_speed(iter/s)": 0.410785 }, { "acc": 0.92885571, "epoch": 2.4911325914493814, "grad_norm": 3.026615619659424, "learning_rate": 3.4118906146656463e-06, "loss": 0.38671818, "memory(GiB)": 34.88, "step": 92005, "train_speed(iter/s)": 0.410787 }, { "acc": 0.94411974, "epoch": 2.4912679717325967, "grad_norm": 4.395601272583008, "learning_rate": 3.4113600612887585e-06, "loss": 0.3379288, "memory(GiB)": 34.88, "step": 92010, "train_speed(iter/s)": 0.410788 }, { "acc": 0.9482893, "epoch": 2.4914033520158125, "grad_norm": 5.692201137542725, "learning_rate": 3.4108295278189717e-06, "loss": 0.25874426, "memory(GiB)": 34.88, "step": 92015, "train_speed(iter/s)": 0.410789 }, { "acc": 0.94710693, "epoch": 2.491538732299028, "grad_norm": 4.51724100112915, "learning_rate": 3.410299014262931e-06, "loss": 0.30381322, "memory(GiB)": 34.88, "step": 92020, "train_speed(iter/s)": 0.41079 }, { "acc": 0.93729153, "epoch": 2.4916741125822437, "grad_norm": 13.018631935119629, "learning_rate": 3.409768520627284e-06, "loss": 0.30935409, "memory(GiB)": 34.88, "step": 92025, "train_speed(iter/s)": 0.410791 }, { "acc": 0.92695875, "epoch": 2.491809492865459, "grad_norm": 33.94918441772461, "learning_rate": 3.4092380469186712e-06, "loss": 0.33865528, "memory(GiB)": 34.88, "step": 92030, "train_speed(iter/s)": 0.410793 }, { "acc": 0.93141422, "epoch": 2.4919448731486744, "grad_norm": 13.64452075958252, "learning_rate": 3.408707593143745e-06, "loss": 0.35521042, "memory(GiB)": 34.88, "step": 92035, "train_speed(iter/s)": 0.410794 }, { "acc": 0.92471886, "epoch": 2.49208025343189, "grad_norm": 19.997669219970703, "learning_rate": 3.4081771593091417e-06, "loss": 0.40859661, "memory(GiB)": 34.88, "step": 92040, "train_speed(iter/s)": 0.410795 }, { "acc": 0.93166637, "epoch": 2.4922156337151056, "grad_norm": 12.034464836120605, "learning_rate": 3.4076467454215156e-06, "loss": 0.42728834, "memory(GiB)": 34.88, "step": 92045, "train_speed(iter/s)": 0.410796 }, { "acc": 0.93374138, "epoch": 2.4923510139983214, "grad_norm": 8.689891815185547, "learning_rate": 3.407116351487503e-06, "loss": 0.4130434, "memory(GiB)": 34.88, "step": 92050, "train_speed(iter/s)": 0.410798 }, { "acc": 0.94551649, "epoch": 2.4924863942815367, "grad_norm": 4.8786516189575195, "learning_rate": 3.406585977513754e-06, "loss": 0.32250652, "memory(GiB)": 34.88, "step": 92055, "train_speed(iter/s)": 0.410799 }, { "acc": 0.93166447, "epoch": 2.4926217745647525, "grad_norm": 5.263222694396973, "learning_rate": 3.40605562350691e-06, "loss": 0.32622976, "memory(GiB)": 34.88, "step": 92060, "train_speed(iter/s)": 0.4108 }, { "acc": 0.91426811, "epoch": 2.492757154847968, "grad_norm": 12.927383422851562, "learning_rate": 3.405525289473613e-06, "loss": 0.44264798, "memory(GiB)": 34.88, "step": 92065, "train_speed(iter/s)": 0.410801 }, { "acc": 0.92171936, "epoch": 2.4928925351311833, "grad_norm": 12.882588386535645, "learning_rate": 3.404994975420511e-06, "loss": 0.36022606, "memory(GiB)": 34.88, "step": 92070, "train_speed(iter/s)": 0.410802 }, { "acc": 0.93400288, "epoch": 2.493027915414399, "grad_norm": 3.4190826416015625, "learning_rate": 3.404464681354242e-06, "loss": 0.36925979, "memory(GiB)": 34.88, "step": 92075, "train_speed(iter/s)": 0.410804 }, { "acc": 0.92131233, "epoch": 2.4931632956976144, "grad_norm": 7.620737075805664, "learning_rate": 3.403934407281454e-06, "loss": 0.38771174, "memory(GiB)": 34.88, "step": 92080, "train_speed(iter/s)": 0.410805 }, { "acc": 0.94273167, "epoch": 2.49329867598083, "grad_norm": 6.991121292114258, "learning_rate": 3.4034041532087848e-06, "loss": 0.2978498, "memory(GiB)": 34.88, "step": 92085, "train_speed(iter/s)": 0.410806 }, { "acc": 0.94799919, "epoch": 2.4934340562640456, "grad_norm": 8.239219665527344, "learning_rate": 3.4028739191428805e-06, "loss": 0.24774432, "memory(GiB)": 34.88, "step": 92090, "train_speed(iter/s)": 0.410807 }, { "acc": 0.91254063, "epoch": 2.4935694365472614, "grad_norm": 4.462136745452881, "learning_rate": 3.4023437050903814e-06, "loss": 0.46821947, "memory(GiB)": 34.88, "step": 92095, "train_speed(iter/s)": 0.410809 }, { "acc": 0.92498827, "epoch": 2.4937048168304767, "grad_norm": 17.817081451416016, "learning_rate": 3.4018135110579313e-06, "loss": 0.45183468, "memory(GiB)": 34.88, "step": 92100, "train_speed(iter/s)": 0.41081 }, { "acc": 0.90722332, "epoch": 2.4938401971136925, "grad_norm": 6.257938385009766, "learning_rate": 3.401283337052173e-06, "loss": 0.53152857, "memory(GiB)": 34.88, "step": 92105, "train_speed(iter/s)": 0.410811 }, { "acc": 0.93912773, "epoch": 2.493975577396908, "grad_norm": 7.350663185119629, "learning_rate": 3.400753183079742e-06, "loss": 0.31900749, "memory(GiB)": 34.88, "step": 92110, "train_speed(iter/s)": 0.410812 }, { "acc": 0.94762802, "epoch": 2.4941109576801237, "grad_norm": 6.678492069244385, "learning_rate": 3.4002230491472865e-06, "loss": 0.29279194, "memory(GiB)": 34.88, "step": 92115, "train_speed(iter/s)": 0.410813 }, { "acc": 0.92068443, "epoch": 2.494246337963339, "grad_norm": 43.05876541137695, "learning_rate": 3.3996929352614407e-06, "loss": 0.47878904, "memory(GiB)": 34.88, "step": 92120, "train_speed(iter/s)": 0.410815 }, { "acc": 0.93877048, "epoch": 2.4943817182465544, "grad_norm": 8.789795875549316, "learning_rate": 3.3991628414288513e-06, "loss": 0.35373056, "memory(GiB)": 34.88, "step": 92125, "train_speed(iter/s)": 0.410816 }, { "acc": 0.93501816, "epoch": 2.49451709852977, "grad_norm": 5.855586528778076, "learning_rate": 3.398632767656156e-06, "loss": 0.34620001, "memory(GiB)": 34.88, "step": 92130, "train_speed(iter/s)": 0.410817 }, { "acc": 0.93841228, "epoch": 2.4946524788129856, "grad_norm": 5.064289569854736, "learning_rate": 3.3981027139499954e-06, "loss": 0.3547019, "memory(GiB)": 34.88, "step": 92135, "train_speed(iter/s)": 0.410818 }, { "acc": 0.94641027, "epoch": 2.4947878590962014, "grad_norm": 6.849288463592529, "learning_rate": 3.3975726803170096e-06, "loss": 0.30022669, "memory(GiB)": 34.88, "step": 92140, "train_speed(iter/s)": 0.410819 }, { "acc": 0.94476748, "epoch": 2.4949232393794167, "grad_norm": 7.322772979736328, "learning_rate": 3.3970426667638397e-06, "loss": 0.27938538, "memory(GiB)": 34.88, "step": 92145, "train_speed(iter/s)": 0.41082 }, { "acc": 0.9524354, "epoch": 2.4950586196626325, "grad_norm": 6.51435661315918, "learning_rate": 3.396512673297123e-06, "loss": 0.28063583, "memory(GiB)": 34.88, "step": 92150, "train_speed(iter/s)": 0.410822 }, { "acc": 0.91616545, "epoch": 2.495193999945848, "grad_norm": 5.719313621520996, "learning_rate": 3.3959826999234974e-06, "loss": 0.42227283, "memory(GiB)": 34.88, "step": 92155, "train_speed(iter/s)": 0.410823 }, { "acc": 0.94058504, "epoch": 2.4953293802290633, "grad_norm": 7.9738240242004395, "learning_rate": 3.3954527466496063e-06, "loss": 0.3362062, "memory(GiB)": 34.88, "step": 92160, "train_speed(iter/s)": 0.410824 }, { "acc": 0.90662804, "epoch": 2.495464760512279, "grad_norm": 13.916293144226074, "learning_rate": 3.3949228134820837e-06, "loss": 0.52863927, "memory(GiB)": 34.88, "step": 92165, "train_speed(iter/s)": 0.410825 }, { "acc": 0.91523685, "epoch": 2.4956001407954944, "grad_norm": 5.901618480682373, "learning_rate": 3.394392900427572e-06, "loss": 0.54489217, "memory(GiB)": 34.88, "step": 92170, "train_speed(iter/s)": 0.410826 }, { "acc": 0.92745953, "epoch": 2.49573552107871, "grad_norm": 8.49148178100586, "learning_rate": 3.393863007492707e-06, "loss": 0.46734042, "memory(GiB)": 34.88, "step": 92175, "train_speed(iter/s)": 0.410828 }, { "acc": 0.92040272, "epoch": 2.4958709013619256, "grad_norm": 5.052355766296387, "learning_rate": 3.3933331346841277e-06, "loss": 0.42990088, "memory(GiB)": 34.88, "step": 92180, "train_speed(iter/s)": 0.410829 }, { "acc": 0.93590469, "epoch": 2.4960062816451414, "grad_norm": 12.910426139831543, "learning_rate": 3.3928032820084726e-06, "loss": 0.36070919, "memory(GiB)": 34.88, "step": 92185, "train_speed(iter/s)": 0.41083 }, { "acc": 0.91807356, "epoch": 2.4961416619283567, "grad_norm": 9.055218696594238, "learning_rate": 3.3922734494723757e-06, "loss": 0.38967528, "memory(GiB)": 34.88, "step": 92190, "train_speed(iter/s)": 0.410831 }, { "acc": 0.93599892, "epoch": 2.496277042211572, "grad_norm": 18.63888168334961, "learning_rate": 3.391743637082478e-06, "loss": 0.36774154, "memory(GiB)": 34.88, "step": 92195, "train_speed(iter/s)": 0.410832 }, { "acc": 0.92826834, "epoch": 2.496412422494788, "grad_norm": 4.391458511352539, "learning_rate": 3.391213844845414e-06, "loss": 0.3608556, "memory(GiB)": 34.88, "step": 92200, "train_speed(iter/s)": 0.410834 }, { "acc": 0.94872341, "epoch": 2.4965478027780033, "grad_norm": 12.11341381072998, "learning_rate": 3.390684072767822e-06, "loss": 0.35323663, "memory(GiB)": 34.88, "step": 92205, "train_speed(iter/s)": 0.410835 }, { "acc": 0.93860435, "epoch": 2.496683183061219, "grad_norm": 6.77217960357666, "learning_rate": 3.390154320856337e-06, "loss": 0.35920861, "memory(GiB)": 34.88, "step": 92210, "train_speed(iter/s)": 0.410836 }, { "acc": 0.93467369, "epoch": 2.4968185633444344, "grad_norm": 7.380570888519287, "learning_rate": 3.389624589117597e-06, "loss": 0.38070242, "memory(GiB)": 34.88, "step": 92215, "train_speed(iter/s)": 0.410837 }, { "acc": 0.92230339, "epoch": 2.49695394362765, "grad_norm": 20.299545288085938, "learning_rate": 3.389094877558236e-06, "loss": 0.41966109, "memory(GiB)": 34.88, "step": 92220, "train_speed(iter/s)": 0.410839 }, { "acc": 0.9176959, "epoch": 2.4970893239108656, "grad_norm": 7.220780372619629, "learning_rate": 3.388565186184891e-06, "loss": 0.44279666, "memory(GiB)": 34.88, "step": 92225, "train_speed(iter/s)": 0.41084 }, { "acc": 0.92361412, "epoch": 2.497224704194081, "grad_norm": 9.097517013549805, "learning_rate": 3.388035515004199e-06, "loss": 0.38505266, "memory(GiB)": 34.88, "step": 92230, "train_speed(iter/s)": 0.410841 }, { "acc": 0.94637117, "epoch": 2.4973600844772967, "grad_norm": 8.140341758728027, "learning_rate": 3.3875058640227897e-06, "loss": 0.30135386, "memory(GiB)": 34.88, "step": 92235, "train_speed(iter/s)": 0.410842 }, { "acc": 0.92381392, "epoch": 2.497495464760512, "grad_norm": 5.354658126831055, "learning_rate": 3.386976233247303e-06, "loss": 0.36623013, "memory(GiB)": 34.88, "step": 92240, "train_speed(iter/s)": 0.410843 }, { "acc": 0.92173662, "epoch": 2.497630845043728, "grad_norm": 5.72299337387085, "learning_rate": 3.386446622684372e-06, "loss": 0.477248, "memory(GiB)": 34.88, "step": 92245, "train_speed(iter/s)": 0.410845 }, { "acc": 0.92261, "epoch": 2.4977662253269433, "grad_norm": 10.241949081420898, "learning_rate": 3.385917032340631e-06, "loss": 0.47520742, "memory(GiB)": 34.88, "step": 92250, "train_speed(iter/s)": 0.410846 }, { "acc": 0.9391799, "epoch": 2.497901605610159, "grad_norm": 7.527523040771484, "learning_rate": 3.385387462222714e-06, "loss": 0.34400823, "memory(GiB)": 34.88, "step": 92255, "train_speed(iter/s)": 0.410847 }, { "acc": 0.92872257, "epoch": 2.4980369858933744, "grad_norm": 2.1445846557617188, "learning_rate": 3.3848579123372565e-06, "loss": 0.41436329, "memory(GiB)": 34.88, "step": 92260, "train_speed(iter/s)": 0.410848 }, { "acc": 0.92719421, "epoch": 2.49817236617659, "grad_norm": 13.81275749206543, "learning_rate": 3.384328382690889e-06, "loss": 0.40865631, "memory(GiB)": 34.88, "step": 92265, "train_speed(iter/s)": 0.41085 }, { "acc": 0.94135695, "epoch": 2.4983077464598056, "grad_norm": 13.619756698608398, "learning_rate": 3.3837988732902487e-06, "loss": 0.33428898, "memory(GiB)": 34.88, "step": 92270, "train_speed(iter/s)": 0.410851 }, { "acc": 0.92568855, "epoch": 2.4984431267430214, "grad_norm": 7.348417282104492, "learning_rate": 3.3832693841419663e-06, "loss": 0.51475325, "memory(GiB)": 34.88, "step": 92275, "train_speed(iter/s)": 0.410852 }, { "acc": 0.9140358, "epoch": 2.4985785070262367, "grad_norm": 6.7047319412231445, "learning_rate": 3.3827399152526745e-06, "loss": 0.4662509, "memory(GiB)": 34.88, "step": 92280, "train_speed(iter/s)": 0.410853 }, { "acc": 0.94109573, "epoch": 2.498713887309452, "grad_norm": 4.785008430480957, "learning_rate": 3.382210466629007e-06, "loss": 0.30892196, "memory(GiB)": 34.88, "step": 92285, "train_speed(iter/s)": 0.410854 }, { "acc": 0.91868753, "epoch": 2.498849267592668, "grad_norm": 6.880056858062744, "learning_rate": 3.3816810382775953e-06, "loss": 0.53640656, "memory(GiB)": 34.88, "step": 92290, "train_speed(iter/s)": 0.410855 }, { "acc": 0.9305316, "epoch": 2.4989846478758833, "grad_norm": 7.339481830596924, "learning_rate": 3.381151630205074e-06, "loss": 0.372157, "memory(GiB)": 34.88, "step": 92295, "train_speed(iter/s)": 0.410857 }, { "acc": 0.93735762, "epoch": 2.499120028159099, "grad_norm": 8.313730239868164, "learning_rate": 3.3806222424180708e-06, "loss": 0.36724958, "memory(GiB)": 34.88, "step": 92300, "train_speed(iter/s)": 0.410858 }, { "acc": 0.9370472, "epoch": 2.4992554084423144, "grad_norm": 9.571297645568848, "learning_rate": 3.380092874923221e-06, "loss": 0.35038857, "memory(GiB)": 34.88, "step": 92305, "train_speed(iter/s)": 0.410859 }, { "acc": 0.93672104, "epoch": 2.49939078872553, "grad_norm": 9.217970848083496, "learning_rate": 3.379563527727155e-06, "loss": 0.38426998, "memory(GiB)": 34.88, "step": 92310, "train_speed(iter/s)": 0.41086 }, { "acc": 0.93084755, "epoch": 2.4995261690087456, "grad_norm": 6.834050178527832, "learning_rate": 3.379034200836501e-06, "loss": 0.32779255, "memory(GiB)": 34.88, "step": 92315, "train_speed(iter/s)": 0.410861 }, { "acc": 0.93220749, "epoch": 2.499661549291961, "grad_norm": 20.582889556884766, "learning_rate": 3.378504894257894e-06, "loss": 0.37493484, "memory(GiB)": 34.88, "step": 92320, "train_speed(iter/s)": 0.410863 }, { "acc": 0.935606, "epoch": 2.4997969295751767, "grad_norm": 8.683173179626465, "learning_rate": 3.3779756079979626e-06, "loss": 0.30679207, "memory(GiB)": 34.88, "step": 92325, "train_speed(iter/s)": 0.410864 }, { "acc": 0.94327431, "epoch": 2.499932309858392, "grad_norm": 6.780651092529297, "learning_rate": 3.377446342063338e-06, "loss": 0.29658623, "memory(GiB)": 34.88, "step": 92330, "train_speed(iter/s)": 0.410865 }, { "acc": 0.93500853, "epoch": 2.500067690141608, "grad_norm": 5.730764389038086, "learning_rate": 3.376917096460649e-06, "loss": 0.3773509, "memory(GiB)": 34.88, "step": 92335, "train_speed(iter/s)": 0.410866 }, { "acc": 0.91140079, "epoch": 2.5002030704248233, "grad_norm": 9.434578895568848, "learning_rate": 3.376387871196527e-06, "loss": 0.53886261, "memory(GiB)": 34.88, "step": 92340, "train_speed(iter/s)": 0.410868 }, { "acc": 0.93280525, "epoch": 2.500338450708039, "grad_norm": 10.844315528869629, "learning_rate": 3.3758586662775993e-06, "loss": 0.37139823, "memory(GiB)": 34.88, "step": 92345, "train_speed(iter/s)": 0.410869 }, { "acc": 0.92049885, "epoch": 2.5004738309912544, "grad_norm": 12.295700073242188, "learning_rate": 3.3753294817104987e-06, "loss": 0.43317165, "memory(GiB)": 34.88, "step": 92350, "train_speed(iter/s)": 0.41087 }, { "acc": 0.93556833, "epoch": 2.50060921127447, "grad_norm": 6.962587356567383, "learning_rate": 3.3748003175018514e-06, "loss": 0.36137409, "memory(GiB)": 34.88, "step": 92355, "train_speed(iter/s)": 0.410872 }, { "acc": 0.92030563, "epoch": 2.5007445915576856, "grad_norm": 5.746126174926758, "learning_rate": 3.374271173658286e-06, "loss": 0.46732054, "memory(GiB)": 34.88, "step": 92360, "train_speed(iter/s)": 0.410872 }, { "acc": 0.92991428, "epoch": 2.500879971840901, "grad_norm": 10.529905319213867, "learning_rate": 3.3737420501864343e-06, "loss": 0.41277113, "memory(GiB)": 34.88, "step": 92365, "train_speed(iter/s)": 0.410874 }, { "acc": 0.92851276, "epoch": 2.5010153521241167, "grad_norm": 5.951601028442383, "learning_rate": 3.37321294709292e-06, "loss": 0.39025457, "memory(GiB)": 34.88, "step": 92370, "train_speed(iter/s)": 0.410875 }, { "acc": 0.93522263, "epoch": 2.501150732407332, "grad_norm": 13.972458839416504, "learning_rate": 3.372683864384376e-06, "loss": 0.34157772, "memory(GiB)": 34.88, "step": 92375, "train_speed(iter/s)": 0.410876 }, { "acc": 0.92275019, "epoch": 2.501286112690548, "grad_norm": 8.796540260314941, "learning_rate": 3.3721548020674266e-06, "loss": 0.45297146, "memory(GiB)": 34.88, "step": 92380, "train_speed(iter/s)": 0.410877 }, { "acc": 0.92602921, "epoch": 2.5014214929737633, "grad_norm": 5.912322521209717, "learning_rate": 3.3716257601487008e-06, "loss": 0.40457964, "memory(GiB)": 34.88, "step": 92385, "train_speed(iter/s)": 0.410878 }, { "acc": 0.94093409, "epoch": 2.5015568732569786, "grad_norm": 7.103000164031982, "learning_rate": 3.371096738634825e-06, "loss": 0.3447299, "memory(GiB)": 34.88, "step": 92390, "train_speed(iter/s)": 0.41088 }, { "acc": 0.92001591, "epoch": 2.5016922535401944, "grad_norm": 6.150974750518799, "learning_rate": 3.370567737532428e-06, "loss": 0.42893634, "memory(GiB)": 34.88, "step": 92395, "train_speed(iter/s)": 0.410881 }, { "acc": 0.93126259, "epoch": 2.5018276338234102, "grad_norm": 4.040574073791504, "learning_rate": 3.3700387568481353e-06, "loss": 0.36575046, "memory(GiB)": 34.88, "step": 92400, "train_speed(iter/s)": 0.410882 }, { "acc": 0.93375063, "epoch": 2.5019630141066256, "grad_norm": 9.221702575683594, "learning_rate": 3.369509796588572e-06, "loss": 0.29004445, "memory(GiB)": 34.88, "step": 92405, "train_speed(iter/s)": 0.410883 }, { "acc": 0.93847046, "epoch": 2.502098394389841, "grad_norm": 7.010797023773193, "learning_rate": 3.368980856760367e-06, "loss": 0.38943863, "memory(GiB)": 34.88, "step": 92410, "train_speed(iter/s)": 0.410885 }, { "acc": 0.93084555, "epoch": 2.5022337746730567, "grad_norm": 6.648216247558594, "learning_rate": 3.3684519373701446e-06, "loss": 0.36905165, "memory(GiB)": 34.88, "step": 92415, "train_speed(iter/s)": 0.410886 }, { "acc": 0.91824369, "epoch": 2.502369154956272, "grad_norm": 10.415081977844238, "learning_rate": 3.367923038424532e-06, "loss": 0.46524987, "memory(GiB)": 34.88, "step": 92420, "train_speed(iter/s)": 0.410887 }, { "acc": 0.92592287, "epoch": 2.5025045352394875, "grad_norm": 6.810117721557617, "learning_rate": 3.367394159930153e-06, "loss": 0.34387918, "memory(GiB)": 34.88, "step": 92425, "train_speed(iter/s)": 0.410888 }, { "acc": 0.90711861, "epoch": 2.5026399155227033, "grad_norm": 8.874018669128418, "learning_rate": 3.3668653018936347e-06, "loss": 0.54191995, "memory(GiB)": 34.88, "step": 92430, "train_speed(iter/s)": 0.410889 }, { "acc": 0.93552332, "epoch": 2.502775295805919, "grad_norm": 6.244002342224121, "learning_rate": 3.3663364643216006e-06, "loss": 0.31652448, "memory(GiB)": 34.88, "step": 92435, "train_speed(iter/s)": 0.41089 }, { "acc": 0.94131079, "epoch": 2.5029106760891344, "grad_norm": 4.134404182434082, "learning_rate": 3.3658076472206757e-06, "loss": 0.24129629, "memory(GiB)": 34.88, "step": 92440, "train_speed(iter/s)": 0.410891 }, { "acc": 0.93113422, "epoch": 2.50304605637235, "grad_norm": 7.3453497886657715, "learning_rate": 3.3652788505974855e-06, "loss": 0.39421051, "memory(GiB)": 34.88, "step": 92445, "train_speed(iter/s)": 0.410893 }, { "acc": 0.92218943, "epoch": 2.5031814366555656, "grad_norm": 9.850456237792969, "learning_rate": 3.364750074458652e-06, "loss": 0.44775906, "memory(GiB)": 34.88, "step": 92450, "train_speed(iter/s)": 0.410894 }, { "acc": 0.93071451, "epoch": 2.503316816938781, "grad_norm": 5.680449962615967, "learning_rate": 3.3642213188108015e-06, "loss": 0.36424704, "memory(GiB)": 34.88, "step": 92455, "train_speed(iter/s)": 0.410895 }, { "acc": 0.91418657, "epoch": 2.5034521972219967, "grad_norm": 17.177717208862305, "learning_rate": 3.363692583660556e-06, "loss": 0.47082715, "memory(GiB)": 34.88, "step": 92460, "train_speed(iter/s)": 0.410896 }, { "acc": 0.9046175, "epoch": 2.503587577505212, "grad_norm": 3.7304136753082275, "learning_rate": 3.3631638690145405e-06, "loss": 0.62430153, "memory(GiB)": 34.88, "step": 92465, "train_speed(iter/s)": 0.410897 }, { "acc": 0.93328276, "epoch": 2.503722957788428, "grad_norm": 5.211508274078369, "learning_rate": 3.362635174879375e-06, "loss": 0.39234035, "memory(GiB)": 34.88, "step": 92470, "train_speed(iter/s)": 0.410899 }, { "acc": 0.93292532, "epoch": 2.5038583380716433, "grad_norm": 5.357461452484131, "learning_rate": 3.362106501261689e-06, "loss": 0.34622035, "memory(GiB)": 34.88, "step": 92475, "train_speed(iter/s)": 0.4109 }, { "acc": 0.94823895, "epoch": 2.5039937183548586, "grad_norm": 6.537470817565918, "learning_rate": 3.3615778481681e-06, "loss": 0.26061983, "memory(GiB)": 34.88, "step": 92480, "train_speed(iter/s)": 0.410901 }, { "acc": 0.93584957, "epoch": 2.5041290986380744, "grad_norm": 8.15755558013916, "learning_rate": 3.361049215605229e-06, "loss": 0.37782946, "memory(GiB)": 34.88, "step": 92485, "train_speed(iter/s)": 0.410902 }, { "acc": 0.93033714, "epoch": 2.50426447892129, "grad_norm": 5.627871036529541, "learning_rate": 3.360520603579703e-06, "loss": 0.34079764, "memory(GiB)": 34.88, "step": 92490, "train_speed(iter/s)": 0.410903 }, { "acc": 0.93206329, "epoch": 2.5043998592045056, "grad_norm": 5.686694622039795, "learning_rate": 3.3599920120981396e-06, "loss": 0.30790987, "memory(GiB)": 34.88, "step": 92495, "train_speed(iter/s)": 0.410904 }, { "acc": 0.93909426, "epoch": 2.504535239487721, "grad_norm": 4.588681221008301, "learning_rate": 3.3594634411671633e-06, "loss": 0.32402086, "memory(GiB)": 34.88, "step": 92500, "train_speed(iter/s)": 0.410906 }, { "acc": 0.95448666, "epoch": 2.5046706197709367, "grad_norm": 2.430130958557129, "learning_rate": 3.358934890793393e-06, "loss": 0.21728244, "memory(GiB)": 34.88, "step": 92505, "train_speed(iter/s)": 0.410907 }, { "acc": 0.94846392, "epoch": 2.504806000054152, "grad_norm": 3.143064022064209, "learning_rate": 3.3584063609834545e-06, "loss": 0.27083709, "memory(GiB)": 34.88, "step": 92510, "train_speed(iter/s)": 0.410908 }, { "acc": 0.92932682, "epoch": 2.5049413803373675, "grad_norm": 7.052997589111328, "learning_rate": 3.357877851743962e-06, "loss": 0.39523773, "memory(GiB)": 34.88, "step": 92515, "train_speed(iter/s)": 0.410909 }, { "acc": 0.92357626, "epoch": 2.5050767606205833, "grad_norm": 9.027069091796875, "learning_rate": 3.3573493630815435e-06, "loss": 0.4203516, "memory(GiB)": 34.88, "step": 92520, "train_speed(iter/s)": 0.41091 }, { "acc": 0.92615318, "epoch": 2.5052121409037986, "grad_norm": 7.784926891326904, "learning_rate": 3.356820895002814e-06, "loss": 0.43196688, "memory(GiB)": 34.88, "step": 92525, "train_speed(iter/s)": 0.410912 }, { "acc": 0.93388548, "epoch": 2.5053475211870144, "grad_norm": 3.612133264541626, "learning_rate": 3.3562924475143936e-06, "loss": 0.33043971, "memory(GiB)": 34.88, "step": 92530, "train_speed(iter/s)": 0.410913 }, { "acc": 0.9398736, "epoch": 2.50548290147023, "grad_norm": 6.313111305236816, "learning_rate": 3.355764020622905e-06, "loss": 0.27070127, "memory(GiB)": 34.88, "step": 92535, "train_speed(iter/s)": 0.410914 }, { "acc": 0.93951588, "epoch": 2.5056182817534456, "grad_norm": 10.561202049255371, "learning_rate": 3.355235614334965e-06, "loss": 0.31655738, "memory(GiB)": 34.88, "step": 92540, "train_speed(iter/s)": 0.410915 }, { "acc": 0.91956806, "epoch": 2.505753662036661, "grad_norm": 15.218775749206543, "learning_rate": 3.354707228657197e-06, "loss": 0.45789132, "memory(GiB)": 34.88, "step": 92545, "train_speed(iter/s)": 0.410917 }, { "acc": 0.92832489, "epoch": 2.5058890423198763, "grad_norm": 8.76190185546875, "learning_rate": 3.3541788635962143e-06, "loss": 0.42324076, "memory(GiB)": 34.88, "step": 92550, "train_speed(iter/s)": 0.410918 }, { "acc": 0.9272337, "epoch": 2.506024422603092, "grad_norm": 14.65485668182373, "learning_rate": 3.353650519158642e-06, "loss": 0.41804738, "memory(GiB)": 34.88, "step": 92555, "train_speed(iter/s)": 0.410919 }, { "acc": 0.93311634, "epoch": 2.506159802886308, "grad_norm": 11.8590087890625, "learning_rate": 3.3531221953510916e-06, "loss": 0.45217967, "memory(GiB)": 34.88, "step": 92560, "train_speed(iter/s)": 0.41092 }, { "acc": 0.92405434, "epoch": 2.5062951831695233, "grad_norm": 8.618104934692383, "learning_rate": 3.35259389218019e-06, "loss": 0.50460033, "memory(GiB)": 34.88, "step": 92565, "train_speed(iter/s)": 0.410921 }, { "acc": 0.94583492, "epoch": 2.5064305634527386, "grad_norm": 13.456679344177246, "learning_rate": 3.3520656096525484e-06, "loss": 0.2977006, "memory(GiB)": 34.88, "step": 92570, "train_speed(iter/s)": 0.410923 }, { "acc": 0.91166811, "epoch": 2.5065659437359544, "grad_norm": 13.25333309173584, "learning_rate": 3.3515373477747853e-06, "loss": 0.46861315, "memory(GiB)": 34.88, "step": 92575, "train_speed(iter/s)": 0.410923 }, { "acc": 0.92263727, "epoch": 2.50670132401917, "grad_norm": 12.63611125946045, "learning_rate": 3.3510091065535216e-06, "loss": 0.46129704, "memory(GiB)": 34.88, "step": 92580, "train_speed(iter/s)": 0.410925 }, { "acc": 0.922402, "epoch": 2.506836704302385, "grad_norm": 14.834687232971191, "learning_rate": 3.350480885995371e-06, "loss": 0.4137464, "memory(GiB)": 34.88, "step": 92585, "train_speed(iter/s)": 0.410926 }, { "acc": 0.9346386, "epoch": 2.506972084585601, "grad_norm": 4.550660610198975, "learning_rate": 3.349952686106954e-06, "loss": 0.39868021, "memory(GiB)": 34.88, "step": 92590, "train_speed(iter/s)": 0.410927 }, { "acc": 0.93754025, "epoch": 2.5071074648688167, "grad_norm": 4.627249717712402, "learning_rate": 3.349424506894883e-06, "loss": 0.35422499, "memory(GiB)": 34.88, "step": 92595, "train_speed(iter/s)": 0.410928 }, { "acc": 0.93881302, "epoch": 2.507242845152032, "grad_norm": 11.445812225341797, "learning_rate": 3.3488963483657794e-06, "loss": 0.35476332, "memory(GiB)": 34.88, "step": 92600, "train_speed(iter/s)": 0.41093 }, { "acc": 0.94128799, "epoch": 2.5073782254352475, "grad_norm": 12.25091552734375, "learning_rate": 3.3483682105262566e-06, "loss": 0.36629434, "memory(GiB)": 34.88, "step": 92605, "train_speed(iter/s)": 0.410931 }, { "acc": 0.93125763, "epoch": 2.5075136057184633, "grad_norm": 7.857213497161865, "learning_rate": 3.347840093382929e-06, "loss": 0.37233543, "memory(GiB)": 34.88, "step": 92610, "train_speed(iter/s)": 0.410932 }, { "acc": 0.93753853, "epoch": 2.5076489860016786, "grad_norm": 5.8186140060424805, "learning_rate": 3.347311996942416e-06, "loss": 0.27927041, "memory(GiB)": 34.88, "step": 92615, "train_speed(iter/s)": 0.410933 }, { "acc": 0.93712349, "epoch": 2.5077843662848944, "grad_norm": 15.203957557678223, "learning_rate": 3.3467839212113283e-06, "loss": 0.34815347, "memory(GiB)": 34.88, "step": 92620, "train_speed(iter/s)": 0.410934 }, { "acc": 0.93819923, "epoch": 2.50791974656811, "grad_norm": 9.047033309936523, "learning_rate": 3.346255866196288e-06, "loss": 0.36890774, "memory(GiB)": 34.88, "step": 92625, "train_speed(iter/s)": 0.410935 }, { "acc": 0.91879826, "epoch": 2.5080551268513256, "grad_norm": 9.101799011230469, "learning_rate": 3.3457278319039026e-06, "loss": 0.45547404, "memory(GiB)": 34.88, "step": 92630, "train_speed(iter/s)": 0.410936 }, { "acc": 0.91975498, "epoch": 2.508190507134541, "grad_norm": 16.00933074951172, "learning_rate": 3.345199818340794e-06, "loss": 0.54743824, "memory(GiB)": 34.88, "step": 92635, "train_speed(iter/s)": 0.410938 }, { "acc": 0.95385971, "epoch": 2.5083258874177563, "grad_norm": 6.414880752563477, "learning_rate": 3.344671825513569e-06, "loss": 0.26030259, "memory(GiB)": 34.88, "step": 92640, "train_speed(iter/s)": 0.410939 }, { "acc": 0.92903023, "epoch": 2.508461267700972, "grad_norm": 10.8761625289917, "learning_rate": 3.344143853428849e-06, "loss": 0.42330551, "memory(GiB)": 34.88, "step": 92645, "train_speed(iter/s)": 0.41094 }, { "acc": 0.92025452, "epoch": 2.5085966479841875, "grad_norm": 8.007731437683105, "learning_rate": 3.343615902093243e-06, "loss": 0.47969985, "memory(GiB)": 34.88, "step": 92650, "train_speed(iter/s)": 0.410941 }, { "acc": 0.91362743, "epoch": 2.5087320282674033, "grad_norm": 8.330288887023926, "learning_rate": 3.343087971513364e-06, "loss": 0.51123457, "memory(GiB)": 34.88, "step": 92655, "train_speed(iter/s)": 0.410942 }, { "acc": 0.92048798, "epoch": 2.5088674085506186, "grad_norm": 7.868228435516357, "learning_rate": 3.3425600616958315e-06, "loss": 0.41102576, "memory(GiB)": 34.88, "step": 92660, "train_speed(iter/s)": 0.410944 }, { "acc": 0.91731863, "epoch": 2.5090027888338344, "grad_norm": 9.597124099731445, "learning_rate": 3.3420321726472502e-06, "loss": 0.4517921, "memory(GiB)": 34.88, "step": 92665, "train_speed(iter/s)": 0.410945 }, { "acc": 0.89982681, "epoch": 2.50913816911705, "grad_norm": 12.597724914550781, "learning_rate": 3.3415043043742417e-06, "loss": 0.55691204, "memory(GiB)": 34.88, "step": 92670, "train_speed(iter/s)": 0.410946 }, { "acc": 0.92351875, "epoch": 2.509273549400265, "grad_norm": 10.656652450561523, "learning_rate": 3.34097645688341e-06, "loss": 0.45045505, "memory(GiB)": 34.88, "step": 92675, "train_speed(iter/s)": 0.410947 }, { "acc": 0.92538395, "epoch": 2.509408929683481, "grad_norm": 4.012306213378906, "learning_rate": 3.3404486301813754e-06, "loss": 0.35190153, "memory(GiB)": 34.88, "step": 92680, "train_speed(iter/s)": 0.410949 }, { "acc": 0.9285533, "epoch": 2.5095443099666963, "grad_norm": 6.094547271728516, "learning_rate": 3.339920824274743e-06, "loss": 0.40214128, "memory(GiB)": 34.88, "step": 92685, "train_speed(iter/s)": 0.41095 }, { "acc": 0.91846838, "epoch": 2.509679690249912, "grad_norm": 17.14859390258789, "learning_rate": 3.33939303917013e-06, "loss": 0.48377028, "memory(GiB)": 34.88, "step": 92690, "train_speed(iter/s)": 0.410951 }, { "acc": 0.91251078, "epoch": 2.5098150705331275, "grad_norm": 11.562332153320312, "learning_rate": 3.338865274874147e-06, "loss": 0.48420143, "memory(GiB)": 34.88, "step": 92695, "train_speed(iter/s)": 0.410952 }, { "acc": 0.92847157, "epoch": 2.5099504508163433, "grad_norm": 12.661820411682129, "learning_rate": 3.3383375313933995e-06, "loss": 0.41908584, "memory(GiB)": 34.88, "step": 92700, "train_speed(iter/s)": 0.410953 }, { "acc": 0.92258453, "epoch": 2.5100858310995586, "grad_norm": 12.314903259277344, "learning_rate": 3.337809808734508e-06, "loss": 0.41574545, "memory(GiB)": 34.88, "step": 92705, "train_speed(iter/s)": 0.410954 }, { "acc": 0.90432854, "epoch": 2.510221211382774, "grad_norm": 6.508810520172119, "learning_rate": 3.337282106904074e-06, "loss": 0.53569736, "memory(GiB)": 34.88, "step": 92710, "train_speed(iter/s)": 0.410956 }, { "acc": 0.93091574, "epoch": 2.51035659166599, "grad_norm": 11.774300575256348, "learning_rate": 3.3367544259087157e-06, "loss": 0.4335659, "memory(GiB)": 34.88, "step": 92715, "train_speed(iter/s)": 0.410957 }, { "acc": 0.92058496, "epoch": 2.5104919719492056, "grad_norm": 6.977020263671875, "learning_rate": 3.3362267657550366e-06, "loss": 0.38368306, "memory(GiB)": 34.88, "step": 92720, "train_speed(iter/s)": 0.410958 }, { "acc": 0.93096561, "epoch": 2.510627352232421, "grad_norm": 6.717900276184082, "learning_rate": 3.335699126449653e-06, "loss": 0.35249534, "memory(GiB)": 34.88, "step": 92725, "train_speed(iter/s)": 0.410959 }, { "acc": 0.92506084, "epoch": 2.5107627325156363, "grad_norm": 3.5544979572296143, "learning_rate": 3.335171507999172e-06, "loss": 0.37084842, "memory(GiB)": 34.88, "step": 92730, "train_speed(iter/s)": 0.41096 }, { "acc": 0.93365402, "epoch": 2.510898112798852, "grad_norm": 10.13849925994873, "learning_rate": 3.3346439104102003e-06, "loss": 0.36619208, "memory(GiB)": 34.88, "step": 92735, "train_speed(iter/s)": 0.410961 }, { "acc": 0.93782368, "epoch": 2.5110334930820675, "grad_norm": 9.61518383026123, "learning_rate": 3.334116333689352e-06, "loss": 0.35598183, "memory(GiB)": 34.88, "step": 92740, "train_speed(iter/s)": 0.410963 }, { "acc": 0.92661076, "epoch": 2.511168873365283, "grad_norm": 7.843109130859375, "learning_rate": 3.3335887778432306e-06, "loss": 0.35233364, "memory(GiB)": 34.88, "step": 92745, "train_speed(iter/s)": 0.410964 }, { "acc": 0.94263783, "epoch": 2.5113042536484986, "grad_norm": 6.7747883796691895, "learning_rate": 3.3330612428784516e-06, "loss": 0.28379269, "memory(GiB)": 34.88, "step": 92750, "train_speed(iter/s)": 0.410965 }, { "acc": 0.94143076, "epoch": 2.5114396339317144, "grad_norm": 5.483659267425537, "learning_rate": 3.3325337288016153e-06, "loss": 0.26249976, "memory(GiB)": 34.88, "step": 92755, "train_speed(iter/s)": 0.410966 }, { "acc": 0.93799095, "epoch": 2.51157501421493, "grad_norm": 5.53135347366333, "learning_rate": 3.332006235619337e-06, "loss": 0.3328867, "memory(GiB)": 34.88, "step": 92760, "train_speed(iter/s)": 0.410968 }, { "acc": 0.94138823, "epoch": 2.511710394498145, "grad_norm": 2.2838239669799805, "learning_rate": 3.3314787633382193e-06, "loss": 0.31933832, "memory(GiB)": 34.88, "step": 92765, "train_speed(iter/s)": 0.410969 }, { "acc": 0.93649712, "epoch": 2.511845774781361, "grad_norm": 8.654508590698242, "learning_rate": 3.3309513119648743e-06, "loss": 0.35416846, "memory(GiB)": 34.88, "step": 92770, "train_speed(iter/s)": 0.41097 }, { "acc": 0.93179379, "epoch": 2.5119811550645763, "grad_norm": 5.639735221862793, "learning_rate": 3.3304238815059083e-06, "loss": 0.42408094, "memory(GiB)": 34.88, "step": 92775, "train_speed(iter/s)": 0.410971 }, { "acc": 0.94228621, "epoch": 2.512116535347792, "grad_norm": 1.9071255922317505, "learning_rate": 3.3298964719679245e-06, "loss": 0.345559, "memory(GiB)": 34.88, "step": 92780, "train_speed(iter/s)": 0.410973 }, { "acc": 0.93479452, "epoch": 2.5122519156310075, "grad_norm": 9.005555152893066, "learning_rate": 3.3293690833575347e-06, "loss": 0.40633588, "memory(GiB)": 34.88, "step": 92785, "train_speed(iter/s)": 0.410974 }, { "acc": 0.93568869, "epoch": 2.5123872959142233, "grad_norm": 13.74782657623291, "learning_rate": 3.3288417156813415e-06, "loss": 0.31942878, "memory(GiB)": 34.88, "step": 92790, "train_speed(iter/s)": 0.410975 }, { "acc": 0.92350807, "epoch": 2.5125226761974386, "grad_norm": 5.627513408660889, "learning_rate": 3.3283143689459554e-06, "loss": 0.41509047, "memory(GiB)": 34.88, "step": 92795, "train_speed(iter/s)": 0.410976 }, { "acc": 0.93513584, "epoch": 2.512658056480654, "grad_norm": 11.275161743164062, "learning_rate": 3.327787043157978e-06, "loss": 0.43779984, "memory(GiB)": 34.88, "step": 92800, "train_speed(iter/s)": 0.410977 }, { "acc": 0.9388114, "epoch": 2.51279343676387, "grad_norm": 12.17824649810791, "learning_rate": 3.3272597383240186e-06, "loss": 0.36290503, "memory(GiB)": 34.88, "step": 92805, "train_speed(iter/s)": 0.410978 }, { "acc": 0.91718302, "epoch": 2.512928817047085, "grad_norm": 12.165477752685547, "learning_rate": 3.32673245445068e-06, "loss": 0.41718712, "memory(GiB)": 34.88, "step": 92810, "train_speed(iter/s)": 0.410979 }, { "acc": 0.94727392, "epoch": 2.513064197330301, "grad_norm": 8.603898048400879, "learning_rate": 3.3262051915445697e-06, "loss": 0.34058218, "memory(GiB)": 34.88, "step": 92815, "train_speed(iter/s)": 0.410981 }, { "acc": 0.93564301, "epoch": 2.5131995776135163, "grad_norm": 5.694202423095703, "learning_rate": 3.3256779496122944e-06, "loss": 0.39408774, "memory(GiB)": 34.88, "step": 92820, "train_speed(iter/s)": 0.410982 }, { "acc": 0.93486137, "epoch": 2.513334957896732, "grad_norm": 10.888798713684082, "learning_rate": 3.325150728660452e-06, "loss": 0.35605712, "memory(GiB)": 34.88, "step": 92825, "train_speed(iter/s)": 0.410983 }, { "acc": 0.92541189, "epoch": 2.5134703381799475, "grad_norm": 15.349726676940918, "learning_rate": 3.3246235286956547e-06, "loss": 0.42100205, "memory(GiB)": 34.88, "step": 92830, "train_speed(iter/s)": 0.410984 }, { "acc": 0.94008274, "epoch": 2.513605718463163, "grad_norm": 3.780137062072754, "learning_rate": 3.3240963497245006e-06, "loss": 0.30065489, "memory(GiB)": 34.88, "step": 92835, "train_speed(iter/s)": 0.410985 }, { "acc": 0.93859015, "epoch": 2.5137410987463786, "grad_norm": 7.377752304077148, "learning_rate": 3.323569191753598e-06, "loss": 0.36947436, "memory(GiB)": 34.88, "step": 92840, "train_speed(iter/s)": 0.410987 }, { "acc": 0.93000317, "epoch": 2.513876479029594, "grad_norm": 6.410431861877441, "learning_rate": 3.3230420547895474e-06, "loss": 0.36849549, "memory(GiB)": 34.88, "step": 92845, "train_speed(iter/s)": 0.410988 }, { "acc": 0.93261566, "epoch": 2.51401185931281, "grad_norm": 16.547199249267578, "learning_rate": 3.3225149388389554e-06, "loss": 0.40215578, "memory(GiB)": 34.88, "step": 92850, "train_speed(iter/s)": 0.410989 }, { "acc": 0.95096273, "epoch": 2.514147239596025, "grad_norm": 6.720540523529053, "learning_rate": 3.321987843908424e-06, "loss": 0.29695296, "memory(GiB)": 34.88, "step": 92855, "train_speed(iter/s)": 0.41099 }, { "acc": 0.92067547, "epoch": 2.514282619879241, "grad_norm": 8.020438194274902, "learning_rate": 3.3214607700045537e-06, "loss": 0.46952686, "memory(GiB)": 34.88, "step": 92860, "train_speed(iter/s)": 0.410991 }, { "acc": 0.92409058, "epoch": 2.5144180001624563, "grad_norm": 7.916062355041504, "learning_rate": 3.320933717133951e-06, "loss": 0.46041007, "memory(GiB)": 34.88, "step": 92865, "train_speed(iter/s)": 0.410993 }, { "acc": 0.92590685, "epoch": 2.5145533804456717, "grad_norm": 7.415717601776123, "learning_rate": 3.3204066853032137e-06, "loss": 0.36155639, "memory(GiB)": 34.88, "step": 92870, "train_speed(iter/s)": 0.410994 }, { "acc": 0.92712688, "epoch": 2.5146887607288875, "grad_norm": 6.929079532623291, "learning_rate": 3.3198796745189486e-06, "loss": 0.35591471, "memory(GiB)": 34.88, "step": 92875, "train_speed(iter/s)": 0.410995 }, { "acc": 0.93479366, "epoch": 2.514824141012103, "grad_norm": 5.219666957855225, "learning_rate": 3.3193526847877544e-06, "loss": 0.39812396, "memory(GiB)": 34.88, "step": 92880, "train_speed(iter/s)": 0.410996 }, { "acc": 0.92621078, "epoch": 2.5149595212953186, "grad_norm": 5.040874481201172, "learning_rate": 3.3188257161162345e-06, "loss": 0.43599501, "memory(GiB)": 34.88, "step": 92885, "train_speed(iter/s)": 0.410998 }, { "acc": 0.92780895, "epoch": 2.515094901578534, "grad_norm": 4.7255024909973145, "learning_rate": 3.318298768510989e-06, "loss": 0.42146311, "memory(GiB)": 34.88, "step": 92890, "train_speed(iter/s)": 0.410999 }, { "acc": 0.92995176, "epoch": 2.51523028186175, "grad_norm": 8.856246948242188, "learning_rate": 3.31777184197862e-06, "loss": 0.36463563, "memory(GiB)": 34.88, "step": 92895, "train_speed(iter/s)": 0.411 }, { "acc": 0.928936, "epoch": 2.515365662144965, "grad_norm": 11.426712989807129, "learning_rate": 3.3172449365257294e-06, "loss": 0.41297555, "memory(GiB)": 34.88, "step": 92900, "train_speed(iter/s)": 0.411001 }, { "acc": 0.94109097, "epoch": 2.5155010424281805, "grad_norm": 5.4062724113464355, "learning_rate": 3.316718052158912e-06, "loss": 0.37010465, "memory(GiB)": 34.88, "step": 92905, "train_speed(iter/s)": 0.411002 }, { "acc": 0.94315548, "epoch": 2.5156364227113963, "grad_norm": 5.636534214019775, "learning_rate": 3.3161911888847754e-06, "loss": 0.33393359, "memory(GiB)": 34.88, "step": 92910, "train_speed(iter/s)": 0.411003 }, { "acc": 0.92416687, "epoch": 2.515771802994612, "grad_norm": 5.963650226593018, "learning_rate": 3.3156643467099147e-06, "loss": 0.42553921, "memory(GiB)": 34.88, "step": 92915, "train_speed(iter/s)": 0.411004 }, { "acc": 0.91700258, "epoch": 2.5159071832778275, "grad_norm": 9.81829833984375, "learning_rate": 3.3151375256409324e-06, "loss": 0.48649263, "memory(GiB)": 34.88, "step": 92920, "train_speed(iter/s)": 0.411005 }, { "acc": 0.91886063, "epoch": 2.516042563561043, "grad_norm": 6.5196990966796875, "learning_rate": 3.314610725684425e-06, "loss": 0.48363132, "memory(GiB)": 34.88, "step": 92925, "train_speed(iter/s)": 0.411006 }, { "acc": 0.92213917, "epoch": 2.5161779438442586, "grad_norm": 10.09200668334961, "learning_rate": 3.3140839468469954e-06, "loss": 0.47037148, "memory(GiB)": 34.88, "step": 92930, "train_speed(iter/s)": 0.411007 }, { "acc": 0.94603901, "epoch": 2.516313324127474, "grad_norm": 8.39906120300293, "learning_rate": 3.3135571891352398e-06, "loss": 0.27987664, "memory(GiB)": 34.88, "step": 92935, "train_speed(iter/s)": 0.411009 }, { "acc": 0.92984486, "epoch": 2.5164487044106894, "grad_norm": 8.823657035827637, "learning_rate": 3.313030452555759e-06, "loss": 0.39370687, "memory(GiB)": 34.88, "step": 92940, "train_speed(iter/s)": 0.41101 }, { "acc": 0.93466034, "epoch": 2.516584084693905, "grad_norm": 8.150047302246094, "learning_rate": 3.312503737115149e-06, "loss": 0.37185488, "memory(GiB)": 34.88, "step": 92945, "train_speed(iter/s)": 0.411011 }, { "acc": 0.924821, "epoch": 2.516719464977121, "grad_norm": 11.069830894470215, "learning_rate": 3.311977042820008e-06, "loss": 0.42395344, "memory(GiB)": 34.88, "step": 92950, "train_speed(iter/s)": 0.411012 }, { "acc": 0.93036747, "epoch": 2.5168548452603363, "grad_norm": 7.538053512573242, "learning_rate": 3.3114503696769367e-06, "loss": 0.4383594, "memory(GiB)": 34.88, "step": 92955, "train_speed(iter/s)": 0.411013 }, { "acc": 0.92951488, "epoch": 2.5169902255435517, "grad_norm": 13.225507736206055, "learning_rate": 3.3109237176925306e-06, "loss": 0.38328466, "memory(GiB)": 34.88, "step": 92960, "train_speed(iter/s)": 0.411015 }, { "acc": 0.94562683, "epoch": 2.5171256058267675, "grad_norm": 3.6711783409118652, "learning_rate": 3.3103970868733874e-06, "loss": 0.29798374, "memory(GiB)": 34.88, "step": 92965, "train_speed(iter/s)": 0.411016 }, { "acc": 0.92985859, "epoch": 2.517260986109983, "grad_norm": 10.131522178649902, "learning_rate": 3.309870477226104e-06, "loss": 0.37879386, "memory(GiB)": 34.88, "step": 92970, "train_speed(iter/s)": 0.411017 }, { "acc": 0.91885338, "epoch": 2.5173963663931986, "grad_norm": 4.7334723472595215, "learning_rate": 3.309343888757278e-06, "loss": 0.47746367, "memory(GiB)": 34.88, "step": 92975, "train_speed(iter/s)": 0.411018 }, { "acc": 0.90680256, "epoch": 2.517531746676414, "grad_norm": 11.993000984191895, "learning_rate": 3.3088173214735063e-06, "loss": 0.52657585, "memory(GiB)": 34.88, "step": 92980, "train_speed(iter/s)": 0.411019 }, { "acc": 0.93307819, "epoch": 2.51766712695963, "grad_norm": 5.3553290367126465, "learning_rate": 3.3082907753813815e-06, "loss": 0.33067484, "memory(GiB)": 34.88, "step": 92985, "train_speed(iter/s)": 0.41102 }, { "acc": 0.94632473, "epoch": 2.517802507242845, "grad_norm": 4.046721935272217, "learning_rate": 3.307764250487505e-06, "loss": 0.29913898, "memory(GiB)": 34.88, "step": 92990, "train_speed(iter/s)": 0.411022 }, { "acc": 0.93021698, "epoch": 2.5179378875260605, "grad_norm": 6.139132499694824, "learning_rate": 3.3072377467984677e-06, "loss": 0.41572065, "memory(GiB)": 34.88, "step": 92995, "train_speed(iter/s)": 0.411023 }, { "acc": 0.93942699, "epoch": 2.5180732678092763, "grad_norm": 4.657756328582764, "learning_rate": 3.3067112643208694e-06, "loss": 0.336904, "memory(GiB)": 34.88, "step": 93000, "train_speed(iter/s)": 0.411024 }, { "acc": 0.93160486, "epoch": 2.5182086480924917, "grad_norm": 5.700733184814453, "learning_rate": 3.306184803061302e-06, "loss": 0.33633347, "memory(GiB)": 34.88, "step": 93005, "train_speed(iter/s)": 0.411025 }, { "acc": 0.90528355, "epoch": 2.5183440283757075, "grad_norm": 8.584779739379883, "learning_rate": 3.3056583630263624e-06, "loss": 0.59744282, "memory(GiB)": 34.88, "step": 93010, "train_speed(iter/s)": 0.411026 }, { "acc": 0.93570404, "epoch": 2.518479408658923, "grad_norm": 12.832771301269531, "learning_rate": 3.3051319442226436e-06, "loss": 0.34625814, "memory(GiB)": 34.88, "step": 93015, "train_speed(iter/s)": 0.411027 }, { "acc": 0.94091644, "epoch": 2.5186147889421386, "grad_norm": 3.4928810596466064, "learning_rate": 3.304605546656743e-06, "loss": 0.30041492, "memory(GiB)": 34.88, "step": 93020, "train_speed(iter/s)": 0.411029 }, { "acc": 0.94385681, "epoch": 2.518750169225354, "grad_norm": 11.463826179504395, "learning_rate": 3.3040791703352526e-06, "loss": 0.36187172, "memory(GiB)": 34.88, "step": 93025, "train_speed(iter/s)": 0.41103 }, { "acc": 0.91291771, "epoch": 2.5188855495085694, "grad_norm": 15.282090187072754, "learning_rate": 3.3035528152647656e-06, "loss": 0.51699243, "memory(GiB)": 34.88, "step": 93030, "train_speed(iter/s)": 0.411031 }, { "acc": 0.94385614, "epoch": 2.519020929791785, "grad_norm": 4.192734241485596, "learning_rate": 3.303026481451877e-06, "loss": 0.3300674, "memory(GiB)": 34.88, "step": 93035, "train_speed(iter/s)": 0.411032 }, { "acc": 0.94506035, "epoch": 2.5191563100750005, "grad_norm": 8.03671932220459, "learning_rate": 3.302500168903179e-06, "loss": 0.30933921, "memory(GiB)": 34.88, "step": 93040, "train_speed(iter/s)": 0.411033 }, { "acc": 0.93311157, "epoch": 2.5192916903582163, "grad_norm": 10.743481636047363, "learning_rate": 3.3019738776252665e-06, "loss": 0.33893723, "memory(GiB)": 34.88, "step": 93045, "train_speed(iter/s)": 0.411034 }, { "acc": 0.91618443, "epoch": 2.5194270706414317, "grad_norm": 8.712252616882324, "learning_rate": 3.30144760762473e-06, "loss": 0.60978069, "memory(GiB)": 34.88, "step": 93050, "train_speed(iter/s)": 0.411035 }, { "acc": 0.93385353, "epoch": 2.5195624509246475, "grad_norm": 8.408723831176758, "learning_rate": 3.3009213589081644e-06, "loss": 0.3657393, "memory(GiB)": 34.88, "step": 93055, "train_speed(iter/s)": 0.411037 }, { "acc": 0.93713608, "epoch": 2.519697831207863, "grad_norm": 6.021378993988037, "learning_rate": 3.3003951314821595e-06, "loss": 0.37913389, "memory(GiB)": 34.88, "step": 93060, "train_speed(iter/s)": 0.411038 }, { "acc": 0.92472849, "epoch": 2.519833211491078, "grad_norm": 6.083061218261719, "learning_rate": 3.2998689253533116e-06, "loss": 0.40594544, "memory(GiB)": 34.88, "step": 93065, "train_speed(iter/s)": 0.411039 }, { "acc": 0.93937721, "epoch": 2.519968591774294, "grad_norm": 4.326255798339844, "learning_rate": 3.2993427405282084e-06, "loss": 0.29926543, "memory(GiB)": 34.88, "step": 93070, "train_speed(iter/s)": 0.41104 }, { "acc": 0.92062798, "epoch": 2.52010397205751, "grad_norm": 6.2100300788879395, "learning_rate": 3.2988165770134415e-06, "loss": 0.45569048, "memory(GiB)": 34.88, "step": 93075, "train_speed(iter/s)": 0.411041 }, { "acc": 0.93348541, "epoch": 2.520239352340725, "grad_norm": 6.028985500335693, "learning_rate": 3.2982904348156047e-06, "loss": 0.39621918, "memory(GiB)": 34.88, "step": 93080, "train_speed(iter/s)": 0.411042 }, { "acc": 0.93188705, "epoch": 2.5203747326239405, "grad_norm": 5.977360725402832, "learning_rate": 3.297764313941286e-06, "loss": 0.36765974, "memory(GiB)": 34.88, "step": 93085, "train_speed(iter/s)": 0.411044 }, { "acc": 0.94171247, "epoch": 2.5205101129071563, "grad_norm": 7.385447025299072, "learning_rate": 3.2972382143970788e-06, "loss": 0.42751904, "memory(GiB)": 34.88, "step": 93090, "train_speed(iter/s)": 0.411045 }, { "acc": 0.92571678, "epoch": 2.5206454931903717, "grad_norm": 8.984912872314453, "learning_rate": 3.2967121361895716e-06, "loss": 0.42001371, "memory(GiB)": 34.88, "step": 93095, "train_speed(iter/s)": 0.411046 }, { "acc": 0.94463549, "epoch": 2.520780873473587, "grad_norm": 7.5083394050598145, "learning_rate": 3.2961860793253563e-06, "loss": 0.22214274, "memory(GiB)": 34.88, "step": 93100, "train_speed(iter/s)": 0.411047 }, { "acc": 0.94048424, "epoch": 2.520916253756803, "grad_norm": 9.822009086608887, "learning_rate": 3.29566004381102e-06, "loss": 0.33382607, "memory(GiB)": 34.88, "step": 93105, "train_speed(iter/s)": 0.411048 }, { "acc": 0.9251689, "epoch": 2.5210516340400186, "grad_norm": 8.563101768493652, "learning_rate": 3.2951340296531565e-06, "loss": 0.42216821, "memory(GiB)": 34.88, "step": 93110, "train_speed(iter/s)": 0.41105 }, { "acc": 0.92944298, "epoch": 2.521187014323234, "grad_norm": 8.365846633911133, "learning_rate": 3.294608036858353e-06, "loss": 0.38563788, "memory(GiB)": 34.88, "step": 93115, "train_speed(iter/s)": 0.411051 }, { "acc": 0.92662945, "epoch": 2.5213223946064494, "grad_norm": 6.735691070556641, "learning_rate": 3.2940820654331964e-06, "loss": 0.42831802, "memory(GiB)": 34.88, "step": 93120, "train_speed(iter/s)": 0.411052 }, { "acc": 0.91708946, "epoch": 2.521457774889665, "grad_norm": 59.474143981933594, "learning_rate": 3.2935561153842788e-06, "loss": 0.45476513, "memory(GiB)": 34.88, "step": 93125, "train_speed(iter/s)": 0.411053 }, { "acc": 0.93028336, "epoch": 2.5215931551728805, "grad_norm": 4.938474178314209, "learning_rate": 3.293030186718187e-06, "loss": 0.37462354, "memory(GiB)": 34.88, "step": 93130, "train_speed(iter/s)": 0.411054 }, { "acc": 0.94329653, "epoch": 2.5217285354560963, "grad_norm": 2.9208858013153076, "learning_rate": 3.2925042794415098e-06, "loss": 0.28836417, "memory(GiB)": 34.88, "step": 93135, "train_speed(iter/s)": 0.411055 }, { "acc": 0.92322855, "epoch": 2.5218639157393117, "grad_norm": 3.653966188430786, "learning_rate": 3.2919783935608348e-06, "loss": 0.43990865, "memory(GiB)": 34.88, "step": 93140, "train_speed(iter/s)": 0.411057 }, { "acc": 0.92342815, "epoch": 2.5219992960225275, "grad_norm": 8.769309997558594, "learning_rate": 3.2914525290827503e-06, "loss": 0.39296594, "memory(GiB)": 34.88, "step": 93145, "train_speed(iter/s)": 0.411058 }, { "acc": 0.92439241, "epoch": 2.522134676305743, "grad_norm": 4.522168159484863, "learning_rate": 3.2909266860138445e-06, "loss": 0.4195776, "memory(GiB)": 34.88, "step": 93150, "train_speed(iter/s)": 0.411059 }, { "acc": 0.93547792, "epoch": 2.522270056588958, "grad_norm": 8.330132484436035, "learning_rate": 3.290400864360703e-06, "loss": 0.35889909, "memory(GiB)": 34.88, "step": 93155, "train_speed(iter/s)": 0.41106 }, { "acc": 0.91530857, "epoch": 2.522405436872174, "grad_norm": 10.260326385498047, "learning_rate": 3.2898750641299143e-06, "loss": 0.51726713, "memory(GiB)": 34.88, "step": 93160, "train_speed(iter/s)": 0.411061 }, { "acc": 0.92853098, "epoch": 2.5225408171553894, "grad_norm": 23.5113468170166, "learning_rate": 3.2893492853280634e-06, "loss": 0.45212727, "memory(GiB)": 34.88, "step": 93165, "train_speed(iter/s)": 0.411063 }, { "acc": 0.91946392, "epoch": 2.522676197438605, "grad_norm": 4.3352155685424805, "learning_rate": 3.2888235279617382e-06, "loss": 0.3932291, "memory(GiB)": 34.88, "step": 93170, "train_speed(iter/s)": 0.411064 }, { "acc": 0.94099617, "epoch": 2.5228115777218205, "grad_norm": 4.687039375305176, "learning_rate": 3.288297792037523e-06, "loss": 0.36086206, "memory(GiB)": 34.88, "step": 93175, "train_speed(iter/s)": 0.411065 }, { "acc": 0.93323784, "epoch": 2.5229469580050363, "grad_norm": 6.860377788543701, "learning_rate": 3.2877720775620063e-06, "loss": 0.41780148, "memory(GiB)": 34.88, "step": 93180, "train_speed(iter/s)": 0.411066 }, { "acc": 0.9270216, "epoch": 2.5230823382882517, "grad_norm": 21.598485946655273, "learning_rate": 3.2872463845417705e-06, "loss": 0.38996344, "memory(GiB)": 34.88, "step": 93185, "train_speed(iter/s)": 0.411068 }, { "acc": 0.92882347, "epoch": 2.523217718571467, "grad_norm": 8.20478630065918, "learning_rate": 3.286720712983406e-06, "loss": 0.39893591, "memory(GiB)": 34.88, "step": 93190, "train_speed(iter/s)": 0.411069 }, { "acc": 0.93165922, "epoch": 2.523353098854683, "grad_norm": 16.185646057128906, "learning_rate": 3.286195062893493e-06, "loss": 0.43590279, "memory(GiB)": 34.88, "step": 93195, "train_speed(iter/s)": 0.41107 }, { "acc": 0.93202553, "epoch": 2.523488479137898, "grad_norm": 5.934950351715088, "learning_rate": 3.285669434278617e-06, "loss": 0.34252954, "memory(GiB)": 34.88, "step": 93200, "train_speed(iter/s)": 0.411071 }, { "acc": 0.92488842, "epoch": 2.523623859421114, "grad_norm": 10.412540435791016, "learning_rate": 3.2851438271453644e-06, "loss": 0.40385795, "memory(GiB)": 34.88, "step": 93205, "train_speed(iter/s)": 0.411072 }, { "acc": 0.9406599, "epoch": 2.5237592397043294, "grad_norm": 5.429606914520264, "learning_rate": 3.2846182415003185e-06, "loss": 0.29443984, "memory(GiB)": 34.88, "step": 93210, "train_speed(iter/s)": 0.411074 }, { "acc": 0.94189358, "epoch": 2.523894619987545, "grad_norm": 5.265275478363037, "learning_rate": 3.2840926773500635e-06, "loss": 0.30161214, "memory(GiB)": 34.88, "step": 93215, "train_speed(iter/s)": 0.411075 }, { "acc": 0.92579842, "epoch": 2.5240300002707605, "grad_norm": 13.293924331665039, "learning_rate": 3.283567134701181e-06, "loss": 0.41783218, "memory(GiB)": 34.88, "step": 93220, "train_speed(iter/s)": 0.411076 }, { "acc": 0.94565611, "epoch": 2.524165380553976, "grad_norm": 6.3302741050720215, "learning_rate": 3.2830416135602596e-06, "loss": 0.31563072, "memory(GiB)": 34.88, "step": 93225, "train_speed(iter/s)": 0.411077 }, { "acc": 0.93097458, "epoch": 2.5243007608371917, "grad_norm": 7.484989643096924, "learning_rate": 3.2825161139338757e-06, "loss": 0.41829491, "memory(GiB)": 34.88, "step": 93230, "train_speed(iter/s)": 0.411078 }, { "acc": 0.92994709, "epoch": 2.5244361411204075, "grad_norm": 9.210957527160645, "learning_rate": 3.28199063582862e-06, "loss": 0.40025501, "memory(GiB)": 34.88, "step": 93235, "train_speed(iter/s)": 0.41108 }, { "acc": 0.93074303, "epoch": 2.524571521403623, "grad_norm": 17.76224708557129, "learning_rate": 3.2814651792510693e-06, "loss": 0.36693978, "memory(GiB)": 34.88, "step": 93240, "train_speed(iter/s)": 0.411081 }, { "acc": 0.94038324, "epoch": 2.524706901686838, "grad_norm": 10.058328628540039, "learning_rate": 3.280939744207807e-06, "loss": 0.33954265, "memory(GiB)": 34.88, "step": 93245, "train_speed(iter/s)": 0.411082 }, { "acc": 0.93914099, "epoch": 2.524842281970054, "grad_norm": 11.86540699005127, "learning_rate": 3.280414330705416e-06, "loss": 0.3093852, "memory(GiB)": 34.88, "step": 93250, "train_speed(iter/s)": 0.411083 }, { "acc": 0.92049856, "epoch": 2.5249776622532694, "grad_norm": 14.682121276855469, "learning_rate": 3.2798889387504763e-06, "loss": 0.49085536, "memory(GiB)": 34.88, "step": 93255, "train_speed(iter/s)": 0.411084 }, { "acc": 0.91505413, "epoch": 2.5251130425364847, "grad_norm": 6.808013439178467, "learning_rate": 3.2793635683495738e-06, "loss": 0.47789965, "memory(GiB)": 34.88, "step": 93260, "train_speed(iter/s)": 0.411085 }, { "acc": 0.92795067, "epoch": 2.5252484228197005, "grad_norm": 9.573847770690918, "learning_rate": 3.278838219509285e-06, "loss": 0.48336153, "memory(GiB)": 34.88, "step": 93265, "train_speed(iter/s)": 0.411086 }, { "acc": 0.92027473, "epoch": 2.5253838031029163, "grad_norm": 5.660368919372559, "learning_rate": 3.2783128922361958e-06, "loss": 0.32098269, "memory(GiB)": 34.88, "step": 93270, "train_speed(iter/s)": 0.411087 }, { "acc": 0.91551447, "epoch": 2.5255191833861317, "grad_norm": 11.5159330368042, "learning_rate": 3.277787586536883e-06, "loss": 0.49098263, "memory(GiB)": 34.88, "step": 93275, "train_speed(iter/s)": 0.411089 }, { "acc": 0.93186302, "epoch": 2.525654563669347, "grad_norm": 14.517501831054688, "learning_rate": 3.2772623024179267e-06, "loss": 0.37327228, "memory(GiB)": 34.88, "step": 93280, "train_speed(iter/s)": 0.41109 }, { "acc": 0.94059191, "epoch": 2.525789943952563, "grad_norm": 9.48078727722168, "learning_rate": 3.2767370398859106e-06, "loss": 0.33355198, "memory(GiB)": 34.88, "step": 93285, "train_speed(iter/s)": 0.411091 }, { "acc": 0.92236633, "epoch": 2.525925324235778, "grad_norm": 10.108261108398438, "learning_rate": 3.276211798947411e-06, "loss": 0.42896194, "memory(GiB)": 34.88, "step": 93290, "train_speed(iter/s)": 0.411092 }, { "acc": 0.93489208, "epoch": 2.526060704518994, "grad_norm": 8.085066795349121, "learning_rate": 3.2756865796090116e-06, "loss": 0.35363374, "memory(GiB)": 34.88, "step": 93295, "train_speed(iter/s)": 0.411093 }, { "acc": 0.94116659, "epoch": 2.5261960848022094, "grad_norm": 9.41519546508789, "learning_rate": 3.2751613818772864e-06, "loss": 0.37344151, "memory(GiB)": 34.88, "step": 93300, "train_speed(iter/s)": 0.411094 }, { "acc": 0.94828129, "epoch": 2.526331465085425, "grad_norm": 7.365073204040527, "learning_rate": 3.2746362057588216e-06, "loss": 0.31263659, "memory(GiB)": 34.88, "step": 93305, "train_speed(iter/s)": 0.411095 }, { "acc": 0.92937088, "epoch": 2.5264668453686405, "grad_norm": 4.386645317077637, "learning_rate": 3.2741110512601883e-06, "loss": 0.38932679, "memory(GiB)": 34.88, "step": 93310, "train_speed(iter/s)": 0.411097 }, { "acc": 0.92841558, "epoch": 2.526602225651856, "grad_norm": 10.94680404663086, "learning_rate": 3.2735859183879727e-06, "loss": 0.43613124, "memory(GiB)": 34.88, "step": 93315, "train_speed(iter/s)": 0.411098 }, { "acc": 0.91304483, "epoch": 2.5267376059350717, "grad_norm": 5.585953712463379, "learning_rate": 3.2730608071487473e-06, "loss": 0.40766425, "memory(GiB)": 34.88, "step": 93320, "train_speed(iter/s)": 0.411099 }, { "acc": 0.92967072, "epoch": 2.526872986218287, "grad_norm": 6.732263088226318, "learning_rate": 3.272535717549091e-06, "loss": 0.39354286, "memory(GiB)": 34.88, "step": 93325, "train_speed(iter/s)": 0.4111 }, { "acc": 0.92572536, "epoch": 2.527008366501503, "grad_norm": 16.636075973510742, "learning_rate": 3.2720106495955855e-06, "loss": 0.46680679, "memory(GiB)": 34.88, "step": 93330, "train_speed(iter/s)": 0.411101 }, { "acc": 0.93365288, "epoch": 2.527143746784718, "grad_norm": 5.769966125488281, "learning_rate": 3.271485603294803e-06, "loss": 0.33210096, "memory(GiB)": 34.88, "step": 93335, "train_speed(iter/s)": 0.411102 }, { "acc": 0.93138332, "epoch": 2.527279127067934, "grad_norm": 5.235328674316406, "learning_rate": 3.2709605786533254e-06, "loss": 0.40841866, "memory(GiB)": 34.88, "step": 93340, "train_speed(iter/s)": 0.411103 }, { "acc": 0.93869104, "epoch": 2.5274145073511494, "grad_norm": 2.8620293140411377, "learning_rate": 3.2704355756777255e-06, "loss": 0.34740086, "memory(GiB)": 34.88, "step": 93345, "train_speed(iter/s)": 0.411104 }, { "acc": 0.93654547, "epoch": 2.5275498876343647, "grad_norm": 6.340582370758057, "learning_rate": 3.2699105943745846e-06, "loss": 0.35107441, "memory(GiB)": 34.88, "step": 93350, "train_speed(iter/s)": 0.411105 }, { "acc": 0.9415453, "epoch": 2.5276852679175805, "grad_norm": 6.580822944641113, "learning_rate": 3.269385634750474e-06, "loss": 0.34110312, "memory(GiB)": 34.88, "step": 93355, "train_speed(iter/s)": 0.411106 }, { "acc": 0.92359552, "epoch": 2.527820648200796, "grad_norm": 4.7133402824401855, "learning_rate": 3.268860696811975e-06, "loss": 0.41908226, "memory(GiB)": 34.88, "step": 93360, "train_speed(iter/s)": 0.411108 }, { "acc": 0.92036982, "epoch": 2.5279560284840117, "grad_norm": 7.539995193481445, "learning_rate": 3.2683357805656608e-06, "loss": 0.37855992, "memory(GiB)": 34.88, "step": 93365, "train_speed(iter/s)": 0.411109 }, { "acc": 0.93240175, "epoch": 2.528091408767227, "grad_norm": 13.484411239624023, "learning_rate": 3.2678108860181042e-06, "loss": 0.40419989, "memory(GiB)": 34.88, "step": 93370, "train_speed(iter/s)": 0.41111 }, { "acc": 0.92688198, "epoch": 2.528226789050443, "grad_norm": 4.22237491607666, "learning_rate": 3.2672860131758873e-06, "loss": 0.38234539, "memory(GiB)": 34.88, "step": 93375, "train_speed(iter/s)": 0.411111 }, { "acc": 0.91810017, "epoch": 2.528362169333658, "grad_norm": 7.980030536651611, "learning_rate": 3.2667611620455773e-06, "loss": 0.4879128, "memory(GiB)": 34.88, "step": 93380, "train_speed(iter/s)": 0.411112 }, { "acc": 0.94091997, "epoch": 2.5284975496168736, "grad_norm": 14.19665241241455, "learning_rate": 3.2662363326337567e-06, "loss": 0.37198339, "memory(GiB)": 34.88, "step": 93385, "train_speed(iter/s)": 0.411113 }, { "acc": 0.92732944, "epoch": 2.5286329299000894, "grad_norm": 11.199067115783691, "learning_rate": 3.2657115249469932e-06, "loss": 0.44433799, "memory(GiB)": 34.88, "step": 93390, "train_speed(iter/s)": 0.411115 }, { "acc": 0.93989697, "epoch": 2.528768310183305, "grad_norm": 6.5716872215271, "learning_rate": 3.265186738991867e-06, "loss": 0.32494819, "memory(GiB)": 34.88, "step": 93395, "train_speed(iter/s)": 0.411116 }, { "acc": 0.93174629, "epoch": 2.5289036904665205, "grad_norm": 10.023924827575684, "learning_rate": 3.2646619747749485e-06, "loss": 0.43514414, "memory(GiB)": 34.88, "step": 93400, "train_speed(iter/s)": 0.411117 }, { "acc": 0.92024488, "epoch": 2.529039070749736, "grad_norm": 10.398930549621582, "learning_rate": 3.26413723230281e-06, "loss": 0.51410623, "memory(GiB)": 34.88, "step": 93405, "train_speed(iter/s)": 0.411118 }, { "acc": 0.91916656, "epoch": 2.5291744510329517, "grad_norm": 9.24872875213623, "learning_rate": 3.2636125115820304e-06, "loss": 0.54571667, "memory(GiB)": 34.88, "step": 93410, "train_speed(iter/s)": 0.411119 }, { "acc": 0.92881956, "epoch": 2.529309831316167, "grad_norm": 8.23226547241211, "learning_rate": 3.263087812619177e-06, "loss": 0.34076116, "memory(GiB)": 34.88, "step": 93415, "train_speed(iter/s)": 0.41112 }, { "acc": 0.92825127, "epoch": 2.5294452115993824, "grad_norm": 7.717257976531982, "learning_rate": 3.2625631354208277e-06, "loss": 0.43002262, "memory(GiB)": 34.88, "step": 93420, "train_speed(iter/s)": 0.411122 }, { "acc": 0.92435465, "epoch": 2.529580591882598, "grad_norm": 20.37054443359375, "learning_rate": 3.2620384799935495e-06, "loss": 0.41648312, "memory(GiB)": 34.88, "step": 93425, "train_speed(iter/s)": 0.411123 }, { "acc": 0.90860558, "epoch": 2.529715972165814, "grad_norm": 10.200106620788574, "learning_rate": 3.2615138463439213e-06, "loss": 0.54946909, "memory(GiB)": 34.88, "step": 93430, "train_speed(iter/s)": 0.411124 }, { "acc": 0.92160473, "epoch": 2.5298513524490294, "grad_norm": 6.18400764465332, "learning_rate": 3.260989234478508e-06, "loss": 0.45205727, "memory(GiB)": 34.88, "step": 93435, "train_speed(iter/s)": 0.411125 }, { "acc": 0.93172112, "epoch": 2.5299867327322447, "grad_norm": 8.396431922912598, "learning_rate": 3.2604646444038878e-06, "loss": 0.35474567, "memory(GiB)": 34.88, "step": 93440, "train_speed(iter/s)": 0.411125 }, { "acc": 0.95367622, "epoch": 2.5301221130154605, "grad_norm": 4.768822193145752, "learning_rate": 3.259940076126631e-06, "loss": 0.2526679, "memory(GiB)": 34.88, "step": 93445, "train_speed(iter/s)": 0.411127 }, { "acc": 0.93115292, "epoch": 2.530257493298676, "grad_norm": 10.00340461730957, "learning_rate": 3.2594155296533038e-06, "loss": 0.45622606, "memory(GiB)": 34.88, "step": 93450, "train_speed(iter/s)": 0.411128 }, { "acc": 0.93395863, "epoch": 2.5303928735818917, "grad_norm": 4.681432723999023, "learning_rate": 3.258891004990484e-06, "loss": 0.34358127, "memory(GiB)": 34.88, "step": 93455, "train_speed(iter/s)": 0.411129 }, { "acc": 0.93451996, "epoch": 2.530528253865107, "grad_norm": 3.642146587371826, "learning_rate": 3.2583665021447366e-06, "loss": 0.38079486, "memory(GiB)": 34.88, "step": 93460, "train_speed(iter/s)": 0.41113 }, { "acc": 0.92516489, "epoch": 2.530663634148323, "grad_norm": 18.96453285217285, "learning_rate": 3.2578420211226362e-06, "loss": 0.37273157, "memory(GiB)": 34.88, "step": 93465, "train_speed(iter/s)": 0.411131 }, { "acc": 0.93187218, "epoch": 2.530799014431538, "grad_norm": 3.9376325607299805, "learning_rate": 3.25731756193075e-06, "loss": 0.37497306, "memory(GiB)": 34.88, "step": 93470, "train_speed(iter/s)": 0.411132 }, { "acc": 0.91621428, "epoch": 2.5309343947147536, "grad_norm": 12.995401382446289, "learning_rate": 3.2567931245756503e-06, "loss": 0.44517961, "memory(GiB)": 34.88, "step": 93475, "train_speed(iter/s)": 0.411133 }, { "acc": 0.92756481, "epoch": 2.5310697749979694, "grad_norm": 8.584335327148438, "learning_rate": 3.2562687090639044e-06, "loss": 0.3784399, "memory(GiB)": 34.88, "step": 93480, "train_speed(iter/s)": 0.411135 }, { "acc": 0.93281126, "epoch": 2.5312051552811847, "grad_norm": 5.202479839324951, "learning_rate": 3.255744315402084e-06, "loss": 0.39849384, "memory(GiB)": 34.88, "step": 93485, "train_speed(iter/s)": 0.411136 }, { "acc": 0.9198266, "epoch": 2.5313405355644005, "grad_norm": 40.76639175415039, "learning_rate": 3.255219943596758e-06, "loss": 0.44940615, "memory(GiB)": 34.88, "step": 93490, "train_speed(iter/s)": 0.411137 }, { "acc": 0.92247372, "epoch": 2.531475915847616, "grad_norm": 11.157062530517578, "learning_rate": 3.2546955936544917e-06, "loss": 0.53185983, "memory(GiB)": 34.88, "step": 93495, "train_speed(iter/s)": 0.411138 }, { "acc": 0.92954245, "epoch": 2.5316112961308317, "grad_norm": 6.646198749542236, "learning_rate": 3.2541712655818587e-06, "loss": 0.37345891, "memory(GiB)": 34.88, "step": 93500, "train_speed(iter/s)": 0.41114 }, { "acc": 0.93660412, "epoch": 2.531746676414047, "grad_norm": 14.018872261047363, "learning_rate": 3.253646959385421e-06, "loss": 0.34426408, "memory(GiB)": 34.88, "step": 93505, "train_speed(iter/s)": 0.411141 }, { "acc": 0.92831917, "epoch": 2.5318820566972624, "grad_norm": 6.609872341156006, "learning_rate": 3.2531226750717526e-06, "loss": 0.35632524, "memory(GiB)": 34.88, "step": 93510, "train_speed(iter/s)": 0.411142 }, { "acc": 0.92846375, "epoch": 2.532017436980478, "grad_norm": 14.35739517211914, "learning_rate": 3.252598412647418e-06, "loss": 0.36522541, "memory(GiB)": 34.88, "step": 93515, "train_speed(iter/s)": 0.411143 }, { "acc": 0.93043079, "epoch": 2.5321528172636936, "grad_norm": 23.415082931518555, "learning_rate": 3.252074172118987e-06, "loss": 0.47567244, "memory(GiB)": 34.88, "step": 93520, "train_speed(iter/s)": 0.411144 }, { "acc": 0.91711483, "epoch": 2.5322881975469094, "grad_norm": 5.425356388092041, "learning_rate": 3.2515499534930252e-06, "loss": 0.46930256, "memory(GiB)": 34.88, "step": 93525, "train_speed(iter/s)": 0.411145 }, { "acc": 0.93689766, "epoch": 2.5324235778301247, "grad_norm": 11.700937271118164, "learning_rate": 3.2510257567760966e-06, "loss": 0.28602905, "memory(GiB)": 34.88, "step": 93530, "train_speed(iter/s)": 0.411146 }, { "acc": 0.92150326, "epoch": 2.5325589581133405, "grad_norm": 14.766901969909668, "learning_rate": 3.2505015819747733e-06, "loss": 0.50423203, "memory(GiB)": 34.88, "step": 93535, "train_speed(iter/s)": 0.411147 }, { "acc": 0.93626862, "epoch": 2.532694338396556, "grad_norm": 9.276884078979492, "learning_rate": 3.2499774290956164e-06, "loss": 0.34241662, "memory(GiB)": 34.88, "step": 93540, "train_speed(iter/s)": 0.411148 }, { "acc": 0.92222462, "epoch": 2.5328297186797712, "grad_norm": 9.663269996643066, "learning_rate": 3.2494532981451977e-06, "loss": 0.45497508, "memory(GiB)": 34.88, "step": 93545, "train_speed(iter/s)": 0.41115 }, { "acc": 0.93077793, "epoch": 2.532965098962987, "grad_norm": 6.791925430297852, "learning_rate": 3.248929189130076e-06, "loss": 0.37095501, "memory(GiB)": 34.88, "step": 93550, "train_speed(iter/s)": 0.411151 }, { "acc": 0.91706285, "epoch": 2.533100479246203, "grad_norm": 8.912879943847656, "learning_rate": 3.248405102056823e-06, "loss": 0.4203012, "memory(GiB)": 34.88, "step": 93555, "train_speed(iter/s)": 0.411152 }, { "acc": 0.92892227, "epoch": 2.533235859529418, "grad_norm": 5.099722862243652, "learning_rate": 3.2478810369320006e-06, "loss": 0.33562412, "memory(GiB)": 34.88, "step": 93560, "train_speed(iter/s)": 0.411153 }, { "acc": 0.91405354, "epoch": 2.5333712398126336, "grad_norm": 10.308476448059082, "learning_rate": 3.2473569937621753e-06, "loss": 0.5061904, "memory(GiB)": 34.88, "step": 93565, "train_speed(iter/s)": 0.411154 }, { "acc": 0.93387785, "epoch": 2.5335066200958494, "grad_norm": 7.0361738204956055, "learning_rate": 3.2468329725539127e-06, "loss": 0.38555751, "memory(GiB)": 34.88, "step": 93570, "train_speed(iter/s)": 0.411155 }, { "acc": 0.93054466, "epoch": 2.5336420003790647, "grad_norm": 7.209248065948486, "learning_rate": 3.2463089733137715e-06, "loss": 0.37079802, "memory(GiB)": 34.88, "step": 93575, "train_speed(iter/s)": 0.411156 }, { "acc": 0.92900391, "epoch": 2.53377738066228, "grad_norm": 7.111556053161621, "learning_rate": 3.2457849960483233e-06, "loss": 0.38358886, "memory(GiB)": 34.88, "step": 93580, "train_speed(iter/s)": 0.411157 }, { "acc": 0.89883003, "epoch": 2.533912760945496, "grad_norm": 17.837207794189453, "learning_rate": 3.2452610407641264e-06, "loss": 0.59298935, "memory(GiB)": 34.88, "step": 93585, "train_speed(iter/s)": 0.411158 }, { "acc": 0.92284336, "epoch": 2.5340481412287117, "grad_norm": 18.967243194580078, "learning_rate": 3.244737107467748e-06, "loss": 0.4547781, "memory(GiB)": 34.88, "step": 93590, "train_speed(iter/s)": 0.411159 }, { "acc": 0.93697834, "epoch": 2.534183521511927, "grad_norm": 6.0964789390563965, "learning_rate": 3.2442131961657485e-06, "loss": 0.42257681, "memory(GiB)": 34.88, "step": 93595, "train_speed(iter/s)": 0.41116 }, { "acc": 0.938097, "epoch": 2.5343189017951424, "grad_norm": 9.739653587341309, "learning_rate": 3.2436893068646935e-06, "loss": 0.37067692, "memory(GiB)": 34.88, "step": 93600, "train_speed(iter/s)": 0.411161 }, { "acc": 0.93149233, "epoch": 2.534454282078358, "grad_norm": 5.278042316436768, "learning_rate": 3.2431654395711435e-06, "loss": 0.3636754, "memory(GiB)": 34.88, "step": 93605, "train_speed(iter/s)": 0.411163 }, { "acc": 0.92716026, "epoch": 2.5345896623615736, "grad_norm": 11.435023307800293, "learning_rate": 3.2426415942916635e-06, "loss": 0.38593235, "memory(GiB)": 34.88, "step": 93610, "train_speed(iter/s)": 0.411164 }, { "acc": 0.93340273, "epoch": 2.5347250426447894, "grad_norm": 14.148186683654785, "learning_rate": 3.2421177710328145e-06, "loss": 0.46980724, "memory(GiB)": 34.88, "step": 93615, "train_speed(iter/s)": 0.411165 }, { "acc": 0.93456068, "epoch": 2.5348604229280047, "grad_norm": 6.937983512878418, "learning_rate": 3.241593969801155e-06, "loss": 0.33430865, "memory(GiB)": 34.88, "step": 93620, "train_speed(iter/s)": 0.411166 }, { "acc": 0.92485828, "epoch": 2.5349958032112205, "grad_norm": 6.647914886474609, "learning_rate": 3.2410701906032525e-06, "loss": 0.42872047, "memory(GiB)": 34.88, "step": 93625, "train_speed(iter/s)": 0.411168 }, { "acc": 0.91834488, "epoch": 2.535131183494436, "grad_norm": 7.094541549682617, "learning_rate": 3.240546433445664e-06, "loss": 0.38566108, "memory(GiB)": 34.88, "step": 93630, "train_speed(iter/s)": 0.411169 }, { "acc": 0.92504311, "epoch": 2.5352665637776512, "grad_norm": 14.667478561401367, "learning_rate": 3.2400226983349535e-06, "loss": 0.35817623, "memory(GiB)": 34.88, "step": 93635, "train_speed(iter/s)": 0.41117 }, { "acc": 0.93271961, "epoch": 2.535401944060867, "grad_norm": 5.712238311767578, "learning_rate": 3.23949898527768e-06, "loss": 0.38611996, "memory(GiB)": 34.88, "step": 93640, "train_speed(iter/s)": 0.411171 }, { "acc": 0.94184074, "epoch": 2.5355373243440824, "grad_norm": 18.234352111816406, "learning_rate": 3.2389752942804054e-06, "loss": 0.3070935, "memory(GiB)": 34.88, "step": 93645, "train_speed(iter/s)": 0.411172 }, { "acc": 0.91897011, "epoch": 2.535672704627298, "grad_norm": 9.315695762634277, "learning_rate": 3.238451625349687e-06, "loss": 0.44913201, "memory(GiB)": 34.88, "step": 93650, "train_speed(iter/s)": 0.411173 }, { "acc": 0.91726027, "epoch": 2.5358080849105136, "grad_norm": 11.849079132080078, "learning_rate": 3.2379279784920897e-06, "loss": 0.48868246, "memory(GiB)": 34.88, "step": 93655, "train_speed(iter/s)": 0.411175 }, { "acc": 0.93741531, "epoch": 2.5359434651937294, "grad_norm": 6.4886627197265625, "learning_rate": 3.237404353714169e-06, "loss": 0.38874335, "memory(GiB)": 34.88, "step": 93660, "train_speed(iter/s)": 0.411176 }, { "acc": 0.93552322, "epoch": 2.5360788454769447, "grad_norm": 4.630184173583984, "learning_rate": 3.2368807510224854e-06, "loss": 0.35358725, "memory(GiB)": 34.88, "step": 93665, "train_speed(iter/s)": 0.411177 }, { "acc": 0.94767351, "epoch": 2.53621422576016, "grad_norm": 4.568121433258057, "learning_rate": 3.2363571704236002e-06, "loss": 0.31156125, "memory(GiB)": 34.88, "step": 93670, "train_speed(iter/s)": 0.411178 }, { "acc": 0.91236343, "epoch": 2.536349606043376, "grad_norm": 4.69010591506958, "learning_rate": 3.235833611924068e-06, "loss": 0.48122239, "memory(GiB)": 34.88, "step": 93675, "train_speed(iter/s)": 0.411179 }, { "acc": 0.93008575, "epoch": 2.5364849863265913, "grad_norm": 8.168880462646484, "learning_rate": 3.235310075530451e-06, "loss": 0.39336331, "memory(GiB)": 34.88, "step": 93680, "train_speed(iter/s)": 0.41118 }, { "acc": 0.93696575, "epoch": 2.536620366609807, "grad_norm": 4.1057448387146, "learning_rate": 3.234786561249306e-06, "loss": 0.33632662, "memory(GiB)": 34.88, "step": 93685, "train_speed(iter/s)": 0.411182 }, { "acc": 0.92682114, "epoch": 2.5367557468930224, "grad_norm": 16.41476821899414, "learning_rate": 3.234263069087192e-06, "loss": 0.42436047, "memory(GiB)": 34.88, "step": 93690, "train_speed(iter/s)": 0.411183 }, { "acc": 0.9168663, "epoch": 2.536891127176238, "grad_norm": 11.63790512084961, "learning_rate": 3.233739599050667e-06, "loss": 0.54105005, "memory(GiB)": 34.88, "step": 93695, "train_speed(iter/s)": 0.411184 }, { "acc": 0.94656963, "epoch": 2.5370265074594536, "grad_norm": 4.361551284790039, "learning_rate": 3.233216151146286e-06, "loss": 0.26339562, "memory(GiB)": 34.88, "step": 93700, "train_speed(iter/s)": 0.411185 }, { "acc": 0.92302694, "epoch": 2.537161887742669, "grad_norm": 9.804871559143066, "learning_rate": 3.232692725380609e-06, "loss": 0.45035105, "memory(GiB)": 34.88, "step": 93705, "train_speed(iter/s)": 0.411186 }, { "acc": 0.93496656, "epoch": 2.5372972680258847, "grad_norm": 9.097771644592285, "learning_rate": 3.2321693217601915e-06, "loss": 0.37271323, "memory(GiB)": 34.88, "step": 93710, "train_speed(iter/s)": 0.411187 }, { "acc": 0.92656422, "epoch": 2.5374326483091005, "grad_norm": 11.430451393127441, "learning_rate": 3.2316459402915906e-06, "loss": 0.45535994, "memory(GiB)": 34.88, "step": 93715, "train_speed(iter/s)": 0.411189 }, { "acc": 0.92683783, "epoch": 2.537568028592316, "grad_norm": 4.3034772872924805, "learning_rate": 3.231122580981362e-06, "loss": 0.40145912, "memory(GiB)": 34.88, "step": 93720, "train_speed(iter/s)": 0.41119 }, { "acc": 0.9381464, "epoch": 2.5377034088755313, "grad_norm": 15.607563972473145, "learning_rate": 3.2305992438360634e-06, "loss": 0.39696126, "memory(GiB)": 34.88, "step": 93725, "train_speed(iter/s)": 0.411191 }, { "acc": 0.93057919, "epoch": 2.537838789158747, "grad_norm": 4.5835418701171875, "learning_rate": 3.2300759288622483e-06, "loss": 0.33687477, "memory(GiB)": 34.88, "step": 93730, "train_speed(iter/s)": 0.411192 }, { "acc": 0.92525272, "epoch": 2.5379741694419624, "grad_norm": 12.00210952758789, "learning_rate": 3.229552636066475e-06, "loss": 0.3837162, "memory(GiB)": 34.88, "step": 93735, "train_speed(iter/s)": 0.411193 }, { "acc": 0.93959808, "epoch": 2.5381095497251778, "grad_norm": 5.115525722503662, "learning_rate": 3.229029365455297e-06, "loss": 0.34347515, "memory(GiB)": 34.88, "step": 93740, "train_speed(iter/s)": 0.411194 }, { "acc": 0.92063255, "epoch": 2.5382449300083936, "grad_norm": 12.651335716247559, "learning_rate": 3.228506117035269e-06, "loss": 0.48508124, "memory(GiB)": 34.88, "step": 93745, "train_speed(iter/s)": 0.411196 }, { "acc": 0.92884274, "epoch": 2.5383803102916094, "grad_norm": 11.527471542358398, "learning_rate": 3.2279828908129473e-06, "loss": 0.37528646, "memory(GiB)": 34.88, "step": 93750, "train_speed(iter/s)": 0.411197 }, { "acc": 0.92524014, "epoch": 2.5385156905748247, "grad_norm": 3.9162724018096924, "learning_rate": 3.2274596867948838e-06, "loss": 0.36455002, "memory(GiB)": 34.88, "step": 93755, "train_speed(iter/s)": 0.411198 }, { "acc": 0.94208469, "epoch": 2.53865107085804, "grad_norm": 7.132140636444092, "learning_rate": 3.226936504987636e-06, "loss": 0.30588176, "memory(GiB)": 34.88, "step": 93760, "train_speed(iter/s)": 0.411199 }, { "acc": 0.9367588, "epoch": 2.538786451141256, "grad_norm": 4.893909454345703, "learning_rate": 3.226413345397754e-06, "loss": 0.31940498, "memory(GiB)": 34.88, "step": 93765, "train_speed(iter/s)": 0.411201 }, { "acc": 0.9413619, "epoch": 2.5389218314244713, "grad_norm": 6.140143394470215, "learning_rate": 3.2258902080317963e-06, "loss": 0.38924689, "memory(GiB)": 34.88, "step": 93770, "train_speed(iter/s)": 0.411202 }, { "acc": 0.92035637, "epoch": 2.539057211707687, "grad_norm": 15.374567985534668, "learning_rate": 3.22536709289631e-06, "loss": 0.53897953, "memory(GiB)": 34.88, "step": 93775, "train_speed(iter/s)": 0.411203 }, { "acc": 0.93200989, "epoch": 2.5391925919909024, "grad_norm": 10.073741912841797, "learning_rate": 3.2248439999978544e-06, "loss": 0.38648918, "memory(GiB)": 34.88, "step": 93780, "train_speed(iter/s)": 0.411204 }, { "acc": 0.93884878, "epoch": 2.539327972274118, "grad_norm": 10.083882331848145, "learning_rate": 3.2243209293429782e-06, "loss": 0.35297403, "memory(GiB)": 34.88, "step": 93785, "train_speed(iter/s)": 0.411205 }, { "acc": 0.9301445, "epoch": 2.5394633525573336, "grad_norm": 8.491256713867188, "learning_rate": 3.2237978809382346e-06, "loss": 0.41594572, "memory(GiB)": 34.88, "step": 93790, "train_speed(iter/s)": 0.411206 }, { "acc": 0.92180443, "epoch": 2.539598732840549, "grad_norm": 16.348068237304688, "learning_rate": 3.2232748547901773e-06, "loss": 0.51292028, "memory(GiB)": 34.88, "step": 93795, "train_speed(iter/s)": 0.411207 }, { "acc": 0.91328735, "epoch": 2.5397341131237647, "grad_norm": 8.628768920898438, "learning_rate": 3.2227518509053553e-06, "loss": 0.49394846, "memory(GiB)": 34.88, "step": 93800, "train_speed(iter/s)": 0.411208 }, { "acc": 0.93200264, "epoch": 2.53986949340698, "grad_norm": 11.521262168884277, "learning_rate": 3.2222288692903246e-06, "loss": 0.39100211, "memory(GiB)": 34.88, "step": 93805, "train_speed(iter/s)": 0.411209 }, { "acc": 0.93305454, "epoch": 2.540004873690196, "grad_norm": 6.0217671394348145, "learning_rate": 3.221705909951632e-06, "loss": 0.42960734, "memory(GiB)": 34.88, "step": 93810, "train_speed(iter/s)": 0.411211 }, { "acc": 0.9170145, "epoch": 2.5401402539734113, "grad_norm": 8.17430305480957, "learning_rate": 3.2211829728958316e-06, "loss": 0.40912771, "memory(GiB)": 34.88, "step": 93815, "train_speed(iter/s)": 0.411212 }, { "acc": 0.92475901, "epoch": 2.540275634256627, "grad_norm": 10.881178855895996, "learning_rate": 3.2206600581294746e-06, "loss": 0.43340654, "memory(GiB)": 34.88, "step": 93820, "train_speed(iter/s)": 0.411212 }, { "acc": 0.94127026, "epoch": 2.5404110145398424, "grad_norm": 8.421687126159668, "learning_rate": 3.220137165659108e-06, "loss": 0.31939538, "memory(GiB)": 34.88, "step": 93825, "train_speed(iter/s)": 0.411214 }, { "acc": 0.92885609, "epoch": 2.5405463948230578, "grad_norm": 5.019954681396484, "learning_rate": 3.219614295491286e-06, "loss": 0.41303463, "memory(GiB)": 34.88, "step": 93830, "train_speed(iter/s)": 0.411215 }, { "acc": 0.92942028, "epoch": 2.5406817751062736, "grad_norm": 11.638752937316895, "learning_rate": 3.219091447632555e-06, "loss": 0.39542749, "memory(GiB)": 34.88, "step": 93835, "train_speed(iter/s)": 0.411216 }, { "acc": 0.9450038, "epoch": 2.540817155389489, "grad_norm": 4.253125190734863, "learning_rate": 3.2185686220894686e-06, "loss": 0.23905296, "memory(GiB)": 34.88, "step": 93840, "train_speed(iter/s)": 0.411217 }, { "acc": 0.93339605, "epoch": 2.5409525356727047, "grad_norm": 1.9276875257492065, "learning_rate": 3.218045818868572e-06, "loss": 0.37335372, "memory(GiB)": 34.88, "step": 93845, "train_speed(iter/s)": 0.411218 }, { "acc": 0.94549179, "epoch": 2.54108791595592, "grad_norm": 4.968008518218994, "learning_rate": 3.2175230379764186e-06, "loss": 0.26642413, "memory(GiB)": 34.88, "step": 93850, "train_speed(iter/s)": 0.41122 }, { "acc": 0.93440237, "epoch": 2.541223296239136, "grad_norm": 7.740653991699219, "learning_rate": 3.2170002794195533e-06, "loss": 0.35823488, "memory(GiB)": 34.88, "step": 93855, "train_speed(iter/s)": 0.411221 }, { "acc": 0.9292593, "epoch": 2.5413586765223513, "grad_norm": 5.907340049743652, "learning_rate": 3.216477543204529e-06, "loss": 0.37382042, "memory(GiB)": 34.88, "step": 93860, "train_speed(iter/s)": 0.411222 }, { "acc": 0.94787636, "epoch": 2.5414940568055666, "grad_norm": 6.581900119781494, "learning_rate": 3.2159548293378905e-06, "loss": 0.34074829, "memory(GiB)": 34.88, "step": 93865, "train_speed(iter/s)": 0.411223 }, { "acc": 0.93171072, "epoch": 2.5416294370887824, "grad_norm": 10.702001571655273, "learning_rate": 3.215432137826186e-06, "loss": 0.42630906, "memory(GiB)": 34.88, "step": 93870, "train_speed(iter/s)": 0.411224 }, { "acc": 0.93797894, "epoch": 2.541764817371998, "grad_norm": 12.705767631530762, "learning_rate": 3.2149094686759657e-06, "loss": 0.37955608, "memory(GiB)": 34.88, "step": 93875, "train_speed(iter/s)": 0.411225 }, { "acc": 0.93523512, "epoch": 2.5419001976552136, "grad_norm": 5.389180660247803, "learning_rate": 3.214386821893774e-06, "loss": 0.41110849, "memory(GiB)": 34.88, "step": 93880, "train_speed(iter/s)": 0.411226 }, { "acc": 0.93364887, "epoch": 2.542035577938429, "grad_norm": 6.178688049316406, "learning_rate": 3.213864197486161e-06, "loss": 0.39796782, "memory(GiB)": 34.88, "step": 93885, "train_speed(iter/s)": 0.411227 }, { "acc": 0.92456779, "epoch": 2.5421709582216447, "grad_norm": 7.743659019470215, "learning_rate": 3.2133415954596704e-06, "loss": 0.47082052, "memory(GiB)": 34.88, "step": 93890, "train_speed(iter/s)": 0.411229 }, { "acc": 0.92252302, "epoch": 2.54230633850486, "grad_norm": 15.113068580627441, "learning_rate": 3.212819015820852e-06, "loss": 0.49149661, "memory(GiB)": 34.88, "step": 93895, "train_speed(iter/s)": 0.41123 }, { "acc": 0.93167725, "epoch": 2.5424417187880755, "grad_norm": 6.265534400939941, "learning_rate": 3.2122964585762486e-06, "loss": 0.38590498, "memory(GiB)": 34.88, "step": 93900, "train_speed(iter/s)": 0.411231 }, { "acc": 0.92187109, "epoch": 2.5425770990712913, "grad_norm": 8.528377532958984, "learning_rate": 3.2117739237324115e-06, "loss": 0.44617214, "memory(GiB)": 34.88, "step": 93905, "train_speed(iter/s)": 0.411232 }, { "acc": 0.95182829, "epoch": 2.542712479354507, "grad_norm": 4.863974571228027, "learning_rate": 3.211251411295882e-06, "loss": 0.30090261, "memory(GiB)": 34.88, "step": 93910, "train_speed(iter/s)": 0.411233 }, { "acc": 0.94381018, "epoch": 2.5428478596377224, "grad_norm": 14.998480796813965, "learning_rate": 3.2107289212732056e-06, "loss": 0.34529986, "memory(GiB)": 34.88, "step": 93915, "train_speed(iter/s)": 0.411234 }, { "acc": 0.92560101, "epoch": 2.5429832399209378, "grad_norm": 3.962493896484375, "learning_rate": 3.2102064536709308e-06, "loss": 0.36531126, "memory(GiB)": 34.88, "step": 93920, "train_speed(iter/s)": 0.411236 }, { "acc": 0.94505529, "epoch": 2.5431186202041536, "grad_norm": 5.837821006774902, "learning_rate": 3.2096840084955994e-06, "loss": 0.26275177, "memory(GiB)": 34.88, "step": 93925, "train_speed(iter/s)": 0.411237 }, { "acc": 0.90790024, "epoch": 2.543254000487369, "grad_norm": 6.01898193359375, "learning_rate": 3.2091615857537577e-06, "loss": 0.44540515, "memory(GiB)": 34.88, "step": 93930, "train_speed(iter/s)": 0.411238 }, { "acc": 0.92528763, "epoch": 2.5433893807705843, "grad_norm": 7.864957809448242, "learning_rate": 3.2086391854519482e-06, "loss": 0.50669823, "memory(GiB)": 34.88, "step": 93935, "train_speed(iter/s)": 0.411239 }, { "acc": 0.94966755, "epoch": 2.5435247610538, "grad_norm": 17.654518127441406, "learning_rate": 3.2081168075967194e-06, "loss": 0.25589347, "memory(GiB)": 34.88, "step": 93940, "train_speed(iter/s)": 0.41124 }, { "acc": 0.94194746, "epoch": 2.543660141337016, "grad_norm": 6.024592399597168, "learning_rate": 3.2075944521946107e-06, "loss": 0.27811089, "memory(GiB)": 34.88, "step": 93945, "train_speed(iter/s)": 0.411242 }, { "acc": 0.93886786, "epoch": 2.5437955216202313, "grad_norm": 3.809525489807129, "learning_rate": 3.2070721192521663e-06, "loss": 0.34097457, "memory(GiB)": 34.88, "step": 93950, "train_speed(iter/s)": 0.411243 }, { "acc": 0.92735767, "epoch": 2.5439309019034466, "grad_norm": 9.17155933380127, "learning_rate": 3.2065498087759313e-06, "loss": 0.41084547, "memory(GiB)": 34.88, "step": 93955, "train_speed(iter/s)": 0.411244 }, { "acc": 0.92650623, "epoch": 2.5440662821866624, "grad_norm": 7.357034206390381, "learning_rate": 3.2060275207724464e-06, "loss": 0.42524366, "memory(GiB)": 34.88, "step": 93960, "train_speed(iter/s)": 0.411245 }, { "acc": 0.91966934, "epoch": 2.544201662469878, "grad_norm": 6.834507465362549, "learning_rate": 3.2055052552482558e-06, "loss": 0.52792892, "memory(GiB)": 34.88, "step": 93965, "train_speed(iter/s)": 0.411246 }, { "acc": 0.92223692, "epoch": 2.5443370427530936, "grad_norm": 9.311918258666992, "learning_rate": 3.2049830122099007e-06, "loss": 0.39151125, "memory(GiB)": 34.88, "step": 93970, "train_speed(iter/s)": 0.411248 }, { "acc": 0.94733868, "epoch": 2.544472423036309, "grad_norm": 8.512639045715332, "learning_rate": 3.2044607916639266e-06, "loss": 0.25700297, "memory(GiB)": 34.88, "step": 93975, "train_speed(iter/s)": 0.411249 }, { "acc": 0.94244213, "epoch": 2.5446078033195247, "grad_norm": 6.172922134399414, "learning_rate": 3.203938593616869e-06, "loss": 0.25646243, "memory(GiB)": 34.88, "step": 93980, "train_speed(iter/s)": 0.41125 }, { "acc": 0.92134771, "epoch": 2.54474318360274, "grad_norm": 16.817543029785156, "learning_rate": 3.2034164180752777e-06, "loss": 0.39280691, "memory(GiB)": 34.88, "step": 93985, "train_speed(iter/s)": 0.411251 }, { "acc": 0.93032703, "epoch": 2.5448785638859555, "grad_norm": 3.3021974563598633, "learning_rate": 3.202894265045688e-06, "loss": 0.39515121, "memory(GiB)": 34.88, "step": 93990, "train_speed(iter/s)": 0.411252 }, { "acc": 0.93739948, "epoch": 2.5450139441691713, "grad_norm": 5.81417179107666, "learning_rate": 3.2023721345346404e-06, "loss": 0.359689, "memory(GiB)": 34.88, "step": 93995, "train_speed(iter/s)": 0.411253 }, { "acc": 0.9331768, "epoch": 2.5451493244523866, "grad_norm": 5.318484306335449, "learning_rate": 3.2018500265486796e-06, "loss": 0.33597641, "memory(GiB)": 34.88, "step": 94000, "train_speed(iter/s)": 0.411255 }, { "acc": 0.94103489, "epoch": 2.5452847047356024, "grad_norm": 12.155571937561035, "learning_rate": 3.201327941094342e-06, "loss": 0.36147904, "memory(GiB)": 34.88, "step": 94005, "train_speed(iter/s)": 0.411256 }, { "acc": 0.92623062, "epoch": 2.545420085018818, "grad_norm": 7.956104278564453, "learning_rate": 3.2008058781781724e-06, "loss": 0.43319621, "memory(GiB)": 34.88, "step": 94010, "train_speed(iter/s)": 0.411257 }, { "acc": 0.92213535, "epoch": 2.5455554653020336, "grad_norm": 16.015796661376953, "learning_rate": 3.200283837806706e-06, "loss": 0.43917165, "memory(GiB)": 34.88, "step": 94015, "train_speed(iter/s)": 0.411258 }, { "acc": 0.92740993, "epoch": 2.545690845585249, "grad_norm": 9.321768760681152, "learning_rate": 3.199761819986486e-06, "loss": 0.43823676, "memory(GiB)": 34.88, "step": 94020, "train_speed(iter/s)": 0.411259 }, { "acc": 0.94155712, "epoch": 2.5458262258684643, "grad_norm": 11.566048622131348, "learning_rate": 3.1992398247240487e-06, "loss": 0.36923666, "memory(GiB)": 34.88, "step": 94025, "train_speed(iter/s)": 0.41126 }, { "acc": 0.92442932, "epoch": 2.54596160615168, "grad_norm": 10.993988037109375, "learning_rate": 3.1987178520259365e-06, "loss": 0.46250362, "memory(GiB)": 34.88, "step": 94030, "train_speed(iter/s)": 0.411261 }, { "acc": 0.92868557, "epoch": 2.5460969864348955, "grad_norm": 9.367181777954102, "learning_rate": 3.1981959018986854e-06, "loss": 0.36847067, "memory(GiB)": 34.88, "step": 94035, "train_speed(iter/s)": 0.411263 }, { "acc": 0.92367344, "epoch": 2.5462323667181113, "grad_norm": 9.653919219970703, "learning_rate": 3.197673974348833e-06, "loss": 0.46413984, "memory(GiB)": 34.88, "step": 94040, "train_speed(iter/s)": 0.411264 }, { "acc": 0.906674, "epoch": 2.5463677470013266, "grad_norm": 11.391246795654297, "learning_rate": 3.197152069382922e-06, "loss": 0.58842454, "memory(GiB)": 34.88, "step": 94045, "train_speed(iter/s)": 0.411265 }, { "acc": 0.91991987, "epoch": 2.5465031272845424, "grad_norm": 9.509532928466797, "learning_rate": 3.196630187007484e-06, "loss": 0.49301243, "memory(GiB)": 34.88, "step": 94050, "train_speed(iter/s)": 0.411266 }, { "acc": 0.93826523, "epoch": 2.546638507567758, "grad_norm": 3.035918951034546, "learning_rate": 3.196108327229063e-06, "loss": 0.352351, "memory(GiB)": 34.88, "step": 94055, "train_speed(iter/s)": 0.411267 }, { "acc": 0.91328049, "epoch": 2.546773887850973, "grad_norm": 8.90067195892334, "learning_rate": 3.195586490054191e-06, "loss": 0.57813935, "memory(GiB)": 34.88, "step": 94060, "train_speed(iter/s)": 0.411268 }, { "acc": 0.90990677, "epoch": 2.546909268134189, "grad_norm": 7.1095805168151855, "learning_rate": 3.1950646754894097e-06, "loss": 0.48638806, "memory(GiB)": 34.88, "step": 94065, "train_speed(iter/s)": 0.411269 }, { "acc": 0.93183136, "epoch": 2.5470446484174047, "grad_norm": 8.5700101852417, "learning_rate": 3.1945428835412527e-06, "loss": 0.35407116, "memory(GiB)": 34.88, "step": 94070, "train_speed(iter/s)": 0.411271 }, { "acc": 0.92328434, "epoch": 2.54718002870062, "grad_norm": 11.593368530273438, "learning_rate": 3.194021114216256e-06, "loss": 0.41459179, "memory(GiB)": 34.88, "step": 94075, "train_speed(iter/s)": 0.411272 }, { "acc": 0.93656464, "epoch": 2.5473154089838355, "grad_norm": 12.248679161071777, "learning_rate": 3.1934993675209595e-06, "loss": 0.35134664, "memory(GiB)": 34.88, "step": 94080, "train_speed(iter/s)": 0.411273 }, { "acc": 0.92572336, "epoch": 2.5474507892670513, "grad_norm": 7.766141891479492, "learning_rate": 3.192977643461893e-06, "loss": 0.43008137, "memory(GiB)": 34.88, "step": 94085, "train_speed(iter/s)": 0.411274 }, { "acc": 0.92847357, "epoch": 2.5475861695502666, "grad_norm": 8.484436988830566, "learning_rate": 3.192455942045599e-06, "loss": 0.40328875, "memory(GiB)": 34.88, "step": 94090, "train_speed(iter/s)": 0.411275 }, { "acc": 0.94062643, "epoch": 2.547721549833482, "grad_norm": 11.856599807739258, "learning_rate": 3.1919342632786075e-06, "loss": 0.32779536, "memory(GiB)": 34.88, "step": 94095, "train_speed(iter/s)": 0.411276 }, { "acc": 0.95251637, "epoch": 2.547856930116698, "grad_norm": 7.3482890129089355, "learning_rate": 3.1914126071674586e-06, "loss": 0.33233724, "memory(GiB)": 34.88, "step": 94100, "train_speed(iter/s)": 0.411277 }, { "acc": 0.92924023, "epoch": 2.5479923103999136, "grad_norm": 6.030131816864014, "learning_rate": 3.1908909737186803e-06, "loss": 0.36605864, "memory(GiB)": 34.88, "step": 94105, "train_speed(iter/s)": 0.411279 }, { "acc": 0.93025417, "epoch": 2.548127690683129, "grad_norm": 16.495637893676758, "learning_rate": 3.1903693629388145e-06, "loss": 0.45956726, "memory(GiB)": 34.88, "step": 94110, "train_speed(iter/s)": 0.41128 }, { "acc": 0.93221912, "epoch": 2.5482630709663443, "grad_norm": 6.527031898498535, "learning_rate": 3.1898477748343908e-06, "loss": 0.39808431, "memory(GiB)": 34.88, "step": 94115, "train_speed(iter/s)": 0.411281 }, { "acc": 0.92508812, "epoch": 2.54839845124956, "grad_norm": 6.51812219619751, "learning_rate": 3.189326209411942e-06, "loss": 0.47405877, "memory(GiB)": 34.88, "step": 94120, "train_speed(iter/s)": 0.411282 }, { "acc": 0.94381657, "epoch": 2.5485338315327755, "grad_norm": 11.532535552978516, "learning_rate": 3.188804666678007e-06, "loss": 0.32250829, "memory(GiB)": 34.88, "step": 94125, "train_speed(iter/s)": 0.411283 }, { "acc": 0.93655396, "epoch": 2.5486692118159913, "grad_norm": 8.34316635131836, "learning_rate": 3.1882831466391127e-06, "loss": 0.35361376, "memory(GiB)": 34.88, "step": 94130, "train_speed(iter/s)": 0.411284 }, { "acc": 0.93358593, "epoch": 2.5488045920992066, "grad_norm": 5.942145347595215, "learning_rate": 3.1877616493017994e-06, "loss": 0.37554636, "memory(GiB)": 34.88, "step": 94135, "train_speed(iter/s)": 0.411286 }, { "acc": 0.92423458, "epoch": 2.5489399723824224, "grad_norm": 8.83948802947998, "learning_rate": 3.1872401746725924e-06, "loss": 0.44645185, "memory(GiB)": 34.88, "step": 94140, "train_speed(iter/s)": 0.411287 }, { "acc": 0.93929691, "epoch": 2.549075352665638, "grad_norm": 7.304596424102783, "learning_rate": 3.1867187227580304e-06, "loss": 0.33460326, "memory(GiB)": 34.88, "step": 94145, "train_speed(iter/s)": 0.411288 }, { "acc": 0.90907116, "epoch": 2.549210732948853, "grad_norm": 17.198829650878906, "learning_rate": 3.18619729356464e-06, "loss": 0.56190109, "memory(GiB)": 34.88, "step": 94150, "train_speed(iter/s)": 0.411289 }, { "acc": 0.91443195, "epoch": 2.549346113232069, "grad_norm": 6.56757926940918, "learning_rate": 3.1856758870989586e-06, "loss": 0.48822641, "memory(GiB)": 34.88, "step": 94155, "train_speed(iter/s)": 0.41129 }, { "acc": 0.91112309, "epoch": 2.5494814935152843, "grad_norm": 13.429438591003418, "learning_rate": 3.1851545033675164e-06, "loss": 0.50486135, "memory(GiB)": 34.88, "step": 94160, "train_speed(iter/s)": 0.411292 }, { "acc": 0.93023319, "epoch": 2.5496168737985, "grad_norm": 5.202207088470459, "learning_rate": 3.18463314237684e-06, "loss": 0.47424221, "memory(GiB)": 34.88, "step": 94165, "train_speed(iter/s)": 0.411293 }, { "acc": 0.94281101, "epoch": 2.5497522540817155, "grad_norm": 5.631443500518799, "learning_rate": 3.184111804133467e-06, "loss": 0.35303659, "memory(GiB)": 34.88, "step": 94170, "train_speed(iter/s)": 0.411294 }, { "acc": 0.92806683, "epoch": 2.5498876343649313, "grad_norm": 19.470964431762695, "learning_rate": 3.183590488643924e-06, "loss": 0.3621892, "memory(GiB)": 34.88, "step": 94175, "train_speed(iter/s)": 0.411295 }, { "acc": 0.94053431, "epoch": 2.5500230146481466, "grad_norm": 7.02956485748291, "learning_rate": 3.183069195914744e-06, "loss": 0.29617872, "memory(GiB)": 34.88, "step": 94180, "train_speed(iter/s)": 0.411296 }, { "acc": 0.93722801, "epoch": 2.550158394931362, "grad_norm": 13.94780445098877, "learning_rate": 3.1825479259524544e-06, "loss": 0.39173193, "memory(GiB)": 34.88, "step": 94185, "train_speed(iter/s)": 0.411297 }, { "acc": 0.94942875, "epoch": 2.550293775214578, "grad_norm": 4.74600076675415, "learning_rate": 3.1820266787635873e-06, "loss": 0.31924541, "memory(GiB)": 34.88, "step": 94190, "train_speed(iter/s)": 0.411299 }, { "acc": 0.94259224, "epoch": 2.550429155497793, "grad_norm": 6.705563545227051, "learning_rate": 3.181505454354672e-06, "loss": 0.32098069, "memory(GiB)": 34.88, "step": 94195, "train_speed(iter/s)": 0.4113 }, { "acc": 0.93958969, "epoch": 2.550564535781009, "grad_norm": 8.194110870361328, "learning_rate": 3.180984252732238e-06, "loss": 0.34929895, "memory(GiB)": 34.88, "step": 94200, "train_speed(iter/s)": 0.411301 }, { "acc": 0.93806973, "epoch": 2.5506999160642243, "grad_norm": 7.982722759246826, "learning_rate": 3.1804630739028165e-06, "loss": 0.3343643, "memory(GiB)": 34.88, "step": 94205, "train_speed(iter/s)": 0.411302 }, { "acc": 0.93792152, "epoch": 2.55083529634744, "grad_norm": 43.79466247558594, "learning_rate": 3.1799419178729294e-06, "loss": 0.36991057, "memory(GiB)": 34.88, "step": 94210, "train_speed(iter/s)": 0.411303 }, { "acc": 0.92863836, "epoch": 2.5509706766306555, "grad_norm": 2.71661114692688, "learning_rate": 3.179420784649113e-06, "loss": 0.40236826, "memory(GiB)": 34.88, "step": 94215, "train_speed(iter/s)": 0.411304 }, { "acc": 0.92080822, "epoch": 2.551106056913871, "grad_norm": 8.98998737335205, "learning_rate": 3.1788996742378897e-06, "loss": 0.48751183, "memory(GiB)": 34.88, "step": 94220, "train_speed(iter/s)": 0.411305 }, { "acc": 0.92325268, "epoch": 2.5512414371970866, "grad_norm": 8.107810020446777, "learning_rate": 3.178378586645792e-06, "loss": 0.33885355, "memory(GiB)": 34.88, "step": 94225, "train_speed(iter/s)": 0.411306 }, { "acc": 0.93695602, "epoch": 2.5513768174803024, "grad_norm": 6.315950393676758, "learning_rate": 3.1778575218793435e-06, "loss": 0.36812606, "memory(GiB)": 34.88, "step": 94230, "train_speed(iter/s)": 0.411308 }, { "acc": 0.9198802, "epoch": 2.551512197763518, "grad_norm": 11.945412635803223, "learning_rate": 3.1773364799450752e-06, "loss": 0.48477659, "memory(GiB)": 34.88, "step": 94235, "train_speed(iter/s)": 0.411309 }, { "acc": 0.9257884, "epoch": 2.551647578046733, "grad_norm": 9.145096778869629, "learning_rate": 3.1768154608495135e-06, "loss": 0.48325357, "memory(GiB)": 34.88, "step": 94240, "train_speed(iter/s)": 0.41131 }, { "acc": 0.9387373, "epoch": 2.551782958329949, "grad_norm": 8.736928939819336, "learning_rate": 3.1762944645991807e-06, "loss": 0.34951987, "memory(GiB)": 34.88, "step": 94245, "train_speed(iter/s)": 0.411311 }, { "acc": 0.94563904, "epoch": 2.5519183386131643, "grad_norm": 5.731316089630127, "learning_rate": 3.1757734912006107e-06, "loss": 0.32679796, "memory(GiB)": 34.88, "step": 94250, "train_speed(iter/s)": 0.411313 }, { "acc": 0.93568029, "epoch": 2.5520537188963797, "grad_norm": 7.096762180328369, "learning_rate": 3.175252540660321e-06, "loss": 0.37428839, "memory(GiB)": 34.88, "step": 94255, "train_speed(iter/s)": 0.411314 }, { "acc": 0.93206949, "epoch": 2.5521890991795955, "grad_norm": 5.971183776855469, "learning_rate": 3.1747316129848467e-06, "loss": 0.37805574, "memory(GiB)": 34.88, "step": 94260, "train_speed(iter/s)": 0.411315 }, { "acc": 0.92092323, "epoch": 2.5523244794628113, "grad_norm": 9.635387420654297, "learning_rate": 3.1742107081807068e-06, "loss": 0.43642144, "memory(GiB)": 34.88, "step": 94265, "train_speed(iter/s)": 0.411316 }, { "acc": 0.93664627, "epoch": 2.5524598597460266, "grad_norm": 4.854458808898926, "learning_rate": 3.1736898262544296e-06, "loss": 0.34378071, "memory(GiB)": 34.88, "step": 94270, "train_speed(iter/s)": 0.411317 }, { "acc": 0.93723955, "epoch": 2.552595240029242, "grad_norm": 7.889804840087891, "learning_rate": 3.1731689672125393e-06, "loss": 0.36110468, "memory(GiB)": 34.88, "step": 94275, "train_speed(iter/s)": 0.411319 }, { "acc": 0.93934498, "epoch": 2.552730620312458, "grad_norm": 6.502099514007568, "learning_rate": 3.1726481310615616e-06, "loss": 0.27833586, "memory(GiB)": 34.88, "step": 94280, "train_speed(iter/s)": 0.41132 }, { "acc": 0.93326569, "epoch": 2.552866000595673, "grad_norm": 7.897733688354492, "learning_rate": 3.1721273178080207e-06, "loss": 0.35962656, "memory(GiB)": 34.88, "step": 94285, "train_speed(iter/s)": 0.411321 }, { "acc": 0.92017221, "epoch": 2.553001380878889, "grad_norm": 8.001781463623047, "learning_rate": 3.1716065274584383e-06, "loss": 0.44314294, "memory(GiB)": 34.88, "step": 94290, "train_speed(iter/s)": 0.411322 }, { "acc": 0.93112297, "epoch": 2.5531367611621043, "grad_norm": 6.24441385269165, "learning_rate": 3.171085760019343e-06, "loss": 0.27452941, "memory(GiB)": 34.88, "step": 94295, "train_speed(iter/s)": 0.411323 }, { "acc": 0.93400812, "epoch": 2.55327214144532, "grad_norm": 5.197391986846924, "learning_rate": 3.1705650154972532e-06, "loss": 0.44157825, "memory(GiB)": 34.88, "step": 94300, "train_speed(iter/s)": 0.411324 }, { "acc": 0.91067801, "epoch": 2.5534075217285355, "grad_norm": 10.166557312011719, "learning_rate": 3.1700442938986962e-06, "loss": 0.51704388, "memory(GiB)": 34.88, "step": 94305, "train_speed(iter/s)": 0.411326 }, { "acc": 0.91330471, "epoch": 2.553542902011751, "grad_norm": 5.114752292633057, "learning_rate": 3.1695235952301935e-06, "loss": 0.53993359, "memory(GiB)": 34.88, "step": 94310, "train_speed(iter/s)": 0.411327 }, { "acc": 0.92626925, "epoch": 2.5536782822949666, "grad_norm": 5.06894063949585, "learning_rate": 3.1690029194982686e-06, "loss": 0.37889376, "memory(GiB)": 34.88, "step": 94315, "train_speed(iter/s)": 0.411328 }, { "acc": 0.93840628, "epoch": 2.553813662578182, "grad_norm": 12.677532196044922, "learning_rate": 3.1684822667094423e-06, "loss": 0.36495023, "memory(GiB)": 34.88, "step": 94320, "train_speed(iter/s)": 0.411329 }, { "acc": 0.92383404, "epoch": 2.553949042861398, "grad_norm": 6.177188873291016, "learning_rate": 3.1679616368702397e-06, "loss": 0.41158371, "memory(GiB)": 34.88, "step": 94325, "train_speed(iter/s)": 0.411331 }, { "acc": 0.93583984, "epoch": 2.554084423144613, "grad_norm": 7.972995281219482, "learning_rate": 3.1674410299871818e-06, "loss": 0.41095619, "memory(GiB)": 34.88, "step": 94330, "train_speed(iter/s)": 0.411332 }, { "acc": 0.91081123, "epoch": 2.554219803427829, "grad_norm": 3.655768394470215, "learning_rate": 3.1669204460667864e-06, "loss": 0.51903505, "memory(GiB)": 34.88, "step": 94335, "train_speed(iter/s)": 0.411333 }, { "acc": 0.92291203, "epoch": 2.5543551837110443, "grad_norm": 9.708246231079102, "learning_rate": 3.1663998851155795e-06, "loss": 0.5193841, "memory(GiB)": 34.88, "step": 94340, "train_speed(iter/s)": 0.411334 }, { "acc": 0.93474989, "epoch": 2.5544905639942597, "grad_norm": 3.590066909790039, "learning_rate": 3.16587934714008e-06, "loss": 0.30649776, "memory(GiB)": 34.88, "step": 94345, "train_speed(iter/s)": 0.411335 }, { "acc": 0.94392147, "epoch": 2.5546259442774755, "grad_norm": 9.184433937072754, "learning_rate": 3.1653588321468103e-06, "loss": 0.35574765, "memory(GiB)": 34.88, "step": 94350, "train_speed(iter/s)": 0.411336 }, { "acc": 0.9391077, "epoch": 2.554761324560691, "grad_norm": 8.866734504699707, "learning_rate": 3.1648383401422883e-06, "loss": 0.28896513, "memory(GiB)": 34.88, "step": 94355, "train_speed(iter/s)": 0.411338 }, { "acc": 0.94720039, "epoch": 2.5548967048439066, "grad_norm": 1.9612767696380615, "learning_rate": 3.1643178711330376e-06, "loss": 0.29543586, "memory(GiB)": 34.88, "step": 94360, "train_speed(iter/s)": 0.411338 }, { "acc": 0.93884201, "epoch": 2.555032085127122, "grad_norm": 7.650562286376953, "learning_rate": 3.163797425125576e-06, "loss": 0.28292732, "memory(GiB)": 34.88, "step": 94365, "train_speed(iter/s)": 0.41134 }, { "acc": 0.94076643, "epoch": 2.555167465410338, "grad_norm": 7.856894493103027, "learning_rate": 3.163277002126421e-06, "loss": 0.33719101, "memory(GiB)": 34.88, "step": 94370, "train_speed(iter/s)": 0.411341 }, { "acc": 0.93859825, "epoch": 2.555302845693553, "grad_norm": 4.444840908050537, "learning_rate": 3.1627566021420956e-06, "loss": 0.34596148, "memory(GiB)": 34.88, "step": 94375, "train_speed(iter/s)": 0.411342 }, { "acc": 0.94343624, "epoch": 2.5554382259767685, "grad_norm": 7.7922139167785645, "learning_rate": 3.1622362251791174e-06, "loss": 0.33600769, "memory(GiB)": 34.88, "step": 94380, "train_speed(iter/s)": 0.411343 }, { "acc": 0.922019, "epoch": 2.5555736062599843, "grad_norm": 8.670194625854492, "learning_rate": 3.1617158712440054e-06, "loss": 0.42305346, "memory(GiB)": 34.88, "step": 94385, "train_speed(iter/s)": 0.411344 }, { "acc": 0.93868475, "epoch": 2.5557089865432, "grad_norm": 9.92763614654541, "learning_rate": 3.1611955403432764e-06, "loss": 0.35737493, "memory(GiB)": 34.88, "step": 94390, "train_speed(iter/s)": 0.411345 }, { "acc": 0.93507385, "epoch": 2.5558443668264155, "grad_norm": 11.229838371276855, "learning_rate": 3.160675232483452e-06, "loss": 0.36619511, "memory(GiB)": 34.88, "step": 94395, "train_speed(iter/s)": 0.411347 }, { "acc": 0.9286828, "epoch": 2.555979747109631, "grad_norm": 18.418210983276367, "learning_rate": 3.160154947671046e-06, "loss": 0.40828943, "memory(GiB)": 34.88, "step": 94400, "train_speed(iter/s)": 0.411348 }, { "acc": 0.93075275, "epoch": 2.5561151273928466, "grad_norm": 13.069154739379883, "learning_rate": 3.15963468591258e-06, "loss": 0.45158157, "memory(GiB)": 34.88, "step": 94405, "train_speed(iter/s)": 0.411349 }, { "acc": 0.92090292, "epoch": 2.556250507676062, "grad_norm": 11.026311874389648, "learning_rate": 3.159114447214569e-06, "loss": 0.42907696, "memory(GiB)": 34.88, "step": 94410, "train_speed(iter/s)": 0.41135 }, { "acc": 0.9405098, "epoch": 2.5563858879592773, "grad_norm": 9.876321792602539, "learning_rate": 3.15859423158353e-06, "loss": 0.41826925, "memory(GiB)": 34.88, "step": 94415, "train_speed(iter/s)": 0.411351 }, { "acc": 0.93049326, "epoch": 2.556521268242493, "grad_norm": 7.835499286651611, "learning_rate": 3.1580740390259796e-06, "loss": 0.43503232, "memory(GiB)": 34.88, "step": 94420, "train_speed(iter/s)": 0.411353 }, { "acc": 0.92559023, "epoch": 2.556656648525709, "grad_norm": 10.969868659973145, "learning_rate": 3.1575538695484347e-06, "loss": 0.40390415, "memory(GiB)": 34.88, "step": 94425, "train_speed(iter/s)": 0.411354 }, { "acc": 0.92074118, "epoch": 2.5567920288089243, "grad_norm": 6.309839248657227, "learning_rate": 3.1570337231574127e-06, "loss": 0.41456904, "memory(GiB)": 34.88, "step": 94430, "train_speed(iter/s)": 0.411355 }, { "acc": 0.90991592, "epoch": 2.5569274090921397, "grad_norm": 9.267011642456055, "learning_rate": 3.1565135998594264e-06, "loss": 0.53416595, "memory(GiB)": 34.88, "step": 94435, "train_speed(iter/s)": 0.411356 }, { "acc": 0.94289055, "epoch": 2.5570627893753555, "grad_norm": 5.9848222732543945, "learning_rate": 3.1559934996609946e-06, "loss": 0.30126181, "memory(GiB)": 34.88, "step": 94440, "train_speed(iter/s)": 0.411357 }, { "acc": 0.93548784, "epoch": 2.557198169658571, "grad_norm": 4.861409664154053, "learning_rate": 3.15547342256863e-06, "loss": 0.37081671, "memory(GiB)": 34.88, "step": 94445, "train_speed(iter/s)": 0.411358 }, { "acc": 0.94444447, "epoch": 2.5573335499417866, "grad_norm": 3.8110899925231934, "learning_rate": 3.1549533685888502e-06, "loss": 0.29710689, "memory(GiB)": 34.88, "step": 94450, "train_speed(iter/s)": 0.411359 }, { "acc": 0.93096628, "epoch": 2.557468930225002, "grad_norm": 11.224067687988281, "learning_rate": 3.1544333377281683e-06, "loss": 0.36794519, "memory(GiB)": 34.88, "step": 94455, "train_speed(iter/s)": 0.41136 }, { "acc": 0.9291502, "epoch": 2.557604310508218, "grad_norm": 6.08731746673584, "learning_rate": 3.153913329993098e-06, "loss": 0.40064211, "memory(GiB)": 34.88, "step": 94460, "train_speed(iter/s)": 0.411362 }, { "acc": 0.92298021, "epoch": 2.557739690791433, "grad_norm": 2.5200397968292236, "learning_rate": 3.1533933453901557e-06, "loss": 0.5019824, "memory(GiB)": 34.88, "step": 94465, "train_speed(iter/s)": 0.411363 }, { "acc": 0.91561613, "epoch": 2.5578750710746485, "grad_norm": 7.787918567657471, "learning_rate": 3.152873383925852e-06, "loss": 0.42055669, "memory(GiB)": 34.88, "step": 94470, "train_speed(iter/s)": 0.411364 }, { "acc": 0.93137379, "epoch": 2.5580104513578643, "grad_norm": 8.683953285217285, "learning_rate": 3.1523534456067027e-06, "loss": 0.41081095, "memory(GiB)": 34.88, "step": 94475, "train_speed(iter/s)": 0.411365 }, { "acc": 0.93766661, "epoch": 2.5581458316410797, "grad_norm": 16.755836486816406, "learning_rate": 3.1518335304392204e-06, "loss": 0.41578321, "memory(GiB)": 34.88, "step": 94480, "train_speed(iter/s)": 0.411366 }, { "acc": 0.92309399, "epoch": 2.5582812119242955, "grad_norm": 8.955488204956055, "learning_rate": 3.151313638429919e-06, "loss": 0.40470281, "memory(GiB)": 34.88, "step": 94485, "train_speed(iter/s)": 0.411368 }, { "acc": 0.9183012, "epoch": 2.558416592207511, "grad_norm": 11.991433143615723, "learning_rate": 3.150793769585311e-06, "loss": 0.47532129, "memory(GiB)": 34.88, "step": 94490, "train_speed(iter/s)": 0.411369 }, { "acc": 0.9244339, "epoch": 2.5585519724907266, "grad_norm": 6.78927755355835, "learning_rate": 3.150273923911906e-06, "loss": 0.52627592, "memory(GiB)": 34.88, "step": 94495, "train_speed(iter/s)": 0.41137 }, { "acc": 0.91763821, "epoch": 2.558687352773942, "grad_norm": 4.889894485473633, "learning_rate": 3.1497541014162207e-06, "loss": 0.45176988, "memory(GiB)": 34.88, "step": 94500, "train_speed(iter/s)": 0.411371 }, { "acc": 0.93055763, "epoch": 2.5588227330571574, "grad_norm": 14.926877975463867, "learning_rate": 3.1492343021047627e-06, "loss": 0.37940979, "memory(GiB)": 34.88, "step": 94505, "train_speed(iter/s)": 0.411372 }, { "acc": 0.9267252, "epoch": 2.558958113340373, "grad_norm": 11.281885147094727, "learning_rate": 3.148714525984047e-06, "loss": 0.3813767, "memory(GiB)": 34.88, "step": 94510, "train_speed(iter/s)": 0.411373 }, { "acc": 0.93804655, "epoch": 2.5590934936235885, "grad_norm": 5.232918739318848, "learning_rate": 3.1481947730605813e-06, "loss": 0.38076146, "memory(GiB)": 34.88, "step": 94515, "train_speed(iter/s)": 0.411375 }, { "acc": 0.92695732, "epoch": 2.5592288739068043, "grad_norm": 10.016961097717285, "learning_rate": 3.1476750433408792e-06, "loss": 0.40944982, "memory(GiB)": 34.88, "step": 94520, "train_speed(iter/s)": 0.411376 }, { "acc": 0.9218214, "epoch": 2.5593642541900197, "grad_norm": 6.4525933265686035, "learning_rate": 3.14715533683145e-06, "loss": 0.40951719, "memory(GiB)": 34.88, "step": 94525, "train_speed(iter/s)": 0.411377 }, { "acc": 0.92807713, "epoch": 2.5594996344732355, "grad_norm": 6.157740592956543, "learning_rate": 3.1466356535388054e-06, "loss": 0.3960515, "memory(GiB)": 34.88, "step": 94530, "train_speed(iter/s)": 0.411378 }, { "acc": 0.92041245, "epoch": 2.559635014756451, "grad_norm": 3.9849371910095215, "learning_rate": 3.146115993469455e-06, "loss": 0.41163054, "memory(GiB)": 34.88, "step": 94535, "train_speed(iter/s)": 0.411379 }, { "acc": 0.92654705, "epoch": 2.559770395039666, "grad_norm": 10.449682235717773, "learning_rate": 3.145596356629907e-06, "loss": 0.39799037, "memory(GiB)": 34.88, "step": 94540, "train_speed(iter/s)": 0.41138 }, { "acc": 0.93492374, "epoch": 2.559905775322882, "grad_norm": 5.853155612945557, "learning_rate": 3.145076743026672e-06, "loss": 0.31840868, "memory(GiB)": 34.88, "step": 94545, "train_speed(iter/s)": 0.411382 }, { "acc": 0.92462673, "epoch": 2.560041155606098, "grad_norm": 6.029699802398682, "learning_rate": 3.1445571526662595e-06, "loss": 0.47108793, "memory(GiB)": 34.88, "step": 94550, "train_speed(iter/s)": 0.411383 }, { "acc": 0.93675756, "epoch": 2.560176535889313, "grad_norm": 5.449054718017578, "learning_rate": 3.144037585555178e-06, "loss": 0.31396482, "memory(GiB)": 34.88, "step": 94555, "train_speed(iter/s)": 0.411384 }, { "acc": 0.93012056, "epoch": 2.5603119161725285, "grad_norm": 7.730164051055908, "learning_rate": 3.1435180416999357e-06, "loss": 0.42179642, "memory(GiB)": 34.88, "step": 94560, "train_speed(iter/s)": 0.411385 }, { "acc": 0.92579317, "epoch": 2.5604472964557443, "grad_norm": 12.424932479858398, "learning_rate": 3.1429985211070424e-06, "loss": 0.51337709, "memory(GiB)": 34.88, "step": 94565, "train_speed(iter/s)": 0.411386 }, { "acc": 0.94321995, "epoch": 2.5605826767389597, "grad_norm": 14.525492668151855, "learning_rate": 3.1424790237830034e-06, "loss": 0.26301847, "memory(GiB)": 34.88, "step": 94570, "train_speed(iter/s)": 0.411388 }, { "acc": 0.91800251, "epoch": 2.560718057022175, "grad_norm": 7.090066909790039, "learning_rate": 3.141959549734331e-06, "loss": 0.48100758, "memory(GiB)": 34.88, "step": 94575, "train_speed(iter/s)": 0.411389 }, { "acc": 0.9301589, "epoch": 2.560853437305391, "grad_norm": 4.061208724975586, "learning_rate": 3.141440098967528e-06, "loss": 0.43171768, "memory(GiB)": 34.88, "step": 94580, "train_speed(iter/s)": 0.41139 }, { "acc": 0.94325676, "epoch": 2.5609888175886066, "grad_norm": 8.842731475830078, "learning_rate": 3.1409206714891035e-06, "loss": 0.30941844, "memory(GiB)": 34.88, "step": 94585, "train_speed(iter/s)": 0.411391 }, { "acc": 0.92531319, "epoch": 2.561124197871822, "grad_norm": 6.7375593185424805, "learning_rate": 3.140401267305564e-06, "loss": 0.37602756, "memory(GiB)": 34.88, "step": 94590, "train_speed(iter/s)": 0.411392 }, { "acc": 0.93467827, "epoch": 2.5612595781550374, "grad_norm": 4.106562614440918, "learning_rate": 3.139881886423416e-06, "loss": 0.27820253, "memory(GiB)": 34.88, "step": 94595, "train_speed(iter/s)": 0.411393 }, { "acc": 0.94305563, "epoch": 2.561394958438253, "grad_norm": 6.092535495758057, "learning_rate": 3.139362528849167e-06, "loss": 0.39091132, "memory(GiB)": 34.88, "step": 94600, "train_speed(iter/s)": 0.411394 }, { "acc": 0.93378448, "epoch": 2.5615303387214685, "grad_norm": 15.528390884399414, "learning_rate": 3.1388431945893195e-06, "loss": 0.39404111, "memory(GiB)": 34.88, "step": 94605, "train_speed(iter/s)": 0.411395 }, { "acc": 0.92596788, "epoch": 2.5616657190046843, "grad_norm": 5.50469446182251, "learning_rate": 3.1383238836503845e-06, "loss": 0.42343245, "memory(GiB)": 34.88, "step": 94610, "train_speed(iter/s)": 0.411396 }, { "acc": 0.93547478, "epoch": 2.5618010992878997, "grad_norm": 4.765520095825195, "learning_rate": 3.1378045960388644e-06, "loss": 0.32272532, "memory(GiB)": 34.88, "step": 94615, "train_speed(iter/s)": 0.411398 }, { "acc": 0.91361217, "epoch": 2.5619364795711155, "grad_norm": 11.200407028198242, "learning_rate": 3.1372853317612627e-06, "loss": 0.48354654, "memory(GiB)": 34.88, "step": 94620, "train_speed(iter/s)": 0.411399 }, { "acc": 0.93653374, "epoch": 2.562071859854331, "grad_norm": 5.4848151206970215, "learning_rate": 3.136766090824087e-06, "loss": 0.33511953, "memory(GiB)": 34.88, "step": 94625, "train_speed(iter/s)": 0.4114 }, { "acc": 0.93611088, "epoch": 2.562207240137546, "grad_norm": 5.568665504455566, "learning_rate": 3.13624687323384e-06, "loss": 0.36613169, "memory(GiB)": 34.88, "step": 94630, "train_speed(iter/s)": 0.411401 }, { "acc": 0.92145519, "epoch": 2.562342620420762, "grad_norm": 7.541163444519043, "learning_rate": 3.1357276789970274e-06, "loss": 0.43619914, "memory(GiB)": 34.88, "step": 94635, "train_speed(iter/s)": 0.411402 }, { "acc": 0.91395121, "epoch": 2.5624780007039774, "grad_norm": 20.13169288635254, "learning_rate": 3.1352085081201516e-06, "loss": 0.41983266, "memory(GiB)": 34.88, "step": 94640, "train_speed(iter/s)": 0.411403 }, { "acc": 0.93897514, "epoch": 2.562613380987193, "grad_norm": 5.498512268066406, "learning_rate": 3.134689360609718e-06, "loss": 0.34009919, "memory(GiB)": 34.88, "step": 94645, "train_speed(iter/s)": 0.411405 }, { "acc": 0.92964964, "epoch": 2.5627487612704085, "grad_norm": 21.575908660888672, "learning_rate": 3.1341702364722264e-06, "loss": 0.41497326, "memory(GiB)": 34.88, "step": 94650, "train_speed(iter/s)": 0.411406 }, { "acc": 0.93490543, "epoch": 2.5628841415536243, "grad_norm": 10.481834411621094, "learning_rate": 3.1336511357141865e-06, "loss": 0.3545501, "memory(GiB)": 34.88, "step": 94655, "train_speed(iter/s)": 0.411407 }, { "acc": 0.9197854, "epoch": 2.5630195218368397, "grad_norm": 20.68280792236328, "learning_rate": 3.1331320583420947e-06, "loss": 0.43888321, "memory(GiB)": 34.88, "step": 94660, "train_speed(iter/s)": 0.411408 }, { "acc": 0.93047428, "epoch": 2.563154902120055, "grad_norm": 4.796938419342041, "learning_rate": 3.1326130043624564e-06, "loss": 0.35473428, "memory(GiB)": 34.88, "step": 94665, "train_speed(iter/s)": 0.411409 }, { "acc": 0.92344236, "epoch": 2.563290282403271, "grad_norm": 30.79435157775879, "learning_rate": 3.132093973781773e-06, "loss": 0.48026538, "memory(GiB)": 34.88, "step": 94670, "train_speed(iter/s)": 0.41141 }, { "acc": 0.93665953, "epoch": 2.563425662686486, "grad_norm": 10.465510368347168, "learning_rate": 3.1315749666065453e-06, "loss": 0.37236214, "memory(GiB)": 34.88, "step": 94675, "train_speed(iter/s)": 0.411412 }, { "acc": 0.92783489, "epoch": 2.563561042969702, "grad_norm": 4.62612771987915, "learning_rate": 3.1310559828432774e-06, "loss": 0.44970918, "memory(GiB)": 34.88, "step": 94680, "train_speed(iter/s)": 0.411413 }, { "acc": 0.92064543, "epoch": 2.5636964232529174, "grad_norm": 62.17148971557617, "learning_rate": 3.1305370224984676e-06, "loss": 0.45434217, "memory(GiB)": 34.88, "step": 94685, "train_speed(iter/s)": 0.411414 }, { "acc": 0.91787939, "epoch": 2.563831803536133, "grad_norm": 5.6564130783081055, "learning_rate": 3.1300180855786213e-06, "loss": 0.49792013, "memory(GiB)": 34.88, "step": 94690, "train_speed(iter/s)": 0.411415 }, { "acc": 0.92733269, "epoch": 2.5639671838193485, "grad_norm": 7.514336585998535, "learning_rate": 3.1294991720902334e-06, "loss": 0.40546336, "memory(GiB)": 34.88, "step": 94695, "train_speed(iter/s)": 0.411416 }, { "acc": 0.927491, "epoch": 2.564102564102564, "grad_norm": 7.004718780517578, "learning_rate": 3.12898028203981e-06, "loss": 0.42085099, "memory(GiB)": 34.88, "step": 94700, "train_speed(iter/s)": 0.411417 }, { "acc": 0.9390276, "epoch": 2.5642379443857797, "grad_norm": 3.8518006801605225, "learning_rate": 3.1284614154338483e-06, "loss": 0.35941751, "memory(GiB)": 34.88, "step": 94705, "train_speed(iter/s)": 0.411418 }, { "acc": 0.92045765, "epoch": 2.5643733246689955, "grad_norm": 10.29192066192627, "learning_rate": 3.127942572278847e-06, "loss": 0.4558763, "memory(GiB)": 34.88, "step": 94710, "train_speed(iter/s)": 0.411419 }, { "acc": 0.94732866, "epoch": 2.564508704952211, "grad_norm": 5.983648300170898, "learning_rate": 3.1274237525813082e-06, "loss": 0.30839181, "memory(GiB)": 34.88, "step": 94715, "train_speed(iter/s)": 0.411421 }, { "acc": 0.92150383, "epoch": 2.564644085235426, "grad_norm": 2.9482181072235107, "learning_rate": 3.1269049563477276e-06, "loss": 0.44187088, "memory(GiB)": 34.88, "step": 94720, "train_speed(iter/s)": 0.411422 }, { "acc": 0.92481995, "epoch": 2.564779465518642, "grad_norm": 5.27713680267334, "learning_rate": 3.1263861835846105e-06, "loss": 0.37415333, "memory(GiB)": 34.88, "step": 94725, "train_speed(iter/s)": 0.411423 }, { "acc": 0.93829145, "epoch": 2.5649148458018574, "grad_norm": 13.081374168395996, "learning_rate": 3.1258674342984472e-06, "loss": 0.28753686, "memory(GiB)": 34.88, "step": 94730, "train_speed(iter/s)": 0.411424 }, { "acc": 0.94630375, "epoch": 2.5650502260850727, "grad_norm": 7.649958610534668, "learning_rate": 3.1253487084957447e-06, "loss": 0.357252, "memory(GiB)": 34.88, "step": 94735, "train_speed(iter/s)": 0.411425 }, { "acc": 0.92848463, "epoch": 2.5651856063682885, "grad_norm": 7.147651672363281, "learning_rate": 3.1248300061829933e-06, "loss": 0.38857343, "memory(GiB)": 34.88, "step": 94740, "train_speed(iter/s)": 0.411427 }, { "acc": 0.94759159, "epoch": 2.5653209866515043, "grad_norm": 8.333534240722656, "learning_rate": 3.124311327366697e-06, "loss": 0.29231448, "memory(GiB)": 34.88, "step": 94745, "train_speed(iter/s)": 0.411428 }, { "acc": 0.91522961, "epoch": 2.5654563669347197, "grad_norm": 14.04869270324707, "learning_rate": 3.12379267205335e-06, "loss": 0.51046944, "memory(GiB)": 34.88, "step": 94750, "train_speed(iter/s)": 0.411429 }, { "acc": 0.92380848, "epoch": 2.565591747217935, "grad_norm": 8.588003158569336, "learning_rate": 3.123274040249447e-06, "loss": 0.39004843, "memory(GiB)": 34.88, "step": 94755, "train_speed(iter/s)": 0.41143 }, { "acc": 0.92723961, "epoch": 2.565727127501151, "grad_norm": 10.654952049255371, "learning_rate": 3.1227554319614927e-06, "loss": 0.37014766, "memory(GiB)": 34.88, "step": 94760, "train_speed(iter/s)": 0.411431 }, { "acc": 0.93108292, "epoch": 2.565862507784366, "grad_norm": 12.540481567382812, "learning_rate": 3.1222368471959745e-06, "loss": 0.3707906, "memory(GiB)": 34.88, "step": 94765, "train_speed(iter/s)": 0.411432 }, { "acc": 0.92647009, "epoch": 2.565997888067582, "grad_norm": 6.326280117034912, "learning_rate": 3.1217182859593964e-06, "loss": 0.39991088, "memory(GiB)": 34.88, "step": 94770, "train_speed(iter/s)": 0.411433 }, { "acc": 0.93899279, "epoch": 2.5661332683507974, "grad_norm": 8.145088195800781, "learning_rate": 3.121199748258248e-06, "loss": 0.35163927, "memory(GiB)": 34.88, "step": 94775, "train_speed(iter/s)": 0.411434 }, { "acc": 0.91520662, "epoch": 2.566268648634013, "grad_norm": 19.900163650512695, "learning_rate": 3.1206812340990312e-06, "loss": 0.46420808, "memory(GiB)": 34.88, "step": 94780, "train_speed(iter/s)": 0.411436 }, { "acc": 0.94268322, "epoch": 2.5664040289172285, "grad_norm": 9.743640899658203, "learning_rate": 3.1201627434882377e-06, "loss": 0.35645409, "memory(GiB)": 34.88, "step": 94785, "train_speed(iter/s)": 0.411437 }, { "acc": 0.93936615, "epoch": 2.566539409200444, "grad_norm": 11.095318794250488, "learning_rate": 3.11964427643236e-06, "loss": 0.35283947, "memory(GiB)": 34.88, "step": 94790, "train_speed(iter/s)": 0.411438 }, { "acc": 0.94037476, "epoch": 2.5666747894836597, "grad_norm": 7.691071510314941, "learning_rate": 3.1191258329379006e-06, "loss": 0.28977847, "memory(GiB)": 34.88, "step": 94795, "train_speed(iter/s)": 0.411439 }, { "acc": 0.92457161, "epoch": 2.566810169766875, "grad_norm": 9.575002670288086, "learning_rate": 3.1186074130113454e-06, "loss": 0.43091564, "memory(GiB)": 34.88, "step": 94800, "train_speed(iter/s)": 0.41144 }, { "acc": 0.94188166, "epoch": 2.566945550050091, "grad_norm": 6.1121110916137695, "learning_rate": 3.118089016659196e-06, "loss": 0.35793483, "memory(GiB)": 34.88, "step": 94805, "train_speed(iter/s)": 0.411441 }, { "acc": 0.91063061, "epoch": 2.567080930333306, "grad_norm": 12.367293357849121, "learning_rate": 3.1175706438879403e-06, "loss": 0.47381511, "memory(GiB)": 34.88, "step": 94810, "train_speed(iter/s)": 0.411442 }, { "acc": 0.92305708, "epoch": 2.567216310616522, "grad_norm": 7.790286540985107, "learning_rate": 3.1170522947040776e-06, "loss": 0.40225754, "memory(GiB)": 34.88, "step": 94815, "train_speed(iter/s)": 0.411443 }, { "acc": 0.92617283, "epoch": 2.5673516908997374, "grad_norm": 6.518309593200684, "learning_rate": 3.1165339691140954e-06, "loss": 0.45119171, "memory(GiB)": 34.88, "step": 94820, "train_speed(iter/s)": 0.411445 }, { "acc": 0.94055634, "epoch": 2.5674870711829527, "grad_norm": 6.965823173522949, "learning_rate": 3.1160156671244923e-06, "loss": 0.29023561, "memory(GiB)": 34.88, "step": 94825, "train_speed(iter/s)": 0.411446 }, { "acc": 0.92805824, "epoch": 2.5676224514661685, "grad_norm": 12.162972450256348, "learning_rate": 3.1154973887417585e-06, "loss": 0.43289762, "memory(GiB)": 34.88, "step": 94830, "train_speed(iter/s)": 0.411447 }, { "acc": 0.93851824, "epoch": 2.567757831749384, "grad_norm": 6.661795139312744, "learning_rate": 3.114979133972384e-06, "loss": 0.3814394, "memory(GiB)": 34.88, "step": 94835, "train_speed(iter/s)": 0.411448 }, { "acc": 0.94526958, "epoch": 2.5678932120325997, "grad_norm": 4.069313049316406, "learning_rate": 3.114460902822867e-06, "loss": 0.29132957, "memory(GiB)": 34.88, "step": 94840, "train_speed(iter/s)": 0.411449 }, { "acc": 0.93047848, "epoch": 2.568028592315815, "grad_norm": 11.443397521972656, "learning_rate": 3.1139426952996908e-06, "loss": 0.38978539, "memory(GiB)": 34.88, "step": 94845, "train_speed(iter/s)": 0.41145 }, { "acc": 0.92517862, "epoch": 2.568163972599031, "grad_norm": 3.077221632003784, "learning_rate": 3.113424511409357e-06, "loss": 0.38040681, "memory(GiB)": 34.88, "step": 94850, "train_speed(iter/s)": 0.411452 }, { "acc": 0.94095325, "epoch": 2.568299352882246, "grad_norm": 12.941804885864258, "learning_rate": 3.1129063511583475e-06, "loss": 0.36049843, "memory(GiB)": 34.88, "step": 94855, "train_speed(iter/s)": 0.411453 }, { "acc": 0.93855944, "epoch": 2.5684347331654616, "grad_norm": 6.934634685516357, "learning_rate": 3.112388214553161e-06, "loss": 0.327352, "memory(GiB)": 34.88, "step": 94860, "train_speed(iter/s)": 0.411454 }, { "acc": 0.93277321, "epoch": 2.5685701134486774, "grad_norm": 7.764969348907471, "learning_rate": 3.1118701016002817e-06, "loss": 0.38452573, "memory(GiB)": 34.88, "step": 94865, "train_speed(iter/s)": 0.411455 }, { "acc": 0.92692719, "epoch": 2.568705493731893, "grad_norm": 10.091273307800293, "learning_rate": 3.1113520123062053e-06, "loss": 0.39684391, "memory(GiB)": 34.88, "step": 94870, "train_speed(iter/s)": 0.411456 }, { "acc": 0.92852011, "epoch": 2.5688408740151085, "grad_norm": 8.258753776550293, "learning_rate": 3.1108339466774206e-06, "loss": 0.37340651, "memory(GiB)": 34.88, "step": 94875, "train_speed(iter/s)": 0.411457 }, { "acc": 0.91721449, "epoch": 2.568976254298324, "grad_norm": 9.078991889953613, "learning_rate": 3.1103159047204127e-06, "loss": 0.46555114, "memory(GiB)": 34.88, "step": 94880, "train_speed(iter/s)": 0.411458 }, { "acc": 0.92753677, "epoch": 2.5691116345815397, "grad_norm": 8.755276679992676, "learning_rate": 3.109797886441678e-06, "loss": 0.40692396, "memory(GiB)": 34.88, "step": 94885, "train_speed(iter/s)": 0.41146 }, { "acc": 0.93915424, "epoch": 2.569247014864755, "grad_norm": 7.461318492889404, "learning_rate": 3.1092798918477e-06, "loss": 0.33291039, "memory(GiB)": 34.88, "step": 94890, "train_speed(iter/s)": 0.411461 }, { "acc": 0.93400097, "epoch": 2.5693823951479704, "grad_norm": 3.8181657791137695, "learning_rate": 3.1087619209449725e-06, "loss": 0.28588743, "memory(GiB)": 34.88, "step": 94895, "train_speed(iter/s)": 0.411462 }, { "acc": 0.94833517, "epoch": 2.569517775431186, "grad_norm": 8.152836799621582, "learning_rate": 3.1082439737399793e-06, "loss": 0.26474543, "memory(GiB)": 34.88, "step": 94900, "train_speed(iter/s)": 0.411463 }, { "acc": 0.92480698, "epoch": 2.569653155714402, "grad_norm": 6.240659236907959, "learning_rate": 3.1077260502392137e-06, "loss": 0.39289289, "memory(GiB)": 34.88, "step": 94905, "train_speed(iter/s)": 0.411464 }, { "acc": 0.93127041, "epoch": 2.5697885359976174, "grad_norm": 6.476590156555176, "learning_rate": 3.10720815044916e-06, "loss": 0.38138571, "memory(GiB)": 34.88, "step": 94910, "train_speed(iter/s)": 0.411465 }, { "acc": 0.92494259, "epoch": 2.5699239162808327, "grad_norm": 4.6783905029296875, "learning_rate": 3.106690274376306e-06, "loss": 0.40734024, "memory(GiB)": 34.88, "step": 94915, "train_speed(iter/s)": 0.411467 }, { "acc": 0.93535557, "epoch": 2.5700592965640485, "grad_norm": 11.878337860107422, "learning_rate": 3.106172422027143e-06, "loss": 0.36013262, "memory(GiB)": 34.88, "step": 94920, "train_speed(iter/s)": 0.411468 }, { "acc": 0.92619324, "epoch": 2.570194676847264, "grad_norm": 9.13138198852539, "learning_rate": 3.105654593408151e-06, "loss": 0.43314781, "memory(GiB)": 34.88, "step": 94925, "train_speed(iter/s)": 0.411469 }, { "acc": 0.93249207, "epoch": 2.5703300571304792, "grad_norm": 6.289922714233398, "learning_rate": 3.1051367885258255e-06, "loss": 0.36643922, "memory(GiB)": 34.88, "step": 94930, "train_speed(iter/s)": 0.41147 }, { "acc": 0.90902948, "epoch": 2.570465437413695, "grad_norm": 9.972725868225098, "learning_rate": 3.1046190073866446e-06, "loss": 0.54382277, "memory(GiB)": 34.88, "step": 94935, "train_speed(iter/s)": 0.411471 }, { "acc": 0.91898146, "epoch": 2.570600817696911, "grad_norm": 13.26032829284668, "learning_rate": 3.104101249997101e-06, "loss": 0.53064594, "memory(GiB)": 34.88, "step": 94940, "train_speed(iter/s)": 0.411472 }, { "acc": 0.94400024, "epoch": 2.570736197980126, "grad_norm": 7.169956684112549, "learning_rate": 3.1035835163636767e-06, "loss": 0.35666285, "memory(GiB)": 34.88, "step": 94945, "train_speed(iter/s)": 0.411474 }, { "acc": 0.94205418, "epoch": 2.5708715782633416, "grad_norm": 6.263045310974121, "learning_rate": 3.1030658064928603e-06, "loss": 0.37474594, "memory(GiB)": 34.88, "step": 94950, "train_speed(iter/s)": 0.411475 }, { "acc": 0.92623177, "epoch": 2.5710069585465574, "grad_norm": 5.598337173461914, "learning_rate": 3.1025481203911365e-06, "loss": 0.41128693, "memory(GiB)": 34.88, "step": 94955, "train_speed(iter/s)": 0.411476 }, { "acc": 0.93233871, "epoch": 2.5711423388297727, "grad_norm": 5.616158485412598, "learning_rate": 3.1020304580649863e-06, "loss": 0.3547647, "memory(GiB)": 34.88, "step": 94960, "train_speed(iter/s)": 0.411477 }, { "acc": 0.92080021, "epoch": 2.5712777191129885, "grad_norm": 8.762577056884766, "learning_rate": 3.1015128195209006e-06, "loss": 0.45583124, "memory(GiB)": 34.88, "step": 94965, "train_speed(iter/s)": 0.411478 }, { "acc": 0.93133621, "epoch": 2.571413099396204, "grad_norm": 10.492204666137695, "learning_rate": 3.100995204765358e-06, "loss": 0.33539886, "memory(GiB)": 34.88, "step": 94970, "train_speed(iter/s)": 0.411479 }, { "acc": 0.93201103, "epoch": 2.5715484796794197, "grad_norm": 3.9514036178588867, "learning_rate": 3.1004776138048475e-06, "loss": 0.38418884, "memory(GiB)": 34.88, "step": 94975, "train_speed(iter/s)": 0.41148 }, { "acc": 0.92675858, "epoch": 2.571683859962635, "grad_norm": 4.579909801483154, "learning_rate": 3.0999600466458496e-06, "loss": 0.40059543, "memory(GiB)": 34.88, "step": 94980, "train_speed(iter/s)": 0.411481 }, { "acc": 0.92738972, "epoch": 2.5718192402458504, "grad_norm": 5.766902446746826, "learning_rate": 3.09944250329485e-06, "loss": 0.36798613, "memory(GiB)": 34.88, "step": 94985, "train_speed(iter/s)": 0.411482 }, { "acc": 0.93837013, "epoch": 2.571954620529066, "grad_norm": 21.204809188842773, "learning_rate": 3.0989249837583306e-06, "loss": 0.35446863, "memory(GiB)": 34.88, "step": 94990, "train_speed(iter/s)": 0.411484 }, { "acc": 0.92234612, "epoch": 2.5720900008122816, "grad_norm": 10.972248077392578, "learning_rate": 3.0984074880427763e-06, "loss": 0.40942831, "memory(GiB)": 34.88, "step": 94995, "train_speed(iter/s)": 0.411485 }, { "acc": 0.94087906, "epoch": 2.5722253810954974, "grad_norm": 3.988112688064575, "learning_rate": 3.097890016154668e-06, "loss": 0.32634306, "memory(GiB)": 34.88, "step": 95000, "train_speed(iter/s)": 0.411486 }, { "acc": 0.94353695, "epoch": 2.5723607613787127, "grad_norm": 8.431045532226562, "learning_rate": 3.0973725681004863e-06, "loss": 0.40100379, "memory(GiB)": 34.88, "step": 95005, "train_speed(iter/s)": 0.411487 }, { "acc": 0.9340661, "epoch": 2.5724961416619285, "grad_norm": 5.148039817810059, "learning_rate": 3.0968551438867177e-06, "loss": 0.34109325, "memory(GiB)": 34.88, "step": 95010, "train_speed(iter/s)": 0.411488 }, { "acc": 0.93057384, "epoch": 2.572631521945144, "grad_norm": 8.549177169799805, "learning_rate": 3.0963377435198403e-06, "loss": 0.39612112, "memory(GiB)": 34.88, "step": 95015, "train_speed(iter/s)": 0.411489 }, { "acc": 0.92493038, "epoch": 2.5727669022283592, "grad_norm": 10.851940155029297, "learning_rate": 3.0958203670063374e-06, "loss": 0.40059433, "memory(GiB)": 34.88, "step": 95020, "train_speed(iter/s)": 0.411491 }, { "acc": 0.91565075, "epoch": 2.572902282511575, "grad_norm": 14.979216575622559, "learning_rate": 3.0953030143526884e-06, "loss": 0.43460093, "memory(GiB)": 34.88, "step": 95025, "train_speed(iter/s)": 0.411492 }, { "acc": 0.93210402, "epoch": 2.5730376627947904, "grad_norm": 8.103344917297363, "learning_rate": 3.094785685565377e-06, "loss": 0.43324914, "memory(GiB)": 34.88, "step": 95030, "train_speed(iter/s)": 0.411493 }, { "acc": 0.93032789, "epoch": 2.573173043078006, "grad_norm": 8.52029037475586, "learning_rate": 3.0942683806508832e-06, "loss": 0.40271688, "memory(GiB)": 34.88, "step": 95035, "train_speed(iter/s)": 0.411494 }, { "acc": 0.93197861, "epoch": 2.5733084233612216, "grad_norm": 17.12192153930664, "learning_rate": 3.093751099615683e-06, "loss": 0.38115327, "memory(GiB)": 34.88, "step": 95040, "train_speed(iter/s)": 0.411495 }, { "acc": 0.93047705, "epoch": 2.5734438036444374, "grad_norm": 11.288269996643066, "learning_rate": 3.0932338424662616e-06, "loss": 0.40219021, "memory(GiB)": 34.88, "step": 95045, "train_speed(iter/s)": 0.411496 }, { "acc": 0.92211571, "epoch": 2.5735791839276527, "grad_norm": 55.21123504638672, "learning_rate": 3.0927166092090944e-06, "loss": 0.4636919, "memory(GiB)": 34.88, "step": 95050, "train_speed(iter/s)": 0.411497 }, { "acc": 0.9444067, "epoch": 2.573714564210868, "grad_norm": 6.5228118896484375, "learning_rate": 3.092199399850665e-06, "loss": 0.35513062, "memory(GiB)": 34.88, "step": 95055, "train_speed(iter/s)": 0.411499 }, { "acc": 0.92554226, "epoch": 2.573849944494084, "grad_norm": 4.347658157348633, "learning_rate": 3.0916822143974495e-06, "loss": 0.40187345, "memory(GiB)": 34.88, "step": 95060, "train_speed(iter/s)": 0.4115 }, { "acc": 0.92641687, "epoch": 2.5739853247772997, "grad_norm": 5.953907012939453, "learning_rate": 3.0911650528559285e-06, "loss": 0.42478719, "memory(GiB)": 34.88, "step": 95065, "train_speed(iter/s)": 0.411501 }, { "acc": 0.92702904, "epoch": 2.574120705060515, "grad_norm": 7.505028247833252, "learning_rate": 3.090647915232578e-06, "loss": 0.32873979, "memory(GiB)": 34.88, "step": 95070, "train_speed(iter/s)": 0.411502 }, { "acc": 0.94470901, "epoch": 2.5742560853437304, "grad_norm": 6.118077754974365, "learning_rate": 3.0901308015338795e-06, "loss": 0.3962219, "memory(GiB)": 34.88, "step": 95075, "train_speed(iter/s)": 0.411503 }, { "acc": 0.93878756, "epoch": 2.574391465626946, "grad_norm": 5.163037300109863, "learning_rate": 3.08961371176631e-06, "loss": 0.37915094, "memory(GiB)": 34.88, "step": 95080, "train_speed(iter/s)": 0.411504 }, { "acc": 0.93979301, "epoch": 2.5745268459101616, "grad_norm": 8.431055068969727, "learning_rate": 3.0890966459363426e-06, "loss": 0.28112235, "memory(GiB)": 34.88, "step": 95085, "train_speed(iter/s)": 0.411505 }, { "acc": 0.9376543, "epoch": 2.574662226193377, "grad_norm": 9.453386306762695, "learning_rate": 3.088579604050461e-06, "loss": 0.35029354, "memory(GiB)": 34.88, "step": 95090, "train_speed(iter/s)": 0.411506 }, { "acc": 0.94830933, "epoch": 2.5747976064765927, "grad_norm": 3.370405435562134, "learning_rate": 3.0880625861151373e-06, "loss": 0.22591391, "memory(GiB)": 34.88, "step": 95095, "train_speed(iter/s)": 0.411508 }, { "acc": 0.93889446, "epoch": 2.5749329867598085, "grad_norm": 7.019092559814453, "learning_rate": 3.087545592136852e-06, "loss": 0.2763814, "memory(GiB)": 34.88, "step": 95100, "train_speed(iter/s)": 0.411509 }, { "acc": 0.91779652, "epoch": 2.575068367043024, "grad_norm": 6.393491744995117, "learning_rate": 3.0870286221220775e-06, "loss": 0.47298326, "memory(GiB)": 34.88, "step": 95105, "train_speed(iter/s)": 0.41151 }, { "acc": 0.93007078, "epoch": 2.5752037473262392, "grad_norm": 4.627683639526367, "learning_rate": 3.0865116760772935e-06, "loss": 0.32176976, "memory(GiB)": 34.88, "step": 95110, "train_speed(iter/s)": 0.411511 }, { "acc": 0.95100079, "epoch": 2.575339127609455, "grad_norm": 9.674215316772461, "learning_rate": 3.0859947540089735e-06, "loss": 0.25326281, "memory(GiB)": 34.88, "step": 95115, "train_speed(iter/s)": 0.411512 }, { "acc": 0.94325581, "epoch": 2.5754745078926704, "grad_norm": 5.594941139221191, "learning_rate": 3.0854778559235947e-06, "loss": 0.29109027, "memory(GiB)": 34.88, "step": 95120, "train_speed(iter/s)": 0.411513 }, { "acc": 0.93721714, "epoch": 2.575609888175886, "grad_norm": 3.4933738708496094, "learning_rate": 3.0849609818276303e-06, "loss": 0.40044727, "memory(GiB)": 34.88, "step": 95125, "train_speed(iter/s)": 0.411514 }, { "acc": 0.93242073, "epoch": 2.5757452684591016, "grad_norm": 13.331485748291016, "learning_rate": 3.0844441317275566e-06, "loss": 0.36351566, "memory(GiB)": 34.88, "step": 95130, "train_speed(iter/s)": 0.411516 }, { "acc": 0.92923012, "epoch": 2.5758806487423174, "grad_norm": 6.923187732696533, "learning_rate": 3.083927305629848e-06, "loss": 0.45776658, "memory(GiB)": 34.88, "step": 95135, "train_speed(iter/s)": 0.411517 }, { "acc": 0.93051462, "epoch": 2.5760160290255327, "grad_norm": 6.865182876586914, "learning_rate": 3.083410503540978e-06, "loss": 0.42211714, "memory(GiB)": 34.88, "step": 95140, "train_speed(iter/s)": 0.411518 }, { "acc": 0.92030172, "epoch": 2.576151409308748, "grad_norm": 5.008065223693848, "learning_rate": 3.0828937254674213e-06, "loss": 0.46828861, "memory(GiB)": 34.88, "step": 95145, "train_speed(iter/s)": 0.411519 }, { "acc": 0.9304636, "epoch": 2.576286789591964, "grad_norm": 8.899870872497559, "learning_rate": 3.0823769714156504e-06, "loss": 0.38975277, "memory(GiB)": 34.88, "step": 95150, "train_speed(iter/s)": 0.41152 }, { "acc": 0.92997885, "epoch": 2.5764221698751792, "grad_norm": 8.210429191589355, "learning_rate": 3.08186024139214e-06, "loss": 0.39010844, "memory(GiB)": 34.88, "step": 95155, "train_speed(iter/s)": 0.411522 }, { "acc": 0.91215363, "epoch": 2.576557550158395, "grad_norm": 12.500655174255371, "learning_rate": 3.081343535403364e-06, "loss": 0.54200516, "memory(GiB)": 34.88, "step": 95160, "train_speed(iter/s)": 0.411523 }, { "acc": 0.92470627, "epoch": 2.5766929304416104, "grad_norm": 8.464962005615234, "learning_rate": 3.0808268534557915e-06, "loss": 0.38258982, "memory(GiB)": 34.88, "step": 95165, "train_speed(iter/s)": 0.411524 }, { "acc": 0.93480301, "epoch": 2.576828310724826, "grad_norm": 6.592055320739746, "learning_rate": 3.0803101955558995e-06, "loss": 0.3711597, "memory(GiB)": 34.88, "step": 95170, "train_speed(iter/s)": 0.411525 }, { "acc": 0.92426758, "epoch": 2.5769636910080416, "grad_norm": 8.821908950805664, "learning_rate": 3.0797935617101558e-06, "loss": 0.43794918, "memory(GiB)": 34.88, "step": 95175, "train_speed(iter/s)": 0.411526 }, { "acc": 0.92660637, "epoch": 2.577099071291257, "grad_norm": 7.2803215980529785, "learning_rate": 3.0792769519250365e-06, "loss": 0.40884595, "memory(GiB)": 34.88, "step": 95180, "train_speed(iter/s)": 0.411527 }, { "acc": 0.91855307, "epoch": 2.5772344515744727, "grad_norm": 8.35506820678711, "learning_rate": 3.07876036620701e-06, "loss": 0.49791927, "memory(GiB)": 34.88, "step": 95185, "train_speed(iter/s)": 0.411528 }, { "acc": 0.92008152, "epoch": 2.577369831857688, "grad_norm": 6.789951801300049, "learning_rate": 3.078243804562549e-06, "loss": 0.37375484, "memory(GiB)": 34.88, "step": 95190, "train_speed(iter/s)": 0.411529 }, { "acc": 0.93809471, "epoch": 2.577505212140904, "grad_norm": 2.9386470317840576, "learning_rate": 3.0777272669981235e-06, "loss": 0.34589076, "memory(GiB)": 34.88, "step": 95195, "train_speed(iter/s)": 0.41153 }, { "acc": 0.93439035, "epoch": 2.5776405924241192, "grad_norm": 6.29514217376709, "learning_rate": 3.077210753520205e-06, "loss": 0.33189812, "memory(GiB)": 34.88, "step": 95200, "train_speed(iter/s)": 0.411532 }, { "acc": 0.9294775, "epoch": 2.577775972707335, "grad_norm": 5.564078330993652, "learning_rate": 3.0766942641352645e-06, "loss": 0.41919451, "memory(GiB)": 34.88, "step": 95205, "train_speed(iter/s)": 0.411533 }, { "acc": 0.92622528, "epoch": 2.5779113529905504, "grad_norm": 4.984031677246094, "learning_rate": 3.0761777988497693e-06, "loss": 0.47571297, "memory(GiB)": 34.88, "step": 95210, "train_speed(iter/s)": 0.411534 }, { "acc": 0.92407608, "epoch": 2.5780467332737658, "grad_norm": 7.92716646194458, "learning_rate": 3.0756613576701927e-06, "loss": 0.37107322, "memory(GiB)": 34.88, "step": 95215, "train_speed(iter/s)": 0.411535 }, { "acc": 0.93543034, "epoch": 2.5781821135569816, "grad_norm": 8.2982759475708, "learning_rate": 3.075144940603001e-06, "loss": 0.39647875, "memory(GiB)": 34.88, "step": 95220, "train_speed(iter/s)": 0.411536 }, { "acc": 0.93600445, "epoch": 2.5783174938401974, "grad_norm": 6.983553886413574, "learning_rate": 3.074628547654666e-06, "loss": 0.36381047, "memory(GiB)": 34.88, "step": 95225, "train_speed(iter/s)": 0.411537 }, { "acc": 0.93517771, "epoch": 2.5784528741234127, "grad_norm": 9.824358940124512, "learning_rate": 3.074112178831654e-06, "loss": 0.37965646, "memory(GiB)": 34.88, "step": 95230, "train_speed(iter/s)": 0.411538 }, { "acc": 0.93207045, "epoch": 2.578588254406628, "grad_norm": 24.57147789001465, "learning_rate": 3.0735958341404364e-06, "loss": 0.37518973, "memory(GiB)": 34.88, "step": 95235, "train_speed(iter/s)": 0.41154 }, { "acc": 0.93125944, "epoch": 2.578723634689844, "grad_norm": 5.993736743927002, "learning_rate": 3.073079513587478e-06, "loss": 0.4468821, "memory(GiB)": 34.88, "step": 95240, "train_speed(iter/s)": 0.411541 }, { "acc": 0.92924547, "epoch": 2.5788590149730592, "grad_norm": 5.201663017272949, "learning_rate": 3.0725632171792503e-06, "loss": 0.39240327, "memory(GiB)": 34.88, "step": 95245, "train_speed(iter/s)": 0.411542 }, { "acc": 0.9444313, "epoch": 2.5789943952562746, "grad_norm": 2.7681281566619873, "learning_rate": 3.0720469449222186e-06, "loss": 0.30836592, "memory(GiB)": 34.88, "step": 95250, "train_speed(iter/s)": 0.411543 }, { "acc": 0.93309622, "epoch": 2.5791297755394904, "grad_norm": 7.320733547210693, "learning_rate": 3.07153069682285e-06, "loss": 0.29577703, "memory(GiB)": 34.88, "step": 95255, "train_speed(iter/s)": 0.411544 }, { "acc": 0.92312613, "epoch": 2.579265155822706, "grad_norm": 3.7287747859954834, "learning_rate": 3.0710144728876133e-06, "loss": 0.4129406, "memory(GiB)": 34.88, "step": 95260, "train_speed(iter/s)": 0.411545 }, { "acc": 0.91726742, "epoch": 2.5794005361059216, "grad_norm": 6.336364269256592, "learning_rate": 3.070498273122973e-06, "loss": 0.40453329, "memory(GiB)": 34.88, "step": 95265, "train_speed(iter/s)": 0.411546 }, { "acc": 0.94470291, "epoch": 2.579535916389137, "grad_norm": 3.6464457511901855, "learning_rate": 3.0699820975353985e-06, "loss": 0.37382107, "memory(GiB)": 34.88, "step": 95270, "train_speed(iter/s)": 0.411547 }, { "acc": 0.9408349, "epoch": 2.5796712966723527, "grad_norm": 8.609816551208496, "learning_rate": 3.0694659461313527e-06, "loss": 0.30193357, "memory(GiB)": 34.88, "step": 95275, "train_speed(iter/s)": 0.411548 }, { "acc": 0.94012146, "epoch": 2.579806676955568, "grad_norm": 5.797150135040283, "learning_rate": 3.0689498189173033e-06, "loss": 0.31028094, "memory(GiB)": 34.88, "step": 95280, "train_speed(iter/s)": 0.411549 }, { "acc": 0.91712646, "epoch": 2.579942057238784, "grad_norm": 10.680514335632324, "learning_rate": 3.0684337158997147e-06, "loss": 0.45359864, "memory(GiB)": 34.88, "step": 95285, "train_speed(iter/s)": 0.41155 }, { "acc": 0.93324108, "epoch": 2.5800774375219993, "grad_norm": 18.780858993530273, "learning_rate": 3.067917637085055e-06, "loss": 0.40190973, "memory(GiB)": 34.88, "step": 95290, "train_speed(iter/s)": 0.411552 }, { "acc": 0.91301575, "epoch": 2.580212817805215, "grad_norm": 5.5610222816467285, "learning_rate": 3.0674015824797866e-06, "loss": 0.49452648, "memory(GiB)": 34.88, "step": 95295, "train_speed(iter/s)": 0.411553 }, { "acc": 0.92276096, "epoch": 2.5803481980884304, "grad_norm": 4.608246326446533, "learning_rate": 3.066885552090373e-06, "loss": 0.40557284, "memory(GiB)": 34.88, "step": 95300, "train_speed(iter/s)": 0.411554 }, { "acc": 0.92793007, "epoch": 2.5804835783716458, "grad_norm": 4.993556976318359, "learning_rate": 3.0663695459232807e-06, "loss": 0.37938402, "memory(GiB)": 34.88, "step": 95305, "train_speed(iter/s)": 0.411555 }, { "acc": 0.92939186, "epoch": 2.5806189586548616, "grad_norm": 8.476645469665527, "learning_rate": 3.065853563984972e-06, "loss": 0.37849545, "memory(GiB)": 34.88, "step": 95310, "train_speed(iter/s)": 0.411556 }, { "acc": 0.93937607, "epoch": 2.580754338938077, "grad_norm": 4.521352291107178, "learning_rate": 3.0653376062819124e-06, "loss": 0.29824734, "memory(GiB)": 34.88, "step": 95315, "train_speed(iter/s)": 0.411557 }, { "acc": 0.92821503, "epoch": 2.5808897192212927, "grad_norm": 11.754196166992188, "learning_rate": 3.0648216728205624e-06, "loss": 0.37979348, "memory(GiB)": 34.88, "step": 95320, "train_speed(iter/s)": 0.411559 }, { "acc": 0.9269352, "epoch": 2.581025099504508, "grad_norm": 7.636620998382568, "learning_rate": 3.0643057636073903e-06, "loss": 0.3869288, "memory(GiB)": 34.88, "step": 95325, "train_speed(iter/s)": 0.41156 }, { "acc": 0.93922234, "epoch": 2.581160479787724, "grad_norm": 4.615363121032715, "learning_rate": 3.0637898786488545e-06, "loss": 0.35744731, "memory(GiB)": 34.88, "step": 95330, "train_speed(iter/s)": 0.411561 }, { "acc": 0.93546686, "epoch": 2.5812958600709393, "grad_norm": 6.033225059509277, "learning_rate": 3.063274017951417e-06, "loss": 0.36747766, "memory(GiB)": 34.88, "step": 95335, "train_speed(iter/s)": 0.411562 }, { "acc": 0.92463531, "epoch": 2.5814312403541546, "grad_norm": 6.9866790771484375, "learning_rate": 3.0627581815215435e-06, "loss": 0.4698441, "memory(GiB)": 34.88, "step": 95340, "train_speed(iter/s)": 0.411563 }, { "acc": 0.94742241, "epoch": 2.5815666206373704, "grad_norm": 10.496058464050293, "learning_rate": 3.062242369365692e-06, "loss": 0.27498398, "memory(GiB)": 34.88, "step": 95345, "train_speed(iter/s)": 0.411564 }, { "acc": 0.93712988, "epoch": 2.5817020009205858, "grad_norm": 4.462498188018799, "learning_rate": 3.061726581490328e-06, "loss": 0.34317601, "memory(GiB)": 34.88, "step": 95350, "train_speed(iter/s)": 0.411565 }, { "acc": 0.945959, "epoch": 2.5818373812038016, "grad_norm": 7.232249736785889, "learning_rate": 3.0612108179019086e-06, "loss": 0.29105866, "memory(GiB)": 34.88, "step": 95355, "train_speed(iter/s)": 0.411567 }, { "acc": 0.9311615, "epoch": 2.581972761487017, "grad_norm": 7.592105865478516, "learning_rate": 3.0606950786068994e-06, "loss": 0.39669833, "memory(GiB)": 34.88, "step": 95360, "train_speed(iter/s)": 0.411568 }, { "acc": 0.9354497, "epoch": 2.5821081417702327, "grad_norm": 6.744028568267822, "learning_rate": 3.060179363611756e-06, "loss": 0.41618466, "memory(GiB)": 34.88, "step": 95365, "train_speed(iter/s)": 0.411569 }, { "acc": 0.93445454, "epoch": 2.582243522053448, "grad_norm": 7.395319938659668, "learning_rate": 3.0596636729229432e-06, "loss": 0.36782761, "memory(GiB)": 34.88, "step": 95370, "train_speed(iter/s)": 0.41157 }, { "acc": 0.92961617, "epoch": 2.5823789023366635, "grad_norm": 6.412796974182129, "learning_rate": 3.0591480065469194e-06, "loss": 0.36649785, "memory(GiB)": 34.88, "step": 95375, "train_speed(iter/s)": 0.411571 }, { "acc": 0.94555702, "epoch": 2.5825142826198793, "grad_norm": 5.89908504486084, "learning_rate": 3.0586323644901423e-06, "loss": 0.24334741, "memory(GiB)": 34.88, "step": 95380, "train_speed(iter/s)": 0.411573 }, { "acc": 0.93563862, "epoch": 2.582649662903095, "grad_norm": 7.484153747558594, "learning_rate": 3.0581167467590757e-06, "loss": 0.30906122, "memory(GiB)": 34.88, "step": 95385, "train_speed(iter/s)": 0.411574 }, { "acc": 0.93235035, "epoch": 2.5827850431863104, "grad_norm": 8.919071197509766, "learning_rate": 3.057601153360173e-06, "loss": 0.4368176, "memory(GiB)": 34.88, "step": 95390, "train_speed(iter/s)": 0.411575 }, { "acc": 0.93601227, "epoch": 2.5829204234695258, "grad_norm": 10.592082977294922, "learning_rate": 3.057085584299898e-06, "loss": 0.31372676, "memory(GiB)": 34.88, "step": 95395, "train_speed(iter/s)": 0.411576 }, { "acc": 0.92095985, "epoch": 2.5830558037527416, "grad_norm": 7.422934532165527, "learning_rate": 3.0565700395847055e-06, "loss": 0.41340456, "memory(GiB)": 34.88, "step": 95400, "train_speed(iter/s)": 0.411577 }, { "acc": 0.93298416, "epoch": 2.583191184035957, "grad_norm": 5.853975772857666, "learning_rate": 3.0560545192210584e-06, "loss": 0.38361602, "memory(GiB)": 34.88, "step": 95405, "train_speed(iter/s)": 0.411578 }, { "acc": 0.93163986, "epoch": 2.5833265643191723, "grad_norm": 14.340543746948242, "learning_rate": 3.0555390232154085e-06, "loss": 0.38147564, "memory(GiB)": 34.88, "step": 95410, "train_speed(iter/s)": 0.41158 }, { "acc": 0.91920872, "epoch": 2.583461944602388, "grad_norm": 14.093127250671387, "learning_rate": 3.0550235515742205e-06, "loss": 0.48399134, "memory(GiB)": 34.88, "step": 95415, "train_speed(iter/s)": 0.41158 }, { "acc": 0.92318439, "epoch": 2.583597324885604, "grad_norm": 11.014742851257324, "learning_rate": 3.054508104303946e-06, "loss": 0.46488934, "memory(GiB)": 34.88, "step": 95420, "train_speed(iter/s)": 0.411582 }, { "acc": 0.92084684, "epoch": 2.5837327051688193, "grad_norm": 4.479671478271484, "learning_rate": 3.0539926814110434e-06, "loss": 0.38227458, "memory(GiB)": 34.88, "step": 95425, "train_speed(iter/s)": 0.411583 }, { "acc": 0.95136595, "epoch": 2.5838680854520346, "grad_norm": 7.449933052062988, "learning_rate": 3.0534772829019703e-06, "loss": 0.279685, "memory(GiB)": 34.88, "step": 95430, "train_speed(iter/s)": 0.411584 }, { "acc": 0.9393528, "epoch": 2.5840034657352504, "grad_norm": 6.18454647064209, "learning_rate": 3.052961908783182e-06, "loss": 0.37995505, "memory(GiB)": 34.88, "step": 95435, "train_speed(iter/s)": 0.411585 }, { "acc": 0.93323555, "epoch": 2.5841388460184658, "grad_norm": 6.650134086608887, "learning_rate": 3.052446559061137e-06, "loss": 0.40308971, "memory(GiB)": 34.88, "step": 95440, "train_speed(iter/s)": 0.411586 }, { "acc": 0.9477807, "epoch": 2.5842742263016816, "grad_norm": 4.528713703155518, "learning_rate": 3.0519312337422855e-06, "loss": 0.21217403, "memory(GiB)": 34.88, "step": 95445, "train_speed(iter/s)": 0.411587 }, { "acc": 0.93140659, "epoch": 2.584409606584897, "grad_norm": 22.794179916381836, "learning_rate": 3.051415932833091e-06, "loss": 0.4133687, "memory(GiB)": 34.88, "step": 95450, "train_speed(iter/s)": 0.411588 }, { "acc": 0.92757435, "epoch": 2.5845449868681127, "grad_norm": 8.044188499450684, "learning_rate": 3.050900656340003e-06, "loss": 0.38745975, "memory(GiB)": 34.88, "step": 95455, "train_speed(iter/s)": 0.41159 }, { "acc": 0.91735821, "epoch": 2.584680367151328, "grad_norm": 13.824480056762695, "learning_rate": 3.050385404269477e-06, "loss": 0.4800921, "memory(GiB)": 34.88, "step": 95460, "train_speed(iter/s)": 0.411591 }, { "acc": 0.92692461, "epoch": 2.5848157474345435, "grad_norm": 21.464420318603516, "learning_rate": 3.049870176627969e-06, "loss": 0.4181138, "memory(GiB)": 34.88, "step": 95465, "train_speed(iter/s)": 0.411592 }, { "acc": 0.93160362, "epoch": 2.5849511277177593, "grad_norm": 9.698444366455078, "learning_rate": 3.049354973421931e-06, "loss": 0.38290577, "memory(GiB)": 34.88, "step": 95470, "train_speed(iter/s)": 0.411593 }, { "acc": 0.93948479, "epoch": 2.5850865080009746, "grad_norm": 5.805935859680176, "learning_rate": 3.0488397946578207e-06, "loss": 0.33689528, "memory(GiB)": 34.88, "step": 95475, "train_speed(iter/s)": 0.411594 }, { "acc": 0.91713953, "epoch": 2.5852218882841904, "grad_norm": 4.275986194610596, "learning_rate": 3.0483246403420867e-06, "loss": 0.46252995, "memory(GiB)": 34.88, "step": 95480, "train_speed(iter/s)": 0.411595 }, { "acc": 0.93407125, "epoch": 2.5853572685674058, "grad_norm": 11.594940185546875, "learning_rate": 3.0478095104811882e-06, "loss": 0.35685825, "memory(GiB)": 34.88, "step": 95485, "train_speed(iter/s)": 0.411596 }, { "acc": 0.92650814, "epoch": 2.5854926488506216, "grad_norm": 8.519208908081055, "learning_rate": 3.047294405081572e-06, "loss": 0.36762679, "memory(GiB)": 34.88, "step": 95490, "train_speed(iter/s)": 0.411598 }, { "acc": 0.9288805, "epoch": 2.585628029133837, "grad_norm": 8.52652359008789, "learning_rate": 3.0467793241496967e-06, "loss": 0.44534926, "memory(GiB)": 34.88, "step": 95495, "train_speed(iter/s)": 0.411599 }, { "acc": 0.93291368, "epoch": 2.5857634094170523, "grad_norm": 5.895395278930664, "learning_rate": 3.0462642676920124e-06, "loss": 0.32965736, "memory(GiB)": 34.88, "step": 95500, "train_speed(iter/s)": 0.4116 }, { "acc": 0.91932793, "epoch": 2.585898789700268, "grad_norm": 5.5469207763671875, "learning_rate": 3.045749235714967e-06, "loss": 0.47367234, "memory(GiB)": 34.88, "step": 95505, "train_speed(iter/s)": 0.411601 }, { "acc": 0.92277527, "epoch": 2.5860341699834835, "grad_norm": 6.300912380218506, "learning_rate": 3.045234228225019e-06, "loss": 0.50239453, "memory(GiB)": 34.88, "step": 95510, "train_speed(iter/s)": 0.411602 }, { "acc": 0.94670658, "epoch": 2.5861695502666993, "grad_norm": 9.382671356201172, "learning_rate": 3.0447192452286155e-06, "loss": 0.30505152, "memory(GiB)": 34.88, "step": 95515, "train_speed(iter/s)": 0.411603 }, { "acc": 0.95345116, "epoch": 2.5863049305499146, "grad_norm": 3.236616373062134, "learning_rate": 3.044204286732211e-06, "loss": 0.26754365, "memory(GiB)": 34.88, "step": 95520, "train_speed(iter/s)": 0.411605 }, { "acc": 0.93088226, "epoch": 2.5864403108331304, "grad_norm": 6.917883396148682, "learning_rate": 3.0436893527422513e-06, "loss": 0.3138027, "memory(GiB)": 34.88, "step": 95525, "train_speed(iter/s)": 0.411606 }, { "acc": 0.93699226, "epoch": 2.5865756911163458, "grad_norm": 10.997687339782715, "learning_rate": 3.0431744432651925e-06, "loss": 0.32024903, "memory(GiB)": 34.88, "step": 95530, "train_speed(iter/s)": 0.411607 }, { "acc": 0.91449347, "epoch": 2.586711071399561, "grad_norm": 8.776750564575195, "learning_rate": 3.0426595583074807e-06, "loss": 0.48678327, "memory(GiB)": 34.88, "step": 95535, "train_speed(iter/s)": 0.411608 }, { "acc": 0.90543165, "epoch": 2.586846451682777, "grad_norm": 8.913714408874512, "learning_rate": 3.0421446978755694e-06, "loss": 0.52217994, "memory(GiB)": 34.88, "step": 95540, "train_speed(iter/s)": 0.411609 }, { "acc": 0.94090576, "epoch": 2.5869818319659927, "grad_norm": 4.868438720703125, "learning_rate": 3.0416298619759065e-06, "loss": 0.33665907, "memory(GiB)": 34.88, "step": 95545, "train_speed(iter/s)": 0.41161 }, { "acc": 0.93681736, "epoch": 2.587117212249208, "grad_norm": 5.882089614868164, "learning_rate": 3.041115050614939e-06, "loss": 0.38809381, "memory(GiB)": 34.88, "step": 95550, "train_speed(iter/s)": 0.411611 }, { "acc": 0.91835918, "epoch": 2.5872525925324235, "grad_norm": 8.151424407958984, "learning_rate": 3.040600263799121e-06, "loss": 0.46024857, "memory(GiB)": 34.88, "step": 95555, "train_speed(iter/s)": 0.411612 }, { "acc": 0.94882488, "epoch": 2.5873879728156393, "grad_norm": 5.971429824829102, "learning_rate": 3.0400855015348955e-06, "loss": 0.30896573, "memory(GiB)": 34.88, "step": 95560, "train_speed(iter/s)": 0.411613 }, { "acc": 0.92862139, "epoch": 2.5875233530988546, "grad_norm": 5.997053623199463, "learning_rate": 3.0395707638287163e-06, "loss": 0.3706871, "memory(GiB)": 34.88, "step": 95565, "train_speed(iter/s)": 0.411614 }, { "acc": 0.94523048, "epoch": 2.58765873338207, "grad_norm": 14.878711700439453, "learning_rate": 3.039056050687027e-06, "loss": 0.33670583, "memory(GiB)": 34.88, "step": 95570, "train_speed(iter/s)": 0.411616 }, { "acc": 0.93680744, "epoch": 2.5877941136652858, "grad_norm": 5.110311508178711, "learning_rate": 3.0385413621162787e-06, "loss": 0.32907784, "memory(GiB)": 34.88, "step": 95575, "train_speed(iter/s)": 0.411617 }, { "acc": 0.92534828, "epoch": 2.5879294939485016, "grad_norm": 10.981558799743652, "learning_rate": 3.0380266981229185e-06, "loss": 0.44638004, "memory(GiB)": 34.88, "step": 95580, "train_speed(iter/s)": 0.411618 }, { "acc": 0.93211746, "epoch": 2.588064874231717, "grad_norm": 6.8879570960998535, "learning_rate": 3.03751205871339e-06, "loss": 0.3339025, "memory(GiB)": 34.88, "step": 95585, "train_speed(iter/s)": 0.411619 }, { "acc": 0.93222103, "epoch": 2.5882002545149323, "grad_norm": 7.182747840881348, "learning_rate": 3.0369974438941454e-06, "loss": 0.35167072, "memory(GiB)": 34.88, "step": 95590, "train_speed(iter/s)": 0.41162 }, { "acc": 0.91023531, "epoch": 2.588335634798148, "grad_norm": 6.2245049476623535, "learning_rate": 3.0364828536716256e-06, "loss": 0.55589347, "memory(GiB)": 34.88, "step": 95595, "train_speed(iter/s)": 0.411621 }, { "acc": 0.92429962, "epoch": 2.5884710150813635, "grad_norm": 5.853466510772705, "learning_rate": 3.0359682880522835e-06, "loss": 0.53755531, "memory(GiB)": 34.88, "step": 95600, "train_speed(iter/s)": 0.411622 }, { "acc": 0.94534283, "epoch": 2.5886063953645793, "grad_norm": 7.239969730377197, "learning_rate": 3.035453747042557e-06, "loss": 0.3814894, "memory(GiB)": 34.88, "step": 95605, "train_speed(iter/s)": 0.411623 }, { "acc": 0.92291164, "epoch": 2.5887417756477946, "grad_norm": 12.64707088470459, "learning_rate": 3.0349392306489007e-06, "loss": 0.44732685, "memory(GiB)": 34.88, "step": 95610, "train_speed(iter/s)": 0.411624 }, { "acc": 0.90929108, "epoch": 2.5888771559310104, "grad_norm": 8.112224578857422, "learning_rate": 3.0344247388777513e-06, "loss": 0.53892322, "memory(GiB)": 34.88, "step": 95615, "train_speed(iter/s)": 0.411626 }, { "acc": 0.95015469, "epoch": 2.589012536214226, "grad_norm": 5.538701057434082, "learning_rate": 3.033910271735561e-06, "loss": 0.25514193, "memory(GiB)": 34.88, "step": 95620, "train_speed(iter/s)": 0.411627 }, { "acc": 0.93462868, "epoch": 2.589147916497441, "grad_norm": 6.597470760345459, "learning_rate": 3.0333958292287714e-06, "loss": 0.38235259, "memory(GiB)": 34.88, "step": 95625, "train_speed(iter/s)": 0.411628 }, { "acc": 0.92875004, "epoch": 2.589283296780657, "grad_norm": 8.295058250427246, "learning_rate": 3.0328814113638238e-06, "loss": 0.35122316, "memory(GiB)": 34.88, "step": 95630, "train_speed(iter/s)": 0.411629 }, { "acc": 0.93076057, "epoch": 2.5894186770638723, "grad_norm": 7.482825756072998, "learning_rate": 3.032367018147168e-06, "loss": 0.39526491, "memory(GiB)": 34.88, "step": 95635, "train_speed(iter/s)": 0.41163 }, { "acc": 0.93774776, "epoch": 2.589554057347088, "grad_norm": 8.64309310913086, "learning_rate": 3.0318526495852437e-06, "loss": 0.3729342, "memory(GiB)": 34.88, "step": 95640, "train_speed(iter/s)": 0.411631 }, { "acc": 0.94202843, "epoch": 2.5896894376303035, "grad_norm": 8.922846794128418, "learning_rate": 3.0313383056844975e-06, "loss": 0.39979827, "memory(GiB)": 34.88, "step": 95645, "train_speed(iter/s)": 0.411633 }, { "acc": 0.91477108, "epoch": 2.5898248179135193, "grad_norm": 10.606154441833496, "learning_rate": 3.030823986451369e-06, "loss": 0.50427046, "memory(GiB)": 34.88, "step": 95650, "train_speed(iter/s)": 0.411633 }, { "acc": 0.93375902, "epoch": 2.5899601981967346, "grad_norm": 4.622729301452637, "learning_rate": 3.030309691892304e-06, "loss": 0.33829103, "memory(GiB)": 34.88, "step": 95655, "train_speed(iter/s)": 0.411634 }, { "acc": 0.93150482, "epoch": 2.59009557847995, "grad_norm": 6.8856096267700195, "learning_rate": 3.029795422013743e-06, "loss": 0.36605811, "memory(GiB)": 34.88, "step": 95660, "train_speed(iter/s)": 0.411635 }, { "acc": 0.9176053, "epoch": 2.590230958763166, "grad_norm": 11.066393852233887, "learning_rate": 3.0292811768221313e-06, "loss": 0.45568962, "memory(GiB)": 34.88, "step": 95665, "train_speed(iter/s)": 0.411636 }, { "acc": 0.92550545, "epoch": 2.590366339046381, "grad_norm": 11.583233833312988, "learning_rate": 3.0287669563239093e-06, "loss": 0.42753925, "memory(GiB)": 34.88, "step": 95670, "train_speed(iter/s)": 0.411637 }, { "acc": 0.92641258, "epoch": 2.590501719329597, "grad_norm": 9.145341873168945, "learning_rate": 3.0282527605255154e-06, "loss": 0.43366499, "memory(GiB)": 34.88, "step": 95675, "train_speed(iter/s)": 0.411638 }, { "acc": 0.93140965, "epoch": 2.5906370996128123, "grad_norm": 5.780230522155762, "learning_rate": 3.027738589433396e-06, "loss": 0.37583275, "memory(GiB)": 34.88, "step": 95680, "train_speed(iter/s)": 0.411639 }, { "acc": 0.93420582, "epoch": 2.590772479896028, "grad_norm": 7.936881065368652, "learning_rate": 3.0272244430539868e-06, "loss": 0.35034757, "memory(GiB)": 34.88, "step": 95685, "train_speed(iter/s)": 0.41164 }, { "acc": 0.93760118, "epoch": 2.5909078601792435, "grad_norm": 10.279348373413086, "learning_rate": 3.0267103213937338e-06, "loss": 0.30075843, "memory(GiB)": 34.88, "step": 95690, "train_speed(iter/s)": 0.411641 }, { "acc": 0.93437595, "epoch": 2.591043240462459, "grad_norm": 6.000079154968262, "learning_rate": 3.026196224459074e-06, "loss": 0.38578212, "memory(GiB)": 34.88, "step": 95695, "train_speed(iter/s)": 0.411642 }, { "acc": 0.95206203, "epoch": 2.5911786207456746, "grad_norm": 5.769021034240723, "learning_rate": 3.0256821522564483e-06, "loss": 0.21364248, "memory(GiB)": 34.88, "step": 95700, "train_speed(iter/s)": 0.411643 }, { "acc": 0.92166595, "epoch": 2.5913140010288904, "grad_norm": 20.495086669921875, "learning_rate": 3.025168104792299e-06, "loss": 0.38523264, "memory(GiB)": 34.88, "step": 95705, "train_speed(iter/s)": 0.411644 }, { "acc": 0.93412104, "epoch": 2.591449381312106, "grad_norm": 7.0876784324646, "learning_rate": 3.02465408207306e-06, "loss": 0.32249124, "memory(GiB)": 34.88, "step": 95710, "train_speed(iter/s)": 0.411645 }, { "acc": 0.92071953, "epoch": 2.591584761595321, "grad_norm": 6.232622146606445, "learning_rate": 3.0241400841051766e-06, "loss": 0.4535965, "memory(GiB)": 34.88, "step": 95715, "train_speed(iter/s)": 0.411646 }, { "acc": 0.94036512, "epoch": 2.591720141878537, "grad_norm": 9.178820610046387, "learning_rate": 3.023626110895082e-06, "loss": 0.30437307, "memory(GiB)": 34.88, "step": 95720, "train_speed(iter/s)": 0.411647 }, { "acc": 0.93815269, "epoch": 2.5918555221617523, "grad_norm": 6.4408650398254395, "learning_rate": 3.023112162449218e-06, "loss": 0.35008645, "memory(GiB)": 34.88, "step": 95725, "train_speed(iter/s)": 0.411648 }, { "acc": 0.92668724, "epoch": 2.5919909024449677, "grad_norm": 10.887076377868652, "learning_rate": 3.022598238774023e-06, "loss": 0.42150478, "memory(GiB)": 34.88, "step": 95730, "train_speed(iter/s)": 0.411649 }, { "acc": 0.94231062, "epoch": 2.5921262827281835, "grad_norm": 5.847777366638184, "learning_rate": 3.022084339875934e-06, "loss": 0.31253767, "memory(GiB)": 34.88, "step": 95735, "train_speed(iter/s)": 0.41165 }, { "acc": 0.91964025, "epoch": 2.5922616630113993, "grad_norm": 7.664258003234863, "learning_rate": 3.0215704657613885e-06, "loss": 0.41058283, "memory(GiB)": 34.88, "step": 95740, "train_speed(iter/s)": 0.411651 }, { "acc": 0.93125324, "epoch": 2.5923970432946146, "grad_norm": 10.58488655090332, "learning_rate": 3.021056616436825e-06, "loss": 0.40281553, "memory(GiB)": 34.88, "step": 95745, "train_speed(iter/s)": 0.411652 }, { "acc": 0.93162718, "epoch": 2.59253242357783, "grad_norm": 5.006788730621338, "learning_rate": 3.02054279190868e-06, "loss": 0.39044986, "memory(GiB)": 34.88, "step": 95750, "train_speed(iter/s)": 0.411653 }, { "acc": 0.93278389, "epoch": 2.592667803861046, "grad_norm": 7.865736961364746, "learning_rate": 3.0200289921833865e-06, "loss": 0.36279545, "memory(GiB)": 34.88, "step": 95755, "train_speed(iter/s)": 0.411654 }, { "acc": 0.94310942, "epoch": 2.592803184144261, "grad_norm": 4.929306507110596, "learning_rate": 3.0195152172673864e-06, "loss": 0.3267364, "memory(GiB)": 34.88, "step": 95760, "train_speed(iter/s)": 0.411655 }, { "acc": 0.92487803, "epoch": 2.592938564427477, "grad_norm": 8.42820930480957, "learning_rate": 3.019001467167112e-06, "loss": 0.4014163, "memory(GiB)": 34.88, "step": 95765, "train_speed(iter/s)": 0.411656 }, { "acc": 0.92127075, "epoch": 2.5930739447106923, "grad_norm": 10.051112174987793, "learning_rate": 3.018487741889001e-06, "loss": 0.47840409, "memory(GiB)": 34.88, "step": 95770, "train_speed(iter/s)": 0.411657 }, { "acc": 0.92685394, "epoch": 2.593209324993908, "grad_norm": 9.257903099060059, "learning_rate": 3.017974041439488e-06, "loss": 0.4394824, "memory(GiB)": 34.88, "step": 95775, "train_speed(iter/s)": 0.411658 }, { "acc": 0.93457317, "epoch": 2.5933447052771235, "grad_norm": 9.596636772155762, "learning_rate": 3.017460365825009e-06, "loss": 0.35467401, "memory(GiB)": 34.88, "step": 95780, "train_speed(iter/s)": 0.411659 }, { "acc": 0.94678402, "epoch": 2.593480085560339, "grad_norm": 7.606053352355957, "learning_rate": 3.0169467150519973e-06, "loss": 0.25938952, "memory(GiB)": 34.88, "step": 95785, "train_speed(iter/s)": 0.411661 }, { "acc": 0.94511976, "epoch": 2.5936154658435546, "grad_norm": 6.650891304016113, "learning_rate": 3.016433089126889e-06, "loss": 0.32867115, "memory(GiB)": 34.88, "step": 95790, "train_speed(iter/s)": 0.411662 }, { "acc": 0.93484898, "epoch": 2.59375084612677, "grad_norm": 10.481046676635742, "learning_rate": 3.0159194880561187e-06, "loss": 0.32640905, "memory(GiB)": 34.88, "step": 95795, "train_speed(iter/s)": 0.411663 }, { "acc": 0.94027586, "epoch": 2.593886226409986, "grad_norm": 7.842794418334961, "learning_rate": 3.015405911846116e-06, "loss": 0.34444506, "memory(GiB)": 34.88, "step": 95800, "train_speed(iter/s)": 0.411664 }, { "acc": 0.92862158, "epoch": 2.594021606693201, "grad_norm": 9.66759204864502, "learning_rate": 3.0148923605033188e-06, "loss": 0.37756162, "memory(GiB)": 34.88, "step": 95805, "train_speed(iter/s)": 0.411665 }, { "acc": 0.91518726, "epoch": 2.594156986976417, "grad_norm": 9.016339302062988, "learning_rate": 3.014378834034158e-06, "loss": 0.51723051, "memory(GiB)": 34.88, "step": 95810, "train_speed(iter/s)": 0.411666 }, { "acc": 0.92196541, "epoch": 2.5942923672596323, "grad_norm": 11.822729110717773, "learning_rate": 3.013865332445069e-06, "loss": 0.50070462, "memory(GiB)": 34.88, "step": 95815, "train_speed(iter/s)": 0.411667 }, { "acc": 0.92638206, "epoch": 2.5944277475428477, "grad_norm": 7.184514045715332, "learning_rate": 3.0133518557424817e-06, "loss": 0.41770339, "memory(GiB)": 34.88, "step": 95820, "train_speed(iter/s)": 0.411668 }, { "acc": 0.94433327, "epoch": 2.5945631278260635, "grad_norm": 7.44607400894165, "learning_rate": 3.0128384039328313e-06, "loss": 0.31951909, "memory(GiB)": 34.88, "step": 95825, "train_speed(iter/s)": 0.411669 }, { "acc": 0.93360415, "epoch": 2.594698508109279, "grad_norm": 8.429574012756348, "learning_rate": 3.012324977022546e-06, "loss": 0.39071078, "memory(GiB)": 34.88, "step": 95830, "train_speed(iter/s)": 0.41167 }, { "acc": 0.92490168, "epoch": 2.5948338883924946, "grad_norm": 4.44753885269165, "learning_rate": 3.0118115750180616e-06, "loss": 0.3766098, "memory(GiB)": 34.88, "step": 95835, "train_speed(iter/s)": 0.411671 }, { "acc": 0.9550108, "epoch": 2.59496926867571, "grad_norm": 2.334101438522339, "learning_rate": 3.0112981979258066e-06, "loss": 0.23906848, "memory(GiB)": 34.88, "step": 95840, "train_speed(iter/s)": 0.411672 }, { "acc": 0.92635136, "epoch": 2.595104648958926, "grad_norm": 7.629072666168213, "learning_rate": 3.0107848457522127e-06, "loss": 0.35786505, "memory(GiB)": 34.88, "step": 95845, "train_speed(iter/s)": 0.411673 }, { "acc": 0.92377014, "epoch": 2.595240029242141, "grad_norm": 7.265622138977051, "learning_rate": 3.010271518503712e-06, "loss": 0.40586271, "memory(GiB)": 34.88, "step": 95850, "train_speed(iter/s)": 0.411674 }, { "acc": 0.94499607, "epoch": 2.5953754095253565, "grad_norm": 12.440942764282227, "learning_rate": 3.009758216186732e-06, "loss": 0.32668924, "memory(GiB)": 34.88, "step": 95855, "train_speed(iter/s)": 0.411675 }, { "acc": 0.91931038, "epoch": 2.5955107898085723, "grad_norm": 17.539772033691406, "learning_rate": 3.009244938807706e-06, "loss": 0.45013161, "memory(GiB)": 34.88, "step": 95860, "train_speed(iter/s)": 0.411675 }, { "acc": 0.92248993, "epoch": 2.595646170091788, "grad_norm": 13.78439998626709, "learning_rate": 3.0087316863730613e-06, "loss": 0.42812166, "memory(GiB)": 34.88, "step": 95865, "train_speed(iter/s)": 0.411677 }, { "acc": 0.94163151, "epoch": 2.5957815503750035, "grad_norm": 12.604689598083496, "learning_rate": 3.00821845888923e-06, "loss": 0.35355897, "memory(GiB)": 34.88, "step": 95870, "train_speed(iter/s)": 0.411678 }, { "acc": 0.93026409, "epoch": 2.595916930658219, "grad_norm": 9.478474617004395, "learning_rate": 3.00770525636264e-06, "loss": 0.38693585, "memory(GiB)": 34.88, "step": 95875, "train_speed(iter/s)": 0.411679 }, { "acc": 0.93805208, "epoch": 2.5960523109414346, "grad_norm": 4.748465538024902, "learning_rate": 3.007192078799718e-06, "loss": 0.34035802, "memory(GiB)": 34.88, "step": 95880, "train_speed(iter/s)": 0.41168 }, { "acc": 0.92245598, "epoch": 2.59618769122465, "grad_norm": 5.936519622802734, "learning_rate": 3.0066789262068966e-06, "loss": 0.38937888, "memory(GiB)": 34.88, "step": 95885, "train_speed(iter/s)": 0.411681 }, { "acc": 0.92585297, "epoch": 2.5963230715078653, "grad_norm": 6.622122764587402, "learning_rate": 3.006165798590601e-06, "loss": 0.40171385, "memory(GiB)": 34.88, "step": 95890, "train_speed(iter/s)": 0.411682 }, { "acc": 0.92420969, "epoch": 2.596458451791081, "grad_norm": 7.9022393226623535, "learning_rate": 3.00565269595726e-06, "loss": 0.48155527, "memory(GiB)": 34.88, "step": 95895, "train_speed(iter/s)": 0.411683 }, { "acc": 0.94343414, "epoch": 2.596593832074297, "grad_norm": 9.860557556152344, "learning_rate": 3.0051396183133016e-06, "loss": 0.36517396, "memory(GiB)": 34.88, "step": 95900, "train_speed(iter/s)": 0.411684 }, { "acc": 0.93886967, "epoch": 2.5967292123575123, "grad_norm": 4.454427719116211, "learning_rate": 3.004626565665153e-06, "loss": 0.34534569, "memory(GiB)": 34.88, "step": 95905, "train_speed(iter/s)": 0.411685 }, { "acc": 0.9320961, "epoch": 2.5968645926407277, "grad_norm": 14.846734046936035, "learning_rate": 3.0041135380192403e-06, "loss": 0.39745088, "memory(GiB)": 34.88, "step": 95910, "train_speed(iter/s)": 0.411686 }, { "acc": 0.92978325, "epoch": 2.5969999729239435, "grad_norm": 6.960862636566162, "learning_rate": 3.003600535381992e-06, "loss": 0.38054276, "memory(GiB)": 34.88, "step": 95915, "train_speed(iter/s)": 0.411687 }, { "acc": 0.93502846, "epoch": 2.597135353207159, "grad_norm": 10.276774406433105, "learning_rate": 3.003087557759833e-06, "loss": 0.38219366, "memory(GiB)": 34.88, "step": 95920, "train_speed(iter/s)": 0.411688 }, { "acc": 0.92337055, "epoch": 2.5972707334903746, "grad_norm": 6.090136528015137, "learning_rate": 3.0025746051591896e-06, "loss": 0.39083147, "memory(GiB)": 34.88, "step": 95925, "train_speed(iter/s)": 0.411689 }, { "acc": 0.94475632, "epoch": 2.59740611377359, "grad_norm": 7.389458656311035, "learning_rate": 3.002061677586488e-06, "loss": 0.36200626, "memory(GiB)": 34.88, "step": 95930, "train_speed(iter/s)": 0.411691 }, { "acc": 0.93572721, "epoch": 2.597541494056806, "grad_norm": 4.601932048797607, "learning_rate": 3.001548775048153e-06, "loss": 0.38257806, "memory(GiB)": 34.88, "step": 95935, "train_speed(iter/s)": 0.411692 }, { "acc": 0.93841696, "epoch": 2.597676874340021, "grad_norm": 2.7378125190734863, "learning_rate": 3.001035897550611e-06, "loss": 0.28979561, "memory(GiB)": 34.88, "step": 95940, "train_speed(iter/s)": 0.411693 }, { "acc": 0.9288414, "epoch": 2.5978122546232365, "grad_norm": 12.175708770751953, "learning_rate": 3.0005230451002836e-06, "loss": 0.38937171, "memory(GiB)": 34.88, "step": 95945, "train_speed(iter/s)": 0.411694 }, { "acc": 0.94400444, "epoch": 2.5979476349064523, "grad_norm": 6.062468528747559, "learning_rate": 3.0000102177036e-06, "loss": 0.32822151, "memory(GiB)": 34.88, "step": 95950, "train_speed(iter/s)": 0.411695 }, { "acc": 0.93086891, "epoch": 2.5980830151896677, "grad_norm": 7.478597164154053, "learning_rate": 2.99949741536698e-06, "loss": 0.43169222, "memory(GiB)": 34.88, "step": 95955, "train_speed(iter/s)": 0.411696 }, { "acc": 0.93441544, "epoch": 2.5982183954728835, "grad_norm": 7.220104694366455, "learning_rate": 2.99898463809685e-06, "loss": 0.39904692, "memory(GiB)": 34.88, "step": 95960, "train_speed(iter/s)": 0.411697 }, { "acc": 0.93934689, "epoch": 2.598353775756099, "grad_norm": 4.606869220733643, "learning_rate": 2.998471885899634e-06, "loss": 0.36495125, "memory(GiB)": 34.88, "step": 95965, "train_speed(iter/s)": 0.411698 }, { "acc": 0.92839622, "epoch": 2.5984891560393146, "grad_norm": 6.947132110595703, "learning_rate": 2.997959158781752e-06, "loss": 0.3686348, "memory(GiB)": 34.88, "step": 95970, "train_speed(iter/s)": 0.411699 }, { "acc": 0.92450428, "epoch": 2.59862453632253, "grad_norm": 5.165772914886475, "learning_rate": 2.9974464567496308e-06, "loss": 0.46805496, "memory(GiB)": 34.88, "step": 95975, "train_speed(iter/s)": 0.4117 }, { "acc": 0.93785353, "epoch": 2.5987599166057453, "grad_norm": 7.498006820678711, "learning_rate": 2.9969337798096897e-06, "loss": 0.3606708, "memory(GiB)": 34.88, "step": 95980, "train_speed(iter/s)": 0.411701 }, { "acc": 0.93779135, "epoch": 2.598895296888961, "grad_norm": 8.02886962890625, "learning_rate": 2.9964211279683542e-06, "loss": 0.34303255, "memory(GiB)": 34.88, "step": 95985, "train_speed(iter/s)": 0.411702 }, { "acc": 0.93233089, "epoch": 2.5990306771721765, "grad_norm": 11.192028045654297, "learning_rate": 2.9959085012320428e-06, "loss": 0.40866613, "memory(GiB)": 34.88, "step": 95990, "train_speed(iter/s)": 0.411703 }, { "acc": 0.93742762, "epoch": 2.5991660574553923, "grad_norm": 9.517999649047852, "learning_rate": 2.99539589960718e-06, "loss": 0.29568653, "memory(GiB)": 34.88, "step": 95995, "train_speed(iter/s)": 0.411704 }, { "acc": 0.94305363, "epoch": 2.5993014377386077, "grad_norm": 15.271402359008789, "learning_rate": 2.9948833231001867e-06, "loss": 0.32229662, "memory(GiB)": 34.88, "step": 96000, "train_speed(iter/s)": 0.411706 }, { "acc": 0.9453928, "epoch": 2.5994368180218235, "grad_norm": 4.194827556610107, "learning_rate": 2.994370771717482e-06, "loss": 0.34151974, "memory(GiB)": 34.88, "step": 96005, "train_speed(iter/s)": 0.411707 }, { "acc": 0.92796717, "epoch": 2.599572198305039, "grad_norm": 12.931652069091797, "learning_rate": 2.993858245465489e-06, "loss": 0.44911485, "memory(GiB)": 34.88, "step": 96010, "train_speed(iter/s)": 0.411707 }, { "acc": 0.9251483, "epoch": 2.599707578588254, "grad_norm": 10.087783813476562, "learning_rate": 2.993345744350625e-06, "loss": 0.44140654, "memory(GiB)": 34.88, "step": 96015, "train_speed(iter/s)": 0.411709 }, { "acc": 0.93575058, "epoch": 2.59984295887147, "grad_norm": 6.611688613891602, "learning_rate": 2.992833268379315e-06, "loss": 0.31302426, "memory(GiB)": 34.88, "step": 96020, "train_speed(iter/s)": 0.41171 }, { "acc": 0.93831968, "epoch": 2.5999783391546853, "grad_norm": 4.8973069190979, "learning_rate": 2.992320817557974e-06, "loss": 0.35401638, "memory(GiB)": 34.88, "step": 96025, "train_speed(iter/s)": 0.411711 }, { "acc": 0.94002552, "epoch": 2.600113719437901, "grad_norm": 7.210830211639404, "learning_rate": 2.9918083918930234e-06, "loss": 0.3553406, "memory(GiB)": 34.88, "step": 96030, "train_speed(iter/s)": 0.411712 }, { "acc": 0.94512005, "epoch": 2.6002490997211165, "grad_norm": 7.772894382476807, "learning_rate": 2.9912959913908816e-06, "loss": 0.29438448, "memory(GiB)": 34.88, "step": 96035, "train_speed(iter/s)": 0.411713 }, { "acc": 0.91682234, "epoch": 2.6003844800043323, "grad_norm": 11.161974906921387, "learning_rate": 2.99078361605797e-06, "loss": 0.44928465, "memory(GiB)": 34.88, "step": 96040, "train_speed(iter/s)": 0.411714 }, { "acc": 0.92534103, "epoch": 2.6005198602875477, "grad_norm": 9.11547565460205, "learning_rate": 2.990271265900705e-06, "loss": 0.45099516, "memory(GiB)": 34.88, "step": 96045, "train_speed(iter/s)": 0.411715 }, { "acc": 0.93318195, "epoch": 2.600655240570763, "grad_norm": 4.5904316902160645, "learning_rate": 2.989758940925504e-06, "loss": 0.37290263, "memory(GiB)": 34.88, "step": 96050, "train_speed(iter/s)": 0.411717 }, { "acc": 0.91949673, "epoch": 2.600790620853979, "grad_norm": 11.893023490905762, "learning_rate": 2.9892466411387866e-06, "loss": 0.48112316, "memory(GiB)": 34.88, "step": 96055, "train_speed(iter/s)": 0.411718 }, { "acc": 0.93239775, "epoch": 2.6009260011371946, "grad_norm": 5.923816680908203, "learning_rate": 2.988734366546968e-06, "loss": 0.47870665, "memory(GiB)": 34.88, "step": 96060, "train_speed(iter/s)": 0.411719 }, { "acc": 0.9410346, "epoch": 2.60106138142041, "grad_norm": 7.462642669677734, "learning_rate": 2.988222117156469e-06, "loss": 0.39462399, "memory(GiB)": 34.88, "step": 96065, "train_speed(iter/s)": 0.41172 }, { "acc": 0.91810799, "epoch": 2.6011967617036253, "grad_norm": 9.019253730773926, "learning_rate": 2.987709892973702e-06, "loss": 0.51969166, "memory(GiB)": 34.88, "step": 96070, "train_speed(iter/s)": 0.411721 }, { "acc": 0.9344532, "epoch": 2.601332141986841, "grad_norm": 4.914588451385498, "learning_rate": 2.9871976940050897e-06, "loss": 0.3612175, "memory(GiB)": 34.88, "step": 96075, "train_speed(iter/s)": 0.411722 }, { "acc": 0.9435648, "epoch": 2.6014675222700565, "grad_norm": 6.2604475021362305, "learning_rate": 2.9866855202570416e-06, "loss": 0.23456337, "memory(GiB)": 34.88, "step": 96080, "train_speed(iter/s)": 0.411723 }, { "acc": 0.91313086, "epoch": 2.601602902553272, "grad_norm": 11.661136627197266, "learning_rate": 2.98617337173598e-06, "loss": 0.44625006, "memory(GiB)": 34.88, "step": 96085, "train_speed(iter/s)": 0.411724 }, { "acc": 0.93226976, "epoch": 2.6017382828364877, "grad_norm": 6.604926109313965, "learning_rate": 2.9856612484483173e-06, "loss": 0.38318374, "memory(GiB)": 34.88, "step": 96090, "train_speed(iter/s)": 0.411726 }, { "acc": 0.94102802, "epoch": 2.6018736631197035, "grad_norm": 8.816052436828613, "learning_rate": 2.9851491504004672e-06, "loss": 0.32675967, "memory(GiB)": 34.88, "step": 96095, "train_speed(iter/s)": 0.411727 }, { "acc": 0.93576221, "epoch": 2.602009043402919, "grad_norm": 5.855734348297119, "learning_rate": 2.9846370775988475e-06, "loss": 0.4144722, "memory(GiB)": 34.88, "step": 96100, "train_speed(iter/s)": 0.411728 }, { "acc": 0.92665215, "epoch": 2.602144423686134, "grad_norm": 5.608246803283691, "learning_rate": 2.984125030049871e-06, "loss": 0.37185993, "memory(GiB)": 34.88, "step": 96105, "train_speed(iter/s)": 0.411729 }, { "acc": 0.93516045, "epoch": 2.60227980396935, "grad_norm": 10.402931213378906, "learning_rate": 2.983613007759956e-06, "loss": 0.42238278, "memory(GiB)": 34.88, "step": 96110, "train_speed(iter/s)": 0.41173 }, { "acc": 0.91643715, "epoch": 2.6024151842525653, "grad_norm": 8.733935356140137, "learning_rate": 2.9831010107355096e-06, "loss": 0.51608205, "memory(GiB)": 34.88, "step": 96115, "train_speed(iter/s)": 0.411731 }, { "acc": 0.92961922, "epoch": 2.602550564535781, "grad_norm": 7.604910373687744, "learning_rate": 2.9825890389829537e-06, "loss": 0.41658831, "memory(GiB)": 34.88, "step": 96120, "train_speed(iter/s)": 0.411732 }, { "acc": 0.93586578, "epoch": 2.6026859448189965, "grad_norm": 4.8235764503479, "learning_rate": 2.9820770925086968e-06, "loss": 0.34276824, "memory(GiB)": 34.88, "step": 96125, "train_speed(iter/s)": 0.411734 }, { "acc": 0.93894196, "epoch": 2.6028213251022123, "grad_norm": 7.189459323883057, "learning_rate": 2.9815651713191518e-06, "loss": 0.33450942, "memory(GiB)": 34.88, "step": 96130, "train_speed(iter/s)": 0.411735 }, { "acc": 0.93089123, "epoch": 2.6029567053854277, "grad_norm": 7.326180458068848, "learning_rate": 2.9810532754207337e-06, "loss": 0.35256031, "memory(GiB)": 34.88, "step": 96135, "train_speed(iter/s)": 0.411736 }, { "acc": 0.92651587, "epoch": 2.603092085668643, "grad_norm": 13.632357597351074, "learning_rate": 2.980541404819853e-06, "loss": 0.37560747, "memory(GiB)": 34.88, "step": 96140, "train_speed(iter/s)": 0.411737 }, { "acc": 0.95054913, "epoch": 2.603227465951859, "grad_norm": 5.328371524810791, "learning_rate": 2.9800295595229247e-06, "loss": 0.27239771, "memory(GiB)": 34.88, "step": 96145, "train_speed(iter/s)": 0.411738 }, { "acc": 0.93269548, "epoch": 2.603362846235074, "grad_norm": 9.711828231811523, "learning_rate": 2.9795177395363558e-06, "loss": 0.35167646, "memory(GiB)": 34.88, "step": 96150, "train_speed(iter/s)": 0.411739 }, { "acc": 0.91086998, "epoch": 2.60349822651829, "grad_norm": 7.8817338943481445, "learning_rate": 2.979005944866565e-06, "loss": 0.52556791, "memory(GiB)": 34.88, "step": 96155, "train_speed(iter/s)": 0.41174 }, { "acc": 0.92500572, "epoch": 2.6036336068015054, "grad_norm": 8.38644027709961, "learning_rate": 2.9784941755199547e-06, "loss": 0.40567026, "memory(GiB)": 34.88, "step": 96160, "train_speed(iter/s)": 0.411741 }, { "acc": 0.9216754, "epoch": 2.603768987084721, "grad_norm": 4.93541955947876, "learning_rate": 2.9779824315029453e-06, "loss": 0.38270323, "memory(GiB)": 34.88, "step": 96165, "train_speed(iter/s)": 0.411743 }, { "acc": 0.93986626, "epoch": 2.6039043673679365, "grad_norm": 19.73621940612793, "learning_rate": 2.9774707128219404e-06, "loss": 0.44864812, "memory(GiB)": 34.88, "step": 96170, "train_speed(iter/s)": 0.411744 }, { "acc": 0.94026823, "epoch": 2.604039747651152, "grad_norm": 5.403656005859375, "learning_rate": 2.9769590194833527e-06, "loss": 0.29851508, "memory(GiB)": 34.88, "step": 96175, "train_speed(iter/s)": 0.411745 }, { "acc": 0.929667, "epoch": 2.6041751279343677, "grad_norm": 12.959179878234863, "learning_rate": 2.9764473514935927e-06, "loss": 0.42685637, "memory(GiB)": 34.88, "step": 96180, "train_speed(iter/s)": 0.411746 }, { "acc": 0.91748848, "epoch": 2.604310508217583, "grad_norm": 6.454170227050781, "learning_rate": 2.975935708859067e-06, "loss": 0.47550435, "memory(GiB)": 34.88, "step": 96185, "train_speed(iter/s)": 0.411747 }, { "acc": 0.94905148, "epoch": 2.604445888500799, "grad_norm": 4.146175861358643, "learning_rate": 2.975424091586191e-06, "loss": 0.34082131, "memory(GiB)": 34.88, "step": 96190, "train_speed(iter/s)": 0.411748 }, { "acc": 0.93366957, "epoch": 2.604581268784014, "grad_norm": 10.953315734863281, "learning_rate": 2.9749124996813654e-06, "loss": 0.37532454, "memory(GiB)": 34.88, "step": 96195, "train_speed(iter/s)": 0.411749 }, { "acc": 0.93681602, "epoch": 2.60471664906723, "grad_norm": 28.64608383178711, "learning_rate": 2.9744009331510083e-06, "loss": 0.32972593, "memory(GiB)": 34.88, "step": 96200, "train_speed(iter/s)": 0.41175 }, { "acc": 0.92958336, "epoch": 2.6048520293504454, "grad_norm": 10.512635231018066, "learning_rate": 2.97388939200152e-06, "loss": 0.40880861, "memory(GiB)": 34.88, "step": 96205, "train_speed(iter/s)": 0.411751 }, { "acc": 0.95072651, "epoch": 2.6049874096336607, "grad_norm": 6.8940582275390625, "learning_rate": 2.973377876239314e-06, "loss": 0.23095882, "memory(GiB)": 34.88, "step": 96210, "train_speed(iter/s)": 0.411753 }, { "acc": 0.94003668, "epoch": 2.6051227899168765, "grad_norm": 6.496806621551514, "learning_rate": 2.972866385870795e-06, "loss": 0.3626564, "memory(GiB)": 34.88, "step": 96215, "train_speed(iter/s)": 0.411754 }, { "acc": 0.92833271, "epoch": 2.6052581702000923, "grad_norm": 5.970523834228516, "learning_rate": 2.9723549209023705e-06, "loss": 0.41365232, "memory(GiB)": 34.88, "step": 96220, "train_speed(iter/s)": 0.411755 }, { "acc": 0.93190098, "epoch": 2.6053935504833077, "grad_norm": 11.265203475952148, "learning_rate": 2.9718434813404506e-06, "loss": 0.36537309, "memory(GiB)": 34.88, "step": 96225, "train_speed(iter/s)": 0.411756 }, { "acc": 0.93989315, "epoch": 2.605528930766523, "grad_norm": 10.396758079528809, "learning_rate": 2.971332067191437e-06, "loss": 0.32904406, "memory(GiB)": 34.88, "step": 96230, "train_speed(iter/s)": 0.411757 }, { "acc": 0.95450726, "epoch": 2.605664311049739, "grad_norm": 13.900338172912598, "learning_rate": 2.9708206784617416e-06, "loss": 0.28182497, "memory(GiB)": 34.88, "step": 96235, "train_speed(iter/s)": 0.411758 }, { "acc": 0.93536606, "epoch": 2.605799691332954, "grad_norm": 11.041460990905762, "learning_rate": 2.970309315157766e-06, "loss": 0.40805092, "memory(GiB)": 34.88, "step": 96240, "train_speed(iter/s)": 0.411759 }, { "acc": 0.93748989, "epoch": 2.6059350716161696, "grad_norm": 10.35632038116455, "learning_rate": 2.9697979772859203e-06, "loss": 0.34363437, "memory(GiB)": 34.88, "step": 96245, "train_speed(iter/s)": 0.41176 }, { "acc": 0.93856821, "epoch": 2.6060704518993854, "grad_norm": 3.7706143856048584, "learning_rate": 2.969286664852607e-06, "loss": 0.33868973, "memory(GiB)": 34.88, "step": 96250, "train_speed(iter/s)": 0.411762 }, { "acc": 0.93402653, "epoch": 2.606205832182601, "grad_norm": 5.257168769836426, "learning_rate": 2.9687753778642296e-06, "loss": 0.37266517, "memory(GiB)": 34.88, "step": 96255, "train_speed(iter/s)": 0.411763 }, { "acc": 0.93923254, "epoch": 2.6063412124658165, "grad_norm": 6.434998989105225, "learning_rate": 2.968264116327199e-06, "loss": 0.34828882, "memory(GiB)": 34.88, "step": 96260, "train_speed(iter/s)": 0.411764 }, { "acc": 0.93166533, "epoch": 2.606476592749032, "grad_norm": 8.063926696777344, "learning_rate": 2.967752880247913e-06, "loss": 0.42509799, "memory(GiB)": 34.88, "step": 96265, "train_speed(iter/s)": 0.411765 }, { "acc": 0.94337664, "epoch": 2.6066119730322477, "grad_norm": 4.01198673248291, "learning_rate": 2.9672416696327823e-06, "loss": 0.27462327, "memory(GiB)": 34.88, "step": 96270, "train_speed(iter/s)": 0.411766 }, { "acc": 0.92419758, "epoch": 2.606747353315463, "grad_norm": 12.111602783203125, "learning_rate": 2.9667304844882035e-06, "loss": 0.45240803, "memory(GiB)": 34.88, "step": 96275, "train_speed(iter/s)": 0.411767 }, { "acc": 0.93755798, "epoch": 2.606882733598679, "grad_norm": 19.759841918945312, "learning_rate": 2.9662193248205885e-06, "loss": 0.2813844, "memory(GiB)": 34.88, "step": 96280, "train_speed(iter/s)": 0.411768 }, { "acc": 0.92309837, "epoch": 2.607018113881894, "grad_norm": 4.996069431304932, "learning_rate": 2.9657081906363323e-06, "loss": 0.45801053, "memory(GiB)": 34.88, "step": 96285, "train_speed(iter/s)": 0.411769 }, { "acc": 0.93453274, "epoch": 2.60715349416511, "grad_norm": 6.514102458953857, "learning_rate": 2.9651970819418446e-06, "loss": 0.35481677, "memory(GiB)": 34.88, "step": 96290, "train_speed(iter/s)": 0.41177 }, { "acc": 0.9192049, "epoch": 2.6072888744483254, "grad_norm": 13.452561378479004, "learning_rate": 2.964685998743526e-06, "loss": 0.43769345, "memory(GiB)": 34.88, "step": 96295, "train_speed(iter/s)": 0.411771 }, { "acc": 0.93463535, "epoch": 2.6074242547315407, "grad_norm": 5.775906562805176, "learning_rate": 2.9641749410477748e-06, "loss": 0.35154867, "memory(GiB)": 34.88, "step": 96300, "train_speed(iter/s)": 0.411772 }, { "acc": 0.93239765, "epoch": 2.6075596350147565, "grad_norm": 6.962295055389404, "learning_rate": 2.9636639088609997e-06, "loss": 0.38854525, "memory(GiB)": 34.88, "step": 96305, "train_speed(iter/s)": 0.411773 }, { "acc": 0.9333457, "epoch": 2.607695015297972, "grad_norm": 5.405831813812256, "learning_rate": 2.9631529021895968e-06, "loss": 0.36112018, "memory(GiB)": 34.88, "step": 96310, "train_speed(iter/s)": 0.411774 }, { "acc": 0.92087536, "epoch": 2.6078303955811877, "grad_norm": 7.30916690826416, "learning_rate": 2.9626419210399716e-06, "loss": 0.50152636, "memory(GiB)": 34.88, "step": 96315, "train_speed(iter/s)": 0.411776 }, { "acc": 0.91448841, "epoch": 2.607965775864403, "grad_norm": 4.127590179443359, "learning_rate": 2.9621309654185202e-06, "loss": 0.49343548, "memory(GiB)": 34.88, "step": 96320, "train_speed(iter/s)": 0.411777 }, { "acc": 0.92773685, "epoch": 2.608101156147619, "grad_norm": 6.045072555541992, "learning_rate": 2.9616200353316486e-06, "loss": 0.43085451, "memory(GiB)": 34.88, "step": 96325, "train_speed(iter/s)": 0.411778 }, { "acc": 0.94490738, "epoch": 2.608236536430834, "grad_norm": 11.312203407287598, "learning_rate": 2.961109130785754e-06, "loss": 0.34009466, "memory(GiB)": 34.88, "step": 96330, "train_speed(iter/s)": 0.411779 }, { "acc": 0.92356339, "epoch": 2.6083719167140496, "grad_norm": 6.212738037109375, "learning_rate": 2.9605982517872385e-06, "loss": 0.45181084, "memory(GiB)": 34.88, "step": 96335, "train_speed(iter/s)": 0.41178 }, { "acc": 0.9148756, "epoch": 2.6085072969972654, "grad_norm": 6.680089950561523, "learning_rate": 2.960087398342502e-06, "loss": 0.5009923, "memory(GiB)": 34.88, "step": 96340, "train_speed(iter/s)": 0.411781 }, { "acc": 0.93557014, "epoch": 2.6086426772804807, "grad_norm": 8.833782196044922, "learning_rate": 2.9595765704579394e-06, "loss": 0.31554811, "memory(GiB)": 34.88, "step": 96345, "train_speed(iter/s)": 0.411783 }, { "acc": 0.93438911, "epoch": 2.6087780575636965, "grad_norm": 6.501243591308594, "learning_rate": 2.9590657681399566e-06, "loss": 0.37697935, "memory(GiB)": 34.88, "step": 96350, "train_speed(iter/s)": 0.411784 }, { "acc": 0.93366623, "epoch": 2.608913437846912, "grad_norm": 8.869030952453613, "learning_rate": 2.958554991394945e-06, "loss": 0.38436904, "memory(GiB)": 34.88, "step": 96355, "train_speed(iter/s)": 0.411785 }, { "acc": 0.94694748, "epoch": 2.6090488181301277, "grad_norm": 10.064966201782227, "learning_rate": 2.958044240229311e-06, "loss": 0.2796313, "memory(GiB)": 34.88, "step": 96360, "train_speed(iter/s)": 0.411786 }, { "acc": 0.92788754, "epoch": 2.609184198413343, "grad_norm": 6.658825397491455, "learning_rate": 2.9575335146494454e-06, "loss": 0.3749994, "memory(GiB)": 34.88, "step": 96365, "train_speed(iter/s)": 0.411787 }, { "acc": 0.92338514, "epoch": 2.6093195786965584, "grad_norm": 5.22675085067749, "learning_rate": 2.9570228146617515e-06, "loss": 0.40436187, "memory(GiB)": 34.88, "step": 96370, "train_speed(iter/s)": 0.411788 }, { "acc": 0.94069557, "epoch": 2.609454958979774, "grad_norm": 12.1076078414917, "learning_rate": 2.956512140272624e-06, "loss": 0.32980127, "memory(GiB)": 34.88, "step": 96375, "train_speed(iter/s)": 0.411789 }, { "acc": 0.93318195, "epoch": 2.60959033926299, "grad_norm": 12.4160737991333, "learning_rate": 2.9560014914884616e-06, "loss": 0.34648428, "memory(GiB)": 34.88, "step": 96380, "train_speed(iter/s)": 0.411791 }, { "acc": 0.94616728, "epoch": 2.6097257195462054, "grad_norm": 8.315634727478027, "learning_rate": 2.955490868315662e-06, "loss": 0.24721162, "memory(GiB)": 34.88, "step": 96385, "train_speed(iter/s)": 0.411792 }, { "acc": 0.9337389, "epoch": 2.6098610998294207, "grad_norm": 6.955907344818115, "learning_rate": 2.9549802707606164e-06, "loss": 0.41919055, "memory(GiB)": 34.88, "step": 96390, "train_speed(iter/s)": 0.411793 }, { "acc": 0.92218437, "epoch": 2.6099964801126365, "grad_norm": 10.85415267944336, "learning_rate": 2.954469698829728e-06, "loss": 0.41457787, "memory(GiB)": 34.88, "step": 96395, "train_speed(iter/s)": 0.411794 }, { "acc": 0.93582506, "epoch": 2.610131860395852, "grad_norm": 6.186361789703369, "learning_rate": 2.9539591525293863e-06, "loss": 0.42829385, "memory(GiB)": 34.88, "step": 96400, "train_speed(iter/s)": 0.411795 }, { "acc": 0.92619171, "epoch": 2.6102672406790672, "grad_norm": 7.297664165496826, "learning_rate": 2.9534486318659932e-06, "loss": 0.39074302, "memory(GiB)": 34.88, "step": 96405, "train_speed(iter/s)": 0.411796 }, { "acc": 0.92621765, "epoch": 2.610402620962283, "grad_norm": 13.865155220031738, "learning_rate": 2.9529381368459394e-06, "loss": 0.45591283, "memory(GiB)": 34.88, "step": 96410, "train_speed(iter/s)": 0.411797 }, { "acc": 0.95016918, "epoch": 2.610538001245499, "grad_norm": 5.490132808685303, "learning_rate": 2.952427667475622e-06, "loss": 0.31039562, "memory(GiB)": 34.88, "step": 96415, "train_speed(iter/s)": 0.411798 }, { "acc": 0.91799469, "epoch": 2.610673381528714, "grad_norm": 8.723718643188477, "learning_rate": 2.9519172237614364e-06, "loss": 0.51959205, "memory(GiB)": 34.88, "step": 96420, "train_speed(iter/s)": 0.411799 }, { "acc": 0.95303879, "epoch": 2.6108087618119296, "grad_norm": 10.216468811035156, "learning_rate": 2.9514068057097728e-06, "loss": 0.21598547, "memory(GiB)": 34.88, "step": 96425, "train_speed(iter/s)": 0.4118 }, { "acc": 0.93103399, "epoch": 2.6109441420951454, "grad_norm": 7.366950035095215, "learning_rate": 2.95089641332703e-06, "loss": 0.363835, "memory(GiB)": 34.88, "step": 96430, "train_speed(iter/s)": 0.411801 }, { "acc": 0.93196564, "epoch": 2.6110795223783607, "grad_norm": 7.125993728637695, "learning_rate": 2.9503860466195976e-06, "loss": 0.40293584, "memory(GiB)": 34.88, "step": 96435, "train_speed(iter/s)": 0.411803 }, { "acc": 0.93279848, "epoch": 2.6112149026615765, "grad_norm": 6.881340980529785, "learning_rate": 2.949875705593872e-06, "loss": 0.29210601, "memory(GiB)": 34.88, "step": 96440, "train_speed(iter/s)": 0.411804 }, { "acc": 0.9304101, "epoch": 2.611350282944792, "grad_norm": 5.236222267150879, "learning_rate": 2.9493653902562454e-06, "loss": 0.38744695, "memory(GiB)": 34.88, "step": 96445, "train_speed(iter/s)": 0.411805 }, { "acc": 0.92331257, "epoch": 2.6114856632280077, "grad_norm": 7.245815277099609, "learning_rate": 2.9488551006131104e-06, "loss": 0.48204746, "memory(GiB)": 34.88, "step": 96450, "train_speed(iter/s)": 0.411806 }, { "acc": 0.93973713, "epoch": 2.611621043511223, "grad_norm": 5.64005708694458, "learning_rate": 2.9483448366708584e-06, "loss": 0.33969059, "memory(GiB)": 34.88, "step": 96455, "train_speed(iter/s)": 0.411807 }, { "acc": 0.93514977, "epoch": 2.6117564237944384, "grad_norm": 7.781612873077393, "learning_rate": 2.947834598435883e-06, "loss": 0.32657692, "memory(GiB)": 34.88, "step": 96460, "train_speed(iter/s)": 0.411808 }, { "acc": 0.94735003, "epoch": 2.611891804077654, "grad_norm": 5.734773635864258, "learning_rate": 2.9473243859145766e-06, "loss": 0.285989, "memory(GiB)": 34.88, "step": 96465, "train_speed(iter/s)": 0.411809 }, { "acc": 0.93291388, "epoch": 2.6120271843608696, "grad_norm": 6.714181423187256, "learning_rate": 2.9468141991133265e-06, "loss": 0.36362176, "memory(GiB)": 34.88, "step": 96470, "train_speed(iter/s)": 0.41181 }, { "acc": 0.92649021, "epoch": 2.6121625646440854, "grad_norm": 10.98892593383789, "learning_rate": 2.946304038038528e-06, "loss": 0.52672253, "memory(GiB)": 34.88, "step": 96475, "train_speed(iter/s)": 0.411812 }, { "acc": 0.9325758, "epoch": 2.6122979449273007, "grad_norm": 13.202693939208984, "learning_rate": 2.9457939026965693e-06, "loss": 0.36997833, "memory(GiB)": 34.88, "step": 96480, "train_speed(iter/s)": 0.411813 }, { "acc": 0.9216136, "epoch": 2.6124333252105165, "grad_norm": 11.01846981048584, "learning_rate": 2.945283793093843e-06, "loss": 0.39538445, "memory(GiB)": 34.88, "step": 96485, "train_speed(iter/s)": 0.411814 }, { "acc": 0.93210907, "epoch": 2.612568705493732, "grad_norm": 5.580843925476074, "learning_rate": 2.9447737092367385e-06, "loss": 0.40496864, "memory(GiB)": 34.88, "step": 96490, "train_speed(iter/s)": 0.411815 }, { "acc": 0.9193326, "epoch": 2.6127040857769472, "grad_norm": 14.52782154083252, "learning_rate": 2.944263651131645e-06, "loss": 0.47949524, "memory(GiB)": 34.88, "step": 96495, "train_speed(iter/s)": 0.411816 }, { "acc": 0.93117485, "epoch": 2.612839466060163, "grad_norm": 16.124664306640625, "learning_rate": 2.9437536187849515e-06, "loss": 0.42407131, "memory(GiB)": 34.88, "step": 96500, "train_speed(iter/s)": 0.411817 }, { "acc": 0.93852425, "epoch": 2.6129748463433784, "grad_norm": 9.849621772766113, "learning_rate": 2.9432436122030485e-06, "loss": 0.30621953, "memory(GiB)": 34.88, "step": 96505, "train_speed(iter/s)": 0.411818 }, { "acc": 0.91557102, "epoch": 2.613110226626594, "grad_norm": 7.0356597900390625, "learning_rate": 2.942733631392325e-06, "loss": 0.49687667, "memory(GiB)": 34.88, "step": 96510, "train_speed(iter/s)": 0.411819 }, { "acc": 0.93187218, "epoch": 2.6132456069098096, "grad_norm": 4.858737945556641, "learning_rate": 2.9422236763591664e-06, "loss": 0.36279345, "memory(GiB)": 34.88, "step": 96515, "train_speed(iter/s)": 0.41182 }, { "acc": 0.92740259, "epoch": 2.6133809871930254, "grad_norm": 7.53251314163208, "learning_rate": 2.941713747109966e-06, "loss": 0.37185411, "memory(GiB)": 34.88, "step": 96520, "train_speed(iter/s)": 0.411822 }, { "acc": 0.94712372, "epoch": 2.6135163674762407, "grad_norm": 9.283699989318848, "learning_rate": 2.9412038436511074e-06, "loss": 0.32263925, "memory(GiB)": 34.88, "step": 96525, "train_speed(iter/s)": 0.411823 }, { "acc": 0.91551304, "epoch": 2.613651747759456, "grad_norm": 7.709856986999512, "learning_rate": 2.9406939659889805e-06, "loss": 0.51014376, "memory(GiB)": 34.88, "step": 96530, "train_speed(iter/s)": 0.411824 }, { "acc": 0.93891392, "epoch": 2.613787128042672, "grad_norm": 6.146711349487305, "learning_rate": 2.9401841141299704e-06, "loss": 0.30195322, "memory(GiB)": 34.88, "step": 96535, "train_speed(iter/s)": 0.411825 }, { "acc": 0.93204098, "epoch": 2.6139225083258877, "grad_norm": 10.330851554870605, "learning_rate": 2.9396742880804663e-06, "loss": 0.41553488, "memory(GiB)": 34.88, "step": 96540, "train_speed(iter/s)": 0.411826 }, { "acc": 0.92602282, "epoch": 2.614057888609103, "grad_norm": 8.92868709564209, "learning_rate": 2.9391644878468557e-06, "loss": 0.47782364, "memory(GiB)": 34.88, "step": 96545, "train_speed(iter/s)": 0.411827 }, { "acc": 0.94513712, "epoch": 2.6141932688923184, "grad_norm": 5.684475421905518, "learning_rate": 2.938654713435519e-06, "loss": 0.24890423, "memory(GiB)": 34.88, "step": 96550, "train_speed(iter/s)": 0.411828 }, { "acc": 0.93382664, "epoch": 2.614328649175534, "grad_norm": 19.38356590270996, "learning_rate": 2.938144964852848e-06, "loss": 0.34452479, "memory(GiB)": 34.88, "step": 96555, "train_speed(iter/s)": 0.41183 }, { "acc": 0.93175907, "epoch": 2.6144640294587496, "grad_norm": 24.954980850219727, "learning_rate": 2.937635242105226e-06, "loss": 0.39434834, "memory(GiB)": 34.88, "step": 96560, "train_speed(iter/s)": 0.411831 }, { "acc": 0.92905807, "epoch": 2.614599409741965, "grad_norm": 7.999075889587402, "learning_rate": 2.937125545199039e-06, "loss": 0.39078984, "memory(GiB)": 34.88, "step": 96565, "train_speed(iter/s)": 0.411832 }, { "acc": 0.92747746, "epoch": 2.6147347900251807, "grad_norm": 7.1665358543396, "learning_rate": 2.9366158741406707e-06, "loss": 0.33349342, "memory(GiB)": 34.88, "step": 96570, "train_speed(iter/s)": 0.411833 }, { "acc": 0.93017044, "epoch": 2.6148701703083965, "grad_norm": 4.19234561920166, "learning_rate": 2.9361062289365083e-06, "loss": 0.41626558, "memory(GiB)": 34.88, "step": 96575, "train_speed(iter/s)": 0.411834 }, { "acc": 0.93887615, "epoch": 2.615005550591612, "grad_norm": 4.2361602783203125, "learning_rate": 2.935596609592932e-06, "loss": 0.37310481, "memory(GiB)": 34.88, "step": 96580, "train_speed(iter/s)": 0.411835 }, { "acc": 0.93767414, "epoch": 2.6151409308748272, "grad_norm": 13.283084869384766, "learning_rate": 2.9350870161163302e-06, "loss": 0.38043251, "memory(GiB)": 34.88, "step": 96585, "train_speed(iter/s)": 0.411836 }, { "acc": 0.91346111, "epoch": 2.615276311158043, "grad_norm": 9.930981636047363, "learning_rate": 2.934577448513084e-06, "loss": 0.54802427, "memory(GiB)": 34.88, "step": 96590, "train_speed(iter/s)": 0.411837 }, { "acc": 0.92716446, "epoch": 2.6154116914412584, "grad_norm": 7.574501991271973, "learning_rate": 2.9340679067895773e-06, "loss": 0.45015373, "memory(GiB)": 34.88, "step": 96595, "train_speed(iter/s)": 0.411838 }, { "acc": 0.94422102, "epoch": 2.615547071724474, "grad_norm": 4.897796630859375, "learning_rate": 2.9335583909521936e-06, "loss": 0.31494281, "memory(GiB)": 34.88, "step": 96600, "train_speed(iter/s)": 0.411839 }, { "acc": 0.93015308, "epoch": 2.6156824520076896, "grad_norm": 6.365379810333252, "learning_rate": 2.9330489010073137e-06, "loss": 0.38441992, "memory(GiB)": 34.88, "step": 96605, "train_speed(iter/s)": 0.411841 }, { "acc": 0.92431822, "epoch": 2.6158178322909054, "grad_norm": 8.399632453918457, "learning_rate": 2.9325394369613225e-06, "loss": 0.37964339, "memory(GiB)": 34.88, "step": 96610, "train_speed(iter/s)": 0.411842 }, { "acc": 0.92906837, "epoch": 2.6159532125741207, "grad_norm": 8.999307632446289, "learning_rate": 2.9320299988206e-06, "loss": 0.44382896, "memory(GiB)": 34.88, "step": 96615, "train_speed(iter/s)": 0.411843 }, { "acc": 0.94141293, "epoch": 2.616088592857336, "grad_norm": 12.09115219116211, "learning_rate": 2.93152058659153e-06, "loss": 0.33039033, "memory(GiB)": 34.88, "step": 96620, "train_speed(iter/s)": 0.411844 }, { "acc": 0.92756424, "epoch": 2.616223973140552, "grad_norm": 8.097931861877441, "learning_rate": 2.9310112002804907e-06, "loss": 0.43846316, "memory(GiB)": 34.88, "step": 96625, "train_speed(iter/s)": 0.411845 }, { "acc": 0.93601179, "epoch": 2.6163593534237672, "grad_norm": 8.969167709350586, "learning_rate": 2.9305018398938677e-06, "loss": 0.36174622, "memory(GiB)": 34.88, "step": 96630, "train_speed(iter/s)": 0.411846 }, { "acc": 0.92457447, "epoch": 2.616494733706983, "grad_norm": 15.45465087890625, "learning_rate": 2.9299925054380373e-06, "loss": 0.41239319, "memory(GiB)": 34.88, "step": 96635, "train_speed(iter/s)": 0.411847 }, { "acc": 0.92270432, "epoch": 2.6166301139901984, "grad_norm": 4.433737277984619, "learning_rate": 2.929483196919382e-06, "loss": 0.37382152, "memory(GiB)": 34.88, "step": 96640, "train_speed(iter/s)": 0.411848 }, { "acc": 0.94549885, "epoch": 2.616765494273414, "grad_norm": 4.504131317138672, "learning_rate": 2.928973914344283e-06, "loss": 0.26205842, "memory(GiB)": 34.88, "step": 96645, "train_speed(iter/s)": 0.411849 }, { "acc": 0.93965883, "epoch": 2.6169008745566296, "grad_norm": 5.655757904052734, "learning_rate": 2.9284646577191174e-06, "loss": 0.33832512, "memory(GiB)": 34.88, "step": 96650, "train_speed(iter/s)": 0.41185 }, { "acc": 0.92041073, "epoch": 2.617036254839845, "grad_norm": 7.405756950378418, "learning_rate": 2.9279554270502675e-06, "loss": 0.48289895, "memory(GiB)": 34.88, "step": 96655, "train_speed(iter/s)": 0.411851 }, { "acc": 0.9264246, "epoch": 2.6171716351230607, "grad_norm": 71.54170989990234, "learning_rate": 2.9274462223441087e-06, "loss": 0.43180532, "memory(GiB)": 34.88, "step": 96660, "train_speed(iter/s)": 0.411853 }, { "acc": 0.93920298, "epoch": 2.617307015406276, "grad_norm": 34.9556884765625, "learning_rate": 2.9269370436070237e-06, "loss": 0.35100329, "memory(GiB)": 34.88, "step": 96665, "train_speed(iter/s)": 0.411854 }, { "acc": 0.9270895, "epoch": 2.617442395689492, "grad_norm": 4.416334629058838, "learning_rate": 2.9264278908453895e-06, "loss": 0.43639483, "memory(GiB)": 34.88, "step": 96670, "train_speed(iter/s)": 0.411855 }, { "acc": 0.93414783, "epoch": 2.6175777759727072, "grad_norm": 3.4570276737213135, "learning_rate": 2.925918764065583e-06, "loss": 0.35710976, "memory(GiB)": 34.88, "step": 96675, "train_speed(iter/s)": 0.411856 }, { "acc": 0.93363695, "epoch": 2.617713156255923, "grad_norm": 9.010110855102539, "learning_rate": 2.925409663273984e-06, "loss": 0.3959769, "memory(GiB)": 34.88, "step": 96680, "train_speed(iter/s)": 0.411857 }, { "acc": 0.92715588, "epoch": 2.6178485365391384, "grad_norm": 5.771417140960693, "learning_rate": 2.924900588476968e-06, "loss": 0.43305502, "memory(GiB)": 34.88, "step": 96685, "train_speed(iter/s)": 0.411858 }, { "acc": 0.93304348, "epoch": 2.6179839168223538, "grad_norm": 5.069726943969727, "learning_rate": 2.924391539680913e-06, "loss": 0.37577353, "memory(GiB)": 34.88, "step": 96690, "train_speed(iter/s)": 0.411859 }, { "acc": 0.93257236, "epoch": 2.6181192971055696, "grad_norm": 8.037359237670898, "learning_rate": 2.9238825168921965e-06, "loss": 0.35375667, "memory(GiB)": 34.88, "step": 96695, "train_speed(iter/s)": 0.41186 }, { "acc": 0.93892612, "epoch": 2.6182546773887854, "grad_norm": 22.198843002319336, "learning_rate": 2.923373520117195e-06, "loss": 0.3371244, "memory(GiB)": 34.88, "step": 96700, "train_speed(iter/s)": 0.411862 }, { "acc": 0.93161068, "epoch": 2.6183900576720007, "grad_norm": 13.111437797546387, "learning_rate": 2.9228645493622832e-06, "loss": 0.3418725, "memory(GiB)": 34.88, "step": 96705, "train_speed(iter/s)": 0.411862 }, { "acc": 0.92366905, "epoch": 2.618525437955216, "grad_norm": 7.129689693450928, "learning_rate": 2.922355604633839e-06, "loss": 0.38331501, "memory(GiB)": 34.88, "step": 96710, "train_speed(iter/s)": 0.411863 }, { "acc": 0.94281321, "epoch": 2.618660818238432, "grad_norm": 4.638238430023193, "learning_rate": 2.9218466859382368e-06, "loss": 0.29752445, "memory(GiB)": 34.88, "step": 96715, "train_speed(iter/s)": 0.411865 }, { "acc": 0.92128391, "epoch": 2.6187961985216472, "grad_norm": 16.010631561279297, "learning_rate": 2.921337793281851e-06, "loss": 0.46865182, "memory(GiB)": 34.88, "step": 96720, "train_speed(iter/s)": 0.411866 }, { "acc": 0.9378581, "epoch": 2.6189315788048626, "grad_norm": 6.696362018585205, "learning_rate": 2.920828926671058e-06, "loss": 0.37326865, "memory(GiB)": 34.88, "step": 96725, "train_speed(iter/s)": 0.411867 }, { "acc": 0.91779051, "epoch": 2.6190669590880784, "grad_norm": 6.058075904846191, "learning_rate": 2.9203200861122315e-06, "loss": 0.50343919, "memory(GiB)": 34.88, "step": 96730, "train_speed(iter/s)": 0.411868 }, { "acc": 0.93746071, "epoch": 2.619202339371294, "grad_norm": 7.1189961433410645, "learning_rate": 2.919811271611748e-06, "loss": 0.38127477, "memory(GiB)": 34.88, "step": 96735, "train_speed(iter/s)": 0.411869 }, { "acc": 0.92684822, "epoch": 2.6193377196545096, "grad_norm": 6.086606979370117, "learning_rate": 2.919302483175975e-06, "loss": 0.42178693, "memory(GiB)": 34.88, "step": 96740, "train_speed(iter/s)": 0.41187 }, { "acc": 0.92842178, "epoch": 2.619473099937725, "grad_norm": 8.066620826721191, "learning_rate": 2.9187937208112945e-06, "loss": 0.39751086, "memory(GiB)": 34.88, "step": 96745, "train_speed(iter/s)": 0.411871 }, { "acc": 0.92306585, "epoch": 2.6196084802209407, "grad_norm": 13.96042251586914, "learning_rate": 2.9182849845240722e-06, "loss": 0.39372361, "memory(GiB)": 34.88, "step": 96750, "train_speed(iter/s)": 0.411872 }, { "acc": 0.91769304, "epoch": 2.619743860504156, "grad_norm": 5.793911457061768, "learning_rate": 2.9177762743206866e-06, "loss": 0.46120958, "memory(GiB)": 34.88, "step": 96755, "train_speed(iter/s)": 0.411873 }, { "acc": 0.92251759, "epoch": 2.619879240787372, "grad_norm": 6.899490833282471, "learning_rate": 2.91726759020751e-06, "loss": 0.45888462, "memory(GiB)": 34.88, "step": 96760, "train_speed(iter/s)": 0.411874 }, { "acc": 0.92648296, "epoch": 2.6200146210705872, "grad_norm": 3.250875234603882, "learning_rate": 2.91675893219091e-06, "loss": 0.39746323, "memory(GiB)": 34.88, "step": 96765, "train_speed(iter/s)": 0.411875 }, { "acc": 0.9494113, "epoch": 2.620150001353803, "grad_norm": 5.838701248168945, "learning_rate": 2.9162503002772636e-06, "loss": 0.27172406, "memory(GiB)": 34.88, "step": 96770, "train_speed(iter/s)": 0.411877 }, { "acc": 0.93719625, "epoch": 2.6202853816370184, "grad_norm": 5.497352600097656, "learning_rate": 2.9157416944729368e-06, "loss": 0.28791065, "memory(GiB)": 34.88, "step": 96775, "train_speed(iter/s)": 0.411878 }, { "acc": 0.92137432, "epoch": 2.6204207619202338, "grad_norm": 8.805704116821289, "learning_rate": 2.915233114784308e-06, "loss": 0.43080206, "memory(GiB)": 34.88, "step": 96780, "train_speed(iter/s)": 0.411879 }, { "acc": 0.92784805, "epoch": 2.6205561422034496, "grad_norm": 7.413957595825195, "learning_rate": 2.914724561217743e-06, "loss": 0.40082812, "memory(GiB)": 34.88, "step": 96785, "train_speed(iter/s)": 0.41188 }, { "acc": 0.94445953, "epoch": 2.620691522486665, "grad_norm": 5.776186943054199, "learning_rate": 2.9142160337796125e-06, "loss": 0.35489936, "memory(GiB)": 34.88, "step": 96790, "train_speed(iter/s)": 0.411881 }, { "acc": 0.94377222, "epoch": 2.6208269027698807, "grad_norm": 11.555216789245605, "learning_rate": 2.913707532476291e-06, "loss": 0.31817136, "memory(GiB)": 34.88, "step": 96795, "train_speed(iter/s)": 0.411882 }, { "acc": 0.94890385, "epoch": 2.620962283053096, "grad_norm": 5.102971076965332, "learning_rate": 2.913199057314144e-06, "loss": 0.25116663, "memory(GiB)": 34.88, "step": 96800, "train_speed(iter/s)": 0.411883 }, { "acc": 0.94521351, "epoch": 2.621097663336312, "grad_norm": 9.475431442260742, "learning_rate": 2.912690608299545e-06, "loss": 0.28004117, "memory(GiB)": 34.88, "step": 96805, "train_speed(iter/s)": 0.411884 }, { "acc": 0.93391972, "epoch": 2.6212330436195272, "grad_norm": 4.562957286834717, "learning_rate": 2.912182185438856e-06, "loss": 0.36632333, "memory(GiB)": 34.88, "step": 96810, "train_speed(iter/s)": 0.411886 }, { "acc": 0.92063236, "epoch": 2.6213684239027426, "grad_norm": 5.616861820220947, "learning_rate": 2.9116737887384562e-06, "loss": 0.45474668, "memory(GiB)": 34.88, "step": 96815, "train_speed(iter/s)": 0.411887 }, { "acc": 0.92858, "epoch": 2.6215038041859584, "grad_norm": 8.862866401672363, "learning_rate": 2.9111654182047074e-06, "loss": 0.40950861, "memory(GiB)": 34.88, "step": 96820, "train_speed(iter/s)": 0.411888 }, { "acc": 0.94001484, "epoch": 2.6216391844691738, "grad_norm": 9.585054397583008, "learning_rate": 2.910657073843979e-06, "loss": 0.42710481, "memory(GiB)": 34.88, "step": 96825, "train_speed(iter/s)": 0.411889 }, { "acc": 0.9423171, "epoch": 2.6217745647523896, "grad_norm": 13.579787254333496, "learning_rate": 2.91014875566264e-06, "loss": 0.38919632, "memory(GiB)": 34.88, "step": 96830, "train_speed(iter/s)": 0.41189 }, { "acc": 0.94611654, "epoch": 2.621909945035605, "grad_norm": 11.33716869354248, "learning_rate": 2.90964046366706e-06, "loss": 0.37176285, "memory(GiB)": 34.88, "step": 96835, "train_speed(iter/s)": 0.411891 }, { "acc": 0.93137569, "epoch": 2.6220453253188207, "grad_norm": 16.3848876953125, "learning_rate": 2.909132197863605e-06, "loss": 0.34197297, "memory(GiB)": 34.88, "step": 96840, "train_speed(iter/s)": 0.411892 }, { "acc": 0.93485975, "epoch": 2.622180705602036, "grad_norm": 9.093586921691895, "learning_rate": 2.9086239582586354e-06, "loss": 0.36026912, "memory(GiB)": 34.88, "step": 96845, "train_speed(iter/s)": 0.411893 }, { "acc": 0.90307693, "epoch": 2.6223160858852514, "grad_norm": 7.693897724151611, "learning_rate": 2.9081157448585294e-06, "loss": 0.52569609, "memory(GiB)": 34.88, "step": 96850, "train_speed(iter/s)": 0.411894 }, { "acc": 0.92673054, "epoch": 2.6224514661684672, "grad_norm": 8.441534042358398, "learning_rate": 2.907607557669645e-06, "loss": 0.46732464, "memory(GiB)": 34.88, "step": 96855, "train_speed(iter/s)": 0.411895 }, { "acc": 0.92898426, "epoch": 2.622586846451683, "grad_norm": 12.278043746948242, "learning_rate": 2.907099396698351e-06, "loss": 0.4045094, "memory(GiB)": 34.88, "step": 96860, "train_speed(iter/s)": 0.411896 }, { "acc": 0.94271746, "epoch": 2.6227222267348984, "grad_norm": 9.732850074768066, "learning_rate": 2.9065912619510132e-06, "loss": 0.32994123, "memory(GiB)": 34.88, "step": 96865, "train_speed(iter/s)": 0.411897 }, { "acc": 0.92689524, "epoch": 2.6228576070181138, "grad_norm": 9.674110412597656, "learning_rate": 2.906083153433999e-06, "loss": 0.42619166, "memory(GiB)": 34.88, "step": 96870, "train_speed(iter/s)": 0.411898 }, { "acc": 0.93690224, "epoch": 2.6229929873013296, "grad_norm": 5.593390941619873, "learning_rate": 2.9055750711536696e-06, "loss": 0.35319076, "memory(GiB)": 34.88, "step": 96875, "train_speed(iter/s)": 0.4119 }, { "acc": 0.92429504, "epoch": 2.623128367584545, "grad_norm": 7.068421840667725, "learning_rate": 2.9050670151163906e-06, "loss": 0.390941, "memory(GiB)": 34.88, "step": 96880, "train_speed(iter/s)": 0.4119 }, { "acc": 0.94363089, "epoch": 2.6232637478677603, "grad_norm": 5.093443870544434, "learning_rate": 2.90455898532853e-06, "loss": 0.33724368, "memory(GiB)": 34.88, "step": 96885, "train_speed(iter/s)": 0.411901 }, { "acc": 0.9421792, "epoch": 2.623399128150976, "grad_norm": 8.221338272094727, "learning_rate": 2.9040509817964467e-06, "loss": 0.31534886, "memory(GiB)": 34.88, "step": 96890, "train_speed(iter/s)": 0.411903 }, { "acc": 0.94814434, "epoch": 2.623534508434192, "grad_norm": 6.023845195770264, "learning_rate": 2.9035430045265067e-06, "loss": 0.32928257, "memory(GiB)": 34.88, "step": 96895, "train_speed(iter/s)": 0.411904 }, { "acc": 0.93295364, "epoch": 2.6236698887174073, "grad_norm": 9.273125648498535, "learning_rate": 2.9030350535250744e-06, "loss": 0.40504456, "memory(GiB)": 34.88, "step": 96900, "train_speed(iter/s)": 0.411905 }, { "acc": 0.92759342, "epoch": 2.6238052690006226, "grad_norm": 10.204131126403809, "learning_rate": 2.9025271287985135e-06, "loss": 0.44174085, "memory(GiB)": 34.88, "step": 96905, "train_speed(iter/s)": 0.411906 }, { "acc": 0.92579002, "epoch": 2.6239406492838384, "grad_norm": 4.91494607925415, "learning_rate": 2.9020192303531822e-06, "loss": 0.43201332, "memory(GiB)": 34.88, "step": 96910, "train_speed(iter/s)": 0.411907 }, { "acc": 0.94085464, "epoch": 2.6240760295670538, "grad_norm": 4.687372207641602, "learning_rate": 2.9015113581954486e-06, "loss": 0.29558864, "memory(GiB)": 34.88, "step": 96915, "train_speed(iter/s)": 0.411908 }, { "acc": 0.91147013, "epoch": 2.6242114098502696, "grad_norm": 14.221515655517578, "learning_rate": 2.9010035123316704e-06, "loss": 0.49115372, "memory(GiB)": 34.88, "step": 96920, "train_speed(iter/s)": 0.411909 }, { "acc": 0.92752113, "epoch": 2.624346790133485, "grad_norm": 16.437877655029297, "learning_rate": 2.9004956927682136e-06, "loss": 0.40937777, "memory(GiB)": 34.88, "step": 96925, "train_speed(iter/s)": 0.41191 }, { "acc": 0.9250452, "epoch": 2.6244821704167007, "grad_norm": 8.464347839355469, "learning_rate": 2.899987899511435e-06, "loss": 0.41431441, "memory(GiB)": 34.88, "step": 96930, "train_speed(iter/s)": 0.411911 }, { "acc": 0.936096, "epoch": 2.624617550699916, "grad_norm": 8.921430587768555, "learning_rate": 2.8994801325676987e-06, "loss": 0.36968985, "memory(GiB)": 34.88, "step": 96935, "train_speed(iter/s)": 0.411912 }, { "acc": 0.92849455, "epoch": 2.6247529309831314, "grad_norm": 18.21858024597168, "learning_rate": 2.8989723919433664e-06, "loss": 0.41055851, "memory(GiB)": 34.88, "step": 96940, "train_speed(iter/s)": 0.411913 }, { "acc": 0.92981415, "epoch": 2.6248883112663473, "grad_norm": 7.0784687995910645, "learning_rate": 2.8984646776447945e-06, "loss": 0.40934472, "memory(GiB)": 34.88, "step": 96945, "train_speed(iter/s)": 0.411914 }, { "acc": 0.93945274, "epoch": 2.6250236915495626, "grad_norm": 16.830997467041016, "learning_rate": 2.8979569896783457e-06, "loss": 0.37206314, "memory(GiB)": 34.88, "step": 96950, "train_speed(iter/s)": 0.411915 }, { "acc": 0.93626871, "epoch": 2.6251590718327784, "grad_norm": 9.116050720214844, "learning_rate": 2.8974493280503796e-06, "loss": 0.41524944, "memory(GiB)": 34.88, "step": 96955, "train_speed(iter/s)": 0.411916 }, { "acc": 0.94135256, "epoch": 2.6252944521159938, "grad_norm": 2.993882894515991, "learning_rate": 2.8969416927672583e-06, "loss": 0.30057268, "memory(GiB)": 34.88, "step": 96960, "train_speed(iter/s)": 0.411917 }, { "acc": 0.94053745, "epoch": 2.6254298323992096, "grad_norm": 7.815917015075684, "learning_rate": 2.8964340838353354e-06, "loss": 0.34524817, "memory(GiB)": 34.88, "step": 96965, "train_speed(iter/s)": 0.411918 }, { "acc": 0.91990118, "epoch": 2.625565212682425, "grad_norm": 7.671247482299805, "learning_rate": 2.895926501260973e-06, "loss": 0.53457651, "memory(GiB)": 34.88, "step": 96970, "train_speed(iter/s)": 0.411919 }, { "acc": 0.92520161, "epoch": 2.6257005929656403, "grad_norm": 8.79035472869873, "learning_rate": 2.895418945050531e-06, "loss": 0.37696562, "memory(GiB)": 34.88, "step": 96975, "train_speed(iter/s)": 0.41192 }, { "acc": 0.94036541, "epoch": 2.625835973248856, "grad_norm": 5.964370250701904, "learning_rate": 2.8949114152103636e-06, "loss": 0.31885178, "memory(GiB)": 34.88, "step": 96980, "train_speed(iter/s)": 0.411922 }, { "acc": 0.93702335, "epoch": 2.6259713535320715, "grad_norm": 6.255256175994873, "learning_rate": 2.8944039117468307e-06, "loss": 0.37990482, "memory(GiB)": 34.88, "step": 96985, "train_speed(iter/s)": 0.411923 }, { "acc": 0.93445911, "epoch": 2.6261067338152873, "grad_norm": 5.510097026824951, "learning_rate": 2.89389643466629e-06, "loss": 0.35002038, "memory(GiB)": 34.88, "step": 96990, "train_speed(iter/s)": 0.411924 }, { "acc": 0.94603481, "epoch": 2.6262421140985026, "grad_norm": 6.852492332458496, "learning_rate": 2.8933889839751006e-06, "loss": 0.30438161, "memory(GiB)": 34.88, "step": 96995, "train_speed(iter/s)": 0.411925 }, { "acc": 0.94647512, "epoch": 2.6263774943817184, "grad_norm": 11.99824333190918, "learning_rate": 2.8928815596796143e-06, "loss": 0.28612523, "memory(GiB)": 34.88, "step": 97000, "train_speed(iter/s)": 0.411926 }, { "acc": 0.9218317, "epoch": 2.6265128746649338, "grad_norm": 10.20541000366211, "learning_rate": 2.8923741617861912e-06, "loss": 0.47388215, "memory(GiB)": 34.88, "step": 97005, "train_speed(iter/s)": 0.411927 }, { "acc": 0.9286377, "epoch": 2.626648254948149, "grad_norm": 10.28471565246582, "learning_rate": 2.8918667903011886e-06, "loss": 0.47104187, "memory(GiB)": 34.88, "step": 97010, "train_speed(iter/s)": 0.411928 }, { "acc": 0.91105947, "epoch": 2.626783635231365, "grad_norm": 7.358028411865234, "learning_rate": 2.8913594452309585e-06, "loss": 0.56405864, "memory(GiB)": 34.88, "step": 97015, "train_speed(iter/s)": 0.411929 }, { "acc": 0.92613983, "epoch": 2.6269190155145803, "grad_norm": 8.522534370422363, "learning_rate": 2.8908521265818586e-06, "loss": 0.41930227, "memory(GiB)": 34.88, "step": 97020, "train_speed(iter/s)": 0.411931 }, { "acc": 0.93680649, "epoch": 2.627054395797796, "grad_norm": 5.480799198150635, "learning_rate": 2.8903448343602426e-06, "loss": 0.37920444, "memory(GiB)": 34.88, "step": 97025, "train_speed(iter/s)": 0.411932 }, { "acc": 0.95273361, "epoch": 2.6271897760810115, "grad_norm": 10.524025917053223, "learning_rate": 2.8898375685724695e-06, "loss": 0.249633, "memory(GiB)": 34.88, "step": 97030, "train_speed(iter/s)": 0.411933 }, { "acc": 0.93932381, "epoch": 2.6273251563642273, "grad_norm": 14.687289237976074, "learning_rate": 2.8893303292248888e-06, "loss": 0.3605803, "memory(GiB)": 34.88, "step": 97035, "train_speed(iter/s)": 0.411934 }, { "acc": 0.94757061, "epoch": 2.6274605366474426, "grad_norm": 9.930495262145996, "learning_rate": 2.8888231163238556e-06, "loss": 0.2569104, "memory(GiB)": 34.88, "step": 97040, "train_speed(iter/s)": 0.411935 }, { "acc": 0.92764053, "epoch": 2.627595916930658, "grad_norm": 15.058069229125977, "learning_rate": 2.888315929875726e-06, "loss": 0.46896267, "memory(GiB)": 34.88, "step": 97045, "train_speed(iter/s)": 0.411936 }, { "acc": 0.92790499, "epoch": 2.6277312972138738, "grad_norm": 8.573578834533691, "learning_rate": 2.8878087698868533e-06, "loss": 0.35254197, "memory(GiB)": 34.88, "step": 97050, "train_speed(iter/s)": 0.411937 }, { "acc": 0.92583313, "epoch": 2.6278666774970896, "grad_norm": 7.530969619750977, "learning_rate": 2.8873016363635888e-06, "loss": 0.34684613, "memory(GiB)": 34.88, "step": 97055, "train_speed(iter/s)": 0.411938 }, { "acc": 0.92228088, "epoch": 2.628002057780305, "grad_norm": 7.417958736419678, "learning_rate": 2.886794529312285e-06, "loss": 0.49115124, "memory(GiB)": 34.88, "step": 97060, "train_speed(iter/s)": 0.411939 }, { "acc": 0.93354588, "epoch": 2.6281374380635203, "grad_norm": 12.909228324890137, "learning_rate": 2.886287448739299e-06, "loss": 0.41616607, "memory(GiB)": 34.88, "step": 97065, "train_speed(iter/s)": 0.41194 }, { "acc": 0.93003998, "epoch": 2.628272818346736, "grad_norm": 10.537548065185547, "learning_rate": 2.885780394650977e-06, "loss": 0.41103868, "memory(GiB)": 34.88, "step": 97070, "train_speed(iter/s)": 0.411941 }, { "acc": 0.93006306, "epoch": 2.6284081986299515, "grad_norm": 9.290119171142578, "learning_rate": 2.8852733670536733e-06, "loss": 0.43436794, "memory(GiB)": 34.88, "step": 97075, "train_speed(iter/s)": 0.411942 }, { "acc": 0.94938297, "epoch": 2.628543578913167, "grad_norm": 9.380064964294434, "learning_rate": 2.88476636595374e-06, "loss": 0.24091563, "memory(GiB)": 34.88, "step": 97080, "train_speed(iter/s)": 0.411943 }, { "acc": 0.93592625, "epoch": 2.6286789591963826, "grad_norm": 6.908747673034668, "learning_rate": 2.8842593913575295e-06, "loss": 0.42047834, "memory(GiB)": 34.88, "step": 97085, "train_speed(iter/s)": 0.411945 }, { "acc": 0.93216572, "epoch": 2.6288143394795984, "grad_norm": 6.1495466232299805, "learning_rate": 2.8837524432713883e-06, "loss": 0.40437393, "memory(GiB)": 34.88, "step": 97090, "train_speed(iter/s)": 0.411946 }, { "acc": 0.94663725, "epoch": 2.6289497197628138, "grad_norm": 10.247307777404785, "learning_rate": 2.8832455217016707e-06, "loss": 0.2669229, "memory(GiB)": 34.88, "step": 97095, "train_speed(iter/s)": 0.411947 }, { "acc": 0.92769737, "epoch": 2.629085100046029, "grad_norm": 10.510005950927734, "learning_rate": 2.882738626654728e-06, "loss": 0.4433135, "memory(GiB)": 34.88, "step": 97100, "train_speed(iter/s)": 0.411948 }, { "acc": 0.92459011, "epoch": 2.629220480329245, "grad_norm": 9.470245361328125, "learning_rate": 2.8822317581369054e-06, "loss": 0.41638427, "memory(GiB)": 34.88, "step": 97105, "train_speed(iter/s)": 0.411949 }, { "acc": 0.93448124, "epoch": 2.6293558606124603, "grad_norm": 14.202467918395996, "learning_rate": 2.8817249161545548e-06, "loss": 0.34749105, "memory(GiB)": 34.88, "step": 97110, "train_speed(iter/s)": 0.41195 }, { "acc": 0.93023357, "epoch": 2.629491240895676, "grad_norm": 10.222047805786133, "learning_rate": 2.8812181007140255e-06, "loss": 0.36555352, "memory(GiB)": 34.88, "step": 97115, "train_speed(iter/s)": 0.411951 }, { "acc": 0.94079199, "epoch": 2.6296266211788915, "grad_norm": 7.812453746795654, "learning_rate": 2.8807113118216697e-06, "loss": 0.30127769, "memory(GiB)": 34.88, "step": 97120, "train_speed(iter/s)": 0.411953 }, { "acc": 0.93155403, "epoch": 2.6297620014621073, "grad_norm": 9.292610168457031, "learning_rate": 2.880204549483827e-06, "loss": 0.37711375, "memory(GiB)": 34.88, "step": 97125, "train_speed(iter/s)": 0.411954 }, { "acc": 0.93215561, "epoch": 2.6298973817453226, "grad_norm": 11.463278770446777, "learning_rate": 2.8796978137068547e-06, "loss": 0.41522961, "memory(GiB)": 34.88, "step": 97130, "train_speed(iter/s)": 0.411955 }, { "acc": 0.93044052, "epoch": 2.630032762028538, "grad_norm": 11.0161714553833, "learning_rate": 2.8791911044970993e-06, "loss": 0.45507932, "memory(GiB)": 34.88, "step": 97135, "train_speed(iter/s)": 0.411956 }, { "acc": 0.94371471, "epoch": 2.6301681423117538, "grad_norm": 3.9499940872192383, "learning_rate": 2.878684421860902e-06, "loss": 0.32176642, "memory(GiB)": 34.88, "step": 97140, "train_speed(iter/s)": 0.411957 }, { "acc": 0.93968287, "epoch": 2.630303522594969, "grad_norm": 3.964942455291748, "learning_rate": 2.8781777658046156e-06, "loss": 0.3326087, "memory(GiB)": 34.88, "step": 97145, "train_speed(iter/s)": 0.411958 }, { "acc": 0.94222183, "epoch": 2.630438902878185, "grad_norm": 7.622219562530518, "learning_rate": 2.8776711363345855e-06, "loss": 0.31669745, "memory(GiB)": 34.88, "step": 97150, "train_speed(iter/s)": 0.411959 }, { "acc": 0.93755264, "epoch": 2.6305742831614003, "grad_norm": 8.8268404006958, "learning_rate": 2.8771645334571595e-06, "loss": 0.38357215, "memory(GiB)": 34.88, "step": 97155, "train_speed(iter/s)": 0.41196 }, { "acc": 0.92210503, "epoch": 2.630709663444616, "grad_norm": 8.017138481140137, "learning_rate": 2.876657957178679e-06, "loss": 0.39423184, "memory(GiB)": 34.88, "step": 97160, "train_speed(iter/s)": 0.411961 }, { "acc": 0.92060127, "epoch": 2.6308450437278315, "grad_norm": 9.037199974060059, "learning_rate": 2.8761514075054975e-06, "loss": 0.51099329, "memory(GiB)": 34.88, "step": 97165, "train_speed(iter/s)": 0.411962 }, { "acc": 0.92376881, "epoch": 2.630980424011047, "grad_norm": 15.446318626403809, "learning_rate": 2.8756448844439543e-06, "loss": 0.50726776, "memory(GiB)": 34.88, "step": 97170, "train_speed(iter/s)": 0.411963 }, { "acc": 0.92912464, "epoch": 2.6311158042942626, "grad_norm": 5.404362201690674, "learning_rate": 2.8751383880003987e-06, "loss": 0.40569968, "memory(GiB)": 34.88, "step": 97175, "train_speed(iter/s)": 0.411965 }, { "acc": 0.92777901, "epoch": 2.631251184577478, "grad_norm": 13.329048156738281, "learning_rate": 2.874631918181172e-06, "loss": 0.40151525, "memory(GiB)": 34.88, "step": 97180, "train_speed(iter/s)": 0.411966 }, { "acc": 0.93605938, "epoch": 2.6313865648606938, "grad_norm": 7.184181213378906, "learning_rate": 2.8741254749926197e-06, "loss": 0.34037533, "memory(GiB)": 34.88, "step": 97185, "train_speed(iter/s)": 0.411967 }, { "acc": 0.93002396, "epoch": 2.631521945143909, "grad_norm": 9.843202590942383, "learning_rate": 2.873619058441089e-06, "loss": 0.37529798, "memory(GiB)": 34.88, "step": 97190, "train_speed(iter/s)": 0.411968 }, { "acc": 0.92523508, "epoch": 2.631657325427125, "grad_norm": 12.225334167480469, "learning_rate": 2.8731126685329174e-06, "loss": 0.42895517, "memory(GiB)": 34.88, "step": 97195, "train_speed(iter/s)": 0.411969 }, { "acc": 0.92908306, "epoch": 2.6317927057103403, "grad_norm": 4.351566314697266, "learning_rate": 2.872606305274457e-06, "loss": 0.47260518, "memory(GiB)": 34.88, "step": 97200, "train_speed(iter/s)": 0.41197 }, { "acc": 0.92994766, "epoch": 2.6319280859935557, "grad_norm": 15.017585754394531, "learning_rate": 2.872099968672043e-06, "loss": 0.42038226, "memory(GiB)": 34.88, "step": 97205, "train_speed(iter/s)": 0.411971 }, { "acc": 0.95257921, "epoch": 2.6320634662767715, "grad_norm": 8.14224910736084, "learning_rate": 2.8715936587320248e-06, "loss": 0.25581818, "memory(GiB)": 34.88, "step": 97210, "train_speed(iter/s)": 0.411972 }, { "acc": 0.93083706, "epoch": 2.6321988465599873, "grad_norm": 4.970864295959473, "learning_rate": 2.8710873754607397e-06, "loss": 0.34572282, "memory(GiB)": 34.88, "step": 97215, "train_speed(iter/s)": 0.411974 }, { "acc": 0.94197435, "epoch": 2.6323342268432026, "grad_norm": 7.650038242340088, "learning_rate": 2.8705811188645322e-06, "loss": 0.32146249, "memory(GiB)": 34.88, "step": 97220, "train_speed(iter/s)": 0.411975 }, { "acc": 0.93664379, "epoch": 2.632469607126418, "grad_norm": 9.089903831481934, "learning_rate": 2.8700748889497452e-06, "loss": 0.3010392, "memory(GiB)": 34.88, "step": 97225, "train_speed(iter/s)": 0.411976 }, { "acc": 0.93187046, "epoch": 2.632604987409634, "grad_norm": 9.523425102233887, "learning_rate": 2.8695686857227154e-06, "loss": 0.37128263, "memory(GiB)": 34.88, "step": 97230, "train_speed(iter/s)": 0.411976 }, { "acc": 0.93618937, "epoch": 2.632740367692849, "grad_norm": 4.791240692138672, "learning_rate": 2.8690625091897917e-06, "loss": 0.37945032, "memory(GiB)": 34.88, "step": 97235, "train_speed(iter/s)": 0.411977 }, { "acc": 0.93416691, "epoch": 2.6328757479760645, "grad_norm": 5.915002346038818, "learning_rate": 2.868556359357309e-06, "loss": 0.34116368, "memory(GiB)": 34.88, "step": 97240, "train_speed(iter/s)": 0.411979 }, { "acc": 0.94174576, "epoch": 2.6330111282592803, "grad_norm": 8.033256530761719, "learning_rate": 2.8680502362316114e-06, "loss": 0.3446753, "memory(GiB)": 34.88, "step": 97245, "train_speed(iter/s)": 0.41198 }, { "acc": 0.92049446, "epoch": 2.633146508542496, "grad_norm": 4.901086330413818, "learning_rate": 2.8675441398190336e-06, "loss": 0.48317647, "memory(GiB)": 34.88, "step": 97250, "train_speed(iter/s)": 0.411981 }, { "acc": 0.92900915, "epoch": 2.6332818888257115, "grad_norm": 8.509257316589355, "learning_rate": 2.8670380701259234e-06, "loss": 0.40688043, "memory(GiB)": 34.88, "step": 97255, "train_speed(iter/s)": 0.411982 }, { "acc": 0.91812639, "epoch": 2.633417269108927, "grad_norm": 7.801698684692383, "learning_rate": 2.866532027158615e-06, "loss": 0.52817268, "memory(GiB)": 34.88, "step": 97260, "train_speed(iter/s)": 0.411983 }, { "acc": 0.92811661, "epoch": 2.6335526493921426, "grad_norm": 13.96173095703125, "learning_rate": 2.8660260109234458e-06, "loss": 0.44965382, "memory(GiB)": 34.88, "step": 97265, "train_speed(iter/s)": 0.411984 }, { "acc": 0.91532917, "epoch": 2.633688029675358, "grad_norm": 11.527619361877441, "learning_rate": 2.865520021426761e-06, "loss": 0.52333231, "memory(GiB)": 34.88, "step": 97270, "train_speed(iter/s)": 0.411985 }, { "acc": 0.91681042, "epoch": 2.633823409958574, "grad_norm": 9.171356201171875, "learning_rate": 2.865014058674894e-06, "loss": 0.43497095, "memory(GiB)": 34.88, "step": 97275, "train_speed(iter/s)": 0.411986 }, { "acc": 0.93344669, "epoch": 2.633958790241789, "grad_norm": 11.614255905151367, "learning_rate": 2.864508122674186e-06, "loss": 0.36136594, "memory(GiB)": 34.88, "step": 97280, "train_speed(iter/s)": 0.411987 }, { "acc": 0.95362244, "epoch": 2.634094170525005, "grad_norm": 7.080649375915527, "learning_rate": 2.86400221343097e-06, "loss": 0.25170233, "memory(GiB)": 34.88, "step": 97285, "train_speed(iter/s)": 0.411988 }, { "acc": 0.92459793, "epoch": 2.6342295508082203, "grad_norm": 11.78992748260498, "learning_rate": 2.8634963309515905e-06, "loss": 0.42875371, "memory(GiB)": 34.88, "step": 97290, "train_speed(iter/s)": 0.411989 }, { "acc": 0.94175549, "epoch": 2.6343649310914357, "grad_norm": 4.229975700378418, "learning_rate": 2.8629904752423804e-06, "loss": 0.3249732, "memory(GiB)": 34.88, "step": 97295, "train_speed(iter/s)": 0.41199 }, { "acc": 0.92213125, "epoch": 2.6345003113746515, "grad_norm": 13.398375511169434, "learning_rate": 2.862484646309678e-06, "loss": 0.4481575, "memory(GiB)": 34.88, "step": 97300, "train_speed(iter/s)": 0.411991 }, { "acc": 0.95000658, "epoch": 2.634635691657867, "grad_norm": 3.722076416015625, "learning_rate": 2.861978844159818e-06, "loss": 0.30374837, "memory(GiB)": 34.88, "step": 97305, "train_speed(iter/s)": 0.411992 }, { "acc": 0.93772755, "epoch": 2.6347710719410826, "grad_norm": 7.250795364379883, "learning_rate": 2.8614730687991372e-06, "loss": 0.35141993, "memory(GiB)": 34.88, "step": 97310, "train_speed(iter/s)": 0.411993 }, { "acc": 0.93524017, "epoch": 2.634906452224298, "grad_norm": 10.254776000976562, "learning_rate": 2.8609673202339745e-06, "loss": 0.32643266, "memory(GiB)": 34.88, "step": 97315, "train_speed(iter/s)": 0.411994 }, { "acc": 0.92058067, "epoch": 2.635041832507514, "grad_norm": 9.961413383483887, "learning_rate": 2.8604615984706575e-06, "loss": 0.48182888, "memory(GiB)": 34.88, "step": 97320, "train_speed(iter/s)": 0.411996 }, { "acc": 0.93475704, "epoch": 2.635177212790729, "grad_norm": 6.588455677032471, "learning_rate": 2.8599559035155317e-06, "loss": 0.39299324, "memory(GiB)": 34.88, "step": 97325, "train_speed(iter/s)": 0.411997 }, { "acc": 0.91656342, "epoch": 2.6353125930739445, "grad_norm": 6.784852504730225, "learning_rate": 2.859450235374924e-06, "loss": 0.51964073, "memory(GiB)": 34.88, "step": 97330, "train_speed(iter/s)": 0.411998 }, { "acc": 0.92035522, "epoch": 2.6354479733571603, "grad_norm": 8.83735179901123, "learning_rate": 2.8589445940551745e-06, "loss": 0.45717902, "memory(GiB)": 34.88, "step": 97335, "train_speed(iter/s)": 0.411999 }, { "acc": 0.95579367, "epoch": 2.6355833536403757, "grad_norm": 6.957851886749268, "learning_rate": 2.858438979562612e-06, "loss": 0.24602065, "memory(GiB)": 34.88, "step": 97340, "train_speed(iter/s)": 0.412 }, { "acc": 0.92612429, "epoch": 2.6357187339235915, "grad_norm": 8.707324981689453, "learning_rate": 2.8579333919035735e-06, "loss": 0.46261911, "memory(GiB)": 34.88, "step": 97345, "train_speed(iter/s)": 0.412001 }, { "acc": 0.94045334, "epoch": 2.635854114206807, "grad_norm": 5.959807395935059, "learning_rate": 2.8574278310843935e-06, "loss": 0.31204777, "memory(GiB)": 34.88, "step": 97350, "train_speed(iter/s)": 0.412002 }, { "acc": 0.92633648, "epoch": 2.6359894944900226, "grad_norm": 5.261493682861328, "learning_rate": 2.8569222971114e-06, "loss": 0.43439813, "memory(GiB)": 34.88, "step": 97355, "train_speed(iter/s)": 0.412003 }, { "acc": 0.92343254, "epoch": 2.636124874773238, "grad_norm": 10.122223854064941, "learning_rate": 2.8564167899909334e-06, "loss": 0.41549888, "memory(GiB)": 34.88, "step": 97360, "train_speed(iter/s)": 0.412004 }, { "acc": 0.92249317, "epoch": 2.6362602550564533, "grad_norm": 19.096158981323242, "learning_rate": 2.8559113097293202e-06, "loss": 0.4725666, "memory(GiB)": 34.88, "step": 97365, "train_speed(iter/s)": 0.412005 }, { "acc": 0.93999577, "epoch": 2.636395635339669, "grad_norm": 6.107336044311523, "learning_rate": 2.8554058563328953e-06, "loss": 0.31137609, "memory(GiB)": 34.88, "step": 97370, "train_speed(iter/s)": 0.412007 }, { "acc": 0.92868118, "epoch": 2.636531015622885, "grad_norm": 5.079596042633057, "learning_rate": 2.854900429807987e-06, "loss": 0.38097446, "memory(GiB)": 34.88, "step": 97375, "train_speed(iter/s)": 0.412008 }, { "acc": 0.942243, "epoch": 2.6366663959061003, "grad_norm": 4.544261932373047, "learning_rate": 2.8543950301609326e-06, "loss": 0.28681257, "memory(GiB)": 34.88, "step": 97380, "train_speed(iter/s)": 0.412009 }, { "acc": 0.92932091, "epoch": 2.6368017761893157, "grad_norm": 7.219157695770264, "learning_rate": 2.85388965739806e-06, "loss": 0.41837578, "memory(GiB)": 34.88, "step": 97385, "train_speed(iter/s)": 0.41201 }, { "acc": 0.92493801, "epoch": 2.6369371564725315, "grad_norm": 6.3009514808654785, "learning_rate": 2.8533843115256966e-06, "loss": 0.43158441, "memory(GiB)": 34.88, "step": 97390, "train_speed(iter/s)": 0.412011 }, { "acc": 0.94855595, "epoch": 2.637072536755747, "grad_norm": 5.462525844573975, "learning_rate": 2.85287899255018e-06, "loss": 0.25936127, "memory(GiB)": 34.88, "step": 97395, "train_speed(iter/s)": 0.412012 }, { "acc": 0.92127151, "epoch": 2.637207917038962, "grad_norm": 9.281203269958496, "learning_rate": 2.8523737004778345e-06, "loss": 0.44026299, "memory(GiB)": 34.88, "step": 97400, "train_speed(iter/s)": 0.412013 }, { "acc": 0.94564304, "epoch": 2.637343297322178, "grad_norm": 7.719045162200928, "learning_rate": 2.851868435314994e-06, "loss": 0.26719141, "memory(GiB)": 34.88, "step": 97405, "train_speed(iter/s)": 0.412014 }, { "acc": 0.93785496, "epoch": 2.637478677605394, "grad_norm": 14.060361862182617, "learning_rate": 2.851363197067982e-06, "loss": 0.41381054, "memory(GiB)": 34.88, "step": 97410, "train_speed(iter/s)": 0.412015 }, { "acc": 0.9196888, "epoch": 2.637614057888609, "grad_norm": 5.890594005584717, "learning_rate": 2.850857985743135e-06, "loss": 0.48264313, "memory(GiB)": 34.88, "step": 97415, "train_speed(iter/s)": 0.412016 }, { "acc": 0.94634628, "epoch": 2.6377494381718245, "grad_norm": 9.92967700958252, "learning_rate": 2.8503528013467775e-06, "loss": 0.29865527, "memory(GiB)": 34.88, "step": 97420, "train_speed(iter/s)": 0.412017 }, { "acc": 0.93256378, "epoch": 2.6378848184550403, "grad_norm": 4.919096946716309, "learning_rate": 2.8498476438852375e-06, "loss": 0.38103092, "memory(GiB)": 34.88, "step": 97425, "train_speed(iter/s)": 0.412018 }, { "acc": 0.93960447, "epoch": 2.6380201987382557, "grad_norm": 9.93353271484375, "learning_rate": 2.849342513364847e-06, "loss": 0.37338769, "memory(GiB)": 34.88, "step": 97430, "train_speed(iter/s)": 0.412019 }, { "acc": 0.93907528, "epoch": 2.6381555790214715, "grad_norm": 6.1667890548706055, "learning_rate": 2.8488374097919286e-06, "loss": 0.35658879, "memory(GiB)": 34.88, "step": 97435, "train_speed(iter/s)": 0.41202 }, { "acc": 0.92718897, "epoch": 2.638290959304687, "grad_norm": 3.468268871307373, "learning_rate": 2.848332333172814e-06, "loss": 0.45074883, "memory(GiB)": 34.88, "step": 97440, "train_speed(iter/s)": 0.412022 }, { "acc": 0.92194462, "epoch": 2.6384263395879026, "grad_norm": 8.435422897338867, "learning_rate": 2.847827283513825e-06, "loss": 0.41858759, "memory(GiB)": 34.88, "step": 97445, "train_speed(iter/s)": 0.412022 }, { "acc": 0.91551971, "epoch": 2.638561719871118, "grad_norm": 7.572701930999756, "learning_rate": 2.847322260821295e-06, "loss": 0.5176342, "memory(GiB)": 34.88, "step": 97450, "train_speed(iter/s)": 0.412023 }, { "acc": 0.94306536, "epoch": 2.6386971001543333, "grad_norm": 3.348695993423462, "learning_rate": 2.846817265101546e-06, "loss": 0.28777399, "memory(GiB)": 34.88, "step": 97455, "train_speed(iter/s)": 0.412024 }, { "acc": 0.91581211, "epoch": 2.638832480437549, "grad_norm": 17.867141723632812, "learning_rate": 2.8463122963609054e-06, "loss": 0.47566175, "memory(GiB)": 34.88, "step": 97460, "train_speed(iter/s)": 0.412026 }, { "acc": 0.93716755, "epoch": 2.6389678607207645, "grad_norm": 27.68750762939453, "learning_rate": 2.8458073546056973e-06, "loss": 0.35492759, "memory(GiB)": 34.88, "step": 97465, "train_speed(iter/s)": 0.412026 }, { "acc": 0.91507988, "epoch": 2.6391032410039803, "grad_norm": 3.9111931324005127, "learning_rate": 2.845302439842252e-06, "loss": 0.44380946, "memory(GiB)": 34.88, "step": 97470, "train_speed(iter/s)": 0.412028 }, { "acc": 0.92783985, "epoch": 2.6392386212871957, "grad_norm": 9.516226768493652, "learning_rate": 2.84479755207689e-06, "loss": 0.41299882, "memory(GiB)": 34.88, "step": 97475, "train_speed(iter/s)": 0.412029 }, { "acc": 0.93026304, "epoch": 2.6393740015704115, "grad_norm": 7.5422821044921875, "learning_rate": 2.8442926913159334e-06, "loss": 0.42960157, "memory(GiB)": 34.88, "step": 97480, "train_speed(iter/s)": 0.412029 }, { "acc": 0.93488598, "epoch": 2.639509381853627, "grad_norm": 12.059632301330566, "learning_rate": 2.843787857565715e-06, "loss": 0.37131758, "memory(GiB)": 34.88, "step": 97485, "train_speed(iter/s)": 0.412031 }, { "acc": 0.93530483, "epoch": 2.639644762136842, "grad_norm": 29.624862670898438, "learning_rate": 2.843283050832552e-06, "loss": 0.33386748, "memory(GiB)": 34.88, "step": 97490, "train_speed(iter/s)": 0.412032 }, { "acc": 0.92298889, "epoch": 2.639780142420058, "grad_norm": 9.205480575561523, "learning_rate": 2.8427782711227696e-06, "loss": 0.43492813, "memory(GiB)": 34.88, "step": 97495, "train_speed(iter/s)": 0.412033 }, { "acc": 0.93288784, "epoch": 2.6399155227032733, "grad_norm": 7.163215637207031, "learning_rate": 2.8422735184426926e-06, "loss": 0.3730799, "memory(GiB)": 34.88, "step": 97500, "train_speed(iter/s)": 0.412034 }, { "acc": 0.9489933, "epoch": 2.640050902986489, "grad_norm": 9.42663288116455, "learning_rate": 2.8417687927986444e-06, "loss": 0.28021867, "memory(GiB)": 34.88, "step": 97505, "train_speed(iter/s)": 0.412035 }, { "acc": 0.93203564, "epoch": 2.6401862832697045, "grad_norm": 9.265583992004395, "learning_rate": 2.841264094196947e-06, "loss": 0.42926111, "memory(GiB)": 34.88, "step": 97510, "train_speed(iter/s)": 0.412036 }, { "acc": 0.94027672, "epoch": 2.6403216635529203, "grad_norm": 14.066831588745117, "learning_rate": 2.8407594226439174e-06, "loss": 0.3295722, "memory(GiB)": 34.88, "step": 97515, "train_speed(iter/s)": 0.412037 }, { "acc": 0.93478146, "epoch": 2.6404570438361357, "grad_norm": 5.840423583984375, "learning_rate": 2.8402547781458872e-06, "loss": 0.36452396, "memory(GiB)": 34.88, "step": 97520, "train_speed(iter/s)": 0.412038 }, { "acc": 0.91216545, "epoch": 2.640592424119351, "grad_norm": 8.866615295410156, "learning_rate": 2.8397501607091702e-06, "loss": 0.50957808, "memory(GiB)": 34.88, "step": 97525, "train_speed(iter/s)": 0.412039 }, { "acc": 0.93222027, "epoch": 2.640727804402567, "grad_norm": 4.485856056213379, "learning_rate": 2.8392455703400924e-06, "loss": 0.37063184, "memory(GiB)": 34.88, "step": 97530, "train_speed(iter/s)": 0.41204 }, { "acc": 0.91298046, "epoch": 2.6408631846857826, "grad_norm": 6.222318649291992, "learning_rate": 2.838741007044972e-06, "loss": 0.44945645, "memory(GiB)": 34.88, "step": 97535, "train_speed(iter/s)": 0.412042 }, { "acc": 0.94372559, "epoch": 2.640998564968998, "grad_norm": 6.786581993103027, "learning_rate": 2.8382364708301335e-06, "loss": 0.30032587, "memory(GiB)": 34.88, "step": 97540, "train_speed(iter/s)": 0.412043 }, { "acc": 0.92289276, "epoch": 2.6411339452522133, "grad_norm": 5.3757452964782715, "learning_rate": 2.8377319617018923e-06, "loss": 0.42474475, "memory(GiB)": 34.88, "step": 97545, "train_speed(iter/s)": 0.412044 }, { "acc": 0.92774124, "epoch": 2.641269325535429, "grad_norm": 4.998399257659912, "learning_rate": 2.8372274796665713e-06, "loss": 0.3825263, "memory(GiB)": 34.88, "step": 97550, "train_speed(iter/s)": 0.412045 }, { "acc": 0.93144083, "epoch": 2.6414047058186445, "grad_norm": 14.635122299194336, "learning_rate": 2.83672302473049e-06, "loss": 0.37201676, "memory(GiB)": 34.88, "step": 97555, "train_speed(iter/s)": 0.412046 }, { "acc": 0.92570066, "epoch": 2.64154008610186, "grad_norm": 15.779740333557129, "learning_rate": 2.8362185968999663e-06, "loss": 0.43402042, "memory(GiB)": 34.88, "step": 97560, "train_speed(iter/s)": 0.412047 }, { "acc": 0.93056374, "epoch": 2.6416754663850757, "grad_norm": 5.392148017883301, "learning_rate": 2.8357141961813193e-06, "loss": 0.41245937, "memory(GiB)": 34.88, "step": 97565, "train_speed(iter/s)": 0.412048 }, { "acc": 0.9165452, "epoch": 2.6418108466682915, "grad_norm": 12.399242401123047, "learning_rate": 2.8352098225808688e-06, "loss": 0.41804457, "memory(GiB)": 34.88, "step": 97570, "train_speed(iter/s)": 0.412049 }, { "acc": 0.93454704, "epoch": 2.641946226951507, "grad_norm": 6.0582990646362305, "learning_rate": 2.8347054761049344e-06, "loss": 0.36020429, "memory(GiB)": 34.88, "step": 97575, "train_speed(iter/s)": 0.41205 }, { "acc": 0.91191587, "epoch": 2.642081607234722, "grad_norm": 9.206305503845215, "learning_rate": 2.83420115675983e-06, "loss": 0.4669601, "memory(GiB)": 34.88, "step": 97580, "train_speed(iter/s)": 0.412051 }, { "acc": 0.92093287, "epoch": 2.642216987517938, "grad_norm": 11.705904006958008, "learning_rate": 2.8336968645518747e-06, "loss": 0.45614309, "memory(GiB)": 34.88, "step": 97585, "train_speed(iter/s)": 0.412052 }, { "acc": 0.93061972, "epoch": 2.6423523678011533, "grad_norm": 12.356191635131836, "learning_rate": 2.8331925994873876e-06, "loss": 0.47015562, "memory(GiB)": 34.88, "step": 97590, "train_speed(iter/s)": 0.412053 }, { "acc": 0.94204235, "epoch": 2.642487748084369, "grad_norm": 12.509061813354492, "learning_rate": 2.832688361572686e-06, "loss": 0.39896021, "memory(GiB)": 34.88, "step": 97595, "train_speed(iter/s)": 0.412054 }, { "acc": 0.93173199, "epoch": 2.6426231283675845, "grad_norm": 7.697322845458984, "learning_rate": 2.832184150814083e-06, "loss": 0.32732363, "memory(GiB)": 34.88, "step": 97600, "train_speed(iter/s)": 0.412055 }, { "acc": 0.92532883, "epoch": 2.6427585086508003, "grad_norm": 15.04218864440918, "learning_rate": 2.831679967217897e-06, "loss": 0.45222931, "memory(GiB)": 34.88, "step": 97605, "train_speed(iter/s)": 0.412056 }, { "acc": 0.92688961, "epoch": 2.6428938889340157, "grad_norm": 5.774813652038574, "learning_rate": 2.8311758107904446e-06, "loss": 0.43186426, "memory(GiB)": 34.88, "step": 97610, "train_speed(iter/s)": 0.412057 }, { "acc": 0.93718042, "epoch": 2.643029269217231, "grad_norm": 4.8790974617004395, "learning_rate": 2.8306716815380403e-06, "loss": 0.33110819, "memory(GiB)": 34.88, "step": 97615, "train_speed(iter/s)": 0.412058 }, { "acc": 0.9162611, "epoch": 2.643164649500447, "grad_norm": 8.883285522460938, "learning_rate": 2.830167579466997e-06, "loss": 0.47176495, "memory(GiB)": 34.88, "step": 97620, "train_speed(iter/s)": 0.412059 }, { "acc": 0.93281803, "epoch": 2.643300029783662, "grad_norm": 3.3443427085876465, "learning_rate": 2.8296635045836336e-06, "loss": 0.38961289, "memory(GiB)": 34.88, "step": 97625, "train_speed(iter/s)": 0.41206 }, { "acc": 0.93171797, "epoch": 2.643435410066878, "grad_norm": 7.9782395362854, "learning_rate": 2.829159456894265e-06, "loss": 0.42615366, "memory(GiB)": 34.88, "step": 97630, "train_speed(iter/s)": 0.412061 }, { "acc": 0.92934446, "epoch": 2.6435707903500933, "grad_norm": 5.865490436553955, "learning_rate": 2.8286554364052005e-06, "loss": 0.42159424, "memory(GiB)": 34.88, "step": 97635, "train_speed(iter/s)": 0.412062 }, { "acc": 0.95104342, "epoch": 2.643706170633309, "grad_norm": 5.097192287445068, "learning_rate": 2.8281514431227575e-06, "loss": 0.23167, "memory(GiB)": 34.88, "step": 97640, "train_speed(iter/s)": 0.412063 }, { "acc": 0.9276329, "epoch": 2.6438415509165245, "grad_norm": 10.196444511413574, "learning_rate": 2.827647477053251e-06, "loss": 0.38595693, "memory(GiB)": 34.88, "step": 97645, "train_speed(iter/s)": 0.412064 }, { "acc": 0.92405338, "epoch": 2.64397693119974, "grad_norm": 12.254669189453125, "learning_rate": 2.8271435382029895e-06, "loss": 0.37949533, "memory(GiB)": 34.88, "step": 97650, "train_speed(iter/s)": 0.412065 }, { "acc": 0.92539577, "epoch": 2.6441123114829557, "grad_norm": 6.953622817993164, "learning_rate": 2.8266396265782873e-06, "loss": 0.42149668, "memory(GiB)": 34.88, "step": 97655, "train_speed(iter/s)": 0.412067 }, { "acc": 0.92783117, "epoch": 2.644247691766171, "grad_norm": 4.617934703826904, "learning_rate": 2.8261357421854598e-06, "loss": 0.40426092, "memory(GiB)": 34.88, "step": 97660, "train_speed(iter/s)": 0.412068 }, { "acc": 0.92210617, "epoch": 2.644383072049387, "grad_norm": 7.311375617980957, "learning_rate": 2.825631885030818e-06, "loss": 0.46596527, "memory(GiB)": 34.88, "step": 97665, "train_speed(iter/s)": 0.412069 }, { "acc": 0.9375145, "epoch": 2.644518452332602, "grad_norm": 9.794641494750977, "learning_rate": 2.8251280551206718e-06, "loss": 0.35864248, "memory(GiB)": 34.88, "step": 97670, "train_speed(iter/s)": 0.412069 }, { "acc": 0.93590021, "epoch": 2.644653832615818, "grad_norm": 4.590321063995361, "learning_rate": 2.824624252461334e-06, "loss": 0.34120841, "memory(GiB)": 34.88, "step": 97675, "train_speed(iter/s)": 0.41207 }, { "acc": 0.92524481, "epoch": 2.6447892128990333, "grad_norm": 67.24472045898438, "learning_rate": 2.8241204770591164e-06, "loss": 0.39989381, "memory(GiB)": 34.88, "step": 97680, "train_speed(iter/s)": 0.412072 }, { "acc": 0.94039478, "epoch": 2.6449245931822487, "grad_norm": 5.92227840423584, "learning_rate": 2.823616728920327e-06, "loss": 0.4134665, "memory(GiB)": 34.88, "step": 97685, "train_speed(iter/s)": 0.412073 }, { "acc": 0.93167725, "epoch": 2.6450599734654645, "grad_norm": 9.259773254394531, "learning_rate": 2.8231130080512793e-06, "loss": 0.41693954, "memory(GiB)": 34.88, "step": 97690, "train_speed(iter/s)": 0.412074 }, { "acc": 0.92357788, "epoch": 2.6451953537486803, "grad_norm": 6.478658199310303, "learning_rate": 2.8226093144582817e-06, "loss": 0.39251697, "memory(GiB)": 34.88, "step": 97695, "train_speed(iter/s)": 0.412075 }, { "acc": 0.94448385, "epoch": 2.6453307340318957, "grad_norm": 9.107505798339844, "learning_rate": 2.8221056481476457e-06, "loss": 0.30164809, "memory(GiB)": 34.88, "step": 97700, "train_speed(iter/s)": 0.412076 }, { "acc": 0.93361168, "epoch": 2.645466114315111, "grad_norm": 4.202975749969482, "learning_rate": 2.8216020091256783e-06, "loss": 0.38768544, "memory(GiB)": 34.88, "step": 97705, "train_speed(iter/s)": 0.412077 }, { "acc": 0.93658504, "epoch": 2.645601494598327, "grad_norm": 8.184247016906738, "learning_rate": 2.8210983973986895e-06, "loss": 0.37671413, "memory(GiB)": 34.88, "step": 97710, "train_speed(iter/s)": 0.412078 }, { "acc": 0.93383989, "epoch": 2.645736874881542, "grad_norm": 7.6849284172058105, "learning_rate": 2.820594812972989e-06, "loss": 0.34971166, "memory(GiB)": 34.88, "step": 97715, "train_speed(iter/s)": 0.412079 }, { "acc": 0.94346733, "epoch": 2.6458722551647575, "grad_norm": 4.8763275146484375, "learning_rate": 2.8200912558548854e-06, "loss": 0.28446827, "memory(GiB)": 34.88, "step": 97720, "train_speed(iter/s)": 0.41208 }, { "acc": 0.92863503, "epoch": 2.6460076354479733, "grad_norm": 7.837368965148926, "learning_rate": 2.8195877260506837e-06, "loss": 0.43117046, "memory(GiB)": 34.88, "step": 97725, "train_speed(iter/s)": 0.412081 }, { "acc": 0.93063517, "epoch": 2.646143015731189, "grad_norm": 6.839639663696289, "learning_rate": 2.819084223566695e-06, "loss": 0.42094936, "memory(GiB)": 34.88, "step": 97730, "train_speed(iter/s)": 0.412083 }, { "acc": 0.92939091, "epoch": 2.6462783960144045, "grad_norm": 6.976457595825195, "learning_rate": 2.8185807484092266e-06, "loss": 0.33211091, "memory(GiB)": 34.88, "step": 97735, "train_speed(iter/s)": 0.412084 }, { "acc": 0.91922321, "epoch": 2.64641377629762, "grad_norm": 9.906949996948242, "learning_rate": 2.818077300584582e-06, "loss": 0.47394938, "memory(GiB)": 34.88, "step": 97740, "train_speed(iter/s)": 0.412085 }, { "acc": 0.9214592, "epoch": 2.6465491565808357, "grad_norm": 7.526461124420166, "learning_rate": 2.8175738800990703e-06, "loss": 0.3822824, "memory(GiB)": 34.88, "step": 97745, "train_speed(iter/s)": 0.412086 }, { "acc": 0.93122864, "epoch": 2.646684536864051, "grad_norm": 4.237339019775391, "learning_rate": 2.8170704869589984e-06, "loss": 0.35514772, "memory(GiB)": 34.88, "step": 97750, "train_speed(iter/s)": 0.412087 }, { "acc": 0.92632627, "epoch": 2.646819917147267, "grad_norm": 7.07651948928833, "learning_rate": 2.816567121170673e-06, "loss": 0.50550213, "memory(GiB)": 34.88, "step": 97755, "train_speed(iter/s)": 0.412088 }, { "acc": 0.94199982, "epoch": 2.646955297430482, "grad_norm": 8.565402030944824, "learning_rate": 2.8160637827403963e-06, "loss": 0.34011869, "memory(GiB)": 34.88, "step": 97760, "train_speed(iter/s)": 0.412089 }, { "acc": 0.93422403, "epoch": 2.647090677713698, "grad_norm": 4.007040023803711, "learning_rate": 2.8155604716744755e-06, "loss": 0.29788563, "memory(GiB)": 34.88, "step": 97765, "train_speed(iter/s)": 0.41209 }, { "acc": 0.91222591, "epoch": 2.6472260579969134, "grad_norm": 9.620116233825684, "learning_rate": 2.815057187979218e-06, "loss": 0.49639134, "memory(GiB)": 34.88, "step": 97770, "train_speed(iter/s)": 0.412091 }, { "acc": 0.9347702, "epoch": 2.6473614382801287, "grad_norm": 13.376440048217773, "learning_rate": 2.8145539316609245e-06, "loss": 0.37430472, "memory(GiB)": 34.88, "step": 97775, "train_speed(iter/s)": 0.412092 }, { "acc": 0.91093426, "epoch": 2.6474968185633445, "grad_norm": 4.73837947845459, "learning_rate": 2.8140507027259e-06, "loss": 0.53263497, "memory(GiB)": 34.88, "step": 97780, "train_speed(iter/s)": 0.412093 }, { "acc": 0.93632088, "epoch": 2.64763219884656, "grad_norm": 5.8738274574279785, "learning_rate": 2.813547501180449e-06, "loss": 0.34688702, "memory(GiB)": 34.88, "step": 97785, "train_speed(iter/s)": 0.412095 }, { "acc": 0.93988466, "epoch": 2.6477675791297757, "grad_norm": 7.212991237640381, "learning_rate": 2.813044327030879e-06, "loss": 0.34775167, "memory(GiB)": 34.88, "step": 97790, "train_speed(iter/s)": 0.412096 }, { "acc": 0.92402821, "epoch": 2.647902959412991, "grad_norm": 5.125429630279541, "learning_rate": 2.8125411802834846e-06, "loss": 0.42025609, "memory(GiB)": 34.88, "step": 97795, "train_speed(iter/s)": 0.412097 }, { "acc": 0.93458357, "epoch": 2.648038339696207, "grad_norm": 9.19340705871582, "learning_rate": 2.8120380609445776e-06, "loss": 0.37351799, "memory(GiB)": 34.88, "step": 97800, "train_speed(iter/s)": 0.412098 }, { "acc": 0.92495995, "epoch": 2.648173719979422, "grad_norm": 6.566025257110596, "learning_rate": 2.8115349690204573e-06, "loss": 0.43885341, "memory(GiB)": 34.88, "step": 97805, "train_speed(iter/s)": 0.412099 }, { "acc": 0.91381826, "epoch": 2.6483091002626375, "grad_norm": 7.592220306396484, "learning_rate": 2.811031904517423e-06, "loss": 0.54008598, "memory(GiB)": 34.88, "step": 97810, "train_speed(iter/s)": 0.4121 }, { "acc": 0.94405804, "epoch": 2.6484444805458534, "grad_norm": 8.900505065917969, "learning_rate": 2.810528867441779e-06, "loss": 0.29915965, "memory(GiB)": 34.88, "step": 97815, "train_speed(iter/s)": 0.412101 }, { "acc": 0.92961159, "epoch": 2.6485798608290687, "grad_norm": 5.867158889770508, "learning_rate": 2.810025857799827e-06, "loss": 0.35569358, "memory(GiB)": 34.88, "step": 97820, "train_speed(iter/s)": 0.412102 }, { "acc": 0.92691765, "epoch": 2.6487152411122845, "grad_norm": 7.252603054046631, "learning_rate": 2.8095228755978693e-06, "loss": 0.40840659, "memory(GiB)": 34.88, "step": 97825, "train_speed(iter/s)": 0.412103 }, { "acc": 0.93750811, "epoch": 2.6488506213955, "grad_norm": 7.087072849273682, "learning_rate": 2.8090199208422036e-06, "loss": 0.3380451, "memory(GiB)": 34.88, "step": 97830, "train_speed(iter/s)": 0.412104 }, { "acc": 0.93495998, "epoch": 2.6489860016787157, "grad_norm": 11.34230899810791, "learning_rate": 2.8085169935391322e-06, "loss": 0.41440039, "memory(GiB)": 34.88, "step": 97835, "train_speed(iter/s)": 0.412105 }, { "acc": 0.93684902, "epoch": 2.649121381961931, "grad_norm": 3.6399219036102295, "learning_rate": 2.8080140936949548e-06, "loss": 0.38557861, "memory(GiB)": 34.88, "step": 97840, "train_speed(iter/s)": 0.412107 }, { "acc": 0.92864628, "epoch": 2.6492567622451464, "grad_norm": 6.283891677856445, "learning_rate": 2.8075112213159744e-06, "loss": 0.36158366, "memory(GiB)": 34.88, "step": 97845, "train_speed(iter/s)": 0.412108 }, { "acc": 0.93047581, "epoch": 2.649392142528362, "grad_norm": 7.260753154754639, "learning_rate": 2.807008376408485e-06, "loss": 0.3558022, "memory(GiB)": 34.88, "step": 97850, "train_speed(iter/s)": 0.412109 }, { "acc": 0.94307442, "epoch": 2.649527522811578, "grad_norm": 6.487572193145752, "learning_rate": 2.8065055589787883e-06, "loss": 0.40486178, "memory(GiB)": 34.88, "step": 97855, "train_speed(iter/s)": 0.41211 }, { "acc": 0.94343777, "epoch": 2.6496629030947934, "grad_norm": 5.916514873504639, "learning_rate": 2.806002769033185e-06, "loss": 0.24066553, "memory(GiB)": 34.88, "step": 97860, "train_speed(iter/s)": 0.412111 }, { "acc": 0.92988329, "epoch": 2.6497982833780087, "grad_norm": 6.904064178466797, "learning_rate": 2.8055000065779707e-06, "loss": 0.36339192, "memory(GiB)": 34.88, "step": 97865, "train_speed(iter/s)": 0.412112 }, { "acc": 0.93836727, "epoch": 2.6499336636612245, "grad_norm": 7.113619804382324, "learning_rate": 2.804997271619444e-06, "loss": 0.30790291, "memory(GiB)": 34.88, "step": 97870, "train_speed(iter/s)": 0.412113 }, { "acc": 0.93513393, "epoch": 2.65006904394444, "grad_norm": 8.394947052001953, "learning_rate": 2.804494564163903e-06, "loss": 0.35144675, "memory(GiB)": 34.88, "step": 97875, "train_speed(iter/s)": 0.412114 }, { "acc": 0.93295422, "epoch": 2.6502044242276552, "grad_norm": 8.06525993347168, "learning_rate": 2.803991884217647e-06, "loss": 0.34647758, "memory(GiB)": 34.88, "step": 97880, "train_speed(iter/s)": 0.412115 }, { "acc": 0.9154726, "epoch": 2.650339804510871, "grad_norm": 20.112651824951172, "learning_rate": 2.8034892317869698e-06, "loss": 0.51755371, "memory(GiB)": 34.88, "step": 97885, "train_speed(iter/s)": 0.412117 }, { "acc": 0.93331823, "epoch": 2.650475184794087, "grad_norm": 4.382057189941406, "learning_rate": 2.802986606878169e-06, "loss": 0.40279012, "memory(GiB)": 34.88, "step": 97890, "train_speed(iter/s)": 0.412118 }, { "acc": 0.93169746, "epoch": 2.650610565077302, "grad_norm": 12.204421043395996, "learning_rate": 2.8024840094975444e-06, "loss": 0.41421738, "memory(GiB)": 34.88, "step": 97895, "train_speed(iter/s)": 0.412119 }, { "acc": 0.92663994, "epoch": 2.6507459453605176, "grad_norm": 5.168430328369141, "learning_rate": 2.8019814396513867e-06, "loss": 0.49643445, "memory(GiB)": 34.88, "step": 97900, "train_speed(iter/s)": 0.41212 }, { "acc": 0.91759882, "epoch": 2.6508813256437334, "grad_norm": 8.00333023071289, "learning_rate": 2.8014788973459937e-06, "loss": 0.39068351, "memory(GiB)": 34.88, "step": 97905, "train_speed(iter/s)": 0.412121 }, { "acc": 0.92615395, "epoch": 2.6510167059269487, "grad_norm": 7.604510307312012, "learning_rate": 2.8009763825876613e-06, "loss": 0.37563119, "memory(GiB)": 34.88, "step": 97910, "train_speed(iter/s)": 0.412122 }, { "acc": 0.94655342, "epoch": 2.6511520862101645, "grad_norm": 7.541818141937256, "learning_rate": 2.800473895382687e-06, "loss": 0.2849273, "memory(GiB)": 34.88, "step": 97915, "train_speed(iter/s)": 0.412123 }, { "acc": 0.92384815, "epoch": 2.65128746649338, "grad_norm": 7.059214115142822, "learning_rate": 2.7999714357373575e-06, "loss": 0.42464209, "memory(GiB)": 34.88, "step": 97920, "train_speed(iter/s)": 0.412124 }, { "acc": 0.94376135, "epoch": 2.6514228467765957, "grad_norm": 6.9342451095581055, "learning_rate": 2.7994690036579777e-06, "loss": 0.3196243, "memory(GiB)": 34.88, "step": 97925, "train_speed(iter/s)": 0.412125 }, { "acc": 0.93251534, "epoch": 2.651558227059811, "grad_norm": 4.462143421173096, "learning_rate": 2.7989665991508353e-06, "loss": 0.39809208, "memory(GiB)": 34.88, "step": 97930, "train_speed(iter/s)": 0.412126 }, { "acc": 0.92962875, "epoch": 2.6516936073430264, "grad_norm": 7.871820449829102, "learning_rate": 2.798464222222222e-06, "loss": 0.36828046, "memory(GiB)": 34.88, "step": 97935, "train_speed(iter/s)": 0.412127 }, { "acc": 0.914114, "epoch": 2.651828987626242, "grad_norm": 14.296693801879883, "learning_rate": 2.7979618728784358e-06, "loss": 0.46046968, "memory(GiB)": 34.88, "step": 97940, "train_speed(iter/s)": 0.412128 }, { "acc": 0.94075146, "epoch": 2.6519643679094576, "grad_norm": 6.486715793609619, "learning_rate": 2.7974595511257656e-06, "loss": 0.32338295, "memory(GiB)": 34.88, "step": 97945, "train_speed(iter/s)": 0.412129 }, { "acc": 0.92772045, "epoch": 2.6520997481926734, "grad_norm": 19.983856201171875, "learning_rate": 2.7969572569705088e-06, "loss": 0.50290089, "memory(GiB)": 34.88, "step": 97950, "train_speed(iter/s)": 0.41213 }, { "acc": 0.93260403, "epoch": 2.6522351284758887, "grad_norm": 8.686036109924316, "learning_rate": 2.7964549904189508e-06, "loss": 0.36186342, "memory(GiB)": 34.88, "step": 97955, "train_speed(iter/s)": 0.412131 }, { "acc": 0.92909966, "epoch": 2.6523705087591045, "grad_norm": 6.905704975128174, "learning_rate": 2.7959527514773914e-06, "loss": 0.38011708, "memory(GiB)": 34.88, "step": 97960, "train_speed(iter/s)": 0.412132 }, { "acc": 0.92898922, "epoch": 2.65250588904232, "grad_norm": 5.591427803039551, "learning_rate": 2.7954505401521164e-06, "loss": 0.38918097, "memory(GiB)": 34.88, "step": 97965, "train_speed(iter/s)": 0.412134 }, { "acc": 0.93044949, "epoch": 2.6526412693255352, "grad_norm": 7.566479682922363, "learning_rate": 2.7949483564494206e-06, "loss": 0.45451584, "memory(GiB)": 34.88, "step": 97970, "train_speed(iter/s)": 0.412135 }, { "acc": 0.92175465, "epoch": 2.652776649608751, "grad_norm": 5.517079830169678, "learning_rate": 2.794446200375592e-06, "loss": 0.40776148, "memory(GiB)": 34.88, "step": 97975, "train_speed(iter/s)": 0.412136 }, { "acc": 0.93331394, "epoch": 2.6529120298919664, "grad_norm": 7.35791015625, "learning_rate": 2.793944071936921e-06, "loss": 0.32114275, "memory(GiB)": 34.88, "step": 97980, "train_speed(iter/s)": 0.412137 }, { "acc": 0.92881756, "epoch": 2.653047410175182, "grad_norm": 3.252944231033325, "learning_rate": 2.793441971139701e-06, "loss": 0.40224967, "memory(GiB)": 34.88, "step": 97985, "train_speed(iter/s)": 0.412138 }, { "acc": 0.95034637, "epoch": 2.6531827904583976, "grad_norm": 8.806520462036133, "learning_rate": 2.792939897990216e-06, "loss": 0.28380568, "memory(GiB)": 34.88, "step": 97990, "train_speed(iter/s)": 0.412139 }, { "acc": 0.92859335, "epoch": 2.6533181707416134, "grad_norm": 7.128754615783691, "learning_rate": 2.7924378524947628e-06, "loss": 0.41403713, "memory(GiB)": 34.88, "step": 97995, "train_speed(iter/s)": 0.41214 }, { "acc": 0.92782688, "epoch": 2.6534535510248287, "grad_norm": 4.8993730545043945, "learning_rate": 2.791935834659625e-06, "loss": 0.39797814, "memory(GiB)": 34.88, "step": 98000, "train_speed(iter/s)": 0.412141 }, { "acc": 0.93029318, "epoch": 2.653588931308044, "grad_norm": 6.822002410888672, "learning_rate": 2.791433844491095e-06, "loss": 0.42870402, "memory(GiB)": 34.88, "step": 98005, "train_speed(iter/s)": 0.412142 }, { "acc": 0.92939253, "epoch": 2.65372431159126, "grad_norm": 6.539746284484863, "learning_rate": 2.7909318819954557e-06, "loss": 0.453233, "memory(GiB)": 34.88, "step": 98010, "train_speed(iter/s)": 0.412143 }, { "acc": 0.94004288, "epoch": 2.6538596918744752, "grad_norm": 12.668146133422852, "learning_rate": 2.7904299471790025e-06, "loss": 0.26539757, "memory(GiB)": 34.88, "step": 98015, "train_speed(iter/s)": 0.412144 }, { "acc": 0.92039051, "epoch": 2.653995072157691, "grad_norm": 7.817364692687988, "learning_rate": 2.7899280400480194e-06, "loss": 0.42402134, "memory(GiB)": 34.88, "step": 98020, "train_speed(iter/s)": 0.412145 }, { "acc": 0.94603939, "epoch": 2.6541304524409064, "grad_norm": 12.34221076965332, "learning_rate": 2.7894261606087913e-06, "loss": 0.29574893, "memory(GiB)": 34.88, "step": 98025, "train_speed(iter/s)": 0.412146 }, { "acc": 0.92367926, "epoch": 2.654265832724122, "grad_norm": 17.3927059173584, "learning_rate": 2.7889243088676105e-06, "loss": 0.40777998, "memory(GiB)": 34.88, "step": 98030, "train_speed(iter/s)": 0.412147 }, { "acc": 0.9245369, "epoch": 2.6544012130073376, "grad_norm": 8.48952865600586, "learning_rate": 2.7884224848307586e-06, "loss": 0.45166807, "memory(GiB)": 34.88, "step": 98035, "train_speed(iter/s)": 0.412148 }, { "acc": 0.92394714, "epoch": 2.654536593290553, "grad_norm": 17.108348846435547, "learning_rate": 2.7879206885045272e-06, "loss": 0.37042437, "memory(GiB)": 34.88, "step": 98040, "train_speed(iter/s)": 0.412149 }, { "acc": 0.93138952, "epoch": 2.6546719735737687, "grad_norm": 4.353015422821045, "learning_rate": 2.787418919895195e-06, "loss": 0.40161076, "memory(GiB)": 34.88, "step": 98045, "train_speed(iter/s)": 0.412149 }, { "acc": 0.92847652, "epoch": 2.6548073538569845, "grad_norm": 8.404777526855469, "learning_rate": 2.7869171790090566e-06, "loss": 0.34156208, "memory(GiB)": 34.88, "step": 98050, "train_speed(iter/s)": 0.41215 }, { "acc": 0.91697321, "epoch": 2.6549427341402, "grad_norm": 33.64493179321289, "learning_rate": 2.786415465852391e-06, "loss": 0.49791956, "memory(GiB)": 34.88, "step": 98055, "train_speed(iter/s)": 0.412151 }, { "acc": 0.94257336, "epoch": 2.6550781144234152, "grad_norm": 7.7560296058654785, "learning_rate": 2.7859137804314827e-06, "loss": 0.2828639, "memory(GiB)": 34.88, "step": 98060, "train_speed(iter/s)": 0.412152 }, { "acc": 0.94438591, "epoch": 2.655213494706631, "grad_norm": 5.815051555633545, "learning_rate": 2.7854121227526216e-06, "loss": 0.28377495, "memory(GiB)": 34.88, "step": 98065, "train_speed(iter/s)": 0.412153 }, { "acc": 0.91438656, "epoch": 2.6553488749898464, "grad_norm": 13.37646484375, "learning_rate": 2.784910492822087e-06, "loss": 0.50925436, "memory(GiB)": 34.88, "step": 98070, "train_speed(iter/s)": 0.412154 }, { "acc": 0.94181652, "epoch": 2.6554842552730618, "grad_norm": 5.522517681121826, "learning_rate": 2.784408890646167e-06, "loss": 0.34690583, "memory(GiB)": 34.88, "step": 98075, "train_speed(iter/s)": 0.412155 }, { "acc": 0.92412386, "epoch": 2.6556196355562776, "grad_norm": 7.705509662628174, "learning_rate": 2.783907316231138e-06, "loss": 0.43168144, "memory(GiB)": 34.88, "step": 98080, "train_speed(iter/s)": 0.412156 }, { "acc": 0.93761148, "epoch": 2.6557550158394934, "grad_norm": 4.867008209228516, "learning_rate": 2.7834057695832923e-06, "loss": 0.32538517, "memory(GiB)": 34.88, "step": 98085, "train_speed(iter/s)": 0.412157 }, { "acc": 0.92926846, "epoch": 2.6558903961227087, "grad_norm": 9.743914604187012, "learning_rate": 2.7829042507089065e-06, "loss": 0.36508117, "memory(GiB)": 34.88, "step": 98090, "train_speed(iter/s)": 0.412158 }, { "acc": 0.94118557, "epoch": 2.656025776405924, "grad_norm": 5.203058242797852, "learning_rate": 2.7824027596142655e-06, "loss": 0.34247241, "memory(GiB)": 34.88, "step": 98095, "train_speed(iter/s)": 0.412159 }, { "acc": 0.94241943, "epoch": 2.65616115668914, "grad_norm": 7.048094749450684, "learning_rate": 2.7819012963056523e-06, "loss": 0.31879215, "memory(GiB)": 34.88, "step": 98100, "train_speed(iter/s)": 0.41216 }, { "acc": 0.92442837, "epoch": 2.6562965369723552, "grad_norm": 8.25425910949707, "learning_rate": 2.7813998607893454e-06, "loss": 0.44370947, "memory(GiB)": 34.88, "step": 98105, "train_speed(iter/s)": 0.412161 }, { "acc": 0.92887821, "epoch": 2.656431917255571, "grad_norm": 6.82319450378418, "learning_rate": 2.7808984530716303e-06, "loss": 0.3759306, "memory(GiB)": 34.88, "step": 98110, "train_speed(iter/s)": 0.412162 }, { "acc": 0.93389816, "epoch": 2.6565672975387864, "grad_norm": 7.0525898933410645, "learning_rate": 2.7803970731587822e-06, "loss": 0.36306455, "memory(GiB)": 34.88, "step": 98115, "train_speed(iter/s)": 0.412163 }, { "acc": 0.9202529, "epoch": 2.656702677822002, "grad_norm": 4.374092102050781, "learning_rate": 2.77989572105709e-06, "loss": 0.41115627, "memory(GiB)": 34.88, "step": 98120, "train_speed(iter/s)": 0.412164 }, { "acc": 0.93549614, "epoch": 2.6568380581052176, "grad_norm": 7.934024810791016, "learning_rate": 2.779394396772828e-06, "loss": 0.36805177, "memory(GiB)": 34.88, "step": 98125, "train_speed(iter/s)": 0.412165 }, { "acc": 0.93256254, "epoch": 2.656973438388433, "grad_norm": 10.758523941040039, "learning_rate": 2.778893100312277e-06, "loss": 0.34203167, "memory(GiB)": 34.88, "step": 98130, "train_speed(iter/s)": 0.412166 }, { "acc": 0.94239464, "epoch": 2.6571088186716487, "grad_norm": 6.450891017913818, "learning_rate": 2.7783918316817193e-06, "loss": 0.2908545, "memory(GiB)": 34.88, "step": 98135, "train_speed(iter/s)": 0.412167 }, { "acc": 0.9337656, "epoch": 2.657244198954864, "grad_norm": 4.23832893371582, "learning_rate": 2.7778905908874344e-06, "loss": 0.40263205, "memory(GiB)": 34.88, "step": 98140, "train_speed(iter/s)": 0.412168 }, { "acc": 0.94329166, "epoch": 2.65737957923808, "grad_norm": 3.9416422843933105, "learning_rate": 2.7773893779356993e-06, "loss": 0.37691519, "memory(GiB)": 34.88, "step": 98145, "train_speed(iter/s)": 0.412169 }, { "acc": 0.93599052, "epoch": 2.6575149595212952, "grad_norm": 11.087602615356445, "learning_rate": 2.776888192832789e-06, "loss": 0.34772558, "memory(GiB)": 34.88, "step": 98150, "train_speed(iter/s)": 0.41217 }, { "acc": 0.93206186, "epoch": 2.657650339804511, "grad_norm": 15.511261940002441, "learning_rate": 2.7763870355849905e-06, "loss": 0.37000341, "memory(GiB)": 34.88, "step": 98155, "train_speed(iter/s)": 0.412171 }, { "acc": 0.94635105, "epoch": 2.6577857200877264, "grad_norm": 5.731611728668213, "learning_rate": 2.7758859061985753e-06, "loss": 0.25155301, "memory(GiB)": 34.88, "step": 98160, "train_speed(iter/s)": 0.412172 }, { "acc": 0.92581749, "epoch": 2.6579211003709418, "grad_norm": 9.305665969848633, "learning_rate": 2.7753848046798232e-06, "loss": 0.39258659, "memory(GiB)": 34.88, "step": 98165, "train_speed(iter/s)": 0.412173 }, { "acc": 0.93828812, "epoch": 2.6580564806541576, "grad_norm": 6.396143436431885, "learning_rate": 2.7748837310350103e-06, "loss": 0.4099968, "memory(GiB)": 34.88, "step": 98170, "train_speed(iter/s)": 0.412174 }, { "acc": 0.93913488, "epoch": 2.658191860937373, "grad_norm": 9.682941436767578, "learning_rate": 2.7743826852704176e-06, "loss": 0.39100599, "memory(GiB)": 34.88, "step": 98175, "train_speed(iter/s)": 0.412175 }, { "acc": 0.93305883, "epoch": 2.6583272412205887, "grad_norm": 9.319868087768555, "learning_rate": 2.773881667392318e-06, "loss": 0.38860204, "memory(GiB)": 34.88, "step": 98180, "train_speed(iter/s)": 0.412176 }, { "acc": 0.9364049, "epoch": 2.658462621503804, "grad_norm": 6.587129592895508, "learning_rate": 2.773380677406984e-06, "loss": 0.30490174, "memory(GiB)": 34.88, "step": 98185, "train_speed(iter/s)": 0.412177 }, { "acc": 0.92299442, "epoch": 2.65859800178702, "grad_norm": 16.580482482910156, "learning_rate": 2.7728797153207006e-06, "loss": 0.40099592, "memory(GiB)": 34.88, "step": 98190, "train_speed(iter/s)": 0.412178 }, { "acc": 0.94451141, "epoch": 2.6587333820702352, "grad_norm": 25.84796142578125, "learning_rate": 2.7723787811397362e-06, "loss": 0.29584486, "memory(GiB)": 34.88, "step": 98195, "train_speed(iter/s)": 0.412179 }, { "acc": 0.9260664, "epoch": 2.6588687623534506, "grad_norm": 11.902352333068848, "learning_rate": 2.771877874870368e-06, "loss": 0.41776996, "memory(GiB)": 34.88, "step": 98200, "train_speed(iter/s)": 0.41218 }, { "acc": 0.93651066, "epoch": 2.6590041426366664, "grad_norm": 5.206284046173096, "learning_rate": 2.7713769965188715e-06, "loss": 0.31538305, "memory(GiB)": 34.88, "step": 98205, "train_speed(iter/s)": 0.412181 }, { "acc": 0.91607704, "epoch": 2.659139522919882, "grad_norm": 8.913044929504395, "learning_rate": 2.7708761460915224e-06, "loss": 0.53870111, "memory(GiB)": 34.88, "step": 98210, "train_speed(iter/s)": 0.412182 }, { "acc": 0.9378829, "epoch": 2.6592749032030976, "grad_norm": 20.544992446899414, "learning_rate": 2.770375323594592e-06, "loss": 0.3100646, "memory(GiB)": 34.88, "step": 98215, "train_speed(iter/s)": 0.412183 }, { "acc": 0.93180971, "epoch": 2.659410283486313, "grad_norm": 6.886734485626221, "learning_rate": 2.769874529034355e-06, "loss": 0.39831867, "memory(GiB)": 34.88, "step": 98220, "train_speed(iter/s)": 0.412184 }, { "acc": 0.92889786, "epoch": 2.6595456637695287, "grad_norm": 5.855218410491943, "learning_rate": 2.769373762417087e-06, "loss": 0.46290326, "memory(GiB)": 34.88, "step": 98225, "train_speed(iter/s)": 0.412185 }, { "acc": 0.94492283, "epoch": 2.659681044052744, "grad_norm": 6.716031074523926, "learning_rate": 2.7688730237490574e-06, "loss": 0.31267862, "memory(GiB)": 34.88, "step": 98230, "train_speed(iter/s)": 0.412186 }, { "acc": 0.92249489, "epoch": 2.6598164243359594, "grad_norm": 10.656030654907227, "learning_rate": 2.7683723130365426e-06, "loss": 0.45693798, "memory(GiB)": 34.88, "step": 98235, "train_speed(iter/s)": 0.412187 }, { "acc": 0.92946529, "epoch": 2.6599518046191752, "grad_norm": 20.318464279174805, "learning_rate": 2.76787163028581e-06, "loss": 0.42885885, "memory(GiB)": 34.88, "step": 98240, "train_speed(iter/s)": 0.412188 }, { "acc": 0.92782555, "epoch": 2.660087184902391, "grad_norm": 7.265064239501953, "learning_rate": 2.7673709755031382e-06, "loss": 0.4079072, "memory(GiB)": 34.88, "step": 98245, "train_speed(iter/s)": 0.412189 }, { "acc": 0.91359396, "epoch": 2.6602225651856064, "grad_norm": 7.744016647338867, "learning_rate": 2.766870348694793e-06, "loss": 0.48784966, "memory(GiB)": 34.88, "step": 98250, "train_speed(iter/s)": 0.412189 }, { "acc": 0.94110594, "epoch": 2.6603579454688218, "grad_norm": 6.238050937652588, "learning_rate": 2.766369749867049e-06, "loss": 0.37845716, "memory(GiB)": 34.88, "step": 98255, "train_speed(iter/s)": 0.41219 }, { "acc": 0.93807421, "epoch": 2.6604933257520376, "grad_norm": 7.178772926330566, "learning_rate": 2.765869179026176e-06, "loss": 0.35229697, "memory(GiB)": 34.88, "step": 98260, "train_speed(iter/s)": 0.412191 }, { "acc": 0.94173317, "epoch": 2.660628706035253, "grad_norm": 10.923320770263672, "learning_rate": 2.7653686361784478e-06, "loss": 0.31152143, "memory(GiB)": 34.88, "step": 98265, "train_speed(iter/s)": 0.412192 }, { "acc": 0.92231216, "epoch": 2.6607640863184687, "grad_norm": 7.577514171600342, "learning_rate": 2.7648681213301314e-06, "loss": 0.42542367, "memory(GiB)": 34.88, "step": 98270, "train_speed(iter/s)": 0.412193 }, { "acc": 0.9391798, "epoch": 2.660899466601684, "grad_norm": 7.9273223876953125, "learning_rate": 2.764367634487493e-06, "loss": 0.40901489, "memory(GiB)": 34.88, "step": 98275, "train_speed(iter/s)": 0.412194 }, { "acc": 0.91077175, "epoch": 2.6610348468849, "grad_norm": 11.472615242004395, "learning_rate": 2.7638671756568113e-06, "loss": 0.49309921, "memory(GiB)": 34.88, "step": 98280, "train_speed(iter/s)": 0.412196 }, { "acc": 0.93802834, "epoch": 2.6611702271681152, "grad_norm": 3.512617826461792, "learning_rate": 2.763366744844348e-06, "loss": 0.30142975, "memory(GiB)": 34.88, "step": 98285, "train_speed(iter/s)": 0.412197 }, { "acc": 0.91933327, "epoch": 2.6613056074513306, "grad_norm": 22.253759384155273, "learning_rate": 2.762866342056375e-06, "loss": 0.43242407, "memory(GiB)": 34.88, "step": 98290, "train_speed(iter/s)": 0.412198 }, { "acc": 0.94090147, "epoch": 2.6614409877345464, "grad_norm": 2.9210596084594727, "learning_rate": 2.762365967299161e-06, "loss": 0.26743517, "memory(GiB)": 34.88, "step": 98295, "train_speed(iter/s)": 0.412199 }, { "acc": 0.93729229, "epoch": 2.6615763680177618, "grad_norm": 4.705645561218262, "learning_rate": 2.761865620578975e-06, "loss": 0.39140682, "memory(GiB)": 34.88, "step": 98300, "train_speed(iter/s)": 0.4122 }, { "acc": 0.93268871, "epoch": 2.6617117483009776, "grad_norm": 9.146590232849121, "learning_rate": 2.7613653019020835e-06, "loss": 0.40158825, "memory(GiB)": 34.88, "step": 98305, "train_speed(iter/s)": 0.412201 }, { "acc": 0.94656973, "epoch": 2.661847128584193, "grad_norm": 5.466830253601074, "learning_rate": 2.7608650112747503e-06, "loss": 0.29533153, "memory(GiB)": 34.88, "step": 98310, "train_speed(iter/s)": 0.412202 }, { "acc": 0.93768473, "epoch": 2.6619825088674087, "grad_norm": 50.97372055053711, "learning_rate": 2.76036474870325e-06, "loss": 0.31210251, "memory(GiB)": 34.88, "step": 98315, "train_speed(iter/s)": 0.412203 }, { "acc": 0.93124533, "epoch": 2.662117889150624, "grad_norm": 14.99808120727539, "learning_rate": 2.7598645141938433e-06, "loss": 0.42345695, "memory(GiB)": 34.88, "step": 98320, "train_speed(iter/s)": 0.412204 }, { "acc": 0.9278657, "epoch": 2.6622532694338394, "grad_norm": 10.34284496307373, "learning_rate": 2.7593643077527993e-06, "loss": 0.36208425, "memory(GiB)": 34.88, "step": 98325, "train_speed(iter/s)": 0.412205 }, { "acc": 0.93543882, "epoch": 2.6623886497170552, "grad_norm": 6.53629207611084, "learning_rate": 2.758864129386384e-06, "loss": 0.33756244, "memory(GiB)": 34.88, "step": 98330, "train_speed(iter/s)": 0.412206 }, { "acc": 0.9472331, "epoch": 2.6625240300002706, "grad_norm": 2.8395488262176514, "learning_rate": 2.7583639791008647e-06, "loss": 0.3298321, "memory(GiB)": 34.88, "step": 98335, "train_speed(iter/s)": 0.412207 }, { "acc": 0.927386, "epoch": 2.6626594102834864, "grad_norm": 6.532614707946777, "learning_rate": 2.7578638569025022e-06, "loss": 0.43629613, "memory(GiB)": 34.88, "step": 98340, "train_speed(iter/s)": 0.412208 }, { "acc": 0.94284458, "epoch": 2.6627947905667018, "grad_norm": 5.806831359863281, "learning_rate": 2.7573637627975646e-06, "loss": 0.3585979, "memory(GiB)": 34.88, "step": 98345, "train_speed(iter/s)": 0.412209 }, { "acc": 0.93323402, "epoch": 2.6629301708499176, "grad_norm": 4.781790733337402, "learning_rate": 2.756863696792318e-06, "loss": 0.36215525, "memory(GiB)": 34.88, "step": 98350, "train_speed(iter/s)": 0.41221 }, { "acc": 0.91169672, "epoch": 2.663065551133133, "grad_norm": 10.242626190185547, "learning_rate": 2.7563636588930227e-06, "loss": 0.50656528, "memory(GiB)": 34.88, "step": 98355, "train_speed(iter/s)": 0.412211 }, { "acc": 0.93900452, "epoch": 2.6632009314163483, "grad_norm": 7.445886135101318, "learning_rate": 2.7558636491059447e-06, "loss": 0.29885664, "memory(GiB)": 34.88, "step": 98360, "train_speed(iter/s)": 0.412213 }, { "acc": 0.92299576, "epoch": 2.663336311699564, "grad_norm": 11.001790046691895, "learning_rate": 2.755363667437348e-06, "loss": 0.44031634, "memory(GiB)": 34.88, "step": 98365, "train_speed(iter/s)": 0.412214 }, { "acc": 0.918853, "epoch": 2.66347169198278, "grad_norm": 14.666558265686035, "learning_rate": 2.7548637138934974e-06, "loss": 0.49400163, "memory(GiB)": 34.88, "step": 98370, "train_speed(iter/s)": 0.412215 }, { "acc": 0.9483284, "epoch": 2.6636070722659952, "grad_norm": 4.474931716918945, "learning_rate": 2.7543637884806524e-06, "loss": 0.27221649, "memory(GiB)": 34.88, "step": 98375, "train_speed(iter/s)": 0.412215 }, { "acc": 0.93752956, "epoch": 2.6637424525492106, "grad_norm": 5.144430637359619, "learning_rate": 2.753863891205077e-06, "loss": 0.41594496, "memory(GiB)": 34.88, "step": 98380, "train_speed(iter/s)": 0.412216 }, { "acc": 0.9388628, "epoch": 2.6638778328324264, "grad_norm": 8.150956153869629, "learning_rate": 2.7533640220730328e-06, "loss": 0.40694971, "memory(GiB)": 34.88, "step": 98385, "train_speed(iter/s)": 0.412218 }, { "acc": 0.92921371, "epoch": 2.6640132131156418, "grad_norm": 10.808751106262207, "learning_rate": 2.7528641810907853e-06, "loss": 0.40194311, "memory(GiB)": 34.88, "step": 98390, "train_speed(iter/s)": 0.412219 }, { "acc": 0.93804207, "epoch": 2.664148593398857, "grad_norm": 9.088457107543945, "learning_rate": 2.7523643682645907e-06, "loss": 0.34709611, "memory(GiB)": 34.88, "step": 98395, "train_speed(iter/s)": 0.41222 }, { "acc": 0.93623648, "epoch": 2.664283973682073, "grad_norm": 5.662139892578125, "learning_rate": 2.7518645836007118e-06, "loss": 0.36014323, "memory(GiB)": 34.88, "step": 98400, "train_speed(iter/s)": 0.412221 }, { "acc": 0.92403736, "epoch": 2.6644193539652887, "grad_norm": 27.670230865478516, "learning_rate": 2.751364827105413e-06, "loss": 0.48562822, "memory(GiB)": 34.88, "step": 98405, "train_speed(iter/s)": 0.412222 }, { "acc": 0.933496, "epoch": 2.664554734248504, "grad_norm": 6.914615631103516, "learning_rate": 2.7508650987849493e-06, "loss": 0.43012843, "memory(GiB)": 34.88, "step": 98410, "train_speed(iter/s)": 0.412223 }, { "acc": 0.93788261, "epoch": 2.6646901145317194, "grad_norm": 3.3904621601104736, "learning_rate": 2.750365398645583e-06, "loss": 0.37085581, "memory(GiB)": 34.88, "step": 98415, "train_speed(iter/s)": 0.412224 }, { "acc": 0.94239178, "epoch": 2.6648254948149352, "grad_norm": 5.2509307861328125, "learning_rate": 2.749865726693575e-06, "loss": 0.25390129, "memory(GiB)": 34.88, "step": 98420, "train_speed(iter/s)": 0.412225 }, { "acc": 0.91684017, "epoch": 2.6649608750981506, "grad_norm": 6.2684006690979, "learning_rate": 2.7493660829351844e-06, "loss": 0.40360484, "memory(GiB)": 34.88, "step": 98425, "train_speed(iter/s)": 0.412226 }, { "acc": 0.94511356, "epoch": 2.6650962553813664, "grad_norm": 7.898221492767334, "learning_rate": 2.7488664673766684e-06, "loss": 0.29615698, "memory(GiB)": 34.88, "step": 98430, "train_speed(iter/s)": 0.412227 }, { "acc": 0.93921461, "epoch": 2.6652316356645818, "grad_norm": 9.010287284851074, "learning_rate": 2.7483668800242865e-06, "loss": 0.38787692, "memory(GiB)": 34.88, "step": 98435, "train_speed(iter/s)": 0.412228 }, { "acc": 0.93736572, "epoch": 2.6653670159477976, "grad_norm": 6.197094440460205, "learning_rate": 2.7478673208843e-06, "loss": 0.34242377, "memory(GiB)": 34.88, "step": 98440, "train_speed(iter/s)": 0.412229 }, { "acc": 0.91788273, "epoch": 2.665502396231013, "grad_norm": 14.726804733276367, "learning_rate": 2.7473677899629614e-06, "loss": 0.51393023, "memory(GiB)": 34.88, "step": 98445, "train_speed(iter/s)": 0.41223 }, { "acc": 0.93722649, "epoch": 2.6656377765142283, "grad_norm": 4.500412464141846, "learning_rate": 2.746868287266532e-06, "loss": 0.34766591, "memory(GiB)": 34.88, "step": 98450, "train_speed(iter/s)": 0.412231 }, { "acc": 0.94089909, "epoch": 2.665773156797444, "grad_norm": 6.009647846221924, "learning_rate": 2.7463688128012674e-06, "loss": 0.27430077, "memory(GiB)": 34.88, "step": 98455, "train_speed(iter/s)": 0.412232 }, { "acc": 0.91214275, "epoch": 2.6659085370806594, "grad_norm": 16.648889541625977, "learning_rate": 2.7458693665734276e-06, "loss": 0.4877635, "memory(GiB)": 34.88, "step": 98460, "train_speed(iter/s)": 0.412233 }, { "acc": 0.90925446, "epoch": 2.6660439173638752, "grad_norm": 8.217338562011719, "learning_rate": 2.745369948589265e-06, "loss": 0.63651786, "memory(GiB)": 34.88, "step": 98465, "train_speed(iter/s)": 0.412235 }, { "acc": 0.93456402, "epoch": 2.6661792976470906, "grad_norm": 5.912667274475098, "learning_rate": 2.7448705588550373e-06, "loss": 0.36508653, "memory(GiB)": 34.88, "step": 98470, "train_speed(iter/s)": 0.412236 }, { "acc": 0.93424816, "epoch": 2.6663146779303064, "grad_norm": 6.0161027908325195, "learning_rate": 2.7443711973770033e-06, "loss": 0.33102226, "memory(GiB)": 34.88, "step": 98475, "train_speed(iter/s)": 0.412237 }, { "acc": 0.93787746, "epoch": 2.6664500582135218, "grad_norm": 6.457449913024902, "learning_rate": 2.7438718641614132e-06, "loss": 0.3427484, "memory(GiB)": 34.88, "step": 98480, "train_speed(iter/s)": 0.412238 }, { "acc": 0.91830492, "epoch": 2.666585438496737, "grad_norm": 17.66437339782715, "learning_rate": 2.7433725592145254e-06, "loss": 0.50599718, "memory(GiB)": 34.88, "step": 98485, "train_speed(iter/s)": 0.412239 }, { "acc": 0.94656563, "epoch": 2.666720818779953, "grad_norm": 7.17811393737793, "learning_rate": 2.742873282542593e-06, "loss": 0.27582064, "memory(GiB)": 34.88, "step": 98490, "train_speed(iter/s)": 0.41224 }, { "acc": 0.94258337, "epoch": 2.6668561990631683, "grad_norm": 6.285512924194336, "learning_rate": 2.742374034151874e-06, "loss": 0.25701385, "memory(GiB)": 34.88, "step": 98495, "train_speed(iter/s)": 0.412241 }, { "acc": 0.92861824, "epoch": 2.666991579346384, "grad_norm": 11.731099128723145, "learning_rate": 2.7418748140486183e-06, "loss": 0.43874826, "memory(GiB)": 34.88, "step": 98500, "train_speed(iter/s)": 0.412242 }, { "acc": 0.94533968, "epoch": 2.6671269596295994, "grad_norm": 9.717697143554688, "learning_rate": 2.741375622239081e-06, "loss": 0.30449061, "memory(GiB)": 34.88, "step": 98505, "train_speed(iter/s)": 0.412243 }, { "acc": 0.9354167, "epoch": 2.6672623399128152, "grad_norm": 7.183483600616455, "learning_rate": 2.740876458729515e-06, "loss": 0.42474813, "memory(GiB)": 34.88, "step": 98510, "train_speed(iter/s)": 0.412244 }, { "acc": 0.93144855, "epoch": 2.6673977201960306, "grad_norm": 7.84957218170166, "learning_rate": 2.740377323526177e-06, "loss": 0.3539659, "memory(GiB)": 34.88, "step": 98515, "train_speed(iter/s)": 0.412245 }, { "acc": 0.93206978, "epoch": 2.667533100479246, "grad_norm": 19.4886474609375, "learning_rate": 2.7398782166353145e-06, "loss": 0.37632699, "memory(GiB)": 34.88, "step": 98520, "train_speed(iter/s)": 0.412246 }, { "acc": 0.94254456, "epoch": 2.6676684807624618, "grad_norm": 6.934788227081299, "learning_rate": 2.7393791380631814e-06, "loss": 0.36712432, "memory(GiB)": 34.88, "step": 98525, "train_speed(iter/s)": 0.412248 }, { "acc": 0.93971405, "epoch": 2.6678038610456776, "grad_norm": 10.304205894470215, "learning_rate": 2.738880087816033e-06, "loss": 0.39746242, "memory(GiB)": 34.88, "step": 98530, "train_speed(iter/s)": 0.412249 }, { "acc": 0.93724718, "epoch": 2.667939241328893, "grad_norm": 4.075494289398193, "learning_rate": 2.7383810659001154e-06, "loss": 0.37118435, "memory(GiB)": 34.88, "step": 98535, "train_speed(iter/s)": 0.41225 }, { "acc": 0.93292007, "epoch": 2.6680746216121083, "grad_norm": 9.127851486206055, "learning_rate": 2.7378820723216826e-06, "loss": 0.38072886, "memory(GiB)": 34.88, "step": 98540, "train_speed(iter/s)": 0.412251 }, { "acc": 0.92385607, "epoch": 2.668210001895324, "grad_norm": 6.412014961242676, "learning_rate": 2.7373831070869857e-06, "loss": 0.45252576, "memory(GiB)": 34.88, "step": 98545, "train_speed(iter/s)": 0.412252 }, { "acc": 0.92410421, "epoch": 2.6683453821785394, "grad_norm": 8.180930137634277, "learning_rate": 2.7368841702022765e-06, "loss": 0.3849884, "memory(GiB)": 34.88, "step": 98550, "train_speed(iter/s)": 0.412253 }, { "acc": 0.94333754, "epoch": 2.668480762461755, "grad_norm": 11.0693998336792, "learning_rate": 2.7363852616737994e-06, "loss": 0.32833655, "memory(GiB)": 34.88, "step": 98555, "train_speed(iter/s)": 0.412254 }, { "acc": 0.94275932, "epoch": 2.6686161427449706, "grad_norm": 4.529460906982422, "learning_rate": 2.7358863815078123e-06, "loss": 0.30303349, "memory(GiB)": 34.88, "step": 98560, "train_speed(iter/s)": 0.412255 }, { "acc": 0.94644451, "epoch": 2.6687515230281864, "grad_norm": 5.2827301025390625, "learning_rate": 2.7353875297105616e-06, "loss": 0.28325329, "memory(GiB)": 34.88, "step": 98565, "train_speed(iter/s)": 0.412256 }, { "acc": 0.91947021, "epoch": 2.6688869033114018, "grad_norm": 7.993724346160889, "learning_rate": 2.734888706288293e-06, "loss": 0.53990297, "memory(GiB)": 34.88, "step": 98570, "train_speed(iter/s)": 0.412257 }, { "acc": 0.92839413, "epoch": 2.669022283594617, "grad_norm": 3.8353657722473145, "learning_rate": 2.734389911247258e-06, "loss": 0.39934199, "memory(GiB)": 34.88, "step": 98575, "train_speed(iter/s)": 0.412259 }, { "acc": 0.94280796, "epoch": 2.669157663877833, "grad_norm": 26.137609481811523, "learning_rate": 2.733891144593705e-06, "loss": 0.34549508, "memory(GiB)": 34.88, "step": 98580, "train_speed(iter/s)": 0.41226 }, { "acc": 0.93679447, "epoch": 2.6692930441610483, "grad_norm": 5.220433235168457, "learning_rate": 2.7333924063338836e-06, "loss": 0.36296949, "memory(GiB)": 34.88, "step": 98585, "train_speed(iter/s)": 0.412261 }, { "acc": 0.93710175, "epoch": 2.669428424444264, "grad_norm": 15.903712272644043, "learning_rate": 2.732893696474035e-06, "loss": 0.38226521, "memory(GiB)": 34.88, "step": 98590, "train_speed(iter/s)": 0.412262 }, { "acc": 0.92372427, "epoch": 2.6695638047274795, "grad_norm": 8.455326080322266, "learning_rate": 2.7323950150204153e-06, "loss": 0.4100071, "memory(GiB)": 34.88, "step": 98595, "train_speed(iter/s)": 0.412263 }, { "acc": 0.94909019, "epoch": 2.6696991850106953, "grad_norm": 3.576873540878296, "learning_rate": 2.7318963619792682e-06, "loss": 0.26249747, "memory(GiB)": 34.88, "step": 98600, "train_speed(iter/s)": 0.412264 }, { "acc": 0.9381259, "epoch": 2.6698345652939106, "grad_norm": 6.391331195831299, "learning_rate": 2.731397737356837e-06, "loss": 0.3551908, "memory(GiB)": 34.88, "step": 98605, "train_speed(iter/s)": 0.412265 }, { "acc": 0.92481985, "epoch": 2.669969945577126, "grad_norm": 9.492615699768066, "learning_rate": 2.73089914115937e-06, "loss": 0.40712013, "memory(GiB)": 34.88, "step": 98610, "train_speed(iter/s)": 0.412266 }, { "acc": 0.93445368, "epoch": 2.6701053258603418, "grad_norm": 10.36103343963623, "learning_rate": 2.730400573393114e-06, "loss": 0.3459511, "memory(GiB)": 34.88, "step": 98615, "train_speed(iter/s)": 0.412267 }, { "acc": 0.93474903, "epoch": 2.670240706143557, "grad_norm": 4.581522464752197, "learning_rate": 2.7299020340643163e-06, "loss": 0.32482519, "memory(GiB)": 34.88, "step": 98620, "train_speed(iter/s)": 0.412268 }, { "acc": 0.93344011, "epoch": 2.670376086426773, "grad_norm": 2.7311272621154785, "learning_rate": 2.7294035231792155e-06, "loss": 0.32438254, "memory(GiB)": 34.88, "step": 98625, "train_speed(iter/s)": 0.412269 }, { "acc": 0.92299061, "epoch": 2.6705114667099883, "grad_norm": 11.690574645996094, "learning_rate": 2.728905040744065e-06, "loss": 0.48773069, "memory(GiB)": 34.88, "step": 98630, "train_speed(iter/s)": 0.41227 }, { "acc": 0.94294586, "epoch": 2.670646846993204, "grad_norm": 4.794066429138184, "learning_rate": 2.7284065867651024e-06, "loss": 0.32894301, "memory(GiB)": 34.88, "step": 98635, "train_speed(iter/s)": 0.412271 }, { "acc": 0.92327633, "epoch": 2.6707822272764195, "grad_norm": 10.841432571411133, "learning_rate": 2.727908161248577e-06, "loss": 0.37790773, "memory(GiB)": 34.88, "step": 98640, "train_speed(iter/s)": 0.412273 }, { "acc": 0.92237873, "epoch": 2.670917607559635, "grad_norm": 9.046072006225586, "learning_rate": 2.727409764200728e-06, "loss": 0.46789556, "memory(GiB)": 34.88, "step": 98645, "train_speed(iter/s)": 0.412273 }, { "acc": 0.93970957, "epoch": 2.6710529878428506, "grad_norm": 15.155948638916016, "learning_rate": 2.726911395627801e-06, "loss": 0.31615846, "memory(GiB)": 34.88, "step": 98650, "train_speed(iter/s)": 0.412274 }, { "acc": 0.93045521, "epoch": 2.671188368126066, "grad_norm": 8.772541046142578, "learning_rate": 2.7264130555360396e-06, "loss": 0.38021798, "memory(GiB)": 34.88, "step": 98655, "train_speed(iter/s)": 0.412276 }, { "acc": 0.92494507, "epoch": 2.6713237484092818, "grad_norm": 13.066513061523438, "learning_rate": 2.725914743931683e-06, "loss": 0.46779194, "memory(GiB)": 34.88, "step": 98660, "train_speed(iter/s)": 0.412277 }, { "acc": 0.92888565, "epoch": 2.671459128692497, "grad_norm": 11.627243041992188, "learning_rate": 2.7254164608209805e-06, "loss": 0.37060213, "memory(GiB)": 34.88, "step": 98665, "train_speed(iter/s)": 0.412278 }, { "acc": 0.92833633, "epoch": 2.671594508975713, "grad_norm": 7.824827194213867, "learning_rate": 2.724918206210167e-06, "loss": 0.33872848, "memory(GiB)": 34.88, "step": 98670, "train_speed(iter/s)": 0.412279 }, { "acc": 0.93972664, "epoch": 2.6717298892589283, "grad_norm": 6.0910186767578125, "learning_rate": 2.7244199801054892e-06, "loss": 0.31915751, "memory(GiB)": 34.88, "step": 98675, "train_speed(iter/s)": 0.41228 }, { "acc": 0.95017347, "epoch": 2.6718652695421437, "grad_norm": 12.187114715576172, "learning_rate": 2.7239217825131824e-06, "loss": 0.29039838, "memory(GiB)": 34.88, "step": 98680, "train_speed(iter/s)": 0.412281 }, { "acc": 0.94625282, "epoch": 2.6720006498253595, "grad_norm": 6.17311954498291, "learning_rate": 2.7234236134394954e-06, "loss": 0.26078377, "memory(GiB)": 34.88, "step": 98685, "train_speed(iter/s)": 0.412282 }, { "acc": 0.91414633, "epoch": 2.6721360301085753, "grad_norm": 16.27794647216797, "learning_rate": 2.722925472890664e-06, "loss": 0.50189619, "memory(GiB)": 34.88, "step": 98690, "train_speed(iter/s)": 0.412283 }, { "acc": 0.93929663, "epoch": 2.6722714103917906, "grad_norm": 7.437828540802002, "learning_rate": 2.722427360872925e-06, "loss": 0.28913486, "memory(GiB)": 34.88, "step": 98695, "train_speed(iter/s)": 0.412285 }, { "acc": 0.93702049, "epoch": 2.672406790675006, "grad_norm": 6.086218357086182, "learning_rate": 2.7219292773925273e-06, "loss": 0.34492092, "memory(GiB)": 34.88, "step": 98700, "train_speed(iter/s)": 0.412286 }, { "acc": 0.94541302, "epoch": 2.6725421709582218, "grad_norm": 8.460531234741211, "learning_rate": 2.7214312224557023e-06, "loss": 0.33157468, "memory(GiB)": 34.88, "step": 98705, "train_speed(iter/s)": 0.412287 }, { "acc": 0.93610144, "epoch": 2.672677551241437, "grad_norm": 5.619757175445557, "learning_rate": 2.7209331960686946e-06, "loss": 0.37375286, "memory(GiB)": 34.88, "step": 98710, "train_speed(iter/s)": 0.412288 }, { "acc": 0.93504477, "epoch": 2.6728129315246525, "grad_norm": 7.454627513885498, "learning_rate": 2.7204351982377357e-06, "loss": 0.33783321, "memory(GiB)": 34.88, "step": 98715, "train_speed(iter/s)": 0.412289 }, { "acc": 0.93027344, "epoch": 2.6729483118078683, "grad_norm": 3.6639058589935303, "learning_rate": 2.7199372289690734e-06, "loss": 0.33951099, "memory(GiB)": 34.88, "step": 98720, "train_speed(iter/s)": 0.41229 }, { "acc": 0.92146521, "epoch": 2.673083692091084, "grad_norm": 12.11201000213623, "learning_rate": 2.719439288268941e-06, "loss": 0.4818511, "memory(GiB)": 34.88, "step": 98725, "train_speed(iter/s)": 0.412291 }, { "acc": 0.94804001, "epoch": 2.6732190723742995, "grad_norm": 5.597224712371826, "learning_rate": 2.718941376143572e-06, "loss": 0.307686, "memory(GiB)": 34.88, "step": 98730, "train_speed(iter/s)": 0.412292 }, { "acc": 0.94297838, "epoch": 2.673354452657515, "grad_norm": 12.709268569946289, "learning_rate": 2.7184434925992113e-06, "loss": 0.27566512, "memory(GiB)": 34.88, "step": 98735, "train_speed(iter/s)": 0.412293 }, { "acc": 0.92896309, "epoch": 2.6734898329407306, "grad_norm": 6.514575004577637, "learning_rate": 2.7179456376420926e-06, "loss": 0.38121109, "memory(GiB)": 34.88, "step": 98740, "train_speed(iter/s)": 0.412294 }, { "acc": 0.92010698, "epoch": 2.673625213223946, "grad_norm": 14.490621566772461, "learning_rate": 2.7174478112784535e-06, "loss": 0.42632751, "memory(GiB)": 34.88, "step": 98745, "train_speed(iter/s)": 0.412296 }, { "acc": 0.92547712, "epoch": 2.6737605935071618, "grad_norm": 9.625864028930664, "learning_rate": 2.7169500135145245e-06, "loss": 0.39908519, "memory(GiB)": 34.88, "step": 98750, "train_speed(iter/s)": 0.412297 }, { "acc": 0.93164978, "epoch": 2.673895973790377, "grad_norm": 6.816679954528809, "learning_rate": 2.716452244356551e-06, "loss": 0.43913879, "memory(GiB)": 34.88, "step": 98755, "train_speed(iter/s)": 0.412298 }, { "acc": 0.93409042, "epoch": 2.674031354073593, "grad_norm": 6.689877510070801, "learning_rate": 2.715954503810762e-06, "loss": 0.37941077, "memory(GiB)": 34.88, "step": 98760, "train_speed(iter/s)": 0.412299 }, { "acc": 0.94025955, "epoch": 2.6741667343568083, "grad_norm": 7.4545745849609375, "learning_rate": 2.715456791883394e-06, "loss": 0.39047761, "memory(GiB)": 34.88, "step": 98765, "train_speed(iter/s)": 0.4123 }, { "acc": 0.92980661, "epoch": 2.6743021146400237, "grad_norm": 35.766536712646484, "learning_rate": 2.714959108580685e-06, "loss": 0.40967255, "memory(GiB)": 34.88, "step": 98770, "train_speed(iter/s)": 0.412301 }, { "acc": 0.93485155, "epoch": 2.6744374949232395, "grad_norm": 6.794150352478027, "learning_rate": 2.7144614539088637e-06, "loss": 0.36871498, "memory(GiB)": 34.88, "step": 98775, "train_speed(iter/s)": 0.412302 }, { "acc": 0.93959827, "epoch": 2.674572875206455, "grad_norm": 4.735115051269531, "learning_rate": 2.7139638278741693e-06, "loss": 0.41770821, "memory(GiB)": 34.88, "step": 98780, "train_speed(iter/s)": 0.412303 }, { "acc": 0.93896942, "epoch": 2.6747082554896706, "grad_norm": 4.714811325073242, "learning_rate": 2.71346623048283e-06, "loss": 0.28671494, "memory(GiB)": 34.88, "step": 98785, "train_speed(iter/s)": 0.412304 }, { "acc": 0.92492342, "epoch": 2.674843635772886, "grad_norm": 7.019775867462158, "learning_rate": 2.712968661741087e-06, "loss": 0.41501989, "memory(GiB)": 34.88, "step": 98790, "train_speed(iter/s)": 0.412305 }, { "acc": 0.92602711, "epoch": 2.6749790160561018, "grad_norm": 10.897762298583984, "learning_rate": 2.7124711216551665e-06, "loss": 0.45807924, "memory(GiB)": 34.88, "step": 98795, "train_speed(iter/s)": 0.412306 }, { "acc": 0.93363266, "epoch": 2.675114396339317, "grad_norm": 5.8171257972717285, "learning_rate": 2.711973610231306e-06, "loss": 0.43406363, "memory(GiB)": 34.88, "step": 98800, "train_speed(iter/s)": 0.412308 }, { "acc": 0.9394701, "epoch": 2.6752497766225325, "grad_norm": 10.6880521774292, "learning_rate": 2.711476127475731e-06, "loss": 0.33280787, "memory(GiB)": 34.88, "step": 98805, "train_speed(iter/s)": 0.412309 }, { "acc": 0.92558384, "epoch": 2.6753851569057483, "grad_norm": 4.596624851226807, "learning_rate": 2.7109786733946826e-06, "loss": 0.45132456, "memory(GiB)": 34.88, "step": 98810, "train_speed(iter/s)": 0.41231 }, { "acc": 0.94515009, "epoch": 2.6755205371889637, "grad_norm": 9.282621383666992, "learning_rate": 2.7104812479943866e-06, "loss": 0.36735229, "memory(GiB)": 34.88, "step": 98815, "train_speed(iter/s)": 0.412311 }, { "acc": 0.93684731, "epoch": 2.6756559174721795, "grad_norm": 12.209006309509277, "learning_rate": 2.709983851281073e-06, "loss": 0.34965205, "memory(GiB)": 34.88, "step": 98820, "train_speed(iter/s)": 0.412312 }, { "acc": 0.9348278, "epoch": 2.675791297755395, "grad_norm": 8.878908157348633, "learning_rate": 2.7094864832609775e-06, "loss": 0.40023942, "memory(GiB)": 34.88, "step": 98825, "train_speed(iter/s)": 0.412313 }, { "acc": 0.94263887, "epoch": 2.6759266780386106, "grad_norm": 5.882766246795654, "learning_rate": 2.7089891439403263e-06, "loss": 0.27814798, "memory(GiB)": 34.88, "step": 98830, "train_speed(iter/s)": 0.412314 }, { "acc": 0.92888222, "epoch": 2.676062058321826, "grad_norm": 10.251323699951172, "learning_rate": 2.708491833325354e-06, "loss": 0.39395969, "memory(GiB)": 34.88, "step": 98835, "train_speed(iter/s)": 0.412315 }, { "acc": 0.9394824, "epoch": 2.6761974386050413, "grad_norm": 4.702298164367676, "learning_rate": 2.707994551422283e-06, "loss": 0.30935915, "memory(GiB)": 34.88, "step": 98840, "train_speed(iter/s)": 0.412316 }, { "acc": 0.93943367, "epoch": 2.676332818888257, "grad_norm": 6.775916576385498, "learning_rate": 2.7074972982373527e-06, "loss": 0.35247629, "memory(GiB)": 34.88, "step": 98845, "train_speed(iter/s)": 0.412317 }, { "acc": 0.93725204, "epoch": 2.676468199171473, "grad_norm": 7.823876857757568, "learning_rate": 2.7070000737767866e-06, "loss": 0.33197649, "memory(GiB)": 34.88, "step": 98850, "train_speed(iter/s)": 0.412318 }, { "acc": 0.92471561, "epoch": 2.6766035794546883, "grad_norm": 6.049578666687012, "learning_rate": 2.7065028780468095e-06, "loss": 0.37927747, "memory(GiB)": 34.88, "step": 98855, "train_speed(iter/s)": 0.412319 }, { "acc": 0.91419678, "epoch": 2.6767389597379037, "grad_norm": 8.79865837097168, "learning_rate": 2.7060057110536575e-06, "loss": 0.46841869, "memory(GiB)": 34.88, "step": 98860, "train_speed(iter/s)": 0.41232 }, { "acc": 0.9367672, "epoch": 2.6768743400211195, "grad_norm": 12.181196212768555, "learning_rate": 2.7055085728035534e-06, "loss": 0.39722283, "memory(GiB)": 34.88, "step": 98865, "train_speed(iter/s)": 0.412321 }, { "acc": 0.92907085, "epoch": 2.677009720304335, "grad_norm": 6.357794284820557, "learning_rate": 2.7050114633027297e-06, "loss": 0.34017541, "memory(GiB)": 34.88, "step": 98870, "train_speed(iter/s)": 0.412322 }, { "acc": 0.93837214, "epoch": 2.67714510058755, "grad_norm": 15.756747245788574, "learning_rate": 2.7045143825574057e-06, "loss": 0.36020908, "memory(GiB)": 34.88, "step": 98875, "train_speed(iter/s)": 0.412323 }, { "acc": 0.93665352, "epoch": 2.677280480870766, "grad_norm": 4.017443656921387, "learning_rate": 2.7040173305738172e-06, "loss": 0.37729893, "memory(GiB)": 34.88, "step": 98880, "train_speed(iter/s)": 0.412324 }, { "acc": 0.93052015, "epoch": 2.677415861153982, "grad_norm": 7.84920597076416, "learning_rate": 2.7035203073581845e-06, "loss": 0.40815563, "memory(GiB)": 34.88, "step": 98885, "train_speed(iter/s)": 0.412325 }, { "acc": 0.91400318, "epoch": 2.677551241437197, "grad_norm": 15.639488220214844, "learning_rate": 2.703023312916736e-06, "loss": 0.4767458, "memory(GiB)": 34.88, "step": 98890, "train_speed(iter/s)": 0.412327 }, { "acc": 0.93353577, "epoch": 2.6776866217204125, "grad_norm": 9.262386322021484, "learning_rate": 2.7025263472557e-06, "loss": 0.45577312, "memory(GiB)": 34.88, "step": 98895, "train_speed(iter/s)": 0.412328 }, { "acc": 0.93709049, "epoch": 2.6778220020036283, "grad_norm": 8.515253067016602, "learning_rate": 2.7020294103812973e-06, "loss": 0.38393204, "memory(GiB)": 34.88, "step": 98900, "train_speed(iter/s)": 0.412328 }, { "acc": 0.93654718, "epoch": 2.6779573822868437, "grad_norm": 7.4051313400268555, "learning_rate": 2.7015325022997576e-06, "loss": 0.406811, "memory(GiB)": 34.88, "step": 98905, "train_speed(iter/s)": 0.41233 }, { "acc": 0.92428732, "epoch": 2.6780927625700595, "grad_norm": 9.469895362854004, "learning_rate": 2.7010356230172983e-06, "loss": 0.4538713, "memory(GiB)": 34.88, "step": 98910, "train_speed(iter/s)": 0.412331 }, { "acc": 0.93747292, "epoch": 2.678228142853275, "grad_norm": 11.103706359863281, "learning_rate": 2.700538772540153e-06, "loss": 0.3305079, "memory(GiB)": 34.88, "step": 98915, "train_speed(iter/s)": 0.412331 }, { "acc": 0.94032745, "epoch": 2.6783635231364906, "grad_norm": 9.053449630737305, "learning_rate": 2.7000419508745402e-06, "loss": 0.37344198, "memory(GiB)": 34.88, "step": 98920, "train_speed(iter/s)": 0.412333 }, { "acc": 0.93015633, "epoch": 2.678498903419706, "grad_norm": 6.728882312774658, "learning_rate": 2.6995451580266848e-06, "loss": 0.36339393, "memory(GiB)": 34.88, "step": 98925, "train_speed(iter/s)": 0.412334 }, { "acc": 0.94453697, "epoch": 2.6786342837029213, "grad_norm": 4.6950297355651855, "learning_rate": 2.69904839400281e-06, "loss": 0.29333854, "memory(GiB)": 34.88, "step": 98930, "train_speed(iter/s)": 0.412335 }, { "acc": 0.94925699, "epoch": 2.678769663986137, "grad_norm": 10.668615341186523, "learning_rate": 2.6985516588091397e-06, "loss": 0.27160239, "memory(GiB)": 34.88, "step": 98935, "train_speed(iter/s)": 0.412335 }, { "acc": 0.93673191, "epoch": 2.6789050442693525, "grad_norm": 7.363590240478516, "learning_rate": 2.6980549524518973e-06, "loss": 0.31477413, "memory(GiB)": 34.88, "step": 98940, "train_speed(iter/s)": 0.412336 }, { "acc": 0.92517033, "epoch": 2.6790404245525683, "grad_norm": 6.038570880889893, "learning_rate": 2.697558274937299e-06, "loss": 0.4175015, "memory(GiB)": 34.88, "step": 98945, "train_speed(iter/s)": 0.412337 }, { "acc": 0.94508858, "epoch": 2.6791758048357837, "grad_norm": 3.294567584991455, "learning_rate": 2.6970616262715743e-06, "loss": 0.32676535, "memory(GiB)": 34.88, "step": 98950, "train_speed(iter/s)": 0.412339 }, { "acc": 0.91581278, "epoch": 2.6793111851189995, "grad_norm": 13.608536720275879, "learning_rate": 2.6965650064609406e-06, "loss": 0.45622907, "memory(GiB)": 34.88, "step": 98955, "train_speed(iter/s)": 0.41234 }, { "acc": 0.91419516, "epoch": 2.679446565402215, "grad_norm": 12.661918640136719, "learning_rate": 2.6960684155116195e-06, "loss": 0.5285574, "memory(GiB)": 34.88, "step": 98960, "train_speed(iter/s)": 0.412341 }, { "acc": 0.94554157, "epoch": 2.67958194568543, "grad_norm": 7.2363691329956055, "learning_rate": 2.695571853429832e-06, "loss": 0.29786129, "memory(GiB)": 34.88, "step": 98965, "train_speed(iter/s)": 0.412342 }, { "acc": 0.94049301, "epoch": 2.679717325968646, "grad_norm": 5.542149543762207, "learning_rate": 2.6950753202218016e-06, "loss": 0.39477634, "memory(GiB)": 34.88, "step": 98970, "train_speed(iter/s)": 0.412343 }, { "acc": 0.93687754, "epoch": 2.6798527062518613, "grad_norm": 5.346299171447754, "learning_rate": 2.694578815893744e-06, "loss": 0.32374146, "memory(GiB)": 34.88, "step": 98975, "train_speed(iter/s)": 0.412344 }, { "acc": 0.9170496, "epoch": 2.679988086535077, "grad_norm": 20.288570404052734, "learning_rate": 2.6940823404518773e-06, "loss": 0.47368789, "memory(GiB)": 34.88, "step": 98980, "train_speed(iter/s)": 0.412345 }, { "acc": 0.93189211, "epoch": 2.6801234668182925, "grad_norm": 10.510891914367676, "learning_rate": 2.693585893902427e-06, "loss": 0.41218476, "memory(GiB)": 34.88, "step": 98985, "train_speed(iter/s)": 0.412346 }, { "acc": 0.91992092, "epoch": 2.6802588471015083, "grad_norm": 10.10791301727295, "learning_rate": 2.693089476251608e-06, "loss": 0.52222939, "memory(GiB)": 34.88, "step": 98990, "train_speed(iter/s)": 0.412347 }, { "acc": 0.92819424, "epoch": 2.6803942273847237, "grad_norm": 8.66547966003418, "learning_rate": 2.6925930875056396e-06, "loss": 0.39690237, "memory(GiB)": 34.88, "step": 98995, "train_speed(iter/s)": 0.412348 }, { "acc": 0.94027767, "epoch": 2.680529607667939, "grad_norm": 5.313392162322998, "learning_rate": 2.6920967276707405e-06, "loss": 0.30725091, "memory(GiB)": 34.88, "step": 99000, "train_speed(iter/s)": 0.412349 }, { "acc": 0.94015598, "epoch": 2.680664987951155, "grad_norm": 7.13081693649292, "learning_rate": 2.69160039675313e-06, "loss": 0.2849823, "memory(GiB)": 34.88, "step": 99005, "train_speed(iter/s)": 0.41235 }, { "acc": 0.93049545, "epoch": 2.6808003682343706, "grad_norm": 4.432124137878418, "learning_rate": 2.691104094759022e-06, "loss": 0.40665455, "memory(GiB)": 34.88, "step": 99010, "train_speed(iter/s)": 0.412351 }, { "acc": 0.9323947, "epoch": 2.680935748517586, "grad_norm": 8.538384437561035, "learning_rate": 2.6906078216946356e-06, "loss": 0.43101997, "memory(GiB)": 34.88, "step": 99015, "train_speed(iter/s)": 0.412352 }, { "acc": 0.92944317, "epoch": 2.6810711288008013, "grad_norm": 10.820009231567383, "learning_rate": 2.69011157756619e-06, "loss": 0.33717496, "memory(GiB)": 34.88, "step": 99020, "train_speed(iter/s)": 0.412353 }, { "acc": 0.92200336, "epoch": 2.681206509084017, "grad_norm": 13.786635398864746, "learning_rate": 2.6896153623798965e-06, "loss": 0.45666609, "memory(GiB)": 34.88, "step": 99025, "train_speed(iter/s)": 0.412354 }, { "acc": 0.93030758, "epoch": 2.6813418893672325, "grad_norm": 5.208943843841553, "learning_rate": 2.689119176141975e-06, "loss": 0.44130144, "memory(GiB)": 34.88, "step": 99030, "train_speed(iter/s)": 0.412355 }, { "acc": 0.91720324, "epoch": 2.681477269650448, "grad_norm": 7.1485772132873535, "learning_rate": 2.6886230188586397e-06, "loss": 0.45537286, "memory(GiB)": 34.88, "step": 99035, "train_speed(iter/s)": 0.412356 }, { "acc": 0.9262249, "epoch": 2.6816126499336637, "grad_norm": 7.515495777130127, "learning_rate": 2.6881268905361092e-06, "loss": 0.40950747, "memory(GiB)": 34.88, "step": 99040, "train_speed(iter/s)": 0.412357 }, { "acc": 0.92061043, "epoch": 2.6817480302168795, "grad_norm": 9.640242576599121, "learning_rate": 2.687630791180592e-06, "loss": 0.43071189, "memory(GiB)": 34.88, "step": 99045, "train_speed(iter/s)": 0.412358 }, { "acc": 0.91073914, "epoch": 2.681883410500095, "grad_norm": 8.452956199645996, "learning_rate": 2.687134720798307e-06, "loss": 0.47787676, "memory(GiB)": 34.88, "step": 99050, "train_speed(iter/s)": 0.412359 }, { "acc": 0.91301346, "epoch": 2.68201879078331, "grad_norm": 7.717789173126221, "learning_rate": 2.686638679395469e-06, "loss": 0.44666462, "memory(GiB)": 34.88, "step": 99055, "train_speed(iter/s)": 0.41236 }, { "acc": 0.94942179, "epoch": 2.682154171066526, "grad_norm": 3.7294154167175293, "learning_rate": 2.6861426669782925e-06, "loss": 0.30628228, "memory(GiB)": 34.88, "step": 99060, "train_speed(iter/s)": 0.412361 }, { "acc": 0.93441658, "epoch": 2.6822895513497413, "grad_norm": 6.502049446105957, "learning_rate": 2.6856466835529865e-06, "loss": 0.37562919, "memory(GiB)": 34.88, "step": 99065, "train_speed(iter/s)": 0.412362 }, { "acc": 0.94132442, "epoch": 2.6824249316329567, "grad_norm": 10.36478042602539, "learning_rate": 2.6851507291257666e-06, "loss": 0.27646766, "memory(GiB)": 34.88, "step": 99070, "train_speed(iter/s)": 0.412364 }, { "acc": 0.93253918, "epoch": 2.6825603119161725, "grad_norm": 8.5325345993042, "learning_rate": 2.6846548037028493e-06, "loss": 0.28724027, "memory(GiB)": 34.88, "step": 99075, "train_speed(iter/s)": 0.412365 }, { "acc": 0.92049484, "epoch": 2.6826956921993883, "grad_norm": 8.67279052734375, "learning_rate": 2.6841589072904406e-06, "loss": 0.4839963, "memory(GiB)": 34.88, "step": 99080, "train_speed(iter/s)": 0.412366 }, { "acc": 0.93716288, "epoch": 2.6828310724826037, "grad_norm": 8.64632797241211, "learning_rate": 2.6836630398947566e-06, "loss": 0.33964727, "memory(GiB)": 34.88, "step": 99085, "train_speed(iter/s)": 0.412366 }, { "acc": 0.94337902, "epoch": 2.682966452765819, "grad_norm": 4.627460956573486, "learning_rate": 2.6831672015220077e-06, "loss": 0.2612061, "memory(GiB)": 34.88, "step": 99090, "train_speed(iter/s)": 0.412367 }, { "acc": 0.91673002, "epoch": 2.683101833049035, "grad_norm": 3.123556137084961, "learning_rate": 2.6826713921784077e-06, "loss": 0.52972078, "memory(GiB)": 34.88, "step": 99095, "train_speed(iter/s)": 0.412369 }, { "acc": 0.93532495, "epoch": 2.68323721333225, "grad_norm": 7.520134449005127, "learning_rate": 2.682175611870164e-06, "loss": 0.34007444, "memory(GiB)": 34.88, "step": 99100, "train_speed(iter/s)": 0.412369 }, { "acc": 0.94246769, "epoch": 2.683372593615466, "grad_norm": 5.008500099182129, "learning_rate": 2.6816798606034876e-06, "loss": 0.30236511, "memory(GiB)": 34.88, "step": 99105, "train_speed(iter/s)": 0.41237 }, { "acc": 0.92282114, "epoch": 2.6835079738986813, "grad_norm": 21.840166091918945, "learning_rate": 2.681184138384593e-06, "loss": 0.48436217, "memory(GiB)": 34.88, "step": 99110, "train_speed(iter/s)": 0.412372 }, { "acc": 0.94086514, "epoch": 2.683643354181897, "grad_norm": 6.565344333648682, "learning_rate": 2.6806884452196843e-06, "loss": 0.32984607, "memory(GiB)": 34.88, "step": 99115, "train_speed(iter/s)": 0.412373 }, { "acc": 0.93852243, "epoch": 2.6837787344651125, "grad_norm": 6.935049533843994, "learning_rate": 2.6801927811149734e-06, "loss": 0.33953257, "memory(GiB)": 34.88, "step": 99120, "train_speed(iter/s)": 0.412374 }, { "acc": 0.93215942, "epoch": 2.683914114748328, "grad_norm": 7.553797721862793, "learning_rate": 2.679697146076671e-06, "loss": 0.45578804, "memory(GiB)": 34.88, "step": 99125, "train_speed(iter/s)": 0.412375 }, { "acc": 0.92894125, "epoch": 2.6840494950315437, "grad_norm": 4.3049845695495605, "learning_rate": 2.679201540110986e-06, "loss": 0.37924538, "memory(GiB)": 34.88, "step": 99130, "train_speed(iter/s)": 0.412376 }, { "acc": 0.93631334, "epoch": 2.684184875314759, "grad_norm": 9.572565078735352, "learning_rate": 2.6787059632241237e-06, "loss": 0.33216648, "memory(GiB)": 34.88, "step": 99135, "train_speed(iter/s)": 0.412377 }, { "acc": 0.93588057, "epoch": 2.684320255597975, "grad_norm": 6.6404709815979, "learning_rate": 2.6782104154222944e-06, "loss": 0.44724398, "memory(GiB)": 34.88, "step": 99140, "train_speed(iter/s)": 0.412378 }, { "acc": 0.91999245, "epoch": 2.68445563588119, "grad_norm": 6.000827312469482, "learning_rate": 2.6777148967117063e-06, "loss": 0.39847772, "memory(GiB)": 34.88, "step": 99145, "train_speed(iter/s)": 0.412379 }, { "acc": 0.92820425, "epoch": 2.684591016164406, "grad_norm": 17.03463363647461, "learning_rate": 2.677219407098565e-06, "loss": 0.41414156, "memory(GiB)": 34.88, "step": 99150, "train_speed(iter/s)": 0.41238 }, { "acc": 0.93503628, "epoch": 2.6847263964476213, "grad_norm": 4.952550411224365, "learning_rate": 2.6767239465890784e-06, "loss": 0.37092028, "memory(GiB)": 34.88, "step": 99155, "train_speed(iter/s)": 0.412381 }, { "acc": 0.92504129, "epoch": 2.6848617767308367, "grad_norm": 8.659460067749023, "learning_rate": 2.6762285151894517e-06, "loss": 0.3740335, "memory(GiB)": 34.88, "step": 99160, "train_speed(iter/s)": 0.412382 }, { "acc": 0.93190889, "epoch": 2.6849971570140525, "grad_norm": 7.54115629196167, "learning_rate": 2.6757331129058957e-06, "loss": 0.37210658, "memory(GiB)": 34.88, "step": 99165, "train_speed(iter/s)": 0.412383 }, { "acc": 0.94942703, "epoch": 2.685132537297268, "grad_norm": 4.598761081695557, "learning_rate": 2.675237739744611e-06, "loss": 0.23278437, "memory(GiB)": 34.88, "step": 99170, "train_speed(iter/s)": 0.412384 }, { "acc": 0.92918291, "epoch": 2.6852679175804837, "grad_norm": 6.348943710327148, "learning_rate": 2.674742395711805e-06, "loss": 0.36454937, "memory(GiB)": 34.88, "step": 99175, "train_speed(iter/s)": 0.412385 }, { "acc": 0.94581947, "epoch": 2.685403297863699, "grad_norm": 7.584754943847656, "learning_rate": 2.674247080813684e-06, "loss": 0.28654671, "memory(GiB)": 34.88, "step": 99180, "train_speed(iter/s)": 0.412386 }, { "acc": 0.9352952, "epoch": 2.685538678146915, "grad_norm": 8.305785179138184, "learning_rate": 2.673751795056453e-06, "loss": 0.3702755, "memory(GiB)": 34.88, "step": 99185, "train_speed(iter/s)": 0.412387 }, { "acc": 0.93958378, "epoch": 2.68567405843013, "grad_norm": 13.448482513427734, "learning_rate": 2.673256538446313e-06, "loss": 0.33572586, "memory(GiB)": 34.88, "step": 99190, "train_speed(iter/s)": 0.412389 }, { "acc": 0.93343973, "epoch": 2.6858094387133455, "grad_norm": 6.600323677062988, "learning_rate": 2.6727613109894714e-06, "loss": 0.3395098, "memory(GiB)": 34.88, "step": 99195, "train_speed(iter/s)": 0.41239 }, { "acc": 0.92478561, "epoch": 2.6859448189965613, "grad_norm": 2.954237699508667, "learning_rate": 2.6722661126921323e-06, "loss": 0.43131075, "memory(GiB)": 34.88, "step": 99200, "train_speed(iter/s)": 0.412391 }, { "acc": 0.92382231, "epoch": 2.686080199279777, "grad_norm": 7.859264373779297, "learning_rate": 2.6717709435604954e-06, "loss": 0.42333097, "memory(GiB)": 34.88, "step": 99205, "train_speed(iter/s)": 0.412392 }, { "acc": 0.93385057, "epoch": 2.6862155795629925, "grad_norm": 5.03495979309082, "learning_rate": 2.6712758036007663e-06, "loss": 0.35467706, "memory(GiB)": 34.88, "step": 99210, "train_speed(iter/s)": 0.412393 }, { "acc": 0.94332685, "epoch": 2.686350959846208, "grad_norm": 4.611069679260254, "learning_rate": 2.6707806928191467e-06, "loss": 0.28720548, "memory(GiB)": 34.88, "step": 99215, "train_speed(iter/s)": 0.412394 }, { "acc": 0.93206854, "epoch": 2.6864863401294237, "grad_norm": 3.5369040966033936, "learning_rate": 2.670285611221842e-06, "loss": 0.33311727, "memory(GiB)": 34.88, "step": 99220, "train_speed(iter/s)": 0.412395 }, { "acc": 0.93519564, "epoch": 2.686621720412639, "grad_norm": 8.306696891784668, "learning_rate": 2.669790558815047e-06, "loss": 0.33125434, "memory(GiB)": 34.88, "step": 99225, "train_speed(iter/s)": 0.412396 }, { "acc": 0.9380827, "epoch": 2.6867571006958544, "grad_norm": 14.408127784729004, "learning_rate": 2.6692955356049717e-06, "loss": 0.30754137, "memory(GiB)": 34.88, "step": 99230, "train_speed(iter/s)": 0.412397 }, { "acc": 0.93735247, "epoch": 2.68689248097907, "grad_norm": 7.959345817565918, "learning_rate": 2.6688005415978125e-06, "loss": 0.40759869, "memory(GiB)": 34.88, "step": 99235, "train_speed(iter/s)": 0.412398 }, { "acc": 0.92635822, "epoch": 2.687027861262286, "grad_norm": 7.654079437255859, "learning_rate": 2.6683055767997696e-06, "loss": 0.4789083, "memory(GiB)": 34.88, "step": 99240, "train_speed(iter/s)": 0.412399 }, { "acc": 0.94062405, "epoch": 2.6871632415455013, "grad_norm": 6.10432767868042, "learning_rate": 2.6678106412170446e-06, "loss": 0.32586226, "memory(GiB)": 34.88, "step": 99245, "train_speed(iter/s)": 0.4124 }, { "acc": 0.93227463, "epoch": 2.6872986218287167, "grad_norm": 5.735906600952148, "learning_rate": 2.6673157348558366e-06, "loss": 0.44513044, "memory(GiB)": 34.88, "step": 99250, "train_speed(iter/s)": 0.412401 }, { "acc": 0.94206009, "epoch": 2.6874340021119325, "grad_norm": 9.998063087463379, "learning_rate": 2.666820857722349e-06, "loss": 0.33852634, "memory(GiB)": 34.88, "step": 99255, "train_speed(iter/s)": 0.412402 }, { "acc": 0.95050507, "epoch": 2.687569382395148, "grad_norm": 7.258626461029053, "learning_rate": 2.6663260098227743e-06, "loss": 0.3046761, "memory(GiB)": 34.88, "step": 99260, "train_speed(iter/s)": 0.412403 }, { "acc": 0.93097839, "epoch": 2.6877047626783637, "grad_norm": 6.24788761138916, "learning_rate": 2.6658311911633195e-06, "loss": 0.41017809, "memory(GiB)": 34.88, "step": 99265, "train_speed(iter/s)": 0.412404 }, { "acc": 0.92498608, "epoch": 2.687840142961579, "grad_norm": 8.160027503967285, "learning_rate": 2.6653364017501793e-06, "loss": 0.4614964, "memory(GiB)": 34.88, "step": 99270, "train_speed(iter/s)": 0.412405 }, { "acc": 0.92684708, "epoch": 2.687975523244795, "grad_norm": 16.438241958618164, "learning_rate": 2.6648416415895485e-06, "loss": 0.39616599, "memory(GiB)": 34.88, "step": 99275, "train_speed(iter/s)": 0.412406 }, { "acc": 0.92566814, "epoch": 2.68811090352801, "grad_norm": 6.927085876464844, "learning_rate": 2.6643469106876296e-06, "loss": 0.43887491, "memory(GiB)": 34.88, "step": 99280, "train_speed(iter/s)": 0.412407 }, { "acc": 0.91972141, "epoch": 2.6882462838112255, "grad_norm": 6.2860870361328125, "learning_rate": 2.6638522090506185e-06, "loss": 0.39107912, "memory(GiB)": 34.88, "step": 99285, "train_speed(iter/s)": 0.412408 }, { "acc": 0.93501921, "epoch": 2.6883816640944413, "grad_norm": 8.983794212341309, "learning_rate": 2.6633575366847142e-06, "loss": 0.34417255, "memory(GiB)": 34.88, "step": 99290, "train_speed(iter/s)": 0.412409 }, { "acc": 0.94311161, "epoch": 2.6885170443776567, "grad_norm": 4.057128429412842, "learning_rate": 2.662862893596108e-06, "loss": 0.27565022, "memory(GiB)": 34.88, "step": 99295, "train_speed(iter/s)": 0.41241 }, { "acc": 0.93938971, "epoch": 2.6886524246608725, "grad_norm": 8.32180118560791, "learning_rate": 2.6623682797910033e-06, "loss": 0.35208864, "memory(GiB)": 34.88, "step": 99300, "train_speed(iter/s)": 0.412411 }, { "acc": 0.92883863, "epoch": 2.688787804944088, "grad_norm": 6.417819976806641, "learning_rate": 2.6618736952755913e-06, "loss": 0.42099981, "memory(GiB)": 34.88, "step": 99305, "train_speed(iter/s)": 0.412412 }, { "acc": 0.93490305, "epoch": 2.6889231852273037, "grad_norm": 8.820693016052246, "learning_rate": 2.661379140056071e-06, "loss": 0.34896312, "memory(GiB)": 34.88, "step": 99310, "train_speed(iter/s)": 0.412413 }, { "acc": 0.93889027, "epoch": 2.689058565510519, "grad_norm": 15.242379188537598, "learning_rate": 2.660884614138634e-06, "loss": 0.32494457, "memory(GiB)": 34.88, "step": 99315, "train_speed(iter/s)": 0.412414 }, { "acc": 0.92287674, "epoch": 2.6891939457937344, "grad_norm": 9.938735961914062, "learning_rate": 2.6603901175294773e-06, "loss": 0.46102705, "memory(GiB)": 34.88, "step": 99320, "train_speed(iter/s)": 0.412415 }, { "acc": 0.93287163, "epoch": 2.68932932607695, "grad_norm": 5.071017742156982, "learning_rate": 2.6598956502347965e-06, "loss": 0.36765165, "memory(GiB)": 34.88, "step": 99325, "train_speed(iter/s)": 0.412416 }, { "acc": 0.92958317, "epoch": 2.6894647063601655, "grad_norm": 6.6911396980285645, "learning_rate": 2.6594012122607808e-06, "loss": 0.39250784, "memory(GiB)": 34.88, "step": 99330, "train_speed(iter/s)": 0.412418 }, { "acc": 0.93325424, "epoch": 2.6896000866433813, "grad_norm": 7.282262325286865, "learning_rate": 2.6589068036136325e-06, "loss": 0.33341713, "memory(GiB)": 34.88, "step": 99335, "train_speed(iter/s)": 0.412419 }, { "acc": 0.95410252, "epoch": 2.6897354669265967, "grad_norm": 3.519907236099243, "learning_rate": 2.6584124242995367e-06, "loss": 0.21297448, "memory(GiB)": 34.88, "step": 99340, "train_speed(iter/s)": 0.41242 }, { "acc": 0.92995577, "epoch": 2.6898708472098125, "grad_norm": 8.09012508392334, "learning_rate": 2.657918074324693e-06, "loss": 0.42209816, "memory(GiB)": 34.88, "step": 99345, "train_speed(iter/s)": 0.412421 }, { "acc": 0.9460907, "epoch": 2.690006227493028, "grad_norm": 8.015878677368164, "learning_rate": 2.657423753695287e-06, "loss": 0.2688323, "memory(GiB)": 34.88, "step": 99350, "train_speed(iter/s)": 0.412422 }, { "acc": 0.91432085, "epoch": 2.6901416077762432, "grad_norm": 9.452838897705078, "learning_rate": 2.656929462417519e-06, "loss": 0.50120149, "memory(GiB)": 34.88, "step": 99355, "train_speed(iter/s)": 0.412423 }, { "acc": 0.92300167, "epoch": 2.690276988059459, "grad_norm": 9.15095329284668, "learning_rate": 2.6564352004975763e-06, "loss": 0.44381003, "memory(GiB)": 34.88, "step": 99360, "train_speed(iter/s)": 0.412424 }, { "acc": 0.93460293, "epoch": 2.690412368342675, "grad_norm": 9.213706970214844, "learning_rate": 2.6559409679416502e-06, "loss": 0.40520673, "memory(GiB)": 34.88, "step": 99365, "train_speed(iter/s)": 0.412425 }, { "acc": 0.94363594, "epoch": 2.69054774862589, "grad_norm": 4.369395732879639, "learning_rate": 2.6554467647559327e-06, "loss": 0.27393286, "memory(GiB)": 34.88, "step": 99370, "train_speed(iter/s)": 0.412426 }, { "acc": 0.93723011, "epoch": 2.6906831289091055, "grad_norm": 10.583746910095215, "learning_rate": 2.654952590946615e-06, "loss": 0.32214637, "memory(GiB)": 34.88, "step": 99375, "train_speed(iter/s)": 0.412427 }, { "acc": 0.9265584, "epoch": 2.6908185091923214, "grad_norm": 9.114335060119629, "learning_rate": 2.65445844651989e-06, "loss": 0.42302475, "memory(GiB)": 34.88, "step": 99380, "train_speed(iter/s)": 0.412428 }, { "acc": 0.93415203, "epoch": 2.6909538894755367, "grad_norm": 4.290557861328125, "learning_rate": 2.6539643314819412e-06, "loss": 0.32065299, "memory(GiB)": 34.88, "step": 99385, "train_speed(iter/s)": 0.412429 }, { "acc": 0.93774462, "epoch": 2.691089269758752, "grad_norm": 5.954254627227783, "learning_rate": 2.6534702458389667e-06, "loss": 0.35048661, "memory(GiB)": 34.88, "step": 99390, "train_speed(iter/s)": 0.41243 }, { "acc": 0.94073353, "epoch": 2.691224650041968, "grad_norm": 7.901590824127197, "learning_rate": 2.6529761895971516e-06, "loss": 0.29642456, "memory(GiB)": 34.88, "step": 99395, "train_speed(iter/s)": 0.412431 }, { "acc": 0.94096632, "epoch": 2.6913600303251837, "grad_norm": 10.090035438537598, "learning_rate": 2.6524821627626836e-06, "loss": 0.38312304, "memory(GiB)": 34.88, "step": 99400, "train_speed(iter/s)": 0.412432 }, { "acc": 0.93735867, "epoch": 2.691495410608399, "grad_norm": 9.409724235534668, "learning_rate": 2.651988165341752e-06, "loss": 0.36655185, "memory(GiB)": 34.88, "step": 99405, "train_speed(iter/s)": 0.412433 }, { "acc": 0.92713375, "epoch": 2.6916307908916144, "grad_norm": 4.25762939453125, "learning_rate": 2.6514941973405466e-06, "loss": 0.45921679, "memory(GiB)": 34.88, "step": 99410, "train_speed(iter/s)": 0.412434 }, { "acc": 0.94018459, "epoch": 2.69176617117483, "grad_norm": 15.515803337097168, "learning_rate": 2.651000258765256e-06, "loss": 0.37490306, "memory(GiB)": 34.88, "step": 99415, "train_speed(iter/s)": 0.412436 }, { "acc": 0.92474651, "epoch": 2.6919015514580455, "grad_norm": 13.498907089233398, "learning_rate": 2.650506349622063e-06, "loss": 0.48091292, "memory(GiB)": 34.88, "step": 99420, "train_speed(iter/s)": 0.412437 }, { "acc": 0.93643074, "epoch": 2.6920369317412614, "grad_norm": 8.007168769836426, "learning_rate": 2.6500124699171616e-06, "loss": 0.42329187, "memory(GiB)": 34.88, "step": 99425, "train_speed(iter/s)": 0.412438 }, { "acc": 0.94139223, "epoch": 2.6921723120244767, "grad_norm": 6.595036029815674, "learning_rate": 2.6495186196567335e-06, "loss": 0.29273441, "memory(GiB)": 34.88, "step": 99430, "train_speed(iter/s)": 0.412439 }, { "acc": 0.93192768, "epoch": 2.6923076923076925, "grad_norm": 10.497014999389648, "learning_rate": 2.6490247988469693e-06, "loss": 0.37158344, "memory(GiB)": 34.88, "step": 99435, "train_speed(iter/s)": 0.41244 }, { "acc": 0.92105188, "epoch": 2.692443072590908, "grad_norm": 5.537487030029297, "learning_rate": 2.6485310074940502e-06, "loss": 0.37592044, "memory(GiB)": 34.88, "step": 99440, "train_speed(iter/s)": 0.412441 }, { "acc": 0.94754333, "epoch": 2.6925784528741232, "grad_norm": 6.099255084991455, "learning_rate": 2.6480372456041647e-06, "loss": 0.29612184, "memory(GiB)": 34.88, "step": 99445, "train_speed(iter/s)": 0.412442 }, { "acc": 0.92669687, "epoch": 2.692713833157339, "grad_norm": 4.909290790557861, "learning_rate": 2.6475435131834994e-06, "loss": 0.52735572, "memory(GiB)": 34.88, "step": 99450, "train_speed(iter/s)": 0.412443 }, { "acc": 0.93204393, "epoch": 2.6928492134405544, "grad_norm": 5.31790828704834, "learning_rate": 2.647049810238233e-06, "loss": 0.43794613, "memory(GiB)": 34.88, "step": 99455, "train_speed(iter/s)": 0.412444 }, { "acc": 0.94141932, "epoch": 2.69298459372377, "grad_norm": 9.121603012084961, "learning_rate": 2.6465561367745603e-06, "loss": 0.34409583, "memory(GiB)": 34.88, "step": 99460, "train_speed(iter/s)": 0.412445 }, { "acc": 0.94046535, "epoch": 2.6931199740069856, "grad_norm": 10.079548835754395, "learning_rate": 2.646062492798657e-06, "loss": 0.33541248, "memory(GiB)": 34.88, "step": 99465, "train_speed(iter/s)": 0.412446 }, { "acc": 0.93270073, "epoch": 2.6932553542902014, "grad_norm": 13.779363632202148, "learning_rate": 2.645568878316712e-06, "loss": 0.35398579, "memory(GiB)": 34.88, "step": 99470, "train_speed(iter/s)": 0.412447 }, { "acc": 0.93024302, "epoch": 2.6933907345734167, "grad_norm": 8.472769737243652, "learning_rate": 2.645075293334903e-06, "loss": 0.35754051, "memory(GiB)": 34.88, "step": 99475, "train_speed(iter/s)": 0.412448 }, { "acc": 0.93426809, "epoch": 2.693526114856632, "grad_norm": 6.412637710571289, "learning_rate": 2.6445817378594214e-06, "loss": 0.39242201, "memory(GiB)": 34.88, "step": 99480, "train_speed(iter/s)": 0.412449 }, { "acc": 0.9368722, "epoch": 2.693661495139848, "grad_norm": 10.61734676361084, "learning_rate": 2.644088211896444e-06, "loss": 0.41889019, "memory(GiB)": 34.88, "step": 99485, "train_speed(iter/s)": 0.41245 }, { "acc": 0.94575958, "epoch": 2.6937968754230632, "grad_norm": 5.541751384735107, "learning_rate": 2.6435947154521517e-06, "loss": 0.29434783, "memory(GiB)": 34.88, "step": 99490, "train_speed(iter/s)": 0.412451 }, { "acc": 0.92086029, "epoch": 2.693932255706279, "grad_norm": 5.899036407470703, "learning_rate": 2.643101248532733e-06, "loss": 0.43247328, "memory(GiB)": 34.88, "step": 99495, "train_speed(iter/s)": 0.412452 }, { "acc": 0.9275444, "epoch": 2.6940676359894944, "grad_norm": 9.818316459655762, "learning_rate": 2.6426078111443636e-06, "loss": 0.42140713, "memory(GiB)": 34.88, "step": 99500, "train_speed(iter/s)": 0.412453 }, { "acc": 0.93549528, "epoch": 2.69420301627271, "grad_norm": 7.0088629722595215, "learning_rate": 2.6421144032932296e-06, "loss": 0.33578963, "memory(GiB)": 34.88, "step": 99505, "train_speed(iter/s)": 0.412454 }, { "acc": 0.95331068, "epoch": 2.6943383965559256, "grad_norm": 4.2585835456848145, "learning_rate": 2.641621024985505e-06, "loss": 0.25062771, "memory(GiB)": 34.88, "step": 99510, "train_speed(iter/s)": 0.412455 }, { "acc": 0.94402943, "epoch": 2.694473776839141, "grad_norm": 4.7862043380737305, "learning_rate": 2.6411276762273792e-06, "loss": 0.24270334, "memory(GiB)": 34.88, "step": 99515, "train_speed(iter/s)": 0.412456 }, { "acc": 0.92319851, "epoch": 2.6946091571223567, "grad_norm": 3.090013027191162, "learning_rate": 2.6406343570250283e-06, "loss": 0.42410069, "memory(GiB)": 34.88, "step": 99520, "train_speed(iter/s)": 0.412457 }, { "acc": 0.9226572, "epoch": 2.6947445374055725, "grad_norm": 9.583600044250488, "learning_rate": 2.640141067384626e-06, "loss": 0.46326776, "memory(GiB)": 34.88, "step": 99525, "train_speed(iter/s)": 0.412458 }, { "acc": 0.93472643, "epoch": 2.694879917688788, "grad_norm": 9.770501136779785, "learning_rate": 2.6396478073123633e-06, "loss": 0.36480808, "memory(GiB)": 34.88, "step": 99530, "train_speed(iter/s)": 0.412459 }, { "acc": 0.93106251, "epoch": 2.6950152979720032, "grad_norm": 13.259773254394531, "learning_rate": 2.6391545768144104e-06, "loss": 0.40460649, "memory(GiB)": 34.88, "step": 99535, "train_speed(iter/s)": 0.41246 }, { "acc": 0.94211798, "epoch": 2.695150678255219, "grad_norm": 7.4798808097839355, "learning_rate": 2.6386613758969516e-06, "loss": 0.2959549, "memory(GiB)": 34.88, "step": 99540, "train_speed(iter/s)": 0.412461 }, { "acc": 0.93136349, "epoch": 2.6952860585384344, "grad_norm": 8.421738624572754, "learning_rate": 2.638168204566158e-06, "loss": 0.43884873, "memory(GiB)": 34.88, "step": 99545, "train_speed(iter/s)": 0.412463 }, { "acc": 0.91633587, "epoch": 2.6954214388216498, "grad_norm": 6.5949177742004395, "learning_rate": 2.6376750628282154e-06, "loss": 0.45520749, "memory(GiB)": 34.88, "step": 99550, "train_speed(iter/s)": 0.412464 }, { "acc": 0.92660904, "epoch": 2.6955568191048656, "grad_norm": 5.701086044311523, "learning_rate": 2.6371819506892975e-06, "loss": 0.4238308, "memory(GiB)": 34.88, "step": 99555, "train_speed(iter/s)": 0.412464 }, { "acc": 0.94143457, "epoch": 2.6956921993880814, "grad_norm": 8.48517894744873, "learning_rate": 2.6366888681555813e-06, "loss": 0.34298692, "memory(GiB)": 34.88, "step": 99560, "train_speed(iter/s)": 0.412466 }, { "acc": 0.94204216, "epoch": 2.6958275796712967, "grad_norm": 10.775867462158203, "learning_rate": 2.6361958152332475e-06, "loss": 0.33441491, "memory(GiB)": 34.88, "step": 99565, "train_speed(iter/s)": 0.412467 }, { "acc": 0.92597561, "epoch": 2.695962959954512, "grad_norm": 7.021274089813232, "learning_rate": 2.6357027919284668e-06, "loss": 0.4240447, "memory(GiB)": 34.88, "step": 99570, "train_speed(iter/s)": 0.412468 }, { "acc": 0.93162346, "epoch": 2.696098340237728, "grad_norm": 8.82255744934082, "learning_rate": 2.6352097982474195e-06, "loss": 0.38673344, "memory(GiB)": 34.88, "step": 99575, "train_speed(iter/s)": 0.412469 }, { "acc": 0.9329052, "epoch": 2.6962337205209432, "grad_norm": 6.23267936706543, "learning_rate": 2.634716834196275e-06, "loss": 0.40318794, "memory(GiB)": 34.88, "step": 99580, "train_speed(iter/s)": 0.41247 }, { "acc": 0.93202, "epoch": 2.696369100804159, "grad_norm": 6.465517997741699, "learning_rate": 2.634223899781219e-06, "loss": 0.41099563, "memory(GiB)": 34.88, "step": 99585, "train_speed(iter/s)": 0.412471 }, { "acc": 0.91706333, "epoch": 2.6965044810873744, "grad_norm": 11.097624778747559, "learning_rate": 2.6337309950084184e-06, "loss": 0.46773281, "memory(GiB)": 34.88, "step": 99590, "train_speed(iter/s)": 0.412472 }, { "acc": 0.93275089, "epoch": 2.69663986137059, "grad_norm": 8.933323860168457, "learning_rate": 2.63323811988405e-06, "loss": 0.35549812, "memory(GiB)": 34.88, "step": 99595, "train_speed(iter/s)": 0.412473 }, { "acc": 0.93467617, "epoch": 2.6967752416538056, "grad_norm": 11.821366310119629, "learning_rate": 2.6327452744142884e-06, "loss": 0.40043278, "memory(GiB)": 34.88, "step": 99600, "train_speed(iter/s)": 0.412474 }, { "acc": 0.95299397, "epoch": 2.696910621937021, "grad_norm": 8.69735336303711, "learning_rate": 2.6322524586053095e-06, "loss": 0.245609, "memory(GiB)": 34.88, "step": 99605, "train_speed(iter/s)": 0.412475 }, { "acc": 0.93451633, "epoch": 2.6970460022202367, "grad_norm": 6.097263813018799, "learning_rate": 2.631759672463285e-06, "loss": 0.41556559, "memory(GiB)": 34.88, "step": 99610, "train_speed(iter/s)": 0.412476 }, { "acc": 0.92847328, "epoch": 2.697181382503452, "grad_norm": 7.9307074546813965, "learning_rate": 2.6312669159943833e-06, "loss": 0.3786869, "memory(GiB)": 34.88, "step": 99615, "train_speed(iter/s)": 0.412477 }, { "acc": 0.94431238, "epoch": 2.697316762786668, "grad_norm": 5.687337875366211, "learning_rate": 2.6307741892047856e-06, "loss": 0.29448638, "memory(GiB)": 34.88, "step": 99620, "train_speed(iter/s)": 0.412478 }, { "acc": 0.93400402, "epoch": 2.6974521430698832, "grad_norm": 6.066502571105957, "learning_rate": 2.6302814921006574e-06, "loss": 0.39794557, "memory(GiB)": 34.88, "step": 99625, "train_speed(iter/s)": 0.412479 }, { "acc": 0.91656857, "epoch": 2.697587523353099, "grad_norm": 11.418917655944824, "learning_rate": 2.6297888246881744e-06, "loss": 0.50694752, "memory(GiB)": 34.88, "step": 99630, "train_speed(iter/s)": 0.41248 }, { "acc": 0.94854631, "epoch": 2.6977229036363144, "grad_norm": 8.829819679260254, "learning_rate": 2.629296186973507e-06, "loss": 0.26617408, "memory(GiB)": 34.88, "step": 99635, "train_speed(iter/s)": 0.412481 }, { "acc": 0.91555815, "epoch": 2.6978582839195298, "grad_norm": 9.768649101257324, "learning_rate": 2.6288035789628298e-06, "loss": 0.53360424, "memory(GiB)": 34.88, "step": 99640, "train_speed(iter/s)": 0.412482 }, { "acc": 0.93659983, "epoch": 2.6979936642027456, "grad_norm": 11.749398231506348, "learning_rate": 2.628311000662307e-06, "loss": 0.36269124, "memory(GiB)": 34.88, "step": 99645, "train_speed(iter/s)": 0.412483 }, { "acc": 0.93991489, "epoch": 2.698129044485961, "grad_norm": 6.186488151550293, "learning_rate": 2.6278184520781135e-06, "loss": 0.3611191, "memory(GiB)": 34.88, "step": 99650, "train_speed(iter/s)": 0.412484 }, { "acc": 0.93382797, "epoch": 2.6982644247691767, "grad_norm": 12.272799491882324, "learning_rate": 2.627325933216421e-06, "loss": 0.34871409, "memory(GiB)": 34.88, "step": 99655, "train_speed(iter/s)": 0.412485 }, { "acc": 0.935256, "epoch": 2.698399805052392, "grad_norm": 5.659265995025635, "learning_rate": 2.626833444083395e-06, "loss": 0.31593742, "memory(GiB)": 34.88, "step": 99660, "train_speed(iter/s)": 0.412486 }, { "acc": 0.96020145, "epoch": 2.698535185335608, "grad_norm": 2.9696149826049805, "learning_rate": 2.626340984685206e-06, "loss": 0.17750061, "memory(GiB)": 34.88, "step": 99665, "train_speed(iter/s)": 0.412487 }, { "acc": 0.91561422, "epoch": 2.6986705656188232, "grad_norm": 8.844758987426758, "learning_rate": 2.6258485550280245e-06, "loss": 0.54683714, "memory(GiB)": 34.88, "step": 99670, "train_speed(iter/s)": 0.412488 }, { "acc": 0.92500486, "epoch": 2.6988059459020386, "grad_norm": 12.65331745147705, "learning_rate": 2.6253561551180203e-06, "loss": 0.41870189, "memory(GiB)": 34.88, "step": 99675, "train_speed(iter/s)": 0.412489 }, { "acc": 0.9406249, "epoch": 2.6989413261852544, "grad_norm": 10.429587364196777, "learning_rate": 2.624863784961358e-06, "loss": 0.3615818, "memory(GiB)": 34.88, "step": 99680, "train_speed(iter/s)": 0.41249 }, { "acc": 0.93113918, "epoch": 2.69907670646847, "grad_norm": 8.612388610839844, "learning_rate": 2.624371444564207e-06, "loss": 0.34528508, "memory(GiB)": 34.88, "step": 99685, "train_speed(iter/s)": 0.412491 }, { "acc": 0.92421446, "epoch": 2.6992120867516856, "grad_norm": 8.555265426635742, "learning_rate": 2.623879133932738e-06, "loss": 0.47208338, "memory(GiB)": 34.88, "step": 99690, "train_speed(iter/s)": 0.412492 }, { "acc": 0.9354001, "epoch": 2.699347467034901, "grad_norm": 7.896356582641602, "learning_rate": 2.6233868530731127e-06, "loss": 0.33159788, "memory(GiB)": 34.88, "step": 99695, "train_speed(iter/s)": 0.412494 }, { "acc": 0.93054371, "epoch": 2.6994828473181167, "grad_norm": 9.397294044494629, "learning_rate": 2.6228946019915013e-06, "loss": 0.39603038, "memory(GiB)": 34.88, "step": 99700, "train_speed(iter/s)": 0.412494 }, { "acc": 0.93226376, "epoch": 2.699618227601332, "grad_norm": 3.9327890872955322, "learning_rate": 2.622402380694069e-06, "loss": 0.39415312, "memory(GiB)": 34.88, "step": 99705, "train_speed(iter/s)": 0.412496 }, { "acc": 0.9335824, "epoch": 2.6997536078845474, "grad_norm": 14.784976959228516, "learning_rate": 2.621910189186985e-06, "loss": 0.42621255, "memory(GiB)": 34.88, "step": 99710, "train_speed(iter/s)": 0.412497 }, { "acc": 0.93397331, "epoch": 2.6998889881677632, "grad_norm": 9.955252647399902, "learning_rate": 2.6214180274764093e-06, "loss": 0.41688147, "memory(GiB)": 34.88, "step": 99715, "train_speed(iter/s)": 0.412498 }, { "acc": 0.93033791, "epoch": 2.700024368450979, "grad_norm": 5.1641669273376465, "learning_rate": 2.620925895568511e-06, "loss": 0.43432398, "memory(GiB)": 34.88, "step": 99720, "train_speed(iter/s)": 0.412499 }, { "acc": 0.92146873, "epoch": 2.7001597487341944, "grad_norm": 11.270716667175293, "learning_rate": 2.6204337934694536e-06, "loss": 0.46517467, "memory(GiB)": 34.88, "step": 99725, "train_speed(iter/s)": 0.4125 }, { "acc": 0.92735891, "epoch": 2.7002951290174098, "grad_norm": 4.407464027404785, "learning_rate": 2.619941721185404e-06, "loss": 0.43892784, "memory(GiB)": 34.88, "step": 99730, "train_speed(iter/s)": 0.412501 }, { "acc": 0.93454247, "epoch": 2.7004305093006256, "grad_norm": 5.0230021476745605, "learning_rate": 2.6194496787225253e-06, "loss": 0.37226126, "memory(GiB)": 34.88, "step": 99735, "train_speed(iter/s)": 0.412502 }, { "acc": 0.9224577, "epoch": 2.700565889583841, "grad_norm": 9.243678092956543, "learning_rate": 2.6189576660869765e-06, "loss": 0.49718232, "memory(GiB)": 34.88, "step": 99740, "train_speed(iter/s)": 0.412503 }, { "acc": 0.9327177, "epoch": 2.7007012698670567, "grad_norm": 5.589548587799072, "learning_rate": 2.6184656832849297e-06, "loss": 0.33824139, "memory(GiB)": 34.88, "step": 99745, "train_speed(iter/s)": 0.412504 }, { "acc": 0.93945999, "epoch": 2.700836650150272, "grad_norm": 6.022854328155518, "learning_rate": 2.6179737303225412e-06, "loss": 0.35962374, "memory(GiB)": 34.88, "step": 99750, "train_speed(iter/s)": 0.412505 }, { "acc": 0.93822889, "epoch": 2.700972030433488, "grad_norm": 10.834879875183105, "learning_rate": 2.617481807205975e-06, "loss": 0.35377817, "memory(GiB)": 34.88, "step": 99755, "train_speed(iter/s)": 0.412506 }, { "acc": 0.9225771, "epoch": 2.7011074107167032, "grad_norm": 7.756383419036865, "learning_rate": 2.616989913941395e-06, "loss": 0.42797694, "memory(GiB)": 34.88, "step": 99760, "train_speed(iter/s)": 0.412507 }, { "acc": 0.91992493, "epoch": 2.7012427909999186, "grad_norm": 19.967144012451172, "learning_rate": 2.616498050534964e-06, "loss": 0.49825473, "memory(GiB)": 34.88, "step": 99765, "train_speed(iter/s)": 0.412508 }, { "acc": 0.94274578, "epoch": 2.7013781712831344, "grad_norm": 7.785825729370117, "learning_rate": 2.61600621699284e-06, "loss": 0.36201074, "memory(GiB)": 34.88, "step": 99770, "train_speed(iter/s)": 0.412509 }, { "acc": 0.93522072, "epoch": 2.7015135515663498, "grad_norm": 6.087509632110596, "learning_rate": 2.6155144133211867e-06, "loss": 0.39295976, "memory(GiB)": 34.88, "step": 99775, "train_speed(iter/s)": 0.41251 }, { "acc": 0.92174816, "epoch": 2.7016489318495656, "grad_norm": 4.8037567138671875, "learning_rate": 2.6150226395261663e-06, "loss": 0.45573721, "memory(GiB)": 34.88, "step": 99780, "train_speed(iter/s)": 0.412511 }, { "acc": 0.94419441, "epoch": 2.701784312132781, "grad_norm": 6.634253978729248, "learning_rate": 2.6145308956139346e-06, "loss": 0.32253079, "memory(GiB)": 34.88, "step": 99785, "train_speed(iter/s)": 0.412512 }, { "acc": 0.92596846, "epoch": 2.7019196924159967, "grad_norm": 6.010239601135254, "learning_rate": 2.6140391815906547e-06, "loss": 0.43799706, "memory(GiB)": 34.88, "step": 99790, "train_speed(iter/s)": 0.412513 }, { "acc": 0.94805698, "epoch": 2.702055072699212, "grad_norm": 4.260788917541504, "learning_rate": 2.6135474974624858e-06, "loss": 0.25126045, "memory(GiB)": 34.88, "step": 99795, "train_speed(iter/s)": 0.412514 }, { "acc": 0.94946461, "epoch": 2.7021904529824274, "grad_norm": 9.872132301330566, "learning_rate": 2.61305584323559e-06, "loss": 0.28072844, "memory(GiB)": 34.88, "step": 99800, "train_speed(iter/s)": 0.412515 }, { "acc": 0.92340937, "epoch": 2.7023258332656432, "grad_norm": 8.832442283630371, "learning_rate": 2.612564218916121e-06, "loss": 0.41161737, "memory(GiB)": 34.88, "step": 99805, "train_speed(iter/s)": 0.412516 }, { "acc": 0.94920788, "epoch": 2.7024612135488586, "grad_norm": 6.910992622375488, "learning_rate": 2.61207262451024e-06, "loss": 0.29325666, "memory(GiB)": 34.88, "step": 99810, "train_speed(iter/s)": 0.412517 }, { "acc": 0.92875299, "epoch": 2.7025965938320744, "grad_norm": 8.695432662963867, "learning_rate": 2.611581060024107e-06, "loss": 0.37749393, "memory(GiB)": 34.88, "step": 99815, "train_speed(iter/s)": 0.412518 }, { "acc": 0.95226669, "epoch": 2.7027319741152898, "grad_norm": 15.203215599060059, "learning_rate": 2.611089525463876e-06, "loss": 0.25554409, "memory(GiB)": 34.88, "step": 99820, "train_speed(iter/s)": 0.41252 }, { "acc": 0.93772869, "epoch": 2.7028673543985056, "grad_norm": 8.043807029724121, "learning_rate": 2.610598020835707e-06, "loss": 0.32765827, "memory(GiB)": 34.88, "step": 99825, "train_speed(iter/s)": 0.41252 }, { "acc": 0.94297314, "epoch": 2.703002734681721, "grad_norm": 5.536617279052734, "learning_rate": 2.6101065461457558e-06, "loss": 0.35564132, "memory(GiB)": 34.88, "step": 99830, "train_speed(iter/s)": 0.412521 }, { "acc": 0.93080168, "epoch": 2.7031381149649363, "grad_norm": 10.676502227783203, "learning_rate": 2.6096151014001824e-06, "loss": 0.4014101, "memory(GiB)": 34.88, "step": 99835, "train_speed(iter/s)": 0.412523 }, { "acc": 0.94488144, "epoch": 2.703273495248152, "grad_norm": 5.388904571533203, "learning_rate": 2.609123686605138e-06, "loss": 0.30817318, "memory(GiB)": 34.88, "step": 99840, "train_speed(iter/s)": 0.412524 }, { "acc": 0.92760658, "epoch": 2.703408875531368, "grad_norm": 10.94793701171875, "learning_rate": 2.6086323017667805e-06, "loss": 0.42936096, "memory(GiB)": 34.88, "step": 99845, "train_speed(iter/s)": 0.412525 }, { "acc": 0.94425888, "epoch": 2.7035442558145832, "grad_norm": 24.853618621826172, "learning_rate": 2.608140946891266e-06, "loss": 0.32682626, "memory(GiB)": 34.88, "step": 99850, "train_speed(iter/s)": 0.412526 }, { "acc": 0.93182602, "epoch": 2.7036796360977986, "grad_norm": 6.642458915710449, "learning_rate": 2.6076496219847526e-06, "loss": 0.46643209, "memory(GiB)": 34.88, "step": 99855, "train_speed(iter/s)": 0.412527 }, { "acc": 0.93407688, "epoch": 2.7038150163810144, "grad_norm": 12.337368965148926, "learning_rate": 2.607158327053389e-06, "loss": 0.41098881, "memory(GiB)": 34.88, "step": 99860, "train_speed(iter/s)": 0.412528 }, { "acc": 0.93981771, "epoch": 2.7039503966642298, "grad_norm": 5.722897052764893, "learning_rate": 2.6066670621033335e-06, "loss": 0.35505867, "memory(GiB)": 34.88, "step": 99865, "train_speed(iter/s)": 0.412529 }, { "acc": 0.9357439, "epoch": 2.704085776947445, "grad_norm": 4.448730945587158, "learning_rate": 2.6061758271407407e-06, "loss": 0.31523128, "memory(GiB)": 34.88, "step": 99870, "train_speed(iter/s)": 0.41253 }, { "acc": 0.9366416, "epoch": 2.704221157230661, "grad_norm": 4.963337421417236, "learning_rate": 2.605684622171761e-06, "loss": 0.35756547, "memory(GiB)": 34.88, "step": 99875, "train_speed(iter/s)": 0.412531 }, { "acc": 0.9271266, "epoch": 2.7043565375138767, "grad_norm": 4.594350814819336, "learning_rate": 2.6051934472025507e-06, "loss": 0.4388155, "memory(GiB)": 34.88, "step": 99880, "train_speed(iter/s)": 0.412532 }, { "acc": 0.94606247, "epoch": 2.704491917797092, "grad_norm": 5.731751441955566, "learning_rate": 2.6047023022392613e-06, "loss": 0.28209028, "memory(GiB)": 34.88, "step": 99885, "train_speed(iter/s)": 0.412533 }, { "acc": 0.92123222, "epoch": 2.7046272980803074, "grad_norm": 5.794949531555176, "learning_rate": 2.6042111872880475e-06, "loss": 0.45622053, "memory(GiB)": 34.88, "step": 99890, "train_speed(iter/s)": 0.412534 }, { "acc": 0.93524628, "epoch": 2.7047626783635232, "grad_norm": 8.396295547485352, "learning_rate": 2.603720102355058e-06, "loss": 0.32388561, "memory(GiB)": 34.88, "step": 99895, "train_speed(iter/s)": 0.412535 }, { "acc": 0.92837448, "epoch": 2.7048980586467386, "grad_norm": 17.541040420532227, "learning_rate": 2.603229047446446e-06, "loss": 0.41731133, "memory(GiB)": 34.88, "step": 99900, "train_speed(iter/s)": 0.412536 }, { "acc": 0.92498713, "epoch": 2.7050334389299544, "grad_norm": 5.219758987426758, "learning_rate": 2.6027380225683665e-06, "loss": 0.3970974, "memory(GiB)": 34.88, "step": 99905, "train_speed(iter/s)": 0.412537 }, { "acc": 0.92972984, "epoch": 2.7051688192131698, "grad_norm": 11.17066478729248, "learning_rate": 2.602247027726964e-06, "loss": 0.38027263, "memory(GiB)": 34.88, "step": 99910, "train_speed(iter/s)": 0.412538 }, { "acc": 0.92242088, "epoch": 2.7053041994963856, "grad_norm": 8.089716911315918, "learning_rate": 2.6017560629283937e-06, "loss": 0.42898951, "memory(GiB)": 34.88, "step": 99915, "train_speed(iter/s)": 0.412539 }, { "acc": 0.92046356, "epoch": 2.705439579779601, "grad_norm": 9.313990592956543, "learning_rate": 2.6012651281788037e-06, "loss": 0.50454264, "memory(GiB)": 34.88, "step": 99920, "train_speed(iter/s)": 0.412541 }, { "acc": 0.93186836, "epoch": 2.7055749600628163, "grad_norm": 12.297623634338379, "learning_rate": 2.6007742234843473e-06, "loss": 0.35580978, "memory(GiB)": 34.88, "step": 99925, "train_speed(iter/s)": 0.412542 }, { "acc": 0.93782635, "epoch": 2.705710340346032, "grad_norm": 20.967134475708008, "learning_rate": 2.60028334885117e-06, "loss": 0.32271492, "memory(GiB)": 34.88, "step": 99930, "train_speed(iter/s)": 0.412543 }, { "acc": 0.9303359, "epoch": 2.7058457206292474, "grad_norm": 13.460143089294434, "learning_rate": 2.599792504285422e-06, "loss": 0.36348953, "memory(GiB)": 34.88, "step": 99935, "train_speed(iter/s)": 0.412544 }, { "acc": 0.95251656, "epoch": 2.7059811009124632, "grad_norm": 2.1295366287231445, "learning_rate": 2.599301689793256e-06, "loss": 0.23535891, "memory(GiB)": 34.88, "step": 99940, "train_speed(iter/s)": 0.412545 }, { "acc": 0.9394969, "epoch": 2.7061164811956786, "grad_norm": 9.784893989562988, "learning_rate": 2.5988109053808142e-06, "loss": 0.3300117, "memory(GiB)": 34.88, "step": 99945, "train_speed(iter/s)": 0.412546 }, { "acc": 0.94775572, "epoch": 2.7062518614788944, "grad_norm": 2.334840774536133, "learning_rate": 2.598320151054248e-06, "loss": 0.25088916, "memory(GiB)": 34.88, "step": 99950, "train_speed(iter/s)": 0.412547 }, { "acc": 0.92508907, "epoch": 2.7063872417621098, "grad_norm": 4.682159423828125, "learning_rate": 2.597829426819704e-06, "loss": 0.41701183, "memory(GiB)": 34.88, "step": 99955, "train_speed(iter/s)": 0.412548 }, { "acc": 0.9283905, "epoch": 2.706522622045325, "grad_norm": 3.9807522296905518, "learning_rate": 2.597338732683333e-06, "loss": 0.34181046, "memory(GiB)": 34.88, "step": 99960, "train_speed(iter/s)": 0.412549 }, { "acc": 0.94653292, "epoch": 2.706658002328541, "grad_norm": 10.09925651550293, "learning_rate": 2.5968480686512766e-06, "loss": 0.26013024, "memory(GiB)": 34.88, "step": 99965, "train_speed(iter/s)": 0.41255 }, { "acc": 0.91695271, "epoch": 2.7067933826117563, "grad_norm": 11.90345287322998, "learning_rate": 2.596357434729683e-06, "loss": 0.46547842, "memory(GiB)": 34.88, "step": 99970, "train_speed(iter/s)": 0.412551 }, { "acc": 0.93721333, "epoch": 2.706928762894972, "grad_norm": 9.040053367614746, "learning_rate": 2.595866830924701e-06, "loss": 0.34598598, "memory(GiB)": 34.88, "step": 99975, "train_speed(iter/s)": 0.412552 }, { "acc": 0.93953247, "epoch": 2.7070641431781874, "grad_norm": 2.798454761505127, "learning_rate": 2.595376257242475e-06, "loss": 0.32490897, "memory(GiB)": 34.88, "step": 99980, "train_speed(iter/s)": 0.412553 }, { "acc": 0.91382694, "epoch": 2.7071995234614032, "grad_norm": 23.459293365478516, "learning_rate": 2.5948857136891486e-06, "loss": 0.57534237, "memory(GiB)": 34.88, "step": 99985, "train_speed(iter/s)": 0.412554 }, { "acc": 0.93407593, "epoch": 2.7073349037446186, "grad_norm": 5.828984260559082, "learning_rate": 2.594395200270869e-06, "loss": 0.37969675, "memory(GiB)": 34.88, "step": 99990, "train_speed(iter/s)": 0.412555 }, { "acc": 0.92177315, "epoch": 2.707470284027834, "grad_norm": 6.563992500305176, "learning_rate": 2.593904716993781e-06, "loss": 0.37637291, "memory(GiB)": 34.88, "step": 99995, "train_speed(iter/s)": 0.412556 }, { "acc": 0.94752131, "epoch": 2.7076056643110498, "grad_norm": 3.6319282054901123, "learning_rate": 2.5934142638640263e-06, "loss": 0.32860167, "memory(GiB)": 34.88, "step": 100000, "train_speed(iter/s)": 0.412557 }, { "epoch": 2.7076056643110498, "eval_acc": 0.6266083344995992, "eval_loss": 1.1282122135162354, "eval_runtime": 1297.5545, "eval_samples_per_second": 66.514, "eval_steps_per_second": 2.079, "step": 100000 }, { "acc": 0.93947926, "epoch": 2.7077410445942656, "grad_norm": 8.077323913574219, "learning_rate": 2.59292384088775e-06, "loss": 0.38678863, "memory(GiB)": 34.88, "step": 100005, "train_speed(iter/s)": 0.410326 }, { "acc": 0.94921474, "epoch": 2.707876424877481, "grad_norm": 7.842177391052246, "learning_rate": 2.5924334480710963e-06, "loss": 0.35956514, "memory(GiB)": 34.88, "step": 100010, "train_speed(iter/s)": 0.410327 }, { "acc": 0.91249056, "epoch": 2.7080118051606963, "grad_norm": 10.52524185180664, "learning_rate": 2.591943085420209e-06, "loss": 0.52681007, "memory(GiB)": 34.88, "step": 100015, "train_speed(iter/s)": 0.410329 }, { "acc": 0.93660965, "epoch": 2.708147185443912, "grad_norm": 17.967010498046875, "learning_rate": 2.591452752941227e-06, "loss": 0.38245401, "memory(GiB)": 34.88, "step": 100020, "train_speed(iter/s)": 0.41033 }, { "acc": 0.93056269, "epoch": 2.7082825657271274, "grad_norm": 11.53564167022705, "learning_rate": 2.5909624506402987e-06, "loss": 0.38520732, "memory(GiB)": 34.88, "step": 100025, "train_speed(iter/s)": 0.410331 }, { "acc": 0.93162498, "epoch": 2.708417946010343, "grad_norm": 6.77163028717041, "learning_rate": 2.590472178523564e-06, "loss": 0.35756173, "memory(GiB)": 34.88, "step": 100030, "train_speed(iter/s)": 0.410332 }, { "acc": 0.9476387, "epoch": 2.7085533262935586, "grad_norm": 5.229235649108887, "learning_rate": 2.5899819365971602e-06, "loss": 0.30575731, "memory(GiB)": 34.88, "step": 100035, "train_speed(iter/s)": 0.410333 }, { "acc": 0.93001537, "epoch": 2.7086887065767744, "grad_norm": 18.166128158569336, "learning_rate": 2.589491724867233e-06, "loss": 0.36517565, "memory(GiB)": 34.88, "step": 100040, "train_speed(iter/s)": 0.410334 }, { "acc": 0.93430099, "epoch": 2.7088240868599898, "grad_norm": 7.041818141937256, "learning_rate": 2.589001543339922e-06, "loss": 0.39387922, "memory(GiB)": 34.88, "step": 100045, "train_speed(iter/s)": 0.410335 }, { "acc": 0.94709282, "epoch": 2.708959467143205, "grad_norm": 7.1599249839782715, "learning_rate": 2.588511392021368e-06, "loss": 0.31172025, "memory(GiB)": 34.88, "step": 100050, "train_speed(iter/s)": 0.410336 }, { "acc": 0.9405242, "epoch": 2.709094847426421, "grad_norm": 7.17086935043335, "learning_rate": 2.588021270917709e-06, "loss": 0.32391391, "memory(GiB)": 34.88, "step": 100055, "train_speed(iter/s)": 0.410337 }, { "acc": 0.94527578, "epoch": 2.7092302277096363, "grad_norm": 8.015876770019531, "learning_rate": 2.5875311800350894e-06, "loss": 0.32962532, "memory(GiB)": 34.88, "step": 100060, "train_speed(iter/s)": 0.410339 }, { "acc": 0.93379307, "epoch": 2.709365607992852, "grad_norm": 3.812819242477417, "learning_rate": 2.5870411193796462e-06, "loss": 0.37951288, "memory(GiB)": 34.88, "step": 100065, "train_speed(iter/s)": 0.41034 }, { "acc": 0.94696522, "epoch": 2.7095009882760674, "grad_norm": 6.747315883636475, "learning_rate": 2.5865510889575156e-06, "loss": 0.25978055, "memory(GiB)": 34.88, "step": 100070, "train_speed(iter/s)": 0.410341 }, { "acc": 0.91804771, "epoch": 2.7096363685592832, "grad_norm": 15.346269607543945, "learning_rate": 2.586061088774839e-06, "loss": 0.50045161, "memory(GiB)": 34.88, "step": 100075, "train_speed(iter/s)": 0.410342 }, { "acc": 0.92360554, "epoch": 2.7097717488424986, "grad_norm": 13.485616683959961, "learning_rate": 2.5855711188377536e-06, "loss": 0.44860125, "memory(GiB)": 34.88, "step": 100080, "train_speed(iter/s)": 0.410343 }, { "acc": 0.93447094, "epoch": 2.709907129125714, "grad_norm": 7.041679382324219, "learning_rate": 2.5850811791523995e-06, "loss": 0.37890234, "memory(GiB)": 34.88, "step": 100085, "train_speed(iter/s)": 0.410344 }, { "acc": 0.94836254, "epoch": 2.7100425094089298, "grad_norm": 5.612321376800537, "learning_rate": 2.584591269724909e-06, "loss": 0.33562775, "memory(GiB)": 34.88, "step": 100090, "train_speed(iter/s)": 0.410345 }, { "acc": 0.93938351, "epoch": 2.710177889692145, "grad_norm": 12.463113784790039, "learning_rate": 2.584101390561427e-06, "loss": 0.35344882, "memory(GiB)": 34.88, "step": 100095, "train_speed(iter/s)": 0.410346 }, { "acc": 0.93325119, "epoch": 2.710313269975361, "grad_norm": 24.657516479492188, "learning_rate": 2.5836115416680836e-06, "loss": 0.38233771, "memory(GiB)": 34.88, "step": 100100, "train_speed(iter/s)": 0.410347 }, { "acc": 0.93461771, "epoch": 2.7104486502585763, "grad_norm": 5.606318473815918, "learning_rate": 2.5831217230510185e-06, "loss": 0.3201782, "memory(GiB)": 34.88, "step": 100105, "train_speed(iter/s)": 0.410348 }, { "acc": 0.92338066, "epoch": 2.710584030541792, "grad_norm": 10.649189949035645, "learning_rate": 2.5826319347163655e-06, "loss": 0.37496188, "memory(GiB)": 34.88, "step": 100110, "train_speed(iter/s)": 0.410349 }, { "acc": 0.94504976, "epoch": 2.7107194108250074, "grad_norm": 4.320247650146484, "learning_rate": 2.5821421766702605e-06, "loss": 0.31707678, "memory(GiB)": 34.88, "step": 100115, "train_speed(iter/s)": 0.41035 }, { "acc": 0.92638855, "epoch": 2.710854791108223, "grad_norm": 4.847696781158447, "learning_rate": 2.581652448918841e-06, "loss": 0.43125443, "memory(GiB)": 34.88, "step": 100120, "train_speed(iter/s)": 0.410351 }, { "acc": 0.93733664, "epoch": 2.7109901713914386, "grad_norm": 5.381436347961426, "learning_rate": 2.581162751468237e-06, "loss": 0.36061127, "memory(GiB)": 34.88, "step": 100125, "train_speed(iter/s)": 0.410352 }, { "acc": 0.93155632, "epoch": 2.711125551674654, "grad_norm": 5.467755317687988, "learning_rate": 2.5806730843245893e-06, "loss": 0.39016364, "memory(GiB)": 34.88, "step": 100130, "train_speed(iter/s)": 0.410353 }, { "acc": 0.9298254, "epoch": 2.7112609319578698, "grad_norm": 8.603055000305176, "learning_rate": 2.5801834474940274e-06, "loss": 0.42677593, "memory(GiB)": 34.88, "step": 100135, "train_speed(iter/s)": 0.410354 }, { "acc": 0.91644058, "epoch": 2.711396312241085, "grad_norm": 4.846587657928467, "learning_rate": 2.5796938409826875e-06, "loss": 0.56656027, "memory(GiB)": 34.88, "step": 100140, "train_speed(iter/s)": 0.410355 }, { "acc": 0.93991413, "epoch": 2.711531692524301, "grad_norm": 3.2353458404541016, "learning_rate": 2.5792042647966976e-06, "loss": 0.3691294, "memory(GiB)": 34.88, "step": 100145, "train_speed(iter/s)": 0.410356 }, { "acc": 0.93507977, "epoch": 2.7116670728075163, "grad_norm": 6.511559009552002, "learning_rate": 2.578714718942199e-06, "loss": 0.40053997, "memory(GiB)": 34.88, "step": 100150, "train_speed(iter/s)": 0.410357 }, { "acc": 0.92707825, "epoch": 2.7118024530907316, "grad_norm": 8.473653793334961, "learning_rate": 2.578225203425319e-06, "loss": 0.45117254, "memory(GiB)": 34.88, "step": 100155, "train_speed(iter/s)": 0.410358 }, { "acc": 0.92804012, "epoch": 2.7119378333739474, "grad_norm": 6.9656476974487305, "learning_rate": 2.5777357182521876e-06, "loss": 0.40503454, "memory(GiB)": 34.88, "step": 100160, "train_speed(iter/s)": 0.410359 }, { "acc": 0.92531719, "epoch": 2.712073213657163, "grad_norm": 7.282907009124756, "learning_rate": 2.5772462634289437e-06, "loss": 0.4856081, "memory(GiB)": 34.88, "step": 100165, "train_speed(iter/s)": 0.41036 }, { "acc": 0.95203037, "epoch": 2.7122085939403786, "grad_norm": 13.57822322845459, "learning_rate": 2.5767568389617127e-06, "loss": 0.28627789, "memory(GiB)": 34.88, "step": 100170, "train_speed(iter/s)": 0.410362 }, { "acc": 0.9291935, "epoch": 2.712343974223594, "grad_norm": 5.47947359085083, "learning_rate": 2.5762674448566276e-06, "loss": 0.41003757, "memory(GiB)": 34.88, "step": 100175, "train_speed(iter/s)": 0.410363 }, { "acc": 0.94236374, "epoch": 2.7124793545068098, "grad_norm": 10.777175903320312, "learning_rate": 2.575778081119817e-06, "loss": 0.4038692, "memory(GiB)": 34.88, "step": 100180, "train_speed(iter/s)": 0.410364 }, { "acc": 0.94824781, "epoch": 2.712614734790025, "grad_norm": 14.012711524963379, "learning_rate": 2.5752887477574157e-06, "loss": 0.3475152, "memory(GiB)": 34.88, "step": 100185, "train_speed(iter/s)": 0.410365 }, { "acc": 0.94845753, "epoch": 2.7127501150732405, "grad_norm": 5.16591739654541, "learning_rate": 2.5747994447755493e-06, "loss": 0.28044052, "memory(GiB)": 34.88, "step": 100190, "train_speed(iter/s)": 0.410366 }, { "acc": 0.93623562, "epoch": 2.7128854953564563, "grad_norm": 5.445508003234863, "learning_rate": 2.5743101721803487e-06, "loss": 0.37876165, "memory(GiB)": 34.88, "step": 100195, "train_speed(iter/s)": 0.410367 }, { "acc": 0.94683418, "epoch": 2.713020875639672, "grad_norm": 8.93596076965332, "learning_rate": 2.5738209299779455e-06, "loss": 0.34483898, "memory(GiB)": 34.88, "step": 100200, "train_speed(iter/s)": 0.410368 }, { "acc": 0.94006577, "epoch": 2.7131562559228874, "grad_norm": 7.274400234222412, "learning_rate": 2.573331718174464e-06, "loss": 0.37965038, "memory(GiB)": 34.88, "step": 100205, "train_speed(iter/s)": 0.410369 }, { "acc": 0.94024248, "epoch": 2.713291636206103, "grad_norm": 5.871154308319092, "learning_rate": 2.5728425367760362e-06, "loss": 0.29010482, "memory(GiB)": 34.88, "step": 100210, "train_speed(iter/s)": 0.41037 }, { "acc": 0.92222767, "epoch": 2.7134270164893186, "grad_norm": 7.015834808349609, "learning_rate": 2.572353385788785e-06, "loss": 0.50819392, "memory(GiB)": 34.88, "step": 100215, "train_speed(iter/s)": 0.410371 }, { "acc": 0.95488291, "epoch": 2.713562396772534, "grad_norm": 3.8562088012695312, "learning_rate": 2.5718642652188447e-06, "loss": 0.27754893, "memory(GiB)": 34.88, "step": 100220, "train_speed(iter/s)": 0.410373 }, { "acc": 0.94321003, "epoch": 2.7136977770557493, "grad_norm": 9.205503463745117, "learning_rate": 2.5713751750723378e-06, "loss": 0.26800089, "memory(GiB)": 34.88, "step": 100225, "train_speed(iter/s)": 0.410374 }, { "acc": 0.93160076, "epoch": 2.713833157338965, "grad_norm": 7.029662132263184, "learning_rate": 2.5708861153553925e-06, "loss": 0.38487294, "memory(GiB)": 34.88, "step": 100230, "train_speed(iter/s)": 0.410375 }, { "acc": 0.9307642, "epoch": 2.713968537622181, "grad_norm": 8.044251441955566, "learning_rate": 2.570397086074137e-06, "loss": 0.39404645, "memory(GiB)": 34.88, "step": 100235, "train_speed(iter/s)": 0.410376 }, { "acc": 0.93112774, "epoch": 2.7141039179053963, "grad_norm": 5.768810749053955, "learning_rate": 2.569908087234694e-06, "loss": 0.40626512, "memory(GiB)": 34.88, "step": 100240, "train_speed(iter/s)": 0.410377 }, { "acc": 0.94733467, "epoch": 2.7142392981886116, "grad_norm": 6.824122905731201, "learning_rate": 2.5694191188431925e-06, "loss": 0.34002628, "memory(GiB)": 34.88, "step": 100245, "train_speed(iter/s)": 0.410378 }, { "acc": 0.92661238, "epoch": 2.7143746784718275, "grad_norm": 9.058707237243652, "learning_rate": 2.568930180905752e-06, "loss": 0.38524461, "memory(GiB)": 34.88, "step": 100250, "train_speed(iter/s)": 0.41038 }, { "acc": 0.94790621, "epoch": 2.714510058755043, "grad_norm": 11.921303749084473, "learning_rate": 2.5684412734285063e-06, "loss": 0.28505707, "memory(GiB)": 34.88, "step": 100255, "train_speed(iter/s)": 0.410381 }, { "acc": 0.92085152, "epoch": 2.7146454390382586, "grad_norm": 4.293605327606201, "learning_rate": 2.5679523964175728e-06, "loss": 0.42917633, "memory(GiB)": 34.88, "step": 100260, "train_speed(iter/s)": 0.410382 }, { "acc": 0.93285627, "epoch": 2.714780819321474, "grad_norm": 8.728815078735352, "learning_rate": 2.5674635498790773e-06, "loss": 0.39282897, "memory(GiB)": 34.88, "step": 100265, "train_speed(iter/s)": 0.410383 }, { "acc": 0.92607069, "epoch": 2.7149161996046898, "grad_norm": 10.43088150024414, "learning_rate": 2.5669747338191447e-06, "loss": 0.48300076, "memory(GiB)": 34.88, "step": 100270, "train_speed(iter/s)": 0.410384 }, { "acc": 0.9235096, "epoch": 2.715051579887905, "grad_norm": 7.76964807510376, "learning_rate": 2.5664859482439e-06, "loss": 0.47894878, "memory(GiB)": 34.88, "step": 100275, "train_speed(iter/s)": 0.410385 }, { "acc": 0.92860804, "epoch": 2.7151869601711205, "grad_norm": 9.539801597595215, "learning_rate": 2.5659971931594645e-06, "loss": 0.41427755, "memory(GiB)": 34.88, "step": 100280, "train_speed(iter/s)": 0.410386 }, { "acc": 0.94247856, "epoch": 2.7153223404543363, "grad_norm": 4.840551853179932, "learning_rate": 2.5655084685719557e-06, "loss": 0.34794891, "memory(GiB)": 34.88, "step": 100285, "train_speed(iter/s)": 0.410387 }, { "acc": 0.94823351, "epoch": 2.7154577207375517, "grad_norm": 15.940436363220215, "learning_rate": 2.5650197744875055e-06, "loss": 0.33346343, "memory(GiB)": 34.88, "step": 100290, "train_speed(iter/s)": 0.410388 }, { "acc": 0.946031, "epoch": 2.7155931010207675, "grad_norm": 6.45119571685791, "learning_rate": 2.564531110912228e-06, "loss": 0.34252548, "memory(GiB)": 34.88, "step": 100295, "train_speed(iter/s)": 0.410389 }, { "acc": 0.92944937, "epoch": 2.715728481303983, "grad_norm": 10.985121726989746, "learning_rate": 2.56404247785225e-06, "loss": 0.38104405, "memory(GiB)": 34.88, "step": 100300, "train_speed(iter/s)": 0.41039 }, { "acc": 0.9429657, "epoch": 2.7158638615871986, "grad_norm": 9.971153259277344, "learning_rate": 2.563553875313685e-06, "loss": 0.30835323, "memory(GiB)": 34.88, "step": 100305, "train_speed(iter/s)": 0.410391 }, { "acc": 0.9284173, "epoch": 2.715999241870414, "grad_norm": 13.636832237243652, "learning_rate": 2.563065303302665e-06, "loss": 0.50021353, "memory(GiB)": 34.88, "step": 100310, "train_speed(iter/s)": 0.410392 }, { "acc": 0.93074989, "epoch": 2.7161346221536293, "grad_norm": 12.647022247314453, "learning_rate": 2.5625767618253e-06, "loss": 0.36538196, "memory(GiB)": 34.88, "step": 100315, "train_speed(iter/s)": 0.410393 }, { "acc": 0.92920494, "epoch": 2.716270002436845, "grad_norm": 11.445895195007324, "learning_rate": 2.562088250887715e-06, "loss": 0.38545098, "memory(GiB)": 34.88, "step": 100320, "train_speed(iter/s)": 0.410394 }, { "acc": 0.93060875, "epoch": 2.7164053827200605, "grad_norm": 3.9660537242889404, "learning_rate": 2.5615997704960305e-06, "loss": 0.3694258, "memory(GiB)": 34.88, "step": 100325, "train_speed(iter/s)": 0.410395 }, { "acc": 0.94756203, "epoch": 2.7165407630032763, "grad_norm": 4.174529075622559, "learning_rate": 2.561111320656362e-06, "loss": 0.28408346, "memory(GiB)": 34.88, "step": 100330, "train_speed(iter/s)": 0.410397 }, { "acc": 0.92777538, "epoch": 2.7166761432864917, "grad_norm": 6.065781593322754, "learning_rate": 2.5606229013748316e-06, "loss": 0.32028356, "memory(GiB)": 34.88, "step": 100335, "train_speed(iter/s)": 0.410398 }, { "acc": 0.93051596, "epoch": 2.7168115235697075, "grad_norm": 5.192908763885498, "learning_rate": 2.5601345126575526e-06, "loss": 0.41028118, "memory(GiB)": 34.88, "step": 100340, "train_speed(iter/s)": 0.410399 }, { "acc": 0.94123306, "epoch": 2.716946903852923, "grad_norm": 5.456808567047119, "learning_rate": 2.559646154510651e-06, "loss": 0.33488107, "memory(GiB)": 34.88, "step": 100345, "train_speed(iter/s)": 0.4104 }, { "acc": 0.93574085, "epoch": 2.717082284136138, "grad_norm": 6.523322105407715, "learning_rate": 2.559157826940237e-06, "loss": 0.37729137, "memory(GiB)": 34.88, "step": 100350, "train_speed(iter/s)": 0.410401 }, { "acc": 0.94599304, "epoch": 2.717217664419354, "grad_norm": 7.59485387802124, "learning_rate": 2.558669529952432e-06, "loss": 0.36556363, "memory(GiB)": 34.88, "step": 100355, "train_speed(iter/s)": 0.410402 }, { "acc": 0.95540028, "epoch": 2.7173530447025698, "grad_norm": 5.742984294891357, "learning_rate": 2.5581812635533533e-06, "loss": 0.2401382, "memory(GiB)": 34.88, "step": 100360, "train_speed(iter/s)": 0.410403 }, { "acc": 0.93124914, "epoch": 2.717488424985785, "grad_norm": 4.171583652496338, "learning_rate": 2.557693027749114e-06, "loss": 0.4148334, "memory(GiB)": 34.88, "step": 100365, "train_speed(iter/s)": 0.410404 }, { "acc": 0.94693041, "epoch": 2.7176238052690005, "grad_norm": 3.934858560562134, "learning_rate": 2.5572048225458333e-06, "loss": 0.26181712, "memory(GiB)": 34.88, "step": 100370, "train_speed(iter/s)": 0.410405 }, { "acc": 0.93278923, "epoch": 2.7177591855522163, "grad_norm": 8.588373184204102, "learning_rate": 2.5567166479496234e-06, "loss": 0.41608896, "memory(GiB)": 34.88, "step": 100375, "train_speed(iter/s)": 0.410407 }, { "acc": 0.93049679, "epoch": 2.7178945658354317, "grad_norm": 9.771960258483887, "learning_rate": 2.5562285039666056e-06, "loss": 0.33883901, "memory(GiB)": 34.88, "step": 100380, "train_speed(iter/s)": 0.410408 }, { "acc": 0.92508926, "epoch": 2.718029946118647, "grad_norm": 12.357763290405273, "learning_rate": 2.555740390602889e-06, "loss": 0.44245872, "memory(GiB)": 34.88, "step": 100385, "train_speed(iter/s)": 0.410409 }, { "acc": 0.94580193, "epoch": 2.718165326401863, "grad_norm": 7.458549976348877, "learning_rate": 2.55525230786459e-06, "loss": 0.3056241, "memory(GiB)": 34.88, "step": 100390, "train_speed(iter/s)": 0.41041 }, { "acc": 0.92871246, "epoch": 2.7183007066850786, "grad_norm": 3.6570799350738525, "learning_rate": 2.5547642557578236e-06, "loss": 0.38445916, "memory(GiB)": 34.88, "step": 100395, "train_speed(iter/s)": 0.410411 }, { "acc": 0.94616261, "epoch": 2.718436086968294, "grad_norm": 4.596352577209473, "learning_rate": 2.554276234288705e-06, "loss": 0.28238237, "memory(GiB)": 34.88, "step": 100400, "train_speed(iter/s)": 0.410412 }, { "acc": 0.93868294, "epoch": 2.7185714672515093, "grad_norm": 20.131683349609375, "learning_rate": 2.553788243463346e-06, "loss": 0.40813785, "memory(GiB)": 34.88, "step": 100405, "train_speed(iter/s)": 0.410413 }, { "acc": 0.9397934, "epoch": 2.718706847534725, "grad_norm": 6.060650825500488, "learning_rate": 2.5533002832878557e-06, "loss": 0.32524281, "memory(GiB)": 34.88, "step": 100410, "train_speed(iter/s)": 0.410414 }, { "acc": 0.92514591, "epoch": 2.7188422278179405, "grad_norm": 12.081031799316406, "learning_rate": 2.5528123537683548e-06, "loss": 0.37573047, "memory(GiB)": 34.88, "step": 100415, "train_speed(iter/s)": 0.410416 }, { "acc": 0.93280125, "epoch": 2.7189776081011563, "grad_norm": 14.291545867919922, "learning_rate": 2.5523244549109493e-06, "loss": 0.4069129, "memory(GiB)": 34.88, "step": 100420, "train_speed(iter/s)": 0.410417 }, { "acc": 0.94844227, "epoch": 2.7191129883843717, "grad_norm": 5.540064334869385, "learning_rate": 2.551836586721753e-06, "loss": 0.31620588, "memory(GiB)": 34.88, "step": 100425, "train_speed(iter/s)": 0.410418 }, { "acc": 0.93038464, "epoch": 2.7192483686675875, "grad_norm": 6.427107810974121, "learning_rate": 2.5513487492068774e-06, "loss": 0.42199855, "memory(GiB)": 34.88, "step": 100430, "train_speed(iter/s)": 0.410419 }, { "acc": 0.93857555, "epoch": 2.719383748950803, "grad_norm": 6.427586078643799, "learning_rate": 2.5508609423724357e-06, "loss": 0.31936913, "memory(GiB)": 34.88, "step": 100435, "train_speed(iter/s)": 0.41042 }, { "acc": 0.92209282, "epoch": 2.719519129234018, "grad_norm": 11.935712814331055, "learning_rate": 2.550373166224535e-06, "loss": 0.49510584, "memory(GiB)": 34.88, "step": 100440, "train_speed(iter/s)": 0.410421 }, { "acc": 0.95027351, "epoch": 2.719654509517234, "grad_norm": 3.254701614379883, "learning_rate": 2.5498854207692876e-06, "loss": 0.23925743, "memory(GiB)": 34.88, "step": 100445, "train_speed(iter/s)": 0.410422 }, { "acc": 0.93612633, "epoch": 2.7197898898004493, "grad_norm": 14.368040084838867, "learning_rate": 2.5493977060128046e-06, "loss": 0.31611474, "memory(GiB)": 34.88, "step": 100450, "train_speed(iter/s)": 0.410424 }, { "acc": 0.93098774, "epoch": 2.719925270083665, "grad_norm": 7.542089462280273, "learning_rate": 2.5489100219611928e-06, "loss": 0.39410129, "memory(GiB)": 34.88, "step": 100455, "train_speed(iter/s)": 0.410425 }, { "acc": 0.94178877, "epoch": 2.7200606503668805, "grad_norm": 11.69884967803955, "learning_rate": 2.5484223686205633e-06, "loss": 0.37939003, "memory(GiB)": 34.88, "step": 100460, "train_speed(iter/s)": 0.410426 }, { "acc": 0.9480032, "epoch": 2.7201960306500963, "grad_norm": 3.6432127952575684, "learning_rate": 2.5479347459970238e-06, "loss": 0.22970896, "memory(GiB)": 34.88, "step": 100465, "train_speed(iter/s)": 0.410427 }, { "acc": 0.93215218, "epoch": 2.7203314109333117, "grad_norm": 7.918500900268555, "learning_rate": 2.547447154096685e-06, "loss": 0.39713836, "memory(GiB)": 34.88, "step": 100470, "train_speed(iter/s)": 0.410428 }, { "acc": 0.93025131, "epoch": 2.720466791216527, "grad_norm": 3.4350733757019043, "learning_rate": 2.546959592925652e-06, "loss": 0.36918266, "memory(GiB)": 34.88, "step": 100475, "train_speed(iter/s)": 0.410429 }, { "acc": 0.93085318, "epoch": 2.720602171499743, "grad_norm": 6.810367584228516, "learning_rate": 2.5464720624900337e-06, "loss": 0.39032927, "memory(GiB)": 34.88, "step": 100480, "train_speed(iter/s)": 0.41043 }, { "acc": 0.94390106, "epoch": 2.720737551782958, "grad_norm": 4.805307865142822, "learning_rate": 2.545984562795939e-06, "loss": 0.34290717, "memory(GiB)": 34.88, "step": 100485, "train_speed(iter/s)": 0.410431 }, { "acc": 0.93978882, "epoch": 2.720872932066174, "grad_norm": 5.072778224945068, "learning_rate": 2.5454970938494724e-06, "loss": 0.40726204, "memory(GiB)": 34.88, "step": 100490, "train_speed(iter/s)": 0.410432 }, { "acc": 0.94726009, "epoch": 2.7210083123493893, "grad_norm": 3.8372647762298584, "learning_rate": 2.54500965565674e-06, "loss": 0.28963728, "memory(GiB)": 34.88, "step": 100495, "train_speed(iter/s)": 0.410434 }, { "acc": 0.9357048, "epoch": 2.721143692632605, "grad_norm": 7.307981014251709, "learning_rate": 2.54452224822385e-06, "loss": 0.34911942, "memory(GiB)": 34.88, "step": 100500, "train_speed(iter/s)": 0.410435 }, { "acc": 0.9357069, "epoch": 2.7212790729158205, "grad_norm": 6.561330795288086, "learning_rate": 2.5440348715569086e-06, "loss": 0.34012966, "memory(GiB)": 34.88, "step": 100505, "train_speed(iter/s)": 0.410436 }, { "acc": 0.92242384, "epoch": 2.721414453199036, "grad_norm": 5.984534740447998, "learning_rate": 2.5435475256620183e-06, "loss": 0.41257195, "memory(GiB)": 34.88, "step": 100510, "train_speed(iter/s)": 0.410437 }, { "acc": 0.93713474, "epoch": 2.7215498334822517, "grad_norm": 6.681818008422852, "learning_rate": 2.5430602105452856e-06, "loss": 0.4045743, "memory(GiB)": 34.88, "step": 100515, "train_speed(iter/s)": 0.410438 }, { "acc": 0.93805504, "epoch": 2.7216852137654675, "grad_norm": 9.471447944641113, "learning_rate": 2.542572926212815e-06, "loss": 0.36558876, "memory(GiB)": 34.88, "step": 100520, "train_speed(iter/s)": 0.410439 }, { "acc": 0.9245018, "epoch": 2.721820594048683, "grad_norm": 6.655158996582031, "learning_rate": 2.542085672670713e-06, "loss": 0.42707243, "memory(GiB)": 34.88, "step": 100525, "train_speed(iter/s)": 0.41044 }, { "acc": 0.93808136, "epoch": 2.721955974331898, "grad_norm": 4.183603286743164, "learning_rate": 2.5415984499250795e-06, "loss": 0.36523523, "memory(GiB)": 34.88, "step": 100530, "train_speed(iter/s)": 0.410442 }, { "acc": 0.93718281, "epoch": 2.722091354615114, "grad_norm": 8.69653034210205, "learning_rate": 2.5411112579820193e-06, "loss": 0.37904305, "memory(GiB)": 34.88, "step": 100535, "train_speed(iter/s)": 0.410443 }, { "acc": 0.94605036, "epoch": 2.7222267348983293, "grad_norm": 3.8648617267608643, "learning_rate": 2.5406240968476378e-06, "loss": 0.29389579, "memory(GiB)": 34.88, "step": 100540, "train_speed(iter/s)": 0.410444 }, { "acc": 0.92876778, "epoch": 2.7223621151815447, "grad_norm": 13.364212036132812, "learning_rate": 2.540136966528033e-06, "loss": 0.41599493, "memory(GiB)": 34.88, "step": 100545, "train_speed(iter/s)": 0.410445 }, { "acc": 0.92319965, "epoch": 2.7224974954647605, "grad_norm": 4.838221549987793, "learning_rate": 2.5396498670293107e-06, "loss": 0.4912962, "memory(GiB)": 34.88, "step": 100550, "train_speed(iter/s)": 0.410446 }, { "acc": 0.91798897, "epoch": 2.7226328757479763, "grad_norm": 6.964335918426514, "learning_rate": 2.539162798357572e-06, "loss": 0.44616899, "memory(GiB)": 34.88, "step": 100555, "train_speed(iter/s)": 0.410447 }, { "acc": 0.93153839, "epoch": 2.7227682560311917, "grad_norm": 9.696682929992676, "learning_rate": 2.53867576051892e-06, "loss": 0.32422764, "memory(GiB)": 34.88, "step": 100560, "train_speed(iter/s)": 0.410448 }, { "acc": 0.93309383, "epoch": 2.722903636314407, "grad_norm": 5.977554798126221, "learning_rate": 2.538188753519452e-06, "loss": 0.36987662, "memory(GiB)": 34.88, "step": 100565, "train_speed(iter/s)": 0.41045 }, { "acc": 0.93844624, "epoch": 2.723039016597623, "grad_norm": 7.459624290466309, "learning_rate": 2.5377017773652705e-06, "loss": 0.3611583, "memory(GiB)": 34.88, "step": 100570, "train_speed(iter/s)": 0.410451 }, { "acc": 0.9431303, "epoch": 2.723174396880838, "grad_norm": 5.752199649810791, "learning_rate": 2.5372148320624785e-06, "loss": 0.35988317, "memory(GiB)": 34.88, "step": 100575, "train_speed(iter/s)": 0.410452 }, { "acc": 0.92508087, "epoch": 2.723309777164054, "grad_norm": 8.289630889892578, "learning_rate": 2.5367279176171717e-06, "loss": 0.36767805, "memory(GiB)": 34.88, "step": 100580, "train_speed(iter/s)": 0.410453 }, { "acc": 0.91894836, "epoch": 2.7234451574472693, "grad_norm": 3.2117815017700195, "learning_rate": 2.536241034035451e-06, "loss": 0.45507755, "memory(GiB)": 34.88, "step": 100585, "train_speed(iter/s)": 0.410454 }, { "acc": 0.93893814, "epoch": 2.723580537730485, "grad_norm": 8.615726470947266, "learning_rate": 2.5357541813234167e-06, "loss": 0.2927937, "memory(GiB)": 34.88, "step": 100590, "train_speed(iter/s)": 0.410455 }, { "acc": 0.92326126, "epoch": 2.7237159180137005, "grad_norm": 9.360421180725098, "learning_rate": 2.535267359487168e-06, "loss": 0.3571404, "memory(GiB)": 34.88, "step": 100595, "train_speed(iter/s)": 0.410456 }, { "acc": 0.93169193, "epoch": 2.723851298296916, "grad_norm": 8.615266799926758, "learning_rate": 2.5347805685328016e-06, "loss": 0.40658855, "memory(GiB)": 34.88, "step": 100600, "train_speed(iter/s)": 0.410458 }, { "acc": 0.94767742, "epoch": 2.7239866785801317, "grad_norm": 2.0045995712280273, "learning_rate": 2.5342938084664153e-06, "loss": 0.29265246, "memory(GiB)": 34.88, "step": 100605, "train_speed(iter/s)": 0.410459 }, { "acc": 0.93613091, "epoch": 2.724122058863347, "grad_norm": 8.253767967224121, "learning_rate": 2.53380707929411e-06, "loss": 0.43515987, "memory(GiB)": 34.88, "step": 100610, "train_speed(iter/s)": 0.41046 }, { "acc": 0.93764381, "epoch": 2.724257439146563, "grad_norm": 5.520955562591553, "learning_rate": 2.5333203810219785e-06, "loss": 0.39360414, "memory(GiB)": 34.88, "step": 100615, "train_speed(iter/s)": 0.410461 }, { "acc": 0.93355885, "epoch": 2.724392819429778, "grad_norm": 10.737438201904297, "learning_rate": 2.5328337136561197e-06, "loss": 0.36136897, "memory(GiB)": 34.88, "step": 100620, "train_speed(iter/s)": 0.410462 }, { "acc": 0.9419548, "epoch": 2.724528199712994, "grad_norm": 12.274744033813477, "learning_rate": 2.5323470772026305e-06, "loss": 0.3783565, "memory(GiB)": 34.88, "step": 100625, "train_speed(iter/s)": 0.410463 }, { "acc": 0.95059776, "epoch": 2.7246635799962093, "grad_norm": 8.699292182922363, "learning_rate": 2.5318604716676078e-06, "loss": 0.23329687, "memory(GiB)": 34.88, "step": 100630, "train_speed(iter/s)": 0.410464 }, { "acc": 0.94233284, "epoch": 2.7247989602794247, "grad_norm": 15.386198997497559, "learning_rate": 2.531373897057145e-06, "loss": 0.37200675, "memory(GiB)": 34.88, "step": 100635, "train_speed(iter/s)": 0.410466 }, { "acc": 0.94225483, "epoch": 2.7249343405626405, "grad_norm": 5.3290486335754395, "learning_rate": 2.5308873533773375e-06, "loss": 0.36143949, "memory(GiB)": 34.88, "step": 100640, "train_speed(iter/s)": 0.410467 }, { "acc": 0.93085413, "epoch": 2.725069720845856, "grad_norm": 9.163238525390625, "learning_rate": 2.5304008406342816e-06, "loss": 0.39877236, "memory(GiB)": 34.88, "step": 100645, "train_speed(iter/s)": 0.410468 }, { "acc": 0.92386198, "epoch": 2.7252051011290717, "grad_norm": 12.052301406860352, "learning_rate": 2.5299143588340737e-06, "loss": 0.4150207, "memory(GiB)": 34.88, "step": 100650, "train_speed(iter/s)": 0.410469 }, { "acc": 0.94548244, "epoch": 2.725340481412287, "grad_norm": 7.442874431610107, "learning_rate": 2.5294279079828034e-06, "loss": 0.30889449, "memory(GiB)": 34.88, "step": 100655, "train_speed(iter/s)": 0.41047 }, { "acc": 0.93028107, "epoch": 2.725475861695503, "grad_norm": 9.205192565917969, "learning_rate": 2.528941488086567e-06, "loss": 0.39729466, "memory(GiB)": 34.88, "step": 100660, "train_speed(iter/s)": 0.410471 }, { "acc": 0.93220768, "epoch": 2.725611241978718, "grad_norm": 11.038980484008789, "learning_rate": 2.528455099151459e-06, "loss": 0.36572266, "memory(GiB)": 34.88, "step": 100665, "train_speed(iter/s)": 0.410472 }, { "acc": 0.91687584, "epoch": 2.7257466222619335, "grad_norm": 11.792945861816406, "learning_rate": 2.527968741183569e-06, "loss": 0.46531281, "memory(GiB)": 34.88, "step": 100670, "train_speed(iter/s)": 0.410473 }, { "acc": 0.93012695, "epoch": 2.7258820025451493, "grad_norm": 9.564409255981445, "learning_rate": 2.5274824141889916e-06, "loss": 0.37668753, "memory(GiB)": 34.88, "step": 100675, "train_speed(iter/s)": 0.410474 }, { "acc": 0.94368696, "epoch": 2.726017382828365, "grad_norm": 9.956689834594727, "learning_rate": 2.5269961181738194e-06, "loss": 0.3659781, "memory(GiB)": 34.88, "step": 100680, "train_speed(iter/s)": 0.410476 }, { "acc": 0.93202, "epoch": 2.7261527631115805, "grad_norm": 7.061216354370117, "learning_rate": 2.526509853144145e-06, "loss": 0.39149404, "memory(GiB)": 34.88, "step": 100685, "train_speed(iter/s)": 0.410477 }, { "acc": 0.91668892, "epoch": 2.726288143394796, "grad_norm": 7.2502312660217285, "learning_rate": 2.5260236191060556e-06, "loss": 0.45735803, "memory(GiB)": 34.88, "step": 100690, "train_speed(iter/s)": 0.410478 }, { "acc": 0.95120354, "epoch": 2.7264235236780117, "grad_norm": 5.937707424163818, "learning_rate": 2.525537416065648e-06, "loss": 0.22742677, "memory(GiB)": 34.88, "step": 100695, "train_speed(iter/s)": 0.410479 }, { "acc": 0.94271984, "epoch": 2.726558903961227, "grad_norm": 6.0125627517700195, "learning_rate": 2.5250512440290104e-06, "loss": 0.33198307, "memory(GiB)": 34.88, "step": 100700, "train_speed(iter/s)": 0.41048 }, { "acc": 0.93820801, "epoch": 2.7266942842444424, "grad_norm": 7.381727695465088, "learning_rate": 2.524565103002231e-06, "loss": 0.40433311, "memory(GiB)": 34.88, "step": 100705, "train_speed(iter/s)": 0.410481 }, { "acc": 0.93482571, "epoch": 2.726829664527658, "grad_norm": 9.124886512756348, "learning_rate": 2.524078992991402e-06, "loss": 0.40083942, "memory(GiB)": 34.88, "step": 100710, "train_speed(iter/s)": 0.410482 }, { "acc": 0.94666586, "epoch": 2.726965044810874, "grad_norm": 6.302928924560547, "learning_rate": 2.523592914002611e-06, "loss": 0.30553644, "memory(GiB)": 34.88, "step": 100715, "train_speed(iter/s)": 0.410483 }, { "acc": 0.93102722, "epoch": 2.7271004250940893, "grad_norm": 7.44223165512085, "learning_rate": 2.523106866041951e-06, "loss": 0.38908458, "memory(GiB)": 34.88, "step": 100720, "train_speed(iter/s)": 0.410485 }, { "acc": 0.93059711, "epoch": 2.7272358053773047, "grad_norm": 83.96781158447266, "learning_rate": 2.5226208491155043e-06, "loss": 0.42444305, "memory(GiB)": 34.88, "step": 100725, "train_speed(iter/s)": 0.410486 }, { "acc": 0.94022188, "epoch": 2.7273711856605205, "grad_norm": 3.1357033252716064, "learning_rate": 2.5221348632293663e-06, "loss": 0.3336925, "memory(GiB)": 34.88, "step": 100730, "train_speed(iter/s)": 0.410487 }, { "acc": 0.95057278, "epoch": 2.727506565943736, "grad_norm": 2.7193007469177246, "learning_rate": 2.5216489083896204e-06, "loss": 0.28104815, "memory(GiB)": 34.88, "step": 100735, "train_speed(iter/s)": 0.410488 }, { "acc": 0.9404747, "epoch": 2.7276419462269517, "grad_norm": 9.206999778747559, "learning_rate": 2.5211629846023574e-06, "loss": 0.32804816, "memory(GiB)": 34.88, "step": 100740, "train_speed(iter/s)": 0.410489 }, { "acc": 0.92364368, "epoch": 2.727777326510167, "grad_norm": 13.812459945678711, "learning_rate": 2.5206770918736593e-06, "loss": 0.47931743, "memory(GiB)": 34.88, "step": 100745, "train_speed(iter/s)": 0.41049 }, { "acc": 0.9436533, "epoch": 2.727912706793383, "grad_norm": 1.699708104133606, "learning_rate": 2.520191230209617e-06, "loss": 0.27839327, "memory(GiB)": 34.88, "step": 100750, "train_speed(iter/s)": 0.410491 }, { "acc": 0.92707834, "epoch": 2.728048087076598, "grad_norm": 24.28106689453125, "learning_rate": 2.5197053996163173e-06, "loss": 0.39712474, "memory(GiB)": 34.88, "step": 100755, "train_speed(iter/s)": 0.410492 }, { "acc": 0.95398903, "epoch": 2.7281834673598135, "grad_norm": 2.087068796157837, "learning_rate": 2.5192196000998414e-06, "loss": 0.27078915, "memory(GiB)": 34.88, "step": 100760, "train_speed(iter/s)": 0.410493 }, { "acc": 0.94117489, "epoch": 2.7283188476430293, "grad_norm": 4.833062171936035, "learning_rate": 2.5187338316662813e-06, "loss": 0.29786634, "memory(GiB)": 34.88, "step": 100765, "train_speed(iter/s)": 0.410495 }, { "acc": 0.93061743, "epoch": 2.7284542279262447, "grad_norm": 5.721688747406006, "learning_rate": 2.5182480943217176e-06, "loss": 0.38307667, "memory(GiB)": 34.88, "step": 100770, "train_speed(iter/s)": 0.410496 }, { "acc": 0.93169975, "epoch": 2.7285896082094605, "grad_norm": 8.059205055236816, "learning_rate": 2.5177623880722386e-06, "loss": 0.38704343, "memory(GiB)": 34.88, "step": 100775, "train_speed(iter/s)": 0.410497 }, { "acc": 0.93814249, "epoch": 2.728724988492676, "grad_norm": 3.7572484016418457, "learning_rate": 2.517276712923925e-06, "loss": 0.42622061, "memory(GiB)": 34.88, "step": 100780, "train_speed(iter/s)": 0.410498 }, { "acc": 0.93640232, "epoch": 2.7288603687758917, "grad_norm": 10.13757610321045, "learning_rate": 2.5167910688828624e-06, "loss": 0.36158164, "memory(GiB)": 34.88, "step": 100785, "train_speed(iter/s)": 0.410499 }, { "acc": 0.92720232, "epoch": 2.728995749059107, "grad_norm": 9.402359008789062, "learning_rate": 2.5163054559551363e-06, "loss": 0.41766138, "memory(GiB)": 34.88, "step": 100790, "train_speed(iter/s)": 0.4105 }, { "acc": 0.92773581, "epoch": 2.7291311293423224, "grad_norm": 9.972025871276855, "learning_rate": 2.5158198741468247e-06, "loss": 0.41972685, "memory(GiB)": 34.88, "step": 100795, "train_speed(iter/s)": 0.410501 }, { "acc": 0.92250195, "epoch": 2.729266509625538, "grad_norm": 19.23052406311035, "learning_rate": 2.515334323464018e-06, "loss": 0.54131231, "memory(GiB)": 34.88, "step": 100800, "train_speed(iter/s)": 0.410502 }, { "acc": 0.93346128, "epoch": 2.7294018899087535, "grad_norm": 9.963563919067383, "learning_rate": 2.514848803912793e-06, "loss": 0.3907594, "memory(GiB)": 34.88, "step": 100805, "train_speed(iter/s)": 0.410503 }, { "acc": 0.94746475, "epoch": 2.7295372701919693, "grad_norm": 7.753678321838379, "learning_rate": 2.514363315499235e-06, "loss": 0.28639216, "memory(GiB)": 34.88, "step": 100810, "train_speed(iter/s)": 0.410504 }, { "acc": 0.93934307, "epoch": 2.7296726504751847, "grad_norm": 9.008110046386719, "learning_rate": 2.5138778582294203e-06, "loss": 0.36564474, "memory(GiB)": 34.88, "step": 100815, "train_speed(iter/s)": 0.410505 }, { "acc": 0.93010273, "epoch": 2.7298080307584005, "grad_norm": 6.444643020629883, "learning_rate": 2.5133924321094384e-06, "loss": 0.42181826, "memory(GiB)": 34.88, "step": 100820, "train_speed(iter/s)": 0.410506 }, { "acc": 0.91432934, "epoch": 2.729943411041616, "grad_norm": 12.485583305358887, "learning_rate": 2.512907037145366e-06, "loss": 0.4643023, "memory(GiB)": 34.88, "step": 100825, "train_speed(iter/s)": 0.410507 }, { "acc": 0.91562958, "epoch": 2.7300787913248312, "grad_norm": 14.066250801086426, "learning_rate": 2.5124216733432788e-06, "loss": 0.46560812, "memory(GiB)": 34.88, "step": 100830, "train_speed(iter/s)": 0.410508 }, { "acc": 0.93214092, "epoch": 2.730214171608047, "grad_norm": 22.432937622070312, "learning_rate": 2.5119363407092666e-06, "loss": 0.44842596, "memory(GiB)": 34.88, "step": 100835, "train_speed(iter/s)": 0.410509 }, { "acc": 0.93522224, "epoch": 2.730349551891263, "grad_norm": 15.56583023071289, "learning_rate": 2.5114510392494016e-06, "loss": 0.47190337, "memory(GiB)": 34.88, "step": 100840, "train_speed(iter/s)": 0.410511 }, { "acc": 0.95082016, "epoch": 2.730484932174478, "grad_norm": 3.218855857849121, "learning_rate": 2.5109657689697685e-06, "loss": 0.25904391, "memory(GiB)": 34.88, "step": 100845, "train_speed(iter/s)": 0.410512 }, { "acc": 0.93957243, "epoch": 2.7306203124576935, "grad_norm": 4.876727104187012, "learning_rate": 2.5104805298764397e-06, "loss": 0.31803775, "memory(GiB)": 34.88, "step": 100850, "train_speed(iter/s)": 0.410513 }, { "acc": 0.92212362, "epoch": 2.7307556927409093, "grad_norm": 15.1879243850708, "learning_rate": 2.5099953219755007e-06, "loss": 0.40772972, "memory(GiB)": 34.88, "step": 100855, "train_speed(iter/s)": 0.410514 }, { "acc": 0.92867908, "epoch": 2.7308910730241247, "grad_norm": 5.966320991516113, "learning_rate": 2.509510145273025e-06, "loss": 0.37206068, "memory(GiB)": 34.88, "step": 100860, "train_speed(iter/s)": 0.410515 }, { "acc": 0.93798971, "epoch": 2.73102645330734, "grad_norm": 7.620375156402588, "learning_rate": 2.509024999775094e-06, "loss": 0.43653989, "memory(GiB)": 34.88, "step": 100865, "train_speed(iter/s)": 0.410516 }, { "acc": 0.94362164, "epoch": 2.731161833590556, "grad_norm": 4.798404693603516, "learning_rate": 2.5085398854877817e-06, "loss": 0.32957549, "memory(GiB)": 34.88, "step": 100870, "train_speed(iter/s)": 0.410517 }, { "acc": 0.93020868, "epoch": 2.7312972138737717, "grad_norm": 8.267309188842773, "learning_rate": 2.5080548024171658e-06, "loss": 0.37528396, "memory(GiB)": 34.88, "step": 100875, "train_speed(iter/s)": 0.410518 }, { "acc": 0.9304966, "epoch": 2.731432594156987, "grad_norm": 5.645974159240723, "learning_rate": 2.5075697505693257e-06, "loss": 0.43218493, "memory(GiB)": 34.88, "step": 100880, "train_speed(iter/s)": 0.410519 }, { "acc": 0.94482746, "epoch": 2.7315679744402024, "grad_norm": 5.185060977935791, "learning_rate": 2.5070847299503317e-06, "loss": 0.29500632, "memory(GiB)": 34.88, "step": 100885, "train_speed(iter/s)": 0.41052 }, { "acc": 0.93493919, "epoch": 2.731703354723418, "grad_norm": 7.735785007476807, "learning_rate": 2.506599740566268e-06, "loss": 0.3919064, "memory(GiB)": 34.88, "step": 100890, "train_speed(iter/s)": 0.410522 }, { "acc": 0.92583513, "epoch": 2.7318387350066335, "grad_norm": 9.503984451293945, "learning_rate": 2.506114782423203e-06, "loss": 0.45750284, "memory(GiB)": 34.88, "step": 100895, "train_speed(iter/s)": 0.410523 }, { "acc": 0.93888416, "epoch": 2.7319741152898493, "grad_norm": 10.684565544128418, "learning_rate": 2.5056298555272164e-06, "loss": 0.26982012, "memory(GiB)": 34.88, "step": 100900, "train_speed(iter/s)": 0.410524 }, { "acc": 0.93511391, "epoch": 2.7321094955730647, "grad_norm": 6.679852485656738, "learning_rate": 2.505144959884379e-06, "loss": 0.42801065, "memory(GiB)": 34.88, "step": 100905, "train_speed(iter/s)": 0.410525 }, { "acc": 0.93979025, "epoch": 2.7322448758562805, "grad_norm": 10.752275466918945, "learning_rate": 2.504660095500766e-06, "loss": 0.42737322, "memory(GiB)": 34.88, "step": 100910, "train_speed(iter/s)": 0.410526 }, { "acc": 0.94946194, "epoch": 2.732380256139496, "grad_norm": 6.934587478637695, "learning_rate": 2.5041752623824552e-06, "loss": 0.22354314, "memory(GiB)": 34.88, "step": 100915, "train_speed(iter/s)": 0.410527 }, { "acc": 0.93906326, "epoch": 2.7325156364227112, "grad_norm": 5.498942852020264, "learning_rate": 2.503690460535513e-06, "loss": 0.33692093, "memory(GiB)": 34.88, "step": 100920, "train_speed(iter/s)": 0.410528 }, { "acc": 0.93006878, "epoch": 2.732651016705927, "grad_norm": 8.323456764221191, "learning_rate": 2.5032056899660197e-06, "loss": 0.40726576, "memory(GiB)": 34.88, "step": 100925, "train_speed(iter/s)": 0.410529 }, { "acc": 0.92397041, "epoch": 2.7327863969891424, "grad_norm": 3.9126076698303223, "learning_rate": 2.502720950680043e-06, "loss": 0.39745147, "memory(GiB)": 34.88, "step": 100930, "train_speed(iter/s)": 0.41053 }, { "acc": 0.9399807, "epoch": 2.732921777272358, "grad_norm": 10.325460433959961, "learning_rate": 2.5022362426836593e-06, "loss": 0.41301513, "memory(GiB)": 34.88, "step": 100935, "train_speed(iter/s)": 0.410532 }, { "acc": 0.93031073, "epoch": 2.7330571575555735, "grad_norm": 5.379417419433594, "learning_rate": 2.5017515659829346e-06, "loss": 0.41103258, "memory(GiB)": 34.88, "step": 100940, "train_speed(iter/s)": 0.410533 }, { "acc": 0.93836355, "epoch": 2.7331925378387893, "grad_norm": 6.428805351257324, "learning_rate": 2.5012669205839474e-06, "loss": 0.35714505, "memory(GiB)": 34.88, "step": 100945, "train_speed(iter/s)": 0.410534 }, { "acc": 0.93517265, "epoch": 2.7333279181220047, "grad_norm": 9.60142993927002, "learning_rate": 2.5007823064927655e-06, "loss": 0.44771013, "memory(GiB)": 34.88, "step": 100950, "train_speed(iter/s)": 0.410535 }, { "acc": 0.91821785, "epoch": 2.73346329840522, "grad_norm": 8.898277282714844, "learning_rate": 2.5002977237154565e-06, "loss": 0.49896779, "memory(GiB)": 34.88, "step": 100955, "train_speed(iter/s)": 0.410536 }, { "acc": 0.93475628, "epoch": 2.733598678688436, "grad_norm": 7.520143508911133, "learning_rate": 2.499813172258097e-06, "loss": 0.38754289, "memory(GiB)": 34.88, "step": 100960, "train_speed(iter/s)": 0.410537 }, { "acc": 0.92582779, "epoch": 2.7337340589716512, "grad_norm": 5.905246734619141, "learning_rate": 2.4993286521267522e-06, "loss": 0.40289621, "memory(GiB)": 34.88, "step": 100965, "train_speed(iter/s)": 0.410538 }, { "acc": 0.93283138, "epoch": 2.733869439254867, "grad_norm": 6.946840763092041, "learning_rate": 2.498844163327495e-06, "loss": 0.40310731, "memory(GiB)": 34.88, "step": 100970, "train_speed(iter/s)": 0.410539 }, { "acc": 0.92776966, "epoch": 2.7340048195380824, "grad_norm": 5.148767471313477, "learning_rate": 2.4983597058663893e-06, "loss": 0.38785982, "memory(GiB)": 34.88, "step": 100975, "train_speed(iter/s)": 0.41054 }, { "acc": 0.94216595, "epoch": 2.734140199821298, "grad_norm": 7.011788368225098, "learning_rate": 2.497875279749511e-06, "loss": 0.27128644, "memory(GiB)": 34.88, "step": 100980, "train_speed(iter/s)": 0.410541 }, { "acc": 0.94352188, "epoch": 2.7342755801045135, "grad_norm": 13.173748016357422, "learning_rate": 2.4973908849829237e-06, "loss": 0.33574202, "memory(GiB)": 34.88, "step": 100985, "train_speed(iter/s)": 0.410542 }, { "acc": 0.9347352, "epoch": 2.734410960387729, "grad_norm": 8.184060096740723, "learning_rate": 2.4969065215726964e-06, "loss": 0.30648465, "memory(GiB)": 34.88, "step": 100990, "train_speed(iter/s)": 0.410543 }, { "acc": 0.9232563, "epoch": 2.7345463406709447, "grad_norm": 6.2574920654296875, "learning_rate": 2.496422189524899e-06, "loss": 0.40579147, "memory(GiB)": 34.88, "step": 100995, "train_speed(iter/s)": 0.410544 }, { "acc": 0.93151283, "epoch": 2.7346817209541605, "grad_norm": 9.313365936279297, "learning_rate": 2.495937888845594e-06, "loss": 0.38822904, "memory(GiB)": 34.88, "step": 101000, "train_speed(iter/s)": 0.410545 }, { "acc": 0.94752331, "epoch": 2.734817101237376, "grad_norm": 4.983224868774414, "learning_rate": 2.4954536195408535e-06, "loss": 0.30595527, "memory(GiB)": 34.88, "step": 101005, "train_speed(iter/s)": 0.410546 }, { "acc": 0.93130646, "epoch": 2.7349524815205912, "grad_norm": 19.113365173339844, "learning_rate": 2.4949693816167374e-06, "loss": 0.40518389, "memory(GiB)": 34.88, "step": 101010, "train_speed(iter/s)": 0.410548 }, { "acc": 0.92949944, "epoch": 2.735087861803807, "grad_norm": 7.904709815979004, "learning_rate": 2.4944851750793197e-06, "loss": 0.32015285, "memory(GiB)": 34.88, "step": 101015, "train_speed(iter/s)": 0.410549 }, { "acc": 0.93339872, "epoch": 2.7352232420870224, "grad_norm": 7.913595676422119, "learning_rate": 2.4940009999346594e-06, "loss": 0.40592213, "memory(GiB)": 34.88, "step": 101020, "train_speed(iter/s)": 0.41055 }, { "acc": 0.93829679, "epoch": 2.7353586223702377, "grad_norm": 4.684904098510742, "learning_rate": 2.493516856188825e-06, "loss": 0.35010695, "memory(GiB)": 34.88, "step": 101025, "train_speed(iter/s)": 0.410551 }, { "acc": 0.93483, "epoch": 2.7354940026534535, "grad_norm": 19.224576950073242, "learning_rate": 2.4930327438478825e-06, "loss": 0.3728704, "memory(GiB)": 34.88, "step": 101030, "train_speed(iter/s)": 0.410552 }, { "acc": 0.94152412, "epoch": 2.7356293829366694, "grad_norm": 3.6135473251342773, "learning_rate": 2.492548662917892e-06, "loss": 0.3610992, "memory(GiB)": 34.88, "step": 101035, "train_speed(iter/s)": 0.410553 }, { "acc": 0.92715244, "epoch": 2.7357647632198847, "grad_norm": 9.633102416992188, "learning_rate": 2.4920646134049225e-06, "loss": 0.37819102, "memory(GiB)": 34.88, "step": 101040, "train_speed(iter/s)": 0.410554 }, { "acc": 0.94022484, "epoch": 2.7359001435031, "grad_norm": 9.807432174682617, "learning_rate": 2.4915805953150315e-06, "loss": 0.31209798, "memory(GiB)": 34.88, "step": 101045, "train_speed(iter/s)": 0.410555 }, { "acc": 0.93105164, "epoch": 2.736035523786316, "grad_norm": 8.879212379455566, "learning_rate": 2.49109660865429e-06, "loss": 0.39280119, "memory(GiB)": 34.88, "step": 101050, "train_speed(iter/s)": 0.410557 }, { "acc": 0.93004723, "epoch": 2.7361709040695312, "grad_norm": 8.022193908691406, "learning_rate": 2.490612653428755e-06, "loss": 0.38493638, "memory(GiB)": 34.88, "step": 101055, "train_speed(iter/s)": 0.410558 }, { "acc": 0.94244251, "epoch": 2.736306284352747, "grad_norm": 7.400404453277588, "learning_rate": 2.4901287296444904e-06, "loss": 0.32347109, "memory(GiB)": 34.88, "step": 101060, "train_speed(iter/s)": 0.410559 }, { "acc": 0.92885551, "epoch": 2.7364416646359624, "grad_norm": 5.976565837860107, "learning_rate": 2.4896448373075595e-06, "loss": 0.4329917, "memory(GiB)": 34.88, "step": 101065, "train_speed(iter/s)": 0.41056 }, { "acc": 0.93306723, "epoch": 2.736577044919178, "grad_norm": 10.911872863769531, "learning_rate": 2.4891609764240253e-06, "loss": 0.35330701, "memory(GiB)": 34.88, "step": 101070, "train_speed(iter/s)": 0.410561 }, { "acc": 0.91490784, "epoch": 2.7367124252023936, "grad_norm": 6.126951694488525, "learning_rate": 2.4886771469999464e-06, "loss": 0.53669128, "memory(GiB)": 34.88, "step": 101075, "train_speed(iter/s)": 0.410562 }, { "acc": 0.92565689, "epoch": 2.736847805485609, "grad_norm": 7.0655903816223145, "learning_rate": 2.4881933490413814e-06, "loss": 0.42366943, "memory(GiB)": 34.88, "step": 101080, "train_speed(iter/s)": 0.410563 }, { "acc": 0.93919029, "epoch": 2.7369831857688247, "grad_norm": 4.677055835723877, "learning_rate": 2.487709582554397e-06, "loss": 0.30607958, "memory(GiB)": 34.88, "step": 101085, "train_speed(iter/s)": 0.410564 }, { "acc": 0.93041821, "epoch": 2.73711856605204, "grad_norm": 5.040460586547852, "learning_rate": 2.487225847545049e-06, "loss": 0.45182128, "memory(GiB)": 34.88, "step": 101090, "train_speed(iter/s)": 0.410566 }, { "acc": 0.94060898, "epoch": 2.737253946335256, "grad_norm": 17.283451080322266, "learning_rate": 2.486742144019398e-06, "loss": 0.41652145, "memory(GiB)": 34.88, "step": 101095, "train_speed(iter/s)": 0.410567 }, { "acc": 0.93463545, "epoch": 2.7373893266184712, "grad_norm": 43.67898178100586, "learning_rate": 2.486258471983504e-06, "loss": 0.41231604, "memory(GiB)": 34.88, "step": 101100, "train_speed(iter/s)": 0.410568 }, { "acc": 0.94021502, "epoch": 2.737524706901687, "grad_norm": 6.229604244232178, "learning_rate": 2.485774831443427e-06, "loss": 0.30633426, "memory(GiB)": 34.88, "step": 101105, "train_speed(iter/s)": 0.410569 }, { "acc": 0.94056301, "epoch": 2.7376600871849024, "grad_norm": 6.6789703369140625, "learning_rate": 2.485291222405222e-06, "loss": 0.36634016, "memory(GiB)": 34.88, "step": 101110, "train_speed(iter/s)": 0.41057 }, { "acc": 0.94540062, "epoch": 2.7377954674681177, "grad_norm": 9.9208984375, "learning_rate": 2.48480764487495e-06, "loss": 0.29809561, "memory(GiB)": 34.88, "step": 101115, "train_speed(iter/s)": 0.410571 }, { "acc": 0.92989483, "epoch": 2.7379308477513336, "grad_norm": 3.708465099334717, "learning_rate": 2.4843240988586695e-06, "loss": 0.31610024, "memory(GiB)": 34.88, "step": 101120, "train_speed(iter/s)": 0.410572 }, { "acc": 0.94776936, "epoch": 2.738066228034549, "grad_norm": 9.37032699584961, "learning_rate": 2.4838405843624353e-06, "loss": 0.3029798, "memory(GiB)": 34.88, "step": 101125, "train_speed(iter/s)": 0.410573 }, { "acc": 0.94533272, "epoch": 2.7382016083177647, "grad_norm": 8.086071014404297, "learning_rate": 2.483357101392305e-06, "loss": 0.26946445, "memory(GiB)": 34.88, "step": 101130, "train_speed(iter/s)": 0.410574 }, { "acc": 0.92589865, "epoch": 2.73833698860098, "grad_norm": 5.930852890014648, "learning_rate": 2.4828736499543355e-06, "loss": 0.41132803, "memory(GiB)": 34.88, "step": 101135, "train_speed(iter/s)": 0.410576 }, { "acc": 0.9291955, "epoch": 2.738472368884196, "grad_norm": 9.607001304626465, "learning_rate": 2.482390230054585e-06, "loss": 0.40969272, "memory(GiB)": 34.88, "step": 101140, "train_speed(iter/s)": 0.410577 }, { "acc": 0.94922523, "epoch": 2.7386077491674112, "grad_norm": 4.7109246253967285, "learning_rate": 2.481906841699106e-06, "loss": 0.25649941, "memory(GiB)": 34.88, "step": 101145, "train_speed(iter/s)": 0.410578 }, { "acc": 0.94604597, "epoch": 2.7387431294506266, "grad_norm": 3.676051139831543, "learning_rate": 2.481423484893955e-06, "loss": 0.28752885, "memory(GiB)": 34.88, "step": 101150, "train_speed(iter/s)": 0.410579 }, { "acc": 0.92340431, "epoch": 2.7388785097338424, "grad_norm": 5.945462226867676, "learning_rate": 2.480940159645189e-06, "loss": 0.48442593, "memory(GiB)": 34.88, "step": 101155, "train_speed(iter/s)": 0.41058 }, { "acc": 0.94522715, "epoch": 2.7390138900170578, "grad_norm": 4.7936530113220215, "learning_rate": 2.48045686595886e-06, "loss": 0.34399567, "memory(GiB)": 34.88, "step": 101160, "train_speed(iter/s)": 0.410581 }, { "acc": 0.94875851, "epoch": 2.7391492703002736, "grad_norm": 10.566435813903809, "learning_rate": 2.479973603841022e-06, "loss": 0.26694899, "memory(GiB)": 34.88, "step": 101165, "train_speed(iter/s)": 0.410582 }, { "acc": 0.93693581, "epoch": 2.739284650583489, "grad_norm": 7.042971611022949, "learning_rate": 2.4794903732977307e-06, "loss": 0.33113074, "memory(GiB)": 34.88, "step": 101170, "train_speed(iter/s)": 0.410584 }, { "acc": 0.94166241, "epoch": 2.7394200308667047, "grad_norm": 7.270033359527588, "learning_rate": 2.4790071743350404e-06, "loss": 0.35570698, "memory(GiB)": 34.88, "step": 101175, "train_speed(iter/s)": 0.410585 }, { "acc": 0.92564392, "epoch": 2.73955541114992, "grad_norm": 6.757108688354492, "learning_rate": 2.4785240069590007e-06, "loss": 0.40497131, "memory(GiB)": 34.88, "step": 101180, "train_speed(iter/s)": 0.410586 }, { "acc": 0.9334734, "epoch": 2.7396907914331354, "grad_norm": 6.829526424407959, "learning_rate": 2.4780408711756657e-06, "loss": 0.32619848, "memory(GiB)": 34.88, "step": 101185, "train_speed(iter/s)": 0.410587 }, { "acc": 0.93185196, "epoch": 2.7398261717163512, "grad_norm": 5.574225425720215, "learning_rate": 2.477557766991088e-06, "loss": 0.35028968, "memory(GiB)": 34.88, "step": 101190, "train_speed(iter/s)": 0.410588 }, { "acc": 0.91637383, "epoch": 2.739961551999567, "grad_norm": 5.277526378631592, "learning_rate": 2.477074694411321e-06, "loss": 0.51375213, "memory(GiB)": 34.88, "step": 101195, "train_speed(iter/s)": 0.410589 }, { "acc": 0.9344903, "epoch": 2.7400969322827824, "grad_norm": 8.365095138549805, "learning_rate": 2.476591653442413e-06, "loss": 0.39119711, "memory(GiB)": 34.88, "step": 101200, "train_speed(iter/s)": 0.41059 }, { "acc": 0.93399334, "epoch": 2.7402323125659978, "grad_norm": 9.91719913482666, "learning_rate": 2.476108644090417e-06, "loss": 0.40299988, "memory(GiB)": 34.88, "step": 101205, "train_speed(iter/s)": 0.410592 }, { "acc": 0.94256439, "epoch": 2.7403676928492136, "grad_norm": 4.798312187194824, "learning_rate": 2.475625666361384e-06, "loss": 0.28551426, "memory(GiB)": 34.88, "step": 101210, "train_speed(iter/s)": 0.410593 }, { "acc": 0.92570667, "epoch": 2.740503073132429, "grad_norm": 4.656317234039307, "learning_rate": 2.475142720261362e-06, "loss": 0.37331529, "memory(GiB)": 34.88, "step": 101215, "train_speed(iter/s)": 0.410594 }, { "acc": 0.92254906, "epoch": 2.7406384534156443, "grad_norm": 9.556897163391113, "learning_rate": 2.4746598057964024e-06, "loss": 0.39152379, "memory(GiB)": 34.88, "step": 101220, "train_speed(iter/s)": 0.410595 }, { "acc": 0.94153843, "epoch": 2.74077383369886, "grad_norm": 11.040496826171875, "learning_rate": 2.474176922972554e-06, "loss": 0.3506798, "memory(GiB)": 34.88, "step": 101225, "train_speed(iter/s)": 0.410596 }, { "acc": 0.92876186, "epoch": 2.740909213982076, "grad_norm": 11.531426429748535, "learning_rate": 2.4736940717958687e-06, "loss": 0.43793492, "memory(GiB)": 34.88, "step": 101230, "train_speed(iter/s)": 0.410597 }, { "acc": 0.94002876, "epoch": 2.7410445942652912, "grad_norm": 8.235726356506348, "learning_rate": 2.4732112522723906e-06, "loss": 0.39109707, "memory(GiB)": 34.88, "step": 101235, "train_speed(iter/s)": 0.410598 }, { "acc": 0.93636217, "epoch": 2.7411799745485066, "grad_norm": 16.341415405273438, "learning_rate": 2.47272846440817e-06, "loss": 0.35884094, "memory(GiB)": 34.88, "step": 101240, "train_speed(iter/s)": 0.410599 }, { "acc": 0.93441715, "epoch": 2.7413153548317224, "grad_norm": 4.924473762512207, "learning_rate": 2.4722457082092574e-06, "loss": 0.3773335, "memory(GiB)": 34.88, "step": 101245, "train_speed(iter/s)": 0.4106 }, { "acc": 0.94604836, "epoch": 2.7414507351149378, "grad_norm": 16.790205001831055, "learning_rate": 2.471762983681696e-06, "loss": 0.32394931, "memory(GiB)": 34.88, "step": 101250, "train_speed(iter/s)": 0.410602 }, { "acc": 0.93733521, "epoch": 2.7415861153981536, "grad_norm": 4.8698906898498535, "learning_rate": 2.4712802908315338e-06, "loss": 0.34070308, "memory(GiB)": 34.88, "step": 101255, "train_speed(iter/s)": 0.410603 }, { "acc": 0.94897346, "epoch": 2.741721495681369, "grad_norm": 7.179633140563965, "learning_rate": 2.470797629664819e-06, "loss": 0.29356208, "memory(GiB)": 34.88, "step": 101260, "train_speed(iter/s)": 0.410604 }, { "acc": 0.9367135, "epoch": 2.7418568759645847, "grad_norm": 5.347659111022949, "learning_rate": 2.470315000187599e-06, "loss": 0.36912313, "memory(GiB)": 34.88, "step": 101265, "train_speed(iter/s)": 0.410605 }, { "acc": 0.9317379, "epoch": 2.7419922562478, "grad_norm": 8.57179069519043, "learning_rate": 2.469832402405916e-06, "loss": 0.39074509, "memory(GiB)": 34.88, "step": 101270, "train_speed(iter/s)": 0.410606 }, { "acc": 0.9497921, "epoch": 2.7421276365310154, "grad_norm": 8.589059829711914, "learning_rate": 2.4693498363258177e-06, "loss": 0.27249975, "memory(GiB)": 34.88, "step": 101275, "train_speed(iter/s)": 0.410607 }, { "acc": 0.93371458, "epoch": 2.7422630168142312, "grad_norm": 36.52470779418945, "learning_rate": 2.4688673019533488e-06, "loss": 0.42633047, "memory(GiB)": 34.88, "step": 101280, "train_speed(iter/s)": 0.410608 }, { "acc": 0.94382706, "epoch": 2.7423983970974466, "grad_norm": 3.706059217453003, "learning_rate": 2.468384799294556e-06, "loss": 0.30203798, "memory(GiB)": 34.88, "step": 101285, "train_speed(iter/s)": 0.41061 }, { "acc": 0.9334959, "epoch": 2.7425337773806624, "grad_norm": 6.511481285095215, "learning_rate": 2.4679023283554795e-06, "loss": 0.28822837, "memory(GiB)": 34.88, "step": 101290, "train_speed(iter/s)": 0.410611 }, { "acc": 0.95547428, "epoch": 2.7426691576638778, "grad_norm": 3.195660352706909, "learning_rate": 2.4674198891421655e-06, "loss": 0.22960019, "memory(GiB)": 34.88, "step": 101295, "train_speed(iter/s)": 0.410612 }, { "acc": 0.9418561, "epoch": 2.7428045379470936, "grad_norm": 18.538976669311523, "learning_rate": 2.4669374816606594e-06, "loss": 0.29877024, "memory(GiB)": 34.88, "step": 101300, "train_speed(iter/s)": 0.410613 }, { "acc": 0.93933105, "epoch": 2.742939918230309, "grad_norm": 6.728615760803223, "learning_rate": 2.4664551059170005e-06, "loss": 0.34818897, "memory(GiB)": 34.88, "step": 101305, "train_speed(iter/s)": 0.410614 }, { "acc": 0.94451075, "epoch": 2.7430752985135243, "grad_norm": 5.397433757781982, "learning_rate": 2.4659727619172333e-06, "loss": 0.33999705, "memory(GiB)": 34.88, "step": 101310, "train_speed(iter/s)": 0.410615 }, { "acc": 0.93232927, "epoch": 2.74321067879674, "grad_norm": 12.277959823608398, "learning_rate": 2.4654904496673996e-06, "loss": 0.39290376, "memory(GiB)": 34.88, "step": 101315, "train_speed(iter/s)": 0.410617 }, { "acc": 0.94182148, "epoch": 2.7433460590799554, "grad_norm": 10.644981384277344, "learning_rate": 2.4650081691735443e-06, "loss": 0.30093603, "memory(GiB)": 34.88, "step": 101320, "train_speed(iter/s)": 0.410618 }, { "acc": 0.93883305, "epoch": 2.7434814393631712, "grad_norm": 10.324859619140625, "learning_rate": 2.4645259204417043e-06, "loss": 0.34078443, "memory(GiB)": 34.88, "step": 101325, "train_speed(iter/s)": 0.410619 }, { "acc": 0.93158112, "epoch": 2.7436168196463866, "grad_norm": 10.296140670776367, "learning_rate": 2.464043703477922e-06, "loss": 0.38176832, "memory(GiB)": 34.88, "step": 101330, "train_speed(iter/s)": 0.41062 }, { "acc": 0.94160147, "epoch": 2.7437521999296024, "grad_norm": 10.652024269104004, "learning_rate": 2.463561518288242e-06, "loss": 0.36007206, "memory(GiB)": 34.88, "step": 101335, "train_speed(iter/s)": 0.410621 }, { "acc": 0.9330492, "epoch": 2.7438875802128178, "grad_norm": 5.882064342498779, "learning_rate": 2.463079364878699e-06, "loss": 0.39767382, "memory(GiB)": 34.88, "step": 101340, "train_speed(iter/s)": 0.410622 }, { "acc": 0.95237799, "epoch": 2.744022960496033, "grad_norm": 3.6552438735961914, "learning_rate": 2.4625972432553357e-06, "loss": 0.27676558, "memory(GiB)": 34.88, "step": 101345, "train_speed(iter/s)": 0.410624 }, { "acc": 0.94698906, "epoch": 2.744158340779249, "grad_norm": 4.751138687133789, "learning_rate": 2.4621151534241923e-06, "loss": 0.29516864, "memory(GiB)": 34.88, "step": 101350, "train_speed(iter/s)": 0.410625 }, { "acc": 0.94037647, "epoch": 2.7442937210624647, "grad_norm": 9.884428024291992, "learning_rate": 2.4616330953913074e-06, "loss": 0.38664265, "memory(GiB)": 34.88, "step": 101355, "train_speed(iter/s)": 0.410626 }, { "acc": 0.93775711, "epoch": 2.74442910134568, "grad_norm": 6.746804714202881, "learning_rate": 2.461151069162717e-06, "loss": 0.37898889, "memory(GiB)": 34.88, "step": 101360, "train_speed(iter/s)": 0.410627 }, { "acc": 0.94337807, "epoch": 2.7445644816288954, "grad_norm": 8.915797233581543, "learning_rate": 2.4606690747444643e-06, "loss": 0.35183339, "memory(GiB)": 34.88, "step": 101365, "train_speed(iter/s)": 0.410628 }, { "acc": 0.92862053, "epoch": 2.7446998619121112, "grad_norm": 5.95070219039917, "learning_rate": 2.460187112142585e-06, "loss": 0.40810213, "memory(GiB)": 34.88, "step": 101370, "train_speed(iter/s)": 0.410629 }, { "acc": 0.93324652, "epoch": 2.7448352421953266, "grad_norm": 2.7833054065704346, "learning_rate": 2.459705181363115e-06, "loss": 0.35494916, "memory(GiB)": 34.88, "step": 101375, "train_speed(iter/s)": 0.41063 }, { "acc": 0.93393726, "epoch": 2.744970622478542, "grad_norm": 3.582707166671753, "learning_rate": 2.459223282412092e-06, "loss": 0.32598686, "memory(GiB)": 34.88, "step": 101380, "train_speed(iter/s)": 0.410631 }, { "acc": 0.93327675, "epoch": 2.7451060027617578, "grad_norm": 15.171050071716309, "learning_rate": 2.458741415295554e-06, "loss": 0.41370754, "memory(GiB)": 34.88, "step": 101385, "train_speed(iter/s)": 0.410632 }, { "acc": 0.932584, "epoch": 2.7452413830449736, "grad_norm": 7.544447422027588, "learning_rate": 2.4582595800195382e-06, "loss": 0.39225783, "memory(GiB)": 34.88, "step": 101390, "train_speed(iter/s)": 0.410633 }, { "acc": 0.93120174, "epoch": 2.745376763328189, "grad_norm": 12.1659517288208, "learning_rate": 2.457777776590076e-06, "loss": 0.34353848, "memory(GiB)": 34.88, "step": 101395, "train_speed(iter/s)": 0.410634 }, { "acc": 0.94400578, "epoch": 2.7455121436114043, "grad_norm": 16.505762100219727, "learning_rate": 2.4572960050132095e-06, "loss": 0.33580182, "memory(GiB)": 34.88, "step": 101400, "train_speed(iter/s)": 0.410636 }, { "acc": 0.94245529, "epoch": 2.74564752389462, "grad_norm": 4.2227044105529785, "learning_rate": 2.456814265294969e-06, "loss": 0.36414857, "memory(GiB)": 34.88, "step": 101405, "train_speed(iter/s)": 0.410637 }, { "acc": 0.92129307, "epoch": 2.7457829041778354, "grad_norm": 14.254403114318848, "learning_rate": 2.456332557441392e-06, "loss": 0.45965567, "memory(GiB)": 34.88, "step": 101410, "train_speed(iter/s)": 0.410638 }, { "acc": 0.92757435, "epoch": 2.7459182844610512, "grad_norm": 7.986648082733154, "learning_rate": 2.4558508814585096e-06, "loss": 0.36159387, "memory(GiB)": 34.88, "step": 101415, "train_speed(iter/s)": 0.410639 }, { "acc": 0.9319314, "epoch": 2.7460536647442666, "grad_norm": 7.066932678222656, "learning_rate": 2.455369237352358e-06, "loss": 0.35717039, "memory(GiB)": 34.88, "step": 101420, "train_speed(iter/s)": 0.41064 }, { "acc": 0.95004997, "epoch": 2.7461890450274824, "grad_norm": 6.361331939697266, "learning_rate": 2.454887625128972e-06, "loss": 0.30681109, "memory(GiB)": 34.88, "step": 101425, "train_speed(iter/s)": 0.410641 }, { "acc": 0.94038019, "epoch": 2.7463244253106978, "grad_norm": 7.570276737213135, "learning_rate": 2.4544060447943814e-06, "loss": 0.30740013, "memory(GiB)": 34.88, "step": 101430, "train_speed(iter/s)": 0.410642 }, { "acc": 0.94436245, "epoch": 2.746459805593913, "grad_norm": 4.340968132019043, "learning_rate": 2.4539244963546203e-06, "loss": 0.28925066, "memory(GiB)": 34.88, "step": 101435, "train_speed(iter/s)": 0.410643 }, { "acc": 0.91918869, "epoch": 2.746595185877129, "grad_norm": 6.756077766418457, "learning_rate": 2.4534429798157214e-06, "loss": 0.49956293, "memory(GiB)": 34.88, "step": 101440, "train_speed(iter/s)": 0.410644 }, { "acc": 0.93449478, "epoch": 2.7467305661603443, "grad_norm": 5.775102138519287, "learning_rate": 2.452961495183719e-06, "loss": 0.3398711, "memory(GiB)": 34.88, "step": 101445, "train_speed(iter/s)": 0.410645 }, { "acc": 0.94963799, "epoch": 2.74686594644356, "grad_norm": 7.195685863494873, "learning_rate": 2.4524800424646404e-06, "loss": 0.24971783, "memory(GiB)": 34.88, "step": 101450, "train_speed(iter/s)": 0.410646 }, { "acc": 0.92236042, "epoch": 2.7470013267267754, "grad_norm": 4.370616912841797, "learning_rate": 2.4519986216645185e-06, "loss": 0.45178795, "memory(GiB)": 34.88, "step": 101455, "train_speed(iter/s)": 0.410647 }, { "acc": 0.93766899, "epoch": 2.7471367070099912, "grad_norm": 7.076016426086426, "learning_rate": 2.4515172327893856e-06, "loss": 0.37573626, "memory(GiB)": 34.88, "step": 101460, "train_speed(iter/s)": 0.410649 }, { "acc": 0.94193001, "epoch": 2.7472720872932066, "grad_norm": 4.80350923538208, "learning_rate": 2.451035875845269e-06, "loss": 0.36187503, "memory(GiB)": 34.88, "step": 101465, "train_speed(iter/s)": 0.41065 }, { "acc": 0.94381552, "epoch": 2.747407467576422, "grad_norm": 15.052078247070312, "learning_rate": 2.4505545508382005e-06, "loss": 0.31320326, "memory(GiB)": 34.88, "step": 101470, "train_speed(iter/s)": 0.410651 }, { "acc": 0.94133987, "epoch": 2.7475428478596378, "grad_norm": 5.858483791351318, "learning_rate": 2.450073257774209e-06, "loss": 0.28680649, "memory(GiB)": 34.88, "step": 101475, "train_speed(iter/s)": 0.410652 }, { "acc": 0.94836979, "epoch": 2.747678228142853, "grad_norm": 4.458576202392578, "learning_rate": 2.449591996659326e-06, "loss": 0.32673619, "memory(GiB)": 34.88, "step": 101480, "train_speed(iter/s)": 0.410653 }, { "acc": 0.92751446, "epoch": 2.747813608426069, "grad_norm": 24.092552185058594, "learning_rate": 2.4491107674995747e-06, "loss": 0.39870245, "memory(GiB)": 34.88, "step": 101485, "train_speed(iter/s)": 0.410654 }, { "acc": 0.94735222, "epoch": 2.7479489887092843, "grad_norm": 3.8903043270111084, "learning_rate": 2.448629570300991e-06, "loss": 0.30001063, "memory(GiB)": 34.88, "step": 101490, "train_speed(iter/s)": 0.410655 }, { "acc": 0.94385757, "epoch": 2.7480843689925, "grad_norm": 4.3546342849731445, "learning_rate": 2.448148405069598e-06, "loss": 0.24424567, "memory(GiB)": 34.88, "step": 101495, "train_speed(iter/s)": 0.410656 }, { "acc": 0.94172802, "epoch": 2.7482197492757154, "grad_norm": 5.082285404205322, "learning_rate": 2.4476672718114227e-06, "loss": 0.292171, "memory(GiB)": 34.88, "step": 101500, "train_speed(iter/s)": 0.410657 }, { "acc": 0.94595985, "epoch": 2.748355129558931, "grad_norm": 6.233717918395996, "learning_rate": 2.447186170532494e-06, "loss": 0.32137237, "memory(GiB)": 34.88, "step": 101505, "train_speed(iter/s)": 0.410659 }, { "acc": 0.9449832, "epoch": 2.7484905098421466, "grad_norm": 7.917556285858154, "learning_rate": 2.446705101238838e-06, "loss": 0.25592771, "memory(GiB)": 34.88, "step": 101510, "train_speed(iter/s)": 0.41066 }, { "acc": 0.93976803, "epoch": 2.7486258901253624, "grad_norm": 5.831257343292236, "learning_rate": 2.4462240639364827e-06, "loss": 0.34825747, "memory(GiB)": 34.88, "step": 101515, "train_speed(iter/s)": 0.410661 }, { "acc": 0.94413738, "epoch": 2.7487612704085778, "grad_norm": 5.0160136222839355, "learning_rate": 2.4457430586314494e-06, "loss": 0.38592334, "memory(GiB)": 34.88, "step": 101520, "train_speed(iter/s)": 0.410662 }, { "acc": 0.94268827, "epoch": 2.748896650691793, "grad_norm": 2.303792715072632, "learning_rate": 2.445262085329771e-06, "loss": 0.30888236, "memory(GiB)": 34.88, "step": 101525, "train_speed(iter/s)": 0.410663 }, { "acc": 0.94626236, "epoch": 2.749032030975009, "grad_norm": 13.290766716003418, "learning_rate": 2.444781144037466e-06, "loss": 0.33670268, "memory(GiB)": 34.88, "step": 101530, "train_speed(iter/s)": 0.410664 }, { "acc": 0.92073107, "epoch": 2.7491674112582243, "grad_norm": 6.1884565353393555, "learning_rate": 2.4443002347605634e-06, "loss": 0.40302801, "memory(GiB)": 34.88, "step": 101535, "train_speed(iter/s)": 0.410665 }, { "acc": 0.91469574, "epoch": 2.7493027915414396, "grad_norm": 11.892518043518066, "learning_rate": 2.4438193575050845e-06, "loss": 0.4808835, "memory(GiB)": 34.88, "step": 101540, "train_speed(iter/s)": 0.410666 }, { "acc": 0.93382301, "epoch": 2.7494381718246554, "grad_norm": 12.50068187713623, "learning_rate": 2.443338512277054e-06, "loss": 0.37237353, "memory(GiB)": 34.88, "step": 101545, "train_speed(iter/s)": 0.410668 }, { "acc": 0.93059998, "epoch": 2.7495735521078712, "grad_norm": 7.924835205078125, "learning_rate": 2.442857699082498e-06, "loss": 0.39051998, "memory(GiB)": 34.88, "step": 101550, "train_speed(iter/s)": 0.410669 }, { "acc": 0.94379501, "epoch": 2.7497089323910866, "grad_norm": 7.244260311126709, "learning_rate": 2.442376917927434e-06, "loss": 0.40348177, "memory(GiB)": 34.88, "step": 101555, "train_speed(iter/s)": 0.41067 }, { "acc": 0.93098221, "epoch": 2.749844312674302, "grad_norm": 3.915574073791504, "learning_rate": 2.441896168817892e-06, "loss": 0.39031003, "memory(GiB)": 34.88, "step": 101560, "train_speed(iter/s)": 0.410671 }, { "acc": 0.9355608, "epoch": 2.7499796929575178, "grad_norm": 14.363320350646973, "learning_rate": 2.441415451759889e-06, "loss": 0.34815812, "memory(GiB)": 34.88, "step": 101565, "train_speed(iter/s)": 0.410672 }, { "acc": 0.91621838, "epoch": 2.750115073240733, "grad_norm": 7.9779133796691895, "learning_rate": 2.4409347667594492e-06, "loss": 0.46952534, "memory(GiB)": 34.88, "step": 101570, "train_speed(iter/s)": 0.410673 }, { "acc": 0.96095333, "epoch": 2.750250453523949, "grad_norm": 7.827141761779785, "learning_rate": 2.4404541138225927e-06, "loss": 0.20830936, "memory(GiB)": 34.88, "step": 101575, "train_speed(iter/s)": 0.410674 }, { "acc": 0.93751659, "epoch": 2.7503858338071643, "grad_norm": 6.124871253967285, "learning_rate": 2.4399734929553413e-06, "loss": 0.31673255, "memory(GiB)": 34.88, "step": 101580, "train_speed(iter/s)": 0.410675 }, { "acc": 0.94835682, "epoch": 2.75052121409038, "grad_norm": 3.31050705909729, "learning_rate": 2.4394929041637173e-06, "loss": 0.24498231, "memory(GiB)": 34.88, "step": 101585, "train_speed(iter/s)": 0.410677 }, { "acc": 0.92315292, "epoch": 2.7506565943735954, "grad_norm": 5.461232662200928, "learning_rate": 2.4390123474537364e-06, "loss": 0.45641479, "memory(GiB)": 34.88, "step": 101590, "train_speed(iter/s)": 0.410678 }, { "acc": 0.93359394, "epoch": 2.750791974656811, "grad_norm": 6.845743656158447, "learning_rate": 2.438531822831425e-06, "loss": 0.4039938, "memory(GiB)": 34.88, "step": 101595, "train_speed(iter/s)": 0.410679 }, { "acc": 0.91864071, "epoch": 2.7509273549400266, "grad_norm": 5.578693866729736, "learning_rate": 2.4380513303027976e-06, "loss": 0.46534824, "memory(GiB)": 34.88, "step": 101600, "train_speed(iter/s)": 0.41068 }, { "acc": 0.95464592, "epoch": 2.751062735223242, "grad_norm": 3.8376429080963135, "learning_rate": 2.4375708698738765e-06, "loss": 0.19735105, "memory(GiB)": 34.88, "step": 101605, "train_speed(iter/s)": 0.410681 }, { "acc": 0.92654343, "epoch": 2.7511981155064578, "grad_norm": 13.512548446655273, "learning_rate": 2.437090441550676e-06, "loss": 0.48497977, "memory(GiB)": 34.88, "step": 101610, "train_speed(iter/s)": 0.410682 }, { "acc": 0.93694592, "epoch": 2.751333495789673, "grad_norm": 11.802168846130371, "learning_rate": 2.43661004533922e-06, "loss": 0.43247337, "memory(GiB)": 34.88, "step": 101615, "train_speed(iter/s)": 0.410684 }, { "acc": 0.94004183, "epoch": 2.751468876072889, "grad_norm": 12.213793754577637, "learning_rate": 2.436129681245524e-06, "loss": 0.37360353, "memory(GiB)": 34.88, "step": 101620, "train_speed(iter/s)": 0.410685 }, { "acc": 0.93767405, "epoch": 2.7516042563561043, "grad_norm": 9.409456253051758, "learning_rate": 2.4356493492756023e-06, "loss": 0.24959698, "memory(GiB)": 34.88, "step": 101625, "train_speed(iter/s)": 0.410686 }, { "acc": 0.92726574, "epoch": 2.7517396366393196, "grad_norm": 8.507996559143066, "learning_rate": 2.435169049435478e-06, "loss": 0.49728308, "memory(GiB)": 34.88, "step": 101630, "train_speed(iter/s)": 0.410687 }, { "acc": 0.94243898, "epoch": 2.7518750169225354, "grad_norm": 5.297915935516357, "learning_rate": 2.434688781731163e-06, "loss": 0.34006858, "memory(GiB)": 34.88, "step": 101635, "train_speed(iter/s)": 0.410688 }, { "acc": 0.93480606, "epoch": 2.752010397205751, "grad_norm": 6.938305377960205, "learning_rate": 2.4342085461686775e-06, "loss": 0.36439753, "memory(GiB)": 34.88, "step": 101640, "train_speed(iter/s)": 0.410689 }, { "acc": 0.92872171, "epoch": 2.7521457774889666, "grad_norm": 8.831524848937988, "learning_rate": 2.4337283427540306e-06, "loss": 0.35318213, "memory(GiB)": 34.88, "step": 101645, "train_speed(iter/s)": 0.41069 }, { "acc": 0.93385048, "epoch": 2.752281157772182, "grad_norm": 6.2673468589782715, "learning_rate": 2.4332481714932465e-06, "loss": 0.37883441, "memory(GiB)": 34.88, "step": 101650, "train_speed(iter/s)": 0.410691 }, { "acc": 0.9235714, "epoch": 2.7524165380553978, "grad_norm": 8.8891019821167, "learning_rate": 2.432768032392334e-06, "loss": 0.41982341, "memory(GiB)": 34.88, "step": 101655, "train_speed(iter/s)": 0.410692 }, { "acc": 0.93048553, "epoch": 2.752551918338613, "grad_norm": 10.206842422485352, "learning_rate": 2.432287925457311e-06, "loss": 0.45419607, "memory(GiB)": 34.88, "step": 101660, "train_speed(iter/s)": 0.410693 }, { "acc": 0.93490906, "epoch": 2.7526872986218285, "grad_norm": 6.491826057434082, "learning_rate": 2.431807850694192e-06, "loss": 0.38800263, "memory(GiB)": 34.88, "step": 101665, "train_speed(iter/s)": 0.410694 }, { "acc": 0.94372206, "epoch": 2.7528226789050443, "grad_norm": 9.130746841430664, "learning_rate": 2.4313278081089877e-06, "loss": 0.33883419, "memory(GiB)": 34.88, "step": 101670, "train_speed(iter/s)": 0.410695 }, { "acc": 0.95479765, "epoch": 2.75295805918826, "grad_norm": 7.796764373779297, "learning_rate": 2.4308477977077157e-06, "loss": 0.24456432, "memory(GiB)": 34.88, "step": 101675, "train_speed(iter/s)": 0.410697 }, { "acc": 0.93054419, "epoch": 2.7530934394714754, "grad_norm": 2.961967945098877, "learning_rate": 2.4303678194963824e-06, "loss": 0.32864976, "memory(GiB)": 34.88, "step": 101680, "train_speed(iter/s)": 0.410698 }, { "acc": 0.94787407, "epoch": 2.753228819754691, "grad_norm": 7.617645740509033, "learning_rate": 2.42988787348101e-06, "loss": 0.31997697, "memory(GiB)": 34.88, "step": 101685, "train_speed(iter/s)": 0.410699 }, { "acc": 0.90008955, "epoch": 2.7533642000379066, "grad_norm": 5.705787658691406, "learning_rate": 2.4294079596676027e-06, "loss": 0.61781635, "memory(GiB)": 34.88, "step": 101690, "train_speed(iter/s)": 0.4107 }, { "acc": 0.91785889, "epoch": 2.753499580321122, "grad_norm": 15.577093124389648, "learning_rate": 2.428928078062176e-06, "loss": 0.50181084, "memory(GiB)": 34.88, "step": 101695, "train_speed(iter/s)": 0.410701 }, { "acc": 0.93388004, "epoch": 2.7536349606043373, "grad_norm": 8.996221542358398, "learning_rate": 2.4284482286707425e-06, "loss": 0.30317311, "memory(GiB)": 34.88, "step": 101700, "train_speed(iter/s)": 0.410702 }, { "acc": 0.9406826, "epoch": 2.753770340887553, "grad_norm": 7.5568413734436035, "learning_rate": 2.427968411499309e-06, "loss": 0.31786771, "memory(GiB)": 34.88, "step": 101705, "train_speed(iter/s)": 0.410703 }, { "acc": 0.94784946, "epoch": 2.753905721170769, "grad_norm": 9.917479515075684, "learning_rate": 2.4274886265538914e-06, "loss": 0.32479298, "memory(GiB)": 34.88, "step": 101710, "train_speed(iter/s)": 0.410704 }, { "acc": 0.94857082, "epoch": 2.7540411014539843, "grad_norm": 8.191540718078613, "learning_rate": 2.427008873840493e-06, "loss": 0.26497545, "memory(GiB)": 34.88, "step": 101715, "train_speed(iter/s)": 0.410705 }, { "acc": 0.93023319, "epoch": 2.7541764817371996, "grad_norm": 19.684988021850586, "learning_rate": 2.4265291533651324e-06, "loss": 0.39690676, "memory(GiB)": 34.88, "step": 101720, "train_speed(iter/s)": 0.410706 }, { "acc": 0.93288965, "epoch": 2.7543118620204154, "grad_norm": 3.330479621887207, "learning_rate": 2.4260494651338116e-06, "loss": 0.40168238, "memory(GiB)": 34.88, "step": 101725, "train_speed(iter/s)": 0.410708 }, { "acc": 0.95163612, "epoch": 2.754447242303631, "grad_norm": 8.205470085144043, "learning_rate": 2.425569809152543e-06, "loss": 0.25986242, "memory(GiB)": 34.88, "step": 101730, "train_speed(iter/s)": 0.410709 }, { "acc": 0.93409071, "epoch": 2.7545826225868466, "grad_norm": 5.914209365844727, "learning_rate": 2.4250901854273346e-06, "loss": 0.40226474, "memory(GiB)": 34.88, "step": 101735, "train_speed(iter/s)": 0.41071 }, { "acc": 0.92021866, "epoch": 2.754718002870062, "grad_norm": 13.17398452758789, "learning_rate": 2.424610593964197e-06, "loss": 0.47980676, "memory(GiB)": 34.88, "step": 101740, "train_speed(iter/s)": 0.410711 }, { "acc": 0.92790041, "epoch": 2.7548533831532778, "grad_norm": 3.9201700687408447, "learning_rate": 2.4241310347691355e-06, "loss": 0.40283237, "memory(GiB)": 34.88, "step": 101745, "train_speed(iter/s)": 0.410712 }, { "acc": 0.94082298, "epoch": 2.754988763436493, "grad_norm": 6.860485076904297, "learning_rate": 2.423651507848154e-06, "loss": 0.31295452, "memory(GiB)": 34.88, "step": 101750, "train_speed(iter/s)": 0.410713 }, { "acc": 0.93631802, "epoch": 2.7551241437197085, "grad_norm": 22.840837478637695, "learning_rate": 2.4231720132072672e-06, "loss": 0.36284881, "memory(GiB)": 34.88, "step": 101755, "train_speed(iter/s)": 0.410714 }, { "acc": 0.93894615, "epoch": 2.7552595240029243, "grad_norm": 11.26667594909668, "learning_rate": 2.4226925508524764e-06, "loss": 0.40535946, "memory(GiB)": 34.88, "step": 101760, "train_speed(iter/s)": 0.410716 }, { "acc": 0.92592201, "epoch": 2.7553949042861396, "grad_norm": 6.269981384277344, "learning_rate": 2.422213120789789e-06, "loss": 0.42634711, "memory(GiB)": 34.88, "step": 101765, "train_speed(iter/s)": 0.410717 }, { "acc": 0.89955864, "epoch": 2.7555302845693554, "grad_norm": 10.972185134887695, "learning_rate": 2.4217337230252107e-06, "loss": 0.58996592, "memory(GiB)": 34.88, "step": 101770, "train_speed(iter/s)": 0.410718 }, { "acc": 0.93681126, "epoch": 2.755665664852571, "grad_norm": 6.28516960144043, "learning_rate": 2.42125435756475e-06, "loss": 0.32059422, "memory(GiB)": 34.88, "step": 101775, "train_speed(iter/s)": 0.410719 }, { "acc": 0.93087721, "epoch": 2.7558010451357866, "grad_norm": 4.61011266708374, "learning_rate": 2.4207750244144064e-06, "loss": 0.3668005, "memory(GiB)": 34.88, "step": 101780, "train_speed(iter/s)": 0.41072 }, { "acc": 0.94333801, "epoch": 2.755936425419002, "grad_norm": 5.941847801208496, "learning_rate": 2.420295723580188e-06, "loss": 0.24827096, "memory(GiB)": 34.88, "step": 101785, "train_speed(iter/s)": 0.410721 }, { "acc": 0.94532223, "epoch": 2.7560718057022173, "grad_norm": 5.705618858337402, "learning_rate": 2.4198164550680995e-06, "loss": 0.32207513, "memory(GiB)": 34.88, "step": 101790, "train_speed(iter/s)": 0.410722 }, { "acc": 0.93624859, "epoch": 2.756207185985433, "grad_norm": 6.146258354187012, "learning_rate": 2.419337218884142e-06, "loss": 0.34382429, "memory(GiB)": 34.88, "step": 101795, "train_speed(iter/s)": 0.410723 }, { "acc": 0.93568077, "epoch": 2.7563425662686485, "grad_norm": 6.7425432205200195, "learning_rate": 2.418858015034322e-06, "loss": 0.35337389, "memory(GiB)": 34.88, "step": 101800, "train_speed(iter/s)": 0.410724 }, { "acc": 0.9304162, "epoch": 2.7564779465518643, "grad_norm": 8.926172256469727, "learning_rate": 2.4183788435246374e-06, "loss": 0.35683193, "memory(GiB)": 34.88, "step": 101805, "train_speed(iter/s)": 0.410725 }, { "acc": 0.93825951, "epoch": 2.7566133268350796, "grad_norm": 11.230618476867676, "learning_rate": 2.4178997043610973e-06, "loss": 0.36981921, "memory(GiB)": 34.88, "step": 101810, "train_speed(iter/s)": 0.410726 }, { "acc": 0.94871063, "epoch": 2.7567487071182954, "grad_norm": 5.927836894989014, "learning_rate": 2.417420597549699e-06, "loss": 0.29820833, "memory(GiB)": 34.88, "step": 101815, "train_speed(iter/s)": 0.410728 }, { "acc": 0.92708321, "epoch": 2.756884087401511, "grad_norm": 8.718650817871094, "learning_rate": 2.416941523096446e-06, "loss": 0.42677383, "memory(GiB)": 34.88, "step": 101820, "train_speed(iter/s)": 0.410729 }, { "acc": 0.93869867, "epoch": 2.757019467684726, "grad_norm": 9.442793846130371, "learning_rate": 2.41646248100734e-06, "loss": 0.39197042, "memory(GiB)": 34.88, "step": 101825, "train_speed(iter/s)": 0.41073 }, { "acc": 0.92749186, "epoch": 2.757154847967942, "grad_norm": 10.675189018249512, "learning_rate": 2.415983471288383e-06, "loss": 0.454637, "memory(GiB)": 34.88, "step": 101830, "train_speed(iter/s)": 0.410731 }, { "acc": 0.92471466, "epoch": 2.7572902282511578, "grad_norm": 5.734333515167236, "learning_rate": 2.415504493945574e-06, "loss": 0.37055564, "memory(GiB)": 34.88, "step": 101835, "train_speed(iter/s)": 0.410732 }, { "acc": 0.9479744, "epoch": 2.757425608534373, "grad_norm": 5.799387454986572, "learning_rate": 2.41502554898491e-06, "loss": 0.2955255, "memory(GiB)": 34.88, "step": 101840, "train_speed(iter/s)": 0.410733 }, { "acc": 0.94058704, "epoch": 2.7575609888175885, "grad_norm": 6.120772361755371, "learning_rate": 2.414546636412397e-06, "loss": 0.32115374, "memory(GiB)": 34.88, "step": 101845, "train_speed(iter/s)": 0.410734 }, { "acc": 0.94107971, "epoch": 2.7576963691008043, "grad_norm": 7.353198051452637, "learning_rate": 2.41406775623403e-06, "loss": 0.33313277, "memory(GiB)": 34.88, "step": 101850, "train_speed(iter/s)": 0.410735 }, { "acc": 0.94299355, "epoch": 2.7578317493840196, "grad_norm": 12.37459659576416, "learning_rate": 2.413588908455809e-06, "loss": 0.35749707, "memory(GiB)": 34.88, "step": 101855, "train_speed(iter/s)": 0.410736 }, { "acc": 0.92305603, "epoch": 2.757967129667235, "grad_norm": 3.38409686088562, "learning_rate": 2.413110093083733e-06, "loss": 0.47074304, "memory(GiB)": 34.88, "step": 101860, "train_speed(iter/s)": 0.410737 }, { "acc": 0.93795834, "epoch": 2.758102509950451, "grad_norm": 8.531994819641113, "learning_rate": 2.412631310123801e-06, "loss": 0.38169715, "memory(GiB)": 34.88, "step": 101865, "train_speed(iter/s)": 0.410738 }, { "acc": 0.92718782, "epoch": 2.7582378902336666, "grad_norm": 6.27098274230957, "learning_rate": 2.41215255958201e-06, "loss": 0.37332397, "memory(GiB)": 34.88, "step": 101870, "train_speed(iter/s)": 0.41074 }, { "acc": 0.93162251, "epoch": 2.758373270516882, "grad_norm": 8.858930587768555, "learning_rate": 2.4116738414643528e-06, "loss": 0.42763367, "memory(GiB)": 34.88, "step": 101875, "train_speed(iter/s)": 0.410741 }, { "acc": 0.92890816, "epoch": 2.7585086508000973, "grad_norm": 5.6925950050354, "learning_rate": 2.4111951557768336e-06, "loss": 0.43015213, "memory(GiB)": 34.88, "step": 101880, "train_speed(iter/s)": 0.410741 }, { "acc": 0.93226528, "epoch": 2.758644031083313, "grad_norm": 5.402749538421631, "learning_rate": 2.4107165025254446e-06, "loss": 0.43519306, "memory(GiB)": 34.88, "step": 101885, "train_speed(iter/s)": 0.410743 }, { "acc": 0.93314266, "epoch": 2.7587794113665285, "grad_norm": 5.794945240020752, "learning_rate": 2.4102378817161823e-06, "loss": 0.41825891, "memory(GiB)": 34.88, "step": 101890, "train_speed(iter/s)": 0.410744 }, { "acc": 0.93978004, "epoch": 2.7589147916497443, "grad_norm": 8.734296798706055, "learning_rate": 2.4097592933550426e-06, "loss": 0.3908906, "memory(GiB)": 34.88, "step": 101895, "train_speed(iter/s)": 0.410745 }, { "acc": 0.94155474, "epoch": 2.7590501719329596, "grad_norm": 10.37389850616455, "learning_rate": 2.4092807374480235e-06, "loss": 0.3548564, "memory(GiB)": 34.88, "step": 101900, "train_speed(iter/s)": 0.410746 }, { "acc": 0.94112015, "epoch": 2.7591855522161755, "grad_norm": 6.031230449676514, "learning_rate": 2.4088022140011154e-06, "loss": 0.3227222, "memory(GiB)": 34.88, "step": 101905, "train_speed(iter/s)": 0.410747 }, { "acc": 0.93955078, "epoch": 2.759320932499391, "grad_norm": 7.132816314697266, "learning_rate": 2.4083237230203145e-06, "loss": 0.34259105, "memory(GiB)": 34.88, "step": 101910, "train_speed(iter/s)": 0.410748 }, { "acc": 0.93787251, "epoch": 2.759456312782606, "grad_norm": 7.789519309997559, "learning_rate": 2.407845264511617e-06, "loss": 0.29486656, "memory(GiB)": 34.88, "step": 101915, "train_speed(iter/s)": 0.410749 }, { "acc": 0.93387661, "epoch": 2.759591693065822, "grad_norm": 8.8916015625, "learning_rate": 2.4073668384810133e-06, "loss": 0.38202729, "memory(GiB)": 34.88, "step": 101920, "train_speed(iter/s)": 0.41075 }, { "acc": 0.94246254, "epoch": 2.7597270733490373, "grad_norm": 8.923805236816406, "learning_rate": 2.4068884449344974e-06, "loss": 0.34877083, "memory(GiB)": 34.88, "step": 101925, "train_speed(iter/s)": 0.410751 }, { "acc": 0.94130001, "epoch": 2.759862453632253, "grad_norm": 3.979433298110962, "learning_rate": 2.406410083878062e-06, "loss": 0.34045274, "memory(GiB)": 34.88, "step": 101930, "train_speed(iter/s)": 0.410752 }, { "acc": 0.93843536, "epoch": 2.7599978339154685, "grad_norm": 8.562200546264648, "learning_rate": 2.4059317553177032e-06, "loss": 0.35261359, "memory(GiB)": 34.88, "step": 101935, "train_speed(iter/s)": 0.410753 }, { "acc": 0.9375824, "epoch": 2.7601332141986843, "grad_norm": 8.824305534362793, "learning_rate": 2.405453459259408e-06, "loss": 0.37422121, "memory(GiB)": 34.88, "step": 101940, "train_speed(iter/s)": 0.410754 }, { "acc": 0.94752522, "epoch": 2.7602685944818997, "grad_norm": 4.347896099090576, "learning_rate": 2.404975195709169e-06, "loss": 0.28420863, "memory(GiB)": 34.88, "step": 101945, "train_speed(iter/s)": 0.410755 }, { "acc": 0.93569555, "epoch": 2.760403974765115, "grad_norm": 16.457584381103516, "learning_rate": 2.404496964672979e-06, "loss": 0.37640355, "memory(GiB)": 34.88, "step": 101950, "train_speed(iter/s)": 0.410757 }, { "acc": 0.94219627, "epoch": 2.760539355048331, "grad_norm": 7.5493621826171875, "learning_rate": 2.4040187661568306e-06, "loss": 0.30439968, "memory(GiB)": 34.88, "step": 101955, "train_speed(iter/s)": 0.410758 }, { "acc": 0.94736309, "epoch": 2.760674735331546, "grad_norm": 9.001432418823242, "learning_rate": 2.403540600166709e-06, "loss": 0.31199131, "memory(GiB)": 34.88, "step": 101960, "train_speed(iter/s)": 0.410759 }, { "acc": 0.92380428, "epoch": 2.760810115614762, "grad_norm": 11.622118949890137, "learning_rate": 2.4030624667086074e-06, "loss": 0.4663671, "memory(GiB)": 34.88, "step": 101965, "train_speed(iter/s)": 0.41076 }, { "acc": 0.93882275, "epoch": 2.7609454958979773, "grad_norm": 6.378326892852783, "learning_rate": 2.4025843657885165e-06, "loss": 0.35520372, "memory(GiB)": 34.88, "step": 101970, "train_speed(iter/s)": 0.410761 }, { "acc": 0.93004999, "epoch": 2.761080876181193, "grad_norm": 9.488941192626953, "learning_rate": 2.402106297412422e-06, "loss": 0.46075959, "memory(GiB)": 34.88, "step": 101975, "train_speed(iter/s)": 0.410762 }, { "acc": 0.904177, "epoch": 2.7612162564644085, "grad_norm": 8.795616149902344, "learning_rate": 2.401628261586315e-06, "loss": 0.55080576, "memory(GiB)": 34.88, "step": 101980, "train_speed(iter/s)": 0.410763 }, { "acc": 0.94093113, "epoch": 2.761351636747624, "grad_norm": 17.257461547851562, "learning_rate": 2.4011502583161823e-06, "loss": 0.36741478, "memory(GiB)": 34.88, "step": 101985, "train_speed(iter/s)": 0.410764 }, { "acc": 0.92519341, "epoch": 2.7614870170308397, "grad_norm": 9.896032333374023, "learning_rate": 2.4006722876080148e-06, "loss": 0.51101875, "memory(GiB)": 34.88, "step": 101990, "train_speed(iter/s)": 0.410765 }, { "acc": 0.9279418, "epoch": 2.7616223973140555, "grad_norm": 8.4880952835083, "learning_rate": 2.400194349467797e-06, "loss": 0.41005726, "memory(GiB)": 34.88, "step": 101995, "train_speed(iter/s)": 0.410766 }, { "acc": 0.93863659, "epoch": 2.761757777597271, "grad_norm": 5.020476341247559, "learning_rate": 2.3997164439015168e-06, "loss": 0.36782575, "memory(GiB)": 34.88, "step": 102000, "train_speed(iter/s)": 0.410768 }, { "acc": 0.92468548, "epoch": 2.761893157880486, "grad_norm": 7.312379837036133, "learning_rate": 2.399238570915162e-06, "loss": 0.47798562, "memory(GiB)": 34.88, "step": 102005, "train_speed(iter/s)": 0.410769 }, { "acc": 0.91166458, "epoch": 2.762028538163702, "grad_norm": 8.374504089355469, "learning_rate": 2.3987607305147176e-06, "loss": 0.45620112, "memory(GiB)": 34.88, "step": 102010, "train_speed(iter/s)": 0.41077 }, { "acc": 0.92402363, "epoch": 2.7621639184469173, "grad_norm": 11.200521469116211, "learning_rate": 2.3982829227061693e-06, "loss": 0.41624012, "memory(GiB)": 34.88, "step": 102015, "train_speed(iter/s)": 0.410771 }, { "acc": 0.93446465, "epoch": 2.7622992987301327, "grad_norm": 5.921695232391357, "learning_rate": 2.397805147495503e-06, "loss": 0.3791642, "memory(GiB)": 34.88, "step": 102020, "train_speed(iter/s)": 0.410772 }, { "acc": 0.95385485, "epoch": 2.7624346790133485, "grad_norm": 5.285192489624023, "learning_rate": 2.3973274048887068e-06, "loss": 0.24261508, "memory(GiB)": 34.88, "step": 102025, "train_speed(iter/s)": 0.410773 }, { "acc": 0.94491644, "epoch": 2.7625700592965643, "grad_norm": 2.9445455074310303, "learning_rate": 2.39684969489176e-06, "loss": 0.31249287, "memory(GiB)": 34.88, "step": 102030, "train_speed(iter/s)": 0.410774 }, { "acc": 0.94344807, "epoch": 2.7627054395797797, "grad_norm": 3.2314014434814453, "learning_rate": 2.3963720175106496e-06, "loss": 0.29852638, "memory(GiB)": 34.88, "step": 102035, "train_speed(iter/s)": 0.410775 }, { "acc": 0.93974018, "epoch": 2.762840819862995, "grad_norm": 8.639270782470703, "learning_rate": 2.395894372751361e-06, "loss": 0.32241755, "memory(GiB)": 34.88, "step": 102040, "train_speed(iter/s)": 0.410776 }, { "acc": 0.93782673, "epoch": 2.762976200146211, "grad_norm": 6.1043877601623535, "learning_rate": 2.3954167606198746e-06, "loss": 0.35533035, "memory(GiB)": 34.88, "step": 102045, "train_speed(iter/s)": 0.410777 }, { "acc": 0.93896961, "epoch": 2.763111580429426, "grad_norm": 5.889471530914307, "learning_rate": 2.394939181122174e-06, "loss": 0.35889735, "memory(GiB)": 34.88, "step": 102050, "train_speed(iter/s)": 0.410778 }, { "acc": 0.92761049, "epoch": 2.763246960712642, "grad_norm": 4.972422122955322, "learning_rate": 2.3944616342642434e-06, "loss": 0.38481133, "memory(GiB)": 34.88, "step": 102055, "train_speed(iter/s)": 0.41078 }, { "acc": 0.93608704, "epoch": 2.7633823409958573, "grad_norm": 6.821261405944824, "learning_rate": 2.393984120052065e-06, "loss": 0.36350849, "memory(GiB)": 34.88, "step": 102060, "train_speed(iter/s)": 0.410781 }, { "acc": 0.92828751, "epoch": 2.763517721279073, "grad_norm": 16.30548858642578, "learning_rate": 2.393506638491619e-06, "loss": 0.38968902, "memory(GiB)": 34.88, "step": 102065, "train_speed(iter/s)": 0.410782 }, { "acc": 0.93312206, "epoch": 2.7636531015622885, "grad_norm": 5.892992973327637, "learning_rate": 2.393029189588887e-06, "loss": 0.36924961, "memory(GiB)": 34.88, "step": 102070, "train_speed(iter/s)": 0.410783 }, { "acc": 0.95042143, "epoch": 2.763788481845504, "grad_norm": 7.167569160461426, "learning_rate": 2.3925517733498505e-06, "loss": 0.28419485, "memory(GiB)": 34.88, "step": 102075, "train_speed(iter/s)": 0.410784 }, { "acc": 0.95784388, "epoch": 2.7639238621287197, "grad_norm": 11.744908332824707, "learning_rate": 2.3920743897804923e-06, "loss": 0.22484386, "memory(GiB)": 34.88, "step": 102080, "train_speed(iter/s)": 0.410785 }, { "acc": 0.93723841, "epoch": 2.764059242411935, "grad_norm": 9.04968547821045, "learning_rate": 2.3915970388867878e-06, "loss": 0.37425885, "memory(GiB)": 34.88, "step": 102085, "train_speed(iter/s)": 0.410786 }, { "acc": 0.91570492, "epoch": 2.764194622695151, "grad_norm": 15.056710243225098, "learning_rate": 2.3911197206747204e-06, "loss": 0.42791862, "memory(GiB)": 34.88, "step": 102090, "train_speed(iter/s)": 0.410787 }, { "acc": 0.93264599, "epoch": 2.764330002978366, "grad_norm": 5.541839122772217, "learning_rate": 2.39064243515027e-06, "loss": 0.46546335, "memory(GiB)": 34.88, "step": 102095, "train_speed(iter/s)": 0.410789 }, { "acc": 0.92204533, "epoch": 2.764465383261582, "grad_norm": 7.32314395904541, "learning_rate": 2.3901651823194116e-06, "loss": 0.43885336, "memory(GiB)": 34.88, "step": 102100, "train_speed(iter/s)": 0.41079 }, { "acc": 0.93610477, "epoch": 2.7646007635447973, "grad_norm": 5.510838031768799, "learning_rate": 2.3896879621881264e-06, "loss": 0.35900428, "memory(GiB)": 34.88, "step": 102105, "train_speed(iter/s)": 0.410791 }, { "acc": 0.93688412, "epoch": 2.7647361438280127, "grad_norm": 7.187366485595703, "learning_rate": 2.3892107747623926e-06, "loss": 0.3709187, "memory(GiB)": 34.88, "step": 102110, "train_speed(iter/s)": 0.410792 }, { "acc": 0.91370716, "epoch": 2.7648715241112285, "grad_norm": 9.823037147521973, "learning_rate": 2.3887336200481886e-06, "loss": 0.54781904, "memory(GiB)": 34.88, "step": 102115, "train_speed(iter/s)": 0.410793 }, { "acc": 0.94546242, "epoch": 2.765006904394444, "grad_norm": 14.0728178024292, "learning_rate": 2.388256498051489e-06, "loss": 0.31149843, "memory(GiB)": 34.88, "step": 102120, "train_speed(iter/s)": 0.410794 }, { "acc": 0.95684271, "epoch": 2.7651422846776597, "grad_norm": 7.482630252838135, "learning_rate": 2.387779408778273e-06, "loss": 0.26107128, "memory(GiB)": 34.88, "step": 102125, "train_speed(iter/s)": 0.410795 }, { "acc": 0.94556255, "epoch": 2.765277664960875, "grad_norm": 8.79780101776123, "learning_rate": 2.3873023522345176e-06, "loss": 0.24916129, "memory(GiB)": 34.88, "step": 102130, "train_speed(iter/s)": 0.410796 }, { "acc": 0.93474541, "epoch": 2.765413045244091, "grad_norm": 7.088486194610596, "learning_rate": 2.3868253284261958e-06, "loss": 0.2976944, "memory(GiB)": 34.88, "step": 102135, "train_speed(iter/s)": 0.410797 }, { "acc": 0.92030258, "epoch": 2.765548425527306, "grad_norm": 7.0590643882751465, "learning_rate": 2.3863483373592854e-06, "loss": 0.49178858, "memory(GiB)": 34.88, "step": 102140, "train_speed(iter/s)": 0.410798 }, { "acc": 0.93813, "epoch": 2.7656838058105215, "grad_norm": 7.829929828643799, "learning_rate": 2.3858713790397614e-06, "loss": 0.37051573, "memory(GiB)": 34.88, "step": 102145, "train_speed(iter/s)": 0.410799 }, { "acc": 0.90740519, "epoch": 2.7658191860937373, "grad_norm": 9.657431602478027, "learning_rate": 2.3853944534736004e-06, "loss": 0.56761532, "memory(GiB)": 34.88, "step": 102150, "train_speed(iter/s)": 0.4108 }, { "acc": 0.93588715, "epoch": 2.7659545663769527, "grad_norm": 3.1921188831329346, "learning_rate": 2.384917560666771e-06, "loss": 0.48802357, "memory(GiB)": 34.88, "step": 102155, "train_speed(iter/s)": 0.410802 }, { "acc": 0.93657227, "epoch": 2.7660899466601685, "grad_norm": 4.254071235656738, "learning_rate": 2.3844407006252554e-06, "loss": 0.32506118, "memory(GiB)": 34.88, "step": 102160, "train_speed(iter/s)": 0.410803 }, { "acc": 0.93937864, "epoch": 2.766225326943384, "grad_norm": 6.309614658355713, "learning_rate": 2.383963873355023e-06, "loss": 0.36205564, "memory(GiB)": 34.88, "step": 102165, "train_speed(iter/s)": 0.410804 }, { "acc": 0.92253437, "epoch": 2.7663607072265997, "grad_norm": 5.860782146453857, "learning_rate": 2.383487078862045e-06, "loss": 0.38219638, "memory(GiB)": 34.88, "step": 102170, "train_speed(iter/s)": 0.410805 }, { "acc": 0.94707947, "epoch": 2.766496087509815, "grad_norm": 2.479469060897827, "learning_rate": 2.3830103171522965e-06, "loss": 0.26996312, "memory(GiB)": 34.88, "step": 102175, "train_speed(iter/s)": 0.410806 }, { "acc": 0.92094269, "epoch": 2.7666314677930304, "grad_norm": 11.586315155029297, "learning_rate": 2.382533588231749e-06, "loss": 0.5476223, "memory(GiB)": 34.88, "step": 102180, "train_speed(iter/s)": 0.410807 }, { "acc": 0.92976856, "epoch": 2.766766848076246, "grad_norm": 7.643078327178955, "learning_rate": 2.382056892106378e-06, "loss": 0.35927253, "memory(GiB)": 34.88, "step": 102185, "train_speed(iter/s)": 0.410809 }, { "acc": 0.92455482, "epoch": 2.766902228359462, "grad_norm": 9.743566513061523, "learning_rate": 2.381580228782148e-06, "loss": 0.44554954, "memory(GiB)": 34.88, "step": 102190, "train_speed(iter/s)": 0.41081 }, { "acc": 0.93801708, "epoch": 2.7670376086426773, "grad_norm": 5.60173225402832, "learning_rate": 2.381103598265038e-06, "loss": 0.39844553, "memory(GiB)": 34.88, "step": 102195, "train_speed(iter/s)": 0.410811 }, { "acc": 0.91851311, "epoch": 2.7671729889258927, "grad_norm": 15.587785720825195, "learning_rate": 2.380627000561013e-06, "loss": 0.54002428, "memory(GiB)": 34.88, "step": 102200, "train_speed(iter/s)": 0.410812 }, { "acc": 0.93698368, "epoch": 2.7673083692091085, "grad_norm": 6.151906490325928, "learning_rate": 2.3801504356760475e-06, "loss": 0.30398908, "memory(GiB)": 34.88, "step": 102205, "train_speed(iter/s)": 0.410813 }, { "acc": 0.93673019, "epoch": 2.767443749492324, "grad_norm": 7.575311183929443, "learning_rate": 2.379673903616107e-06, "loss": 0.39139585, "memory(GiB)": 34.88, "step": 102210, "train_speed(iter/s)": 0.410814 }, { "acc": 0.93177195, "epoch": 2.767579129775539, "grad_norm": 7.845498561859131, "learning_rate": 2.3791974043871634e-06, "loss": 0.3638546, "memory(GiB)": 34.88, "step": 102215, "train_speed(iter/s)": 0.410815 }, { "acc": 0.93919868, "epoch": 2.767714510058755, "grad_norm": 8.411968231201172, "learning_rate": 2.3787209379951877e-06, "loss": 0.32318344, "memory(GiB)": 34.88, "step": 102220, "train_speed(iter/s)": 0.410816 }, { "acc": 0.93668461, "epoch": 2.767849890341971, "grad_norm": 9.237527847290039, "learning_rate": 2.3782445044461424e-06, "loss": 0.32507846, "memory(GiB)": 34.88, "step": 102225, "train_speed(iter/s)": 0.410817 }, { "acc": 0.92928543, "epoch": 2.767985270625186, "grad_norm": 8.501816749572754, "learning_rate": 2.3777681037460043e-06, "loss": 0.45078416, "memory(GiB)": 34.88, "step": 102230, "train_speed(iter/s)": 0.410818 }, { "acc": 0.93689499, "epoch": 2.7681206509084015, "grad_norm": 6.289240837097168, "learning_rate": 2.3772917359007343e-06, "loss": 0.33576474, "memory(GiB)": 34.88, "step": 102235, "train_speed(iter/s)": 0.410819 }, { "acc": 0.93983459, "epoch": 2.7682560311916173, "grad_norm": 13.361761093139648, "learning_rate": 2.376815400916305e-06, "loss": 0.36714709, "memory(GiB)": 34.88, "step": 102240, "train_speed(iter/s)": 0.41082 }, { "acc": 0.93790159, "epoch": 2.7683914114748327, "grad_norm": 2.9225687980651855, "learning_rate": 2.3763390987986786e-06, "loss": 0.37648759, "memory(GiB)": 34.88, "step": 102245, "train_speed(iter/s)": 0.410821 }, { "acc": 0.93250237, "epoch": 2.7685267917580485, "grad_norm": 3.8193788528442383, "learning_rate": 2.3758628295538237e-06, "loss": 0.34562416, "memory(GiB)": 34.88, "step": 102250, "train_speed(iter/s)": 0.410822 }, { "acc": 0.94446659, "epoch": 2.768662172041264, "grad_norm": 16.83656120300293, "learning_rate": 2.3753865931877094e-06, "loss": 0.31077411, "memory(GiB)": 34.88, "step": 102255, "train_speed(iter/s)": 0.410823 }, { "acc": 0.94721642, "epoch": 2.7687975523244797, "grad_norm": 7.746914863586426, "learning_rate": 2.3749103897062944e-06, "loss": 0.29436703, "memory(GiB)": 34.88, "step": 102260, "train_speed(iter/s)": 0.410824 }, { "acc": 0.92534418, "epoch": 2.768932932607695, "grad_norm": 5.958256721496582, "learning_rate": 2.3744342191155525e-06, "loss": 0.42758827, "memory(GiB)": 34.88, "step": 102265, "train_speed(iter/s)": 0.410825 }, { "acc": 0.92573128, "epoch": 2.7690683128909104, "grad_norm": 11.10184097290039, "learning_rate": 2.3739580814214424e-06, "loss": 0.49208236, "memory(GiB)": 34.88, "step": 102270, "train_speed(iter/s)": 0.410827 }, { "acc": 0.92810354, "epoch": 2.769203693174126, "grad_norm": 30.104324340820312, "learning_rate": 2.3734819766299334e-06, "loss": 0.42620811, "memory(GiB)": 34.88, "step": 102275, "train_speed(iter/s)": 0.410828 }, { "acc": 0.93662872, "epoch": 2.7693390734573415, "grad_norm": 23.734025955200195, "learning_rate": 2.3730059047469838e-06, "loss": 0.37066979, "memory(GiB)": 34.88, "step": 102280, "train_speed(iter/s)": 0.410829 }, { "acc": 0.92269592, "epoch": 2.7694744537405573, "grad_norm": 14.093892097473145, "learning_rate": 2.372529865778564e-06, "loss": 0.48141084, "memory(GiB)": 34.88, "step": 102285, "train_speed(iter/s)": 0.41083 }, { "acc": 0.92613163, "epoch": 2.7696098340237727, "grad_norm": 6.394861221313477, "learning_rate": 2.3720538597306342e-06, "loss": 0.36891284, "memory(GiB)": 34.88, "step": 102290, "train_speed(iter/s)": 0.410831 }, { "acc": 0.93417673, "epoch": 2.7697452143069885, "grad_norm": 12.542827606201172, "learning_rate": 2.3715778866091534e-06, "loss": 0.3905354, "memory(GiB)": 34.88, "step": 102295, "train_speed(iter/s)": 0.410832 }, { "acc": 0.94013176, "epoch": 2.769880594590204, "grad_norm": 6.394748687744141, "learning_rate": 2.371101946420091e-06, "loss": 0.38619311, "memory(GiB)": 34.88, "step": 102300, "train_speed(iter/s)": 0.410833 }, { "acc": 0.91813984, "epoch": 2.770015974873419, "grad_norm": 9.016633033752441, "learning_rate": 2.370626039169405e-06, "loss": 0.48997273, "memory(GiB)": 34.88, "step": 102305, "train_speed(iter/s)": 0.410834 }, { "acc": 0.93622732, "epoch": 2.770151355156635, "grad_norm": 7.64279842376709, "learning_rate": 2.37015016486306e-06, "loss": 0.36498108, "memory(GiB)": 34.88, "step": 102310, "train_speed(iter/s)": 0.410835 }, { "acc": 0.94106731, "epoch": 2.7702867354398504, "grad_norm": 11.06029224395752, "learning_rate": 2.369674323507011e-06, "loss": 0.37466116, "memory(GiB)": 34.88, "step": 102315, "train_speed(iter/s)": 0.410836 }, { "acc": 0.9300642, "epoch": 2.770422115723066, "grad_norm": 9.44541072845459, "learning_rate": 2.369198515107227e-06, "loss": 0.39857655, "memory(GiB)": 34.88, "step": 102320, "train_speed(iter/s)": 0.410837 }, { "acc": 0.93647785, "epoch": 2.7705574960062815, "grad_norm": 2.974381923675537, "learning_rate": 2.368722739669663e-06, "loss": 0.35800989, "memory(GiB)": 34.88, "step": 102325, "train_speed(iter/s)": 0.410838 }, { "acc": 0.93024731, "epoch": 2.7706928762894973, "grad_norm": 9.392151832580566, "learning_rate": 2.3682469972002807e-06, "loss": 0.39203658, "memory(GiB)": 34.88, "step": 102330, "train_speed(iter/s)": 0.410839 }, { "acc": 0.92900372, "epoch": 2.7708282565727127, "grad_norm": 14.891433715820312, "learning_rate": 2.367771287705041e-06, "loss": 0.37943399, "memory(GiB)": 34.88, "step": 102335, "train_speed(iter/s)": 0.41084 }, { "acc": 0.93833332, "epoch": 2.770963636855928, "grad_norm": 5.246331691741943, "learning_rate": 2.3672956111899e-06, "loss": 0.28372197, "memory(GiB)": 34.88, "step": 102340, "train_speed(iter/s)": 0.410841 }, { "acc": 0.93464317, "epoch": 2.771099017139144, "grad_norm": 6.832103252410889, "learning_rate": 2.3668199676608204e-06, "loss": 0.34493525, "memory(GiB)": 34.88, "step": 102345, "train_speed(iter/s)": 0.410842 }, { "acc": 0.9285553, "epoch": 2.7712343974223597, "grad_norm": 7.862177848815918, "learning_rate": 2.366344357123755e-06, "loss": 0.36788342, "memory(GiB)": 34.88, "step": 102350, "train_speed(iter/s)": 0.410843 }, { "acc": 0.94191408, "epoch": 2.771369777705575, "grad_norm": 8.225936889648438, "learning_rate": 2.365868779584668e-06, "loss": 0.38329723, "memory(GiB)": 34.88, "step": 102355, "train_speed(iter/s)": 0.410844 }, { "acc": 0.94908905, "epoch": 2.7715051579887904, "grad_norm": 7.792425155639648, "learning_rate": 2.3653932350495124e-06, "loss": 0.3218343, "memory(GiB)": 34.88, "step": 102360, "train_speed(iter/s)": 0.410845 }, { "acc": 0.92977295, "epoch": 2.771640538272006, "grad_norm": 7.726480007171631, "learning_rate": 2.364917723524249e-06, "loss": 0.45862131, "memory(GiB)": 34.88, "step": 102365, "train_speed(iter/s)": 0.410845 }, { "acc": 0.94374132, "epoch": 2.7717759185552215, "grad_norm": 23.282394409179688, "learning_rate": 2.3644422450148286e-06, "loss": 0.30735133, "memory(GiB)": 34.88, "step": 102370, "train_speed(iter/s)": 0.410847 }, { "acc": 0.93948298, "epoch": 2.771911298838437, "grad_norm": 8.408370018005371, "learning_rate": 2.3639667995272154e-06, "loss": 0.32485318, "memory(GiB)": 34.88, "step": 102375, "train_speed(iter/s)": 0.410848 }, { "acc": 0.93328533, "epoch": 2.7720466791216527, "grad_norm": 8.191017150878906, "learning_rate": 2.3634913870673617e-06, "loss": 0.37684312, "memory(GiB)": 34.88, "step": 102380, "train_speed(iter/s)": 0.410849 }, { "acc": 0.95280952, "epoch": 2.7721820594048685, "grad_norm": 4.63682222366333, "learning_rate": 2.363016007641218e-06, "loss": 0.22480834, "memory(GiB)": 34.88, "step": 102385, "train_speed(iter/s)": 0.41085 }, { "acc": 0.92272539, "epoch": 2.772317439688084, "grad_norm": 10.730902671813965, "learning_rate": 2.3625406612547484e-06, "loss": 0.43427086, "memory(GiB)": 34.88, "step": 102390, "train_speed(iter/s)": 0.410851 }, { "acc": 0.941712, "epoch": 2.772452819971299, "grad_norm": 7.496999740600586, "learning_rate": 2.3620653479139007e-06, "loss": 0.3223907, "memory(GiB)": 34.88, "step": 102395, "train_speed(iter/s)": 0.410852 }, { "acc": 0.94637775, "epoch": 2.772588200254515, "grad_norm": 6.933361530303955, "learning_rate": 2.361590067624634e-06, "loss": 0.3160291, "memory(GiB)": 34.88, "step": 102400, "train_speed(iter/s)": 0.410853 }, { "acc": 0.94696865, "epoch": 2.7727235805377304, "grad_norm": 6.077654838562012, "learning_rate": 2.361114820392895e-06, "loss": 0.28003612, "memory(GiB)": 34.88, "step": 102405, "train_speed(iter/s)": 0.410854 }, { "acc": 0.94567213, "epoch": 2.772858960820946, "grad_norm": 5.448111534118652, "learning_rate": 2.360639606224647e-06, "loss": 0.23258252, "memory(GiB)": 34.88, "step": 102410, "train_speed(iter/s)": 0.410855 }, { "acc": 0.91761408, "epoch": 2.7729943411041615, "grad_norm": 12.865776062011719, "learning_rate": 2.3601644251258364e-06, "loss": 0.46923075, "memory(GiB)": 34.88, "step": 102415, "train_speed(iter/s)": 0.410856 }, { "acc": 0.93213463, "epoch": 2.7731297213873773, "grad_norm": 5.361170291900635, "learning_rate": 2.3596892771024135e-06, "loss": 0.43262358, "memory(GiB)": 34.88, "step": 102420, "train_speed(iter/s)": 0.410857 }, { "acc": 0.94188213, "epoch": 2.7732651016705927, "grad_norm": 9.046732902526855, "learning_rate": 2.359214162160338e-06, "loss": 0.33335149, "memory(GiB)": 34.88, "step": 102425, "train_speed(iter/s)": 0.410858 }, { "acc": 0.91667004, "epoch": 2.773400481953808, "grad_norm": 8.20603084564209, "learning_rate": 2.358739080305556e-06, "loss": 0.46198139, "memory(GiB)": 34.88, "step": 102430, "train_speed(iter/s)": 0.410859 }, { "acc": 0.95863333, "epoch": 2.773535862237024, "grad_norm": 3.3913800716400146, "learning_rate": 2.3582640315440226e-06, "loss": 0.24801998, "memory(GiB)": 34.88, "step": 102435, "train_speed(iter/s)": 0.41086 }, { "acc": 0.92893429, "epoch": 2.773671242520239, "grad_norm": 7.9623517990112305, "learning_rate": 2.357789015881683e-06, "loss": 0.41378818, "memory(GiB)": 34.88, "step": 102440, "train_speed(iter/s)": 0.410861 }, { "acc": 0.93683376, "epoch": 2.773806622803455, "grad_norm": 3.9586262702941895, "learning_rate": 2.3573140333244948e-06, "loss": 0.36095245, "memory(GiB)": 34.88, "step": 102445, "train_speed(iter/s)": 0.410862 }, { "acc": 0.92839403, "epoch": 2.7739420030866704, "grad_norm": 8.876395225524902, "learning_rate": 2.3568390838784024e-06, "loss": 0.36212654, "memory(GiB)": 34.88, "step": 102450, "train_speed(iter/s)": 0.410864 }, { "acc": 0.92845478, "epoch": 2.774077383369886, "grad_norm": 7.584791660308838, "learning_rate": 2.3563641675493586e-06, "loss": 0.43231435, "memory(GiB)": 34.88, "step": 102455, "train_speed(iter/s)": 0.410865 }, { "acc": 0.94252319, "epoch": 2.7742127636531015, "grad_norm": 7.379406452178955, "learning_rate": 2.3558892843433133e-06, "loss": 0.365517, "memory(GiB)": 34.88, "step": 102460, "train_speed(iter/s)": 0.410866 }, { "acc": 0.93655195, "epoch": 2.774348143936317, "grad_norm": 3.6083152294158936, "learning_rate": 2.3554144342662123e-06, "loss": 0.37321095, "memory(GiB)": 34.88, "step": 102465, "train_speed(iter/s)": 0.410867 }, { "acc": 0.92892504, "epoch": 2.7744835242195327, "grad_norm": 9.293827056884766, "learning_rate": 2.354939617324007e-06, "loss": 0.43107677, "memory(GiB)": 34.88, "step": 102470, "train_speed(iter/s)": 0.410868 }, { "acc": 0.93943615, "epoch": 2.774618904502748, "grad_norm": 8.777549743652344, "learning_rate": 2.354464833522641e-06, "loss": 0.30274751, "memory(GiB)": 34.88, "step": 102475, "train_speed(iter/s)": 0.410869 }, { "acc": 0.93414059, "epoch": 2.774754284785964, "grad_norm": 8.745979309082031, "learning_rate": 2.3539900828680674e-06, "loss": 0.36590543, "memory(GiB)": 34.88, "step": 102480, "train_speed(iter/s)": 0.41087 }, { "acc": 0.94519453, "epoch": 2.7748896650691792, "grad_norm": 6.678377151489258, "learning_rate": 2.3535153653662297e-06, "loss": 0.28495789, "memory(GiB)": 34.88, "step": 102485, "train_speed(iter/s)": 0.410871 }, { "acc": 0.93818493, "epoch": 2.775025045352395, "grad_norm": 23.0174617767334, "learning_rate": 2.3530406810230764e-06, "loss": 0.34788489, "memory(GiB)": 34.88, "step": 102490, "train_speed(iter/s)": 0.410872 }, { "acc": 0.93374376, "epoch": 2.7751604256356104, "grad_norm": 2.9896762371063232, "learning_rate": 2.3525660298445525e-06, "loss": 0.32523894, "memory(GiB)": 34.88, "step": 102495, "train_speed(iter/s)": 0.410874 }, { "acc": 0.93163185, "epoch": 2.7752958059188257, "grad_norm": 6.120491027832031, "learning_rate": 2.3520914118366074e-06, "loss": 0.40983067, "memory(GiB)": 34.88, "step": 102500, "train_speed(iter/s)": 0.410874 }, { "acc": 0.94154711, "epoch": 2.7754311862020415, "grad_norm": 4.269775390625, "learning_rate": 2.3516168270051832e-06, "loss": 0.31910017, "memory(GiB)": 34.88, "step": 102505, "train_speed(iter/s)": 0.410876 }, { "acc": 0.93710251, "epoch": 2.7755665664852573, "grad_norm": 5.737485885620117, "learning_rate": 2.351142275356222e-06, "loss": 0.38760052, "memory(GiB)": 34.88, "step": 102510, "train_speed(iter/s)": 0.410876 }, { "acc": 0.93837872, "epoch": 2.7757019467684727, "grad_norm": 8.102428436279297, "learning_rate": 2.350667756895677e-06, "loss": 0.36232576, "memory(GiB)": 34.88, "step": 102515, "train_speed(iter/s)": 0.410877 }, { "acc": 0.93653793, "epoch": 2.775837327051688, "grad_norm": 4.119956016540527, "learning_rate": 2.3501932716294855e-06, "loss": 0.40440183, "memory(GiB)": 34.88, "step": 102520, "train_speed(iter/s)": 0.410879 }, { "acc": 0.93110313, "epoch": 2.775972707334904, "grad_norm": 6.860453128814697, "learning_rate": 2.349718819563593e-06, "loss": 0.37383881, "memory(GiB)": 34.88, "step": 102525, "train_speed(iter/s)": 0.41088 }, { "acc": 0.94777803, "epoch": 2.7761080876181192, "grad_norm": 3.468201160430908, "learning_rate": 2.349244400703944e-06, "loss": 0.37367139, "memory(GiB)": 34.88, "step": 102530, "train_speed(iter/s)": 0.41088 }, { "acc": 0.91725311, "epoch": 2.7762434679013346, "grad_norm": 6.217010498046875, "learning_rate": 2.3487700150564827e-06, "loss": 0.43893795, "memory(GiB)": 34.88, "step": 102535, "train_speed(iter/s)": 0.410881 }, { "acc": 0.94335861, "epoch": 2.7763788481845504, "grad_norm": 8.49708366394043, "learning_rate": 2.348295662627151e-06, "loss": 0.31812768, "memory(GiB)": 34.88, "step": 102540, "train_speed(iter/s)": 0.410882 }, { "acc": 0.93463993, "epoch": 2.776514228467766, "grad_norm": 4.097367286682129, "learning_rate": 2.3478213434218857e-06, "loss": 0.38299749, "memory(GiB)": 34.88, "step": 102545, "train_speed(iter/s)": 0.410883 }, { "acc": 0.93599892, "epoch": 2.7766496087509815, "grad_norm": 6.85260534286499, "learning_rate": 2.3473470574466365e-06, "loss": 0.36323543, "memory(GiB)": 34.88, "step": 102550, "train_speed(iter/s)": 0.410885 }, { "acc": 0.92982702, "epoch": 2.776784989034197, "grad_norm": 12.306679725646973, "learning_rate": 2.34687280470734e-06, "loss": 0.35624537, "memory(GiB)": 34.88, "step": 102555, "train_speed(iter/s)": 0.410886 }, { "acc": 0.92922306, "epoch": 2.7769203693174127, "grad_norm": 16.92385482788086, "learning_rate": 2.3463985852099388e-06, "loss": 0.41118107, "memory(GiB)": 34.88, "step": 102560, "train_speed(iter/s)": 0.410887 }, { "acc": 0.93282099, "epoch": 2.777055749600628, "grad_norm": 5.398044109344482, "learning_rate": 2.3459243989603724e-06, "loss": 0.37755322, "memory(GiB)": 34.88, "step": 102565, "train_speed(iter/s)": 0.410888 }, { "acc": 0.94501095, "epoch": 2.777191129883844, "grad_norm": 8.291092872619629, "learning_rate": 2.345450245964584e-06, "loss": 0.28399899, "memory(GiB)": 34.88, "step": 102570, "train_speed(iter/s)": 0.410889 }, { "acc": 0.93476276, "epoch": 2.7773265101670592, "grad_norm": 23.06479263305664, "learning_rate": 2.3449761262285087e-06, "loss": 0.37121351, "memory(GiB)": 34.88, "step": 102575, "train_speed(iter/s)": 0.41089 }, { "acc": 0.93568954, "epoch": 2.777461890450275, "grad_norm": 5.779806613922119, "learning_rate": 2.344502039758088e-06, "loss": 0.3899416, "memory(GiB)": 34.88, "step": 102580, "train_speed(iter/s)": 0.410891 }, { "acc": 0.93452101, "epoch": 2.7775972707334904, "grad_norm": 2.7605020999908447, "learning_rate": 2.344027986559263e-06, "loss": 0.36153154, "memory(GiB)": 34.88, "step": 102585, "train_speed(iter/s)": 0.410892 }, { "acc": 0.93427906, "epoch": 2.7777326510167057, "grad_norm": 7.977004051208496, "learning_rate": 2.3435539666379674e-06, "loss": 0.36555438, "memory(GiB)": 34.88, "step": 102590, "train_speed(iter/s)": 0.410893 }, { "acc": 0.92782078, "epoch": 2.7778680312999215, "grad_norm": 10.622697830200195, "learning_rate": 2.3430799800001426e-06, "loss": 0.4176847, "memory(GiB)": 34.88, "step": 102595, "train_speed(iter/s)": 0.410894 }, { "acc": 0.9329278, "epoch": 2.778003411583137, "grad_norm": 8.622373580932617, "learning_rate": 2.342606026651725e-06, "loss": 0.37543945, "memory(GiB)": 34.88, "step": 102600, "train_speed(iter/s)": 0.410895 }, { "acc": 0.96114645, "epoch": 2.7781387918663527, "grad_norm": 6.44835090637207, "learning_rate": 2.3421321065986543e-06, "loss": 0.22138805, "memory(GiB)": 34.88, "step": 102605, "train_speed(iter/s)": 0.410897 }, { "acc": 0.93709593, "epoch": 2.778274172149568, "grad_norm": 13.208085060119629, "learning_rate": 2.341658219846863e-06, "loss": 0.3523385, "memory(GiB)": 34.88, "step": 102610, "train_speed(iter/s)": 0.410898 }, { "acc": 0.92187119, "epoch": 2.778409552432784, "grad_norm": 7.868519306182861, "learning_rate": 2.3411843664022894e-06, "loss": 0.51114836, "memory(GiB)": 34.88, "step": 102615, "train_speed(iter/s)": 0.410899 }, { "acc": 0.91177864, "epoch": 2.7785449327159992, "grad_norm": 10.189793586730957, "learning_rate": 2.34071054627087e-06, "loss": 0.62504711, "memory(GiB)": 34.88, "step": 102620, "train_speed(iter/s)": 0.4109 }, { "acc": 0.93596973, "epoch": 2.7786803129992146, "grad_norm": 10.970541954040527, "learning_rate": 2.3402367594585422e-06, "loss": 0.41501184, "memory(GiB)": 34.88, "step": 102625, "train_speed(iter/s)": 0.410901 }, { "acc": 0.94161148, "epoch": 2.7788156932824304, "grad_norm": 4.982310771942139, "learning_rate": 2.339763005971236e-06, "loss": 0.29184008, "memory(GiB)": 34.88, "step": 102630, "train_speed(iter/s)": 0.410902 }, { "acc": 0.93194237, "epoch": 2.7789510735656457, "grad_norm": 10.476902961730957, "learning_rate": 2.3392892858148895e-06, "loss": 0.3991941, "memory(GiB)": 34.88, "step": 102635, "train_speed(iter/s)": 0.410903 }, { "acc": 0.92421284, "epoch": 2.7790864538488615, "grad_norm": 5.695716381072998, "learning_rate": 2.338815598995438e-06, "loss": 0.45123959, "memory(GiB)": 34.88, "step": 102640, "train_speed(iter/s)": 0.410904 }, { "acc": 0.93595762, "epoch": 2.779221834132077, "grad_norm": 8.416764259338379, "learning_rate": 2.3383419455188125e-06, "loss": 0.36750584, "memory(GiB)": 34.88, "step": 102645, "train_speed(iter/s)": 0.410905 }, { "acc": 0.94262829, "epoch": 2.7793572144152927, "grad_norm": 6.8534345626831055, "learning_rate": 2.337868325390948e-06, "loss": 0.29639952, "memory(GiB)": 34.88, "step": 102650, "train_speed(iter/s)": 0.410907 }, { "acc": 0.93160744, "epoch": 2.779492594698508, "grad_norm": 5.4598565101623535, "learning_rate": 2.3373947386177767e-06, "loss": 0.38285263, "memory(GiB)": 34.88, "step": 102655, "train_speed(iter/s)": 0.410908 }, { "acc": 0.93829918, "epoch": 2.7796279749817234, "grad_norm": 8.819849014282227, "learning_rate": 2.3369211852052336e-06, "loss": 0.34358678, "memory(GiB)": 34.88, "step": 102660, "train_speed(iter/s)": 0.410909 }, { "acc": 0.93966627, "epoch": 2.7797633552649392, "grad_norm": 5.531440258026123, "learning_rate": 2.336447665159248e-06, "loss": 0.36189885, "memory(GiB)": 34.88, "step": 102665, "train_speed(iter/s)": 0.41091 }, { "acc": 0.92395115, "epoch": 2.779898735548155, "grad_norm": 11.715987205505371, "learning_rate": 2.335974178485752e-06, "loss": 0.4213439, "memory(GiB)": 34.88, "step": 102670, "train_speed(iter/s)": 0.410911 }, { "acc": 0.93251419, "epoch": 2.7800341158313704, "grad_norm": 19.751697540283203, "learning_rate": 2.3355007251906797e-06, "loss": 0.41217227, "memory(GiB)": 34.88, "step": 102675, "train_speed(iter/s)": 0.410912 }, { "acc": 0.93072643, "epoch": 2.7801694961145857, "grad_norm": 5.031949520111084, "learning_rate": 2.3350273052799587e-06, "loss": 0.37790833, "memory(GiB)": 34.88, "step": 102680, "train_speed(iter/s)": 0.410912 }, { "acc": 0.94760103, "epoch": 2.7803048763978016, "grad_norm": 6.857646942138672, "learning_rate": 2.33455391875952e-06, "loss": 0.3082284, "memory(GiB)": 34.88, "step": 102685, "train_speed(iter/s)": 0.410913 }, { "acc": 0.9457037, "epoch": 2.780440256681017, "grad_norm": 6.117770195007324, "learning_rate": 2.334080565635295e-06, "loss": 0.30033767, "memory(GiB)": 34.88, "step": 102690, "train_speed(iter/s)": 0.410915 }, { "acc": 0.93486328, "epoch": 2.7805756369642323, "grad_norm": 10.65040397644043, "learning_rate": 2.333607245913215e-06, "loss": 0.41518621, "memory(GiB)": 34.88, "step": 102695, "train_speed(iter/s)": 0.410915 }, { "acc": 0.93605804, "epoch": 2.780711017247448, "grad_norm": 4.499954700469971, "learning_rate": 2.3331339595992044e-06, "loss": 0.44202991, "memory(GiB)": 34.88, "step": 102700, "train_speed(iter/s)": 0.410916 }, { "acc": 0.93754759, "epoch": 2.780846397530664, "grad_norm": 8.734610557556152, "learning_rate": 2.332660706699196e-06, "loss": 0.28390422, "memory(GiB)": 34.88, "step": 102705, "train_speed(iter/s)": 0.410917 }, { "acc": 0.93890915, "epoch": 2.7809817778138792, "grad_norm": 3.560060739517212, "learning_rate": 2.332187487219118e-06, "loss": 0.31495082, "memory(GiB)": 34.88, "step": 102710, "train_speed(iter/s)": 0.410918 }, { "acc": 0.92916803, "epoch": 2.7811171580970946, "grad_norm": 5.887285232543945, "learning_rate": 2.331714301164896e-06, "loss": 0.39514875, "memory(GiB)": 34.88, "step": 102715, "train_speed(iter/s)": 0.410919 }, { "acc": 0.92685032, "epoch": 2.7812525383803104, "grad_norm": 7.770122051239014, "learning_rate": 2.331241148542459e-06, "loss": 0.41721087, "memory(GiB)": 34.88, "step": 102720, "train_speed(iter/s)": 0.41092 }, { "acc": 0.94920177, "epoch": 2.7813879186635257, "grad_norm": 6.4798688888549805, "learning_rate": 2.3307680293577344e-06, "loss": 0.26834388, "memory(GiB)": 34.88, "step": 102725, "train_speed(iter/s)": 0.410922 }, { "acc": 0.94505386, "epoch": 2.7815232989467416, "grad_norm": 7.461825847625732, "learning_rate": 2.3302949436166505e-06, "loss": 0.34083831, "memory(GiB)": 34.88, "step": 102730, "train_speed(iter/s)": 0.410923 }, { "acc": 0.92253914, "epoch": 2.781658679229957, "grad_norm": 8.134988784790039, "learning_rate": 2.32982189132513e-06, "loss": 0.42451367, "memory(GiB)": 34.88, "step": 102735, "train_speed(iter/s)": 0.410924 }, { "acc": 0.91788559, "epoch": 2.7817940595131727, "grad_norm": 7.773365497589111, "learning_rate": 2.3293488724891002e-06, "loss": 0.4851841, "memory(GiB)": 34.88, "step": 102740, "train_speed(iter/s)": 0.410925 }, { "acc": 0.94869814, "epoch": 2.781929439796388, "grad_norm": 4.515909194946289, "learning_rate": 2.3288758871144872e-06, "loss": 0.26396065, "memory(GiB)": 34.88, "step": 102745, "train_speed(iter/s)": 0.410926 }, { "acc": 0.9432539, "epoch": 2.7820648200796034, "grad_norm": 8.17917537689209, "learning_rate": 2.3284029352072183e-06, "loss": 0.32055564, "memory(GiB)": 34.88, "step": 102750, "train_speed(iter/s)": 0.410927 }, { "acc": 0.94777184, "epoch": 2.7822002003628192, "grad_norm": 9.5441312789917, "learning_rate": 2.327930016773214e-06, "loss": 0.33630841, "memory(GiB)": 34.88, "step": 102755, "train_speed(iter/s)": 0.410928 }, { "acc": 0.93157454, "epoch": 2.7823355806460346, "grad_norm": 4.76334810256958, "learning_rate": 2.3274571318183996e-06, "loss": 0.35977585, "memory(GiB)": 34.88, "step": 102760, "train_speed(iter/s)": 0.410929 }, { "acc": 0.93677349, "epoch": 2.7824709609292504, "grad_norm": 4.388449668884277, "learning_rate": 2.3269842803487024e-06, "loss": 0.38237009, "memory(GiB)": 34.88, "step": 102765, "train_speed(iter/s)": 0.41093 }, { "acc": 0.92847824, "epoch": 2.7826063412124658, "grad_norm": 3.402477502822876, "learning_rate": 2.3265114623700403e-06, "loss": 0.44244919, "memory(GiB)": 34.88, "step": 102770, "train_speed(iter/s)": 0.410931 }, { "acc": 0.93340321, "epoch": 2.7827417214956816, "grad_norm": 13.004941940307617, "learning_rate": 2.3260386778883396e-06, "loss": 0.3320581, "memory(GiB)": 34.88, "step": 102775, "train_speed(iter/s)": 0.410932 }, { "acc": 0.92339716, "epoch": 2.782877101778897, "grad_norm": 15.203946113586426, "learning_rate": 2.325565926909522e-06, "loss": 0.43197956, "memory(GiB)": 34.88, "step": 102780, "train_speed(iter/s)": 0.410933 }, { "acc": 0.94797525, "epoch": 2.7830124820621123, "grad_norm": 5.0980143547058105, "learning_rate": 2.325093209439512e-06, "loss": 0.30316579, "memory(GiB)": 34.88, "step": 102785, "train_speed(iter/s)": 0.410934 }, { "acc": 0.93019762, "epoch": 2.783147862345328, "grad_norm": 5.572633743286133, "learning_rate": 2.3246205254842275e-06, "loss": 0.38578937, "memory(GiB)": 34.88, "step": 102790, "train_speed(iter/s)": 0.410935 }, { "acc": 0.92165041, "epoch": 2.7832832426285434, "grad_norm": 5.760158538818359, "learning_rate": 2.324147875049591e-06, "loss": 0.43810921, "memory(GiB)": 34.88, "step": 102795, "train_speed(iter/s)": 0.410936 }, { "acc": 0.94154205, "epoch": 2.7834186229117592, "grad_norm": 6.1211628913879395, "learning_rate": 2.3236752581415258e-06, "loss": 0.37458699, "memory(GiB)": 34.88, "step": 102800, "train_speed(iter/s)": 0.410937 }, { "acc": 0.9258667, "epoch": 2.7835540031949746, "grad_norm": 20.506431579589844, "learning_rate": 2.3232026747659484e-06, "loss": 0.3683938, "memory(GiB)": 34.88, "step": 102805, "train_speed(iter/s)": 0.410938 }, { "acc": 0.94645481, "epoch": 2.7836893834781904, "grad_norm": 9.248235702514648, "learning_rate": 2.3227301249287816e-06, "loss": 0.3301512, "memory(GiB)": 34.88, "step": 102810, "train_speed(iter/s)": 0.41094 }, { "acc": 0.93106232, "epoch": 2.7838247637614058, "grad_norm": 9.606417655944824, "learning_rate": 2.322257608635944e-06, "loss": 0.3347199, "memory(GiB)": 34.88, "step": 102815, "train_speed(iter/s)": 0.410941 }, { "acc": 0.94081669, "epoch": 2.783960144044621, "grad_norm": 9.464576721191406, "learning_rate": 2.321785125893357e-06, "loss": 0.29826899, "memory(GiB)": 34.88, "step": 102820, "train_speed(iter/s)": 0.410942 }, { "acc": 0.94137192, "epoch": 2.784095524327837, "grad_norm": 10.331947326660156, "learning_rate": 2.3213126767069336e-06, "loss": 0.32448413, "memory(GiB)": 34.88, "step": 102825, "train_speed(iter/s)": 0.410943 }, { "acc": 0.93733301, "epoch": 2.7842309046110527, "grad_norm": 8.7271089553833, "learning_rate": 2.3208402610826e-06, "loss": 0.36216371, "memory(GiB)": 34.88, "step": 102830, "train_speed(iter/s)": 0.410944 }, { "acc": 0.94778328, "epoch": 2.784366284894268, "grad_norm": 6.0135321617126465, "learning_rate": 2.3203678790262694e-06, "loss": 0.32361684, "memory(GiB)": 34.88, "step": 102835, "train_speed(iter/s)": 0.410945 }, { "acc": 0.94164696, "epoch": 2.7845016651774834, "grad_norm": 7.742560386657715, "learning_rate": 2.3198955305438588e-06, "loss": 0.30343482, "memory(GiB)": 34.88, "step": 102840, "train_speed(iter/s)": 0.410946 }, { "acc": 0.94825497, "epoch": 2.7846370454606992, "grad_norm": 5.396263599395752, "learning_rate": 2.3194232156412863e-06, "loss": 0.27199442, "memory(GiB)": 34.88, "step": 102845, "train_speed(iter/s)": 0.410947 }, { "acc": 0.93559875, "epoch": 2.7847724257439146, "grad_norm": 21.699413299560547, "learning_rate": 2.318950934324469e-06, "loss": 0.37255712, "memory(GiB)": 34.88, "step": 102850, "train_speed(iter/s)": 0.410948 }, { "acc": 0.92896967, "epoch": 2.78490780602713, "grad_norm": 9.061453819274902, "learning_rate": 2.318478686599325e-06, "loss": 0.47353039, "memory(GiB)": 34.88, "step": 102855, "train_speed(iter/s)": 0.410949 }, { "acc": 0.94697819, "epoch": 2.7850431863103458, "grad_norm": 5.948360919952393, "learning_rate": 2.3180064724717637e-06, "loss": 0.35262156, "memory(GiB)": 34.88, "step": 102860, "train_speed(iter/s)": 0.41095 }, { "acc": 0.94755135, "epoch": 2.7851785665935616, "grad_norm": 2.402843713760376, "learning_rate": 2.31753429194771e-06, "loss": 0.27698417, "memory(GiB)": 34.88, "step": 102865, "train_speed(iter/s)": 0.410951 }, { "acc": 0.93825016, "epoch": 2.785313946876777, "grad_norm": 7.796205043792725, "learning_rate": 2.3170621450330707e-06, "loss": 0.34563591, "memory(GiB)": 34.88, "step": 102870, "train_speed(iter/s)": 0.410952 }, { "acc": 0.92273417, "epoch": 2.7854493271599923, "grad_norm": 9.91158390045166, "learning_rate": 2.3165900317337655e-06, "loss": 0.41316104, "memory(GiB)": 34.88, "step": 102875, "train_speed(iter/s)": 0.410953 }, { "acc": 0.94359131, "epoch": 2.785584707443208, "grad_norm": 6.810786247253418, "learning_rate": 2.316117952055705e-06, "loss": 0.2843225, "memory(GiB)": 34.88, "step": 102880, "train_speed(iter/s)": 0.410954 }, { "acc": 0.93744164, "epoch": 2.7857200877264234, "grad_norm": 6.2916579246521, "learning_rate": 2.315645906004804e-06, "loss": 0.33963037, "memory(GiB)": 34.88, "step": 102885, "train_speed(iter/s)": 0.410955 }, { "acc": 0.94335747, "epoch": 2.7858554680096392, "grad_norm": 8.466424942016602, "learning_rate": 2.315173893586979e-06, "loss": 0.32669287, "memory(GiB)": 34.88, "step": 102890, "train_speed(iter/s)": 0.410956 }, { "acc": 0.94005756, "epoch": 2.7859908482928546, "grad_norm": 9.233318328857422, "learning_rate": 2.314701914808136e-06, "loss": 0.35082493, "memory(GiB)": 34.88, "step": 102895, "train_speed(iter/s)": 0.410957 }, { "acc": 0.93986187, "epoch": 2.7861262285760704, "grad_norm": 5.213565349578857, "learning_rate": 2.314229969674195e-06, "loss": 0.34708855, "memory(GiB)": 34.88, "step": 102900, "train_speed(iter/s)": 0.410958 }, { "acc": 0.93195839, "epoch": 2.7862616088592858, "grad_norm": 3.479283094406128, "learning_rate": 2.3137580581910634e-06, "loss": 0.36182616, "memory(GiB)": 34.88, "step": 102905, "train_speed(iter/s)": 0.41096 }, { "acc": 0.9419652, "epoch": 2.786396989142501, "grad_norm": 9.09173583984375, "learning_rate": 2.3132861803646555e-06, "loss": 0.36416121, "memory(GiB)": 34.88, "step": 102910, "train_speed(iter/s)": 0.410961 }, { "acc": 0.93064718, "epoch": 2.786532369425717, "grad_norm": 9.046441078186035, "learning_rate": 2.312814336200877e-06, "loss": 0.37065027, "memory(GiB)": 34.88, "step": 102915, "train_speed(iter/s)": 0.410962 }, { "acc": 0.92480078, "epoch": 2.7866677497089323, "grad_norm": 9.786873817443848, "learning_rate": 2.312342525705647e-06, "loss": 0.47745447, "memory(GiB)": 34.88, "step": 102920, "train_speed(iter/s)": 0.410963 }, { "acc": 0.94854202, "epoch": 2.786803129992148, "grad_norm": 8.569191932678223, "learning_rate": 2.311870748884871e-06, "loss": 0.26607842, "memory(GiB)": 34.88, "step": 102925, "train_speed(iter/s)": 0.410964 }, { "acc": 0.93775234, "epoch": 2.7869385102753634, "grad_norm": 5.130374431610107, "learning_rate": 2.3113990057444586e-06, "loss": 0.30465107, "memory(GiB)": 34.88, "step": 102930, "train_speed(iter/s)": 0.410965 }, { "acc": 0.93052959, "epoch": 2.7870738905585792, "grad_norm": 6.046244144439697, "learning_rate": 2.3109272962903194e-06, "loss": 0.41629543, "memory(GiB)": 34.88, "step": 102935, "train_speed(iter/s)": 0.410966 }, { "acc": 0.93330498, "epoch": 2.7872092708417946, "grad_norm": 7.052031517028809, "learning_rate": 2.310455620528364e-06, "loss": 0.38691928, "memory(GiB)": 34.88, "step": 102940, "train_speed(iter/s)": 0.410967 }, { "acc": 0.93919001, "epoch": 2.78734465112501, "grad_norm": 17.0367488861084, "learning_rate": 2.309983978464502e-06, "loss": 0.36482935, "memory(GiB)": 34.88, "step": 102945, "train_speed(iter/s)": 0.410968 }, { "acc": 0.94873114, "epoch": 2.7874800314082258, "grad_norm": 3.9800684452056885, "learning_rate": 2.3095123701046355e-06, "loss": 0.23494086, "memory(GiB)": 34.88, "step": 102950, "train_speed(iter/s)": 0.410969 }, { "acc": 0.92807312, "epoch": 2.787615411691441, "grad_norm": 8.21523666381836, "learning_rate": 2.3090407954546815e-06, "loss": 0.32769465, "memory(GiB)": 34.88, "step": 102955, "train_speed(iter/s)": 0.410971 }, { "acc": 0.93907862, "epoch": 2.787750791974657, "grad_norm": 10.787236213684082, "learning_rate": 2.3085692545205423e-06, "loss": 0.3852936, "memory(GiB)": 34.88, "step": 102960, "train_speed(iter/s)": 0.410972 }, { "acc": 0.94161978, "epoch": 2.7878861722578723, "grad_norm": 14.618034362792969, "learning_rate": 2.308097747308124e-06, "loss": 0.33572357, "memory(GiB)": 34.88, "step": 102965, "train_speed(iter/s)": 0.410973 }, { "acc": 0.92822237, "epoch": 2.788021552541088, "grad_norm": 6.5578694343566895, "learning_rate": 2.307626273823334e-06, "loss": 0.44708467, "memory(GiB)": 34.88, "step": 102970, "train_speed(iter/s)": 0.410974 }, { "acc": 0.93648968, "epoch": 2.7881569328243034, "grad_norm": 11.54288387298584, "learning_rate": 2.3071548340720787e-06, "loss": 0.35640981, "memory(GiB)": 34.88, "step": 102975, "train_speed(iter/s)": 0.410975 }, { "acc": 0.92967672, "epoch": 2.788292313107519, "grad_norm": 50.515586853027344, "learning_rate": 2.3066834280602663e-06, "loss": 0.37880177, "memory(GiB)": 34.88, "step": 102980, "train_speed(iter/s)": 0.410976 }, { "acc": 0.93941936, "epoch": 2.7884276933907346, "grad_norm": 5.717289924621582, "learning_rate": 2.3062120557937963e-06, "loss": 0.37392585, "memory(GiB)": 34.88, "step": 102985, "train_speed(iter/s)": 0.410977 }, { "acc": 0.92339077, "epoch": 2.7885630736739504, "grad_norm": 8.321718215942383, "learning_rate": 2.3057407172785802e-06, "loss": 0.50268612, "memory(GiB)": 34.88, "step": 102990, "train_speed(iter/s)": 0.410978 }, { "acc": 0.92974577, "epoch": 2.7886984539571658, "grad_norm": 6.0861310958862305, "learning_rate": 2.3052694125205176e-06, "loss": 0.40661974, "memory(GiB)": 34.88, "step": 102995, "train_speed(iter/s)": 0.410979 }, { "acc": 0.93774214, "epoch": 2.788833834240381, "grad_norm": 11.745772361755371, "learning_rate": 2.304798141525516e-06, "loss": 0.39981272, "memory(GiB)": 34.88, "step": 103000, "train_speed(iter/s)": 0.41098 }, { "acc": 0.93114204, "epoch": 2.788969214523597, "grad_norm": 7.7369384765625, "learning_rate": 2.304326904299476e-06, "loss": 0.31849523, "memory(GiB)": 34.88, "step": 103005, "train_speed(iter/s)": 0.410981 }, { "acc": 0.94004822, "epoch": 2.7891045948068123, "grad_norm": 8.21523380279541, "learning_rate": 2.3038557008483013e-06, "loss": 0.39609413, "memory(GiB)": 34.88, "step": 103010, "train_speed(iter/s)": 0.410982 }, { "acc": 0.93339787, "epoch": 2.7892399750900276, "grad_norm": 7.291988849639893, "learning_rate": 2.303384531177897e-06, "loss": 0.34134734, "memory(GiB)": 34.88, "step": 103015, "train_speed(iter/s)": 0.410984 }, { "acc": 0.93970242, "epoch": 2.7893753553732434, "grad_norm": 5.940826892852783, "learning_rate": 2.3029133952941606e-06, "loss": 0.29991798, "memory(GiB)": 34.88, "step": 103020, "train_speed(iter/s)": 0.410985 }, { "acc": 0.94805374, "epoch": 2.7895107356564592, "grad_norm": 7.736087322235107, "learning_rate": 2.302442293203001e-06, "loss": 0.26626208, "memory(GiB)": 34.88, "step": 103025, "train_speed(iter/s)": 0.410985 }, { "acc": 0.94242554, "epoch": 2.7896461159396746, "grad_norm": 8.794815063476562, "learning_rate": 2.301971224910314e-06, "loss": 0.32033372, "memory(GiB)": 34.88, "step": 103030, "train_speed(iter/s)": 0.410986 }, { "acc": 0.94649916, "epoch": 2.78978149622289, "grad_norm": 6.753493309020996, "learning_rate": 2.3015001904220036e-06, "loss": 0.29456906, "memory(GiB)": 34.88, "step": 103035, "train_speed(iter/s)": 0.410987 }, { "acc": 0.94119396, "epoch": 2.7899168765061058, "grad_norm": 7.310902118682861, "learning_rate": 2.3010291897439664e-06, "loss": 0.33381941, "memory(GiB)": 34.88, "step": 103040, "train_speed(iter/s)": 0.410988 }, { "acc": 0.93572388, "epoch": 2.790052256789321, "grad_norm": 7.312131404876709, "learning_rate": 2.300558222882109e-06, "loss": 0.35634592, "memory(GiB)": 34.88, "step": 103045, "train_speed(iter/s)": 0.410989 }, { "acc": 0.93737211, "epoch": 2.790187637072537, "grad_norm": 26.562639236450195, "learning_rate": 2.300087289842328e-06, "loss": 0.33602695, "memory(GiB)": 34.88, "step": 103050, "train_speed(iter/s)": 0.41099 }, { "acc": 0.93105488, "epoch": 2.7903230173557523, "grad_norm": 8.425113677978516, "learning_rate": 2.299616390630518e-06, "loss": 0.33848577, "memory(GiB)": 34.88, "step": 103055, "train_speed(iter/s)": 0.410991 }, { "acc": 0.94281063, "epoch": 2.790458397638968, "grad_norm": 8.11621379852295, "learning_rate": 2.299145525252587e-06, "loss": 0.32690637, "memory(GiB)": 34.88, "step": 103060, "train_speed(iter/s)": 0.410993 }, { "acc": 0.9371953, "epoch": 2.7905937779221834, "grad_norm": 10.474522590637207, "learning_rate": 2.298674693714427e-06, "loss": 0.4260397, "memory(GiB)": 34.88, "step": 103065, "train_speed(iter/s)": 0.410994 }, { "acc": 0.94499855, "epoch": 2.790729158205399, "grad_norm": 7.625406742095947, "learning_rate": 2.2982038960219394e-06, "loss": 0.29901705, "memory(GiB)": 34.88, "step": 103070, "train_speed(iter/s)": 0.410994 }, { "acc": 0.93476028, "epoch": 2.7908645384886146, "grad_norm": 6.063112735748291, "learning_rate": 2.297733132181017e-06, "loss": 0.41244226, "memory(GiB)": 34.88, "step": 103075, "train_speed(iter/s)": 0.410996 }, { "acc": 0.93783932, "epoch": 2.79099991877183, "grad_norm": 4.316243648529053, "learning_rate": 2.2972624021975647e-06, "loss": 0.34249535, "memory(GiB)": 34.88, "step": 103080, "train_speed(iter/s)": 0.410996 }, { "acc": 0.94121532, "epoch": 2.7911352990550458, "grad_norm": 8.205180168151855, "learning_rate": 2.2967917060774747e-06, "loss": 0.32017872, "memory(GiB)": 34.88, "step": 103085, "train_speed(iter/s)": 0.410997 }, { "acc": 0.94892693, "epoch": 2.791270679338261, "grad_norm": 6.855834484100342, "learning_rate": 2.2963210438266403e-06, "loss": 0.25373974, "memory(GiB)": 34.88, "step": 103090, "train_speed(iter/s)": 0.410999 }, { "acc": 0.92595997, "epoch": 2.791406059621477, "grad_norm": 9.810406684875488, "learning_rate": 2.2958504154509647e-06, "loss": 0.39003675, "memory(GiB)": 34.88, "step": 103095, "train_speed(iter/s)": 0.411 }, { "acc": 0.94771805, "epoch": 2.7915414399046923, "grad_norm": 6.2462849617004395, "learning_rate": 2.2953798209563376e-06, "loss": 0.28731267, "memory(GiB)": 34.88, "step": 103100, "train_speed(iter/s)": 0.411001 }, { "acc": 0.9532383, "epoch": 2.7916768201879076, "grad_norm": 6.52974796295166, "learning_rate": 2.2949092603486586e-06, "loss": 0.23120234, "memory(GiB)": 34.88, "step": 103105, "train_speed(iter/s)": 0.411002 }, { "acc": 0.94007826, "epoch": 2.7918122004711234, "grad_norm": 5.266971588134766, "learning_rate": 2.294438733633816e-06, "loss": 0.30464666, "memory(GiB)": 34.88, "step": 103110, "train_speed(iter/s)": 0.411003 }, { "acc": 0.93716879, "epoch": 2.791947580754339, "grad_norm": 6.278138160705566, "learning_rate": 2.2939682408177124e-06, "loss": 0.36927123, "memory(GiB)": 34.88, "step": 103115, "train_speed(iter/s)": 0.411004 }, { "acc": 0.95101652, "epoch": 2.7920829610375546, "grad_norm": 4.273050308227539, "learning_rate": 2.293497781906235e-06, "loss": 0.29749012, "memory(GiB)": 34.88, "step": 103120, "train_speed(iter/s)": 0.411005 }, { "acc": 0.92253504, "epoch": 2.79221834132077, "grad_norm": 7.837904930114746, "learning_rate": 2.2930273569052796e-06, "loss": 0.52747345, "memory(GiB)": 34.88, "step": 103125, "train_speed(iter/s)": 0.411006 }, { "acc": 0.95006618, "epoch": 2.7923537216039858, "grad_norm": 2.9433374404907227, "learning_rate": 2.292556965820742e-06, "loss": 0.29117427, "memory(GiB)": 34.88, "step": 103130, "train_speed(iter/s)": 0.411007 }, { "acc": 0.94581003, "epoch": 2.792489101887201, "grad_norm": 8.143152236938477, "learning_rate": 2.292086608658509e-06, "loss": 0.36611624, "memory(GiB)": 34.88, "step": 103135, "train_speed(iter/s)": 0.411009 }, { "acc": 0.94095821, "epoch": 2.7926244821704165, "grad_norm": 4.276567459106445, "learning_rate": 2.291616285424478e-06, "loss": 0.33709576, "memory(GiB)": 34.88, "step": 103140, "train_speed(iter/s)": 0.41101 }, { "acc": 0.93114061, "epoch": 2.7927598624536323, "grad_norm": 4.889104843139648, "learning_rate": 2.291145996124535e-06, "loss": 0.39506216, "memory(GiB)": 34.88, "step": 103145, "train_speed(iter/s)": 0.411011 }, { "acc": 0.93145847, "epoch": 2.792895242736848, "grad_norm": 17.88957977294922, "learning_rate": 2.290675740764579e-06, "loss": 0.3732522, "memory(GiB)": 34.88, "step": 103150, "train_speed(iter/s)": 0.411012 }, { "acc": 0.932026, "epoch": 2.7930306230200634, "grad_norm": 8.636234283447266, "learning_rate": 2.2902055193504937e-06, "loss": 0.3988133, "memory(GiB)": 34.88, "step": 103155, "train_speed(iter/s)": 0.411013 }, { "acc": 0.92984543, "epoch": 2.793166003303279, "grad_norm": 8.452799797058105, "learning_rate": 2.2897353318881727e-06, "loss": 0.42122998, "memory(GiB)": 34.88, "step": 103160, "train_speed(iter/s)": 0.411014 }, { "acc": 0.9356987, "epoch": 2.7933013835864946, "grad_norm": 6.252155303955078, "learning_rate": 2.2892651783835064e-06, "loss": 0.41164846, "memory(GiB)": 34.88, "step": 103165, "train_speed(iter/s)": 0.411015 }, { "acc": 0.9215167, "epoch": 2.79343676386971, "grad_norm": 8.584525108337402, "learning_rate": 2.2887950588423854e-06, "loss": 0.4090724, "memory(GiB)": 34.88, "step": 103170, "train_speed(iter/s)": 0.411016 }, { "acc": 0.93000717, "epoch": 2.7935721441529253, "grad_norm": 9.481900215148926, "learning_rate": 2.2883249732706974e-06, "loss": 0.39956918, "memory(GiB)": 34.88, "step": 103175, "train_speed(iter/s)": 0.411017 }, { "acc": 0.94604759, "epoch": 2.793707524436141, "grad_norm": 4.949584484100342, "learning_rate": 2.2878549216743267e-06, "loss": 0.31716318, "memory(GiB)": 34.88, "step": 103180, "train_speed(iter/s)": 0.411018 }, { "acc": 0.93153229, "epoch": 2.793842904719357, "grad_norm": 6.573482513427734, "learning_rate": 2.28738490405917e-06, "loss": 0.37746203, "memory(GiB)": 34.88, "step": 103185, "train_speed(iter/s)": 0.411019 }, { "acc": 0.92871723, "epoch": 2.7939782850025723, "grad_norm": 14.174308776855469, "learning_rate": 2.2869149204311093e-06, "loss": 0.43273268, "memory(GiB)": 34.88, "step": 103190, "train_speed(iter/s)": 0.41102 }, { "acc": 0.95553646, "epoch": 2.7941136652857876, "grad_norm": 4.063516616821289, "learning_rate": 2.286444970796034e-06, "loss": 0.24825931, "memory(GiB)": 34.88, "step": 103195, "train_speed(iter/s)": 0.411021 }, { "acc": 0.93846169, "epoch": 2.7942490455690034, "grad_norm": 3.849991798400879, "learning_rate": 2.2859750551598314e-06, "loss": 0.34888206, "memory(GiB)": 34.88, "step": 103200, "train_speed(iter/s)": 0.411022 }, { "acc": 0.9288949, "epoch": 2.794384425852219, "grad_norm": 6.971681594848633, "learning_rate": 2.285505173528389e-06, "loss": 0.41988049, "memory(GiB)": 34.88, "step": 103205, "train_speed(iter/s)": 0.411023 }, { "acc": 0.93797665, "epoch": 2.794519806135434, "grad_norm": 11.019472122192383, "learning_rate": 2.285035325907592e-06, "loss": 0.37505844, "memory(GiB)": 34.88, "step": 103210, "train_speed(iter/s)": 0.411024 }, { "acc": 0.94204388, "epoch": 2.79465518641865, "grad_norm": 9.971468925476074, "learning_rate": 2.284565512303322e-06, "loss": 0.28693514, "memory(GiB)": 34.88, "step": 103215, "train_speed(iter/s)": 0.411026 }, { "acc": 0.93585911, "epoch": 2.7947905667018658, "grad_norm": 3.786271095275879, "learning_rate": 2.2840957327214726e-06, "loss": 0.36615686, "memory(GiB)": 34.88, "step": 103220, "train_speed(iter/s)": 0.411027 }, { "acc": 0.94170036, "epoch": 2.794925946985081, "grad_norm": 14.677559852600098, "learning_rate": 2.2836259871679217e-06, "loss": 0.35251465, "memory(GiB)": 34.88, "step": 103225, "train_speed(iter/s)": 0.411028 }, { "acc": 0.95073719, "epoch": 2.7950613272682965, "grad_norm": 6.6350417137146, "learning_rate": 2.283156275648557e-06, "loss": 0.26228323, "memory(GiB)": 34.88, "step": 103230, "train_speed(iter/s)": 0.411029 }, { "acc": 0.91838741, "epoch": 2.7951967075515123, "grad_norm": 10.60411548614502, "learning_rate": 2.2826865981692614e-06, "loss": 0.50567122, "memory(GiB)": 34.88, "step": 103235, "train_speed(iter/s)": 0.41103 }, { "acc": 0.92442579, "epoch": 2.7953320878347276, "grad_norm": 5.130860805511475, "learning_rate": 2.282216954735922e-06, "loss": 0.43138781, "memory(GiB)": 34.88, "step": 103240, "train_speed(iter/s)": 0.411031 }, { "acc": 0.93315849, "epoch": 2.7954674681179434, "grad_norm": 5.800204753875732, "learning_rate": 2.2817473453544166e-06, "loss": 0.3386462, "memory(GiB)": 34.88, "step": 103245, "train_speed(iter/s)": 0.411032 }, { "acc": 0.92422123, "epoch": 2.795602848401159, "grad_norm": 13.199762344360352, "learning_rate": 2.2812777700306305e-06, "loss": 0.3696908, "memory(GiB)": 34.88, "step": 103250, "train_speed(iter/s)": 0.411033 }, { "acc": 0.9446044, "epoch": 2.7957382286843746, "grad_norm": 9.409832954406738, "learning_rate": 2.280808228770448e-06, "loss": 0.35951648, "memory(GiB)": 34.88, "step": 103255, "train_speed(iter/s)": 0.411034 }, { "acc": 0.94583645, "epoch": 2.79587360896759, "grad_norm": 8.159979820251465, "learning_rate": 2.2803387215797472e-06, "loss": 0.27936361, "memory(GiB)": 34.88, "step": 103260, "train_speed(iter/s)": 0.411036 }, { "acc": 0.94581203, "epoch": 2.7960089892508053, "grad_norm": 13.102164268493652, "learning_rate": 2.279869248464412e-06, "loss": 0.30458684, "memory(GiB)": 34.88, "step": 103265, "train_speed(iter/s)": 0.411036 }, { "acc": 0.94585228, "epoch": 2.796144369534021, "grad_norm": 6.162764072418213, "learning_rate": 2.2793998094303226e-06, "loss": 0.31602051, "memory(GiB)": 34.88, "step": 103270, "train_speed(iter/s)": 0.411037 }, { "acc": 0.93482456, "epoch": 2.7962797498172365, "grad_norm": 11.136195182800293, "learning_rate": 2.278930404483362e-06, "loss": 0.37493572, "memory(GiB)": 34.88, "step": 103275, "train_speed(iter/s)": 0.411039 }, { "acc": 0.92752171, "epoch": 2.7964151301004523, "grad_norm": 12.289846420288086, "learning_rate": 2.2784610336294068e-06, "loss": 0.365239, "memory(GiB)": 34.88, "step": 103280, "train_speed(iter/s)": 0.41104 }, { "acc": 0.95793705, "epoch": 2.7965505103836676, "grad_norm": 4.394724369049072, "learning_rate": 2.2779916968743383e-06, "loss": 0.24874144, "memory(GiB)": 34.88, "step": 103285, "train_speed(iter/s)": 0.411041 }, { "acc": 0.92689857, "epoch": 2.7966858906668834, "grad_norm": 9.053071022033691, "learning_rate": 2.277522394224036e-06, "loss": 0.4365942, "memory(GiB)": 34.88, "step": 103290, "train_speed(iter/s)": 0.411042 }, { "acc": 0.94050503, "epoch": 2.796821270950099, "grad_norm": 6.114715576171875, "learning_rate": 2.2770531256843805e-06, "loss": 0.32570596, "memory(GiB)": 34.88, "step": 103295, "train_speed(iter/s)": 0.411043 }, { "acc": 0.94436455, "epoch": 2.796956651233314, "grad_norm": 11.376998901367188, "learning_rate": 2.276583891261249e-06, "loss": 0.29644985, "memory(GiB)": 34.88, "step": 103300, "train_speed(iter/s)": 0.411044 }, { "acc": 0.93107395, "epoch": 2.79709203151653, "grad_norm": 9.613619804382324, "learning_rate": 2.2761146909605153e-06, "loss": 0.42011099, "memory(GiB)": 34.88, "step": 103305, "train_speed(iter/s)": 0.411045 }, { "acc": 0.94016628, "epoch": 2.7972274117997453, "grad_norm": 3.8927536010742188, "learning_rate": 2.2756455247880645e-06, "loss": 0.36570818, "memory(GiB)": 34.88, "step": 103310, "train_speed(iter/s)": 0.411046 }, { "acc": 0.9426218, "epoch": 2.797362792082961, "grad_norm": 14.292463302612305, "learning_rate": 2.275176392749769e-06, "loss": 0.31260681, "memory(GiB)": 34.88, "step": 103315, "train_speed(iter/s)": 0.411047 }, { "acc": 0.9305584, "epoch": 2.7974981723661765, "grad_norm": 5.974065780639648, "learning_rate": 2.2747072948515065e-06, "loss": 0.36789532, "memory(GiB)": 34.88, "step": 103320, "train_speed(iter/s)": 0.411048 }, { "acc": 0.92831812, "epoch": 2.7976335526493923, "grad_norm": 6.7108540534973145, "learning_rate": 2.2742382310991546e-06, "loss": 0.42684112, "memory(GiB)": 34.88, "step": 103325, "train_speed(iter/s)": 0.411049 }, { "acc": 0.93410397, "epoch": 2.7977689329326076, "grad_norm": 6.226470470428467, "learning_rate": 2.273769201498589e-06, "loss": 0.42317648, "memory(GiB)": 34.88, "step": 103330, "train_speed(iter/s)": 0.41105 }, { "acc": 0.95272989, "epoch": 2.797904313215823, "grad_norm": 5.1867547035217285, "learning_rate": 2.2733002060556857e-06, "loss": 0.25109096, "memory(GiB)": 34.88, "step": 103335, "train_speed(iter/s)": 0.411051 }, { "acc": 0.94141808, "epoch": 2.798039693499039, "grad_norm": 5.7878737449646, "learning_rate": 2.272831244776315e-06, "loss": 0.35534184, "memory(GiB)": 34.88, "step": 103340, "train_speed(iter/s)": 0.411052 }, { "acc": 0.93141289, "epoch": 2.7981750737822546, "grad_norm": 3.7439351081848145, "learning_rate": 2.2723623176663587e-06, "loss": 0.34695113, "memory(GiB)": 34.88, "step": 103345, "train_speed(iter/s)": 0.411053 }, { "acc": 0.92685194, "epoch": 2.79831045406547, "grad_norm": 7.585517406463623, "learning_rate": 2.2718934247316858e-06, "loss": 0.39005132, "memory(GiB)": 34.88, "step": 103350, "train_speed(iter/s)": 0.411054 }, { "acc": 0.94642935, "epoch": 2.7984458343486853, "grad_norm": 4.878058910369873, "learning_rate": 2.271424565978172e-06, "loss": 0.29329243, "memory(GiB)": 34.88, "step": 103355, "train_speed(iter/s)": 0.411056 }, { "acc": 0.94478092, "epoch": 2.798581214631901, "grad_norm": 7.023803234100342, "learning_rate": 2.270955741411691e-06, "loss": 0.29984238, "memory(GiB)": 34.88, "step": 103360, "train_speed(iter/s)": 0.411057 }, { "acc": 0.9235445, "epoch": 2.7987165949151165, "grad_norm": 8.957913398742676, "learning_rate": 2.270486951038117e-06, "loss": 0.41787262, "memory(GiB)": 34.88, "step": 103365, "train_speed(iter/s)": 0.411058 }, { "acc": 0.92809696, "epoch": 2.798851975198332, "grad_norm": 43.202537536621094, "learning_rate": 2.270018194863319e-06, "loss": 0.41988726, "memory(GiB)": 34.88, "step": 103370, "train_speed(iter/s)": 0.411059 }, { "acc": 0.91922064, "epoch": 2.7989873554815476, "grad_norm": 7.11845588684082, "learning_rate": 2.2695494728931703e-06, "loss": 0.43262486, "memory(GiB)": 34.88, "step": 103375, "train_speed(iter/s)": 0.41106 }, { "acc": 0.9438406, "epoch": 2.7991227357647634, "grad_norm": 5.600348949432373, "learning_rate": 2.2690807851335463e-06, "loss": 0.27596292, "memory(GiB)": 34.88, "step": 103380, "train_speed(iter/s)": 0.411061 }, { "acc": 0.91072168, "epoch": 2.799258116047979, "grad_norm": 13.911930084228516, "learning_rate": 2.2686121315903128e-06, "loss": 0.5916256, "memory(GiB)": 34.88, "step": 103385, "train_speed(iter/s)": 0.411062 }, { "acc": 0.934233, "epoch": 2.799393496331194, "grad_norm": 6.505385398864746, "learning_rate": 2.2681435122693427e-06, "loss": 0.34506798, "memory(GiB)": 34.88, "step": 103390, "train_speed(iter/s)": 0.411063 }, { "acc": 0.93758507, "epoch": 2.79952887661441, "grad_norm": 6.272921085357666, "learning_rate": 2.267674927176507e-06, "loss": 0.39673142, "memory(GiB)": 34.88, "step": 103395, "train_speed(iter/s)": 0.411064 }, { "acc": 0.93934183, "epoch": 2.7996642568976253, "grad_norm": 6.356886863708496, "learning_rate": 2.267206376317677e-06, "loss": 0.35961974, "memory(GiB)": 34.88, "step": 103400, "train_speed(iter/s)": 0.411066 }, { "acc": 0.92898407, "epoch": 2.799799637180841, "grad_norm": 4.084817409515381, "learning_rate": 2.266737859698719e-06, "loss": 0.44714675, "memory(GiB)": 34.88, "step": 103405, "train_speed(iter/s)": 0.411067 }, { "acc": 0.9434639, "epoch": 2.7999350174640565, "grad_norm": 7.964322090148926, "learning_rate": 2.2662693773255033e-06, "loss": 0.32390003, "memory(GiB)": 34.88, "step": 103410, "train_speed(iter/s)": 0.411068 }, { "acc": 0.94541407, "epoch": 2.8000703977472723, "grad_norm": 17.271671295166016, "learning_rate": 2.265800929203899e-06, "loss": 0.34615693, "memory(GiB)": 34.88, "step": 103415, "train_speed(iter/s)": 0.411069 }, { "acc": 0.94884415, "epoch": 2.8002057780304876, "grad_norm": 9.349556922912598, "learning_rate": 2.265332515339776e-06, "loss": 0.28847556, "memory(GiB)": 34.88, "step": 103420, "train_speed(iter/s)": 0.41107 }, { "acc": 0.93354692, "epoch": 2.800341158313703, "grad_norm": 15.944061279296875, "learning_rate": 2.264864135738999e-06, "loss": 0.35660567, "memory(GiB)": 34.88, "step": 103425, "train_speed(iter/s)": 0.411071 }, { "acc": 0.92924833, "epoch": 2.800476538596919, "grad_norm": 13.443134307861328, "learning_rate": 2.264395790407436e-06, "loss": 0.36730549, "memory(GiB)": 34.88, "step": 103430, "train_speed(iter/s)": 0.411072 }, { "acc": 0.94636612, "epoch": 2.800611918880134, "grad_norm": 8.325264930725098, "learning_rate": 2.2639274793509574e-06, "loss": 0.30902612, "memory(GiB)": 34.88, "step": 103435, "train_speed(iter/s)": 0.411073 }, { "acc": 0.93955116, "epoch": 2.80074729916335, "grad_norm": 6.8336005210876465, "learning_rate": 2.2634592025754242e-06, "loss": 0.3906584, "memory(GiB)": 34.88, "step": 103440, "train_speed(iter/s)": 0.411074 }, { "acc": 0.94169159, "epoch": 2.8008826794465653, "grad_norm": 5.775781631469727, "learning_rate": 2.2629909600867057e-06, "loss": 0.32564137, "memory(GiB)": 34.88, "step": 103445, "train_speed(iter/s)": 0.411075 }, { "acc": 0.94652796, "epoch": 2.801018059729781, "grad_norm": 3.323627471923828, "learning_rate": 2.2625227518906674e-06, "loss": 0.2773793, "memory(GiB)": 34.88, "step": 103450, "train_speed(iter/s)": 0.411076 }, { "acc": 0.94915085, "epoch": 2.8011534400129965, "grad_norm": 11.010881423950195, "learning_rate": 2.2620545779931756e-06, "loss": 0.29826646, "memory(GiB)": 34.88, "step": 103455, "train_speed(iter/s)": 0.411077 }, { "acc": 0.94158258, "epoch": 2.801288820296212, "grad_norm": 11.977777481079102, "learning_rate": 2.261586438400092e-06, "loss": 0.37857618, "memory(GiB)": 34.88, "step": 103460, "train_speed(iter/s)": 0.411078 }, { "acc": 0.93566084, "epoch": 2.8014242005794276, "grad_norm": 6.536961078643799, "learning_rate": 2.2611183331172833e-06, "loss": 0.33741932, "memory(GiB)": 34.88, "step": 103465, "train_speed(iter/s)": 0.411079 }, { "acc": 0.93320313, "epoch": 2.801559580862643, "grad_norm": 4.300828456878662, "learning_rate": 2.260650262150614e-06, "loss": 0.29206891, "memory(GiB)": 34.88, "step": 103470, "train_speed(iter/s)": 0.41108 }, { "acc": 0.9348402, "epoch": 2.801694961145859, "grad_norm": 10.051736831665039, "learning_rate": 2.260182225505945e-06, "loss": 0.3642837, "memory(GiB)": 34.88, "step": 103475, "train_speed(iter/s)": 0.411081 }, { "acc": 0.95301132, "epoch": 2.801830341429074, "grad_norm": 6.87739896774292, "learning_rate": 2.2597142231891405e-06, "loss": 0.26666994, "memory(GiB)": 34.88, "step": 103480, "train_speed(iter/s)": 0.411083 }, { "acc": 0.93092699, "epoch": 2.80196572171229, "grad_norm": 6.104390621185303, "learning_rate": 2.2592462552060637e-06, "loss": 0.38110259, "memory(GiB)": 34.88, "step": 103485, "train_speed(iter/s)": 0.411084 }, { "acc": 0.93760061, "epoch": 2.8021011019955053, "grad_norm": 8.115920066833496, "learning_rate": 2.258778321562578e-06, "loss": 0.3609303, "memory(GiB)": 34.88, "step": 103490, "train_speed(iter/s)": 0.411085 }, { "acc": 0.93550177, "epoch": 2.8022364822787207, "grad_norm": 10.218246459960938, "learning_rate": 2.2583104222645423e-06, "loss": 0.38597744, "memory(GiB)": 34.88, "step": 103495, "train_speed(iter/s)": 0.411086 }, { "acc": 0.91986303, "epoch": 2.8023718625619365, "grad_norm": 9.691248893737793, "learning_rate": 2.25784255731782e-06, "loss": 0.46176124, "memory(GiB)": 34.88, "step": 103500, "train_speed(iter/s)": 0.411087 }, { "acc": 0.93379536, "epoch": 2.8025072428451523, "grad_norm": 27.613704681396484, "learning_rate": 2.2573747267282728e-06, "loss": 0.42484298, "memory(GiB)": 34.88, "step": 103505, "train_speed(iter/s)": 0.411088 }, { "acc": 0.92843599, "epoch": 2.8026426231283676, "grad_norm": 4.266580581665039, "learning_rate": 2.256906930501758e-06, "loss": 0.42537045, "memory(GiB)": 34.88, "step": 103510, "train_speed(iter/s)": 0.411089 }, { "acc": 0.94321413, "epoch": 2.802778003411583, "grad_norm": 13.264540672302246, "learning_rate": 2.256439168644137e-06, "loss": 0.33813353, "memory(GiB)": 34.88, "step": 103515, "train_speed(iter/s)": 0.41109 }, { "acc": 0.94268064, "epoch": 2.802913383694799, "grad_norm": 5.415980339050293, "learning_rate": 2.2559714411612714e-06, "loss": 0.29849818, "memory(GiB)": 34.88, "step": 103520, "train_speed(iter/s)": 0.411091 }, { "acc": 0.93511829, "epoch": 2.803048763978014, "grad_norm": 12.086769104003906, "learning_rate": 2.25550374805902e-06, "loss": 0.3937439, "memory(GiB)": 34.88, "step": 103525, "train_speed(iter/s)": 0.411092 }, { "acc": 0.93081341, "epoch": 2.8031841442612295, "grad_norm": 9.468053817749023, "learning_rate": 2.2550360893432385e-06, "loss": 0.33968768, "memory(GiB)": 34.88, "step": 103530, "train_speed(iter/s)": 0.411093 }, { "acc": 0.94010563, "epoch": 2.8033195245444453, "grad_norm": 9.452934265136719, "learning_rate": 2.2545684650197874e-06, "loss": 0.33816714, "memory(GiB)": 34.88, "step": 103535, "train_speed(iter/s)": 0.411094 }, { "acc": 0.95789385, "epoch": 2.803454904827661, "grad_norm": 9.58847427368164, "learning_rate": 2.2541008750945247e-06, "loss": 0.24745855, "memory(GiB)": 34.88, "step": 103540, "train_speed(iter/s)": 0.411095 }, { "acc": 0.94201736, "epoch": 2.8035902851108765, "grad_norm": 7.322767734527588, "learning_rate": 2.2536333195733098e-06, "loss": 0.31023493, "memory(GiB)": 34.88, "step": 103545, "train_speed(iter/s)": 0.411096 }, { "acc": 0.92906437, "epoch": 2.803725665394092, "grad_norm": 6.300626754760742, "learning_rate": 2.253165798461995e-06, "loss": 0.42708788, "memory(GiB)": 34.88, "step": 103550, "train_speed(iter/s)": 0.411098 }, { "acc": 0.93751402, "epoch": 2.8038610456773077, "grad_norm": 9.331050872802734, "learning_rate": 2.2526983117664407e-06, "loss": 0.32168984, "memory(GiB)": 34.88, "step": 103555, "train_speed(iter/s)": 0.411099 }, { "acc": 0.93456516, "epoch": 2.803996425960523, "grad_norm": 5.537704944610596, "learning_rate": 2.252230859492503e-06, "loss": 0.4046319, "memory(GiB)": 34.88, "step": 103560, "train_speed(iter/s)": 0.4111 }, { "acc": 0.93327551, "epoch": 2.804131806243739, "grad_norm": 3.035921096801758, "learning_rate": 2.251763441646035e-06, "loss": 0.44296899, "memory(GiB)": 34.88, "step": 103565, "train_speed(iter/s)": 0.411101 }, { "acc": 0.93398361, "epoch": 2.804267186526954, "grad_norm": 11.05002498626709, "learning_rate": 2.2512960582328932e-06, "loss": 0.3618845, "memory(GiB)": 34.88, "step": 103570, "train_speed(iter/s)": 0.411102 }, { "acc": 0.93729897, "epoch": 2.80440256681017, "grad_norm": 9.496392250061035, "learning_rate": 2.2508287092589337e-06, "loss": 0.36561735, "memory(GiB)": 34.88, "step": 103575, "train_speed(iter/s)": 0.411103 }, { "acc": 0.93220053, "epoch": 2.8045379470933853, "grad_norm": 12.399372100830078, "learning_rate": 2.250361394730012e-06, "loss": 0.42720656, "memory(GiB)": 34.88, "step": 103580, "train_speed(iter/s)": 0.411104 }, { "acc": 0.94055128, "epoch": 2.8046733273766007, "grad_norm": 4.070083141326904, "learning_rate": 2.2498941146519764e-06, "loss": 0.30588059, "memory(GiB)": 34.88, "step": 103585, "train_speed(iter/s)": 0.411105 }, { "acc": 0.92929602, "epoch": 2.8048087076598165, "grad_norm": 7.6851487159729, "learning_rate": 2.249426869030688e-06, "loss": 0.38695283, "memory(GiB)": 34.88, "step": 103590, "train_speed(iter/s)": 0.411106 }, { "acc": 0.92723103, "epoch": 2.804944087943032, "grad_norm": 12.610509872436523, "learning_rate": 2.2489596578719968e-06, "loss": 0.41820803, "memory(GiB)": 34.88, "step": 103595, "train_speed(iter/s)": 0.411107 }, { "acc": 0.94009867, "epoch": 2.8050794682262477, "grad_norm": 16.849693298339844, "learning_rate": 2.248492481181753e-06, "loss": 0.41459789, "memory(GiB)": 34.88, "step": 103600, "train_speed(iter/s)": 0.411108 }, { "acc": 0.92280369, "epoch": 2.805214848509463, "grad_norm": 14.047196388244629, "learning_rate": 2.2480253389658112e-06, "loss": 0.44188347, "memory(GiB)": 34.88, "step": 103605, "train_speed(iter/s)": 0.411109 }, { "acc": 0.93312683, "epoch": 2.805350228792679, "grad_norm": 17.729904174804688, "learning_rate": 2.2475582312300225e-06, "loss": 0.38032703, "memory(GiB)": 34.88, "step": 103610, "train_speed(iter/s)": 0.41111 }, { "acc": 0.93382092, "epoch": 2.805485609075894, "grad_norm": 4.57205057144165, "learning_rate": 2.2470911579802416e-06, "loss": 0.3802207, "memory(GiB)": 34.88, "step": 103615, "train_speed(iter/s)": 0.411111 }, { "acc": 0.93462543, "epoch": 2.8056209893591095, "grad_norm": 4.549519062042236, "learning_rate": 2.246624119222313e-06, "loss": 0.33267875, "memory(GiB)": 34.88, "step": 103620, "train_speed(iter/s)": 0.411112 }, { "acc": 0.92642918, "epoch": 2.8057563696423253, "grad_norm": 8.244991302490234, "learning_rate": 2.2461571149620946e-06, "loss": 0.49318018, "memory(GiB)": 34.88, "step": 103625, "train_speed(iter/s)": 0.411113 }, { "acc": 0.93934698, "epoch": 2.8058917499255407, "grad_norm": 8.6618013381958, "learning_rate": 2.2456901452054335e-06, "loss": 0.30099893, "memory(GiB)": 34.88, "step": 103630, "train_speed(iter/s)": 0.411115 }, { "acc": 0.9414711, "epoch": 2.8060271302087565, "grad_norm": 5.698817729949951, "learning_rate": 2.245223209958177e-06, "loss": 0.38839326, "memory(GiB)": 34.88, "step": 103635, "train_speed(iter/s)": 0.411116 }, { "acc": 0.95179253, "epoch": 2.806162510491972, "grad_norm": 4.887643814086914, "learning_rate": 2.2447563092261763e-06, "loss": 0.28953078, "memory(GiB)": 34.88, "step": 103640, "train_speed(iter/s)": 0.411117 }, { "acc": 0.94871006, "epoch": 2.8062978907751877, "grad_norm": 5.570382118225098, "learning_rate": 2.244289443015281e-06, "loss": 0.30569272, "memory(GiB)": 34.88, "step": 103645, "train_speed(iter/s)": 0.411118 }, { "acc": 0.94168777, "epoch": 2.806433271058403, "grad_norm": 9.075769424438477, "learning_rate": 2.2438226113313398e-06, "loss": 0.33877816, "memory(GiB)": 34.88, "step": 103650, "train_speed(iter/s)": 0.411119 }, { "acc": 0.93569412, "epoch": 2.8065686513416184, "grad_norm": 11.379129409790039, "learning_rate": 2.2433558141801965e-06, "loss": 0.38917699, "memory(GiB)": 34.88, "step": 103655, "train_speed(iter/s)": 0.41112 }, { "acc": 0.92765589, "epoch": 2.806704031624834, "grad_norm": 7.44833517074585, "learning_rate": 2.2428890515677056e-06, "loss": 0.3687602, "memory(GiB)": 34.88, "step": 103660, "train_speed(iter/s)": 0.411121 }, { "acc": 0.93216991, "epoch": 2.80683941190805, "grad_norm": 9.982871055603027, "learning_rate": 2.2424223234997085e-06, "loss": 0.37384353, "memory(GiB)": 34.88, "step": 103665, "train_speed(iter/s)": 0.411122 }, { "acc": 0.94409103, "epoch": 2.8069747921912653, "grad_norm": 4.753552436828613, "learning_rate": 2.2419556299820557e-06, "loss": 0.28786983, "memory(GiB)": 34.88, "step": 103670, "train_speed(iter/s)": 0.411123 }, { "acc": 0.92721729, "epoch": 2.8071101724744807, "grad_norm": 21.063005447387695, "learning_rate": 2.2414889710205905e-06, "loss": 0.43890347, "memory(GiB)": 34.88, "step": 103675, "train_speed(iter/s)": 0.411124 }, { "acc": 0.94263973, "epoch": 2.8072455527576965, "grad_norm": 5.266417503356934, "learning_rate": 2.241022346621159e-06, "loss": 0.32371764, "memory(GiB)": 34.88, "step": 103680, "train_speed(iter/s)": 0.411125 }, { "acc": 0.93465891, "epoch": 2.807380933040912, "grad_norm": 10.906970977783203, "learning_rate": 2.24055575678961e-06, "loss": 0.36960213, "memory(GiB)": 34.88, "step": 103685, "train_speed(iter/s)": 0.411126 }, { "acc": 0.92173586, "epoch": 2.807516313324127, "grad_norm": 10.491168022155762, "learning_rate": 2.2400892015317817e-06, "loss": 0.45467596, "memory(GiB)": 34.88, "step": 103690, "train_speed(iter/s)": 0.411127 }, { "acc": 0.94486895, "epoch": 2.807651693607343, "grad_norm": 8.534038543701172, "learning_rate": 2.2396226808535265e-06, "loss": 0.26893635, "memory(GiB)": 34.88, "step": 103695, "train_speed(iter/s)": 0.411129 }, { "acc": 0.92569313, "epoch": 2.807787073890559, "grad_norm": 5.545661926269531, "learning_rate": 2.2391561947606837e-06, "loss": 0.41114473, "memory(GiB)": 34.88, "step": 103700, "train_speed(iter/s)": 0.41113 }, { "acc": 0.95006905, "epoch": 2.807922454173774, "grad_norm": 20.94261360168457, "learning_rate": 2.2386897432590993e-06, "loss": 0.26905375, "memory(GiB)": 34.88, "step": 103705, "train_speed(iter/s)": 0.411131 }, { "acc": 0.93715324, "epoch": 2.8080578344569895, "grad_norm": 6.834137439727783, "learning_rate": 2.2382233263546118e-06, "loss": 0.39100158, "memory(GiB)": 34.88, "step": 103710, "train_speed(iter/s)": 0.411132 }, { "acc": 0.94733524, "epoch": 2.8081932147402053, "grad_norm": 19.195220947265625, "learning_rate": 2.237756944053071e-06, "loss": 0.27720151, "memory(GiB)": 34.88, "step": 103715, "train_speed(iter/s)": 0.411133 }, { "acc": 0.9426342, "epoch": 2.8083285950234207, "grad_norm": 9.037339210510254, "learning_rate": 2.2372905963603163e-06, "loss": 0.33769736, "memory(GiB)": 34.88, "step": 103720, "train_speed(iter/s)": 0.411134 }, { "acc": 0.95202351, "epoch": 2.8084639753066365, "grad_norm": 4.410411357879639, "learning_rate": 2.2368242832821852e-06, "loss": 0.20853944, "memory(GiB)": 34.88, "step": 103725, "train_speed(iter/s)": 0.411135 }, { "acc": 0.95035057, "epoch": 2.808599355589852, "grad_norm": 8.665975570678711, "learning_rate": 2.2363580048245264e-06, "loss": 0.27814565, "memory(GiB)": 34.88, "step": 103730, "train_speed(iter/s)": 0.411136 }, { "acc": 0.94765358, "epoch": 2.8087347358730677, "grad_norm": 5.223295211791992, "learning_rate": 2.2358917609931764e-06, "loss": 0.28208966, "memory(GiB)": 34.88, "step": 103735, "train_speed(iter/s)": 0.411137 }, { "acc": 0.94309368, "epoch": 2.808870116156283, "grad_norm": 6.477447509765625, "learning_rate": 2.235425551793978e-06, "loss": 0.35997639, "memory(GiB)": 34.88, "step": 103740, "train_speed(iter/s)": 0.411138 }, { "acc": 0.94278717, "epoch": 2.8090054964394984, "grad_norm": 6.918893337249756, "learning_rate": 2.2349593772327673e-06, "loss": 0.37304304, "memory(GiB)": 34.88, "step": 103745, "train_speed(iter/s)": 0.411139 }, { "acc": 0.93611898, "epoch": 2.809140876722714, "grad_norm": 24.659523010253906, "learning_rate": 2.234493237315391e-06, "loss": 0.37631412, "memory(GiB)": 34.88, "step": 103750, "train_speed(iter/s)": 0.41114 }, { "acc": 0.93925037, "epoch": 2.8092762570059295, "grad_norm": 13.16751766204834, "learning_rate": 2.234027132047684e-06, "loss": 0.39230611, "memory(GiB)": 34.88, "step": 103755, "train_speed(iter/s)": 0.411141 }, { "acc": 0.9251133, "epoch": 2.8094116372891453, "grad_norm": 8.097752571105957, "learning_rate": 2.2335610614354827e-06, "loss": 0.4555356, "memory(GiB)": 34.88, "step": 103760, "train_speed(iter/s)": 0.411142 }, { "acc": 0.94567833, "epoch": 2.8095470175723607, "grad_norm": 6.665738582611084, "learning_rate": 2.233095025484632e-06, "loss": 0.286063, "memory(GiB)": 34.88, "step": 103765, "train_speed(iter/s)": 0.411143 }, { "acc": 0.94436741, "epoch": 2.8096823978555765, "grad_norm": 3.787919282913208, "learning_rate": 2.2326290242009648e-06, "loss": 0.3230701, "memory(GiB)": 34.88, "step": 103770, "train_speed(iter/s)": 0.411144 }, { "acc": 0.94546003, "epoch": 2.809817778138792, "grad_norm": 6.4064435958862305, "learning_rate": 2.232163057590322e-06, "loss": 0.3781523, "memory(GiB)": 34.88, "step": 103775, "train_speed(iter/s)": 0.411145 }, { "acc": 0.94354229, "epoch": 2.809953158422007, "grad_norm": 5.402515411376953, "learning_rate": 2.231697125658536e-06, "loss": 0.32073867, "memory(GiB)": 34.88, "step": 103780, "train_speed(iter/s)": 0.411146 }, { "acc": 0.93567772, "epoch": 2.810088538705223, "grad_norm": 13.407575607299805, "learning_rate": 2.23123122841145e-06, "loss": 0.38027434, "memory(GiB)": 34.88, "step": 103785, "train_speed(iter/s)": 0.411147 }, { "acc": 0.92590313, "epoch": 2.8102239189884384, "grad_norm": 8.765989303588867, "learning_rate": 2.2307653658548955e-06, "loss": 0.35858932, "memory(GiB)": 34.88, "step": 103790, "train_speed(iter/s)": 0.411148 }, { "acc": 0.9185854, "epoch": 2.810359299271654, "grad_norm": 21.194971084594727, "learning_rate": 2.230299537994709e-06, "loss": 0.45853395, "memory(GiB)": 34.88, "step": 103795, "train_speed(iter/s)": 0.411149 }, { "acc": 0.94314604, "epoch": 2.8104946795548695, "grad_norm": 9.05572509765625, "learning_rate": 2.2298337448367295e-06, "loss": 0.39599919, "memory(GiB)": 34.88, "step": 103800, "train_speed(iter/s)": 0.41115 }, { "acc": 0.92375965, "epoch": 2.8106300598380853, "grad_norm": 4.492473125457764, "learning_rate": 2.229367986386787e-06, "loss": 0.44445658, "memory(GiB)": 34.88, "step": 103805, "train_speed(iter/s)": 0.411151 }, { "acc": 0.95270996, "epoch": 2.8107654401213007, "grad_norm": 4.354397296905518, "learning_rate": 2.22890226265072e-06, "loss": 0.2856168, "memory(GiB)": 34.88, "step": 103810, "train_speed(iter/s)": 0.411152 }, { "acc": 0.94199343, "epoch": 2.810900820404516, "grad_norm": 6.100101947784424, "learning_rate": 2.228436573634357e-06, "loss": 0.28799505, "memory(GiB)": 34.88, "step": 103815, "train_speed(iter/s)": 0.411153 }, { "acc": 0.93267307, "epoch": 2.811036200687732, "grad_norm": 12.016777038574219, "learning_rate": 2.22797091934354e-06, "loss": 0.34706697, "memory(GiB)": 34.88, "step": 103820, "train_speed(iter/s)": 0.411154 }, { "acc": 0.93875456, "epoch": 2.8111715809709477, "grad_norm": 5.97646427154541, "learning_rate": 2.2275052997840947e-06, "loss": 0.41363187, "memory(GiB)": 34.88, "step": 103825, "train_speed(iter/s)": 0.411155 }, { "acc": 0.93201542, "epoch": 2.811306961254163, "grad_norm": 4.7884840965271, "learning_rate": 2.2270397149618575e-06, "loss": 0.47371826, "memory(GiB)": 34.88, "step": 103830, "train_speed(iter/s)": 0.411157 }, { "acc": 0.93933487, "epoch": 2.8114423415373784, "grad_norm": 4.2280473709106445, "learning_rate": 2.2265741648826603e-06, "loss": 0.38686495, "memory(GiB)": 34.88, "step": 103835, "train_speed(iter/s)": 0.411158 }, { "acc": 0.95016651, "epoch": 2.811577721820594, "grad_norm": 6.656663417816162, "learning_rate": 2.2261086495523367e-06, "loss": 0.29208393, "memory(GiB)": 34.88, "step": 103840, "train_speed(iter/s)": 0.411159 }, { "acc": 0.93245907, "epoch": 2.8117131021038095, "grad_norm": 5.710745334625244, "learning_rate": 2.2256431689767165e-06, "loss": 0.42669601, "memory(GiB)": 34.88, "step": 103845, "train_speed(iter/s)": 0.41116 }, { "acc": 0.9326086, "epoch": 2.811848482387025, "grad_norm": 11.90246295928955, "learning_rate": 2.225177723161627e-06, "loss": 0.36526275, "memory(GiB)": 34.88, "step": 103850, "train_speed(iter/s)": 0.411161 }, { "acc": 0.92504635, "epoch": 2.8119838626702407, "grad_norm": 5.392993927001953, "learning_rate": 2.2247123121129064e-06, "loss": 0.43037548, "memory(GiB)": 34.88, "step": 103855, "train_speed(iter/s)": 0.411162 }, { "acc": 0.92816067, "epoch": 2.8121192429534565, "grad_norm": 18.52092170715332, "learning_rate": 2.224246935836379e-06, "loss": 0.43030558, "memory(GiB)": 34.88, "step": 103860, "train_speed(iter/s)": 0.411162 }, { "acc": 0.91907644, "epoch": 2.812254623236672, "grad_norm": 13.391358375549316, "learning_rate": 2.2237815943378778e-06, "loss": 0.44861803, "memory(GiB)": 34.88, "step": 103865, "train_speed(iter/s)": 0.411164 }, { "acc": 0.9359005, "epoch": 2.812390003519887, "grad_norm": 6.5516839027404785, "learning_rate": 2.223316287623228e-06, "loss": 0.43192806, "memory(GiB)": 34.88, "step": 103870, "train_speed(iter/s)": 0.411165 }, { "acc": 0.95017214, "epoch": 2.812525383803103, "grad_norm": 8.590285301208496, "learning_rate": 2.2228510156982645e-06, "loss": 0.2741708, "memory(GiB)": 34.88, "step": 103875, "train_speed(iter/s)": 0.411166 }, { "acc": 0.94004183, "epoch": 2.8126607640863184, "grad_norm": 5.190802574157715, "learning_rate": 2.222385778568813e-06, "loss": 0.36122112, "memory(GiB)": 34.88, "step": 103880, "train_speed(iter/s)": 0.411167 }, { "acc": 0.93428745, "epoch": 2.812796144369534, "grad_norm": 8.60994815826416, "learning_rate": 2.2219205762406975e-06, "loss": 0.36788387, "memory(GiB)": 34.88, "step": 103885, "train_speed(iter/s)": 0.411168 }, { "acc": 0.94003639, "epoch": 2.8129315246527495, "grad_norm": 9.418234825134277, "learning_rate": 2.2214554087197525e-06, "loss": 0.39009559, "memory(GiB)": 34.88, "step": 103890, "train_speed(iter/s)": 0.411169 }, { "acc": 0.92200785, "epoch": 2.8130669049359653, "grad_norm": 6.019197463989258, "learning_rate": 2.2209902760118e-06, "loss": 0.43070369, "memory(GiB)": 34.88, "step": 103895, "train_speed(iter/s)": 0.41117 }, { "acc": 0.93049726, "epoch": 2.8132022852191807, "grad_norm": 13.666396141052246, "learning_rate": 2.2205251781226705e-06, "loss": 0.3486578, "memory(GiB)": 34.88, "step": 103900, "train_speed(iter/s)": 0.411171 }, { "acc": 0.93729477, "epoch": 2.813337665502396, "grad_norm": 5.472085475921631, "learning_rate": 2.220060115058185e-06, "loss": 0.35352027, "memory(GiB)": 34.88, "step": 103905, "train_speed(iter/s)": 0.411172 }, { "acc": 0.93632622, "epoch": 2.813473045785612, "grad_norm": 11.959794044494629, "learning_rate": 2.2195950868241754e-06, "loss": 0.32832191, "memory(GiB)": 34.88, "step": 103910, "train_speed(iter/s)": 0.411173 }, { "acc": 0.93984299, "epoch": 2.813608426068827, "grad_norm": 5.818312644958496, "learning_rate": 2.219130093426463e-06, "loss": 0.34049201, "memory(GiB)": 34.88, "step": 103915, "train_speed(iter/s)": 0.411174 }, { "acc": 0.94017754, "epoch": 2.813743806352043, "grad_norm": 13.390741348266602, "learning_rate": 2.218665134870874e-06, "loss": 0.35891142, "memory(GiB)": 34.88, "step": 103920, "train_speed(iter/s)": 0.411175 }, { "acc": 0.93406906, "epoch": 2.8138791866352584, "grad_norm": 8.047375679016113, "learning_rate": 2.2182002111632353e-06, "loss": 0.36662331, "memory(GiB)": 34.88, "step": 103925, "train_speed(iter/s)": 0.411176 }, { "acc": 0.93088341, "epoch": 2.814014566918474, "grad_norm": 10.446878433227539, "learning_rate": 2.217735322309366e-06, "loss": 0.42240639, "memory(GiB)": 34.88, "step": 103930, "train_speed(iter/s)": 0.411177 }, { "acc": 0.91549854, "epoch": 2.8141499472016895, "grad_norm": 12.603888511657715, "learning_rate": 2.2172704683150945e-06, "loss": 0.52122822, "memory(GiB)": 34.88, "step": 103935, "train_speed(iter/s)": 0.411178 }, { "acc": 0.93333817, "epoch": 2.814285327484905, "grad_norm": 8.416215896606445, "learning_rate": 2.216805649186238e-06, "loss": 0.33642137, "memory(GiB)": 34.88, "step": 103940, "train_speed(iter/s)": 0.41118 }, { "acc": 0.93368874, "epoch": 2.8144207077681207, "grad_norm": 9.446247100830078, "learning_rate": 2.216340864928628e-06, "loss": 0.37916322, "memory(GiB)": 34.88, "step": 103945, "train_speed(iter/s)": 0.411181 }, { "acc": 0.94558105, "epoch": 2.814556088051336, "grad_norm": 3.7059214115142822, "learning_rate": 2.2158761155480787e-06, "loss": 0.36715832, "memory(GiB)": 34.88, "step": 103950, "train_speed(iter/s)": 0.411181 }, { "acc": 0.92185764, "epoch": 2.814691468334552, "grad_norm": 13.877408981323242, "learning_rate": 2.2154114010504164e-06, "loss": 0.45115814, "memory(GiB)": 34.88, "step": 103955, "train_speed(iter/s)": 0.411183 }, { "acc": 0.93070574, "epoch": 2.814826848617767, "grad_norm": 4.543381214141846, "learning_rate": 2.214946721441461e-06, "loss": 0.37994218, "memory(GiB)": 34.88, "step": 103960, "train_speed(iter/s)": 0.411184 }, { "acc": 0.92277298, "epoch": 2.814962228900983, "grad_norm": 12.997444152832031, "learning_rate": 2.2144820767270363e-06, "loss": 0.47993689, "memory(GiB)": 34.88, "step": 103965, "train_speed(iter/s)": 0.411185 }, { "acc": 0.93208513, "epoch": 2.8150976091841984, "grad_norm": 4.926611423492432, "learning_rate": 2.2140174669129604e-06, "loss": 0.41366096, "memory(GiB)": 34.88, "step": 103970, "train_speed(iter/s)": 0.411186 }, { "acc": 0.93287544, "epoch": 2.8152329894674137, "grad_norm": 5.429373741149902, "learning_rate": 2.2135528920050497e-06, "loss": 0.35093346, "memory(GiB)": 34.88, "step": 103975, "train_speed(iter/s)": 0.411187 }, { "acc": 0.92901363, "epoch": 2.8153683697506295, "grad_norm": 8.353633880615234, "learning_rate": 2.2130883520091316e-06, "loss": 0.46443543, "memory(GiB)": 34.88, "step": 103980, "train_speed(iter/s)": 0.411188 }, { "acc": 0.93677883, "epoch": 2.8155037500338453, "grad_norm": 13.232376098632812, "learning_rate": 2.2126238469310198e-06, "loss": 0.41927524, "memory(GiB)": 34.88, "step": 103985, "train_speed(iter/s)": 0.411189 }, { "acc": 0.94251747, "epoch": 2.8156391303170607, "grad_norm": 12.455945014953613, "learning_rate": 2.2121593767765354e-06, "loss": 0.36267848, "memory(GiB)": 34.88, "step": 103990, "train_speed(iter/s)": 0.41119 }, { "acc": 0.93426247, "epoch": 2.815774510600276, "grad_norm": 5.75972843170166, "learning_rate": 2.2116949415514956e-06, "loss": 0.35310612, "memory(GiB)": 34.88, "step": 103995, "train_speed(iter/s)": 0.411191 }, { "acc": 0.9564353, "epoch": 2.815909890883492, "grad_norm": 5.396259784698486, "learning_rate": 2.211230541261721e-06, "loss": 0.27017283, "memory(GiB)": 34.88, "step": 104000, "train_speed(iter/s)": 0.411192 }, { "acc": 0.94037209, "epoch": 2.816045271166707, "grad_norm": 8.744484901428223, "learning_rate": 2.2107661759130264e-06, "loss": 0.35782199, "memory(GiB)": 34.88, "step": 104005, "train_speed(iter/s)": 0.411193 }, { "acc": 0.9278986, "epoch": 2.8161806514499226, "grad_norm": 4.950349807739258, "learning_rate": 2.2103018455112298e-06, "loss": 0.42347269, "memory(GiB)": 34.88, "step": 104010, "train_speed(iter/s)": 0.411194 }, { "acc": 0.94550018, "epoch": 2.8163160317331384, "grad_norm": 9.324816703796387, "learning_rate": 2.2098375500621486e-06, "loss": 0.33576474, "memory(GiB)": 34.88, "step": 104015, "train_speed(iter/s)": 0.411195 }, { "acc": 0.9315979, "epoch": 2.816451412016354, "grad_norm": 5.196799278259277, "learning_rate": 2.2093732895715973e-06, "loss": 0.38866744, "memory(GiB)": 34.88, "step": 104020, "train_speed(iter/s)": 0.411196 }, { "acc": 0.93263702, "epoch": 2.8165867922995695, "grad_norm": 10.911032676696777, "learning_rate": 2.208909064045392e-06, "loss": 0.37104955, "memory(GiB)": 34.88, "step": 104025, "train_speed(iter/s)": 0.411198 }, { "acc": 0.93625202, "epoch": 2.816722172582785, "grad_norm": 9.045726776123047, "learning_rate": 2.2084448734893487e-06, "loss": 0.41591077, "memory(GiB)": 34.88, "step": 104030, "train_speed(iter/s)": 0.411199 }, { "acc": 0.93657684, "epoch": 2.8168575528660007, "grad_norm": 4.385487079620361, "learning_rate": 2.2079807179092845e-06, "loss": 0.39568491, "memory(GiB)": 34.88, "step": 104035, "train_speed(iter/s)": 0.411199 }, { "acc": 0.95264797, "epoch": 2.816992933149216, "grad_norm": 6.634794235229492, "learning_rate": 2.207516597311009e-06, "loss": 0.27059708, "memory(GiB)": 34.88, "step": 104040, "train_speed(iter/s)": 0.4112 }, { "acc": 0.93560276, "epoch": 2.817128313432432, "grad_norm": 11.178046226501465, "learning_rate": 2.2070525117003398e-06, "loss": 0.38565879, "memory(GiB)": 34.88, "step": 104045, "train_speed(iter/s)": 0.411202 }, { "acc": 0.93564825, "epoch": 2.817263693715647, "grad_norm": 6.534101963043213, "learning_rate": 2.206588461083091e-06, "loss": 0.30925634, "memory(GiB)": 34.88, "step": 104050, "train_speed(iter/s)": 0.411203 }, { "acc": 0.94278831, "epoch": 2.817399073998863, "grad_norm": 8.064764022827148, "learning_rate": 2.206124445465072e-06, "loss": 0.29365926, "memory(GiB)": 34.88, "step": 104055, "train_speed(iter/s)": 0.411204 }, { "acc": 0.94258413, "epoch": 2.8175344542820784, "grad_norm": 7.624876976013184, "learning_rate": 2.2056604648520983e-06, "loss": 0.32896593, "memory(GiB)": 34.88, "step": 104060, "train_speed(iter/s)": 0.411205 }, { "acc": 0.94169197, "epoch": 2.8176698345652937, "grad_norm": 7.463064193725586, "learning_rate": 2.205196519249981e-06, "loss": 0.39908557, "memory(GiB)": 34.88, "step": 104065, "train_speed(iter/s)": 0.411206 }, { "acc": 0.95242548, "epoch": 2.8178052148485095, "grad_norm": 3.7893893718719482, "learning_rate": 2.204732608664535e-06, "loss": 0.26712768, "memory(GiB)": 34.88, "step": 104070, "train_speed(iter/s)": 0.411207 }, { "acc": 0.93255663, "epoch": 2.817940595131725, "grad_norm": 15.104997634887695, "learning_rate": 2.204268733101567e-06, "loss": 0.44208031, "memory(GiB)": 34.88, "step": 104075, "train_speed(iter/s)": 0.411208 }, { "acc": 0.93857813, "epoch": 2.8180759754149407, "grad_norm": 25.261980056762695, "learning_rate": 2.2038048925668903e-06, "loss": 0.37980533, "memory(GiB)": 34.88, "step": 104080, "train_speed(iter/s)": 0.411209 }, { "acc": 0.92939777, "epoch": 2.818211355698156, "grad_norm": 8.277596473693848, "learning_rate": 2.2033410870663153e-06, "loss": 0.41275673, "memory(GiB)": 34.88, "step": 104085, "train_speed(iter/s)": 0.41121 }, { "acc": 0.94749708, "epoch": 2.818346735981372, "grad_norm": 5.019002914428711, "learning_rate": 2.202877316605654e-06, "loss": 0.28537345, "memory(GiB)": 34.88, "step": 104090, "train_speed(iter/s)": 0.411211 }, { "acc": 0.91969318, "epoch": 2.8184821162645872, "grad_norm": 11.644791603088379, "learning_rate": 2.2024135811907128e-06, "loss": 0.48284388, "memory(GiB)": 34.88, "step": 104095, "train_speed(iter/s)": 0.411212 }, { "acc": 0.94297438, "epoch": 2.8186174965478026, "grad_norm": 13.76812744140625, "learning_rate": 2.2019498808273015e-06, "loss": 0.28892584, "memory(GiB)": 34.88, "step": 104100, "train_speed(iter/s)": 0.411213 }, { "acc": 0.94076557, "epoch": 2.8187528768310184, "grad_norm": 6.717433452606201, "learning_rate": 2.201486215521231e-06, "loss": 0.35233002, "memory(GiB)": 34.88, "step": 104105, "train_speed(iter/s)": 0.411214 }, { "acc": 0.94097672, "epoch": 2.8188882571142337, "grad_norm": 5.661168575286865, "learning_rate": 2.2010225852783073e-06, "loss": 0.31254332, "memory(GiB)": 34.88, "step": 104110, "train_speed(iter/s)": 0.411215 }, { "acc": 0.92515116, "epoch": 2.8190236373974495, "grad_norm": 6.134685516357422, "learning_rate": 2.200558990104339e-06, "loss": 0.47554741, "memory(GiB)": 34.88, "step": 104115, "train_speed(iter/s)": 0.411216 }, { "acc": 0.94257412, "epoch": 2.819159017680665, "grad_norm": 8.357404708862305, "learning_rate": 2.2000954300051335e-06, "loss": 0.34130201, "memory(GiB)": 34.88, "step": 104120, "train_speed(iter/s)": 0.411217 }, { "acc": 0.94496088, "epoch": 2.8192943979638807, "grad_norm": 7.156616687774658, "learning_rate": 2.1996319049864997e-06, "loss": 0.35075598, "memory(GiB)": 34.88, "step": 104125, "train_speed(iter/s)": 0.411218 }, { "acc": 0.94205036, "epoch": 2.819429778247096, "grad_norm": 3.6756720542907715, "learning_rate": 2.199168415054241e-06, "loss": 0.38877852, "memory(GiB)": 34.88, "step": 104130, "train_speed(iter/s)": 0.411219 }, { "acc": 0.93469419, "epoch": 2.8195651585303114, "grad_norm": 13.483010292053223, "learning_rate": 2.198704960214165e-06, "loss": 0.42104564, "memory(GiB)": 34.88, "step": 104135, "train_speed(iter/s)": 0.41122 }, { "acc": 0.93067274, "epoch": 2.8197005388135272, "grad_norm": 5.6281585693359375, "learning_rate": 2.1982415404720793e-06, "loss": 0.36115255, "memory(GiB)": 34.88, "step": 104140, "train_speed(iter/s)": 0.411221 }, { "acc": 0.93978386, "epoch": 2.819835919096743, "grad_norm": 2.3847250938415527, "learning_rate": 2.1977781558337854e-06, "loss": 0.31240048, "memory(GiB)": 34.88, "step": 104145, "train_speed(iter/s)": 0.411223 }, { "acc": 0.9440691, "epoch": 2.8199712993799584, "grad_norm": 13.103266716003418, "learning_rate": 2.19731480630509e-06, "loss": 0.28487513, "memory(GiB)": 34.88, "step": 104150, "train_speed(iter/s)": 0.411224 }, { "acc": 0.94023962, "epoch": 2.8201066796631737, "grad_norm": 7.782891750335693, "learning_rate": 2.196851491891797e-06, "loss": 0.32546811, "memory(GiB)": 34.88, "step": 104155, "train_speed(iter/s)": 0.411225 }, { "acc": 0.93642864, "epoch": 2.8202420599463895, "grad_norm": 6.6927385330200195, "learning_rate": 2.1963882125997123e-06, "loss": 0.32768278, "memory(GiB)": 34.88, "step": 104160, "train_speed(iter/s)": 0.411226 }, { "acc": 0.93420982, "epoch": 2.820377440229605, "grad_norm": 3.4731149673461914, "learning_rate": 2.1959249684346363e-06, "loss": 0.30383933, "memory(GiB)": 34.88, "step": 104165, "train_speed(iter/s)": 0.411227 }, { "acc": 0.95916071, "epoch": 2.8205128205128203, "grad_norm": 2.2476789951324463, "learning_rate": 2.195461759402374e-06, "loss": 0.22005358, "memory(GiB)": 34.88, "step": 104170, "train_speed(iter/s)": 0.411228 }, { "acc": 0.93885899, "epoch": 2.820648200796036, "grad_norm": 4.252398490905762, "learning_rate": 2.1949985855087293e-06, "loss": 0.25995259, "memory(GiB)": 34.88, "step": 104175, "train_speed(iter/s)": 0.411229 }, { "acc": 0.95483179, "epoch": 2.820783581079252, "grad_norm": 9.104879379272461, "learning_rate": 2.1945354467595005e-06, "loss": 0.27380753, "memory(GiB)": 34.88, "step": 104180, "train_speed(iter/s)": 0.41123 }, { "acc": 0.93047485, "epoch": 2.8209189613624672, "grad_norm": 7.544317245483398, "learning_rate": 2.194072343160491e-06, "loss": 0.39443741, "memory(GiB)": 34.88, "step": 104185, "train_speed(iter/s)": 0.411231 }, { "acc": 0.94192247, "epoch": 2.8210543416456826, "grad_norm": 6.0902910232543945, "learning_rate": 2.1936092747175034e-06, "loss": 0.31752517, "memory(GiB)": 34.88, "step": 104190, "train_speed(iter/s)": 0.411232 }, { "acc": 0.92599831, "epoch": 2.8211897219288984, "grad_norm": 4.88960599899292, "learning_rate": 2.193146241436339e-06, "loss": 0.40584884, "memory(GiB)": 34.88, "step": 104195, "train_speed(iter/s)": 0.411233 }, { "acc": 0.93360806, "epoch": 2.8213251022121137, "grad_norm": 11.899886131286621, "learning_rate": 2.1926832433227954e-06, "loss": 0.39192965, "memory(GiB)": 34.88, "step": 104200, "train_speed(iter/s)": 0.411234 }, { "acc": 0.92663422, "epoch": 2.821460482495329, "grad_norm": 4.763476848602295, "learning_rate": 2.1922202803826736e-06, "loss": 0.36527474, "memory(GiB)": 34.88, "step": 104205, "train_speed(iter/s)": 0.411235 }, { "acc": 0.94307079, "epoch": 2.821595862778545, "grad_norm": 21.877046585083008, "learning_rate": 2.191757352621774e-06, "loss": 0.32302475, "memory(GiB)": 34.88, "step": 104210, "train_speed(iter/s)": 0.411236 }, { "acc": 0.9497407, "epoch": 2.8217312430617607, "grad_norm": 13.863707542419434, "learning_rate": 2.1912944600458964e-06, "loss": 0.32297046, "memory(GiB)": 34.88, "step": 104215, "train_speed(iter/s)": 0.411237 }, { "acc": 0.9158844, "epoch": 2.821866623344976, "grad_norm": 10.077491760253906, "learning_rate": 2.190831602660837e-06, "loss": 0.50591717, "memory(GiB)": 34.88, "step": 104220, "train_speed(iter/s)": 0.411238 }, { "acc": 0.94660711, "epoch": 2.8220020036281914, "grad_norm": 5.180707931518555, "learning_rate": 2.1903687804723946e-06, "loss": 0.29514074, "memory(GiB)": 34.88, "step": 104225, "train_speed(iter/s)": 0.411239 }, { "acc": 0.93406067, "epoch": 2.8221373839114072, "grad_norm": 3.957179307937622, "learning_rate": 2.18990599348637e-06, "loss": 0.33709154, "memory(GiB)": 34.88, "step": 104230, "train_speed(iter/s)": 0.41124 }, { "acc": 0.94559546, "epoch": 2.8222727641946226, "grad_norm": 10.293835639953613, "learning_rate": 2.189443241708557e-06, "loss": 0.29017754, "memory(GiB)": 34.88, "step": 104235, "train_speed(iter/s)": 0.411241 }, { "acc": 0.93442698, "epoch": 2.8224081444778384, "grad_norm": 25.278322219848633, "learning_rate": 2.1889805251447525e-06, "loss": 0.37748675, "memory(GiB)": 34.88, "step": 104240, "train_speed(iter/s)": 0.411242 }, { "acc": 0.94208622, "epoch": 2.8225435247610537, "grad_norm": 6.916659832000732, "learning_rate": 2.188517843800754e-06, "loss": 0.31947837, "memory(GiB)": 34.88, "step": 104245, "train_speed(iter/s)": 0.411243 }, { "acc": 0.94185638, "epoch": 2.8226789050442695, "grad_norm": 9.65079116821289, "learning_rate": 2.18805519768236e-06, "loss": 0.35966783, "memory(GiB)": 34.88, "step": 104250, "train_speed(iter/s)": 0.411244 }, { "acc": 0.94649029, "epoch": 2.822814285327485, "grad_norm": 4.412467002868652, "learning_rate": 2.1875925867953597e-06, "loss": 0.30315208, "memory(GiB)": 34.88, "step": 104255, "train_speed(iter/s)": 0.411245 }, { "acc": 0.93587332, "epoch": 2.8229496656107003, "grad_norm": 2.5700201988220215, "learning_rate": 2.1871300111455557e-06, "loss": 0.37223177, "memory(GiB)": 34.88, "step": 104260, "train_speed(iter/s)": 0.411246 }, { "acc": 0.91678391, "epoch": 2.823085045893916, "grad_norm": 13.31626033782959, "learning_rate": 2.186667470738739e-06, "loss": 0.53834453, "memory(GiB)": 34.88, "step": 104265, "train_speed(iter/s)": 0.411247 }, { "acc": 0.93799667, "epoch": 2.8232204261771314, "grad_norm": 3.5478153228759766, "learning_rate": 2.1862049655807025e-06, "loss": 0.3229629, "memory(GiB)": 34.88, "step": 104270, "train_speed(iter/s)": 0.411248 }, { "acc": 0.93462353, "epoch": 2.8233558064603472, "grad_norm": 5.129916667938232, "learning_rate": 2.1857424956772407e-06, "loss": 0.42329216, "memory(GiB)": 34.88, "step": 104275, "train_speed(iter/s)": 0.41125 }, { "acc": 0.93623781, "epoch": 2.8234911867435626, "grad_norm": 6.934408187866211, "learning_rate": 2.185280061034148e-06, "loss": 0.32048037, "memory(GiB)": 34.88, "step": 104280, "train_speed(iter/s)": 0.411251 }, { "acc": 0.96077633, "epoch": 2.8236265670267784, "grad_norm": 34.99457931518555, "learning_rate": 2.184817661657219e-06, "loss": 0.23000305, "memory(GiB)": 34.88, "step": 104285, "train_speed(iter/s)": 0.411252 }, { "acc": 0.95096569, "epoch": 2.8237619473099937, "grad_norm": 3.8530731201171875, "learning_rate": 2.1843552975522402e-06, "loss": 0.29859009, "memory(GiB)": 34.88, "step": 104290, "train_speed(iter/s)": 0.411253 }, { "acc": 0.91059303, "epoch": 2.823897327593209, "grad_norm": 6.323892116546631, "learning_rate": 2.1838929687250114e-06, "loss": 0.50099311, "memory(GiB)": 34.88, "step": 104295, "train_speed(iter/s)": 0.411254 }, { "acc": 0.93308868, "epoch": 2.824032707876425, "grad_norm": 10.125127792358398, "learning_rate": 2.18343067518132e-06, "loss": 0.4140666, "memory(GiB)": 34.88, "step": 104300, "train_speed(iter/s)": 0.411255 }, { "acc": 0.93738194, "epoch": 2.8241680881596403, "grad_norm": 7.346733570098877, "learning_rate": 2.1829684169269555e-06, "loss": 0.30818081, "memory(GiB)": 34.88, "step": 104305, "train_speed(iter/s)": 0.411256 }, { "acc": 0.92983885, "epoch": 2.824303468442856, "grad_norm": 6.834176063537598, "learning_rate": 2.1825061939677108e-06, "loss": 0.47187252, "memory(GiB)": 34.88, "step": 104310, "train_speed(iter/s)": 0.411257 }, { "acc": 0.93450069, "epoch": 2.8244388487260714, "grad_norm": 4.512722492218018, "learning_rate": 2.1820440063093756e-06, "loss": 0.38971832, "memory(GiB)": 34.88, "step": 104315, "train_speed(iter/s)": 0.411258 }, { "acc": 0.93994465, "epoch": 2.8245742290092872, "grad_norm": 8.09357738494873, "learning_rate": 2.181581853957742e-06, "loss": 0.35852063, "memory(GiB)": 34.88, "step": 104320, "train_speed(iter/s)": 0.411259 }, { "acc": 0.948246, "epoch": 2.8247096092925026, "grad_norm": 8.065428733825684, "learning_rate": 2.181119736918594e-06, "loss": 0.21592338, "memory(GiB)": 34.88, "step": 104325, "train_speed(iter/s)": 0.41126 }, { "acc": 0.93965874, "epoch": 2.824844989575718, "grad_norm": 5.3910698890686035, "learning_rate": 2.180657655197727e-06, "loss": 0.34669714, "memory(GiB)": 34.88, "step": 104330, "train_speed(iter/s)": 0.411261 }, { "acc": 0.94258785, "epoch": 2.8249803698589337, "grad_norm": 11.50892448425293, "learning_rate": 2.1801956088009248e-06, "loss": 0.26307669, "memory(GiB)": 34.88, "step": 104335, "train_speed(iter/s)": 0.411262 }, { "acc": 0.92897663, "epoch": 2.8251157501421496, "grad_norm": 8.408587455749512, "learning_rate": 2.179733597733978e-06, "loss": 0.4131424, "memory(GiB)": 34.88, "step": 104340, "train_speed(iter/s)": 0.411263 }, { "acc": 0.93482723, "epoch": 2.825251130425365, "grad_norm": 6.516315937042236, "learning_rate": 2.1792716220026726e-06, "loss": 0.37440596, "memory(GiB)": 34.88, "step": 104345, "train_speed(iter/s)": 0.411265 }, { "acc": 0.95069942, "epoch": 2.8253865107085803, "grad_norm": 4.520627498626709, "learning_rate": 2.178809681612796e-06, "loss": 0.3464462, "memory(GiB)": 34.88, "step": 104350, "train_speed(iter/s)": 0.411266 }, { "acc": 0.94024668, "epoch": 2.825521890991796, "grad_norm": 7.843508720397949, "learning_rate": 2.1783477765701367e-06, "loss": 0.30963576, "memory(GiB)": 34.88, "step": 104355, "train_speed(iter/s)": 0.411267 }, { "acc": 0.91448994, "epoch": 2.8256572712750114, "grad_norm": 6.267253875732422, "learning_rate": 2.1778859068804758e-06, "loss": 0.49745235, "memory(GiB)": 34.88, "step": 104360, "train_speed(iter/s)": 0.411268 }, { "acc": 0.93526649, "epoch": 2.825792651558227, "grad_norm": 6.412263870239258, "learning_rate": 2.1774240725496073e-06, "loss": 0.31304474, "memory(GiB)": 34.88, "step": 104365, "train_speed(iter/s)": 0.411269 }, { "acc": 0.94193497, "epoch": 2.8259280318414426, "grad_norm": 12.368254661560059, "learning_rate": 2.1769622735833096e-06, "loss": 0.3614892, "memory(GiB)": 34.88, "step": 104370, "train_speed(iter/s)": 0.41127 }, { "acc": 0.94881668, "epoch": 2.8260634121246584, "grad_norm": 12.489811897277832, "learning_rate": 2.1765005099873718e-06, "loss": 0.26765049, "memory(GiB)": 34.88, "step": 104375, "train_speed(iter/s)": 0.411271 }, { "acc": 0.94306908, "epoch": 2.8261987924078737, "grad_norm": 6.264331340789795, "learning_rate": 2.1760387817675736e-06, "loss": 0.33297431, "memory(GiB)": 34.88, "step": 104380, "train_speed(iter/s)": 0.411272 }, { "acc": 0.92632399, "epoch": 2.826334172691089, "grad_norm": 5.82855749130249, "learning_rate": 2.1755770889297053e-06, "loss": 0.45824747, "memory(GiB)": 34.88, "step": 104385, "train_speed(iter/s)": 0.411273 }, { "acc": 0.94046078, "epoch": 2.826469552974305, "grad_norm": 11.067702293395996, "learning_rate": 2.1751154314795485e-06, "loss": 0.35457168, "memory(GiB)": 34.88, "step": 104390, "train_speed(iter/s)": 0.411274 }, { "acc": 0.93661747, "epoch": 2.8266049332575203, "grad_norm": 6.718964099884033, "learning_rate": 2.17465380942288e-06, "loss": 0.30843587, "memory(GiB)": 34.88, "step": 104395, "train_speed(iter/s)": 0.411275 }, { "acc": 0.94157066, "epoch": 2.826740313540736, "grad_norm": 6.425388813018799, "learning_rate": 2.1741922227654927e-06, "loss": 0.40441227, "memory(GiB)": 34.88, "step": 104400, "train_speed(iter/s)": 0.411276 }, { "acc": 0.95250273, "epoch": 2.8268756938239514, "grad_norm": 14.756091117858887, "learning_rate": 2.1737306715131622e-06, "loss": 0.29019232, "memory(GiB)": 34.88, "step": 104405, "train_speed(iter/s)": 0.411277 }, { "acc": 0.94667282, "epoch": 2.8270110741071672, "grad_norm": 7.499098300933838, "learning_rate": 2.1732691556716734e-06, "loss": 0.30243807, "memory(GiB)": 34.88, "step": 104410, "train_speed(iter/s)": 0.411278 }, { "acc": 0.94630699, "epoch": 2.8271464543903826, "grad_norm": 4.910796165466309, "learning_rate": 2.1728076752468036e-06, "loss": 0.28383462, "memory(GiB)": 34.88, "step": 104415, "train_speed(iter/s)": 0.411279 }, { "acc": 0.94545975, "epoch": 2.827281834673598, "grad_norm": 6.424991607666016, "learning_rate": 2.1723462302443395e-06, "loss": 0.32905574, "memory(GiB)": 34.88, "step": 104420, "train_speed(iter/s)": 0.41128 }, { "acc": 0.93742504, "epoch": 2.8274172149568138, "grad_norm": 15.006231307983398, "learning_rate": 2.1718848206700584e-06, "loss": 0.36495759, "memory(GiB)": 34.88, "step": 104425, "train_speed(iter/s)": 0.411281 }, { "acc": 0.95518675, "epoch": 2.827552595240029, "grad_norm": 10.967818260192871, "learning_rate": 2.1714234465297397e-06, "loss": 0.18752092, "memory(GiB)": 34.88, "step": 104430, "train_speed(iter/s)": 0.411282 }, { "acc": 0.92421465, "epoch": 2.827687975523245, "grad_norm": 5.8648600578308105, "learning_rate": 2.1709621078291634e-06, "loss": 0.46787705, "memory(GiB)": 34.88, "step": 104435, "train_speed(iter/s)": 0.411283 }, { "acc": 0.93863869, "epoch": 2.8278233558064603, "grad_norm": 4.046743869781494, "learning_rate": 2.1705008045741094e-06, "loss": 0.33127174, "memory(GiB)": 34.88, "step": 104440, "train_speed(iter/s)": 0.411284 }, { "acc": 0.94564953, "epoch": 2.827958736089676, "grad_norm": 6.344625473022461, "learning_rate": 2.1700395367703577e-06, "loss": 0.28995152, "memory(GiB)": 34.88, "step": 104445, "train_speed(iter/s)": 0.411285 }, { "acc": 0.94410009, "epoch": 2.8280941163728914, "grad_norm": 8.665083885192871, "learning_rate": 2.169578304423682e-06, "loss": 0.34438975, "memory(GiB)": 34.88, "step": 104450, "train_speed(iter/s)": 0.411286 }, { "acc": 0.92312307, "epoch": 2.828229496656107, "grad_norm": 15.457391738891602, "learning_rate": 2.1691171075398666e-06, "loss": 0.40259457, "memory(GiB)": 34.88, "step": 104455, "train_speed(iter/s)": 0.411287 }, { "acc": 0.94400568, "epoch": 2.8283648769393226, "grad_norm": 6.080802917480469, "learning_rate": 2.168655946124684e-06, "loss": 0.30127866, "memory(GiB)": 34.88, "step": 104460, "train_speed(iter/s)": 0.411288 }, { "acc": 0.92999077, "epoch": 2.828500257222538, "grad_norm": 10.912569046020508, "learning_rate": 2.1681948201839147e-06, "loss": 0.36935906, "memory(GiB)": 34.88, "step": 104465, "train_speed(iter/s)": 0.411289 }, { "acc": 0.93604202, "epoch": 2.8286356375057538, "grad_norm": 13.522968292236328, "learning_rate": 2.167733729723331e-06, "loss": 0.39076152, "memory(GiB)": 34.88, "step": 104470, "train_speed(iter/s)": 0.41129 }, { "acc": 0.93425674, "epoch": 2.828771017788969, "grad_norm": 6.504202365875244, "learning_rate": 2.1672726747487117e-06, "loss": 0.28514338, "memory(GiB)": 34.88, "step": 104475, "train_speed(iter/s)": 0.411291 }, { "acc": 0.95231075, "epoch": 2.828906398072185, "grad_norm": 9.82113265991211, "learning_rate": 2.1668116552658334e-06, "loss": 0.30698268, "memory(GiB)": 34.88, "step": 104480, "train_speed(iter/s)": 0.411292 }, { "acc": 0.93582191, "epoch": 2.8290417783554003, "grad_norm": 3.7212367057800293, "learning_rate": 2.166350671280466e-06, "loss": 0.37903907, "memory(GiB)": 34.88, "step": 104485, "train_speed(iter/s)": 0.411294 }, { "acc": 0.92690115, "epoch": 2.8291771586386156, "grad_norm": 7.661344528198242, "learning_rate": 2.165889722798392e-06, "loss": 0.41874542, "memory(GiB)": 34.88, "step": 104490, "train_speed(iter/s)": 0.411295 }, { "acc": 0.93863544, "epoch": 2.8293125389218314, "grad_norm": 4.733333110809326, "learning_rate": 2.1654288098253797e-06, "loss": 0.31311803, "memory(GiB)": 34.88, "step": 104495, "train_speed(iter/s)": 0.411296 }, { "acc": 0.92680864, "epoch": 2.8294479192050472, "grad_norm": 5.879062652587891, "learning_rate": 2.1649679323672067e-06, "loss": 0.43117599, "memory(GiB)": 34.88, "step": 104500, "train_speed(iter/s)": 0.411297 }, { "acc": 0.92373123, "epoch": 2.8295832994882626, "grad_norm": 13.1205472946167, "learning_rate": 2.1645070904296412e-06, "loss": 0.46715312, "memory(GiB)": 34.88, "step": 104505, "train_speed(iter/s)": 0.411298 }, { "acc": 0.94059153, "epoch": 2.829718679771478, "grad_norm": 6.601968765258789, "learning_rate": 2.164046284018463e-06, "loss": 0.37390375, "memory(GiB)": 34.88, "step": 104510, "train_speed(iter/s)": 0.411299 }, { "acc": 0.92441826, "epoch": 2.8298540600546938, "grad_norm": 19.18245506286621, "learning_rate": 2.163585513139442e-06, "loss": 0.37109461, "memory(GiB)": 34.88, "step": 104515, "train_speed(iter/s)": 0.4113 }, { "acc": 0.93573895, "epoch": 2.829989440337909, "grad_norm": 6.7373552322387695, "learning_rate": 2.163124777798345e-06, "loss": 0.33837504, "memory(GiB)": 34.88, "step": 104520, "train_speed(iter/s)": 0.411301 }, { "acc": 0.93615742, "epoch": 2.8301248206211245, "grad_norm": 6.053958415985107, "learning_rate": 2.162664078000952e-06, "loss": 0.41500125, "memory(GiB)": 34.88, "step": 104525, "train_speed(iter/s)": 0.411302 }, { "acc": 0.9406683, "epoch": 2.8302602009043403, "grad_norm": 7.573443412780762, "learning_rate": 2.1622034137530286e-06, "loss": 0.41030922, "memory(GiB)": 34.88, "step": 104530, "train_speed(iter/s)": 0.411303 }, { "acc": 0.93424091, "epoch": 2.830395581187556, "grad_norm": 7.502261638641357, "learning_rate": 2.161742785060349e-06, "loss": 0.39939542, "memory(GiB)": 34.88, "step": 104535, "train_speed(iter/s)": 0.411304 }, { "acc": 0.94709187, "epoch": 2.8305309614707714, "grad_norm": 7.1515021324157715, "learning_rate": 2.161282191928678e-06, "loss": 0.28552432, "memory(GiB)": 34.88, "step": 104540, "train_speed(iter/s)": 0.411305 }, { "acc": 0.93194771, "epoch": 2.830666341753987, "grad_norm": 7.336206436157227, "learning_rate": 2.1608216343637922e-06, "loss": 0.40268459, "memory(GiB)": 34.88, "step": 104545, "train_speed(iter/s)": 0.411306 }, { "acc": 0.94089756, "epoch": 2.8308017220372026, "grad_norm": 6.4433979988098145, "learning_rate": 2.1603611123714566e-06, "loss": 0.30322828, "memory(GiB)": 34.88, "step": 104550, "train_speed(iter/s)": 0.411307 }, { "acc": 0.95582447, "epoch": 2.830937102320418, "grad_norm": 4.065378189086914, "learning_rate": 2.1599006259574415e-06, "loss": 0.22275743, "memory(GiB)": 34.88, "step": 104555, "train_speed(iter/s)": 0.411308 }, { "acc": 0.93613424, "epoch": 2.8310724826036338, "grad_norm": 16.644739151000977, "learning_rate": 2.159440175127517e-06, "loss": 0.36483328, "memory(GiB)": 34.88, "step": 104560, "train_speed(iter/s)": 0.411309 }, { "acc": 0.94117508, "epoch": 2.831207862886849, "grad_norm": 4.903120517730713, "learning_rate": 2.158979759887448e-06, "loss": 0.31494427, "memory(GiB)": 34.88, "step": 104565, "train_speed(iter/s)": 0.411311 }, { "acc": 0.93631649, "epoch": 2.831343243170065, "grad_norm": 7.549101829528809, "learning_rate": 2.158519380243005e-06, "loss": 0.37209537, "memory(GiB)": 34.88, "step": 104570, "train_speed(iter/s)": 0.411312 }, { "acc": 0.9330986, "epoch": 2.8314786234532803, "grad_norm": 5.640074253082275, "learning_rate": 2.15805903619995e-06, "loss": 0.34245989, "memory(GiB)": 34.88, "step": 104575, "train_speed(iter/s)": 0.411313 }, { "acc": 0.92797003, "epoch": 2.8316140037364956, "grad_norm": 4.2989349365234375, "learning_rate": 2.157598727764057e-06, "loss": 0.39751194, "memory(GiB)": 34.88, "step": 104580, "train_speed(iter/s)": 0.411314 }, { "acc": 0.93088703, "epoch": 2.8317493840197114, "grad_norm": 5.125956058502197, "learning_rate": 2.1571384549410875e-06, "loss": 0.40226016, "memory(GiB)": 34.88, "step": 104585, "train_speed(iter/s)": 0.411315 }, { "acc": 0.92411547, "epoch": 2.831884764302927, "grad_norm": 23.46004867553711, "learning_rate": 2.1566782177368074e-06, "loss": 0.50961866, "memory(GiB)": 34.88, "step": 104590, "train_speed(iter/s)": 0.411316 }, { "acc": 0.93850803, "epoch": 2.8320201445861426, "grad_norm": 6.053999423980713, "learning_rate": 2.1562180161569865e-06, "loss": 0.38953564, "memory(GiB)": 34.88, "step": 104595, "train_speed(iter/s)": 0.411317 }, { "acc": 0.94051065, "epoch": 2.832155524869358, "grad_norm": 3.5327606201171875, "learning_rate": 2.1557578502073837e-06, "loss": 0.36795425, "memory(GiB)": 34.88, "step": 104600, "train_speed(iter/s)": 0.411318 }, { "acc": 0.93206587, "epoch": 2.8322909051525738, "grad_norm": 12.263836860656738, "learning_rate": 2.1552977198937686e-06, "loss": 0.43699293, "memory(GiB)": 34.88, "step": 104605, "train_speed(iter/s)": 0.411319 }, { "acc": 0.94901428, "epoch": 2.832426285435789, "grad_norm": 2.7358992099761963, "learning_rate": 2.1548376252218986e-06, "loss": 0.29357135, "memory(GiB)": 34.88, "step": 104610, "train_speed(iter/s)": 0.41132 }, { "acc": 0.93497829, "epoch": 2.8325616657190045, "grad_norm": 6.855649948120117, "learning_rate": 2.1543775661975457e-06, "loss": 0.40663409, "memory(GiB)": 34.88, "step": 104615, "train_speed(iter/s)": 0.411321 }, { "acc": 0.9330286, "epoch": 2.8326970460022203, "grad_norm": 7.338598251342773, "learning_rate": 2.153917542826467e-06, "loss": 0.3508625, "memory(GiB)": 34.88, "step": 104620, "train_speed(iter/s)": 0.411322 }, { "acc": 0.95818501, "epoch": 2.8328324262854356, "grad_norm": 4.47335147857666, "learning_rate": 2.153457555114427e-06, "loss": 0.22202339, "memory(GiB)": 34.88, "step": 104625, "train_speed(iter/s)": 0.411323 }, { "acc": 0.94901028, "epoch": 2.8329678065686514, "grad_norm": 3.043818235397339, "learning_rate": 2.152997603067188e-06, "loss": 0.32805314, "memory(GiB)": 34.88, "step": 104630, "train_speed(iter/s)": 0.411324 }, { "acc": 0.94846706, "epoch": 2.833103186851867, "grad_norm": 7.301961898803711, "learning_rate": 2.152537686690514e-06, "loss": 0.26601605, "memory(GiB)": 34.88, "step": 104635, "train_speed(iter/s)": 0.411325 }, { "acc": 0.933778, "epoch": 2.8332385671350826, "grad_norm": 8.46541976928711, "learning_rate": 2.1520778059901633e-06, "loss": 0.4213829, "memory(GiB)": 34.88, "step": 104640, "train_speed(iter/s)": 0.411326 }, { "acc": 0.94079275, "epoch": 2.833373947418298, "grad_norm": 6.5116424560546875, "learning_rate": 2.1516179609718956e-06, "loss": 0.37691085, "memory(GiB)": 34.88, "step": 104645, "train_speed(iter/s)": 0.411327 }, { "acc": 0.94510403, "epoch": 2.8335093277015133, "grad_norm": 4.391505241394043, "learning_rate": 2.1511581516414756e-06, "loss": 0.28339834, "memory(GiB)": 34.88, "step": 104650, "train_speed(iter/s)": 0.411328 }, { "acc": 0.94799786, "epoch": 2.833644707984729, "grad_norm": 9.546110153198242, "learning_rate": 2.1506983780046602e-06, "loss": 0.28510618, "memory(GiB)": 34.88, "step": 104655, "train_speed(iter/s)": 0.411329 }, { "acc": 0.94870224, "epoch": 2.833780088267945, "grad_norm": 5.263004302978516, "learning_rate": 2.1502386400672097e-06, "loss": 0.31081667, "memory(GiB)": 34.88, "step": 104660, "train_speed(iter/s)": 0.41133 }, { "acc": 0.94493389, "epoch": 2.8339154685511603, "grad_norm": 4.098760604858398, "learning_rate": 2.1497789378348843e-06, "loss": 0.25593722, "memory(GiB)": 34.88, "step": 104665, "train_speed(iter/s)": 0.411331 }, { "acc": 0.94608393, "epoch": 2.8340508488343756, "grad_norm": 8.031844139099121, "learning_rate": 2.1493192713134425e-06, "loss": 0.28105736, "memory(GiB)": 34.88, "step": 104670, "train_speed(iter/s)": 0.411332 }, { "acc": 0.95168114, "epoch": 2.8341862291175914, "grad_norm": 5.399776935577393, "learning_rate": 2.1488596405086407e-06, "loss": 0.27727704, "memory(GiB)": 34.88, "step": 104675, "train_speed(iter/s)": 0.411333 }, { "acc": 0.93239498, "epoch": 2.834321609400807, "grad_norm": 7.898688316345215, "learning_rate": 2.1484000454262377e-06, "loss": 0.3722805, "memory(GiB)": 34.88, "step": 104680, "train_speed(iter/s)": 0.411334 }, { "acc": 0.93123093, "epoch": 2.834456989684022, "grad_norm": 5.28262186050415, "learning_rate": 2.1479404860719926e-06, "loss": 0.44685802, "memory(GiB)": 34.88, "step": 104685, "train_speed(iter/s)": 0.411335 }, { "acc": 0.94667425, "epoch": 2.834592369967238, "grad_norm": 5.479551792144775, "learning_rate": 2.147480962451659e-06, "loss": 0.31614165, "memory(GiB)": 34.88, "step": 104690, "train_speed(iter/s)": 0.411336 }, { "acc": 0.94640102, "epoch": 2.8347277502504538, "grad_norm": 11.346068382263184, "learning_rate": 2.147021474570995e-06, "loss": 0.29772925, "memory(GiB)": 34.88, "step": 104695, "train_speed(iter/s)": 0.411337 }, { "acc": 0.93983927, "epoch": 2.834863130533669, "grad_norm": 6.880844593048096, "learning_rate": 2.1465620224357574e-06, "loss": 0.35574369, "memory(GiB)": 34.88, "step": 104700, "train_speed(iter/s)": 0.411338 }, { "acc": 0.94738302, "epoch": 2.8349985108168845, "grad_norm": 9.611832618713379, "learning_rate": 2.146102606051702e-06, "loss": 0.2582186, "memory(GiB)": 34.88, "step": 104705, "train_speed(iter/s)": 0.411339 }, { "acc": 0.91646729, "epoch": 2.8351338911001003, "grad_norm": 7.790877819061279, "learning_rate": 2.1456432254245806e-06, "loss": 0.45193472, "memory(GiB)": 34.88, "step": 104710, "train_speed(iter/s)": 0.41134 }, { "acc": 0.94288416, "epoch": 2.8352692713833156, "grad_norm": 6.503729820251465, "learning_rate": 2.145183880560151e-06, "loss": 0.37135344, "memory(GiB)": 34.88, "step": 104715, "train_speed(iter/s)": 0.411341 }, { "acc": 0.93973179, "epoch": 2.8354046516665314, "grad_norm": 8.768197059631348, "learning_rate": 2.144724571464168e-06, "loss": 0.38884938, "memory(GiB)": 34.88, "step": 104720, "train_speed(iter/s)": 0.411342 }, { "acc": 0.95376339, "epoch": 2.835540031949747, "grad_norm": 8.834362983703613, "learning_rate": 2.1442652981423816e-06, "loss": 0.27108068, "memory(GiB)": 34.88, "step": 104725, "train_speed(iter/s)": 0.411343 }, { "acc": 0.94268398, "epoch": 2.8356754122329626, "grad_norm": 8.54626750946045, "learning_rate": 2.1438060606005477e-06, "loss": 0.34950881, "memory(GiB)": 34.88, "step": 104730, "train_speed(iter/s)": 0.411344 }, { "acc": 0.92754898, "epoch": 2.835810792516178, "grad_norm": 6.761119365692139, "learning_rate": 2.143346858844419e-06, "loss": 0.42977419, "memory(GiB)": 34.88, "step": 104735, "train_speed(iter/s)": 0.411345 }, { "acc": 0.9318882, "epoch": 2.8359461727993933, "grad_norm": 7.217097282409668, "learning_rate": 2.1428876928797493e-06, "loss": 0.38414321, "memory(GiB)": 34.88, "step": 104740, "train_speed(iter/s)": 0.411346 }, { "acc": 0.93546076, "epoch": 2.836081553082609, "grad_norm": 3.3818957805633545, "learning_rate": 2.1424285627122866e-06, "loss": 0.3712718, "memory(GiB)": 34.88, "step": 104745, "train_speed(iter/s)": 0.411347 }, { "acc": 0.9445343, "epoch": 2.8362169333658245, "grad_norm": 22.09760093688965, "learning_rate": 2.1419694683477856e-06, "loss": 0.36793897, "memory(GiB)": 34.88, "step": 104750, "train_speed(iter/s)": 0.411348 }, { "acc": 0.93740425, "epoch": 2.8363523136490403, "grad_norm": 13.55535888671875, "learning_rate": 2.141510409791996e-06, "loss": 0.40179987, "memory(GiB)": 34.88, "step": 104755, "train_speed(iter/s)": 0.411348 }, { "acc": 0.94786987, "epoch": 2.8364876939322556, "grad_norm": 9.123745918273926, "learning_rate": 2.1410513870506704e-06, "loss": 0.28115768, "memory(GiB)": 34.88, "step": 104760, "train_speed(iter/s)": 0.411349 }, { "acc": 0.94132671, "epoch": 2.8366230742154714, "grad_norm": 8.571935653686523, "learning_rate": 2.1405924001295565e-06, "loss": 0.32127981, "memory(GiB)": 34.88, "step": 104765, "train_speed(iter/s)": 0.411351 }, { "acc": 0.94908466, "epoch": 2.836758454498687, "grad_norm": 4.489231109619141, "learning_rate": 2.1401334490344045e-06, "loss": 0.22380974, "memory(GiB)": 34.88, "step": 104770, "train_speed(iter/s)": 0.411352 }, { "acc": 0.93906345, "epoch": 2.836893834781902, "grad_norm": 3.739760637283325, "learning_rate": 2.139674533770966e-06, "loss": 0.37100606, "memory(GiB)": 34.88, "step": 104775, "train_speed(iter/s)": 0.411352 }, { "acc": 0.92539482, "epoch": 2.837029215065118, "grad_norm": 11.16180419921875, "learning_rate": 2.1392156543449864e-06, "loss": 0.40412335, "memory(GiB)": 34.88, "step": 104780, "train_speed(iter/s)": 0.411353 }, { "acc": 0.94507961, "epoch": 2.8371645953483333, "grad_norm": 8.168249130249023, "learning_rate": 2.1387568107622153e-06, "loss": 0.35314987, "memory(GiB)": 34.88, "step": 104785, "train_speed(iter/s)": 0.411354 }, { "acc": 0.9499773, "epoch": 2.837299975631549, "grad_norm": 9.554969787597656, "learning_rate": 2.138298003028401e-06, "loss": 0.27110896, "memory(GiB)": 34.88, "step": 104790, "train_speed(iter/s)": 0.411355 }, { "acc": 0.93956461, "epoch": 2.8374353559147645, "grad_norm": 8.7901611328125, "learning_rate": 2.137839231149293e-06, "loss": 0.33718038, "memory(GiB)": 34.88, "step": 104795, "train_speed(iter/s)": 0.411356 }, { "acc": 0.93330717, "epoch": 2.8375707361979803, "grad_norm": 11.021798133850098, "learning_rate": 2.1373804951306347e-06, "loss": 0.40326986, "memory(GiB)": 34.88, "step": 104800, "train_speed(iter/s)": 0.411357 }, { "acc": 0.93660803, "epoch": 2.8377061164811956, "grad_norm": 7.082917213439941, "learning_rate": 2.136921794978174e-06, "loss": 0.38897042, "memory(GiB)": 34.88, "step": 104805, "train_speed(iter/s)": 0.411358 }, { "acc": 0.93773117, "epoch": 2.837841496764411, "grad_norm": 9.48172664642334, "learning_rate": 2.1364631306976583e-06, "loss": 0.3920855, "memory(GiB)": 34.88, "step": 104810, "train_speed(iter/s)": 0.411359 }, { "acc": 0.9355526, "epoch": 2.837976877047627, "grad_norm": 6.081124305725098, "learning_rate": 2.136004502294831e-06, "loss": 0.3408215, "memory(GiB)": 34.88, "step": 104815, "train_speed(iter/s)": 0.41136 }, { "acc": 0.92345524, "epoch": 2.8381122573308426, "grad_norm": 5.595930576324463, "learning_rate": 2.135545909775439e-06, "loss": 0.49890532, "memory(GiB)": 34.88, "step": 104820, "train_speed(iter/s)": 0.411361 }, { "acc": 0.93977146, "epoch": 2.838247637614058, "grad_norm": 10.211642265319824, "learning_rate": 2.1350873531452258e-06, "loss": 0.32464738, "memory(GiB)": 34.88, "step": 104825, "train_speed(iter/s)": 0.411362 }, { "acc": 0.93444128, "epoch": 2.8383830178972733, "grad_norm": 8.665472030639648, "learning_rate": 2.134628832409939e-06, "loss": 0.38902977, "memory(GiB)": 34.88, "step": 104830, "train_speed(iter/s)": 0.411363 }, { "acc": 0.93662186, "epoch": 2.838518398180489, "grad_norm": 11.613149642944336, "learning_rate": 2.134170347575318e-06, "loss": 0.35023477, "memory(GiB)": 34.88, "step": 104835, "train_speed(iter/s)": 0.411364 }, { "acc": 0.9364006, "epoch": 2.8386537784637045, "grad_norm": 8.71316909790039, "learning_rate": 2.133711898647108e-06, "loss": 0.38287199, "memory(GiB)": 34.88, "step": 104840, "train_speed(iter/s)": 0.411365 }, { "acc": 0.92743883, "epoch": 2.83878915874692, "grad_norm": 7.456440448760986, "learning_rate": 2.1332534856310536e-06, "loss": 0.40521011, "memory(GiB)": 34.88, "step": 104845, "train_speed(iter/s)": 0.411366 }, { "acc": 0.93911152, "epoch": 2.8389245390301356, "grad_norm": 9.16490650177002, "learning_rate": 2.1327951085328944e-06, "loss": 0.34528661, "memory(GiB)": 34.88, "step": 104850, "train_speed(iter/s)": 0.411367 }, { "acc": 0.93788366, "epoch": 2.8390599193133514, "grad_norm": 5.754271984100342, "learning_rate": 2.1323367673583745e-06, "loss": 0.37612066, "memory(GiB)": 34.88, "step": 104855, "train_speed(iter/s)": 0.411368 }, { "acc": 0.94413252, "epoch": 2.839195299596567, "grad_norm": 9.269713401794434, "learning_rate": 2.131878462113235e-06, "loss": 0.36811476, "memory(GiB)": 34.88, "step": 104860, "train_speed(iter/s)": 0.411369 }, { "acc": 0.94977493, "epoch": 2.839330679879782, "grad_norm": 3.1064493656158447, "learning_rate": 2.131420192803219e-06, "loss": 0.31096239, "memory(GiB)": 34.88, "step": 104865, "train_speed(iter/s)": 0.41137 }, { "acc": 0.9345295, "epoch": 2.839466060162998, "grad_norm": 8.025156021118164, "learning_rate": 2.1309619594340626e-06, "loss": 0.3684577, "memory(GiB)": 34.88, "step": 104870, "train_speed(iter/s)": 0.411371 }, { "acc": 0.93168297, "epoch": 2.8396014404462133, "grad_norm": 7.997061729431152, "learning_rate": 2.130503762011509e-06, "loss": 0.41300673, "memory(GiB)": 34.88, "step": 104875, "train_speed(iter/s)": 0.411372 }, { "acc": 0.92908039, "epoch": 2.839736820729429, "grad_norm": 10.565211296081543, "learning_rate": 2.1300456005412987e-06, "loss": 0.44628682, "memory(GiB)": 34.88, "step": 104880, "train_speed(iter/s)": 0.411373 }, { "acc": 0.93701096, "epoch": 2.8398722010126445, "grad_norm": 3.810939073562622, "learning_rate": 2.1295874750291713e-06, "loss": 0.35651536, "memory(GiB)": 34.88, "step": 104885, "train_speed(iter/s)": 0.411374 }, { "acc": 0.94494505, "epoch": 2.8400075812958603, "grad_norm": 6.635044574737549, "learning_rate": 2.1291293854808626e-06, "loss": 0.27701876, "memory(GiB)": 34.88, "step": 104890, "train_speed(iter/s)": 0.411375 }, { "acc": 0.93155556, "epoch": 2.8401429615790756, "grad_norm": 16.073118209838867, "learning_rate": 2.1286713319021137e-06, "loss": 0.46541314, "memory(GiB)": 34.88, "step": 104895, "train_speed(iter/s)": 0.411376 }, { "acc": 0.94171648, "epoch": 2.840278341862291, "grad_norm": 20.08548927307129, "learning_rate": 2.1282133142986634e-06, "loss": 0.37621403, "memory(GiB)": 34.88, "step": 104900, "train_speed(iter/s)": 0.411377 }, { "acc": 0.93270969, "epoch": 2.840413722145507, "grad_norm": 9.004738807678223, "learning_rate": 2.1277553326762465e-06, "loss": 0.33721535, "memory(GiB)": 34.88, "step": 104905, "train_speed(iter/s)": 0.411378 }, { "acc": 0.94303236, "epoch": 2.840549102428722, "grad_norm": 7.458360195159912, "learning_rate": 2.1272973870406007e-06, "loss": 0.31822977, "memory(GiB)": 34.88, "step": 104910, "train_speed(iter/s)": 0.411379 }, { "acc": 0.94441633, "epoch": 2.840684482711938, "grad_norm": 4.8626389503479, "learning_rate": 2.126839477397464e-06, "loss": 0.34335063, "memory(GiB)": 34.88, "step": 104915, "train_speed(iter/s)": 0.41138 }, { "acc": 0.93975315, "epoch": 2.8408198629951533, "grad_norm": 6.746772289276123, "learning_rate": 2.126381603752574e-06, "loss": 0.32422569, "memory(GiB)": 34.88, "step": 104920, "train_speed(iter/s)": 0.411381 }, { "acc": 0.94427757, "epoch": 2.840955243278369, "grad_norm": 41.61617660522461, "learning_rate": 2.1259237661116604e-06, "loss": 0.30168891, "memory(GiB)": 34.88, "step": 104925, "train_speed(iter/s)": 0.411381 }, { "acc": 0.9574091, "epoch": 2.8410906235615845, "grad_norm": 5.233633041381836, "learning_rate": 2.125465964480467e-06, "loss": 0.23695092, "memory(GiB)": 34.88, "step": 104930, "train_speed(iter/s)": 0.411383 }, { "acc": 0.93225412, "epoch": 2.8412260038448, "grad_norm": 16.231578826904297, "learning_rate": 2.125008198864724e-06, "loss": 0.36032374, "memory(GiB)": 34.88, "step": 104935, "train_speed(iter/s)": 0.411383 }, { "acc": 0.94425831, "epoch": 2.8413613841280156, "grad_norm": 3.6120543479919434, "learning_rate": 2.1245504692701643e-06, "loss": 0.36017373, "memory(GiB)": 34.88, "step": 104940, "train_speed(iter/s)": 0.411384 }, { "acc": 0.94653273, "epoch": 2.841496764411231, "grad_norm": 3.818718671798706, "learning_rate": 2.1240927757025235e-06, "loss": 0.28769784, "memory(GiB)": 34.88, "step": 104945, "train_speed(iter/s)": 0.411385 }, { "acc": 0.93337784, "epoch": 2.841632144694447, "grad_norm": 5.142622470855713, "learning_rate": 2.123635118167536e-06, "loss": 0.3700428, "memory(GiB)": 34.88, "step": 104950, "train_speed(iter/s)": 0.411386 }, { "acc": 0.93108273, "epoch": 2.841767524977662, "grad_norm": 6.342361927032471, "learning_rate": 2.1231774966709348e-06, "loss": 0.37778091, "memory(GiB)": 34.88, "step": 104955, "train_speed(iter/s)": 0.411387 }, { "acc": 0.93192463, "epoch": 2.841902905260878, "grad_norm": 6.229697227478027, "learning_rate": 2.122719911218449e-06, "loss": 0.40245061, "memory(GiB)": 34.88, "step": 104960, "train_speed(iter/s)": 0.411388 }, { "acc": 0.92394447, "epoch": 2.8420382855440933, "grad_norm": 10.430146217346191, "learning_rate": 2.1222623618158174e-06, "loss": 0.41282997, "memory(GiB)": 34.88, "step": 104965, "train_speed(iter/s)": 0.411389 }, { "acc": 0.94986992, "epoch": 2.8421736658273087, "grad_norm": 6.13472318649292, "learning_rate": 2.121804848468768e-06, "loss": 0.25193949, "memory(GiB)": 34.88, "step": 104970, "train_speed(iter/s)": 0.41139 }, { "acc": 0.9278862, "epoch": 2.8423090461105245, "grad_norm": 5.504405498504639, "learning_rate": 2.1213473711830298e-06, "loss": 0.37539642, "memory(GiB)": 34.88, "step": 104975, "train_speed(iter/s)": 0.411391 }, { "acc": 0.94107265, "epoch": 2.8424444263937403, "grad_norm": 6.686820030212402, "learning_rate": 2.1208899299643357e-06, "loss": 0.33644073, "memory(GiB)": 34.88, "step": 104980, "train_speed(iter/s)": 0.411392 }, { "acc": 0.92877684, "epoch": 2.8425798066769556, "grad_norm": 5.973801136016846, "learning_rate": 2.1204325248184167e-06, "loss": 0.28655815, "memory(GiB)": 34.88, "step": 104985, "train_speed(iter/s)": 0.411393 }, { "acc": 0.93996563, "epoch": 2.842715186960171, "grad_norm": 7.325125694274902, "learning_rate": 2.1199751557510033e-06, "loss": 0.31443572, "memory(GiB)": 34.88, "step": 104990, "train_speed(iter/s)": 0.411394 }, { "acc": 0.94718819, "epoch": 2.842850567243387, "grad_norm": 6.3101115226745605, "learning_rate": 2.1195178227678225e-06, "loss": 0.35401096, "memory(GiB)": 34.88, "step": 104995, "train_speed(iter/s)": 0.411395 }, { "acc": 0.94326305, "epoch": 2.842985947526602, "grad_norm": 8.68287467956543, "learning_rate": 2.1190605258746043e-06, "loss": 0.30983648, "memory(GiB)": 34.88, "step": 105000, "train_speed(iter/s)": 0.411396 }, { "acc": 0.93837709, "epoch": 2.8431213278098175, "grad_norm": 10.846402168273926, "learning_rate": 2.1186032650770777e-06, "loss": 0.38860245, "memory(GiB)": 34.88, "step": 105005, "train_speed(iter/s)": 0.411397 }, { "acc": 0.92856398, "epoch": 2.8432567080930333, "grad_norm": 8.697196006774902, "learning_rate": 2.1181460403809724e-06, "loss": 0.43179159, "memory(GiB)": 34.88, "step": 105010, "train_speed(iter/s)": 0.411398 }, { "acc": 0.93888359, "epoch": 2.843392088376249, "grad_norm": 8.095094680786133, "learning_rate": 2.1176888517920126e-06, "loss": 0.28234148, "memory(GiB)": 34.88, "step": 105015, "train_speed(iter/s)": 0.411399 }, { "acc": 0.93687878, "epoch": 2.8435274686594645, "grad_norm": 5.943717956542969, "learning_rate": 2.117231699315927e-06, "loss": 0.33639114, "memory(GiB)": 34.88, "step": 105020, "train_speed(iter/s)": 0.4114 }, { "acc": 0.92749329, "epoch": 2.84366284894268, "grad_norm": 8.095602035522461, "learning_rate": 2.116774582958444e-06, "loss": 0.4117105, "memory(GiB)": 34.88, "step": 105025, "train_speed(iter/s)": 0.411401 }, { "acc": 0.91865807, "epoch": 2.8437982292258956, "grad_norm": 12.341069221496582, "learning_rate": 2.116317502725287e-06, "loss": 0.38622055, "memory(GiB)": 34.88, "step": 105030, "train_speed(iter/s)": 0.411402 }, { "acc": 0.94059582, "epoch": 2.843933609509111, "grad_norm": 6.107967853546143, "learning_rate": 2.1158604586221832e-06, "loss": 0.43431187, "memory(GiB)": 34.88, "step": 105035, "train_speed(iter/s)": 0.411403 }, { "acc": 0.93330812, "epoch": 2.844068989792327, "grad_norm": 9.372967720031738, "learning_rate": 2.115403450654858e-06, "loss": 0.35938411, "memory(GiB)": 34.88, "step": 105040, "train_speed(iter/s)": 0.411404 }, { "acc": 0.94939823, "epoch": 2.844204370075542, "grad_norm": 7.104184627532959, "learning_rate": 2.1149464788290374e-06, "loss": 0.28655472, "memory(GiB)": 34.88, "step": 105045, "train_speed(iter/s)": 0.411405 }, { "acc": 0.94123878, "epoch": 2.844339750358758, "grad_norm": 5.617603302001953, "learning_rate": 2.1144895431504412e-06, "loss": 0.34264932, "memory(GiB)": 34.88, "step": 105050, "train_speed(iter/s)": 0.411406 }, { "acc": 0.93564167, "epoch": 2.8444751306419733, "grad_norm": 4.834423542022705, "learning_rate": 2.1140326436248015e-06, "loss": 0.32524667, "memory(GiB)": 34.88, "step": 105055, "train_speed(iter/s)": 0.411407 }, { "acc": 0.94381161, "epoch": 2.8446105109251887, "grad_norm": 5.5657057762146, "learning_rate": 2.113575780257837e-06, "loss": 0.30682716, "memory(GiB)": 34.88, "step": 105060, "train_speed(iter/s)": 0.411408 }, { "acc": 0.93454666, "epoch": 2.8447458912084045, "grad_norm": 20.034509658813477, "learning_rate": 2.113118953055269e-06, "loss": 0.45077615, "memory(GiB)": 34.88, "step": 105065, "train_speed(iter/s)": 0.411409 }, { "acc": 0.94974918, "epoch": 2.84488127149162, "grad_norm": 6.694456577301025, "learning_rate": 2.1126621620228223e-06, "loss": 0.27827942, "memory(GiB)": 34.88, "step": 105070, "train_speed(iter/s)": 0.41141 }, { "acc": 0.95367413, "epoch": 2.8450166517748356, "grad_norm": 4.200068473815918, "learning_rate": 2.1122054071662193e-06, "loss": 0.23035188, "memory(GiB)": 34.88, "step": 105075, "train_speed(iter/s)": 0.411411 }, { "acc": 0.94372768, "epoch": 2.845152032058051, "grad_norm": 10.822652816772461, "learning_rate": 2.1117486884911834e-06, "loss": 0.31835792, "memory(GiB)": 34.88, "step": 105080, "train_speed(iter/s)": 0.411412 }, { "acc": 0.93259315, "epoch": 2.845287412341267, "grad_norm": 3.788296937942505, "learning_rate": 2.11129200600343e-06, "loss": 0.36181977, "memory(GiB)": 34.88, "step": 105085, "train_speed(iter/s)": 0.411413 }, { "acc": 0.95391493, "epoch": 2.845422792624482, "grad_norm": 6.644550800323486, "learning_rate": 2.1108353597086886e-06, "loss": 0.21389151, "memory(GiB)": 34.88, "step": 105090, "train_speed(iter/s)": 0.411414 }, { "acc": 0.94632301, "epoch": 2.8455581729076975, "grad_norm": 4.331570625305176, "learning_rate": 2.1103787496126725e-06, "loss": 0.34662087, "memory(GiB)": 34.88, "step": 105095, "train_speed(iter/s)": 0.411415 }, { "acc": 0.92300034, "epoch": 2.8456935531909133, "grad_norm": 15.84642505645752, "learning_rate": 2.109922175721106e-06, "loss": 0.41273456, "memory(GiB)": 34.88, "step": 105100, "train_speed(iter/s)": 0.411416 }, { "acc": 0.93833122, "epoch": 2.8458289334741287, "grad_norm": 6.593094348907471, "learning_rate": 2.109465638039705e-06, "loss": 0.30535979, "memory(GiB)": 34.88, "step": 105105, "train_speed(iter/s)": 0.411417 }, { "acc": 0.95669909, "epoch": 2.8459643137573445, "grad_norm": 4.8729166984558105, "learning_rate": 2.1090091365741907e-06, "loss": 0.23918843, "memory(GiB)": 34.88, "step": 105110, "train_speed(iter/s)": 0.411418 }, { "acc": 0.94246235, "epoch": 2.84609969404056, "grad_norm": 3.535533905029297, "learning_rate": 2.108552671330282e-06, "loss": 0.31451402, "memory(GiB)": 34.88, "step": 105115, "train_speed(iter/s)": 0.411419 }, { "acc": 0.94203854, "epoch": 2.8462350743237756, "grad_norm": 19.89596939086914, "learning_rate": 2.1080962423136935e-06, "loss": 0.41268439, "memory(GiB)": 34.88, "step": 105120, "train_speed(iter/s)": 0.411421 }, { "acc": 0.93829994, "epoch": 2.846370454606991, "grad_norm": 16.838274002075195, "learning_rate": 2.1076398495301486e-06, "loss": 0.41944318, "memory(GiB)": 34.88, "step": 105125, "train_speed(iter/s)": 0.411422 }, { "acc": 0.93558207, "epoch": 2.8465058348902064, "grad_norm": 9.232636451721191, "learning_rate": 2.1071834929853604e-06, "loss": 0.38871698, "memory(GiB)": 34.88, "step": 105130, "train_speed(iter/s)": 0.411423 }, { "acc": 0.92799616, "epoch": 2.846641215173422, "grad_norm": 17.772537231445312, "learning_rate": 2.106727172685048e-06, "loss": 0.39094832, "memory(GiB)": 34.88, "step": 105135, "train_speed(iter/s)": 0.411424 }, { "acc": 0.93809128, "epoch": 2.846776595456638, "grad_norm": 8.364450454711914, "learning_rate": 2.1062708886349252e-06, "loss": 0.37119174, "memory(GiB)": 34.88, "step": 105140, "train_speed(iter/s)": 0.411425 }, { "acc": 0.94039288, "epoch": 2.8469119757398533, "grad_norm": 5.022745609283447, "learning_rate": 2.105814640840709e-06, "loss": 0.31662409, "memory(GiB)": 34.88, "step": 105145, "train_speed(iter/s)": 0.411426 }, { "acc": 0.93479214, "epoch": 2.8470473560230687, "grad_norm": 5.281081199645996, "learning_rate": 2.1053584293081168e-06, "loss": 0.39374034, "memory(GiB)": 34.88, "step": 105150, "train_speed(iter/s)": 0.411427 }, { "acc": 0.92571411, "epoch": 2.8471827363062845, "grad_norm": 12.245687484741211, "learning_rate": 2.104902254042858e-06, "loss": 0.42245297, "memory(GiB)": 34.88, "step": 105155, "train_speed(iter/s)": 0.411428 }, { "acc": 0.92809124, "epoch": 2.8473181165895, "grad_norm": 8.21475887298584, "learning_rate": 2.104446115050654e-06, "loss": 0.41819549, "memory(GiB)": 34.88, "step": 105160, "train_speed(iter/s)": 0.411429 }, { "acc": 0.92647047, "epoch": 2.847453496872715, "grad_norm": 12.187243461608887, "learning_rate": 2.1039900123372147e-06, "loss": 0.46575441, "memory(GiB)": 34.88, "step": 105165, "train_speed(iter/s)": 0.41143 }, { "acc": 0.95094481, "epoch": 2.847588877155931, "grad_norm": 14.727266311645508, "learning_rate": 2.103533945908256e-06, "loss": 0.25573132, "memory(GiB)": 34.88, "step": 105170, "train_speed(iter/s)": 0.411431 }, { "acc": 0.93354521, "epoch": 2.847724257439147, "grad_norm": 9.87153148651123, "learning_rate": 2.1030779157694865e-06, "loss": 0.43402166, "memory(GiB)": 34.88, "step": 105175, "train_speed(iter/s)": 0.411432 }, { "acc": 0.94108305, "epoch": 2.847859637722362, "grad_norm": 101.46599578857422, "learning_rate": 2.102621921926625e-06, "loss": 0.28063979, "memory(GiB)": 34.88, "step": 105180, "train_speed(iter/s)": 0.411433 }, { "acc": 0.9364172, "epoch": 2.8479950180055775, "grad_norm": 7.498562335968018, "learning_rate": 2.102165964385381e-06, "loss": 0.33446403, "memory(GiB)": 34.88, "step": 105185, "train_speed(iter/s)": 0.411434 }, { "acc": 0.93602257, "epoch": 2.8481303982887933, "grad_norm": 8.257015228271484, "learning_rate": 2.1017100431514624e-06, "loss": 0.34313982, "memory(GiB)": 34.88, "step": 105190, "train_speed(iter/s)": 0.411435 }, { "acc": 0.92967892, "epoch": 2.8482657785720087, "grad_norm": 5.607168197631836, "learning_rate": 2.1012541582305874e-06, "loss": 0.33184705, "memory(GiB)": 34.88, "step": 105195, "train_speed(iter/s)": 0.411436 }, { "acc": 0.93224449, "epoch": 2.8484011588552245, "grad_norm": 11.38511848449707, "learning_rate": 2.100798309628462e-06, "loss": 0.34301372, "memory(GiB)": 34.88, "step": 105200, "train_speed(iter/s)": 0.411437 }, { "acc": 0.9446106, "epoch": 2.84853653913844, "grad_norm": 8.54167652130127, "learning_rate": 2.1003424973508007e-06, "loss": 0.30209217, "memory(GiB)": 34.88, "step": 105205, "train_speed(iter/s)": 0.411438 }, { "acc": 0.93747082, "epoch": 2.8486719194216557, "grad_norm": 9.127789497375488, "learning_rate": 2.0998867214033063e-06, "loss": 0.35678082, "memory(GiB)": 34.88, "step": 105210, "train_speed(iter/s)": 0.411439 }, { "acc": 0.92847652, "epoch": 2.848807299704871, "grad_norm": 10.297313690185547, "learning_rate": 2.099430981791697e-06, "loss": 0.39568934, "memory(GiB)": 34.88, "step": 105215, "train_speed(iter/s)": 0.41144 }, { "acc": 0.94643183, "epoch": 2.8489426799880864, "grad_norm": 9.096505165100098, "learning_rate": 2.0989752785216754e-06, "loss": 0.27207239, "memory(GiB)": 34.88, "step": 105220, "train_speed(iter/s)": 0.411442 }, { "acc": 0.93487206, "epoch": 2.849078060271302, "grad_norm": 9.804119110107422, "learning_rate": 2.098519611598953e-06, "loss": 0.32261949, "memory(GiB)": 34.88, "step": 105225, "train_speed(iter/s)": 0.411442 }, { "acc": 0.92708387, "epoch": 2.8492134405545175, "grad_norm": 8.462471961975098, "learning_rate": 2.0980639810292395e-06, "loss": 0.36173921, "memory(GiB)": 34.88, "step": 105230, "train_speed(iter/s)": 0.411444 }, { "acc": 0.94988823, "epoch": 2.8493488208377333, "grad_norm": 8.416792869567871, "learning_rate": 2.097608386818238e-06, "loss": 0.26292233, "memory(GiB)": 34.88, "step": 105235, "train_speed(iter/s)": 0.411445 }, { "acc": 0.93901587, "epoch": 2.8494842011209487, "grad_norm": 5.047799110412598, "learning_rate": 2.09715282897166e-06, "loss": 0.33693714, "memory(GiB)": 34.88, "step": 105240, "train_speed(iter/s)": 0.411446 }, { "acc": 0.93700161, "epoch": 2.8496195814041645, "grad_norm": 2.5451467037200928, "learning_rate": 2.096697307495207e-06, "loss": 0.33967509, "memory(GiB)": 34.88, "step": 105245, "train_speed(iter/s)": 0.411447 }, { "acc": 0.94584856, "epoch": 2.84975496168738, "grad_norm": 9.686968803405762, "learning_rate": 2.0962418223945922e-06, "loss": 0.33390436, "memory(GiB)": 34.88, "step": 105250, "train_speed(iter/s)": 0.411448 }, { "acc": 0.92211533, "epoch": 2.849890341970595, "grad_norm": 7.885850429534912, "learning_rate": 2.095786373675516e-06, "loss": 0.45457325, "memory(GiB)": 34.88, "step": 105255, "train_speed(iter/s)": 0.411449 }, { "acc": 0.93715086, "epoch": 2.850025722253811, "grad_norm": 6.983311176300049, "learning_rate": 2.095330961343686e-06, "loss": 0.30981236, "memory(GiB)": 34.88, "step": 105260, "train_speed(iter/s)": 0.41145 }, { "acc": 0.95060539, "epoch": 2.8501611025370264, "grad_norm": 6.342369556427002, "learning_rate": 2.094875585404808e-06, "loss": 0.25485024, "memory(GiB)": 34.88, "step": 105265, "train_speed(iter/s)": 0.411451 }, { "acc": 0.92171869, "epoch": 2.850296482820242, "grad_norm": 8.123790740966797, "learning_rate": 2.0944202458645838e-06, "loss": 0.48527632, "memory(GiB)": 34.88, "step": 105270, "train_speed(iter/s)": 0.411452 }, { "acc": 0.9468008, "epoch": 2.8504318631034575, "grad_norm": 7.536987781524658, "learning_rate": 2.0939649427287214e-06, "loss": 0.28686249, "memory(GiB)": 34.88, "step": 105275, "train_speed(iter/s)": 0.411453 }, { "acc": 0.93306866, "epoch": 2.8505672433866733, "grad_norm": 6.727200031280518, "learning_rate": 2.0935096760029175e-06, "loss": 0.35112634, "memory(GiB)": 34.88, "step": 105280, "train_speed(iter/s)": 0.411454 }, { "acc": 0.94020185, "epoch": 2.8507026236698887, "grad_norm": 10.810639381408691, "learning_rate": 2.0930544456928837e-06, "loss": 0.38079944, "memory(GiB)": 34.88, "step": 105285, "train_speed(iter/s)": 0.411455 }, { "acc": 0.93370838, "epoch": 2.850838003953104, "grad_norm": 11.48106861114502, "learning_rate": 2.0925992518043162e-06, "loss": 0.36612878, "memory(GiB)": 34.88, "step": 105290, "train_speed(iter/s)": 0.411456 }, { "acc": 0.93241119, "epoch": 2.85097338423632, "grad_norm": 15.354435920715332, "learning_rate": 2.0921440943429204e-06, "loss": 0.39609847, "memory(GiB)": 34.88, "step": 105295, "train_speed(iter/s)": 0.411457 }, { "acc": 0.93532753, "epoch": 2.851108764519535, "grad_norm": 6.146082401275635, "learning_rate": 2.0916889733143974e-06, "loss": 0.29536324, "memory(GiB)": 34.88, "step": 105300, "train_speed(iter/s)": 0.411458 }, { "acc": 0.95025463, "epoch": 2.851244144802751, "grad_norm": 6.645822525024414, "learning_rate": 2.091233888724449e-06, "loss": 0.33154168, "memory(GiB)": 34.88, "step": 105305, "train_speed(iter/s)": 0.411459 }, { "acc": 0.94636564, "epoch": 2.8513795250859664, "grad_norm": 8.861891746520996, "learning_rate": 2.090778840578776e-06, "loss": 0.30302017, "memory(GiB)": 34.88, "step": 105310, "train_speed(iter/s)": 0.41146 }, { "acc": 0.94759998, "epoch": 2.851514905369182, "grad_norm": 4.311059474945068, "learning_rate": 2.090323828883075e-06, "loss": 0.24873495, "memory(GiB)": 34.88, "step": 105315, "train_speed(iter/s)": 0.411461 }, { "acc": 0.927285, "epoch": 2.8516502856523975, "grad_norm": 9.081857681274414, "learning_rate": 2.089868853643052e-06, "loss": 0.40381284, "memory(GiB)": 34.88, "step": 105320, "train_speed(iter/s)": 0.411462 }, { "acc": 0.95422792, "epoch": 2.851785665935613, "grad_norm": 5.942281723022461, "learning_rate": 2.0894139148644015e-06, "loss": 0.26394157, "memory(GiB)": 34.88, "step": 105325, "train_speed(iter/s)": 0.411463 }, { "acc": 0.93106117, "epoch": 2.8519210462188287, "grad_norm": 5.187885284423828, "learning_rate": 2.0889590125528243e-06, "loss": 0.37243092, "memory(GiB)": 34.88, "step": 105330, "train_speed(iter/s)": 0.411464 }, { "acc": 0.9285141, "epoch": 2.8520564265020445, "grad_norm": 5.404172897338867, "learning_rate": 2.0885041467140196e-06, "loss": 0.38738437, "memory(GiB)": 34.88, "step": 105335, "train_speed(iter/s)": 0.411465 }, { "acc": 0.93740158, "epoch": 2.85219180678526, "grad_norm": 7.62860631942749, "learning_rate": 2.0880493173536873e-06, "loss": 0.31785603, "memory(GiB)": 34.88, "step": 105340, "train_speed(iter/s)": 0.411466 }, { "acc": 0.94321156, "epoch": 2.852327187068475, "grad_norm": 11.335250854492188, "learning_rate": 2.0875945244775205e-06, "loss": 0.28175411, "memory(GiB)": 34.88, "step": 105345, "train_speed(iter/s)": 0.411467 }, { "acc": 0.93716774, "epoch": 2.852462567351691, "grad_norm": 14.279989242553711, "learning_rate": 2.0871397680912184e-06, "loss": 0.3226366, "memory(GiB)": 34.88, "step": 105350, "train_speed(iter/s)": 0.411468 }, { "acc": 0.94807587, "epoch": 2.8525979476349064, "grad_norm": 8.457647323608398, "learning_rate": 2.08668504820048e-06, "loss": 0.28362157, "memory(GiB)": 34.88, "step": 105355, "train_speed(iter/s)": 0.411469 }, { "acc": 0.94095449, "epoch": 2.8527333279181217, "grad_norm": 9.739542961120605, "learning_rate": 2.0862303648109973e-06, "loss": 0.29968202, "memory(GiB)": 34.88, "step": 105360, "train_speed(iter/s)": 0.41147 }, { "acc": 0.94532452, "epoch": 2.8528687082013375, "grad_norm": 7.7367167472839355, "learning_rate": 2.0857757179284704e-06, "loss": 0.30760307, "memory(GiB)": 34.88, "step": 105365, "train_speed(iter/s)": 0.411471 }, { "acc": 0.92835827, "epoch": 2.8530040884845533, "grad_norm": 12.289098739624023, "learning_rate": 2.085321107558589e-06, "loss": 0.46629868, "memory(GiB)": 34.88, "step": 105370, "train_speed(iter/s)": 0.411472 }, { "acc": 0.93691292, "epoch": 2.8531394687677687, "grad_norm": 6.034179210662842, "learning_rate": 2.0848665337070554e-06, "loss": 0.28766003, "memory(GiB)": 34.88, "step": 105375, "train_speed(iter/s)": 0.411473 }, { "acc": 0.94096718, "epoch": 2.853274849050984, "grad_norm": 7.254546165466309, "learning_rate": 2.0844119963795582e-06, "loss": 0.36076326, "memory(GiB)": 34.88, "step": 105380, "train_speed(iter/s)": 0.411474 }, { "acc": 0.93374634, "epoch": 2.8534102293342, "grad_norm": 4.5451226234436035, "learning_rate": 2.083957495581793e-06, "loss": 0.31974669, "memory(GiB)": 34.88, "step": 105385, "train_speed(iter/s)": 0.411475 }, { "acc": 0.94473553, "epoch": 2.853545609617415, "grad_norm": 8.517327308654785, "learning_rate": 2.0835030313194556e-06, "loss": 0.30411391, "memory(GiB)": 34.88, "step": 105390, "train_speed(iter/s)": 0.411476 }, { "acc": 0.94300108, "epoch": 2.853680989900631, "grad_norm": 5.1728057861328125, "learning_rate": 2.0830486035982357e-06, "loss": 0.34593539, "memory(GiB)": 34.88, "step": 105395, "train_speed(iter/s)": 0.411477 }, { "acc": 0.9342267, "epoch": 2.8538163701838464, "grad_norm": 4.29435920715332, "learning_rate": 2.082594212423829e-06, "loss": 0.33935728, "memory(GiB)": 34.88, "step": 105400, "train_speed(iter/s)": 0.411478 }, { "acc": 0.93812628, "epoch": 2.853951750467062, "grad_norm": 8.40710735321045, "learning_rate": 2.0821398578019225e-06, "loss": 0.37375131, "memory(GiB)": 34.88, "step": 105405, "train_speed(iter/s)": 0.411479 }, { "acc": 0.92594271, "epoch": 2.8540871307502775, "grad_norm": 7.774704456329346, "learning_rate": 2.0816855397382143e-06, "loss": 0.35681312, "memory(GiB)": 34.88, "step": 105410, "train_speed(iter/s)": 0.41148 }, { "acc": 0.94725285, "epoch": 2.854222511033493, "grad_norm": 4.463424205780029, "learning_rate": 2.0812312582383912e-06, "loss": 0.33812706, "memory(GiB)": 34.88, "step": 105415, "train_speed(iter/s)": 0.411481 }, { "acc": 0.92130642, "epoch": 2.8543578913167087, "grad_norm": 8.51264476776123, "learning_rate": 2.0807770133081455e-06, "loss": 0.47704864, "memory(GiB)": 34.88, "step": 105420, "train_speed(iter/s)": 0.411482 }, { "acc": 0.92154465, "epoch": 2.854493271599924, "grad_norm": 5.548060894012451, "learning_rate": 2.080322804953168e-06, "loss": 0.42355695, "memory(GiB)": 34.88, "step": 105425, "train_speed(iter/s)": 0.411483 }, { "acc": 0.93556709, "epoch": 2.85462865188314, "grad_norm": 8.009673118591309, "learning_rate": 2.079868633179149e-06, "loss": 0.37442389, "memory(GiB)": 34.88, "step": 105430, "train_speed(iter/s)": 0.411484 }, { "acc": 0.93405132, "epoch": 2.854764032166355, "grad_norm": 12.00416088104248, "learning_rate": 2.0794144979917772e-06, "loss": 0.37653217, "memory(GiB)": 34.88, "step": 105435, "train_speed(iter/s)": 0.411485 }, { "acc": 0.95375748, "epoch": 2.854899412449571, "grad_norm": 2.7066822052001953, "learning_rate": 2.078960399396738e-06, "loss": 0.33923097, "memory(GiB)": 34.88, "step": 105440, "train_speed(iter/s)": 0.411486 }, { "acc": 0.92753649, "epoch": 2.8550347927327864, "grad_norm": 14.193950653076172, "learning_rate": 2.078506337399726e-06, "loss": 0.44005795, "memory(GiB)": 34.88, "step": 105445, "train_speed(iter/s)": 0.411487 }, { "acc": 0.93273973, "epoch": 2.8551701730160017, "grad_norm": 8.477027893066406, "learning_rate": 2.0780523120064246e-06, "loss": 0.38846269, "memory(GiB)": 34.88, "step": 105450, "train_speed(iter/s)": 0.411488 }, { "acc": 0.94436073, "epoch": 2.8553055532992175, "grad_norm": 6.680718898773193, "learning_rate": 2.0775983232225238e-06, "loss": 0.41381283, "memory(GiB)": 34.88, "step": 105455, "train_speed(iter/s)": 0.41149 }, { "acc": 0.94241171, "epoch": 2.855440933582433, "grad_norm": 13.691264152526855, "learning_rate": 2.077144371053709e-06, "loss": 0.31793759, "memory(GiB)": 34.88, "step": 105460, "train_speed(iter/s)": 0.411491 }, { "acc": 0.93434238, "epoch": 2.8555763138656487, "grad_norm": 4.571909427642822, "learning_rate": 2.0766904555056708e-06, "loss": 0.33830161, "memory(GiB)": 34.88, "step": 105465, "train_speed(iter/s)": 0.411492 }, { "acc": 0.94677725, "epoch": 2.855711694148864, "grad_norm": 3.9672691822052, "learning_rate": 2.0762365765840896e-06, "loss": 0.29481511, "memory(GiB)": 34.88, "step": 105470, "train_speed(iter/s)": 0.411493 }, { "acc": 0.93441238, "epoch": 2.85584707443208, "grad_norm": 14.096817016601562, "learning_rate": 2.0757827342946543e-06, "loss": 0.44769583, "memory(GiB)": 34.88, "step": 105475, "train_speed(iter/s)": 0.411494 }, { "acc": 0.9331871, "epoch": 2.855982454715295, "grad_norm": 10.178747177124023, "learning_rate": 2.0753289286430515e-06, "loss": 0.34061501, "memory(GiB)": 34.88, "step": 105480, "train_speed(iter/s)": 0.411494 }, { "acc": 0.95019493, "epoch": 2.8561178349985106, "grad_norm": 5.646490097045898, "learning_rate": 2.074875159634963e-06, "loss": 0.31055956, "memory(GiB)": 34.88, "step": 105485, "train_speed(iter/s)": 0.411496 }, { "acc": 0.94196634, "epoch": 2.8562532152817264, "grad_norm": 6.43131685256958, "learning_rate": 2.0744214272760738e-06, "loss": 0.32431788, "memory(GiB)": 34.88, "step": 105490, "train_speed(iter/s)": 0.411497 }, { "acc": 0.92234716, "epoch": 2.856388595564942, "grad_norm": 8.269560813903809, "learning_rate": 2.073967731572069e-06, "loss": 0.45501928, "memory(GiB)": 34.88, "step": 105495, "train_speed(iter/s)": 0.411498 }, { "acc": 0.94586468, "epoch": 2.8565239758481575, "grad_norm": 6.785626411437988, "learning_rate": 2.073514072528633e-06, "loss": 0.3067466, "memory(GiB)": 34.88, "step": 105500, "train_speed(iter/s)": 0.411499 }, { "acc": 0.95600033, "epoch": 2.856659356131373, "grad_norm": 3.9345149993896484, "learning_rate": 2.0730604501514453e-06, "loss": 0.24810336, "memory(GiB)": 34.88, "step": 105505, "train_speed(iter/s)": 0.4115 }, { "acc": 0.95226192, "epoch": 2.8567947364145887, "grad_norm": 7.989314079284668, "learning_rate": 2.072606864446191e-06, "loss": 0.27377911, "memory(GiB)": 34.88, "step": 105510, "train_speed(iter/s)": 0.411501 }, { "acc": 0.93205299, "epoch": 2.856930116697804, "grad_norm": 10.066457748413086, "learning_rate": 2.072153315418553e-06, "loss": 0.4517055, "memory(GiB)": 34.88, "step": 105515, "train_speed(iter/s)": 0.411502 }, { "acc": 0.94565401, "epoch": 2.8570654969810194, "grad_norm": 6.308096885681152, "learning_rate": 2.0716998030742092e-06, "loss": 0.33118083, "memory(GiB)": 34.88, "step": 105520, "train_speed(iter/s)": 0.411503 }, { "acc": 0.94294395, "epoch": 2.857200877264235, "grad_norm": 4.380621433258057, "learning_rate": 2.0712463274188436e-06, "loss": 0.31662056, "memory(GiB)": 34.88, "step": 105525, "train_speed(iter/s)": 0.411504 }, { "acc": 0.94105625, "epoch": 2.857336257547451, "grad_norm": 8.097200393676758, "learning_rate": 2.070792888458136e-06, "loss": 0.32281308, "memory(GiB)": 34.88, "step": 105530, "train_speed(iter/s)": 0.411505 }, { "acc": 0.93775139, "epoch": 2.8574716378306664, "grad_norm": 9.535921096801758, "learning_rate": 2.0703394861977684e-06, "loss": 0.35029907, "memory(GiB)": 34.88, "step": 105535, "train_speed(iter/s)": 0.411505 }, { "acc": 0.93836823, "epoch": 2.8576070181138817, "grad_norm": 7.391901969909668, "learning_rate": 2.069886120643417e-06, "loss": 0.33189216, "memory(GiB)": 34.88, "step": 105540, "train_speed(iter/s)": 0.411507 }, { "acc": 0.9324707, "epoch": 2.8577423983970975, "grad_norm": 3.111937999725342, "learning_rate": 2.0694327918007634e-06, "loss": 0.40775719, "memory(GiB)": 34.88, "step": 105545, "train_speed(iter/s)": 0.411507 }, { "acc": 0.93428621, "epoch": 2.857877778680313, "grad_norm": 5.819864749908447, "learning_rate": 2.0689794996754866e-06, "loss": 0.41082134, "memory(GiB)": 34.88, "step": 105550, "train_speed(iter/s)": 0.411509 }, { "acc": 0.94857788, "epoch": 2.8580131589635287, "grad_norm": 3.9080305099487305, "learning_rate": 2.0685262442732656e-06, "loss": 0.29481149, "memory(GiB)": 34.88, "step": 105555, "train_speed(iter/s)": 0.41151 }, { "acc": 0.94018888, "epoch": 2.858148539246744, "grad_norm": 11.597912788391113, "learning_rate": 2.0680730255997756e-06, "loss": 0.37516842, "memory(GiB)": 34.88, "step": 105560, "train_speed(iter/s)": 0.411511 }, { "acc": 0.9299819, "epoch": 2.85828391952996, "grad_norm": 3.9492409229278564, "learning_rate": 2.0676198436606963e-06, "loss": 0.35746899, "memory(GiB)": 34.88, "step": 105565, "train_speed(iter/s)": 0.411512 }, { "acc": 0.93151522, "epoch": 2.858419299813175, "grad_norm": 23.92377471923828, "learning_rate": 2.0671666984617045e-06, "loss": 0.42756443, "memory(GiB)": 34.88, "step": 105570, "train_speed(iter/s)": 0.411513 }, { "acc": 0.93174791, "epoch": 2.8585546800963906, "grad_norm": 4.248964786529541, "learning_rate": 2.0667135900084754e-06, "loss": 0.35668566, "memory(GiB)": 34.88, "step": 105575, "train_speed(iter/s)": 0.411514 }, { "acc": 0.93458366, "epoch": 2.8586900603796064, "grad_norm": 12.18000602722168, "learning_rate": 2.0662605183066862e-06, "loss": 0.34455147, "memory(GiB)": 34.88, "step": 105580, "train_speed(iter/s)": 0.411515 }, { "acc": 0.93003845, "epoch": 2.8588254406628217, "grad_norm": 6.099696636199951, "learning_rate": 2.0658074833620116e-06, "loss": 0.39649065, "memory(GiB)": 34.88, "step": 105585, "train_speed(iter/s)": 0.411516 }, { "acc": 0.94441891, "epoch": 2.8589608209460375, "grad_norm": 2.977992057800293, "learning_rate": 2.0653544851801297e-06, "loss": 0.27994008, "memory(GiB)": 34.88, "step": 105590, "train_speed(iter/s)": 0.411517 }, { "acc": 0.93361721, "epoch": 2.859096201229253, "grad_norm": 5.833068370819092, "learning_rate": 2.064901523766711e-06, "loss": 0.31693654, "memory(GiB)": 34.88, "step": 105595, "train_speed(iter/s)": 0.411518 }, { "acc": 0.93045397, "epoch": 2.8592315815124687, "grad_norm": 8.460127830505371, "learning_rate": 2.0644485991274318e-06, "loss": 0.43744226, "memory(GiB)": 34.88, "step": 105600, "train_speed(iter/s)": 0.411519 }, { "acc": 0.94393749, "epoch": 2.859366961795684, "grad_norm": 1.9794894456863403, "learning_rate": 2.063995711267967e-06, "loss": 0.34260366, "memory(GiB)": 34.88, "step": 105605, "train_speed(iter/s)": 0.41152 }, { "acc": 0.9486805, "epoch": 2.8595023420788994, "grad_norm": 2.872424840927124, "learning_rate": 2.0635428601939873e-06, "loss": 0.27298307, "memory(GiB)": 34.88, "step": 105610, "train_speed(iter/s)": 0.411521 }, { "acc": 0.95482483, "epoch": 2.859637722362115, "grad_norm": 1.9389047622680664, "learning_rate": 2.0630900459111665e-06, "loss": 0.20508671, "memory(GiB)": 34.88, "step": 105615, "train_speed(iter/s)": 0.411522 }, { "acc": 0.92048779, "epoch": 2.8597731026453306, "grad_norm": 16.19295883178711, "learning_rate": 2.062637268425178e-06, "loss": 0.48801579, "memory(GiB)": 34.88, "step": 105620, "train_speed(iter/s)": 0.411523 }, { "acc": 0.93696575, "epoch": 2.8599084829285464, "grad_norm": 21.711400985717773, "learning_rate": 2.0621845277416945e-06, "loss": 0.39671521, "memory(GiB)": 34.88, "step": 105625, "train_speed(iter/s)": 0.411524 }, { "acc": 0.94115505, "epoch": 2.8600438632117617, "grad_norm": 7.356010913848877, "learning_rate": 2.0617318238663843e-06, "loss": 0.31221714, "memory(GiB)": 34.88, "step": 105630, "train_speed(iter/s)": 0.411525 }, { "acc": 0.93644276, "epoch": 2.8601792434949775, "grad_norm": 9.590503692626953, "learning_rate": 2.06127915680492e-06, "loss": 0.36833146, "memory(GiB)": 34.88, "step": 105635, "train_speed(iter/s)": 0.411526 }, { "acc": 0.95097189, "epoch": 2.860314623778193, "grad_norm": 3.625635862350464, "learning_rate": 2.0608265265629725e-06, "loss": 0.29621739, "memory(GiB)": 34.88, "step": 105640, "train_speed(iter/s)": 0.411527 }, { "acc": 0.93164587, "epoch": 2.8604500040614083, "grad_norm": 4.710740566253662, "learning_rate": 2.0603739331462135e-06, "loss": 0.41567793, "memory(GiB)": 34.88, "step": 105645, "train_speed(iter/s)": 0.411528 }, { "acc": 0.92655773, "epoch": 2.860585384344624, "grad_norm": 4.712735652923584, "learning_rate": 2.0599213765603093e-06, "loss": 0.40333781, "memory(GiB)": 34.88, "step": 105650, "train_speed(iter/s)": 0.411529 }, { "acc": 0.94017286, "epoch": 2.86072076462784, "grad_norm": 8.81159496307373, "learning_rate": 2.05946885681093e-06, "loss": 0.34610915, "memory(GiB)": 34.88, "step": 105655, "train_speed(iter/s)": 0.41153 }, { "acc": 0.93511047, "epoch": 2.860856144911055, "grad_norm": 7.928293228149414, "learning_rate": 2.0590163739037474e-06, "loss": 0.36564212, "memory(GiB)": 34.88, "step": 105660, "train_speed(iter/s)": 0.411531 }, { "acc": 0.94313698, "epoch": 2.8609915251942706, "grad_norm": 6.219457626342773, "learning_rate": 2.058563927844425e-06, "loss": 0.32497437, "memory(GiB)": 34.88, "step": 105665, "train_speed(iter/s)": 0.411533 }, { "acc": 0.92799911, "epoch": 2.8611269054774864, "grad_norm": 14.663637161254883, "learning_rate": 2.0581115186386327e-06, "loss": 0.44344115, "memory(GiB)": 34.88, "step": 105670, "train_speed(iter/s)": 0.411534 }, { "acc": 0.93833008, "epoch": 2.8612622857607017, "grad_norm": 8.247401237487793, "learning_rate": 2.057659146292038e-06, "loss": 0.28719811, "memory(GiB)": 34.88, "step": 105675, "train_speed(iter/s)": 0.411535 }, { "acc": 0.95393353, "epoch": 2.861397666043917, "grad_norm": 10.882753372192383, "learning_rate": 2.0572068108103093e-06, "loss": 0.26000814, "memory(GiB)": 34.88, "step": 105680, "train_speed(iter/s)": 0.411536 }, { "acc": 0.9274107, "epoch": 2.861533046327133, "grad_norm": 17.480852127075195, "learning_rate": 2.0567545121991096e-06, "loss": 0.46502056, "memory(GiB)": 34.88, "step": 105685, "train_speed(iter/s)": 0.411537 }, { "acc": 0.93094501, "epoch": 2.8616684266103487, "grad_norm": 6.909308910369873, "learning_rate": 2.056302250464107e-06, "loss": 0.37246394, "memory(GiB)": 34.88, "step": 105690, "train_speed(iter/s)": 0.411538 }, { "acc": 0.93833275, "epoch": 2.861803806893564, "grad_norm": 5.553924083709717, "learning_rate": 2.0558500256109682e-06, "loss": 0.39875762, "memory(GiB)": 34.88, "step": 105695, "train_speed(iter/s)": 0.411539 }, { "acc": 0.94167538, "epoch": 2.8619391871767794, "grad_norm": 9.705275535583496, "learning_rate": 2.0553978376453543e-06, "loss": 0.37084677, "memory(GiB)": 34.88, "step": 105700, "train_speed(iter/s)": 0.41154 }, { "acc": 0.94834471, "epoch": 2.8620745674599952, "grad_norm": 12.569652557373047, "learning_rate": 2.054945686572933e-06, "loss": 0.34166682, "memory(GiB)": 34.88, "step": 105705, "train_speed(iter/s)": 0.411541 }, { "acc": 0.92670116, "epoch": 2.8622099477432106, "grad_norm": 9.586371421813965, "learning_rate": 2.0544935723993667e-06, "loss": 0.42586665, "memory(GiB)": 34.88, "step": 105710, "train_speed(iter/s)": 0.411542 }, { "acc": 0.94660072, "epoch": 2.8623453280264264, "grad_norm": 9.26326847076416, "learning_rate": 2.0540414951303215e-06, "loss": 0.27766397, "memory(GiB)": 34.88, "step": 105715, "train_speed(iter/s)": 0.411543 }, { "acc": 0.94895258, "epoch": 2.8624807083096417, "grad_norm": 5.5740227699279785, "learning_rate": 2.0535894547714554e-06, "loss": 0.27928152, "memory(GiB)": 34.88, "step": 105720, "train_speed(iter/s)": 0.411544 }, { "acc": 0.93258276, "epoch": 2.8626160885928575, "grad_norm": 5.4481048583984375, "learning_rate": 2.053137451328439e-06, "loss": 0.41057639, "memory(GiB)": 34.88, "step": 105725, "train_speed(iter/s)": 0.411545 }, { "acc": 0.93889942, "epoch": 2.862751468876073, "grad_norm": 5.277621269226074, "learning_rate": 2.052685484806929e-06, "loss": 0.32136395, "memory(GiB)": 34.88, "step": 105730, "train_speed(iter/s)": 0.411546 }, { "acc": 0.94319344, "epoch": 2.8628868491592883, "grad_norm": 9.061235427856445, "learning_rate": 2.052233555212587e-06, "loss": 0.3107161, "memory(GiB)": 34.88, "step": 105735, "train_speed(iter/s)": 0.411547 }, { "acc": 0.92270851, "epoch": 2.863022229442504, "grad_norm": 6.582661151885986, "learning_rate": 2.051781662551075e-06, "loss": 0.48653603, "memory(GiB)": 34.88, "step": 105740, "train_speed(iter/s)": 0.411548 }, { "acc": 0.94186172, "epoch": 2.8631576097257194, "grad_norm": 10.55516529083252, "learning_rate": 2.0513298068280544e-06, "loss": 0.35668485, "memory(GiB)": 34.88, "step": 105745, "train_speed(iter/s)": 0.411549 }, { "acc": 0.93455086, "epoch": 2.8632929900089352, "grad_norm": 5.784204006195068, "learning_rate": 2.0508779880491884e-06, "loss": 0.40830479, "memory(GiB)": 34.88, "step": 105750, "train_speed(iter/s)": 0.41155 }, { "acc": 0.94966354, "epoch": 2.8634283702921506, "grad_norm": 22.06513214111328, "learning_rate": 2.0504262062201298e-06, "loss": 0.33219726, "memory(GiB)": 34.88, "step": 105755, "train_speed(iter/s)": 0.411551 }, { "acc": 0.93563643, "epoch": 2.8635637505753664, "grad_norm": 11.033699989318848, "learning_rate": 2.049974461346546e-06, "loss": 0.41137524, "memory(GiB)": 34.88, "step": 105760, "train_speed(iter/s)": 0.411552 }, { "acc": 0.91887007, "epoch": 2.8636991308585817, "grad_norm": 22.896339416503906, "learning_rate": 2.0495227534340907e-06, "loss": 0.48442106, "memory(GiB)": 34.88, "step": 105765, "train_speed(iter/s)": 0.411553 }, { "acc": 0.93213434, "epoch": 2.863834511141797, "grad_norm": 7.031769275665283, "learning_rate": 2.0490710824884254e-06, "loss": 0.42686729, "memory(GiB)": 34.88, "step": 105770, "train_speed(iter/s)": 0.411554 }, { "acc": 0.9548687, "epoch": 2.863969891425013, "grad_norm": 25.297040939331055, "learning_rate": 2.0486194485152057e-06, "loss": 0.32962875, "memory(GiB)": 34.88, "step": 105775, "train_speed(iter/s)": 0.411555 }, { "acc": 0.9283432, "epoch": 2.8641052717082283, "grad_norm": 6.655376434326172, "learning_rate": 2.04816785152009e-06, "loss": 0.45798445, "memory(GiB)": 34.88, "step": 105780, "train_speed(iter/s)": 0.411556 }, { "acc": 0.92759056, "epoch": 2.864240651991444, "grad_norm": 11.590311050415039, "learning_rate": 2.047716291508738e-06, "loss": 0.41237583, "memory(GiB)": 34.88, "step": 105785, "train_speed(iter/s)": 0.411557 }, { "acc": 0.93021889, "epoch": 2.8643760322746594, "grad_norm": 5.585917949676514, "learning_rate": 2.0472647684867995e-06, "loss": 0.39168386, "memory(GiB)": 34.88, "step": 105790, "train_speed(iter/s)": 0.411558 }, { "acc": 0.93761139, "epoch": 2.8645114125578752, "grad_norm": 2.0543510913848877, "learning_rate": 2.04681328245994e-06, "loss": 0.35887375, "memory(GiB)": 34.88, "step": 105795, "train_speed(iter/s)": 0.411559 }, { "acc": 0.94352112, "epoch": 2.8646467928410906, "grad_norm": 5.453207969665527, "learning_rate": 2.0463618334338083e-06, "loss": 0.30170588, "memory(GiB)": 34.88, "step": 105800, "train_speed(iter/s)": 0.41156 }, { "acc": 0.9563302, "epoch": 2.864782173124306, "grad_norm": 3.14426851272583, "learning_rate": 2.045910421414064e-06, "loss": 0.24935822, "memory(GiB)": 34.88, "step": 105805, "train_speed(iter/s)": 0.411561 }, { "acc": 0.92753735, "epoch": 2.8649175534075217, "grad_norm": 6.558318138122559, "learning_rate": 2.0454590464063577e-06, "loss": 0.36444602, "memory(GiB)": 34.88, "step": 105810, "train_speed(iter/s)": 0.411562 }, { "acc": 0.93870296, "epoch": 2.8650529336907375, "grad_norm": 9.565994262695312, "learning_rate": 2.045007708416347e-06, "loss": 0.31018059, "memory(GiB)": 34.88, "step": 105815, "train_speed(iter/s)": 0.411563 }, { "acc": 0.91939297, "epoch": 2.865188313973953, "grad_norm": 9.69332504272461, "learning_rate": 2.044556407449686e-06, "loss": 0.44713755, "memory(GiB)": 34.88, "step": 105820, "train_speed(iter/s)": 0.411564 }, { "acc": 0.93665609, "epoch": 2.8653236942571683, "grad_norm": 18.34682273864746, "learning_rate": 2.044105143512023e-06, "loss": 0.39974923, "memory(GiB)": 34.88, "step": 105825, "train_speed(iter/s)": 0.411565 }, { "acc": 0.94272594, "epoch": 2.865459074540384, "grad_norm": 4.0150957107543945, "learning_rate": 2.0436539166090185e-06, "loss": 0.29919329, "memory(GiB)": 34.88, "step": 105830, "train_speed(iter/s)": 0.411566 }, { "acc": 0.94683962, "epoch": 2.8655944548235994, "grad_norm": 6.39129638671875, "learning_rate": 2.0432027267463196e-06, "loss": 0.3364439, "memory(GiB)": 34.88, "step": 105835, "train_speed(iter/s)": 0.411568 }, { "acc": 0.94411526, "epoch": 2.865729835106815, "grad_norm": 6.035290718078613, "learning_rate": 2.042751573929583e-06, "loss": 0.3045397, "memory(GiB)": 34.88, "step": 105840, "train_speed(iter/s)": 0.411569 }, { "acc": 0.92308207, "epoch": 2.8658652153900306, "grad_norm": 11.514100074768066, "learning_rate": 2.0423004581644524e-06, "loss": 0.4180069, "memory(GiB)": 34.88, "step": 105845, "train_speed(iter/s)": 0.411569 }, { "acc": 0.94187927, "epoch": 2.8660005956732464, "grad_norm": 8.9590482711792, "learning_rate": 2.041849379456588e-06, "loss": 0.33638909, "memory(GiB)": 34.88, "step": 105850, "train_speed(iter/s)": 0.41157 }, { "acc": 0.94105072, "epoch": 2.8661359759564617, "grad_norm": 5.267635822296143, "learning_rate": 2.0413983378116366e-06, "loss": 0.32086616, "memory(GiB)": 34.88, "step": 105855, "train_speed(iter/s)": 0.411572 }, { "acc": 0.93473997, "epoch": 2.866271356239677, "grad_norm": 5.867871284484863, "learning_rate": 2.0409473332352442e-06, "loss": 0.37884202, "memory(GiB)": 34.88, "step": 105860, "train_speed(iter/s)": 0.411573 }, { "acc": 0.93301315, "epoch": 2.866406736522893, "grad_norm": 23.025564193725586, "learning_rate": 2.0404963657330685e-06, "loss": 0.36829839, "memory(GiB)": 34.88, "step": 105865, "train_speed(iter/s)": 0.411574 }, { "acc": 0.91833744, "epoch": 2.8665421168061083, "grad_norm": 9.175433158874512, "learning_rate": 2.0400454353107524e-06, "loss": 0.42272215, "memory(GiB)": 34.88, "step": 105870, "train_speed(iter/s)": 0.411575 }, { "acc": 0.93631649, "epoch": 2.866677497089324, "grad_norm": 15.40015697479248, "learning_rate": 2.0395945419739493e-06, "loss": 0.37203598, "memory(GiB)": 34.88, "step": 105875, "train_speed(iter/s)": 0.411576 }, { "acc": 0.93624229, "epoch": 2.8668128773725394, "grad_norm": 7.226992607116699, "learning_rate": 2.039143685728301e-06, "loss": 0.39340706, "memory(GiB)": 34.88, "step": 105880, "train_speed(iter/s)": 0.411577 }, { "acc": 0.9301837, "epoch": 2.8669482576557552, "grad_norm": 5.591789245605469, "learning_rate": 2.038692866579464e-06, "loss": 0.39682169, "memory(GiB)": 34.88, "step": 105885, "train_speed(iter/s)": 0.411578 }, { "acc": 0.92034483, "epoch": 2.8670836379389706, "grad_norm": 8.211716651916504, "learning_rate": 2.0382420845330793e-06, "loss": 0.44900808, "memory(GiB)": 34.88, "step": 105890, "train_speed(iter/s)": 0.411579 }, { "acc": 0.9377964, "epoch": 2.867219018222186, "grad_norm": 6.285155773162842, "learning_rate": 2.0377913395947952e-06, "loss": 0.32493467, "memory(GiB)": 34.88, "step": 105895, "train_speed(iter/s)": 0.41158 }, { "acc": 0.93565722, "epoch": 2.8673543985054017, "grad_norm": 4.905422210693359, "learning_rate": 2.037340631770261e-06, "loss": 0.35500898, "memory(GiB)": 34.88, "step": 105900, "train_speed(iter/s)": 0.41158 }, { "acc": 0.92853737, "epoch": 2.867489778788617, "grad_norm": 9.86391544342041, "learning_rate": 2.0368899610651193e-06, "loss": 0.45993929, "memory(GiB)": 34.88, "step": 105905, "train_speed(iter/s)": 0.411581 }, { "acc": 0.93414536, "epoch": 2.867625159071833, "grad_norm": 6.54645299911499, "learning_rate": 2.0364393274850176e-06, "loss": 0.42764521, "memory(GiB)": 34.88, "step": 105910, "train_speed(iter/s)": 0.411582 }, { "acc": 0.92173214, "epoch": 2.8677605393550483, "grad_norm": 6.063512802124023, "learning_rate": 2.035988731035597e-06, "loss": 0.45232859, "memory(GiB)": 34.88, "step": 105915, "train_speed(iter/s)": 0.411583 }, { "acc": 0.91983452, "epoch": 2.867895919638264, "grad_norm": 12.813830375671387, "learning_rate": 2.0355381717225084e-06, "loss": 0.45867281, "memory(GiB)": 34.88, "step": 105920, "train_speed(iter/s)": 0.411584 }, { "acc": 0.94078188, "epoch": 2.8680312999214794, "grad_norm": 14.77917194366455, "learning_rate": 2.035087649551392e-06, "loss": 0.29711556, "memory(GiB)": 34.88, "step": 105925, "train_speed(iter/s)": 0.411585 }, { "acc": 0.95191669, "epoch": 2.868166680204695, "grad_norm": 6.5009660720825195, "learning_rate": 2.0346371645278935e-06, "loss": 0.25933845, "memory(GiB)": 34.88, "step": 105930, "train_speed(iter/s)": 0.411586 }, { "acc": 0.92066326, "epoch": 2.8683020604879106, "grad_norm": 4.814519882202148, "learning_rate": 2.0341867166576534e-06, "loss": 0.43289499, "memory(GiB)": 34.88, "step": 105935, "train_speed(iter/s)": 0.411587 }, { "acc": 0.94538412, "epoch": 2.868437440771126, "grad_norm": 6.887490749359131, "learning_rate": 2.0337363059463154e-06, "loss": 0.31368434, "memory(GiB)": 34.88, "step": 105940, "train_speed(iter/s)": 0.411588 }, { "acc": 0.93546972, "epoch": 2.8685728210543417, "grad_norm": 2.8626787662506104, "learning_rate": 2.033285932399525e-06, "loss": 0.3564393, "memory(GiB)": 34.88, "step": 105945, "train_speed(iter/s)": 0.411589 }, { "acc": 0.9404644, "epoch": 2.868708201337557, "grad_norm": 7.57963228225708, "learning_rate": 2.0328355960229167e-06, "loss": 0.30694339, "memory(GiB)": 34.88, "step": 105950, "train_speed(iter/s)": 0.411591 }, { "acc": 0.94240723, "epoch": 2.868843581620773, "grad_norm": 6.327573776245117, "learning_rate": 2.032385296822141e-06, "loss": 0.23351879, "memory(GiB)": 34.88, "step": 105955, "train_speed(iter/s)": 0.411591 }, { "acc": 0.91226215, "epoch": 2.8689789619039883, "grad_norm": 11.389029502868652, "learning_rate": 2.031935034802832e-06, "loss": 0.52457161, "memory(GiB)": 34.88, "step": 105960, "train_speed(iter/s)": 0.411592 }, { "acc": 0.92801437, "epoch": 2.8691143421872036, "grad_norm": 7.240568161010742, "learning_rate": 2.0314848099706343e-06, "loss": 0.35625372, "memory(GiB)": 34.88, "step": 105965, "train_speed(iter/s)": 0.411593 }, { "acc": 0.93302402, "epoch": 2.8692497224704194, "grad_norm": 6.891421318054199, "learning_rate": 2.0310346223311832e-06, "loss": 0.40494804, "memory(GiB)": 34.88, "step": 105970, "train_speed(iter/s)": 0.411595 }, { "acc": 0.91952705, "epoch": 2.8693851027536352, "grad_norm": 6.158746242523193, "learning_rate": 2.0305844718901234e-06, "loss": 0.41829753, "memory(GiB)": 34.88, "step": 105975, "train_speed(iter/s)": 0.411596 }, { "acc": 0.93682947, "epoch": 2.8695204830368506, "grad_norm": 11.742474555969238, "learning_rate": 2.030134358653092e-06, "loss": 0.32033484, "memory(GiB)": 34.88, "step": 105980, "train_speed(iter/s)": 0.411597 }, { "acc": 0.9493166, "epoch": 2.869655863320066, "grad_norm": 5.770918369293213, "learning_rate": 2.0296842826257233e-06, "loss": 0.34312105, "memory(GiB)": 34.88, "step": 105985, "train_speed(iter/s)": 0.411598 }, { "acc": 0.93831749, "epoch": 2.8697912436032817, "grad_norm": 5.151212692260742, "learning_rate": 2.0292342438136626e-06, "loss": 0.38403709, "memory(GiB)": 34.88, "step": 105990, "train_speed(iter/s)": 0.411599 }, { "acc": 0.94267139, "epoch": 2.869926623886497, "grad_norm": 6.640299320220947, "learning_rate": 2.028784242222543e-06, "loss": 0.36216457, "memory(GiB)": 34.88, "step": 105995, "train_speed(iter/s)": 0.4116 }, { "acc": 0.93456268, "epoch": 2.8700620041697125, "grad_norm": 4.35357666015625, "learning_rate": 2.0283342778580036e-06, "loss": 0.34026079, "memory(GiB)": 34.88, "step": 106000, "train_speed(iter/s)": 0.411601 }, { "acc": 0.94532871, "epoch": 2.8701973844529283, "grad_norm": 6.763885974884033, "learning_rate": 2.0278843507256767e-06, "loss": 0.32888582, "memory(GiB)": 34.88, "step": 106005, "train_speed(iter/s)": 0.411602 }, { "acc": 0.94534531, "epoch": 2.870332764736144, "grad_norm": 12.53821849822998, "learning_rate": 2.0274344608312056e-06, "loss": 0.28692455, "memory(GiB)": 34.88, "step": 106010, "train_speed(iter/s)": 0.411603 }, { "acc": 0.9348835, "epoch": 2.8704681450193594, "grad_norm": 9.507498741149902, "learning_rate": 2.0269846081802205e-06, "loss": 0.31966019, "memory(GiB)": 34.88, "step": 106015, "train_speed(iter/s)": 0.411604 }, { "acc": 0.92514372, "epoch": 2.870603525302575, "grad_norm": 21.967426300048828, "learning_rate": 2.026534792778359e-06, "loss": 0.4054328, "memory(GiB)": 34.88, "step": 106020, "train_speed(iter/s)": 0.411605 }, { "acc": 0.9305934, "epoch": 2.8707389055857906, "grad_norm": 42.76607131958008, "learning_rate": 2.026085014631257e-06, "loss": 0.40160389, "memory(GiB)": 34.88, "step": 106025, "train_speed(iter/s)": 0.411606 }, { "acc": 0.93730545, "epoch": 2.870874285869006, "grad_norm": 8.16580581665039, "learning_rate": 2.0256352737445463e-06, "loss": 0.34406466, "memory(GiB)": 34.88, "step": 106030, "train_speed(iter/s)": 0.411607 }, { "acc": 0.95077982, "epoch": 2.8710096661522218, "grad_norm": 4.115868091583252, "learning_rate": 2.0251855701238625e-06, "loss": 0.31917975, "memory(GiB)": 34.88, "step": 106035, "train_speed(iter/s)": 0.411608 }, { "acc": 0.9484787, "epoch": 2.871145046435437, "grad_norm": 7.016753196716309, "learning_rate": 2.024735903774836e-06, "loss": 0.30234847, "memory(GiB)": 34.88, "step": 106040, "train_speed(iter/s)": 0.411609 }, { "acc": 0.95012894, "epoch": 2.871280426718653, "grad_norm": 10.073508262634277, "learning_rate": 2.024286274703105e-06, "loss": 0.26425798, "memory(GiB)": 34.88, "step": 106045, "train_speed(iter/s)": 0.41161 }, { "acc": 0.91895227, "epoch": 2.8714158070018683, "grad_norm": 6.555093765258789, "learning_rate": 2.023836682914298e-06, "loss": 0.45065432, "memory(GiB)": 34.88, "step": 106050, "train_speed(iter/s)": 0.411611 }, { "acc": 0.92286549, "epoch": 2.8715511872850836, "grad_norm": 52.13484573364258, "learning_rate": 2.0233871284140478e-06, "loss": 0.48090916, "memory(GiB)": 34.88, "step": 106055, "train_speed(iter/s)": 0.411612 }, { "acc": 0.95216427, "epoch": 2.8716865675682994, "grad_norm": 6.44232177734375, "learning_rate": 2.0229376112079884e-06, "loss": 0.304126, "memory(GiB)": 34.88, "step": 106060, "train_speed(iter/s)": 0.411613 }, { "acc": 0.9367609, "epoch": 2.871821947851515, "grad_norm": 7.243693828582764, "learning_rate": 2.022488131301746e-06, "loss": 0.40556269, "memory(GiB)": 34.88, "step": 106065, "train_speed(iter/s)": 0.411614 }, { "acc": 0.94434366, "epoch": 2.8719573281347306, "grad_norm": 9.576261520385742, "learning_rate": 2.022038688700957e-06, "loss": 0.3186635, "memory(GiB)": 34.88, "step": 106070, "train_speed(iter/s)": 0.411615 }, { "acc": 0.93606319, "epoch": 2.872092708417946, "grad_norm": 5.16171932220459, "learning_rate": 2.0215892834112447e-06, "loss": 0.33971865, "memory(GiB)": 34.88, "step": 106075, "train_speed(iter/s)": 0.411616 }, { "acc": 0.95245628, "epoch": 2.8722280887011618, "grad_norm": 3.2737112045288086, "learning_rate": 2.021139915438246e-06, "loss": 0.27544274, "memory(GiB)": 34.88, "step": 106080, "train_speed(iter/s)": 0.411618 }, { "acc": 0.93688145, "epoch": 2.872363468984377, "grad_norm": 39.538028717041016, "learning_rate": 2.0206905847875853e-06, "loss": 0.35523953, "memory(GiB)": 34.88, "step": 106085, "train_speed(iter/s)": 0.411619 }, { "acc": 0.92776833, "epoch": 2.8724988492675925, "grad_norm": 4.441641330718994, "learning_rate": 2.0202412914648925e-06, "loss": 0.41746254, "memory(GiB)": 34.88, "step": 106090, "train_speed(iter/s)": 0.41162 }, { "acc": 0.93131475, "epoch": 2.8726342295508083, "grad_norm": 8.182049751281738, "learning_rate": 2.0197920354757963e-06, "loss": 0.33888693, "memory(GiB)": 34.88, "step": 106095, "train_speed(iter/s)": 0.411621 }, { "acc": 0.93400698, "epoch": 2.8727696098340236, "grad_norm": 5.568126678466797, "learning_rate": 2.0193428168259263e-06, "loss": 0.41244431, "memory(GiB)": 34.88, "step": 106100, "train_speed(iter/s)": 0.411622 }, { "acc": 0.93237686, "epoch": 2.8729049901172394, "grad_norm": 10.211270332336426, "learning_rate": 2.0188936355209085e-06, "loss": 0.43493519, "memory(GiB)": 34.88, "step": 106105, "train_speed(iter/s)": 0.411623 }, { "acc": 0.94420738, "epoch": 2.873040370400455, "grad_norm": 4.085940361022949, "learning_rate": 2.018444491566365e-06, "loss": 0.29286885, "memory(GiB)": 34.88, "step": 106110, "train_speed(iter/s)": 0.411624 }, { "acc": 0.92288952, "epoch": 2.8731757506836706, "grad_norm": 19.409770965576172, "learning_rate": 2.0179953849679306e-06, "loss": 0.4660737, "memory(GiB)": 34.88, "step": 106115, "train_speed(iter/s)": 0.411625 }, { "acc": 0.94577217, "epoch": 2.873311130966886, "grad_norm": 2.602168560028076, "learning_rate": 2.0175463157312246e-06, "loss": 0.32217257, "memory(GiB)": 34.88, "step": 106120, "train_speed(iter/s)": 0.411626 }, { "acc": 0.95135918, "epoch": 2.8734465112501013, "grad_norm": 4.321935176849365, "learning_rate": 2.017097283861875e-06, "loss": 0.25742624, "memory(GiB)": 34.88, "step": 106125, "train_speed(iter/s)": 0.411627 }, { "acc": 0.9215519, "epoch": 2.873581891533317, "grad_norm": 10.509026527404785, "learning_rate": 2.0166482893655075e-06, "loss": 0.44752135, "memory(GiB)": 34.88, "step": 106130, "train_speed(iter/s)": 0.411628 }, { "acc": 0.95196533, "epoch": 2.873717271816533, "grad_norm": 12.778974533081055, "learning_rate": 2.0161993322477474e-06, "loss": 0.26783409, "memory(GiB)": 34.88, "step": 106135, "train_speed(iter/s)": 0.411629 }, { "acc": 0.95059071, "epoch": 2.8738526520997483, "grad_norm": 5.164209842681885, "learning_rate": 2.0157504125142154e-06, "loss": 0.30542068, "memory(GiB)": 34.88, "step": 106140, "train_speed(iter/s)": 0.41163 }, { "acc": 0.92950354, "epoch": 2.8739880323829636, "grad_norm": 11.13860034942627, "learning_rate": 2.0153015301705368e-06, "loss": 0.48151302, "memory(GiB)": 34.88, "step": 106145, "train_speed(iter/s)": 0.411631 }, { "acc": 0.93291664, "epoch": 2.8741234126661794, "grad_norm": 17.506624221801758, "learning_rate": 2.014852685222337e-06, "loss": 0.45084457, "memory(GiB)": 34.88, "step": 106150, "train_speed(iter/s)": 0.411632 }, { "acc": 0.92166405, "epoch": 2.874258792949395, "grad_norm": 4.463417053222656, "learning_rate": 2.014403877675235e-06, "loss": 0.43220835, "memory(GiB)": 34.88, "step": 106155, "train_speed(iter/s)": 0.411633 }, { "acc": 0.9466774, "epoch": 2.87439417323261, "grad_norm": 4.87291145324707, "learning_rate": 2.0139551075348547e-06, "loss": 0.29945502, "memory(GiB)": 34.88, "step": 106160, "train_speed(iter/s)": 0.411634 }, { "acc": 0.94489841, "epoch": 2.874529553515826, "grad_norm": 11.194910049438477, "learning_rate": 2.0135063748068174e-06, "loss": 0.33245015, "memory(GiB)": 34.88, "step": 106165, "train_speed(iter/s)": 0.411635 }, { "acc": 0.93336811, "epoch": 2.8746649337990418, "grad_norm": 13.714950561523438, "learning_rate": 2.0130576794967464e-06, "loss": 0.42155609, "memory(GiB)": 34.88, "step": 106170, "train_speed(iter/s)": 0.411636 }, { "acc": 0.93586607, "epoch": 2.874800314082257, "grad_norm": 8.808022499084473, "learning_rate": 2.0126090216102593e-06, "loss": 0.35349522, "memory(GiB)": 34.88, "step": 106175, "train_speed(iter/s)": 0.411637 }, { "acc": 0.9444519, "epoch": 2.8749356943654725, "grad_norm": 6.931614398956299, "learning_rate": 2.0121604011529778e-06, "loss": 0.3134037, "memory(GiB)": 34.88, "step": 106180, "train_speed(iter/s)": 0.411638 }, { "acc": 0.9124464, "epoch": 2.8750710746486883, "grad_norm": 11.519912719726562, "learning_rate": 2.011711818130522e-06, "loss": 0.51947021, "memory(GiB)": 34.88, "step": 106185, "train_speed(iter/s)": 0.411639 }, { "acc": 0.92127695, "epoch": 2.8752064549319036, "grad_norm": 14.74206829071045, "learning_rate": 2.0112632725485126e-06, "loss": 0.49722428, "memory(GiB)": 34.88, "step": 106190, "train_speed(iter/s)": 0.41164 }, { "acc": 0.94075689, "epoch": 2.8753418352151194, "grad_norm": 8.812117576599121, "learning_rate": 2.0108147644125657e-06, "loss": 0.39312525, "memory(GiB)": 34.88, "step": 106195, "train_speed(iter/s)": 0.411641 }, { "acc": 0.94746847, "epoch": 2.875477215498335, "grad_norm": 14.651453018188477, "learning_rate": 2.0103662937283004e-06, "loss": 0.23407917, "memory(GiB)": 34.88, "step": 106200, "train_speed(iter/s)": 0.411642 }, { "acc": 0.93655014, "epoch": 2.8756125957815506, "grad_norm": 5.693947792053223, "learning_rate": 2.0099178605013376e-06, "loss": 0.38176992, "memory(GiB)": 34.88, "step": 106205, "train_speed(iter/s)": 0.411643 }, { "acc": 0.94631662, "epoch": 2.875747976064766, "grad_norm": 5.947205066680908, "learning_rate": 2.0094694647372907e-06, "loss": 0.22737012, "memory(GiB)": 34.88, "step": 106210, "train_speed(iter/s)": 0.411644 }, { "acc": 0.92887039, "epoch": 2.8758833563479813, "grad_norm": 13.829521179199219, "learning_rate": 2.0090211064417788e-06, "loss": 0.42175622, "memory(GiB)": 34.88, "step": 106215, "train_speed(iter/s)": 0.411645 }, { "acc": 0.94950848, "epoch": 2.876018736631197, "grad_norm": 8.031067848205566, "learning_rate": 2.008572785620418e-06, "loss": 0.30517683, "memory(GiB)": 34.88, "step": 106220, "train_speed(iter/s)": 0.411646 }, { "acc": 0.94847336, "epoch": 2.8761541169144125, "grad_norm": 9.776860237121582, "learning_rate": 2.0081245022788268e-06, "loss": 0.26110811, "memory(GiB)": 34.88, "step": 106225, "train_speed(iter/s)": 0.411647 }, { "acc": 0.9465867, "epoch": 2.8762894971976283, "grad_norm": 15.714117050170898, "learning_rate": 2.0076762564226166e-06, "loss": 0.31914024, "memory(GiB)": 34.88, "step": 106230, "train_speed(iter/s)": 0.411648 }, { "acc": 0.93405046, "epoch": 2.8764248774808436, "grad_norm": 19.242605209350586, "learning_rate": 2.0072280480574052e-06, "loss": 0.39054136, "memory(GiB)": 34.88, "step": 106235, "train_speed(iter/s)": 0.411649 }, { "acc": 0.91760044, "epoch": 2.8765602577640594, "grad_norm": 8.355318069458008, "learning_rate": 2.0067798771888076e-06, "loss": 0.54130626, "memory(GiB)": 34.88, "step": 106240, "train_speed(iter/s)": 0.41165 }, { "acc": 0.94676332, "epoch": 2.876695638047275, "grad_norm": 4.92765998840332, "learning_rate": 2.006331743822436e-06, "loss": 0.30608592, "memory(GiB)": 34.88, "step": 106245, "train_speed(iter/s)": 0.411651 }, { "acc": 0.94608631, "epoch": 2.87683101833049, "grad_norm": 7.810427665710449, "learning_rate": 2.005883647963906e-06, "loss": 0.32002835, "memory(GiB)": 34.88, "step": 106250, "train_speed(iter/s)": 0.411652 }, { "acc": 0.94800472, "epoch": 2.876966398613706, "grad_norm": 6.212001323699951, "learning_rate": 2.005435589618829e-06, "loss": 0.25132146, "memory(GiB)": 34.88, "step": 106255, "train_speed(iter/s)": 0.411653 }, { "acc": 0.943437, "epoch": 2.8771017788969213, "grad_norm": 5.763381004333496, "learning_rate": 2.0049875687928216e-06, "loss": 0.28719325, "memory(GiB)": 34.88, "step": 106260, "train_speed(iter/s)": 0.411654 }, { "acc": 0.9558342, "epoch": 2.877237159180137, "grad_norm": 6.056191444396973, "learning_rate": 2.0045395854914912e-06, "loss": 0.25104511, "memory(GiB)": 34.88, "step": 106265, "train_speed(iter/s)": 0.411655 }, { "acc": 0.93816156, "epoch": 2.8773725394633525, "grad_norm": 5.996485233306885, "learning_rate": 2.004091639720453e-06, "loss": 0.35471032, "memory(GiB)": 34.88, "step": 106270, "train_speed(iter/s)": 0.411656 }, { "acc": 0.93500881, "epoch": 2.8775079197465683, "grad_norm": 6.608569622039795, "learning_rate": 2.0036437314853186e-06, "loss": 0.33581448, "memory(GiB)": 34.88, "step": 106275, "train_speed(iter/s)": 0.411657 }, { "acc": 0.94200487, "epoch": 2.8776433000297836, "grad_norm": 6.7379889488220215, "learning_rate": 2.003195860791696e-06, "loss": 0.38335893, "memory(GiB)": 34.88, "step": 106280, "train_speed(iter/s)": 0.411658 }, { "acc": 0.91912146, "epoch": 2.877778680312999, "grad_norm": 12.282843589782715, "learning_rate": 2.0027480276451974e-06, "loss": 0.45876713, "memory(GiB)": 34.88, "step": 106285, "train_speed(iter/s)": 0.411659 }, { "acc": 0.95410748, "epoch": 2.877914060596215, "grad_norm": 2.5532379150390625, "learning_rate": 2.0023002320514326e-06, "loss": 0.2302031, "memory(GiB)": 34.88, "step": 106290, "train_speed(iter/s)": 0.41166 }, { "acc": 0.92185383, "epoch": 2.87804944087943, "grad_norm": 6.9398722648620605, "learning_rate": 2.0018524740160136e-06, "loss": 0.44151893, "memory(GiB)": 34.88, "step": 106295, "train_speed(iter/s)": 0.411661 }, { "acc": 0.92159328, "epoch": 2.878184821162646, "grad_norm": 7.391613483428955, "learning_rate": 2.001404753544545e-06, "loss": 0.45848961, "memory(GiB)": 34.88, "step": 106300, "train_speed(iter/s)": 0.411662 }, { "acc": 0.94320812, "epoch": 2.8783202014458613, "grad_norm": 6.05475378036499, "learning_rate": 2.000957070642638e-06, "loss": 0.29393563, "memory(GiB)": 34.88, "step": 106305, "train_speed(iter/s)": 0.411663 }, { "acc": 0.92424278, "epoch": 2.878455581729077, "grad_norm": 10.817265510559082, "learning_rate": 2.0005094253158996e-06, "loss": 0.46538167, "memory(GiB)": 34.88, "step": 106310, "train_speed(iter/s)": 0.411664 }, { "acc": 0.9357832, "epoch": 2.8785909620122925, "grad_norm": 4.423692226409912, "learning_rate": 2.0000618175699405e-06, "loss": 0.42986264, "memory(GiB)": 34.88, "step": 106315, "train_speed(iter/s)": 0.411665 }, { "acc": 0.94485836, "epoch": 2.878726342295508, "grad_norm": 6.267566204071045, "learning_rate": 1.9996142474103637e-06, "loss": 0.29552259, "memory(GiB)": 34.88, "step": 106320, "train_speed(iter/s)": 0.411666 }, { "acc": 0.94553337, "epoch": 2.8788617225787236, "grad_norm": 3.5255799293518066, "learning_rate": 1.9991667148427775e-06, "loss": 0.32867246, "memory(GiB)": 34.88, "step": 106325, "train_speed(iter/s)": 0.411667 }, { "acc": 0.93199606, "epoch": 2.8789971028619394, "grad_norm": 9.050116539001465, "learning_rate": 1.9987192198727903e-06, "loss": 0.3603261, "memory(GiB)": 34.88, "step": 106330, "train_speed(iter/s)": 0.411668 }, { "acc": 0.930653, "epoch": 2.879132483145155, "grad_norm": 9.5891695022583, "learning_rate": 1.9982717625060038e-06, "loss": 0.34973617, "memory(GiB)": 34.88, "step": 106335, "train_speed(iter/s)": 0.411669 }, { "acc": 0.92629318, "epoch": 2.87926786342837, "grad_norm": 9.552652359008789, "learning_rate": 1.9978243427480255e-06, "loss": 0.44441137, "memory(GiB)": 34.88, "step": 106340, "train_speed(iter/s)": 0.41167 }, { "acc": 0.93564186, "epoch": 2.879403243711586, "grad_norm": 11.130459785461426, "learning_rate": 1.99737696060446e-06, "loss": 0.38144324, "memory(GiB)": 34.88, "step": 106345, "train_speed(iter/s)": 0.411671 }, { "acc": 0.92981396, "epoch": 2.8795386239948013, "grad_norm": 3.722498655319214, "learning_rate": 1.996929616080913e-06, "loss": 0.34485126, "memory(GiB)": 34.88, "step": 106350, "train_speed(iter/s)": 0.411672 }, { "acc": 0.93365746, "epoch": 2.8796740042780167, "grad_norm": 8.229211807250977, "learning_rate": 1.996482309182986e-06, "loss": 0.32049451, "memory(GiB)": 34.88, "step": 106355, "train_speed(iter/s)": 0.411673 }, { "acc": 0.92295589, "epoch": 2.8798093845612325, "grad_norm": 5.297243595123291, "learning_rate": 1.996035039916283e-06, "loss": 0.39893577, "memory(GiB)": 34.88, "step": 106360, "train_speed(iter/s)": 0.411674 }, { "acc": 0.93125439, "epoch": 2.8799447648444483, "grad_norm": 5.5210394859313965, "learning_rate": 1.9955878082864096e-06, "loss": 0.37601686, "memory(GiB)": 34.88, "step": 106365, "train_speed(iter/s)": 0.411675 }, { "acc": 0.93921108, "epoch": 2.8800801451276636, "grad_norm": 5.533816814422607, "learning_rate": 1.995140614298964e-06, "loss": 0.25282922, "memory(GiB)": 34.88, "step": 106370, "train_speed(iter/s)": 0.411676 }, { "acc": 0.93686476, "epoch": 2.880215525410879, "grad_norm": 9.67913818359375, "learning_rate": 1.9946934579595503e-06, "loss": 0.29546368, "memory(GiB)": 34.88, "step": 106375, "train_speed(iter/s)": 0.411677 }, { "acc": 0.92726307, "epoch": 2.880350905694095, "grad_norm": 7.192162036895752, "learning_rate": 1.9942463392737703e-06, "loss": 0.44004302, "memory(GiB)": 34.88, "step": 106380, "train_speed(iter/s)": 0.411678 }, { "acc": 0.93062744, "epoch": 2.88048628597731, "grad_norm": 7.410422325134277, "learning_rate": 1.9937992582472256e-06, "loss": 0.43997655, "memory(GiB)": 34.88, "step": 106385, "train_speed(iter/s)": 0.411678 }, { "acc": 0.95127296, "epoch": 2.880621666260526, "grad_norm": 6.443727493286133, "learning_rate": 1.9933522148855127e-06, "loss": 0.32426617, "memory(GiB)": 34.88, "step": 106390, "train_speed(iter/s)": 0.411679 }, { "acc": 0.95973511, "epoch": 2.8807570465437413, "grad_norm": 7.4296875, "learning_rate": 1.9929052091942384e-06, "loss": 0.27769094, "memory(GiB)": 34.88, "step": 106395, "train_speed(iter/s)": 0.41168 }, { "acc": 0.9340703, "epoch": 2.880892426826957, "grad_norm": 6.5091094970703125, "learning_rate": 1.992458241178999e-06, "loss": 0.29221315, "memory(GiB)": 34.88, "step": 106400, "train_speed(iter/s)": 0.411681 }, { "acc": 0.93521576, "epoch": 2.8810278071101725, "grad_norm": 7.7785234451293945, "learning_rate": 1.9920113108453917e-06, "loss": 0.33201957, "memory(GiB)": 34.88, "step": 106405, "train_speed(iter/s)": 0.411682 }, { "acc": 0.95308971, "epoch": 2.881163187393388, "grad_norm": 19.73135757446289, "learning_rate": 1.9915644181990168e-06, "loss": 0.29312024, "memory(GiB)": 34.88, "step": 106410, "train_speed(iter/s)": 0.411683 }, { "acc": 0.94653511, "epoch": 2.8812985676766036, "grad_norm": 6.284462928771973, "learning_rate": 1.9911175632454724e-06, "loss": 0.32325258, "memory(GiB)": 34.88, "step": 106415, "train_speed(iter/s)": 0.411684 }, { "acc": 0.93168774, "epoch": 2.881433947959819, "grad_norm": 10.764362335205078, "learning_rate": 1.990670745990358e-06, "loss": 0.35079036, "memory(GiB)": 34.88, "step": 106420, "train_speed(iter/s)": 0.411686 }, { "acc": 0.94082689, "epoch": 2.881569328243035, "grad_norm": 8.258477210998535, "learning_rate": 1.990223966439266e-06, "loss": 0.32556386, "memory(GiB)": 34.88, "step": 106425, "train_speed(iter/s)": 0.411686 }, { "acc": 0.93626842, "epoch": 2.88170470852625, "grad_norm": 6.338136196136475, "learning_rate": 1.9897772245978002e-06, "loss": 0.42394738, "memory(GiB)": 34.88, "step": 106430, "train_speed(iter/s)": 0.411687 }, { "acc": 0.92214136, "epoch": 2.881840088809466, "grad_norm": 8.320898056030273, "learning_rate": 1.989330520471551e-06, "loss": 0.42049942, "memory(GiB)": 34.88, "step": 106435, "train_speed(iter/s)": 0.411688 }, { "acc": 0.93186922, "epoch": 2.8819754690926813, "grad_norm": 6.336507797241211, "learning_rate": 1.988883854066119e-06, "loss": 0.38814712, "memory(GiB)": 34.88, "step": 106440, "train_speed(iter/s)": 0.411689 }, { "acc": 0.92320747, "epoch": 2.8821108493758967, "grad_norm": 5.384406089782715, "learning_rate": 1.988437225387095e-06, "loss": 0.47883644, "memory(GiB)": 34.88, "step": 106445, "train_speed(iter/s)": 0.41169 }, { "acc": 0.94892883, "epoch": 2.8822462296591125, "grad_norm": 15.723448753356934, "learning_rate": 1.9879906344400755e-06, "loss": 0.26038163, "memory(GiB)": 34.88, "step": 106450, "train_speed(iter/s)": 0.411691 }, { "acc": 0.95021658, "epoch": 2.882381609942328, "grad_norm": 4.881435871124268, "learning_rate": 1.987544081230658e-06, "loss": 0.25113862, "memory(GiB)": 34.88, "step": 106455, "train_speed(iter/s)": 0.411692 }, { "acc": 0.94659557, "epoch": 2.8825169902255436, "grad_norm": 4.975140571594238, "learning_rate": 1.98709756576443e-06, "loss": 0.35084889, "memory(GiB)": 34.88, "step": 106460, "train_speed(iter/s)": 0.411693 }, { "acc": 0.95143757, "epoch": 2.882652370508759, "grad_norm": 4.126424312591553, "learning_rate": 1.9866510880469913e-06, "loss": 0.25713377, "memory(GiB)": 34.88, "step": 106465, "train_speed(iter/s)": 0.411694 }, { "acc": 0.92831116, "epoch": 2.882787750791975, "grad_norm": 9.824068069458008, "learning_rate": 1.9862046480839313e-06, "loss": 0.39633884, "memory(GiB)": 34.88, "step": 106470, "train_speed(iter/s)": 0.411695 }, { "acc": 0.95354023, "epoch": 2.88292313107519, "grad_norm": 2.3188223838806152, "learning_rate": 1.985758245880846e-06, "loss": 0.21795449, "memory(GiB)": 34.88, "step": 106475, "train_speed(iter/s)": 0.411696 }, { "acc": 0.95801325, "epoch": 2.8830585113584055, "grad_norm": 7.015071392059326, "learning_rate": 1.985311881443323e-06, "loss": 0.23289115, "memory(GiB)": 34.88, "step": 106480, "train_speed(iter/s)": 0.411697 }, { "acc": 0.93426094, "epoch": 2.8831938916416213, "grad_norm": 9.5252685546875, "learning_rate": 1.984865554776956e-06, "loss": 0.3884275, "memory(GiB)": 34.88, "step": 106485, "train_speed(iter/s)": 0.411698 }, { "acc": 0.93378849, "epoch": 2.883329271924837, "grad_norm": 20.02511978149414, "learning_rate": 1.984419265887338e-06, "loss": 0.41124001, "memory(GiB)": 34.88, "step": 106490, "train_speed(iter/s)": 0.411699 }, { "acc": 0.92271395, "epoch": 2.8834646522080525, "grad_norm": 9.274874687194824, "learning_rate": 1.983973014780056e-06, "loss": 0.44331951, "memory(GiB)": 34.88, "step": 106495, "train_speed(iter/s)": 0.4117 }, { "acc": 0.93028154, "epoch": 2.883600032491268, "grad_norm": 8.681937217712402, "learning_rate": 1.983526801460702e-06, "loss": 0.35000286, "memory(GiB)": 34.88, "step": 106500, "train_speed(iter/s)": 0.411701 }, { "acc": 0.93715992, "epoch": 2.8837354127744836, "grad_norm": 4.24383544921875, "learning_rate": 1.983080625934865e-06, "loss": 0.39683161, "memory(GiB)": 34.88, "step": 106505, "train_speed(iter/s)": 0.411702 }, { "acc": 0.9368824, "epoch": 2.883870793057699, "grad_norm": 8.69878101348877, "learning_rate": 1.9826344882081376e-06, "loss": 0.37514315, "memory(GiB)": 34.88, "step": 106510, "train_speed(iter/s)": 0.411704 }, { "acc": 0.94158821, "epoch": 2.8840061733409144, "grad_norm": 7.403510093688965, "learning_rate": 1.9821883882861015e-06, "loss": 0.33200655, "memory(GiB)": 34.88, "step": 106515, "train_speed(iter/s)": 0.411705 }, { "acc": 0.93924046, "epoch": 2.88414155362413, "grad_norm": 9.67713737487793, "learning_rate": 1.9817423261743533e-06, "loss": 0.35642304, "memory(GiB)": 34.88, "step": 106520, "train_speed(iter/s)": 0.411705 }, { "acc": 0.94124947, "epoch": 2.884276933907346, "grad_norm": 5.604318618774414, "learning_rate": 1.9812963018784765e-06, "loss": 0.3485332, "memory(GiB)": 34.88, "step": 106525, "train_speed(iter/s)": 0.411706 }, { "acc": 0.94975529, "epoch": 2.8844123141905613, "grad_norm": 7.330976486206055, "learning_rate": 1.9808503154040574e-06, "loss": 0.3203964, "memory(GiB)": 34.88, "step": 106530, "train_speed(iter/s)": 0.411707 }, { "acc": 0.93141413, "epoch": 2.8845476944737767, "grad_norm": 6.342355251312256, "learning_rate": 1.9804043667566834e-06, "loss": 0.41611958, "memory(GiB)": 34.88, "step": 106535, "train_speed(iter/s)": 0.411708 }, { "acc": 0.94302053, "epoch": 2.8846830747569925, "grad_norm": 7.675718784332275, "learning_rate": 1.979958455941943e-06, "loss": 0.34520922, "memory(GiB)": 34.88, "step": 106540, "train_speed(iter/s)": 0.411709 }, { "acc": 0.94965725, "epoch": 2.884818455040208, "grad_norm": 3.3437700271606445, "learning_rate": 1.9795125829654214e-06, "loss": 0.28119903, "memory(GiB)": 34.88, "step": 106545, "train_speed(iter/s)": 0.41171 }, { "acc": 0.92939148, "epoch": 2.8849538353234236, "grad_norm": 7.992575168609619, "learning_rate": 1.9790667478327007e-06, "loss": 0.39883633, "memory(GiB)": 34.88, "step": 106550, "train_speed(iter/s)": 0.411711 }, { "acc": 0.93671227, "epoch": 2.885089215606639, "grad_norm": 6.5586018562316895, "learning_rate": 1.9786209505493716e-06, "loss": 0.34881637, "memory(GiB)": 34.88, "step": 106555, "train_speed(iter/s)": 0.411712 }, { "acc": 0.93247433, "epoch": 2.885224595889855, "grad_norm": 14.130865097045898, "learning_rate": 1.9781751911210138e-06, "loss": 0.38229265, "memory(GiB)": 34.88, "step": 106560, "train_speed(iter/s)": 0.411713 }, { "acc": 0.93835411, "epoch": 2.88535997617307, "grad_norm": 4.769176959991455, "learning_rate": 1.977729469553215e-06, "loss": 0.36341066, "memory(GiB)": 34.88, "step": 106565, "train_speed(iter/s)": 0.411714 }, { "acc": 0.95594425, "epoch": 2.8854953564562855, "grad_norm": 3.8735599517822266, "learning_rate": 1.977283785851555e-06, "loss": 0.22274323, "memory(GiB)": 34.88, "step": 106570, "train_speed(iter/s)": 0.411715 }, { "acc": 0.93565502, "epoch": 2.8856307367395013, "grad_norm": 5.941956520080566, "learning_rate": 1.9768381400216185e-06, "loss": 0.39252274, "memory(GiB)": 34.88, "step": 106575, "train_speed(iter/s)": 0.411716 }, { "acc": 0.95960751, "epoch": 2.8857661170227167, "grad_norm": 7.544619560241699, "learning_rate": 1.9763925320689907e-06, "loss": 0.25253425, "memory(GiB)": 34.88, "step": 106580, "train_speed(iter/s)": 0.411717 }, { "acc": 0.93048439, "epoch": 2.8859014973059325, "grad_norm": 6.944447040557861, "learning_rate": 1.9759469619992473e-06, "loss": 0.42827168, "memory(GiB)": 34.88, "step": 106585, "train_speed(iter/s)": 0.411718 }, { "acc": 0.94737463, "epoch": 2.886036877589148, "grad_norm": 5.988282680511475, "learning_rate": 1.975501429817977e-06, "loss": 0.30037563, "memory(GiB)": 34.88, "step": 106590, "train_speed(iter/s)": 0.411719 }, { "acc": 0.93515835, "epoch": 2.8861722578723636, "grad_norm": 8.46617603302002, "learning_rate": 1.975055935530757e-06, "loss": 0.32267966, "memory(GiB)": 34.88, "step": 106595, "train_speed(iter/s)": 0.41172 }, { "acc": 0.93223591, "epoch": 2.886307638155579, "grad_norm": 38.57939529418945, "learning_rate": 1.9746104791431703e-06, "loss": 0.38760033, "memory(GiB)": 34.88, "step": 106600, "train_speed(iter/s)": 0.411721 }, { "acc": 0.92671623, "epoch": 2.8864430184387944, "grad_norm": 6.144417762756348, "learning_rate": 1.9741650606607937e-06, "loss": 0.36634307, "memory(GiB)": 34.88, "step": 106605, "train_speed(iter/s)": 0.411722 }, { "acc": 0.94034863, "epoch": 2.88657839872201, "grad_norm": 3.771272897720337, "learning_rate": 1.9737196800892096e-06, "loss": 0.35719762, "memory(GiB)": 34.88, "step": 106610, "train_speed(iter/s)": 0.411723 }, { "acc": 0.94136314, "epoch": 2.8867137790052255, "grad_norm": 3.344796895980835, "learning_rate": 1.9732743374339983e-06, "loss": 0.36721034, "memory(GiB)": 34.88, "step": 106615, "train_speed(iter/s)": 0.411724 }, { "acc": 0.94251947, "epoch": 2.8868491592884413, "grad_norm": 6.199737071990967, "learning_rate": 1.972829032700733e-06, "loss": 0.33012805, "memory(GiB)": 34.88, "step": 106620, "train_speed(iter/s)": 0.411725 }, { "acc": 0.92074785, "epoch": 2.8869845395716567, "grad_norm": 9.049564361572266, "learning_rate": 1.972383765894999e-06, "loss": 0.47088175, "memory(GiB)": 34.88, "step": 106625, "train_speed(iter/s)": 0.411726 }, { "acc": 0.93018503, "epoch": 2.8871199198548725, "grad_norm": 5.939459800720215, "learning_rate": 1.97193853702237e-06, "loss": 0.38469477, "memory(GiB)": 34.88, "step": 106630, "train_speed(iter/s)": 0.411727 }, { "acc": 0.93057518, "epoch": 2.887255300138088, "grad_norm": 9.324506759643555, "learning_rate": 1.9714933460884263e-06, "loss": 0.37121382, "memory(GiB)": 34.88, "step": 106635, "train_speed(iter/s)": 0.411728 }, { "acc": 0.92925911, "epoch": 2.887390680421303, "grad_norm": 7.0594329833984375, "learning_rate": 1.971048193098739e-06, "loss": 0.39475455, "memory(GiB)": 34.88, "step": 106640, "train_speed(iter/s)": 0.411729 }, { "acc": 0.94629803, "epoch": 2.887526060704519, "grad_norm": 5.3195061683654785, "learning_rate": 1.9706030780588923e-06, "loss": 0.30877256, "memory(GiB)": 34.88, "step": 106645, "train_speed(iter/s)": 0.41173 }, { "acc": 0.92535019, "epoch": 2.887661440987735, "grad_norm": 7.518922328948975, "learning_rate": 1.970158000974458e-06, "loss": 0.36218529, "memory(GiB)": 34.88, "step": 106650, "train_speed(iter/s)": 0.411731 }, { "acc": 0.9388525, "epoch": 2.88779682127095, "grad_norm": 10.586732864379883, "learning_rate": 1.9697129618510084e-06, "loss": 0.34673948, "memory(GiB)": 34.88, "step": 106655, "train_speed(iter/s)": 0.411732 }, { "acc": 0.94073763, "epoch": 2.8879322015541655, "grad_norm": 4.523519992828369, "learning_rate": 1.9692679606941255e-06, "loss": 0.3371613, "memory(GiB)": 34.88, "step": 106660, "train_speed(iter/s)": 0.411733 }, { "acc": 0.92937908, "epoch": 2.8880675818373813, "grad_norm": 3.2608110904693604, "learning_rate": 1.9688229975093785e-06, "loss": 0.42393103, "memory(GiB)": 34.88, "step": 106665, "train_speed(iter/s)": 0.411734 }, { "acc": 0.94193077, "epoch": 2.8882029621205967, "grad_norm": 6.311275482177734, "learning_rate": 1.9683780723023448e-06, "loss": 0.33807559, "memory(GiB)": 34.88, "step": 106670, "train_speed(iter/s)": 0.411735 }, { "acc": 0.93629398, "epoch": 2.888338342403812, "grad_norm": 5.009200572967529, "learning_rate": 1.967933185078593e-06, "loss": 0.37619119, "memory(GiB)": 34.88, "step": 106675, "train_speed(iter/s)": 0.411736 }, { "acc": 0.94787664, "epoch": 2.888473722687028, "grad_norm": 6.257568359375, "learning_rate": 1.967488335843703e-06, "loss": 0.24920197, "memory(GiB)": 34.88, "step": 106680, "train_speed(iter/s)": 0.411737 }, { "acc": 0.95176544, "epoch": 2.8886091029702436, "grad_norm": 4.163061618804932, "learning_rate": 1.9670435246032423e-06, "loss": 0.28086903, "memory(GiB)": 34.88, "step": 106685, "train_speed(iter/s)": 0.411738 }, { "acc": 0.93803692, "epoch": 2.888744483253459, "grad_norm": 12.269759178161621, "learning_rate": 1.966598751362785e-06, "loss": 0.34200091, "memory(GiB)": 34.88, "step": 106690, "train_speed(iter/s)": 0.411739 }, { "acc": 0.92941074, "epoch": 2.8888798635366744, "grad_norm": 11.758782386779785, "learning_rate": 1.9661540161279032e-06, "loss": 0.44330235, "memory(GiB)": 34.88, "step": 106695, "train_speed(iter/s)": 0.41174 }, { "acc": 0.93483887, "epoch": 2.88901524381989, "grad_norm": 12.396852493286133, "learning_rate": 1.9657093189041664e-06, "loss": 0.47779279, "memory(GiB)": 34.88, "step": 106700, "train_speed(iter/s)": 0.411741 }, { "acc": 0.94412842, "epoch": 2.8891506241031055, "grad_norm": 5.729465007781982, "learning_rate": 1.9652646596971474e-06, "loss": 0.39780464, "memory(GiB)": 34.88, "step": 106705, "train_speed(iter/s)": 0.411742 }, { "acc": 0.93853493, "epoch": 2.8892860043863213, "grad_norm": 13.567455291748047, "learning_rate": 1.9648200385124124e-06, "loss": 0.35166292, "memory(GiB)": 34.88, "step": 106710, "train_speed(iter/s)": 0.411743 }, { "acc": 0.94887085, "epoch": 2.8894213846695367, "grad_norm": 7.594233512878418, "learning_rate": 1.964375455355537e-06, "loss": 0.30021234, "memory(GiB)": 34.88, "step": 106715, "train_speed(iter/s)": 0.411744 }, { "acc": 0.96222725, "epoch": 2.8895567649527525, "grad_norm": 4.62955379486084, "learning_rate": 1.9639309102320862e-06, "loss": 0.21605973, "memory(GiB)": 34.88, "step": 106720, "train_speed(iter/s)": 0.411745 }, { "acc": 0.92532282, "epoch": 2.889692145235968, "grad_norm": 8.960183143615723, "learning_rate": 1.9634864031476296e-06, "loss": 0.46352787, "memory(GiB)": 34.88, "step": 106725, "train_speed(iter/s)": 0.411746 }, { "acc": 0.93829966, "epoch": 2.889827525519183, "grad_norm": 4.102510452270508, "learning_rate": 1.9630419341077366e-06, "loss": 0.27993803, "memory(GiB)": 34.88, "step": 106730, "train_speed(iter/s)": 0.411747 }, { "acc": 0.94013624, "epoch": 2.889962905802399, "grad_norm": 5.941758155822754, "learning_rate": 1.9625975031179756e-06, "loss": 0.36307287, "memory(GiB)": 34.88, "step": 106735, "train_speed(iter/s)": 0.411748 }, { "acc": 0.93806086, "epoch": 2.8900982860856144, "grad_norm": 7.676340103149414, "learning_rate": 1.9621531101839133e-06, "loss": 0.33182886, "memory(GiB)": 34.88, "step": 106740, "train_speed(iter/s)": 0.411749 }, { "acc": 0.9484972, "epoch": 2.89023366636883, "grad_norm": 7.554567337036133, "learning_rate": 1.961708755311113e-06, "loss": 0.33282025, "memory(GiB)": 34.88, "step": 106745, "train_speed(iter/s)": 0.41175 }, { "acc": 0.95226526, "epoch": 2.8903690466520455, "grad_norm": 2.5505993366241455, "learning_rate": 1.961264438505147e-06, "loss": 0.2624126, "memory(GiB)": 34.88, "step": 106750, "train_speed(iter/s)": 0.411751 }, { "acc": 0.9441618, "epoch": 2.8905044269352613, "grad_norm": 10.43470287322998, "learning_rate": 1.9608201597715766e-06, "loss": 0.33601241, "memory(GiB)": 34.88, "step": 106755, "train_speed(iter/s)": 0.411752 }, { "acc": 0.93167248, "epoch": 2.8906398072184767, "grad_norm": 7.87656307220459, "learning_rate": 1.9603759191159697e-06, "loss": 0.39398065, "memory(GiB)": 34.88, "step": 106760, "train_speed(iter/s)": 0.411753 }, { "acc": 0.93659515, "epoch": 2.890775187501692, "grad_norm": 12.103394508361816, "learning_rate": 1.9599317165438912e-06, "loss": 0.35888839, "memory(GiB)": 34.88, "step": 106765, "train_speed(iter/s)": 0.411753 }, { "acc": 0.94504242, "epoch": 2.890910567784908, "grad_norm": 8.158980369567871, "learning_rate": 1.959487552060906e-06, "loss": 0.37290618, "memory(GiB)": 34.88, "step": 106770, "train_speed(iter/s)": 0.411754 }, { "acc": 0.94374695, "epoch": 2.891045948068123, "grad_norm": 11.22671127319336, "learning_rate": 1.9590434256725773e-06, "loss": 0.40462427, "memory(GiB)": 34.88, "step": 106775, "train_speed(iter/s)": 0.411755 }, { "acc": 0.92478733, "epoch": 2.891181328351339, "grad_norm": 11.234580993652344, "learning_rate": 1.958599337384465e-06, "loss": 0.47128105, "memory(GiB)": 34.88, "step": 106780, "train_speed(iter/s)": 0.411756 }, { "acc": 0.95649738, "epoch": 2.8913167086345544, "grad_norm": 6.260315418243408, "learning_rate": 1.95815528720214e-06, "loss": 0.23067188, "memory(GiB)": 34.88, "step": 106785, "train_speed(iter/s)": 0.411757 }, { "acc": 0.94599113, "epoch": 2.89145208891777, "grad_norm": 21.605093002319336, "learning_rate": 1.957711275131159e-06, "loss": 0.30968394, "memory(GiB)": 34.88, "step": 106790, "train_speed(iter/s)": 0.411758 }, { "acc": 0.95373592, "epoch": 2.8915874692009855, "grad_norm": 9.594778060913086, "learning_rate": 1.9572673011770855e-06, "loss": 0.26476102, "memory(GiB)": 34.88, "step": 106795, "train_speed(iter/s)": 0.411759 }, { "acc": 0.94973011, "epoch": 2.891722849484201, "grad_norm": 4.462977886199951, "learning_rate": 1.9568233653454814e-06, "loss": 0.25017848, "memory(GiB)": 34.88, "step": 106800, "train_speed(iter/s)": 0.41176 }, { "acc": 0.923209, "epoch": 2.8918582297674167, "grad_norm": 17.87077522277832, "learning_rate": 1.9563794676419106e-06, "loss": 0.48874493, "memory(GiB)": 34.88, "step": 106805, "train_speed(iter/s)": 0.411761 }, { "acc": 0.9576232, "epoch": 2.8919936100506325, "grad_norm": 7.9352593421936035, "learning_rate": 1.955935608071928e-06, "loss": 0.23956254, "memory(GiB)": 34.88, "step": 106810, "train_speed(iter/s)": 0.411762 }, { "acc": 0.9307972, "epoch": 2.892128990333848, "grad_norm": 13.3564453125, "learning_rate": 1.955491786641098e-06, "loss": 0.36707616, "memory(GiB)": 34.88, "step": 106815, "train_speed(iter/s)": 0.411763 }, { "acc": 0.94359398, "epoch": 2.892264370617063, "grad_norm": 2.768934726715088, "learning_rate": 1.9550480033549806e-06, "loss": 0.35887165, "memory(GiB)": 34.88, "step": 106820, "train_speed(iter/s)": 0.411764 }, { "acc": 0.94665127, "epoch": 2.892399750900279, "grad_norm": 20.112031936645508, "learning_rate": 1.954604258219132e-06, "loss": 0.34430661, "memory(GiB)": 34.88, "step": 106825, "train_speed(iter/s)": 0.411765 }, { "acc": 0.945504, "epoch": 2.8925351311834944, "grad_norm": 6.1550092697143555, "learning_rate": 1.9541605512391128e-06, "loss": 0.36868312, "memory(GiB)": 34.88, "step": 106830, "train_speed(iter/s)": 0.411766 }, { "acc": 0.94035244, "epoch": 2.8926705114667097, "grad_norm": 3.5532355308532715, "learning_rate": 1.953716882420481e-06, "loss": 0.38163021, "memory(GiB)": 34.88, "step": 106835, "train_speed(iter/s)": 0.411767 }, { "acc": 0.94636831, "epoch": 2.8928058917499255, "grad_norm": 4.361415863037109, "learning_rate": 1.9532732517687977e-06, "loss": 0.27158647, "memory(GiB)": 34.88, "step": 106840, "train_speed(iter/s)": 0.411768 }, { "acc": 0.94284496, "epoch": 2.8929412720331413, "grad_norm": 11.583674430847168, "learning_rate": 1.9528296592896144e-06, "loss": 0.351963, "memory(GiB)": 34.88, "step": 106845, "train_speed(iter/s)": 0.411769 }, { "acc": 0.93726482, "epoch": 2.8930766523163567, "grad_norm": 19.45336151123047, "learning_rate": 1.9523861049884905e-06, "loss": 0.36405706, "memory(GiB)": 34.88, "step": 106850, "train_speed(iter/s)": 0.41177 }, { "acc": 0.93115044, "epoch": 2.893212032599572, "grad_norm": 5.391343116760254, "learning_rate": 1.9519425888709826e-06, "loss": 0.30497487, "memory(GiB)": 34.88, "step": 106855, "train_speed(iter/s)": 0.411771 }, { "acc": 0.93419285, "epoch": 2.893347412882788, "grad_norm": 11.824790000915527, "learning_rate": 1.9514991109426492e-06, "loss": 0.42149429, "memory(GiB)": 34.88, "step": 106860, "train_speed(iter/s)": 0.411772 }, { "acc": 0.94504213, "epoch": 2.893482793166003, "grad_norm": 9.793413162231445, "learning_rate": 1.951055671209043e-06, "loss": 0.29461012, "memory(GiB)": 34.88, "step": 106865, "train_speed(iter/s)": 0.411773 }, { "acc": 0.92308912, "epoch": 2.893618173449219, "grad_norm": 10.535906791687012, "learning_rate": 1.950612269675716e-06, "loss": 0.52266989, "memory(GiB)": 34.88, "step": 106870, "train_speed(iter/s)": 0.411774 }, { "acc": 0.93050318, "epoch": 2.8937535537324344, "grad_norm": 3.0544304847717285, "learning_rate": 1.9501689063482284e-06, "loss": 0.44372663, "memory(GiB)": 34.88, "step": 106875, "train_speed(iter/s)": 0.411775 }, { "acc": 0.94450417, "epoch": 2.89388893401565, "grad_norm": 6.027926445007324, "learning_rate": 1.94972558123213e-06, "loss": 0.30904922, "memory(GiB)": 34.88, "step": 106880, "train_speed(iter/s)": 0.411776 }, { "acc": 0.94544001, "epoch": 2.8940243142988655, "grad_norm": 8.663984298706055, "learning_rate": 1.9492822943329768e-06, "loss": 0.28168862, "memory(GiB)": 34.88, "step": 106885, "train_speed(iter/s)": 0.411777 }, { "acc": 0.95124607, "epoch": 2.894159694582081, "grad_norm": 7.664599418640137, "learning_rate": 1.9488390456563205e-06, "loss": 0.25117946, "memory(GiB)": 34.88, "step": 106890, "train_speed(iter/s)": 0.411778 }, { "acc": 0.94428196, "epoch": 2.8942950748652967, "grad_norm": 6.530941009521484, "learning_rate": 1.9483958352077154e-06, "loss": 0.31476383, "memory(GiB)": 34.88, "step": 106895, "train_speed(iter/s)": 0.411779 }, { "acc": 0.95504627, "epoch": 2.894430455148512, "grad_norm": 7.698639392852783, "learning_rate": 1.9479526629927127e-06, "loss": 0.24727066, "memory(GiB)": 34.88, "step": 106900, "train_speed(iter/s)": 0.411781 }, { "acc": 0.93995552, "epoch": 2.894565835431728, "grad_norm": 7.746371269226074, "learning_rate": 1.9475095290168608e-06, "loss": 0.32267194, "memory(GiB)": 34.88, "step": 106905, "train_speed(iter/s)": 0.411781 }, { "acc": 0.93820171, "epoch": 2.894701215714943, "grad_norm": 6.021643161773682, "learning_rate": 1.9470664332857166e-06, "loss": 0.34356871, "memory(GiB)": 34.88, "step": 106910, "train_speed(iter/s)": 0.411782 }, { "acc": 0.9433157, "epoch": 2.894836595998159, "grad_norm": 8.561750411987305, "learning_rate": 1.9466233758048265e-06, "loss": 0.35006356, "memory(GiB)": 34.88, "step": 106915, "train_speed(iter/s)": 0.411784 }, { "acc": 0.9536974, "epoch": 2.8949719762813744, "grad_norm": 9.585949897766113, "learning_rate": 1.946180356579741e-06, "loss": 0.27270246, "memory(GiB)": 34.88, "step": 106920, "train_speed(iter/s)": 0.411785 }, { "acc": 0.94295406, "epoch": 2.8951073565645897, "grad_norm": 5.47337532043457, "learning_rate": 1.9457373756160115e-06, "loss": 0.31804023, "memory(GiB)": 34.88, "step": 106925, "train_speed(iter/s)": 0.411786 }, { "acc": 0.92895851, "epoch": 2.8952427368478055, "grad_norm": 3.962865114212036, "learning_rate": 1.9452944329191883e-06, "loss": 0.40209389, "memory(GiB)": 34.88, "step": 106930, "train_speed(iter/s)": 0.411787 }, { "acc": 0.94219894, "epoch": 2.895378117131021, "grad_norm": 7.609138011932373, "learning_rate": 1.944851528494816e-06, "loss": 0.35161381, "memory(GiB)": 34.88, "step": 106935, "train_speed(iter/s)": 0.411788 }, { "acc": 0.93538647, "epoch": 2.8955134974142367, "grad_norm": 4.2264509201049805, "learning_rate": 1.9444086623484455e-06, "loss": 0.32239177, "memory(GiB)": 34.88, "step": 106940, "train_speed(iter/s)": 0.411789 }, { "acc": 0.94119854, "epoch": 2.895648877697452, "grad_norm": 7.5020976066589355, "learning_rate": 1.9439658344856263e-06, "loss": 0.34757171, "memory(GiB)": 34.88, "step": 106945, "train_speed(iter/s)": 0.411789 }, { "acc": 0.94550037, "epoch": 2.895784257980668, "grad_norm": 9.577642440795898, "learning_rate": 1.9435230449119015e-06, "loss": 0.23983092, "memory(GiB)": 34.88, "step": 106950, "train_speed(iter/s)": 0.41179 }, { "acc": 0.92333698, "epoch": 2.895919638263883, "grad_norm": 5.0650763511657715, "learning_rate": 1.9430802936328192e-06, "loss": 0.44555373, "memory(GiB)": 34.88, "step": 106955, "train_speed(iter/s)": 0.411792 }, { "acc": 0.95310268, "epoch": 2.8960550185470986, "grad_norm": 5.666082382202148, "learning_rate": 1.942637580653928e-06, "loss": 0.22941155, "memory(GiB)": 34.88, "step": 106960, "train_speed(iter/s)": 0.411793 }, { "acc": 0.948629, "epoch": 2.8961903988303144, "grad_norm": 7.956515312194824, "learning_rate": 1.9421949059807734e-06, "loss": 0.28610597, "memory(GiB)": 34.88, "step": 106965, "train_speed(iter/s)": 0.411794 }, { "acc": 0.93530741, "epoch": 2.89632577911353, "grad_norm": 10.934489250183105, "learning_rate": 1.941752269618898e-06, "loss": 0.38466377, "memory(GiB)": 34.88, "step": 106970, "train_speed(iter/s)": 0.411794 }, { "acc": 0.93574619, "epoch": 2.8964611593967455, "grad_norm": 3.0992813110351562, "learning_rate": 1.941309671573848e-06, "loss": 0.38884704, "memory(GiB)": 34.88, "step": 106975, "train_speed(iter/s)": 0.411795 }, { "acc": 0.94753284, "epoch": 2.896596539679961, "grad_norm": 4.158698081970215, "learning_rate": 1.9408671118511683e-06, "loss": 0.31787336, "memory(GiB)": 34.88, "step": 106980, "train_speed(iter/s)": 0.411796 }, { "acc": 0.94316616, "epoch": 2.8967319199631767, "grad_norm": 24.525257110595703, "learning_rate": 1.9404245904564044e-06, "loss": 0.35350974, "memory(GiB)": 34.88, "step": 106985, "train_speed(iter/s)": 0.411798 }, { "acc": 0.94793854, "epoch": 2.896867300246392, "grad_norm": 9.2046537399292, "learning_rate": 1.939982107395097e-06, "loss": 0.25890169, "memory(GiB)": 34.88, "step": 106990, "train_speed(iter/s)": 0.411798 }, { "acc": 0.94297161, "epoch": 2.8970026805296074, "grad_norm": 8.457514762878418, "learning_rate": 1.9395396626727893e-06, "loss": 0.31279309, "memory(GiB)": 34.88, "step": 106995, "train_speed(iter/s)": 0.411799 }, { "acc": 0.92561893, "epoch": 2.897138060812823, "grad_norm": 8.800533294677734, "learning_rate": 1.9390972562950262e-06, "loss": 0.46364574, "memory(GiB)": 34.88, "step": 107000, "train_speed(iter/s)": 0.4118 }, { "acc": 0.94604321, "epoch": 2.897273441096039, "grad_norm": 4.685356140136719, "learning_rate": 1.9386548882673473e-06, "loss": 0.26911485, "memory(GiB)": 34.88, "step": 107005, "train_speed(iter/s)": 0.411801 }, { "acc": 0.93456497, "epoch": 2.8974088213792544, "grad_norm": 9.313281059265137, "learning_rate": 1.9382125585952943e-06, "loss": 0.36918452, "memory(GiB)": 34.88, "step": 107010, "train_speed(iter/s)": 0.411802 }, { "acc": 0.9303483, "epoch": 2.8975442016624697, "grad_norm": 14.207594871520996, "learning_rate": 1.937770267284409e-06, "loss": 0.43176885, "memory(GiB)": 34.88, "step": 107015, "train_speed(iter/s)": 0.411803 }, { "acc": 0.94210567, "epoch": 2.8976795819456855, "grad_norm": 4.63252592086792, "learning_rate": 1.9373280143402335e-06, "loss": 0.34104006, "memory(GiB)": 34.88, "step": 107020, "train_speed(iter/s)": 0.411804 }, { "acc": 0.93820562, "epoch": 2.897814962228901, "grad_norm": 7.415426254272461, "learning_rate": 1.9368857997683045e-06, "loss": 0.40663176, "memory(GiB)": 34.88, "step": 107025, "train_speed(iter/s)": 0.411805 }, { "acc": 0.93880272, "epoch": 2.8979503425121167, "grad_norm": 7.056761741638184, "learning_rate": 1.936443623574163e-06, "loss": 0.36208248, "memory(GiB)": 34.88, "step": 107030, "train_speed(iter/s)": 0.411806 }, { "acc": 0.95006294, "epoch": 2.898085722795332, "grad_norm": 5.795290470123291, "learning_rate": 1.9360014857633507e-06, "loss": 0.30621781, "memory(GiB)": 34.88, "step": 107035, "train_speed(iter/s)": 0.411807 }, { "acc": 0.92593689, "epoch": 2.898221103078548, "grad_norm": 7.250420570373535, "learning_rate": 1.9355593863414026e-06, "loss": 0.43748693, "memory(GiB)": 34.88, "step": 107040, "train_speed(iter/s)": 0.411808 }, { "acc": 0.93075037, "epoch": 2.898356483361763, "grad_norm": 6.019650936126709, "learning_rate": 1.935117325313858e-06, "loss": 0.3952749, "memory(GiB)": 34.88, "step": 107045, "train_speed(iter/s)": 0.411809 }, { "acc": 0.93180599, "epoch": 2.8984918636449786, "grad_norm": 5.996410846710205, "learning_rate": 1.9346753026862545e-06, "loss": 0.43243513, "memory(GiB)": 34.88, "step": 107050, "train_speed(iter/s)": 0.41181 }, { "acc": 0.9425149, "epoch": 2.8986272439281944, "grad_norm": 7.991024971008301, "learning_rate": 1.9342333184641317e-06, "loss": 0.36938422, "memory(GiB)": 34.88, "step": 107055, "train_speed(iter/s)": 0.411811 }, { "acc": 0.95879173, "epoch": 2.8987626242114097, "grad_norm": 6.8429412841796875, "learning_rate": 1.9337913726530227e-06, "loss": 0.21192451, "memory(GiB)": 34.88, "step": 107060, "train_speed(iter/s)": 0.411812 }, { "acc": 0.95060501, "epoch": 2.8988980044946255, "grad_norm": 13.291925430297852, "learning_rate": 1.933349465258465e-06, "loss": 0.27144313, "memory(GiB)": 34.88, "step": 107065, "train_speed(iter/s)": 0.411813 }, { "acc": 0.94245644, "epoch": 2.899033384777841, "grad_norm": 6.26230525970459, "learning_rate": 1.9329075962859966e-06, "loss": 0.32330575, "memory(GiB)": 34.88, "step": 107070, "train_speed(iter/s)": 0.411814 }, { "acc": 0.93974113, "epoch": 2.8991687650610567, "grad_norm": 9.644506454467773, "learning_rate": 1.932465765741149e-06, "loss": 0.35508051, "memory(GiB)": 34.88, "step": 107075, "train_speed(iter/s)": 0.411815 }, { "acc": 0.9287425, "epoch": 2.899304145344272, "grad_norm": 6.531691074371338, "learning_rate": 1.932023973629459e-06, "loss": 0.40219898, "memory(GiB)": 34.88, "step": 107080, "train_speed(iter/s)": 0.411816 }, { "acc": 0.94979429, "epoch": 2.8994395256274874, "grad_norm": 9.0543851852417, "learning_rate": 1.9315822199564605e-06, "loss": 0.32056026, "memory(GiB)": 34.88, "step": 107085, "train_speed(iter/s)": 0.411817 }, { "acc": 0.92388687, "epoch": 2.899574905910703, "grad_norm": 12.571571350097656, "learning_rate": 1.93114050472769e-06, "loss": 0.38029468, "memory(GiB)": 34.88, "step": 107090, "train_speed(iter/s)": 0.411818 }, { "acc": 0.9345665, "epoch": 2.8997102861939186, "grad_norm": 12.712057113647461, "learning_rate": 1.930698827948676e-06, "loss": 0.32450366, "memory(GiB)": 34.88, "step": 107095, "train_speed(iter/s)": 0.411819 }, { "acc": 0.94238749, "epoch": 2.8998456664771344, "grad_norm": 14.388343811035156, "learning_rate": 1.930257189624954e-06, "loss": 0.33862593, "memory(GiB)": 34.88, "step": 107100, "train_speed(iter/s)": 0.41182 }, { "acc": 0.94378538, "epoch": 2.8999810467603497, "grad_norm": 8.888761520385742, "learning_rate": 1.9298155897620567e-06, "loss": 0.27332006, "memory(GiB)": 34.88, "step": 107105, "train_speed(iter/s)": 0.411821 }, { "acc": 0.94207592, "epoch": 2.9001164270435655, "grad_norm": 9.066001892089844, "learning_rate": 1.9293740283655167e-06, "loss": 0.3187007, "memory(GiB)": 34.88, "step": 107110, "train_speed(iter/s)": 0.411822 }, { "acc": 0.92969017, "epoch": 2.900251807326781, "grad_norm": 7.536051273345947, "learning_rate": 1.9289325054408624e-06, "loss": 0.45121355, "memory(GiB)": 34.88, "step": 107115, "train_speed(iter/s)": 0.411823 }, { "acc": 0.95297585, "epoch": 2.9003871876099963, "grad_norm": 3.4471113681793213, "learning_rate": 1.9284910209936275e-06, "loss": 0.24769878, "memory(GiB)": 34.88, "step": 107120, "train_speed(iter/s)": 0.411824 }, { "acc": 0.93209686, "epoch": 2.900522567893212, "grad_norm": 7.41569709777832, "learning_rate": 1.9280495750293424e-06, "loss": 0.40116086, "memory(GiB)": 34.88, "step": 107125, "train_speed(iter/s)": 0.411825 }, { "acc": 0.94741144, "epoch": 2.900657948176428, "grad_norm": 8.066017150878906, "learning_rate": 1.927608167553535e-06, "loss": 0.33150156, "memory(GiB)": 34.88, "step": 107130, "train_speed(iter/s)": 0.411825 }, { "acc": 0.9514555, "epoch": 2.900793328459643, "grad_norm": 14.781238555908203, "learning_rate": 1.927166798571736e-06, "loss": 0.28716211, "memory(GiB)": 34.88, "step": 107135, "train_speed(iter/s)": 0.411826 }, { "acc": 0.93917866, "epoch": 2.9009287087428586, "grad_norm": 8.418220520019531, "learning_rate": 1.926725468089474e-06, "loss": 0.34424863, "memory(GiB)": 34.88, "step": 107140, "train_speed(iter/s)": 0.411827 }, { "acc": 0.95314922, "epoch": 2.9010640890260744, "grad_norm": 4.9573493003845215, "learning_rate": 1.9262841761122796e-06, "loss": 0.23395133, "memory(GiB)": 34.88, "step": 107145, "train_speed(iter/s)": 0.411828 }, { "acc": 0.92934046, "epoch": 2.9011994693092897, "grad_norm": 8.885457038879395, "learning_rate": 1.9258429226456783e-06, "loss": 0.4835422, "memory(GiB)": 34.88, "step": 107150, "train_speed(iter/s)": 0.411829 }, { "acc": 0.93037968, "epoch": 2.901334849592505, "grad_norm": 6.7483367919921875, "learning_rate": 1.9254017076951982e-06, "loss": 0.3901845, "memory(GiB)": 34.88, "step": 107155, "train_speed(iter/s)": 0.41183 }, { "acc": 0.90717564, "epoch": 2.901470229875721, "grad_norm": 8.838149070739746, "learning_rate": 1.924960531266368e-06, "loss": 0.52913709, "memory(GiB)": 34.88, "step": 107160, "train_speed(iter/s)": 0.411831 }, { "acc": 0.93144894, "epoch": 2.9016056101589367, "grad_norm": 11.1334810256958, "learning_rate": 1.924519393364712e-06, "loss": 0.40402341, "memory(GiB)": 34.88, "step": 107165, "train_speed(iter/s)": 0.411831 }, { "acc": 0.92609606, "epoch": 2.901740990442152, "grad_norm": 3.2060370445251465, "learning_rate": 1.924078293995757e-06, "loss": 0.45253692, "memory(GiB)": 34.88, "step": 107170, "train_speed(iter/s)": 0.411832 }, { "acc": 0.93561516, "epoch": 2.9018763707253674, "grad_norm": 8.273202896118164, "learning_rate": 1.9236372331650295e-06, "loss": 0.40370541, "memory(GiB)": 34.88, "step": 107175, "train_speed(iter/s)": 0.411833 }, { "acc": 0.93545341, "epoch": 2.902011751008583, "grad_norm": 8.092616081237793, "learning_rate": 1.923196210878055e-06, "loss": 0.3246778, "memory(GiB)": 34.88, "step": 107180, "train_speed(iter/s)": 0.411834 }, { "acc": 0.93703737, "epoch": 2.9021471312917986, "grad_norm": 7.133741855621338, "learning_rate": 1.9227552271403546e-06, "loss": 0.37225478, "memory(GiB)": 34.88, "step": 107185, "train_speed(iter/s)": 0.411835 }, { "acc": 0.93541193, "epoch": 2.9022825115750144, "grad_norm": 7.22280216217041, "learning_rate": 1.9223142819574588e-06, "loss": 0.35866451, "memory(GiB)": 34.88, "step": 107190, "train_speed(iter/s)": 0.411836 }, { "acc": 0.92741728, "epoch": 2.9024178918582297, "grad_norm": 6.375805854797363, "learning_rate": 1.9218733753348874e-06, "loss": 0.47969575, "memory(GiB)": 34.88, "step": 107195, "train_speed(iter/s)": 0.411837 }, { "acc": 0.94541092, "epoch": 2.9025532721414455, "grad_norm": 6.992316246032715, "learning_rate": 1.9214325072781625e-06, "loss": 0.30174298, "memory(GiB)": 34.88, "step": 107200, "train_speed(iter/s)": 0.411838 }, { "acc": 0.94402924, "epoch": 2.902688652424661, "grad_norm": 8.47995662689209, "learning_rate": 1.9209916777928077e-06, "loss": 0.32976954, "memory(GiB)": 34.88, "step": 107205, "train_speed(iter/s)": 0.411839 }, { "acc": 0.91560297, "epoch": 2.9028240327078763, "grad_norm": 27.959848403930664, "learning_rate": 1.9205508868843467e-06, "loss": 0.46453843, "memory(GiB)": 34.88, "step": 107210, "train_speed(iter/s)": 0.41184 }, { "acc": 0.94125252, "epoch": 2.902959412991092, "grad_norm": 9.120945930480957, "learning_rate": 1.9201101345583016e-06, "loss": 0.32897024, "memory(GiB)": 34.88, "step": 107215, "train_speed(iter/s)": 0.411841 }, { "acc": 0.93600769, "epoch": 2.9030947932743074, "grad_norm": 4.428736209869385, "learning_rate": 1.9196694208201893e-06, "loss": 0.31990769, "memory(GiB)": 34.88, "step": 107220, "train_speed(iter/s)": 0.411842 }, { "acc": 0.93366041, "epoch": 2.903230173557523, "grad_norm": 8.059231758117676, "learning_rate": 1.9192287456755373e-06, "loss": 0.3686657, "memory(GiB)": 34.88, "step": 107225, "train_speed(iter/s)": 0.411843 }, { "acc": 0.94187117, "epoch": 2.9033655538407386, "grad_norm": 13.334124565124512, "learning_rate": 1.91878810912986e-06, "loss": 0.28382807, "memory(GiB)": 34.88, "step": 107230, "train_speed(iter/s)": 0.411844 }, { "acc": 0.94365883, "epoch": 2.9035009341239544, "grad_norm": 6.535021781921387, "learning_rate": 1.9183475111886825e-06, "loss": 0.26734593, "memory(GiB)": 34.88, "step": 107235, "train_speed(iter/s)": 0.411845 }, { "acc": 0.94370365, "epoch": 2.9036363144071697, "grad_norm": 4.383772850036621, "learning_rate": 1.917906951857519e-06, "loss": 0.35324893, "memory(GiB)": 34.88, "step": 107240, "train_speed(iter/s)": 0.411846 }, { "acc": 0.94562168, "epoch": 2.903771694690385, "grad_norm": 10.63017463684082, "learning_rate": 1.9174664311418913e-06, "loss": 0.34076362, "memory(GiB)": 34.88, "step": 107245, "train_speed(iter/s)": 0.411847 }, { "acc": 0.94336605, "epoch": 2.903907074973601, "grad_norm": 8.393753051757812, "learning_rate": 1.9170259490473183e-06, "loss": 0.33986008, "memory(GiB)": 34.88, "step": 107250, "train_speed(iter/s)": 0.411848 }, { "acc": 0.9494278, "epoch": 2.9040424552568163, "grad_norm": 7.1938862800598145, "learning_rate": 1.916585505579313e-06, "loss": 0.28920863, "memory(GiB)": 34.88, "step": 107255, "train_speed(iter/s)": 0.411849 }, { "acc": 0.93830557, "epoch": 2.904177835540032, "grad_norm": 8.579473495483398, "learning_rate": 1.9161451007434013e-06, "loss": 0.40316849, "memory(GiB)": 34.88, "step": 107260, "train_speed(iter/s)": 0.41185 }, { "acc": 0.94840069, "epoch": 2.9043132158232474, "grad_norm": 30.35400390625, "learning_rate": 1.9157047345450925e-06, "loss": 0.31852424, "memory(GiB)": 34.88, "step": 107265, "train_speed(iter/s)": 0.411851 }, { "acc": 0.94397964, "epoch": 2.904448596106463, "grad_norm": 4.888636589050293, "learning_rate": 1.9152644069899083e-06, "loss": 0.26180463, "memory(GiB)": 34.88, "step": 107270, "train_speed(iter/s)": 0.411852 }, { "acc": 0.94999809, "epoch": 2.9045839763896786, "grad_norm": 4.454811096191406, "learning_rate": 1.914824118083359e-06, "loss": 0.30781109, "memory(GiB)": 34.88, "step": 107275, "train_speed(iter/s)": 0.411852 }, { "acc": 0.94195213, "epoch": 2.904719356672894, "grad_norm": 4.698634147644043, "learning_rate": 1.9143838678309667e-06, "loss": 0.31212006, "memory(GiB)": 34.88, "step": 107280, "train_speed(iter/s)": 0.411853 }, { "acc": 0.91819878, "epoch": 2.9048547369561097, "grad_norm": 14.977813720703125, "learning_rate": 1.9139436562382432e-06, "loss": 0.5771863, "memory(GiB)": 34.88, "step": 107285, "train_speed(iter/s)": 0.411855 }, { "acc": 0.93825722, "epoch": 2.9049901172393255, "grad_norm": 8.854665756225586, "learning_rate": 1.9135034833106997e-06, "loss": 0.28529117, "memory(GiB)": 34.88, "step": 107290, "train_speed(iter/s)": 0.411856 }, { "acc": 0.93162127, "epoch": 2.905125497522541, "grad_norm": 5.6124796867370605, "learning_rate": 1.9130633490538576e-06, "loss": 0.38446336, "memory(GiB)": 34.88, "step": 107295, "train_speed(iter/s)": 0.411857 }, { "acc": 0.93718529, "epoch": 2.9052608778057563, "grad_norm": 9.969743728637695, "learning_rate": 1.9126232534732234e-06, "loss": 0.38566608, "memory(GiB)": 34.88, "step": 107300, "train_speed(iter/s)": 0.411857 }, { "acc": 0.94287615, "epoch": 2.905396258088972, "grad_norm": 7.010984420776367, "learning_rate": 1.9121831965743156e-06, "loss": 0.31260788, "memory(GiB)": 34.88, "step": 107305, "train_speed(iter/s)": 0.411858 }, { "acc": 0.92569313, "epoch": 2.9055316383721874, "grad_norm": 9.012825012207031, "learning_rate": 1.91174317836264e-06, "loss": 0.46443334, "memory(GiB)": 34.88, "step": 107310, "train_speed(iter/s)": 0.411859 }, { "acc": 0.93794298, "epoch": 2.9056670186554028, "grad_norm": 4.960184574127197, "learning_rate": 1.9113031988437174e-06, "loss": 0.31445699, "memory(GiB)": 34.88, "step": 107315, "train_speed(iter/s)": 0.41186 }, { "acc": 0.94538956, "epoch": 2.9058023989386186, "grad_norm": 5.882816791534424, "learning_rate": 1.9108632580230547e-06, "loss": 0.32302756, "memory(GiB)": 34.88, "step": 107320, "train_speed(iter/s)": 0.411861 }, { "acc": 0.94284487, "epoch": 2.9059377792218344, "grad_norm": 12.160086631774902, "learning_rate": 1.9104233559061593e-06, "loss": 0.3055927, "memory(GiB)": 34.88, "step": 107325, "train_speed(iter/s)": 0.411862 }, { "acc": 0.92287979, "epoch": 2.9060731595050497, "grad_norm": 12.437614440917969, "learning_rate": 1.9099834924985502e-06, "loss": 0.46721869, "memory(GiB)": 34.88, "step": 107330, "train_speed(iter/s)": 0.411863 }, { "acc": 0.94215698, "epoch": 2.906208539788265, "grad_norm": 4.77042818069458, "learning_rate": 1.9095436678057314e-06, "loss": 0.27202246, "memory(GiB)": 34.88, "step": 107335, "train_speed(iter/s)": 0.411864 }, { "acc": 0.94441175, "epoch": 2.906343920071481, "grad_norm": 10.5762939453125, "learning_rate": 1.9091038818332154e-06, "loss": 0.32685223, "memory(GiB)": 34.88, "step": 107340, "train_speed(iter/s)": 0.411865 }, { "acc": 0.94168539, "epoch": 2.9064793003546963, "grad_norm": 5.870828151702881, "learning_rate": 1.9086641345865074e-06, "loss": 0.35957632, "memory(GiB)": 34.88, "step": 107345, "train_speed(iter/s)": 0.411866 }, { "acc": 0.93668137, "epoch": 2.9066146806379116, "grad_norm": 8.36668872833252, "learning_rate": 1.908224426071122e-06, "loss": 0.36323857, "memory(GiB)": 34.88, "step": 107350, "train_speed(iter/s)": 0.411867 }, { "acc": 0.93264904, "epoch": 2.9067500609211274, "grad_norm": 13.701233863830566, "learning_rate": 1.9077847562925634e-06, "loss": 0.41489048, "memory(GiB)": 34.88, "step": 107355, "train_speed(iter/s)": 0.411868 }, { "acc": 0.93617458, "epoch": 2.9068854412043432, "grad_norm": 9.92755126953125, "learning_rate": 1.9073451252563402e-06, "loss": 0.36422358, "memory(GiB)": 34.88, "step": 107360, "train_speed(iter/s)": 0.411869 }, { "acc": 0.93891983, "epoch": 2.9070208214875586, "grad_norm": 15.174556732177734, "learning_rate": 1.9069055329679618e-06, "loss": 0.31629174, "memory(GiB)": 34.88, "step": 107365, "train_speed(iter/s)": 0.41187 }, { "acc": 0.93141928, "epoch": 2.907156201770774, "grad_norm": 10.133842468261719, "learning_rate": 1.906465979432931e-06, "loss": 0.39100256, "memory(GiB)": 34.88, "step": 107370, "train_speed(iter/s)": 0.411871 }, { "acc": 0.92554283, "epoch": 2.9072915820539897, "grad_norm": 5.024002552032471, "learning_rate": 1.9060264646567575e-06, "loss": 0.43099394, "memory(GiB)": 34.88, "step": 107375, "train_speed(iter/s)": 0.411872 }, { "acc": 0.92216635, "epoch": 2.907426962337205, "grad_norm": 43.09174728393555, "learning_rate": 1.9055869886449428e-06, "loss": 0.49071093, "memory(GiB)": 34.88, "step": 107380, "train_speed(iter/s)": 0.411873 }, { "acc": 0.93048153, "epoch": 2.907562342620421, "grad_norm": 9.708512306213379, "learning_rate": 1.905147551402998e-06, "loss": 0.37873044, "memory(GiB)": 34.88, "step": 107385, "train_speed(iter/s)": 0.411874 }, { "acc": 0.94404306, "epoch": 2.9076977229036363, "grad_norm": 4.45724630355835, "learning_rate": 1.9047081529364234e-06, "loss": 0.31987395, "memory(GiB)": 34.88, "step": 107390, "train_speed(iter/s)": 0.411875 }, { "acc": 0.92543335, "epoch": 2.907833103186852, "grad_norm": 12.222944259643555, "learning_rate": 1.9042687932507253e-06, "loss": 0.43984365, "memory(GiB)": 34.88, "step": 107395, "train_speed(iter/s)": 0.411876 }, { "acc": 0.95295048, "epoch": 2.9079684834700674, "grad_norm": 6.297214984893799, "learning_rate": 1.9038294723514067e-06, "loss": 0.31001947, "memory(GiB)": 34.88, "step": 107400, "train_speed(iter/s)": 0.411877 }, { "acc": 0.94168968, "epoch": 2.908103863753283, "grad_norm": 9.071014404296875, "learning_rate": 1.9033901902439733e-06, "loss": 0.37719674, "memory(GiB)": 34.88, "step": 107405, "train_speed(iter/s)": 0.411878 }, { "acc": 0.9413043, "epoch": 2.9082392440364986, "grad_norm": 4.421252250671387, "learning_rate": 1.902950946933926e-06, "loss": 0.33421359, "memory(GiB)": 34.88, "step": 107410, "train_speed(iter/s)": 0.411879 }, { "acc": 0.94528217, "epoch": 2.908374624319714, "grad_norm": 7.455314636230469, "learning_rate": 1.9025117424267638e-06, "loss": 0.30981534, "memory(GiB)": 34.88, "step": 107415, "train_speed(iter/s)": 0.41188 }, { "acc": 0.94465561, "epoch": 2.9085100046029297, "grad_norm": 3.6444385051727295, "learning_rate": 1.9020725767279958e-06, "loss": 0.33022804, "memory(GiB)": 34.88, "step": 107420, "train_speed(iter/s)": 0.411881 }, { "acc": 0.95460567, "epoch": 2.908645384886145, "grad_norm": 6.965876579284668, "learning_rate": 1.9016334498431171e-06, "loss": 0.24733536, "memory(GiB)": 34.88, "step": 107425, "train_speed(iter/s)": 0.411882 }, { "acc": 0.95105314, "epoch": 2.908780765169361, "grad_norm": 16.195289611816406, "learning_rate": 1.9011943617776341e-06, "loss": 0.34437401, "memory(GiB)": 34.88, "step": 107430, "train_speed(iter/s)": 0.411883 }, { "acc": 0.93261795, "epoch": 2.9089161454525763, "grad_norm": 4.002112865447998, "learning_rate": 1.90075531253704e-06, "loss": 0.41270308, "memory(GiB)": 34.88, "step": 107435, "train_speed(iter/s)": 0.411884 }, { "acc": 0.94872437, "epoch": 2.9090515257357916, "grad_norm": 5.058612823486328, "learning_rate": 1.900316302126843e-06, "loss": 0.24717607, "memory(GiB)": 34.88, "step": 107440, "train_speed(iter/s)": 0.411885 }, { "acc": 0.94209433, "epoch": 2.9091869060190074, "grad_norm": 7.073159217834473, "learning_rate": 1.8998773305525386e-06, "loss": 0.31901302, "memory(GiB)": 34.88, "step": 107445, "train_speed(iter/s)": 0.411886 }, { "acc": 0.94060678, "epoch": 2.909322286302223, "grad_norm": 3.4493870735168457, "learning_rate": 1.8994383978196221e-06, "loss": 0.33646517, "memory(GiB)": 34.88, "step": 107450, "train_speed(iter/s)": 0.411887 }, { "acc": 0.95108795, "epoch": 2.9094576665854386, "grad_norm": 7.402078628540039, "learning_rate": 1.8989995039335992e-06, "loss": 0.28019552, "memory(GiB)": 34.88, "step": 107455, "train_speed(iter/s)": 0.411888 }, { "acc": 0.93963604, "epoch": 2.909593046868654, "grad_norm": 8.440738677978516, "learning_rate": 1.8985606488999633e-06, "loss": 0.41640697, "memory(GiB)": 34.88, "step": 107460, "train_speed(iter/s)": 0.411888 }, { "acc": 0.93454762, "epoch": 2.9097284271518697, "grad_norm": 8.869006156921387, "learning_rate": 1.8981218327242145e-06, "loss": 0.36658409, "memory(GiB)": 34.88, "step": 107465, "train_speed(iter/s)": 0.411889 }, { "acc": 0.92750692, "epoch": 2.909863807435085, "grad_norm": 7.954571723937988, "learning_rate": 1.8976830554118447e-06, "loss": 0.44905009, "memory(GiB)": 34.88, "step": 107470, "train_speed(iter/s)": 0.41189 }, { "acc": 0.94206161, "epoch": 2.9099991877183005, "grad_norm": 8.381333351135254, "learning_rate": 1.897244316968358e-06, "loss": 0.33663721, "memory(GiB)": 34.88, "step": 107475, "train_speed(iter/s)": 0.411891 }, { "acc": 0.93953228, "epoch": 2.9101345680015163, "grad_norm": 15.442920684814453, "learning_rate": 1.8968056173992445e-06, "loss": 0.40368085, "memory(GiB)": 34.88, "step": 107480, "train_speed(iter/s)": 0.411892 }, { "acc": 0.93718624, "epoch": 2.910269948284732, "grad_norm": 6.892838954925537, "learning_rate": 1.8963669567100024e-06, "loss": 0.29671817, "memory(GiB)": 34.88, "step": 107485, "train_speed(iter/s)": 0.411893 }, { "acc": 0.92757864, "epoch": 2.9104053285679474, "grad_norm": 8.816452980041504, "learning_rate": 1.8959283349061275e-06, "loss": 0.44383631, "memory(GiB)": 34.88, "step": 107490, "train_speed(iter/s)": 0.411894 }, { "acc": 0.95548668, "epoch": 2.910540708851163, "grad_norm": 6.085860252380371, "learning_rate": 1.8954897519931115e-06, "loss": 0.27401676, "memory(GiB)": 34.88, "step": 107495, "train_speed(iter/s)": 0.411895 }, { "acc": 0.94284058, "epoch": 2.9106760891343786, "grad_norm": 26.222646713256836, "learning_rate": 1.8950512079764522e-06, "loss": 0.33629041, "memory(GiB)": 34.88, "step": 107500, "train_speed(iter/s)": 0.411896 }, { "acc": 0.91540184, "epoch": 2.910811469417594, "grad_norm": 19.58917808532715, "learning_rate": 1.8946127028616379e-06, "loss": 0.48207402, "memory(GiB)": 34.88, "step": 107505, "train_speed(iter/s)": 0.411897 }, { "acc": 0.93955927, "epoch": 2.9109468497008093, "grad_norm": 6.38192892074585, "learning_rate": 1.894174236654168e-06, "loss": 0.28443198, "memory(GiB)": 34.88, "step": 107510, "train_speed(iter/s)": 0.411898 }, { "acc": 0.94039888, "epoch": 2.911082229984025, "grad_norm": 6.102575778961182, "learning_rate": 1.8937358093595303e-06, "loss": 0.33183684, "memory(GiB)": 34.88, "step": 107515, "train_speed(iter/s)": 0.411899 }, { "acc": 0.94454422, "epoch": 2.911217610267241, "grad_norm": 12.747632026672363, "learning_rate": 1.8932974209832197e-06, "loss": 0.27628117, "memory(GiB)": 34.88, "step": 107520, "train_speed(iter/s)": 0.4119 }, { "acc": 0.93417206, "epoch": 2.9113529905504563, "grad_norm": 7.134239196777344, "learning_rate": 1.8928590715307265e-06, "loss": 0.41901569, "memory(GiB)": 34.88, "step": 107525, "train_speed(iter/s)": 0.411901 }, { "acc": 0.94516096, "epoch": 2.9114883708336716, "grad_norm": 24.962350845336914, "learning_rate": 1.8924207610075446e-06, "loss": 0.33354836, "memory(GiB)": 34.88, "step": 107530, "train_speed(iter/s)": 0.411902 }, { "acc": 0.93965178, "epoch": 2.9116237511168874, "grad_norm": 3.0409762859344482, "learning_rate": 1.891982489419162e-06, "loss": 0.34389348, "memory(GiB)": 34.88, "step": 107535, "train_speed(iter/s)": 0.411903 }, { "acc": 0.95718784, "epoch": 2.911759131400103, "grad_norm": 7.068582057952881, "learning_rate": 1.8915442567710664e-06, "loss": 0.28977213, "memory(GiB)": 34.88, "step": 107540, "train_speed(iter/s)": 0.411904 }, { "acc": 0.92805376, "epoch": 2.9118945116833186, "grad_norm": 11.931407928466797, "learning_rate": 1.891106063068754e-06, "loss": 0.42532711, "memory(GiB)": 34.88, "step": 107545, "train_speed(iter/s)": 0.411905 }, { "acc": 0.94238338, "epoch": 2.912029891966534, "grad_norm": 3.5655975341796875, "learning_rate": 1.8906679083177085e-06, "loss": 0.32932382, "memory(GiB)": 34.88, "step": 107550, "train_speed(iter/s)": 0.411906 }, { "acc": 0.9273881, "epoch": 2.9121652722497497, "grad_norm": 34.896766662597656, "learning_rate": 1.8902297925234208e-06, "loss": 0.44159808, "memory(GiB)": 34.88, "step": 107555, "train_speed(iter/s)": 0.411907 }, { "acc": 0.93928185, "epoch": 2.912300652532965, "grad_norm": 9.458852767944336, "learning_rate": 1.8897917156913799e-06, "loss": 0.34603629, "memory(GiB)": 34.88, "step": 107560, "train_speed(iter/s)": 0.411908 }, { "acc": 0.94618187, "epoch": 2.9124360328161805, "grad_norm": 5.718166828155518, "learning_rate": 1.8893536778270733e-06, "loss": 0.39047859, "memory(GiB)": 34.88, "step": 107565, "train_speed(iter/s)": 0.411908 }, { "acc": 0.94735432, "epoch": 2.9125714130993963, "grad_norm": 5.870718479156494, "learning_rate": 1.8889156789359885e-06, "loss": 0.3188343, "memory(GiB)": 34.88, "step": 107570, "train_speed(iter/s)": 0.411909 }, { "acc": 0.93177624, "epoch": 2.9127067933826116, "grad_norm": 3.2072689533233643, "learning_rate": 1.888477719023608e-06, "loss": 0.3929853, "memory(GiB)": 34.88, "step": 107575, "train_speed(iter/s)": 0.41191 }, { "acc": 0.94839249, "epoch": 2.9128421736658274, "grad_norm": 3.965996503829956, "learning_rate": 1.8880397980954252e-06, "loss": 0.3055274, "memory(GiB)": 34.88, "step": 107580, "train_speed(iter/s)": 0.411911 }, { "acc": 0.92192478, "epoch": 2.912977553949043, "grad_norm": 9.153315544128418, "learning_rate": 1.8876019161569206e-06, "loss": 0.47995157, "memory(GiB)": 34.88, "step": 107585, "train_speed(iter/s)": 0.411912 }, { "acc": 0.934972, "epoch": 2.9131129342322586, "grad_norm": 5.6827545166015625, "learning_rate": 1.887164073213581e-06, "loss": 0.42383194, "memory(GiB)": 34.88, "step": 107590, "train_speed(iter/s)": 0.411913 }, { "acc": 0.92583904, "epoch": 2.913248314515474, "grad_norm": 11.275955200195312, "learning_rate": 1.886726269270892e-06, "loss": 0.45519981, "memory(GiB)": 34.88, "step": 107595, "train_speed(iter/s)": 0.411914 }, { "acc": 0.94836779, "epoch": 2.9133836947986893, "grad_norm": 10.616268157958984, "learning_rate": 1.8862885043343392e-06, "loss": 0.27553809, "memory(GiB)": 34.88, "step": 107600, "train_speed(iter/s)": 0.411915 }, { "acc": 0.93757372, "epoch": 2.913519075081905, "grad_norm": 3.7279181480407715, "learning_rate": 1.8858507784094034e-06, "loss": 0.35394449, "memory(GiB)": 34.88, "step": 107605, "train_speed(iter/s)": 0.411916 }, { "acc": 0.93896637, "epoch": 2.9136544553651205, "grad_norm": 4.091830253601074, "learning_rate": 1.8854130915015686e-06, "loss": 0.34186502, "memory(GiB)": 34.88, "step": 107610, "train_speed(iter/s)": 0.411917 }, { "acc": 0.93628159, "epoch": 2.9137898356483363, "grad_norm": 7.264978885650635, "learning_rate": 1.8849754436163206e-06, "loss": 0.34305897, "memory(GiB)": 34.88, "step": 107615, "train_speed(iter/s)": 0.411918 }, { "acc": 0.94840937, "epoch": 2.9139252159315516, "grad_norm": 3.905413866043091, "learning_rate": 1.8845378347591382e-06, "loss": 0.28519835, "memory(GiB)": 34.88, "step": 107620, "train_speed(iter/s)": 0.411919 }, { "acc": 0.9463192, "epoch": 2.9140605962147674, "grad_norm": 5.238722801208496, "learning_rate": 1.8841002649355045e-06, "loss": 0.25959024, "memory(GiB)": 34.88, "step": 107625, "train_speed(iter/s)": 0.41192 }, { "acc": 0.94663572, "epoch": 2.914195976497983, "grad_norm": 9.393524169921875, "learning_rate": 1.8836627341509012e-06, "loss": 0.32279389, "memory(GiB)": 34.88, "step": 107630, "train_speed(iter/s)": 0.411921 }, { "acc": 0.95772381, "epoch": 2.914331356781198, "grad_norm": 7.7290472984313965, "learning_rate": 1.8832252424108113e-06, "loss": 0.23213606, "memory(GiB)": 34.88, "step": 107635, "train_speed(iter/s)": 0.411922 }, { "acc": 0.93074341, "epoch": 2.914466737064414, "grad_norm": 8.233880996704102, "learning_rate": 1.8827877897207105e-06, "loss": 0.38060617, "memory(GiB)": 34.88, "step": 107640, "train_speed(iter/s)": 0.411923 }, { "acc": 0.94540892, "epoch": 2.9146021173476298, "grad_norm": 4.038289546966553, "learning_rate": 1.8823503760860826e-06, "loss": 0.31651986, "memory(GiB)": 34.88, "step": 107645, "train_speed(iter/s)": 0.411924 }, { "acc": 0.93153791, "epoch": 2.914737497630845, "grad_norm": 6.161937713623047, "learning_rate": 1.8819130015124054e-06, "loss": 0.38597558, "memory(GiB)": 34.88, "step": 107650, "train_speed(iter/s)": 0.411925 }, { "acc": 0.94221458, "epoch": 2.9148728779140605, "grad_norm": 6.749971866607666, "learning_rate": 1.8814756660051598e-06, "loss": 0.33032796, "memory(GiB)": 34.88, "step": 107655, "train_speed(iter/s)": 0.411926 }, { "acc": 0.93080606, "epoch": 2.9150082581972763, "grad_norm": 9.402349472045898, "learning_rate": 1.8810383695698215e-06, "loss": 0.38659687, "memory(GiB)": 34.88, "step": 107660, "train_speed(iter/s)": 0.411927 }, { "acc": 0.95129747, "epoch": 2.9151436384804916, "grad_norm": 13.936269760131836, "learning_rate": 1.8806011122118693e-06, "loss": 0.35099831, "memory(GiB)": 34.88, "step": 107665, "train_speed(iter/s)": 0.411928 }, { "acc": 0.93913431, "epoch": 2.915279018763707, "grad_norm": 5.316295623779297, "learning_rate": 1.880163893936784e-06, "loss": 0.41406379, "memory(GiB)": 34.88, "step": 107670, "train_speed(iter/s)": 0.411929 }, { "acc": 0.93487911, "epoch": 2.915414399046923, "grad_norm": 10.408041954040527, "learning_rate": 1.8797267147500372e-06, "loss": 0.39512372, "memory(GiB)": 34.88, "step": 107675, "train_speed(iter/s)": 0.41193 }, { "acc": 0.93698235, "epoch": 2.9155497793301386, "grad_norm": 2.819019079208374, "learning_rate": 1.8792895746571085e-06, "loss": 0.37592483, "memory(GiB)": 34.88, "step": 107680, "train_speed(iter/s)": 0.411931 }, { "acc": 0.93530006, "epoch": 2.915685159613354, "grad_norm": 7.577141284942627, "learning_rate": 1.8788524736634733e-06, "loss": 0.34149423, "memory(GiB)": 34.88, "step": 107685, "train_speed(iter/s)": 0.411932 }, { "acc": 0.94050751, "epoch": 2.9158205398965693, "grad_norm": 5.845223903656006, "learning_rate": 1.8784154117746093e-06, "loss": 0.34424493, "memory(GiB)": 34.88, "step": 107690, "train_speed(iter/s)": 0.411933 }, { "acc": 0.94959774, "epoch": 2.915955920179785, "grad_norm": 8.480040550231934, "learning_rate": 1.877978388995988e-06, "loss": 0.29818048, "memory(GiB)": 34.88, "step": 107695, "train_speed(iter/s)": 0.411934 }, { "acc": 0.94937744, "epoch": 2.9160913004630005, "grad_norm": 6.671080112457275, "learning_rate": 1.8775414053330857e-06, "loss": 0.28824337, "memory(GiB)": 34.88, "step": 107700, "train_speed(iter/s)": 0.411935 }, { "acc": 0.95347319, "epoch": 2.9162266807462163, "grad_norm": 2.6588804721832275, "learning_rate": 1.8771044607913783e-06, "loss": 0.26828501, "memory(GiB)": 34.88, "step": 107705, "train_speed(iter/s)": 0.411936 }, { "acc": 0.93082142, "epoch": 2.9163620610294316, "grad_norm": 6.859854221343994, "learning_rate": 1.8766675553763356e-06, "loss": 0.42035608, "memory(GiB)": 34.88, "step": 107710, "train_speed(iter/s)": 0.411937 }, { "acc": 0.93818827, "epoch": 2.9164974413126474, "grad_norm": 4.792346477508545, "learning_rate": 1.876230689093432e-06, "loss": 0.34500995, "memory(GiB)": 34.88, "step": 107715, "train_speed(iter/s)": 0.411938 }, { "acc": 0.9312768, "epoch": 2.916632821595863, "grad_norm": 9.754210472106934, "learning_rate": 1.8757938619481413e-06, "loss": 0.35721667, "memory(GiB)": 34.88, "step": 107720, "train_speed(iter/s)": 0.411939 }, { "acc": 0.9358325, "epoch": 2.916768201879078, "grad_norm": 10.964937210083008, "learning_rate": 1.8753570739459366e-06, "loss": 0.37403879, "memory(GiB)": 34.88, "step": 107725, "train_speed(iter/s)": 0.41194 }, { "acc": 0.93202429, "epoch": 2.916903582162294, "grad_norm": 19.696125030517578, "learning_rate": 1.874920325092286e-06, "loss": 0.45107093, "memory(GiB)": 34.88, "step": 107730, "train_speed(iter/s)": 0.411941 }, { "acc": 0.94426937, "epoch": 2.9170389624455093, "grad_norm": 3.725677490234375, "learning_rate": 1.8744836153926628e-06, "loss": 0.27300835, "memory(GiB)": 34.88, "step": 107735, "train_speed(iter/s)": 0.411942 }, { "acc": 0.93548336, "epoch": 2.917174342728725, "grad_norm": 15.367076873779297, "learning_rate": 1.8740469448525395e-06, "loss": 0.40305457, "memory(GiB)": 34.88, "step": 107740, "train_speed(iter/s)": 0.411943 }, { "acc": 0.93073874, "epoch": 2.9173097230119405, "grad_norm": 14.345033645629883, "learning_rate": 1.8736103134773816e-06, "loss": 0.42850604, "memory(GiB)": 34.88, "step": 107745, "train_speed(iter/s)": 0.411944 }, { "acc": 0.94958267, "epoch": 2.9174451032951563, "grad_norm": 15.799731254577637, "learning_rate": 1.8731737212726622e-06, "loss": 0.28641295, "memory(GiB)": 34.88, "step": 107750, "train_speed(iter/s)": 0.411945 }, { "acc": 0.93470001, "epoch": 2.9175804835783716, "grad_norm": 11.639728546142578, "learning_rate": 1.8727371682438494e-06, "loss": 0.3687922, "memory(GiB)": 34.88, "step": 107755, "train_speed(iter/s)": 0.411947 }, { "acc": 0.94444218, "epoch": 2.917715863861587, "grad_norm": 8.25676155090332, "learning_rate": 1.8723006543964136e-06, "loss": 0.28270941, "memory(GiB)": 34.88, "step": 107760, "train_speed(iter/s)": 0.411948 }, { "acc": 0.92024498, "epoch": 2.917851244144803, "grad_norm": 8.664098739624023, "learning_rate": 1.87186417973582e-06, "loss": 0.48886127, "memory(GiB)": 34.88, "step": 107765, "train_speed(iter/s)": 0.411949 }, { "acc": 0.93908634, "epoch": 2.917986624428018, "grad_norm": 5.786419868469238, "learning_rate": 1.871427744267537e-06, "loss": 0.3710537, "memory(GiB)": 34.88, "step": 107770, "train_speed(iter/s)": 0.411949 }, { "acc": 0.93357306, "epoch": 2.918122004711234, "grad_norm": 9.350691795349121, "learning_rate": 1.870991347997034e-06, "loss": 0.36691778, "memory(GiB)": 34.88, "step": 107775, "train_speed(iter/s)": 0.41195 }, { "acc": 0.93310585, "epoch": 2.9182573849944493, "grad_norm": 3.1739342212677, "learning_rate": 1.8705549909297772e-06, "loss": 0.36927042, "memory(GiB)": 34.88, "step": 107780, "train_speed(iter/s)": 0.411952 }, { "acc": 0.94022722, "epoch": 2.918392765277665, "grad_norm": 4.159104347229004, "learning_rate": 1.8701186730712306e-06, "loss": 0.34899654, "memory(GiB)": 34.88, "step": 107785, "train_speed(iter/s)": 0.411953 }, { "acc": 0.93170366, "epoch": 2.9185281455608805, "grad_norm": 3.81063175201416, "learning_rate": 1.8696823944268607e-06, "loss": 0.37916293, "memory(GiB)": 34.88, "step": 107790, "train_speed(iter/s)": 0.411954 }, { "acc": 0.95656548, "epoch": 2.918663525844096, "grad_norm": 3.1940908432006836, "learning_rate": 1.8692461550021359e-06, "loss": 0.26584623, "memory(GiB)": 34.88, "step": 107795, "train_speed(iter/s)": 0.411954 }, { "acc": 0.95228682, "epoch": 2.9187989061273116, "grad_norm": 13.432097434997559, "learning_rate": 1.8688099548025162e-06, "loss": 0.2793323, "memory(GiB)": 34.88, "step": 107800, "train_speed(iter/s)": 0.411955 }, { "acc": 0.94765453, "epoch": 2.9189342864105274, "grad_norm": 4.3504133224487305, "learning_rate": 1.868373793833468e-06, "loss": 0.27217219, "memory(GiB)": 34.88, "step": 107805, "train_speed(iter/s)": 0.411956 }, { "acc": 0.95467968, "epoch": 2.919069666693743, "grad_norm": 7.373840808868408, "learning_rate": 1.8679376721004558e-06, "loss": 0.28061733, "memory(GiB)": 34.88, "step": 107810, "train_speed(iter/s)": 0.411957 }, { "acc": 0.9281229, "epoch": 2.919205046976958, "grad_norm": 9.675435066223145, "learning_rate": 1.8675015896089433e-06, "loss": 0.41896248, "memory(GiB)": 34.88, "step": 107815, "train_speed(iter/s)": 0.411958 }, { "acc": 0.93246078, "epoch": 2.919340427260174, "grad_norm": 32.49806594848633, "learning_rate": 1.8670655463643892e-06, "loss": 0.41458378, "memory(GiB)": 34.88, "step": 107820, "train_speed(iter/s)": 0.411959 }, { "acc": 0.95493841, "epoch": 2.9194758075433893, "grad_norm": 6.538331031799316, "learning_rate": 1.8666295423722617e-06, "loss": 0.29108887, "memory(GiB)": 34.88, "step": 107825, "train_speed(iter/s)": 0.41196 }, { "acc": 0.94850988, "epoch": 2.9196111878266047, "grad_norm": 10.281519889831543, "learning_rate": 1.8661935776380199e-06, "loss": 0.2823597, "memory(GiB)": 34.88, "step": 107830, "train_speed(iter/s)": 0.411961 }, { "acc": 0.93567009, "epoch": 2.9197465681098205, "grad_norm": 5.59920597076416, "learning_rate": 1.8657576521671235e-06, "loss": 0.36577291, "memory(GiB)": 34.88, "step": 107835, "train_speed(iter/s)": 0.411962 }, { "acc": 0.93393965, "epoch": 2.9198819483930363, "grad_norm": 6.052803993225098, "learning_rate": 1.8653217659650346e-06, "loss": 0.35474801, "memory(GiB)": 34.88, "step": 107840, "train_speed(iter/s)": 0.411963 }, { "acc": 0.95748692, "epoch": 2.9200173286762516, "grad_norm": 5.220010757446289, "learning_rate": 1.8648859190372137e-06, "loss": 0.27389898, "memory(GiB)": 34.88, "step": 107845, "train_speed(iter/s)": 0.411964 }, { "acc": 0.95402946, "epoch": 2.920152708959467, "grad_norm": 9.404778480529785, "learning_rate": 1.8644501113891221e-06, "loss": 0.25747025, "memory(GiB)": 34.88, "step": 107850, "train_speed(iter/s)": 0.411965 }, { "acc": 0.94303989, "epoch": 2.920288089242683, "grad_norm": 15.050254821777344, "learning_rate": 1.8640143430262147e-06, "loss": 0.25555286, "memory(GiB)": 34.88, "step": 107855, "train_speed(iter/s)": 0.411966 }, { "acc": 0.94060097, "epoch": 2.920423469525898, "grad_norm": 7.925797939300537, "learning_rate": 1.863578613953957e-06, "loss": 0.29658227, "memory(GiB)": 34.88, "step": 107860, "train_speed(iter/s)": 0.411967 }, { "acc": 0.94009724, "epoch": 2.920558849809114, "grad_norm": 7.3398213386535645, "learning_rate": 1.863142924177803e-06, "loss": 0.3230381, "memory(GiB)": 34.88, "step": 107865, "train_speed(iter/s)": 0.411968 }, { "acc": 0.94131279, "epoch": 2.9206942300923293, "grad_norm": 8.83561897277832, "learning_rate": 1.8627072737032101e-06, "loss": 0.35379162, "memory(GiB)": 34.88, "step": 107870, "train_speed(iter/s)": 0.411969 }, { "acc": 0.95272055, "epoch": 2.920829610375545, "grad_norm": 3.208310842514038, "learning_rate": 1.8622716625356364e-06, "loss": 0.25815525, "memory(GiB)": 34.88, "step": 107875, "train_speed(iter/s)": 0.41197 }, { "acc": 0.94159365, "epoch": 2.9209649906587605, "grad_norm": 3.426093101501465, "learning_rate": 1.8618360906805395e-06, "loss": 0.38440075, "memory(GiB)": 34.88, "step": 107880, "train_speed(iter/s)": 0.411971 }, { "acc": 0.93763008, "epoch": 2.921100370941976, "grad_norm": 8.139445304870605, "learning_rate": 1.8614005581433773e-06, "loss": 0.29326491, "memory(GiB)": 34.88, "step": 107885, "train_speed(iter/s)": 0.411972 }, { "acc": 0.94716139, "epoch": 2.9212357512251916, "grad_norm": 4.123749256134033, "learning_rate": 1.8609650649296e-06, "loss": 0.2350162, "memory(GiB)": 34.88, "step": 107890, "train_speed(iter/s)": 0.411973 }, { "acc": 0.94799423, "epoch": 2.921371131508407, "grad_norm": 8.521649360656738, "learning_rate": 1.8605296110446708e-06, "loss": 0.29446707, "memory(GiB)": 34.88, "step": 107895, "train_speed(iter/s)": 0.411974 }, { "acc": 0.94275341, "epoch": 2.921506511791623, "grad_norm": 8.88595199584961, "learning_rate": 1.8600941964940389e-06, "loss": 0.32441111, "memory(GiB)": 34.88, "step": 107900, "train_speed(iter/s)": 0.411975 }, { "acc": 0.9429266, "epoch": 2.921641892074838, "grad_norm": 5.541301250457764, "learning_rate": 1.8596588212831622e-06, "loss": 0.30709612, "memory(GiB)": 34.88, "step": 107905, "train_speed(iter/s)": 0.411976 }, { "acc": 0.94593649, "epoch": 2.921777272358054, "grad_norm": 14.115580558776855, "learning_rate": 1.8592234854174918e-06, "loss": 0.27617798, "memory(GiB)": 34.88, "step": 107910, "train_speed(iter/s)": 0.411976 }, { "acc": 0.93206491, "epoch": 2.9219126526412693, "grad_norm": 10.015235900878906, "learning_rate": 1.8587881889024817e-06, "loss": 0.33497295, "memory(GiB)": 34.88, "step": 107915, "train_speed(iter/s)": 0.411978 }, { "acc": 0.9403142, "epoch": 2.9220480329244847, "grad_norm": 32.26203918457031, "learning_rate": 1.8583529317435877e-06, "loss": 0.36422164, "memory(GiB)": 34.88, "step": 107920, "train_speed(iter/s)": 0.411979 }, { "acc": 0.93089027, "epoch": 2.9221834132077005, "grad_norm": 8.48156452178955, "learning_rate": 1.8579177139462565e-06, "loss": 0.4106287, "memory(GiB)": 34.88, "step": 107925, "train_speed(iter/s)": 0.41198 }, { "acc": 0.93382864, "epoch": 2.922318793490916, "grad_norm": 9.065140724182129, "learning_rate": 1.8574825355159467e-06, "loss": 0.40375547, "memory(GiB)": 34.88, "step": 107930, "train_speed(iter/s)": 0.411981 }, { "acc": 0.94341621, "epoch": 2.9224541737741316, "grad_norm": 12.083395957946777, "learning_rate": 1.8570473964581054e-06, "loss": 0.33541164, "memory(GiB)": 34.88, "step": 107935, "train_speed(iter/s)": 0.411982 }, { "acc": 0.93769922, "epoch": 2.922589554057347, "grad_norm": 4.527332782745361, "learning_rate": 1.8566122967781864e-06, "loss": 0.35826564, "memory(GiB)": 34.88, "step": 107940, "train_speed(iter/s)": 0.411983 }, { "acc": 0.93562899, "epoch": 2.922724934340563, "grad_norm": 10.171360969543457, "learning_rate": 1.8561772364816347e-06, "loss": 0.33804173, "memory(GiB)": 34.88, "step": 107945, "train_speed(iter/s)": 0.411984 }, { "acc": 0.93169746, "epoch": 2.922860314623778, "grad_norm": 8.892321586608887, "learning_rate": 1.8557422155739085e-06, "loss": 0.3241394, "memory(GiB)": 34.88, "step": 107950, "train_speed(iter/s)": 0.411985 }, { "acc": 0.9387311, "epoch": 2.9229956949069935, "grad_norm": 6.40341854095459, "learning_rate": 1.8553072340604521e-06, "loss": 0.38560672, "memory(GiB)": 34.88, "step": 107955, "train_speed(iter/s)": 0.411986 }, { "acc": 0.94806976, "epoch": 2.9231310751902093, "grad_norm": 5.481560230255127, "learning_rate": 1.8548722919467124e-06, "loss": 0.31357579, "memory(GiB)": 34.88, "step": 107960, "train_speed(iter/s)": 0.411986 }, { "acc": 0.93361025, "epoch": 2.923266455473425, "grad_norm": 10.878453254699707, "learning_rate": 1.8544373892381446e-06, "loss": 0.44216433, "memory(GiB)": 34.88, "step": 107965, "train_speed(iter/s)": 0.411987 }, { "acc": 0.93324919, "epoch": 2.9234018357566405, "grad_norm": 12.335144996643066, "learning_rate": 1.8540025259401912e-06, "loss": 0.37822635, "memory(GiB)": 34.88, "step": 107970, "train_speed(iter/s)": 0.411988 }, { "acc": 0.93401642, "epoch": 2.923537216039856, "grad_norm": 5.925078392028809, "learning_rate": 1.8535677020583026e-06, "loss": 0.36243796, "memory(GiB)": 34.88, "step": 107975, "train_speed(iter/s)": 0.411989 }, { "acc": 0.9532999, "epoch": 2.9236725963230716, "grad_norm": 4.9065446853637695, "learning_rate": 1.853132917597922e-06, "loss": 0.31838818, "memory(GiB)": 34.88, "step": 107980, "train_speed(iter/s)": 0.41199 }, { "acc": 0.9511878, "epoch": 2.923807976606287, "grad_norm": 4.317506790161133, "learning_rate": 1.852698172564502e-06, "loss": 0.31586521, "memory(GiB)": 34.88, "step": 107985, "train_speed(iter/s)": 0.411991 }, { "acc": 0.93613758, "epoch": 2.9239433568895024, "grad_norm": 18.072338104248047, "learning_rate": 1.8522634669634846e-06, "loss": 0.43266454, "memory(GiB)": 34.88, "step": 107990, "train_speed(iter/s)": 0.411992 }, { "acc": 0.94077387, "epoch": 2.924078737172718, "grad_norm": 9.457358360290527, "learning_rate": 1.8518288008003151e-06, "loss": 0.30949693, "memory(GiB)": 34.88, "step": 107995, "train_speed(iter/s)": 0.411994 }, { "acc": 0.93690338, "epoch": 2.924214117455934, "grad_norm": 6.770802021026611, "learning_rate": 1.8513941740804391e-06, "loss": 0.39910235, "memory(GiB)": 34.88, "step": 108000, "train_speed(iter/s)": 0.411995 }, { "acc": 0.94423285, "epoch": 2.9243494977391493, "grad_norm": 7.184466361999512, "learning_rate": 1.850959586809302e-06, "loss": 0.30625906, "memory(GiB)": 34.88, "step": 108005, "train_speed(iter/s)": 0.411996 }, { "acc": 0.93620949, "epoch": 2.9244848780223647, "grad_norm": 8.392985343933105, "learning_rate": 1.8505250389923485e-06, "loss": 0.38967969, "memory(GiB)": 34.88, "step": 108010, "train_speed(iter/s)": 0.411997 }, { "acc": 0.94081764, "epoch": 2.9246202583055805, "grad_norm": 3.745208740234375, "learning_rate": 1.8500905306350183e-06, "loss": 0.29569645, "memory(GiB)": 34.88, "step": 108015, "train_speed(iter/s)": 0.411998 }, { "acc": 0.94706917, "epoch": 2.924755638588796, "grad_norm": 5.656189441680908, "learning_rate": 1.8496560617427602e-06, "loss": 0.30087509, "memory(GiB)": 34.88, "step": 108020, "train_speed(iter/s)": 0.411999 }, { "acc": 0.93698463, "epoch": 2.9248910188720116, "grad_norm": 6.586033821105957, "learning_rate": 1.8492216323210125e-06, "loss": 0.31095328, "memory(GiB)": 34.88, "step": 108025, "train_speed(iter/s)": 0.412 }, { "acc": 0.92626553, "epoch": 2.925026399155227, "grad_norm": 10.74109935760498, "learning_rate": 1.8487872423752198e-06, "loss": 0.45973067, "memory(GiB)": 34.88, "step": 108030, "train_speed(iter/s)": 0.412001 }, { "acc": 0.93084402, "epoch": 2.925161779438443, "grad_norm": 14.045170783996582, "learning_rate": 1.8483528919108216e-06, "loss": 0.43825965, "memory(GiB)": 34.88, "step": 108035, "train_speed(iter/s)": 0.412001 }, { "acc": 0.94096327, "epoch": 2.925297159721658, "grad_norm": 6.457934379577637, "learning_rate": 1.8479185809332592e-06, "loss": 0.32487807, "memory(GiB)": 34.88, "step": 108040, "train_speed(iter/s)": 0.412002 }, { "acc": 0.92570286, "epoch": 2.9254325400048735, "grad_norm": 4.385685443878174, "learning_rate": 1.8474843094479754e-06, "loss": 0.46710291, "memory(GiB)": 34.88, "step": 108045, "train_speed(iter/s)": 0.412003 }, { "acc": 0.94514923, "epoch": 2.9255679202880893, "grad_norm": 8.869970321655273, "learning_rate": 1.8470500774604062e-06, "loss": 0.33050818, "memory(GiB)": 34.88, "step": 108050, "train_speed(iter/s)": 0.412004 }, { "acc": 0.93734207, "epoch": 2.9257033005713047, "grad_norm": 5.33566951751709, "learning_rate": 1.8466158849759963e-06, "loss": 0.35873785, "memory(GiB)": 34.88, "step": 108055, "train_speed(iter/s)": 0.412005 }, { "acc": 0.94582634, "epoch": 2.9258386808545205, "grad_norm": 2.164731025695801, "learning_rate": 1.8461817320001813e-06, "loss": 0.31787171, "memory(GiB)": 34.88, "step": 108060, "train_speed(iter/s)": 0.412006 }, { "acc": 0.9358923, "epoch": 2.925974061137736, "grad_norm": 15.066961288452148, "learning_rate": 1.8457476185384022e-06, "loss": 0.43381319, "memory(GiB)": 34.88, "step": 108065, "train_speed(iter/s)": 0.412007 }, { "acc": 0.93605633, "epoch": 2.9261094414209516, "grad_norm": 7.921341896057129, "learning_rate": 1.8453135445960924e-06, "loss": 0.41186724, "memory(GiB)": 34.88, "step": 108070, "train_speed(iter/s)": 0.412008 }, { "acc": 0.93842115, "epoch": 2.926244821704167, "grad_norm": 8.57362174987793, "learning_rate": 1.8448795101786961e-06, "loss": 0.32491796, "memory(GiB)": 34.88, "step": 108075, "train_speed(iter/s)": 0.412009 }, { "acc": 0.94111595, "epoch": 2.9263802019873824, "grad_norm": 4.330693244934082, "learning_rate": 1.844445515291648e-06, "loss": 0.28149078, "memory(GiB)": 34.88, "step": 108080, "train_speed(iter/s)": 0.41201 }, { "acc": 0.92885437, "epoch": 2.926515582270598, "grad_norm": 15.76115608215332, "learning_rate": 1.84401155994038e-06, "loss": 0.36913457, "memory(GiB)": 34.88, "step": 108085, "train_speed(iter/s)": 0.412011 }, { "acc": 0.94642715, "epoch": 2.9266509625538135, "grad_norm": 3.7075109481811523, "learning_rate": 1.8435776441303349e-06, "loss": 0.29730644, "memory(GiB)": 34.88, "step": 108090, "train_speed(iter/s)": 0.412012 }, { "acc": 0.93135653, "epoch": 2.9267863428370293, "grad_norm": 7.1271071434021, "learning_rate": 1.843143767866945e-06, "loss": 0.45486603, "memory(GiB)": 34.88, "step": 108095, "train_speed(iter/s)": 0.412013 }, { "acc": 0.93825769, "epoch": 2.9269217231202447, "grad_norm": 6.149026870727539, "learning_rate": 1.8427099311556477e-06, "loss": 0.30878282, "memory(GiB)": 34.88, "step": 108100, "train_speed(iter/s)": 0.412014 }, { "acc": 0.95730543, "epoch": 2.9270571034034605, "grad_norm": 5.712790012359619, "learning_rate": 1.8422761340018721e-06, "loss": 0.20555134, "memory(GiB)": 34.88, "step": 108105, "train_speed(iter/s)": 0.412015 }, { "acc": 0.9349824, "epoch": 2.927192483686676, "grad_norm": 6.240323066711426, "learning_rate": 1.84184237641106e-06, "loss": 0.43693295, "memory(GiB)": 34.88, "step": 108110, "train_speed(iter/s)": 0.412016 }, { "acc": 0.94496632, "epoch": 2.927327863969891, "grad_norm": 7.188242435455322, "learning_rate": 1.8414086583886415e-06, "loss": 0.2793545, "memory(GiB)": 34.88, "step": 108115, "train_speed(iter/s)": 0.412017 }, { "acc": 0.94168148, "epoch": 2.927463244253107, "grad_norm": 5.901193141937256, "learning_rate": 1.8409749799400459e-06, "loss": 0.35504894, "memory(GiB)": 34.88, "step": 108120, "train_speed(iter/s)": 0.412018 }, { "acc": 0.92376442, "epoch": 2.927598624536323, "grad_norm": 10.606587409973145, "learning_rate": 1.8405413410707134e-06, "loss": 0.46921415, "memory(GiB)": 34.88, "step": 108125, "train_speed(iter/s)": 0.412019 }, { "acc": 0.95333309, "epoch": 2.927734004819538, "grad_norm": 5.621068954467773, "learning_rate": 1.8401077417860707e-06, "loss": 0.26064501, "memory(GiB)": 34.88, "step": 108130, "train_speed(iter/s)": 0.41202 }, { "acc": 0.92460289, "epoch": 2.9278693851027535, "grad_norm": 8.600071907043457, "learning_rate": 1.8396741820915525e-06, "loss": 0.34660108, "memory(GiB)": 34.88, "step": 108135, "train_speed(iter/s)": 0.412021 }, { "acc": 0.94494438, "epoch": 2.9280047653859693, "grad_norm": 8.473066329956055, "learning_rate": 1.839240661992586e-06, "loss": 0.32026999, "memory(GiB)": 34.88, "step": 108140, "train_speed(iter/s)": 0.412022 }, { "acc": 0.93831253, "epoch": 2.9281401456691847, "grad_norm": 7.35392951965332, "learning_rate": 1.838807181494607e-06, "loss": 0.34392099, "memory(GiB)": 34.88, "step": 108145, "train_speed(iter/s)": 0.412023 }, { "acc": 0.92700424, "epoch": 2.9282755259524, "grad_norm": 9.817078590393066, "learning_rate": 1.8383737406030422e-06, "loss": 0.42656765, "memory(GiB)": 34.88, "step": 108150, "train_speed(iter/s)": 0.412024 }, { "acc": 0.94044266, "epoch": 2.928410906235616, "grad_norm": 2.441047430038452, "learning_rate": 1.8379403393233226e-06, "loss": 0.31702266, "memory(GiB)": 34.88, "step": 108155, "train_speed(iter/s)": 0.412025 }, { "acc": 0.9359293, "epoch": 2.9285462865188316, "grad_norm": 4.4480509757995605, "learning_rate": 1.8375069776608786e-06, "loss": 0.35179467, "memory(GiB)": 34.88, "step": 108160, "train_speed(iter/s)": 0.412026 }, { "acc": 0.93375874, "epoch": 2.928681666802047, "grad_norm": 5.187286853790283, "learning_rate": 1.8370736556211358e-06, "loss": 0.36836307, "memory(GiB)": 34.88, "step": 108165, "train_speed(iter/s)": 0.412027 }, { "acc": 0.94489574, "epoch": 2.9288170470852624, "grad_norm": 2.791492223739624, "learning_rate": 1.8366403732095256e-06, "loss": 0.31301508, "memory(GiB)": 34.88, "step": 108170, "train_speed(iter/s)": 0.412028 }, { "acc": 0.93832932, "epoch": 2.928952427368478, "grad_norm": 5.66420316696167, "learning_rate": 1.8362071304314707e-06, "loss": 0.32312973, "memory(GiB)": 34.88, "step": 108175, "train_speed(iter/s)": 0.412029 }, { "acc": 0.95335732, "epoch": 2.9290878076516935, "grad_norm": 5.752973556518555, "learning_rate": 1.8357739272924058e-06, "loss": 0.25532632, "memory(GiB)": 34.88, "step": 108180, "train_speed(iter/s)": 0.41203 }, { "acc": 0.94714985, "epoch": 2.9292231879349093, "grad_norm": 13.089097023010254, "learning_rate": 1.8353407637977513e-06, "loss": 0.33700709, "memory(GiB)": 34.88, "step": 108185, "train_speed(iter/s)": 0.412031 }, { "acc": 0.95972137, "epoch": 2.9293585682181247, "grad_norm": 5.906075954437256, "learning_rate": 1.8349076399529359e-06, "loss": 0.23394113, "memory(GiB)": 34.88, "step": 108190, "train_speed(iter/s)": 0.412032 }, { "acc": 0.93369656, "epoch": 2.9294939485013405, "grad_norm": 8.22773265838623, "learning_rate": 1.8344745557633856e-06, "loss": 0.37934899, "memory(GiB)": 34.88, "step": 108195, "train_speed(iter/s)": 0.412033 }, { "acc": 0.94416199, "epoch": 2.929629328784556, "grad_norm": 6.942986011505127, "learning_rate": 1.8340415112345263e-06, "loss": 0.32592075, "memory(GiB)": 34.88, "step": 108200, "train_speed(iter/s)": 0.412034 }, { "acc": 0.94434357, "epoch": 2.929764709067771, "grad_norm": 4.764411926269531, "learning_rate": 1.8336085063717823e-06, "loss": 0.33018918, "memory(GiB)": 34.88, "step": 108205, "train_speed(iter/s)": 0.412035 }, { "acc": 0.93458557, "epoch": 2.929900089350987, "grad_norm": 8.230531692504883, "learning_rate": 1.8331755411805739e-06, "loss": 0.34581747, "memory(GiB)": 34.88, "step": 108210, "train_speed(iter/s)": 0.412036 }, { "acc": 0.93638496, "epoch": 2.9300354696342024, "grad_norm": 2.413416624069214, "learning_rate": 1.8327426156663305e-06, "loss": 0.33497548, "memory(GiB)": 34.88, "step": 108215, "train_speed(iter/s)": 0.412037 }, { "acc": 0.94490318, "epoch": 2.930170849917418, "grad_norm": 5.773898601531982, "learning_rate": 1.8323097298344717e-06, "loss": 0.24339404, "memory(GiB)": 34.88, "step": 108220, "train_speed(iter/s)": 0.412038 }, { "acc": 0.94132071, "epoch": 2.9303062302006335, "grad_norm": 9.005842208862305, "learning_rate": 1.831876883690422e-06, "loss": 0.38288119, "memory(GiB)": 34.88, "step": 108225, "train_speed(iter/s)": 0.412039 }, { "acc": 0.93366385, "epoch": 2.9304416104838493, "grad_norm": 12.067510604858398, "learning_rate": 1.8314440772396024e-06, "loss": 0.44471998, "memory(GiB)": 34.88, "step": 108230, "train_speed(iter/s)": 0.41204 }, { "acc": 0.95563526, "epoch": 2.9305769907670647, "grad_norm": 9.358227729797363, "learning_rate": 1.8310113104874377e-06, "loss": 0.28313475, "memory(GiB)": 34.88, "step": 108235, "train_speed(iter/s)": 0.412041 }, { "acc": 0.94008198, "epoch": 2.93071237105028, "grad_norm": 9.735729217529297, "learning_rate": 1.830578583439347e-06, "loss": 0.39154248, "memory(GiB)": 34.88, "step": 108240, "train_speed(iter/s)": 0.412042 }, { "acc": 0.93702354, "epoch": 2.930847751333496, "grad_norm": 5.475220680236816, "learning_rate": 1.8301458961007473e-06, "loss": 0.40388041, "memory(GiB)": 34.88, "step": 108245, "train_speed(iter/s)": 0.412042 }, { "acc": 0.90496445, "epoch": 2.930983131616711, "grad_norm": 22.695249557495117, "learning_rate": 1.8297132484770657e-06, "loss": 0.61205893, "memory(GiB)": 34.88, "step": 108250, "train_speed(iter/s)": 0.412043 }, { "acc": 0.9245347, "epoch": 2.931118511899927, "grad_norm": 6.464838981628418, "learning_rate": 1.829280640573717e-06, "loss": 0.48842678, "memory(GiB)": 34.88, "step": 108255, "train_speed(iter/s)": 0.412044 }, { "acc": 0.93001823, "epoch": 2.9312538921831424, "grad_norm": 13.221243858337402, "learning_rate": 1.8288480723961226e-06, "loss": 0.4397686, "memory(GiB)": 34.88, "step": 108260, "train_speed(iter/s)": 0.412045 }, { "acc": 0.93499222, "epoch": 2.931389272466358, "grad_norm": 6.842896938323975, "learning_rate": 1.8284155439497006e-06, "loss": 0.37331645, "memory(GiB)": 34.88, "step": 108265, "train_speed(iter/s)": 0.412046 }, { "acc": 0.94276819, "epoch": 2.9315246527495735, "grad_norm": 5.860467433929443, "learning_rate": 1.8279830552398714e-06, "loss": 0.30834446, "memory(GiB)": 34.88, "step": 108270, "train_speed(iter/s)": 0.412047 }, { "acc": 0.93011742, "epoch": 2.931660033032789, "grad_norm": 2.6092395782470703, "learning_rate": 1.827550606272049e-06, "loss": 0.33845248, "memory(GiB)": 34.88, "step": 108275, "train_speed(iter/s)": 0.412048 }, { "acc": 0.94014254, "epoch": 2.9317954133160047, "grad_norm": 5.061714172363281, "learning_rate": 1.8271181970516528e-06, "loss": 0.36412606, "memory(GiB)": 34.88, "step": 108280, "train_speed(iter/s)": 0.412049 }, { "acc": 0.93582592, "epoch": 2.9319307935992205, "grad_norm": 6.2933549880981445, "learning_rate": 1.8266858275841012e-06, "loss": 0.36298671, "memory(GiB)": 34.88, "step": 108285, "train_speed(iter/s)": 0.41205 }, { "acc": 0.92198381, "epoch": 2.932066173882436, "grad_norm": 9.452969551086426, "learning_rate": 1.8262534978748067e-06, "loss": 0.50941477, "memory(GiB)": 34.88, "step": 108290, "train_speed(iter/s)": 0.412051 }, { "acc": 0.93106842, "epoch": 2.932201554165651, "grad_norm": 7.3200364112854, "learning_rate": 1.825821207929187e-06, "loss": 0.47207661, "memory(GiB)": 34.88, "step": 108295, "train_speed(iter/s)": 0.412052 }, { "acc": 0.95140553, "epoch": 2.932336934448867, "grad_norm": 14.131115913391113, "learning_rate": 1.8253889577526574e-06, "loss": 0.20856836, "memory(GiB)": 34.88, "step": 108300, "train_speed(iter/s)": 0.412053 }, { "acc": 0.93865242, "epoch": 2.9324723147320824, "grad_norm": 8.395849227905273, "learning_rate": 1.8249567473506346e-06, "loss": 0.33157518, "memory(GiB)": 34.88, "step": 108305, "train_speed(iter/s)": 0.412054 }, { "acc": 0.95179586, "epoch": 2.9326076950152977, "grad_norm": 14.474753379821777, "learning_rate": 1.824524576728529e-06, "loss": 0.22108109, "memory(GiB)": 34.88, "step": 108310, "train_speed(iter/s)": 0.412055 }, { "acc": 0.9527832, "epoch": 2.9327430752985135, "grad_norm": 6.0875163078308105, "learning_rate": 1.8240924458917568e-06, "loss": 0.31692772, "memory(GiB)": 34.88, "step": 108315, "train_speed(iter/s)": 0.412056 }, { "acc": 0.93958139, "epoch": 2.9328784555817293, "grad_norm": 5.444601535797119, "learning_rate": 1.8236603548457304e-06, "loss": 0.33051639, "memory(GiB)": 34.88, "step": 108320, "train_speed(iter/s)": 0.412058 }, { "acc": 0.9400425, "epoch": 2.9330138358649447, "grad_norm": 8.202262878417969, "learning_rate": 1.8232283035958648e-06, "loss": 0.39272771, "memory(GiB)": 34.88, "step": 108325, "train_speed(iter/s)": 0.412059 }, { "acc": 0.94122601, "epoch": 2.93314921614816, "grad_norm": 5.591365814208984, "learning_rate": 1.8227962921475688e-06, "loss": 0.33733845, "memory(GiB)": 34.88, "step": 108330, "train_speed(iter/s)": 0.41206 }, { "acc": 0.93656397, "epoch": 2.933284596431376, "grad_norm": 2.6089344024658203, "learning_rate": 1.822364320506256e-06, "loss": 0.32117643, "memory(GiB)": 34.88, "step": 108335, "train_speed(iter/s)": 0.412061 }, { "acc": 0.94569931, "epoch": 2.933419976714591, "grad_norm": 5.895590782165527, "learning_rate": 1.8219323886773393e-06, "loss": 0.29044609, "memory(GiB)": 34.88, "step": 108340, "train_speed(iter/s)": 0.412062 }, { "acc": 0.94608307, "epoch": 2.9335553569978066, "grad_norm": 3.5837368965148926, "learning_rate": 1.8215004966662259e-06, "loss": 0.33682907, "memory(GiB)": 34.88, "step": 108345, "train_speed(iter/s)": 0.412063 }, { "acc": 0.94457884, "epoch": 2.9336907372810224, "grad_norm": 4.2916107177734375, "learning_rate": 1.8210686444783284e-06, "loss": 0.28534272, "memory(GiB)": 34.88, "step": 108350, "train_speed(iter/s)": 0.412064 }, { "acc": 0.92666798, "epoch": 2.933826117564238, "grad_norm": 9.53352165222168, "learning_rate": 1.8206368321190562e-06, "loss": 0.45570726, "memory(GiB)": 34.88, "step": 108355, "train_speed(iter/s)": 0.412065 }, { "acc": 0.93264589, "epoch": 2.9339614978474535, "grad_norm": 13.012901306152344, "learning_rate": 1.82020505959382e-06, "loss": 0.44959517, "memory(GiB)": 34.88, "step": 108360, "train_speed(iter/s)": 0.412066 }, { "acc": 0.92720318, "epoch": 2.934096878130669, "grad_norm": 6.4084272384643555, "learning_rate": 1.8197733269080256e-06, "loss": 0.41808472, "memory(GiB)": 34.88, "step": 108365, "train_speed(iter/s)": 0.412067 }, { "acc": 0.93350506, "epoch": 2.9342322584138847, "grad_norm": 3.1065187454223633, "learning_rate": 1.819341634067083e-06, "loss": 0.38472514, "memory(GiB)": 34.88, "step": 108370, "train_speed(iter/s)": 0.412068 }, { "acc": 0.94015942, "epoch": 2.9343676386971, "grad_norm": 4.032139778137207, "learning_rate": 1.818909981076401e-06, "loss": 0.32560833, "memory(GiB)": 34.88, "step": 108375, "train_speed(iter/s)": 0.412069 }, { "acc": 0.93382492, "epoch": 2.934503018980316, "grad_norm": 6.91808557510376, "learning_rate": 1.818478367941385e-06, "loss": 0.38997967, "memory(GiB)": 34.88, "step": 108380, "train_speed(iter/s)": 0.41207 }, { "acc": 0.9178606, "epoch": 2.934638399263531, "grad_norm": 10.536290168762207, "learning_rate": 1.8180467946674424e-06, "loss": 0.4616518, "memory(GiB)": 34.88, "step": 108385, "train_speed(iter/s)": 0.412071 }, { "acc": 0.95586586, "epoch": 2.934773779546747, "grad_norm": 9.316890716552734, "learning_rate": 1.8176152612599794e-06, "loss": 0.29902844, "memory(GiB)": 34.88, "step": 108390, "train_speed(iter/s)": 0.412072 }, { "acc": 0.95785713, "epoch": 2.9349091598299624, "grad_norm": 5.264980316162109, "learning_rate": 1.817183767724404e-06, "loss": 0.24598634, "memory(GiB)": 34.88, "step": 108395, "train_speed(iter/s)": 0.412073 }, { "acc": 0.93342209, "epoch": 2.9350445401131777, "grad_norm": 7.550907135009766, "learning_rate": 1.8167523140661174e-06, "loss": 0.40545878, "memory(GiB)": 34.88, "step": 108400, "train_speed(iter/s)": 0.412074 }, { "acc": 0.94322023, "epoch": 2.9351799203963935, "grad_norm": 4.349426746368408, "learning_rate": 1.8163209002905275e-06, "loss": 0.34019213, "memory(GiB)": 34.88, "step": 108405, "train_speed(iter/s)": 0.412075 }, { "acc": 0.93807831, "epoch": 2.935315300679609, "grad_norm": 6.929126739501953, "learning_rate": 1.815889526403039e-06, "loss": 0.31317205, "memory(GiB)": 34.88, "step": 108410, "train_speed(iter/s)": 0.412076 }, { "acc": 0.95358715, "epoch": 2.9354506809628247, "grad_norm": 7.414444446563721, "learning_rate": 1.8154581924090523e-06, "loss": 0.26377554, "memory(GiB)": 34.88, "step": 108415, "train_speed(iter/s)": 0.412077 }, { "acc": 0.94754696, "epoch": 2.93558606124604, "grad_norm": 7.443965435028076, "learning_rate": 1.8150268983139735e-06, "loss": 0.25438056, "memory(GiB)": 34.88, "step": 108420, "train_speed(iter/s)": 0.412078 }, { "acc": 0.93163366, "epoch": 2.935721441529256, "grad_norm": 7.334285736083984, "learning_rate": 1.8145956441232044e-06, "loss": 0.43533373, "memory(GiB)": 34.88, "step": 108425, "train_speed(iter/s)": 0.412079 }, { "acc": 0.94303846, "epoch": 2.935856821812471, "grad_norm": 6.4199676513671875, "learning_rate": 1.8141644298421496e-06, "loss": 0.31859598, "memory(GiB)": 34.88, "step": 108430, "train_speed(iter/s)": 0.41208 }, { "acc": 0.93769855, "epoch": 2.9359922020956866, "grad_norm": 6.177644729614258, "learning_rate": 1.813733255476207e-06, "loss": 0.37105687, "memory(GiB)": 34.88, "step": 108435, "train_speed(iter/s)": 0.412081 }, { "acc": 0.95349636, "epoch": 2.9361275823789024, "grad_norm": 7.419992446899414, "learning_rate": 1.81330212103078e-06, "loss": 0.30338998, "memory(GiB)": 34.88, "step": 108440, "train_speed(iter/s)": 0.412082 }, { "acc": 0.92909756, "epoch": 2.9362629626621177, "grad_norm": 10.07206916809082, "learning_rate": 1.8128710265112688e-06, "loss": 0.39828663, "memory(GiB)": 34.88, "step": 108445, "train_speed(iter/s)": 0.412083 }, { "acc": 0.93694649, "epoch": 2.9363983429453335, "grad_norm": 8.45352840423584, "learning_rate": 1.8124399719230764e-06, "loss": 0.33780122, "memory(GiB)": 34.88, "step": 108450, "train_speed(iter/s)": 0.412084 }, { "acc": 0.93068943, "epoch": 2.936533723228549, "grad_norm": 8.051633834838867, "learning_rate": 1.8120089572715987e-06, "loss": 0.39657102, "memory(GiB)": 34.88, "step": 108455, "train_speed(iter/s)": 0.412085 }, { "acc": 0.93582325, "epoch": 2.9366691035117647, "grad_norm": 5.192365646362305, "learning_rate": 1.8115779825622364e-06, "loss": 0.3665451, "memory(GiB)": 34.88, "step": 108460, "train_speed(iter/s)": 0.412086 }, { "acc": 0.92957573, "epoch": 2.93680448379498, "grad_norm": 7.4720869064331055, "learning_rate": 1.8111470478003907e-06, "loss": 0.40801258, "memory(GiB)": 34.88, "step": 108465, "train_speed(iter/s)": 0.412087 }, { "acc": 0.95093842, "epoch": 2.9369398640781954, "grad_norm": 3.0548202991485596, "learning_rate": 1.8107161529914561e-06, "loss": 0.28022652, "memory(GiB)": 34.88, "step": 108470, "train_speed(iter/s)": 0.412087 }, { "acc": 0.93124962, "epoch": 2.937075244361411, "grad_norm": 4.9018964767456055, "learning_rate": 1.8102852981408316e-06, "loss": 0.34305925, "memory(GiB)": 34.88, "step": 108475, "train_speed(iter/s)": 0.412088 }, { "acc": 0.94996958, "epoch": 2.937210624644627, "grad_norm": 7.197012424468994, "learning_rate": 1.8098544832539153e-06, "loss": 0.33397944, "memory(GiB)": 34.88, "step": 108480, "train_speed(iter/s)": 0.412089 }, { "acc": 0.93915501, "epoch": 2.9373460049278424, "grad_norm": 4.7053656578063965, "learning_rate": 1.8094237083361054e-06, "loss": 0.33519979, "memory(GiB)": 34.88, "step": 108485, "train_speed(iter/s)": 0.41209 }, { "acc": 0.94652634, "epoch": 2.9374813852110577, "grad_norm": 4.9705047607421875, "learning_rate": 1.8089929733927927e-06, "loss": 0.30976622, "memory(GiB)": 34.88, "step": 108490, "train_speed(iter/s)": 0.412091 }, { "acc": 0.94364185, "epoch": 2.9376167654942735, "grad_norm": 16.224445343017578, "learning_rate": 1.808562278429381e-06, "loss": 0.32170529, "memory(GiB)": 34.88, "step": 108495, "train_speed(iter/s)": 0.412092 }, { "acc": 0.95362072, "epoch": 2.937752145777489, "grad_norm": 30.55165672302246, "learning_rate": 1.8081316234512606e-06, "loss": 0.25198812, "memory(GiB)": 34.88, "step": 108500, "train_speed(iter/s)": 0.412094 }, { "acc": 0.93094463, "epoch": 2.9378875260607042, "grad_norm": 3.256105422973633, "learning_rate": 1.807701008463826e-06, "loss": 0.37953465, "memory(GiB)": 34.88, "step": 108505, "train_speed(iter/s)": 0.412094 }, { "acc": 0.93689251, "epoch": 2.93802290634392, "grad_norm": 5.630772590637207, "learning_rate": 1.8072704334724718e-06, "loss": 0.34602222, "memory(GiB)": 34.88, "step": 108510, "train_speed(iter/s)": 0.412095 }, { "acc": 0.93284397, "epoch": 2.938158286627136, "grad_norm": 9.535250663757324, "learning_rate": 1.8068398984825935e-06, "loss": 0.38581407, "memory(GiB)": 34.88, "step": 108515, "train_speed(iter/s)": 0.412096 }, { "acc": 0.94396944, "epoch": 2.938293666910351, "grad_norm": 3.313599109649658, "learning_rate": 1.806409403499585e-06, "loss": 0.36050677, "memory(GiB)": 34.88, "step": 108520, "train_speed(iter/s)": 0.412097 }, { "acc": 0.91675129, "epoch": 2.9384290471935666, "grad_norm": 7.508729457855225, "learning_rate": 1.8059789485288342e-06, "loss": 0.4597682, "memory(GiB)": 34.88, "step": 108525, "train_speed(iter/s)": 0.412098 }, { "acc": 0.93994789, "epoch": 2.9385644274767824, "grad_norm": 7.622847557067871, "learning_rate": 1.80554853357574e-06, "loss": 0.34662464, "memory(GiB)": 34.88, "step": 108530, "train_speed(iter/s)": 0.412099 }, { "acc": 0.93795109, "epoch": 2.9386998077599977, "grad_norm": 11.97034740447998, "learning_rate": 1.8051181586456912e-06, "loss": 0.36652782, "memory(GiB)": 34.88, "step": 108535, "train_speed(iter/s)": 0.4121 }, { "acc": 0.95044727, "epoch": 2.9388351880432135, "grad_norm": 6.494522571563721, "learning_rate": 1.8046878237440768e-06, "loss": 0.26405852, "memory(GiB)": 34.88, "step": 108540, "train_speed(iter/s)": 0.412101 }, { "acc": 0.93587742, "epoch": 2.938970568326429, "grad_norm": 10.313946723937988, "learning_rate": 1.80425752887629e-06, "loss": 0.38007846, "memory(GiB)": 34.88, "step": 108545, "train_speed(iter/s)": 0.412102 }, { "acc": 0.93294296, "epoch": 2.9391059486096447, "grad_norm": 3.0528619289398193, "learning_rate": 1.8038272740477206e-06, "loss": 0.39741192, "memory(GiB)": 34.88, "step": 108550, "train_speed(iter/s)": 0.412103 }, { "acc": 0.94771786, "epoch": 2.93924132889286, "grad_norm": 10.073671340942383, "learning_rate": 1.8033970592637601e-06, "loss": 0.22554662, "memory(GiB)": 34.88, "step": 108555, "train_speed(iter/s)": 0.412104 }, { "acc": 0.94070902, "epoch": 2.9393767091760754, "grad_norm": 5.527122497558594, "learning_rate": 1.8029668845297948e-06, "loss": 0.34972858, "memory(GiB)": 34.88, "step": 108560, "train_speed(iter/s)": 0.412105 }, { "acc": 0.93044796, "epoch": 2.939512089459291, "grad_norm": 13.490915298461914, "learning_rate": 1.802536749851215e-06, "loss": 0.46538315, "memory(GiB)": 34.88, "step": 108565, "train_speed(iter/s)": 0.412106 }, { "acc": 0.9279541, "epoch": 2.9396474697425066, "grad_norm": 16.605560302734375, "learning_rate": 1.8021066552334086e-06, "loss": 0.41500354, "memory(GiB)": 34.88, "step": 108570, "train_speed(iter/s)": 0.412107 }, { "acc": 0.93955841, "epoch": 2.9397828500257224, "grad_norm": 6.0602545738220215, "learning_rate": 1.8016766006817655e-06, "loss": 0.36360044, "memory(GiB)": 34.88, "step": 108575, "train_speed(iter/s)": 0.412108 }, { "acc": 0.93202667, "epoch": 2.9399182303089377, "grad_norm": 5.574978828430176, "learning_rate": 1.8012465862016695e-06, "loss": 0.38535712, "memory(GiB)": 34.88, "step": 108580, "train_speed(iter/s)": 0.412109 }, { "acc": 0.92910433, "epoch": 2.9400536105921535, "grad_norm": 13.613724708557129, "learning_rate": 1.800816611798509e-06, "loss": 0.51207123, "memory(GiB)": 34.88, "step": 108585, "train_speed(iter/s)": 0.41211 }, { "acc": 0.93881197, "epoch": 2.940188990875369, "grad_norm": 8.460784912109375, "learning_rate": 1.8003866774776717e-06, "loss": 0.33644938, "memory(GiB)": 34.88, "step": 108590, "train_speed(iter/s)": 0.412111 }, { "acc": 0.9439558, "epoch": 2.9403243711585842, "grad_norm": 14.88264274597168, "learning_rate": 1.7999567832445411e-06, "loss": 0.34612994, "memory(GiB)": 34.88, "step": 108595, "train_speed(iter/s)": 0.412112 }, { "acc": 0.951754, "epoch": 2.9404597514418, "grad_norm": 7.641819477081299, "learning_rate": 1.7995269291045023e-06, "loss": 0.30910687, "memory(GiB)": 34.88, "step": 108600, "train_speed(iter/s)": 0.412112 }, { "acc": 0.92200689, "epoch": 2.9405951317250154, "grad_norm": 6.935666084289551, "learning_rate": 1.7990971150629418e-06, "loss": 0.47651186, "memory(GiB)": 34.88, "step": 108605, "train_speed(iter/s)": 0.412113 }, { "acc": 0.95359488, "epoch": 2.940730512008231, "grad_norm": 6.4645771980285645, "learning_rate": 1.798667341125245e-06, "loss": 0.25737419, "memory(GiB)": 34.88, "step": 108610, "train_speed(iter/s)": 0.412114 }, { "acc": 0.93759527, "epoch": 2.9408658922914466, "grad_norm": 13.454119682312012, "learning_rate": 1.79823760729679e-06, "loss": 0.25221114, "memory(GiB)": 34.88, "step": 108615, "train_speed(iter/s)": 0.412115 }, { "acc": 0.96007347, "epoch": 2.9410012725746624, "grad_norm": 6.65680456161499, "learning_rate": 1.7978079135829676e-06, "loss": 0.26780558, "memory(GiB)": 34.88, "step": 108620, "train_speed(iter/s)": 0.412116 }, { "acc": 0.94621382, "epoch": 2.9411366528578777, "grad_norm": 6.137953758239746, "learning_rate": 1.7973782599891568e-06, "loss": 0.326161, "memory(GiB)": 34.88, "step": 108625, "train_speed(iter/s)": 0.412117 }, { "acc": 0.95148811, "epoch": 2.941272033141093, "grad_norm": 4.815671443939209, "learning_rate": 1.796948646520738e-06, "loss": 0.32572491, "memory(GiB)": 34.88, "step": 108630, "train_speed(iter/s)": 0.412118 }, { "acc": 0.94410439, "epoch": 2.941407413424309, "grad_norm": 6.5767388343811035, "learning_rate": 1.796519073183095e-06, "loss": 0.31572871, "memory(GiB)": 34.88, "step": 108635, "train_speed(iter/s)": 0.412119 }, { "acc": 0.94650002, "epoch": 2.9415427937075247, "grad_norm": 10.964234352111816, "learning_rate": 1.7960895399816088e-06, "loss": 0.31409936, "memory(GiB)": 34.88, "step": 108640, "train_speed(iter/s)": 0.41212 }, { "acc": 0.93924026, "epoch": 2.94167817399074, "grad_norm": 9.102602005004883, "learning_rate": 1.795660046921662e-06, "loss": 0.37449923, "memory(GiB)": 34.88, "step": 108645, "train_speed(iter/s)": 0.412121 }, { "acc": 0.92459326, "epoch": 2.9418135542739554, "grad_norm": 6.3711957931518555, "learning_rate": 1.79523059400863e-06, "loss": 0.43372517, "memory(GiB)": 34.88, "step": 108650, "train_speed(iter/s)": 0.412121 }, { "acc": 0.93409271, "epoch": 2.941948934557171, "grad_norm": 3.6320385932922363, "learning_rate": 1.794801181247899e-06, "loss": 0.35467019, "memory(GiB)": 34.88, "step": 108655, "train_speed(iter/s)": 0.412122 }, { "acc": 0.94414454, "epoch": 2.9420843148403866, "grad_norm": 7.153548240661621, "learning_rate": 1.7943718086448444e-06, "loss": 0.28349147, "memory(GiB)": 34.88, "step": 108660, "train_speed(iter/s)": 0.412123 }, { "acc": 0.9541954, "epoch": 2.942219695123602, "grad_norm": 4.6445770263671875, "learning_rate": 1.7939424762048441e-06, "loss": 0.2656734, "memory(GiB)": 34.88, "step": 108665, "train_speed(iter/s)": 0.412124 }, { "acc": 0.94562855, "epoch": 2.9423550754068177, "grad_norm": 8.330288887023926, "learning_rate": 1.7935131839332776e-06, "loss": 0.28861604, "memory(GiB)": 34.88, "step": 108670, "train_speed(iter/s)": 0.412125 }, { "acc": 0.9294899, "epoch": 2.9424904556900335, "grad_norm": 6.798101902008057, "learning_rate": 1.7930839318355225e-06, "loss": 0.38427024, "memory(GiB)": 34.88, "step": 108675, "train_speed(iter/s)": 0.412126 }, { "acc": 0.93022861, "epoch": 2.942625835973249, "grad_norm": 9.4117431640625, "learning_rate": 1.7926547199169582e-06, "loss": 0.39957447, "memory(GiB)": 34.88, "step": 108680, "train_speed(iter/s)": 0.412127 }, { "acc": 0.93912659, "epoch": 2.9427612162564643, "grad_norm": 3.7024917602539062, "learning_rate": 1.7922255481829565e-06, "loss": 0.37729111, "memory(GiB)": 34.88, "step": 108685, "train_speed(iter/s)": 0.412128 }, { "acc": 0.940942, "epoch": 2.94289659653968, "grad_norm": 10.910388946533203, "learning_rate": 1.7917964166388992e-06, "loss": 0.40049119, "memory(GiB)": 34.88, "step": 108690, "train_speed(iter/s)": 0.412129 }, { "acc": 0.94123087, "epoch": 2.9430319768228954, "grad_norm": 16.427921295166016, "learning_rate": 1.7913673252901578e-06, "loss": 0.35751553, "memory(GiB)": 34.88, "step": 108695, "train_speed(iter/s)": 0.41213 }, { "acc": 0.93651686, "epoch": 2.943167357106111, "grad_norm": 6.893730640411377, "learning_rate": 1.7909382741421112e-06, "loss": 0.36835406, "memory(GiB)": 34.88, "step": 108700, "train_speed(iter/s)": 0.412131 }, { "acc": 0.93680973, "epoch": 2.9433027373893266, "grad_norm": 4.3846635818481445, "learning_rate": 1.7905092632001298e-06, "loss": 0.41305385, "memory(GiB)": 34.88, "step": 108705, "train_speed(iter/s)": 0.412132 }, { "acc": 0.95024767, "epoch": 2.9434381176725424, "grad_norm": 5.098695278167725, "learning_rate": 1.79008029246959e-06, "loss": 0.31832972, "memory(GiB)": 34.88, "step": 108710, "train_speed(iter/s)": 0.412133 }, { "acc": 0.95066833, "epoch": 2.9435734979557577, "grad_norm": 8.60663890838623, "learning_rate": 1.7896513619558676e-06, "loss": 0.26924899, "memory(GiB)": 34.88, "step": 108715, "train_speed(iter/s)": 0.412134 }, { "acc": 0.91408319, "epoch": 2.943708878238973, "grad_norm": 7.1421403884887695, "learning_rate": 1.7892224716643299e-06, "loss": 0.50212593, "memory(GiB)": 34.88, "step": 108720, "train_speed(iter/s)": 0.412135 }, { "acc": 0.93584328, "epoch": 2.943844258522189, "grad_norm": 6.419028282165527, "learning_rate": 1.788793621600357e-06, "loss": 0.39050846, "memory(GiB)": 34.88, "step": 108725, "train_speed(iter/s)": 0.412136 }, { "acc": 0.95084915, "epoch": 2.9439796388054043, "grad_norm": 3.7281720638275146, "learning_rate": 1.7883648117693156e-06, "loss": 0.25617223, "memory(GiB)": 34.88, "step": 108730, "train_speed(iter/s)": 0.412137 }, { "acc": 0.94154396, "epoch": 2.94411501908862, "grad_norm": 4.675113201141357, "learning_rate": 1.7879360421765815e-06, "loss": 0.30445607, "memory(GiB)": 34.88, "step": 108735, "train_speed(iter/s)": 0.412138 }, { "acc": 0.94439802, "epoch": 2.9442503993718354, "grad_norm": 5.8365325927734375, "learning_rate": 1.7875073128275195e-06, "loss": 0.25493758, "memory(GiB)": 34.88, "step": 108740, "train_speed(iter/s)": 0.412139 }, { "acc": 0.94089346, "epoch": 2.944385779655051, "grad_norm": 8.074456214904785, "learning_rate": 1.7870786237275084e-06, "loss": 0.32793274, "memory(GiB)": 34.88, "step": 108745, "train_speed(iter/s)": 0.41214 }, { "acc": 0.93832359, "epoch": 2.9445211599382666, "grad_norm": 6.994224548339844, "learning_rate": 1.7866499748819137e-06, "loss": 0.37374549, "memory(GiB)": 34.88, "step": 108750, "train_speed(iter/s)": 0.412141 }, { "acc": 0.9281805, "epoch": 2.944656540221482, "grad_norm": 6.865875244140625, "learning_rate": 1.7862213662961034e-06, "loss": 0.43386073, "memory(GiB)": 34.88, "step": 108755, "train_speed(iter/s)": 0.412142 }, { "acc": 0.94309778, "epoch": 2.9447919205046977, "grad_norm": 6.279681205749512, "learning_rate": 1.785792797975452e-06, "loss": 0.36130872, "memory(GiB)": 34.88, "step": 108760, "train_speed(iter/s)": 0.412143 }, { "acc": 0.94068441, "epoch": 2.944927300787913, "grad_norm": 3.703981399536133, "learning_rate": 1.7853642699253231e-06, "loss": 0.29975376, "memory(GiB)": 34.88, "step": 108765, "train_speed(iter/s)": 0.412144 }, { "acc": 0.94194298, "epoch": 2.945062681071129, "grad_norm": 12.5328369140625, "learning_rate": 1.7849357821510887e-06, "loss": 0.39345546, "memory(GiB)": 34.88, "step": 108770, "train_speed(iter/s)": 0.412145 }, { "acc": 0.93121929, "epoch": 2.9451980613543443, "grad_norm": 4.834841728210449, "learning_rate": 1.7845073346581115e-06, "loss": 0.41454782, "memory(GiB)": 34.88, "step": 108775, "train_speed(iter/s)": 0.412146 }, { "acc": 0.9352191, "epoch": 2.94533344163756, "grad_norm": 6.014826774597168, "learning_rate": 1.7840789274517653e-06, "loss": 0.34154029, "memory(GiB)": 34.88, "step": 108780, "train_speed(iter/s)": 0.412147 }, { "acc": 0.93765965, "epoch": 2.9454688219207754, "grad_norm": 6.104146957397461, "learning_rate": 1.7836505605374117e-06, "loss": 0.34130173, "memory(GiB)": 34.88, "step": 108785, "train_speed(iter/s)": 0.412148 }, { "acc": 0.94000721, "epoch": 2.9456042022039908, "grad_norm": 5.515597343444824, "learning_rate": 1.7832222339204184e-06, "loss": 0.33277564, "memory(GiB)": 34.88, "step": 108790, "train_speed(iter/s)": 0.412149 }, { "acc": 0.92623672, "epoch": 2.9457395824872066, "grad_norm": 9.866880416870117, "learning_rate": 1.7827939476061523e-06, "loss": 0.3891727, "memory(GiB)": 34.88, "step": 108795, "train_speed(iter/s)": 0.41215 }, { "acc": 0.93653593, "epoch": 2.9458749627704224, "grad_norm": 46.12395095825195, "learning_rate": 1.782365701599976e-06, "loss": 0.34403749, "memory(GiB)": 34.88, "step": 108800, "train_speed(iter/s)": 0.412151 }, { "acc": 0.93826132, "epoch": 2.9460103430536377, "grad_norm": 8.392247200012207, "learning_rate": 1.7819374959072572e-06, "loss": 0.37326553, "memory(GiB)": 34.88, "step": 108805, "train_speed(iter/s)": 0.412152 }, { "acc": 0.93635044, "epoch": 2.946145723336853, "grad_norm": 7.503725051879883, "learning_rate": 1.7815093305333548e-06, "loss": 0.3036989, "memory(GiB)": 34.88, "step": 108810, "train_speed(iter/s)": 0.412153 }, { "acc": 0.94769268, "epoch": 2.946281103620069, "grad_norm": 5.894956111907959, "learning_rate": 1.7810812054836388e-06, "loss": 0.27263427, "memory(GiB)": 34.88, "step": 108815, "train_speed(iter/s)": 0.412154 }, { "acc": 0.93949909, "epoch": 2.9464164839032843, "grad_norm": 4.409616470336914, "learning_rate": 1.7806531207634684e-06, "loss": 0.35093002, "memory(GiB)": 34.88, "step": 108820, "train_speed(iter/s)": 0.412155 }, { "acc": 0.95539122, "epoch": 2.9465518641864996, "grad_norm": 9.697660446166992, "learning_rate": 1.7802250763782066e-06, "loss": 0.24134278, "memory(GiB)": 34.88, "step": 108825, "train_speed(iter/s)": 0.412156 }, { "acc": 0.94400272, "epoch": 2.9466872444697154, "grad_norm": 7.169013977050781, "learning_rate": 1.7797970723332182e-06, "loss": 0.24275267, "memory(GiB)": 34.88, "step": 108830, "train_speed(iter/s)": 0.412157 }, { "acc": 0.94968472, "epoch": 2.946822624752931, "grad_norm": 8.905203819274902, "learning_rate": 1.7793691086338602e-06, "loss": 0.35895524, "memory(GiB)": 34.88, "step": 108835, "train_speed(iter/s)": 0.412158 }, { "acc": 0.93439903, "epoch": 2.9469580050361466, "grad_norm": 15.975380897521973, "learning_rate": 1.7789411852854984e-06, "loss": 0.39324574, "memory(GiB)": 34.88, "step": 108840, "train_speed(iter/s)": 0.412159 }, { "acc": 0.9406373, "epoch": 2.947093385319362, "grad_norm": 6.022390365600586, "learning_rate": 1.7785133022934871e-06, "loss": 0.33627222, "memory(GiB)": 34.88, "step": 108845, "train_speed(iter/s)": 0.41216 }, { "acc": 0.92904949, "epoch": 2.9472287656025777, "grad_norm": 12.885167121887207, "learning_rate": 1.7780854596631947e-06, "loss": 0.41776915, "memory(GiB)": 34.88, "step": 108850, "train_speed(iter/s)": 0.412161 }, { "acc": 0.92682495, "epoch": 2.947364145885793, "grad_norm": 6.0656609535217285, "learning_rate": 1.7776576573999742e-06, "loss": 0.42823954, "memory(GiB)": 34.88, "step": 108855, "train_speed(iter/s)": 0.412162 }, { "acc": 0.93873787, "epoch": 2.947499526169009, "grad_norm": 6.260306358337402, "learning_rate": 1.7772298955091865e-06, "loss": 0.32079532, "memory(GiB)": 34.88, "step": 108860, "train_speed(iter/s)": 0.412163 }, { "acc": 0.93406372, "epoch": 2.9476349064522243, "grad_norm": 23.38873291015625, "learning_rate": 1.7768021739961917e-06, "loss": 0.35903065, "memory(GiB)": 34.88, "step": 108865, "train_speed(iter/s)": 0.412164 }, { "acc": 0.95067348, "epoch": 2.94777028673544, "grad_norm": 10.465436935424805, "learning_rate": 1.7763744928663473e-06, "loss": 0.30216632, "memory(GiB)": 34.88, "step": 108870, "train_speed(iter/s)": 0.412165 }, { "acc": 0.9484499, "epoch": 2.9479056670186554, "grad_norm": 3.559901475906372, "learning_rate": 1.7759468521250106e-06, "loss": 0.29854865, "memory(GiB)": 34.88, "step": 108875, "train_speed(iter/s)": 0.412166 }, { "acc": 0.93091335, "epoch": 2.9480410473018708, "grad_norm": 18.694210052490234, "learning_rate": 1.7755192517775352e-06, "loss": 0.40873332, "memory(GiB)": 34.88, "step": 108880, "train_speed(iter/s)": 0.412167 }, { "acc": 0.94235878, "epoch": 2.9481764275850866, "grad_norm": 7.093217372894287, "learning_rate": 1.775091691829284e-06, "loss": 0.34927406, "memory(GiB)": 34.88, "step": 108885, "train_speed(iter/s)": 0.412168 }, { "acc": 0.93715897, "epoch": 2.948311807868302, "grad_norm": 6.010318279266357, "learning_rate": 1.7746641722856085e-06, "loss": 0.34585447, "memory(GiB)": 34.88, "step": 108890, "train_speed(iter/s)": 0.412169 }, { "acc": 0.93128319, "epoch": 2.9484471881515177, "grad_norm": 3.844616413116455, "learning_rate": 1.7742366931518644e-06, "loss": 0.43457146, "memory(GiB)": 34.88, "step": 108895, "train_speed(iter/s)": 0.41217 }, { "acc": 0.93138523, "epoch": 2.948582568434733, "grad_norm": 11.99202823638916, "learning_rate": 1.773809254433409e-06, "loss": 0.40122099, "memory(GiB)": 34.88, "step": 108900, "train_speed(iter/s)": 0.412171 }, { "acc": 0.93847599, "epoch": 2.948717948717949, "grad_norm": 5.122461318969727, "learning_rate": 1.773381856135597e-06, "loss": 0.31113133, "memory(GiB)": 34.88, "step": 108905, "train_speed(iter/s)": 0.412172 }, { "acc": 0.93703871, "epoch": 2.9488533290011643, "grad_norm": 4.428903102874756, "learning_rate": 1.7729544982637787e-06, "loss": 0.33567152, "memory(GiB)": 34.88, "step": 108910, "train_speed(iter/s)": 0.412172 }, { "acc": 0.94296875, "epoch": 2.9489887092843796, "grad_norm": 9.29344367980957, "learning_rate": 1.7725271808233104e-06, "loss": 0.28665037, "memory(GiB)": 34.88, "step": 108915, "train_speed(iter/s)": 0.412173 }, { "acc": 0.92829914, "epoch": 2.9491240895675954, "grad_norm": 7.059850692749023, "learning_rate": 1.772099903819546e-06, "loss": 0.43798518, "memory(GiB)": 34.88, "step": 108920, "train_speed(iter/s)": 0.412174 }, { "acc": 0.94156933, "epoch": 2.9492594698508108, "grad_norm": 10.804729461669922, "learning_rate": 1.7716726672578353e-06, "loss": 0.31273093, "memory(GiB)": 34.88, "step": 108925, "train_speed(iter/s)": 0.412175 }, { "acc": 0.94531088, "epoch": 2.9493948501340266, "grad_norm": 5.16411018371582, "learning_rate": 1.771245471143533e-06, "loss": 0.24864278, "memory(GiB)": 34.88, "step": 108930, "train_speed(iter/s)": 0.412176 }, { "acc": 0.95694284, "epoch": 2.949530230417242, "grad_norm": 3.7381739616394043, "learning_rate": 1.7708183154819866e-06, "loss": 0.21123552, "memory(GiB)": 34.88, "step": 108935, "train_speed(iter/s)": 0.412177 }, { "acc": 0.94793272, "epoch": 2.9496656107004577, "grad_norm": 4.227437496185303, "learning_rate": 1.7703912002785517e-06, "loss": 0.29079981, "memory(GiB)": 34.88, "step": 108940, "train_speed(iter/s)": 0.412178 }, { "acc": 0.94108238, "epoch": 2.949800990983673, "grad_norm": 4.019680023193359, "learning_rate": 1.7699641255385757e-06, "loss": 0.34765525, "memory(GiB)": 34.88, "step": 108945, "train_speed(iter/s)": 0.412179 }, { "acc": 0.92247343, "epoch": 2.9499363712668885, "grad_norm": 8.086106300354004, "learning_rate": 1.7695370912674097e-06, "loss": 0.44568462, "memory(GiB)": 34.88, "step": 108950, "train_speed(iter/s)": 0.41218 }, { "acc": 0.94436207, "epoch": 2.9500717515501043, "grad_norm": 5.884735584259033, "learning_rate": 1.7691100974704044e-06, "loss": 0.29451532, "memory(GiB)": 34.88, "step": 108955, "train_speed(iter/s)": 0.412181 }, { "acc": 0.92315083, "epoch": 2.95020713183332, "grad_norm": 11.252897262573242, "learning_rate": 1.768683144152905e-06, "loss": 0.3972436, "memory(GiB)": 34.88, "step": 108960, "train_speed(iter/s)": 0.412182 }, { "acc": 0.94468822, "epoch": 2.9503425121165354, "grad_norm": 3.8474748134613037, "learning_rate": 1.7682562313202647e-06, "loss": 0.25652871, "memory(GiB)": 34.88, "step": 108965, "train_speed(iter/s)": 0.412183 }, { "acc": 0.93548889, "epoch": 2.950477892399751, "grad_norm": 10.694175720214844, "learning_rate": 1.7678293589778257e-06, "loss": 0.40043287, "memory(GiB)": 34.88, "step": 108970, "train_speed(iter/s)": 0.412184 }, { "acc": 0.9598382, "epoch": 2.9506132726829666, "grad_norm": 3.4422013759613037, "learning_rate": 1.7674025271309417e-06, "loss": 0.21943874, "memory(GiB)": 34.88, "step": 108975, "train_speed(iter/s)": 0.412185 }, { "acc": 0.94032707, "epoch": 2.950748652966182, "grad_norm": 7.822531223297119, "learning_rate": 1.7669757357849545e-06, "loss": 0.32372875, "memory(GiB)": 34.88, "step": 108980, "train_speed(iter/s)": 0.412186 }, { "acc": 0.94778996, "epoch": 2.9508840332493973, "grad_norm": 5.174524784088135, "learning_rate": 1.7665489849452134e-06, "loss": 0.26499894, "memory(GiB)": 34.88, "step": 108985, "train_speed(iter/s)": 0.412187 }, { "acc": 0.93518705, "epoch": 2.951019413532613, "grad_norm": 3.915403366088867, "learning_rate": 1.766122274617063e-06, "loss": 0.44926491, "memory(GiB)": 34.88, "step": 108990, "train_speed(iter/s)": 0.412188 }, { "acc": 0.93113623, "epoch": 2.951154793815829, "grad_norm": 3.300262212753296, "learning_rate": 1.765695604805851e-06, "loss": 0.36714871, "memory(GiB)": 34.88, "step": 108995, "train_speed(iter/s)": 0.412189 }, { "acc": 0.94157619, "epoch": 2.9512901740990443, "grad_norm": 6.630136013031006, "learning_rate": 1.7652689755169203e-06, "loss": 0.33006246, "memory(GiB)": 34.88, "step": 109000, "train_speed(iter/s)": 0.41219 }, { "acc": 0.9315032, "epoch": 2.9514255543822596, "grad_norm": 8.179933547973633, "learning_rate": 1.7648423867556117e-06, "loss": 0.37013748, "memory(GiB)": 34.88, "step": 109005, "train_speed(iter/s)": 0.412191 }, { "acc": 0.94769878, "epoch": 2.9515609346654754, "grad_norm": 4.622296333312988, "learning_rate": 1.7644158385272762e-06, "loss": 0.28142669, "memory(GiB)": 34.88, "step": 109010, "train_speed(iter/s)": 0.412192 }, { "acc": 0.9283474, "epoch": 2.951696314948691, "grad_norm": 13.515769004821777, "learning_rate": 1.7639893308372525e-06, "loss": 0.38849463, "memory(GiB)": 34.88, "step": 109015, "train_speed(iter/s)": 0.412193 }, { "acc": 0.94482412, "epoch": 2.9518316952319066, "grad_norm": 11.163824081420898, "learning_rate": 1.7635628636908843e-06, "loss": 0.36196208, "memory(GiB)": 34.88, "step": 109020, "train_speed(iter/s)": 0.412194 }, { "acc": 0.95123777, "epoch": 2.951967075515122, "grad_norm": 11.567367553710938, "learning_rate": 1.7631364370935148e-06, "loss": 0.31151185, "memory(GiB)": 34.88, "step": 109025, "train_speed(iter/s)": 0.412194 }, { "acc": 0.9090806, "epoch": 2.9521024557983377, "grad_norm": 11.502609252929688, "learning_rate": 1.7627100510504866e-06, "loss": 0.55153923, "memory(GiB)": 34.88, "step": 109030, "train_speed(iter/s)": 0.412195 }, { "acc": 0.93521557, "epoch": 2.952237836081553, "grad_norm": 6.990773677825928, "learning_rate": 1.7622837055671382e-06, "loss": 0.43895826, "memory(GiB)": 34.88, "step": 109035, "train_speed(iter/s)": 0.412196 }, { "acc": 0.92974358, "epoch": 2.9523732163647685, "grad_norm": 5.937129497528076, "learning_rate": 1.7618574006488114e-06, "loss": 0.3403863, "memory(GiB)": 34.88, "step": 109040, "train_speed(iter/s)": 0.412197 }, { "acc": 0.93368397, "epoch": 2.9525085966479843, "grad_norm": 6.73336935043335, "learning_rate": 1.7614311363008496e-06, "loss": 0.39206009, "memory(GiB)": 34.88, "step": 109045, "train_speed(iter/s)": 0.412198 }, { "acc": 0.95030222, "epoch": 2.9526439769311996, "grad_norm": 6.208309650421143, "learning_rate": 1.7610049125285877e-06, "loss": 0.24938047, "memory(GiB)": 34.88, "step": 109050, "train_speed(iter/s)": 0.412199 }, { "acc": 0.94395275, "epoch": 2.9527793572144154, "grad_norm": 5.7257232666015625, "learning_rate": 1.760578729337368e-06, "loss": 0.38395097, "memory(GiB)": 34.88, "step": 109055, "train_speed(iter/s)": 0.4122 }, { "acc": 0.94951172, "epoch": 2.952914737497631, "grad_norm": 7.972770690917969, "learning_rate": 1.7601525867325281e-06, "loss": 0.34675832, "memory(GiB)": 34.88, "step": 109060, "train_speed(iter/s)": 0.412201 }, { "acc": 0.93371029, "epoch": 2.9530501177808466, "grad_norm": 7.672390937805176, "learning_rate": 1.7597264847194092e-06, "loss": 0.43405008, "memory(GiB)": 34.88, "step": 109065, "train_speed(iter/s)": 0.412202 }, { "acc": 0.93562841, "epoch": 2.953185498064062, "grad_norm": 7.626605033874512, "learning_rate": 1.7593004233033445e-06, "loss": 0.29002342, "memory(GiB)": 34.88, "step": 109070, "train_speed(iter/s)": 0.412203 }, { "acc": 0.95837708, "epoch": 2.9533208783472773, "grad_norm": 4.353302001953125, "learning_rate": 1.7588744024896735e-06, "loss": 0.27416129, "memory(GiB)": 34.88, "step": 109075, "train_speed(iter/s)": 0.412204 }, { "acc": 0.94329453, "epoch": 2.953456258630493, "grad_norm": 2.5417401790618896, "learning_rate": 1.758448422283734e-06, "loss": 0.3496439, "memory(GiB)": 34.88, "step": 109080, "train_speed(iter/s)": 0.412205 }, { "acc": 0.93313017, "epoch": 2.9535916389137085, "grad_norm": 6.952619552612305, "learning_rate": 1.7580224826908604e-06, "loss": 0.48892298, "memory(GiB)": 34.88, "step": 109085, "train_speed(iter/s)": 0.412206 }, { "acc": 0.93098278, "epoch": 2.9537270191969243, "grad_norm": 10.94626522064209, "learning_rate": 1.7575965837163888e-06, "loss": 0.37869921, "memory(GiB)": 34.88, "step": 109090, "train_speed(iter/s)": 0.412207 }, { "acc": 0.94465971, "epoch": 2.9538623994801396, "grad_norm": 4.600067138671875, "learning_rate": 1.7571707253656547e-06, "loss": 0.27204194, "memory(GiB)": 34.88, "step": 109095, "train_speed(iter/s)": 0.412208 }, { "acc": 0.94605465, "epoch": 2.9539977797633554, "grad_norm": 5.950283527374268, "learning_rate": 1.756744907643995e-06, "loss": 0.32230713, "memory(GiB)": 34.88, "step": 109100, "train_speed(iter/s)": 0.412209 }, { "acc": 0.93419971, "epoch": 2.954133160046571, "grad_norm": 10.837331771850586, "learning_rate": 1.7563191305567399e-06, "loss": 0.35653496, "memory(GiB)": 34.88, "step": 109105, "train_speed(iter/s)": 0.41221 }, { "acc": 0.94270086, "epoch": 2.954268540329786, "grad_norm": 5.736912727355957, "learning_rate": 1.7558933941092249e-06, "loss": 0.31774135, "memory(GiB)": 34.88, "step": 109110, "train_speed(iter/s)": 0.412211 }, { "acc": 0.9424058, "epoch": 2.954403920613002, "grad_norm": 9.273433685302734, "learning_rate": 1.7554676983067827e-06, "loss": 0.33355732, "memory(GiB)": 34.88, "step": 109115, "train_speed(iter/s)": 0.412212 }, { "acc": 0.93928347, "epoch": 2.9545393008962177, "grad_norm": 3.357938289642334, "learning_rate": 1.7550420431547494e-06, "loss": 0.35695126, "memory(GiB)": 34.88, "step": 109120, "train_speed(iter/s)": 0.412213 }, { "acc": 0.94480915, "epoch": 2.954674681179433, "grad_norm": 4.463879108428955, "learning_rate": 1.7546164286584516e-06, "loss": 0.35535765, "memory(GiB)": 34.88, "step": 109125, "train_speed(iter/s)": 0.412214 }, { "acc": 0.92885714, "epoch": 2.9548100614626485, "grad_norm": 8.98189926147461, "learning_rate": 1.7541908548232245e-06, "loss": 0.40021195, "memory(GiB)": 34.88, "step": 109130, "train_speed(iter/s)": 0.412215 }, { "acc": 0.94743414, "epoch": 2.9549454417458643, "grad_norm": 7.112666606903076, "learning_rate": 1.7537653216543996e-06, "loss": 0.30658765, "memory(GiB)": 34.88, "step": 109135, "train_speed(iter/s)": 0.412216 }, { "acc": 0.95918713, "epoch": 2.9550808220290796, "grad_norm": 3.06400203704834, "learning_rate": 1.7533398291573046e-06, "loss": 0.26707773, "memory(GiB)": 34.88, "step": 109140, "train_speed(iter/s)": 0.412217 }, { "acc": 0.94339504, "epoch": 2.955216202312295, "grad_norm": 8.863232612609863, "learning_rate": 1.7529143773372717e-06, "loss": 0.31008029, "memory(GiB)": 34.88, "step": 109145, "train_speed(iter/s)": 0.412218 }, { "acc": 0.94212284, "epoch": 2.955351582595511, "grad_norm": 5.911715984344482, "learning_rate": 1.75248896619963e-06, "loss": 0.30779455, "memory(GiB)": 34.88, "step": 109150, "train_speed(iter/s)": 0.412219 }, { "acc": 0.93815622, "epoch": 2.9554869628787266, "grad_norm": 6.839084148406982, "learning_rate": 1.7520635957497107e-06, "loss": 0.34765444, "memory(GiB)": 34.88, "step": 109155, "train_speed(iter/s)": 0.41222 }, { "acc": 0.93488827, "epoch": 2.955622343161942, "grad_norm": 7.151370048522949, "learning_rate": 1.7516382659928388e-06, "loss": 0.35679731, "memory(GiB)": 34.88, "step": 109160, "train_speed(iter/s)": 0.412221 }, { "acc": 0.94809113, "epoch": 2.9557577234451573, "grad_norm": 27.676496505737305, "learning_rate": 1.7512129769343444e-06, "loss": 0.34513841, "memory(GiB)": 34.88, "step": 109165, "train_speed(iter/s)": 0.412222 }, { "acc": 0.9453413, "epoch": 2.955893103728373, "grad_norm": 2.934016704559326, "learning_rate": 1.750787728579556e-06, "loss": 0.29925988, "memory(GiB)": 34.88, "step": 109170, "train_speed(iter/s)": 0.412223 }, { "acc": 0.94144688, "epoch": 2.9560284840115885, "grad_norm": 17.61614990234375, "learning_rate": 1.7503625209337977e-06, "loss": 0.27587347, "memory(GiB)": 34.88, "step": 109175, "train_speed(iter/s)": 0.412224 }, { "acc": 0.95386276, "epoch": 2.9561638642948043, "grad_norm": 6.240654468536377, "learning_rate": 1.7499373540023987e-06, "loss": 0.24221587, "memory(GiB)": 34.88, "step": 109180, "train_speed(iter/s)": 0.412225 }, { "acc": 0.9421711, "epoch": 2.9562992445780196, "grad_norm": 4.995537281036377, "learning_rate": 1.7495122277906832e-06, "loss": 0.31956277, "memory(GiB)": 34.88, "step": 109185, "train_speed(iter/s)": 0.412226 }, { "acc": 0.93213825, "epoch": 2.9564346248612354, "grad_norm": 7.109813690185547, "learning_rate": 1.7490871423039799e-06, "loss": 0.37529421, "memory(GiB)": 34.88, "step": 109190, "train_speed(iter/s)": 0.412227 }, { "acc": 0.9420332, "epoch": 2.956570005144451, "grad_norm": 9.04442024230957, "learning_rate": 1.7486620975476101e-06, "loss": 0.32891548, "memory(GiB)": 34.88, "step": 109195, "train_speed(iter/s)": 0.412228 }, { "acc": 0.94629688, "epoch": 2.956705385427666, "grad_norm": 6.048051357269287, "learning_rate": 1.7482370935268996e-06, "loss": 0.29601293, "memory(GiB)": 34.88, "step": 109200, "train_speed(iter/s)": 0.412229 }, { "acc": 0.95152845, "epoch": 2.956840765710882, "grad_norm": 6.487617015838623, "learning_rate": 1.7478121302471744e-06, "loss": 0.28054285, "memory(GiB)": 34.88, "step": 109205, "train_speed(iter/s)": 0.41223 }, { "acc": 0.93220482, "epoch": 2.9569761459940973, "grad_norm": 7.129615783691406, "learning_rate": 1.7473872077137547e-06, "loss": 0.34777932, "memory(GiB)": 34.88, "step": 109210, "train_speed(iter/s)": 0.412231 }, { "acc": 0.94103527, "epoch": 2.957111526277313, "grad_norm": 11.985607147216797, "learning_rate": 1.746962325931965e-06, "loss": 0.378758, "memory(GiB)": 34.88, "step": 109215, "train_speed(iter/s)": 0.412232 }, { "acc": 0.93931046, "epoch": 2.9572469065605285, "grad_norm": 4.196687698364258, "learning_rate": 1.7465374849071276e-06, "loss": 0.36829617, "memory(GiB)": 34.88, "step": 109220, "train_speed(iter/s)": 0.412233 }, { "acc": 0.94063921, "epoch": 2.9573822868437443, "grad_norm": 12.185912132263184, "learning_rate": 1.7461126846445666e-06, "loss": 0.35914483, "memory(GiB)": 34.88, "step": 109225, "train_speed(iter/s)": 0.412234 }, { "acc": 0.94904242, "epoch": 2.9575176671269596, "grad_norm": 3.6661784648895264, "learning_rate": 1.7456879251496e-06, "loss": 0.29081182, "memory(GiB)": 34.88, "step": 109230, "train_speed(iter/s)": 0.412235 }, { "acc": 0.93215799, "epoch": 2.957653047410175, "grad_norm": 7.113080978393555, "learning_rate": 1.7452632064275504e-06, "loss": 0.34146826, "memory(GiB)": 34.88, "step": 109235, "train_speed(iter/s)": 0.412236 }, { "acc": 0.94045048, "epoch": 2.957788427693391, "grad_norm": 8.218839645385742, "learning_rate": 1.7448385284837382e-06, "loss": 0.35201015, "memory(GiB)": 34.88, "step": 109240, "train_speed(iter/s)": 0.412237 }, { "acc": 0.94583941, "epoch": 2.957923807976606, "grad_norm": 5.376667022705078, "learning_rate": 1.744413891323485e-06, "loss": 0.3434885, "memory(GiB)": 34.88, "step": 109245, "train_speed(iter/s)": 0.412237 }, { "acc": 0.9502779, "epoch": 2.958059188259822, "grad_norm": 9.64177131652832, "learning_rate": 1.7439892949521064e-06, "loss": 0.31745875, "memory(GiB)": 34.88, "step": 109250, "train_speed(iter/s)": 0.412238 }, { "acc": 0.9452466, "epoch": 2.9581945685430373, "grad_norm": 6.7990193367004395, "learning_rate": 1.743564739374924e-06, "loss": 0.30952225, "memory(GiB)": 34.88, "step": 109255, "train_speed(iter/s)": 0.412239 }, { "acc": 0.93684273, "epoch": 2.958329948826253, "grad_norm": 4.314508438110352, "learning_rate": 1.7431402245972568e-06, "loss": 0.38569467, "memory(GiB)": 34.88, "step": 109260, "train_speed(iter/s)": 0.41224 }, { "acc": 0.94360542, "epoch": 2.9584653291094685, "grad_norm": 6.647030353546143, "learning_rate": 1.74271575062442e-06, "loss": 0.32120805, "memory(GiB)": 34.88, "step": 109265, "train_speed(iter/s)": 0.412241 }, { "acc": 0.94090271, "epoch": 2.958600709392684, "grad_norm": 2.4766337871551514, "learning_rate": 1.7422913174617327e-06, "loss": 0.33743994, "memory(GiB)": 34.88, "step": 109270, "train_speed(iter/s)": 0.412242 }, { "acc": 0.94589453, "epoch": 2.9587360896758996, "grad_norm": 4.885346412658691, "learning_rate": 1.741866925114511e-06, "loss": 0.27542276, "memory(GiB)": 34.88, "step": 109275, "train_speed(iter/s)": 0.412243 }, { "acc": 0.93316936, "epoch": 2.9588714699591154, "grad_norm": 3.797449827194214, "learning_rate": 1.741442573588074e-06, "loss": 0.40479622, "memory(GiB)": 34.88, "step": 109280, "train_speed(iter/s)": 0.412244 }, { "acc": 0.9469305, "epoch": 2.959006850242331, "grad_norm": 14.881168365478516, "learning_rate": 1.741018262887731e-06, "loss": 0.2928504, "memory(GiB)": 34.88, "step": 109285, "train_speed(iter/s)": 0.412245 }, { "acc": 0.94054327, "epoch": 2.959142230525546, "grad_norm": 11.623353004455566, "learning_rate": 1.7405939930188059e-06, "loss": 0.32901201, "memory(GiB)": 34.88, "step": 109290, "train_speed(iter/s)": 0.412246 }, { "acc": 0.9394083, "epoch": 2.959277610808762, "grad_norm": 7.275746822357178, "learning_rate": 1.7401697639866091e-06, "loss": 0.37396233, "memory(GiB)": 34.88, "step": 109295, "train_speed(iter/s)": 0.412247 }, { "acc": 0.9392663, "epoch": 2.9594129910919773, "grad_norm": 4.7390642166137695, "learning_rate": 1.739745575796453e-06, "loss": 0.38815365, "memory(GiB)": 34.88, "step": 109300, "train_speed(iter/s)": 0.412248 }, { "acc": 0.96122704, "epoch": 2.9595483713751927, "grad_norm": 2.30956768989563, "learning_rate": 1.7393214284536541e-06, "loss": 0.18644423, "memory(GiB)": 34.88, "step": 109305, "train_speed(iter/s)": 0.412249 }, { "acc": 0.93330669, "epoch": 2.9596837516584085, "grad_norm": 4.962413311004639, "learning_rate": 1.7388973219635247e-06, "loss": 0.41149688, "memory(GiB)": 34.88, "step": 109310, "train_speed(iter/s)": 0.41225 }, { "acc": 0.91801109, "epoch": 2.9598191319416243, "grad_norm": 7.780290603637695, "learning_rate": 1.7384732563313793e-06, "loss": 0.49890652, "memory(GiB)": 34.88, "step": 109315, "train_speed(iter/s)": 0.412251 }, { "acc": 0.93442879, "epoch": 2.9599545122248396, "grad_norm": 5.374207019805908, "learning_rate": 1.7380492315625253e-06, "loss": 0.37509069, "memory(GiB)": 34.88, "step": 109320, "train_speed(iter/s)": 0.412252 }, { "acc": 0.93383198, "epoch": 2.960089892508055, "grad_norm": 4.332144737243652, "learning_rate": 1.7376252476622818e-06, "loss": 0.39295516, "memory(GiB)": 34.88, "step": 109325, "train_speed(iter/s)": 0.412253 }, { "acc": 0.9408227, "epoch": 2.960225272791271, "grad_norm": 14.678600311279297, "learning_rate": 1.7372013046359537e-06, "loss": 0.38164384, "memory(GiB)": 34.88, "step": 109330, "train_speed(iter/s)": 0.412254 }, { "acc": 0.94231157, "epoch": 2.960360653074486, "grad_norm": 3.9964699745178223, "learning_rate": 1.7367774024888562e-06, "loss": 0.33210902, "memory(GiB)": 34.88, "step": 109335, "train_speed(iter/s)": 0.412255 }, { "acc": 0.94147787, "epoch": 2.960496033357702, "grad_norm": 11.006793022155762, "learning_rate": 1.7363535412262958e-06, "loss": 0.34320264, "memory(GiB)": 34.88, "step": 109340, "train_speed(iter/s)": 0.412256 }, { "acc": 0.9373641, "epoch": 2.9606314136409173, "grad_norm": 9.798971176147461, "learning_rate": 1.7359297208535841e-06, "loss": 0.38252287, "memory(GiB)": 34.88, "step": 109345, "train_speed(iter/s)": 0.412257 }, { "acc": 0.93138714, "epoch": 2.960766793924133, "grad_norm": 28.777196884155273, "learning_rate": 1.7355059413760307e-06, "loss": 0.38884096, "memory(GiB)": 34.88, "step": 109350, "train_speed(iter/s)": 0.412258 }, { "acc": 0.92879448, "epoch": 2.9609021742073485, "grad_norm": 8.152496337890625, "learning_rate": 1.7350822027989417e-06, "loss": 0.32480719, "memory(GiB)": 34.88, "step": 109355, "train_speed(iter/s)": 0.412259 }, { "acc": 0.94370689, "epoch": 2.961037554490564, "grad_norm": 4.576077461242676, "learning_rate": 1.7346585051276293e-06, "loss": 0.33423994, "memory(GiB)": 34.88, "step": 109360, "train_speed(iter/s)": 0.41226 }, { "acc": 0.9533989, "epoch": 2.9611729347737796, "grad_norm": 6.5331645011901855, "learning_rate": 1.7342348483673976e-06, "loss": 0.26781139, "memory(GiB)": 34.88, "step": 109365, "train_speed(iter/s)": 0.41226 }, { "acc": 0.94632053, "epoch": 2.961308315056995, "grad_norm": 3.9231300354003906, "learning_rate": 1.7338112325235567e-06, "loss": 0.25643899, "memory(GiB)": 34.88, "step": 109370, "train_speed(iter/s)": 0.412261 }, { "acc": 0.95183315, "epoch": 2.961443695340211, "grad_norm": 4.251149654388428, "learning_rate": 1.7333876576014108e-06, "loss": 0.24419646, "memory(GiB)": 34.88, "step": 109375, "train_speed(iter/s)": 0.412262 }, { "acc": 0.93642349, "epoch": 2.961579075623426, "grad_norm": 4.317321300506592, "learning_rate": 1.7329641236062661e-06, "loss": 0.35751972, "memory(GiB)": 34.88, "step": 109380, "train_speed(iter/s)": 0.412263 }, { "acc": 0.93797655, "epoch": 2.961714455906642, "grad_norm": 20.77849578857422, "learning_rate": 1.732540630543431e-06, "loss": 0.36458306, "memory(GiB)": 34.88, "step": 109385, "train_speed(iter/s)": 0.412264 }, { "acc": 0.93153839, "epoch": 2.9618498361898573, "grad_norm": 6.8830885887146, "learning_rate": 1.7321171784182045e-06, "loss": 0.39842138, "memory(GiB)": 34.88, "step": 109390, "train_speed(iter/s)": 0.412265 }, { "acc": 0.94749851, "epoch": 2.9619852164730727, "grad_norm": 5.915849685668945, "learning_rate": 1.7316937672358991e-06, "loss": 0.34385428, "memory(GiB)": 34.88, "step": 109395, "train_speed(iter/s)": 0.412266 }, { "acc": 0.96234837, "epoch": 2.9621205967562885, "grad_norm": 9.379646301269531, "learning_rate": 1.7312703970018136e-06, "loss": 0.22526655, "memory(GiB)": 34.88, "step": 109400, "train_speed(iter/s)": 0.412267 }, { "acc": 0.93441792, "epoch": 2.962255977039504, "grad_norm": 7.897432327270508, "learning_rate": 1.7308470677212536e-06, "loss": 0.37271543, "memory(GiB)": 34.88, "step": 109405, "train_speed(iter/s)": 0.412268 }, { "acc": 0.93427849, "epoch": 2.9623913573227196, "grad_norm": 3.3646035194396973, "learning_rate": 1.7304237793995191e-06, "loss": 0.42457685, "memory(GiB)": 34.88, "step": 109410, "train_speed(iter/s)": 0.412269 }, { "acc": 0.93839664, "epoch": 2.962526737605935, "grad_norm": 5.333183288574219, "learning_rate": 1.7300005320419175e-06, "loss": 0.34920697, "memory(GiB)": 34.88, "step": 109415, "train_speed(iter/s)": 0.41227 }, { "acc": 0.94388084, "epoch": 2.962662117889151, "grad_norm": 4.625829219818115, "learning_rate": 1.7295773256537484e-06, "loss": 0.32967811, "memory(GiB)": 34.88, "step": 109420, "train_speed(iter/s)": 0.412271 }, { "acc": 0.93252068, "epoch": 2.962797498172366, "grad_norm": 7.6561055183410645, "learning_rate": 1.7291541602403105e-06, "loss": 0.44872375, "memory(GiB)": 34.88, "step": 109425, "train_speed(iter/s)": 0.412272 }, { "acc": 0.93624229, "epoch": 2.9629328784555815, "grad_norm": 7.563348770141602, "learning_rate": 1.7287310358069102e-06, "loss": 0.31299291, "memory(GiB)": 34.88, "step": 109430, "train_speed(iter/s)": 0.412273 }, { "acc": 0.94687805, "epoch": 2.9630682587387973, "grad_norm": 4.859818935394287, "learning_rate": 1.7283079523588433e-06, "loss": 0.37560356, "memory(GiB)": 34.88, "step": 109435, "train_speed(iter/s)": 0.412274 }, { "acc": 0.93303928, "epoch": 2.9632036390220127, "grad_norm": 8.877394676208496, "learning_rate": 1.727884909901414e-06, "loss": 0.40486183, "memory(GiB)": 34.88, "step": 109440, "train_speed(iter/s)": 0.412275 }, { "acc": 0.95082502, "epoch": 2.9633390193052285, "grad_norm": 7.171051979064941, "learning_rate": 1.7274619084399153e-06, "loss": 0.2700428, "memory(GiB)": 34.88, "step": 109445, "train_speed(iter/s)": 0.412276 }, { "acc": 0.93476744, "epoch": 2.963474399588444, "grad_norm": 15.007964134216309, "learning_rate": 1.727038947979654e-06, "loss": 0.43557787, "memory(GiB)": 34.88, "step": 109450, "train_speed(iter/s)": 0.412277 }, { "acc": 0.93650284, "epoch": 2.9636097798716596, "grad_norm": 12.755813598632812, "learning_rate": 1.7266160285259232e-06, "loss": 0.34658339, "memory(GiB)": 34.88, "step": 109455, "train_speed(iter/s)": 0.412277 }, { "acc": 0.94209633, "epoch": 2.963745160154875, "grad_norm": 5.050932884216309, "learning_rate": 1.726193150084022e-06, "loss": 0.31143093, "memory(GiB)": 34.88, "step": 109460, "train_speed(iter/s)": 0.412278 }, { "acc": 0.94542942, "epoch": 2.9638805404380903, "grad_norm": 6.747922420501709, "learning_rate": 1.7257703126592495e-06, "loss": 0.31658506, "memory(GiB)": 34.88, "step": 109465, "train_speed(iter/s)": 0.412279 }, { "acc": 0.94699364, "epoch": 2.964015920721306, "grad_norm": 4.916313648223877, "learning_rate": 1.7253475162569e-06, "loss": 0.28865204, "memory(GiB)": 34.88, "step": 109470, "train_speed(iter/s)": 0.41228 }, { "acc": 0.93836441, "epoch": 2.964151301004522, "grad_norm": 11.445172309875488, "learning_rate": 1.7249247608822725e-06, "loss": 0.43377032, "memory(GiB)": 34.88, "step": 109475, "train_speed(iter/s)": 0.412281 }, { "acc": 0.9373683, "epoch": 2.9642866812877373, "grad_norm": 6.1503520011901855, "learning_rate": 1.724502046540658e-06, "loss": 0.3001338, "memory(GiB)": 34.88, "step": 109480, "train_speed(iter/s)": 0.412282 }, { "acc": 0.94432335, "epoch": 2.9644220615709527, "grad_norm": 5.728372573852539, "learning_rate": 1.7240793732373587e-06, "loss": 0.28030972, "memory(GiB)": 34.88, "step": 109485, "train_speed(iter/s)": 0.412283 }, { "acc": 0.93537683, "epoch": 2.9645574418541685, "grad_norm": 7.69960880279541, "learning_rate": 1.7236567409776642e-06, "loss": 0.3710598, "memory(GiB)": 34.88, "step": 109490, "train_speed(iter/s)": 0.412284 }, { "acc": 0.950284, "epoch": 2.964692822137384, "grad_norm": 7.2519636154174805, "learning_rate": 1.7232341497668716e-06, "loss": 0.23168268, "memory(GiB)": 34.88, "step": 109495, "train_speed(iter/s)": 0.412285 }, { "acc": 0.94395409, "epoch": 2.964828202420599, "grad_norm": 9.162114143371582, "learning_rate": 1.7228115996102727e-06, "loss": 0.36816285, "memory(GiB)": 34.88, "step": 109500, "train_speed(iter/s)": 0.412286 }, { "acc": 0.93919296, "epoch": 2.964963582703815, "grad_norm": 12.303763389587402, "learning_rate": 1.7223890905131612e-06, "loss": 0.34485574, "memory(GiB)": 34.88, "step": 109505, "train_speed(iter/s)": 0.412286 }, { "acc": 0.93323593, "epoch": 2.965098962987031, "grad_norm": 7.287404537200928, "learning_rate": 1.7219666224808323e-06, "loss": 0.46140661, "memory(GiB)": 34.88, "step": 109510, "train_speed(iter/s)": 0.412287 }, { "acc": 0.94525051, "epoch": 2.965234343270246, "grad_norm": 4.4086198806762695, "learning_rate": 1.7215441955185725e-06, "loss": 0.32709417, "memory(GiB)": 34.88, "step": 109515, "train_speed(iter/s)": 0.412288 }, { "acc": 0.94713717, "epoch": 2.9653697235534615, "grad_norm": 8.69876766204834, "learning_rate": 1.721121809631681e-06, "loss": 0.30652785, "memory(GiB)": 34.88, "step": 109520, "train_speed(iter/s)": 0.412289 }, { "acc": 0.93597527, "epoch": 2.9655051038366773, "grad_norm": 13.164399147033691, "learning_rate": 1.7206994648254432e-06, "loss": 0.4338274, "memory(GiB)": 34.88, "step": 109525, "train_speed(iter/s)": 0.41229 }, { "acc": 0.93894854, "epoch": 2.9656404841198927, "grad_norm": 21.449697494506836, "learning_rate": 1.7202771611051542e-06, "loss": 0.33600645, "memory(GiB)": 34.88, "step": 109530, "train_speed(iter/s)": 0.412291 }, { "acc": 0.94231892, "epoch": 2.9657758644031085, "grad_norm": 5.453459739685059, "learning_rate": 1.7198548984760983e-06, "loss": 0.29724092, "memory(GiB)": 34.88, "step": 109535, "train_speed(iter/s)": 0.412292 }, { "acc": 0.95137844, "epoch": 2.965911244686324, "grad_norm": 3.8537418842315674, "learning_rate": 1.7194326769435716e-06, "loss": 0.29156003, "memory(GiB)": 34.88, "step": 109540, "train_speed(iter/s)": 0.412292 }, { "acc": 0.93614197, "epoch": 2.9660466249695396, "grad_norm": 5.464376926422119, "learning_rate": 1.719010496512861e-06, "loss": 0.38968878, "memory(GiB)": 34.88, "step": 109545, "train_speed(iter/s)": 0.412293 }, { "acc": 0.94891872, "epoch": 2.966182005252755, "grad_norm": 4.9591217041015625, "learning_rate": 1.7185883571892515e-06, "loss": 0.28230479, "memory(GiB)": 34.88, "step": 109550, "train_speed(iter/s)": 0.412294 }, { "acc": 0.94149532, "epoch": 2.9663173855359704, "grad_norm": 3.2783005237579346, "learning_rate": 1.718166258978037e-06, "loss": 0.31461666, "memory(GiB)": 34.88, "step": 109555, "train_speed(iter/s)": 0.412295 }, { "acc": 0.94160719, "epoch": 2.966452765819186, "grad_norm": 25.21150779724121, "learning_rate": 1.7177442018845008e-06, "loss": 0.38480687, "memory(GiB)": 34.88, "step": 109560, "train_speed(iter/s)": 0.412296 }, { "acc": 0.94049797, "epoch": 2.9665881461024015, "grad_norm": 7.503803730010986, "learning_rate": 1.7173221859139333e-06, "loss": 0.36039367, "memory(GiB)": 34.88, "step": 109565, "train_speed(iter/s)": 0.412297 }, { "acc": 0.94978399, "epoch": 2.9667235263856173, "grad_norm": 4.856409072875977, "learning_rate": 1.716900211071616e-06, "loss": 0.23924267, "memory(GiB)": 34.88, "step": 109570, "train_speed(iter/s)": 0.412298 }, { "acc": 0.94840736, "epoch": 2.9668589066688327, "grad_norm": 5.104011535644531, "learning_rate": 1.7164782773628417e-06, "loss": 0.25846832, "memory(GiB)": 34.88, "step": 109575, "train_speed(iter/s)": 0.412299 }, { "acc": 0.94560528, "epoch": 2.9669942869520485, "grad_norm": 13.108010292053223, "learning_rate": 1.7160563847928907e-06, "loss": 0.33468118, "memory(GiB)": 34.88, "step": 109580, "train_speed(iter/s)": 0.4123 }, { "acc": 0.93798189, "epoch": 2.967129667235264, "grad_norm": 25.361560821533203, "learning_rate": 1.7156345333670498e-06, "loss": 0.38213634, "memory(GiB)": 34.88, "step": 109585, "train_speed(iter/s)": 0.412301 }, { "acc": 0.94532471, "epoch": 2.967265047518479, "grad_norm": 4.179113388061523, "learning_rate": 1.7152127230906056e-06, "loss": 0.28967195, "memory(GiB)": 34.88, "step": 109590, "train_speed(iter/s)": 0.412301 }, { "acc": 0.93958969, "epoch": 2.967400427801695, "grad_norm": 10.038443565368652, "learning_rate": 1.7147909539688384e-06, "loss": 0.39068806, "memory(GiB)": 34.88, "step": 109595, "train_speed(iter/s)": 0.412303 }, { "acc": 0.92421198, "epoch": 2.9675358080849104, "grad_norm": 11.101142883300781, "learning_rate": 1.7143692260070353e-06, "loss": 0.45672469, "memory(GiB)": 34.88, "step": 109600, "train_speed(iter/s)": 0.412304 }, { "acc": 0.94412479, "epoch": 2.967671188368126, "grad_norm": 3.951263666152954, "learning_rate": 1.7139475392104735e-06, "loss": 0.35632775, "memory(GiB)": 34.88, "step": 109605, "train_speed(iter/s)": 0.412305 }, { "acc": 0.94296894, "epoch": 2.9678065686513415, "grad_norm": 11.12979507446289, "learning_rate": 1.7135258935844432e-06, "loss": 0.33313532, "memory(GiB)": 34.88, "step": 109610, "train_speed(iter/s)": 0.412305 }, { "acc": 0.92458744, "epoch": 2.9679419489345573, "grad_norm": 15.213050842285156, "learning_rate": 1.7131042891342208e-06, "loss": 0.4425458, "memory(GiB)": 34.88, "step": 109615, "train_speed(iter/s)": 0.412306 }, { "acc": 0.93298912, "epoch": 2.9680773292177727, "grad_norm": 10.900283813476562, "learning_rate": 1.7126827258650892e-06, "loss": 0.38489432, "memory(GiB)": 34.88, "step": 109620, "train_speed(iter/s)": 0.412307 }, { "acc": 0.94280262, "epoch": 2.968212709500988, "grad_norm": 14.977925300598145, "learning_rate": 1.7122612037823311e-06, "loss": 0.27919521, "memory(GiB)": 34.88, "step": 109625, "train_speed(iter/s)": 0.412308 }, { "acc": 0.92839699, "epoch": 2.968348089784204, "grad_norm": 17.214885711669922, "learning_rate": 1.7118397228912237e-06, "loss": 0.43940029, "memory(GiB)": 34.88, "step": 109630, "train_speed(iter/s)": 0.412309 }, { "acc": 0.92773037, "epoch": 2.9684834700674196, "grad_norm": 7.287418842315674, "learning_rate": 1.7114182831970505e-06, "loss": 0.37423501, "memory(GiB)": 34.88, "step": 109635, "train_speed(iter/s)": 0.41231 }, { "acc": 0.94863739, "epoch": 2.968618850350635, "grad_norm": 6.737793445587158, "learning_rate": 1.7109968847050848e-06, "loss": 0.27107055, "memory(GiB)": 34.88, "step": 109640, "train_speed(iter/s)": 0.412311 }, { "acc": 0.93110733, "epoch": 2.9687542306338504, "grad_norm": 7.005692958831787, "learning_rate": 1.710575527420613e-06, "loss": 0.39646103, "memory(GiB)": 34.88, "step": 109645, "train_speed(iter/s)": 0.412311 }, { "acc": 0.93001976, "epoch": 2.968889610917066, "grad_norm": 12.983477592468262, "learning_rate": 1.7101542113489087e-06, "loss": 0.38539543, "memory(GiB)": 34.88, "step": 109650, "train_speed(iter/s)": 0.412312 }, { "acc": 0.95621204, "epoch": 2.9690249912002815, "grad_norm": 12.782889366149902, "learning_rate": 1.7097329364952506e-06, "loss": 0.24349439, "memory(GiB)": 34.88, "step": 109655, "train_speed(iter/s)": 0.412313 }, { "acc": 0.95013704, "epoch": 2.969160371483497, "grad_norm": 6.446141719818115, "learning_rate": 1.7093117028649161e-06, "loss": 0.28095386, "memory(GiB)": 34.88, "step": 109660, "train_speed(iter/s)": 0.412314 }, { "acc": 0.93929768, "epoch": 2.9692957517667127, "grad_norm": 5.590140342712402, "learning_rate": 1.7088905104631845e-06, "loss": 0.39559879, "memory(GiB)": 34.88, "step": 109665, "train_speed(iter/s)": 0.412315 }, { "acc": 0.94123249, "epoch": 2.9694311320499285, "grad_norm": 9.141496658325195, "learning_rate": 1.7084693592953292e-06, "loss": 0.3609988, "memory(GiB)": 34.88, "step": 109670, "train_speed(iter/s)": 0.412316 }, { "acc": 0.94014196, "epoch": 2.969566512333144, "grad_norm": 6.754793643951416, "learning_rate": 1.7080482493666234e-06, "loss": 0.28344417, "memory(GiB)": 34.88, "step": 109675, "train_speed(iter/s)": 0.412316 }, { "acc": 0.93578491, "epoch": 2.969701892616359, "grad_norm": 10.230130195617676, "learning_rate": 1.7076271806823487e-06, "loss": 0.37039113, "memory(GiB)": 34.88, "step": 109680, "train_speed(iter/s)": 0.412317 }, { "acc": 0.94044266, "epoch": 2.969837272899575, "grad_norm": 7.689987659454346, "learning_rate": 1.7072061532477746e-06, "loss": 0.37713788, "memory(GiB)": 34.88, "step": 109685, "train_speed(iter/s)": 0.412318 }, { "acc": 0.94912271, "epoch": 2.9699726531827904, "grad_norm": 2.4353151321411133, "learning_rate": 1.7067851670681779e-06, "loss": 0.30327115, "memory(GiB)": 34.88, "step": 109690, "train_speed(iter/s)": 0.412319 }, { "acc": 0.95320492, "epoch": 2.970108033466006, "grad_norm": 8.439388275146484, "learning_rate": 1.7063642221488308e-06, "loss": 0.30684786, "memory(GiB)": 34.88, "step": 109695, "train_speed(iter/s)": 0.41232 }, { "acc": 0.93931532, "epoch": 2.9702434137492215, "grad_norm": 10.214920997619629, "learning_rate": 1.7059433184950095e-06, "loss": 0.34255834, "memory(GiB)": 34.88, "step": 109700, "train_speed(iter/s)": 0.412321 }, { "acc": 0.94328127, "epoch": 2.9703787940324373, "grad_norm": 6.45919132232666, "learning_rate": 1.7055224561119824e-06, "loss": 0.34557118, "memory(GiB)": 34.88, "step": 109705, "train_speed(iter/s)": 0.412322 }, { "acc": 0.94087067, "epoch": 2.9705141743156527, "grad_norm": 3.778778553009033, "learning_rate": 1.705101635005024e-06, "loss": 0.37030044, "memory(GiB)": 34.88, "step": 109710, "train_speed(iter/s)": 0.412323 }, { "acc": 0.93653584, "epoch": 2.970649554598868, "grad_norm": 8.462977409362793, "learning_rate": 1.7046808551794076e-06, "loss": 0.37399983, "memory(GiB)": 34.88, "step": 109715, "train_speed(iter/s)": 0.412324 }, { "acc": 0.9393713, "epoch": 2.970784934882084, "grad_norm": 2.801543712615967, "learning_rate": 1.7042601166404e-06, "loss": 0.38931446, "memory(GiB)": 34.88, "step": 109720, "train_speed(iter/s)": 0.412325 }, { "acc": 0.93809013, "epoch": 2.970920315165299, "grad_norm": 21.38311004638672, "learning_rate": 1.7038394193932738e-06, "loss": 0.37540655, "memory(GiB)": 34.88, "step": 109725, "train_speed(iter/s)": 0.412325 }, { "acc": 0.94550028, "epoch": 2.971055695448515, "grad_norm": 4.077938556671143, "learning_rate": 1.7034187634432996e-06, "loss": 0.32233107, "memory(GiB)": 34.88, "step": 109730, "train_speed(iter/s)": 0.412326 }, { "acc": 0.96047926, "epoch": 2.9711910757317304, "grad_norm": 6.897602558135986, "learning_rate": 1.7029981487957484e-06, "loss": 0.25838859, "memory(GiB)": 34.88, "step": 109735, "train_speed(iter/s)": 0.412327 }, { "acc": 0.93232307, "epoch": 2.971326456014946, "grad_norm": 8.697376251220703, "learning_rate": 1.7025775754558863e-06, "loss": 0.37529263, "memory(GiB)": 34.88, "step": 109740, "train_speed(iter/s)": 0.412328 }, { "acc": 0.94132643, "epoch": 2.9714618362981615, "grad_norm": 5.648114204406738, "learning_rate": 1.7021570434289825e-06, "loss": 0.36133981, "memory(GiB)": 34.88, "step": 109745, "train_speed(iter/s)": 0.412329 }, { "acc": 0.93168058, "epoch": 2.971597216581377, "grad_norm": 10.729692459106445, "learning_rate": 1.701736552720307e-06, "loss": 0.42641678, "memory(GiB)": 34.88, "step": 109750, "train_speed(iter/s)": 0.41233 }, { "acc": 0.92925873, "epoch": 2.9717325968645927, "grad_norm": 4.302452087402344, "learning_rate": 1.701316103335124e-06, "loss": 0.40234499, "memory(GiB)": 34.88, "step": 109755, "train_speed(iter/s)": 0.412331 }, { "acc": 0.93863745, "epoch": 2.971867977147808, "grad_norm": 8.849947929382324, "learning_rate": 1.7008956952787025e-06, "loss": 0.38662729, "memory(GiB)": 34.88, "step": 109760, "train_speed(iter/s)": 0.412331 }, { "acc": 0.95203991, "epoch": 2.972003357431024, "grad_norm": 4.368374347686768, "learning_rate": 1.700475328556308e-06, "loss": 0.28667884, "memory(GiB)": 34.88, "step": 109765, "train_speed(iter/s)": 0.412332 }, { "acc": 0.93432922, "epoch": 2.972138737714239, "grad_norm": 5.9582343101501465, "learning_rate": 1.7000550031732093e-06, "loss": 0.40207691, "memory(GiB)": 34.88, "step": 109770, "train_speed(iter/s)": 0.412333 }, { "acc": 0.93900414, "epoch": 2.972274117997455, "grad_norm": 10.217174530029297, "learning_rate": 1.6996347191346677e-06, "loss": 0.3747241, "memory(GiB)": 34.88, "step": 109775, "train_speed(iter/s)": 0.412334 }, { "acc": 0.95148754, "epoch": 2.9724094982806704, "grad_norm": 9.146003723144531, "learning_rate": 1.6992144764459503e-06, "loss": 0.31230373, "memory(GiB)": 34.88, "step": 109780, "train_speed(iter/s)": 0.412335 }, { "acc": 0.93741865, "epoch": 2.9725448785638857, "grad_norm": 9.032828330993652, "learning_rate": 1.6987942751123206e-06, "loss": 0.43781595, "memory(GiB)": 34.88, "step": 109785, "train_speed(iter/s)": 0.412336 }, { "acc": 0.93533487, "epoch": 2.9726802588471015, "grad_norm": 4.960508823394775, "learning_rate": 1.698374115139045e-06, "loss": 0.39457951, "memory(GiB)": 34.88, "step": 109790, "train_speed(iter/s)": 0.412337 }, { "acc": 0.95159702, "epoch": 2.9728156391303173, "grad_norm": 5.981532573699951, "learning_rate": 1.6979539965313826e-06, "loss": 0.26947489, "memory(GiB)": 34.88, "step": 109795, "train_speed(iter/s)": 0.412338 }, { "acc": 0.93810139, "epoch": 2.9729510194135327, "grad_norm": 9.532922744750977, "learning_rate": 1.6975339192945989e-06, "loss": 0.35713325, "memory(GiB)": 34.88, "step": 109800, "train_speed(iter/s)": 0.412339 }, { "acc": 0.93042259, "epoch": 2.973086399696748, "grad_norm": 6.5372724533081055, "learning_rate": 1.697113883433957e-06, "loss": 0.37905376, "memory(GiB)": 34.88, "step": 109805, "train_speed(iter/s)": 0.41234 }, { "acc": 0.94452696, "epoch": 2.973221779979964, "grad_norm": 7.8885674476623535, "learning_rate": 1.696693888954716e-06, "loss": 0.26552033, "memory(GiB)": 34.88, "step": 109810, "train_speed(iter/s)": 0.412341 }, { "acc": 0.94140282, "epoch": 2.973357160263179, "grad_norm": 15.551467895507812, "learning_rate": 1.6962739358621383e-06, "loss": 0.31784296, "memory(GiB)": 34.88, "step": 109815, "train_speed(iter/s)": 0.412342 }, { "acc": 0.93922825, "epoch": 2.9734925405463946, "grad_norm": 2.4902617931365967, "learning_rate": 1.695854024161485e-06, "loss": 0.3387759, "memory(GiB)": 34.88, "step": 109820, "train_speed(iter/s)": 0.412343 }, { "acc": 0.92625713, "epoch": 2.9736279208296104, "grad_norm": 5.241692066192627, "learning_rate": 1.6954341538580175e-06, "loss": 0.4322432, "memory(GiB)": 34.88, "step": 109825, "train_speed(iter/s)": 0.412343 }, { "acc": 0.945117, "epoch": 2.973763301112826, "grad_norm": 4.392404556274414, "learning_rate": 1.6950143249569926e-06, "loss": 0.31663961, "memory(GiB)": 34.88, "step": 109830, "train_speed(iter/s)": 0.412344 }, { "acc": 0.94463158, "epoch": 2.9738986813960415, "grad_norm": 9.293688774108887, "learning_rate": 1.6945945374636716e-06, "loss": 0.3759937, "memory(GiB)": 34.88, "step": 109835, "train_speed(iter/s)": 0.412345 }, { "acc": 0.93397007, "epoch": 2.974034061679257, "grad_norm": 4.670687675476074, "learning_rate": 1.6941747913833137e-06, "loss": 0.38325014, "memory(GiB)": 34.88, "step": 109840, "train_speed(iter/s)": 0.412346 }, { "acc": 0.9226841, "epoch": 2.9741694419624727, "grad_norm": 12.948042869567871, "learning_rate": 1.6937550867211743e-06, "loss": 0.51351099, "memory(GiB)": 34.88, "step": 109845, "train_speed(iter/s)": 0.412347 }, { "acc": 0.92180862, "epoch": 2.974304822245688, "grad_norm": 11.500513076782227, "learning_rate": 1.6933354234825128e-06, "loss": 0.51200404, "memory(GiB)": 34.88, "step": 109850, "train_speed(iter/s)": 0.412348 }, { "acc": 0.94254074, "epoch": 2.974440202528904, "grad_norm": 6.8580803871154785, "learning_rate": 1.692915801672586e-06, "loss": 0.34544287, "memory(GiB)": 34.88, "step": 109855, "train_speed(iter/s)": 0.412349 }, { "acc": 0.94399948, "epoch": 2.974575582812119, "grad_norm": 14.383993148803711, "learning_rate": 1.6924962212966526e-06, "loss": 0.38883791, "memory(GiB)": 34.88, "step": 109860, "train_speed(iter/s)": 0.41235 }, { "acc": 0.94189377, "epoch": 2.974710963095335, "grad_norm": 3.5023388862609863, "learning_rate": 1.6920766823599648e-06, "loss": 0.35911465, "memory(GiB)": 34.88, "step": 109865, "train_speed(iter/s)": 0.41235 }, { "acc": 0.95115423, "epoch": 2.9748463433785504, "grad_norm": 3.8099100589752197, "learning_rate": 1.6916571848677808e-06, "loss": 0.28654938, "memory(GiB)": 34.88, "step": 109870, "train_speed(iter/s)": 0.412351 }, { "acc": 0.93441963, "epoch": 2.9749817236617657, "grad_norm": 5.342852592468262, "learning_rate": 1.6912377288253543e-06, "loss": 0.3759445, "memory(GiB)": 34.88, "step": 109875, "train_speed(iter/s)": 0.412352 }, { "acc": 0.92510309, "epoch": 2.9751171039449815, "grad_norm": 8.387075424194336, "learning_rate": 1.690818314237942e-06, "loss": 0.36481643, "memory(GiB)": 34.88, "step": 109880, "train_speed(iter/s)": 0.412353 }, { "acc": 0.94396572, "epoch": 2.975252484228197, "grad_norm": 20.628646850585938, "learning_rate": 1.690398941110795e-06, "loss": 0.34012918, "memory(GiB)": 34.88, "step": 109885, "train_speed(iter/s)": 0.412354 }, { "acc": 0.93023949, "epoch": 2.9753878645114127, "grad_norm": 9.336191177368164, "learning_rate": 1.689979609449168e-06, "loss": 0.46042495, "memory(GiB)": 34.88, "step": 109890, "train_speed(iter/s)": 0.412355 }, { "acc": 0.94236755, "epoch": 2.975523244794628, "grad_norm": 4.830979824066162, "learning_rate": 1.6895603192583154e-06, "loss": 0.33594441, "memory(GiB)": 34.88, "step": 109895, "train_speed(iter/s)": 0.412356 }, { "acc": 0.94061394, "epoch": 2.975658625077844, "grad_norm": 20.04673957824707, "learning_rate": 1.6891410705434872e-06, "loss": 0.36729505, "memory(GiB)": 34.88, "step": 109900, "train_speed(iter/s)": 0.412357 }, { "acc": 0.95071268, "epoch": 2.975794005361059, "grad_norm": 5.072244644165039, "learning_rate": 1.6887218633099362e-06, "loss": 0.27818475, "memory(GiB)": 34.88, "step": 109905, "train_speed(iter/s)": 0.412358 }, { "acc": 0.93966427, "epoch": 2.9759293856442746, "grad_norm": 9.169445037841797, "learning_rate": 1.6883026975629139e-06, "loss": 0.36941028, "memory(GiB)": 34.88, "step": 109910, "train_speed(iter/s)": 0.412359 }, { "acc": 0.96090946, "epoch": 2.9760647659274904, "grad_norm": 8.116129875183105, "learning_rate": 1.6878835733076728e-06, "loss": 0.20495653, "memory(GiB)": 34.88, "step": 109915, "train_speed(iter/s)": 0.41236 }, { "acc": 0.95396347, "epoch": 2.9762001462107057, "grad_norm": 6.671389102935791, "learning_rate": 1.6874644905494602e-06, "loss": 0.22363341, "memory(GiB)": 34.88, "step": 109920, "train_speed(iter/s)": 0.41236 }, { "acc": 0.95401602, "epoch": 2.9763355264939215, "grad_norm": 9.951251029968262, "learning_rate": 1.6870454492935273e-06, "loss": 0.22273617, "memory(GiB)": 34.88, "step": 109925, "train_speed(iter/s)": 0.412361 }, { "acc": 0.95036936, "epoch": 2.976470906777137, "grad_norm": 6.442879676818848, "learning_rate": 1.686626449545126e-06, "loss": 0.25819039, "memory(GiB)": 34.88, "step": 109930, "train_speed(iter/s)": 0.412362 }, { "acc": 0.92557735, "epoch": 2.9766062870603527, "grad_norm": 7.148850917816162, "learning_rate": 1.6862074913095003e-06, "loss": 0.50813828, "memory(GiB)": 34.88, "step": 109935, "train_speed(iter/s)": 0.412363 }, { "acc": 0.92428646, "epoch": 2.976741667343568, "grad_norm": 7.872547626495361, "learning_rate": 1.6857885745919003e-06, "loss": 0.38427119, "memory(GiB)": 34.88, "step": 109940, "train_speed(iter/s)": 0.412364 }, { "acc": 0.94569168, "epoch": 2.9768770476267834, "grad_norm": 9.856855392456055, "learning_rate": 1.6853696993975755e-06, "loss": 0.30631924, "memory(GiB)": 34.88, "step": 109945, "train_speed(iter/s)": 0.412365 }, { "acc": 0.92156124, "epoch": 2.977012427909999, "grad_norm": 50.9693717956543, "learning_rate": 1.6849508657317729e-06, "loss": 0.45665116, "memory(GiB)": 34.88, "step": 109950, "train_speed(iter/s)": 0.412366 }, { "acc": 0.93977432, "epoch": 2.977147808193215, "grad_norm": 9.125088691711426, "learning_rate": 1.684532073599735e-06, "loss": 0.35304475, "memory(GiB)": 34.88, "step": 109955, "train_speed(iter/s)": 0.412367 }, { "acc": 0.93002701, "epoch": 2.9772831884764304, "grad_norm": 7.187256336212158, "learning_rate": 1.6841133230067147e-06, "loss": 0.38289642, "memory(GiB)": 34.88, "step": 109960, "train_speed(iter/s)": 0.412368 }, { "acc": 0.94239197, "epoch": 2.9774185687596457, "grad_norm": 4.572746276855469, "learning_rate": 1.6836946139579542e-06, "loss": 0.34124274, "memory(GiB)": 34.88, "step": 109965, "train_speed(iter/s)": 0.412369 }, { "acc": 0.93892374, "epoch": 2.9775539490428615, "grad_norm": 9.284069061279297, "learning_rate": 1.683275946458697e-06, "loss": 0.32688575, "memory(GiB)": 34.88, "step": 109970, "train_speed(iter/s)": 0.41237 }, { "acc": 0.94778767, "epoch": 2.977689329326077, "grad_norm": 1.9678075313568115, "learning_rate": 1.6828573205141892e-06, "loss": 0.30290713, "memory(GiB)": 34.88, "step": 109975, "train_speed(iter/s)": 0.412371 }, { "acc": 0.92822075, "epoch": 2.9778247096092922, "grad_norm": 8.006705284118652, "learning_rate": 1.6824387361296751e-06, "loss": 0.46387968, "memory(GiB)": 34.88, "step": 109980, "train_speed(iter/s)": 0.412372 }, { "acc": 0.9431448, "epoch": 2.977960089892508, "grad_norm": 3.8831405639648438, "learning_rate": 1.6820201933104001e-06, "loss": 0.32066908, "memory(GiB)": 34.88, "step": 109985, "train_speed(iter/s)": 0.412373 }, { "acc": 0.93958988, "epoch": 2.978095470175724, "grad_norm": 5.911356449127197, "learning_rate": 1.6816016920616027e-06, "loss": 0.35734308, "memory(GiB)": 34.88, "step": 109990, "train_speed(iter/s)": 0.412374 }, { "acc": 0.93454666, "epoch": 2.978230850458939, "grad_norm": 9.317766189575195, "learning_rate": 1.6811832323885305e-06, "loss": 0.44357486, "memory(GiB)": 34.88, "step": 109995, "train_speed(iter/s)": 0.412375 }, { "acc": 0.93948269, "epoch": 2.9783662307421546, "grad_norm": 65.07560729980469, "learning_rate": 1.6807648142964225e-06, "loss": 0.39782028, "memory(GiB)": 34.88, "step": 110000, "train_speed(iter/s)": 0.412376 }, { "epoch": 2.9783662307421546, "eval_acc": 0.6249855396520941, "eval_loss": 1.202934741973877, "eval_runtime": 1299.0916, "eval_samples_per_second": 66.435, "eval_steps_per_second": 2.077, "step": 110000 }, { "acc": 0.94914083, "epoch": 2.9785016110253704, "grad_norm": 7.772839069366455, "learning_rate": 1.6803464377905221e-06, "loss": 0.36105464, "memory(GiB)": 34.88, "step": 110005, "train_speed(iter/s)": 0.41034 }, { "acc": 0.93564186, "epoch": 2.9786369913085857, "grad_norm": 12.499860763549805, "learning_rate": 1.679928102876068e-06, "loss": 0.45811996, "memory(GiB)": 34.88, "step": 110010, "train_speed(iter/s)": 0.410341 }, { "acc": 0.93296347, "epoch": 2.9787723715918015, "grad_norm": 7.457651138305664, "learning_rate": 1.6795098095583013e-06, "loss": 0.41372061, "memory(GiB)": 34.88, "step": 110015, "train_speed(iter/s)": 0.410342 }, { "acc": 0.94354191, "epoch": 2.978907751875017, "grad_norm": 10.835362434387207, "learning_rate": 1.6790915578424644e-06, "loss": 0.29256277, "memory(GiB)": 34.88, "step": 110020, "train_speed(iter/s)": 0.410343 }, { "acc": 0.94469118, "epoch": 2.9790431321582327, "grad_norm": 5.53869104385376, "learning_rate": 1.6786733477337913e-06, "loss": 0.3314862, "memory(GiB)": 34.88, "step": 110025, "train_speed(iter/s)": 0.410344 }, { "acc": 0.91722946, "epoch": 2.979178512441448, "grad_norm": 10.300039291381836, "learning_rate": 1.6782551792375282e-06, "loss": 0.56992679, "memory(GiB)": 34.88, "step": 110030, "train_speed(iter/s)": 0.410345 }, { "acc": 0.95441666, "epoch": 2.9793138927246634, "grad_norm": 3.0195119380950928, "learning_rate": 1.6778370523589073e-06, "loss": 0.27434354, "memory(GiB)": 34.88, "step": 110035, "train_speed(iter/s)": 0.410346 }, { "acc": 0.92885904, "epoch": 2.979449273007879, "grad_norm": 3.1539056301116943, "learning_rate": 1.677418967103171e-06, "loss": 0.43185287, "memory(GiB)": 34.88, "step": 110040, "train_speed(iter/s)": 0.410347 }, { "acc": 0.9396307, "epoch": 2.9795846532910946, "grad_norm": 6.282147407531738, "learning_rate": 1.6770009234755527e-06, "loss": 0.34328921, "memory(GiB)": 34.88, "step": 110045, "train_speed(iter/s)": 0.410348 }, { "acc": 0.93398151, "epoch": 2.9797200335743104, "grad_norm": 9.534401893615723, "learning_rate": 1.6765829214812908e-06, "loss": 0.35962806, "memory(GiB)": 34.88, "step": 110050, "train_speed(iter/s)": 0.410349 }, { "acc": 0.93891163, "epoch": 2.9798554138575257, "grad_norm": 5.71742582321167, "learning_rate": 1.6761649611256239e-06, "loss": 0.27738769, "memory(GiB)": 34.88, "step": 110055, "train_speed(iter/s)": 0.41035 }, { "acc": 0.93912201, "epoch": 2.9799907941407415, "grad_norm": 6.2830915451049805, "learning_rate": 1.6757470424137833e-06, "loss": 0.31968565, "memory(GiB)": 34.88, "step": 110060, "train_speed(iter/s)": 0.410351 }, { "acc": 0.93684731, "epoch": 2.980126174423957, "grad_norm": 4.309093475341797, "learning_rate": 1.6753291653510074e-06, "loss": 0.33601003, "memory(GiB)": 34.88, "step": 110065, "train_speed(iter/s)": 0.410352 }, { "acc": 0.94618435, "epoch": 2.9802615547071722, "grad_norm": 13.365415573120117, "learning_rate": 1.67491132994253e-06, "loss": 0.30791025, "memory(GiB)": 34.88, "step": 110070, "train_speed(iter/s)": 0.410353 }, { "acc": 0.94173107, "epoch": 2.980396934990388, "grad_norm": 9.587407112121582, "learning_rate": 1.6744935361935865e-06, "loss": 0.29969168, "memory(GiB)": 34.88, "step": 110075, "train_speed(iter/s)": 0.410354 }, { "acc": 0.9431529, "epoch": 2.9805323152736034, "grad_norm": 10.965282440185547, "learning_rate": 1.6740757841094068e-06, "loss": 0.34221296, "memory(GiB)": 34.88, "step": 110080, "train_speed(iter/s)": 0.410355 }, { "acc": 0.94035616, "epoch": 2.980667695556819, "grad_norm": 5.539626598358154, "learning_rate": 1.67365807369523e-06, "loss": 0.36589994, "memory(GiB)": 34.88, "step": 110085, "train_speed(iter/s)": 0.410356 }, { "acc": 0.9433033, "epoch": 2.9808030758400346, "grad_norm": 8.454644203186035, "learning_rate": 1.6732404049562855e-06, "loss": 0.34622293, "memory(GiB)": 34.88, "step": 110090, "train_speed(iter/s)": 0.410357 }, { "acc": 0.9325716, "epoch": 2.9809384561232504, "grad_norm": 17.076963424682617, "learning_rate": 1.6728227778978037e-06, "loss": 0.44865799, "memory(GiB)": 34.88, "step": 110095, "train_speed(iter/s)": 0.410358 }, { "acc": 0.93067169, "epoch": 2.9810738364064657, "grad_norm": 8.754253387451172, "learning_rate": 1.6724051925250184e-06, "loss": 0.3708195, "memory(GiB)": 34.88, "step": 110100, "train_speed(iter/s)": 0.410359 }, { "acc": 0.92609081, "epoch": 2.981209216689681, "grad_norm": 6.735789775848389, "learning_rate": 1.67198764884316e-06, "loss": 0.43494558, "memory(GiB)": 34.88, "step": 110105, "train_speed(iter/s)": 0.41036 }, { "acc": 0.93685865, "epoch": 2.981344596972897, "grad_norm": 10.753714561462402, "learning_rate": 1.6715701468574611e-06, "loss": 0.37998416, "memory(GiB)": 34.88, "step": 110110, "train_speed(iter/s)": 0.410361 }, { "acc": 0.93625126, "epoch": 2.9814799772561127, "grad_norm": 8.745689392089844, "learning_rate": 1.6711526865731464e-06, "loss": 0.3511692, "memory(GiB)": 34.88, "step": 110115, "train_speed(iter/s)": 0.410362 }, { "acc": 0.94209232, "epoch": 2.981615357539328, "grad_norm": 21.92251968383789, "learning_rate": 1.6707352679954518e-06, "loss": 0.28388615, "memory(GiB)": 34.88, "step": 110120, "train_speed(iter/s)": 0.410363 }, { "acc": 0.95180283, "epoch": 2.9817507378225434, "grad_norm": 6.009197235107422, "learning_rate": 1.6703178911296027e-06, "loss": 0.29319704, "memory(GiB)": 34.88, "step": 110125, "train_speed(iter/s)": 0.410364 }, { "acc": 0.94523745, "epoch": 2.981886118105759, "grad_norm": 8.643322944641113, "learning_rate": 1.6699005559808297e-06, "loss": 0.34753182, "memory(GiB)": 34.88, "step": 110130, "train_speed(iter/s)": 0.410365 }, { "acc": 0.94753618, "epoch": 2.9820214983889746, "grad_norm": 9.60739517211914, "learning_rate": 1.6694832625543578e-06, "loss": 0.2691715, "memory(GiB)": 34.88, "step": 110135, "train_speed(iter/s)": 0.410366 }, { "acc": 0.95602045, "epoch": 2.98215687867219, "grad_norm": 11.053194999694824, "learning_rate": 1.6690660108554158e-06, "loss": 0.28839536, "memory(GiB)": 34.88, "step": 110140, "train_speed(iter/s)": 0.410367 }, { "acc": 0.93718805, "epoch": 2.9822922589554057, "grad_norm": 9.645087242126465, "learning_rate": 1.6686488008892323e-06, "loss": 0.36856451, "memory(GiB)": 34.88, "step": 110145, "train_speed(iter/s)": 0.410368 }, { "acc": 0.93840961, "epoch": 2.9824276392386215, "grad_norm": 7.852402687072754, "learning_rate": 1.668231632661029e-06, "loss": 0.35453129, "memory(GiB)": 34.88, "step": 110150, "train_speed(iter/s)": 0.410369 }, { "acc": 0.94221296, "epoch": 2.982563019521837, "grad_norm": 4.7139506340026855, "learning_rate": 1.667814506176038e-06, "loss": 0.3421309, "memory(GiB)": 34.88, "step": 110155, "train_speed(iter/s)": 0.41037 }, { "acc": 0.93790808, "epoch": 2.9826983998050522, "grad_norm": 4.532199382781982, "learning_rate": 1.6673974214394805e-06, "loss": 0.34161551, "memory(GiB)": 34.88, "step": 110160, "train_speed(iter/s)": 0.410371 }, { "acc": 0.9409646, "epoch": 2.982833780088268, "grad_norm": 6.046004295349121, "learning_rate": 1.666980378456583e-06, "loss": 0.37007053, "memory(GiB)": 34.88, "step": 110165, "train_speed(iter/s)": 0.410372 }, { "acc": 0.94972601, "epoch": 2.9829691603714834, "grad_norm": 5.779874801635742, "learning_rate": 1.6665633772325678e-06, "loss": 0.30390787, "memory(GiB)": 34.88, "step": 110170, "train_speed(iter/s)": 0.410373 }, { "acc": 0.93871183, "epoch": 2.983104540654699, "grad_norm": 10.885456085205078, "learning_rate": 1.6661464177726604e-06, "loss": 0.32986903, "memory(GiB)": 34.88, "step": 110175, "train_speed(iter/s)": 0.410374 }, { "acc": 0.93477077, "epoch": 2.9832399209379146, "grad_norm": 6.438621520996094, "learning_rate": 1.6657295000820852e-06, "loss": 0.34112575, "memory(GiB)": 34.88, "step": 110180, "train_speed(iter/s)": 0.410375 }, { "acc": 0.94168959, "epoch": 2.9833753012211304, "grad_norm": 3.9981634616851807, "learning_rate": 1.66531262416606e-06, "loss": 0.390222, "memory(GiB)": 34.88, "step": 110185, "train_speed(iter/s)": 0.410376 }, { "acc": 0.93188, "epoch": 2.9835106815043457, "grad_norm": 9.107660293579102, "learning_rate": 1.6648957900298136e-06, "loss": 0.41729178, "memory(GiB)": 34.88, "step": 110190, "train_speed(iter/s)": 0.410377 }, { "acc": 0.92638111, "epoch": 2.983646061787561, "grad_norm": 5.2872114181518555, "learning_rate": 1.664478997678562e-06, "loss": 0.42546606, "memory(GiB)": 34.88, "step": 110195, "train_speed(iter/s)": 0.410379 }, { "acc": 0.93547897, "epoch": 2.983781442070777, "grad_norm": 7.555844306945801, "learning_rate": 1.6640622471175314e-06, "loss": 0.34541454, "memory(GiB)": 34.88, "step": 110200, "train_speed(iter/s)": 0.41038 }, { "acc": 0.94169827, "epoch": 2.9839168223539922, "grad_norm": 15.534683227539062, "learning_rate": 1.663645538351936e-06, "loss": 0.3402281, "memory(GiB)": 34.88, "step": 110205, "train_speed(iter/s)": 0.41038 }, { "acc": 0.93996782, "epoch": 2.984052202637208, "grad_norm": 7.066350936889648, "learning_rate": 1.6632288713870028e-06, "loss": 0.39702048, "memory(GiB)": 34.88, "step": 110210, "train_speed(iter/s)": 0.410381 }, { "acc": 0.9472641, "epoch": 2.9841875829204234, "grad_norm": 3.2433788776397705, "learning_rate": 1.6628122462279484e-06, "loss": 0.26271625, "memory(GiB)": 34.88, "step": 110215, "train_speed(iter/s)": 0.410382 }, { "acc": 0.94566441, "epoch": 2.984322963203639, "grad_norm": 5.664673328399658, "learning_rate": 1.6623956628799881e-06, "loss": 0.29114668, "memory(GiB)": 34.88, "step": 110220, "train_speed(iter/s)": 0.410384 }, { "acc": 0.92112408, "epoch": 2.9844583434868546, "grad_norm": 14.10948657989502, "learning_rate": 1.6619791213483477e-06, "loss": 0.487257, "memory(GiB)": 34.88, "step": 110225, "train_speed(iter/s)": 0.410385 }, { "acc": 0.94644146, "epoch": 2.98459372377007, "grad_norm": 5.969367980957031, "learning_rate": 1.6615626216382397e-06, "loss": 0.280879, "memory(GiB)": 34.88, "step": 110230, "train_speed(iter/s)": 0.410386 }, { "acc": 0.93919954, "epoch": 2.9847291040532857, "grad_norm": 5.474316120147705, "learning_rate": 1.6611461637548848e-06, "loss": 0.3485671, "memory(GiB)": 34.88, "step": 110235, "train_speed(iter/s)": 0.410387 }, { "acc": 0.94848099, "epoch": 2.984864484336501, "grad_norm": 2.7048680782318115, "learning_rate": 1.6607297477034956e-06, "loss": 0.28575945, "memory(GiB)": 34.88, "step": 110240, "train_speed(iter/s)": 0.410388 }, { "acc": 0.93730879, "epoch": 2.984999864619717, "grad_norm": 4.640703201293945, "learning_rate": 1.6603133734892942e-06, "loss": 0.37549629, "memory(GiB)": 34.88, "step": 110245, "train_speed(iter/s)": 0.410389 }, { "acc": 0.93654671, "epoch": 2.9851352449029322, "grad_norm": 14.315967559814453, "learning_rate": 1.6598970411174925e-06, "loss": 0.37003961, "memory(GiB)": 34.88, "step": 110250, "train_speed(iter/s)": 0.41039 }, { "acc": 0.94160528, "epoch": 2.985270625186148, "grad_norm": 18.68320655822754, "learning_rate": 1.659480750593307e-06, "loss": 0.36033301, "memory(GiB)": 34.88, "step": 110255, "train_speed(iter/s)": 0.410391 }, { "acc": 0.94778118, "epoch": 2.9854060054693634, "grad_norm": 8.312132835388184, "learning_rate": 1.6590645019219545e-06, "loss": 0.32074568, "memory(GiB)": 34.88, "step": 110260, "train_speed(iter/s)": 0.410392 }, { "acc": 0.94386244, "epoch": 2.9855413857525788, "grad_norm": 7.741428375244141, "learning_rate": 1.6586482951086458e-06, "loss": 0.3163857, "memory(GiB)": 34.88, "step": 110265, "train_speed(iter/s)": 0.410393 }, { "acc": 0.93458138, "epoch": 2.9856767660357946, "grad_norm": 8.332531929016113, "learning_rate": 1.6582321301585982e-06, "loss": 0.40883808, "memory(GiB)": 34.88, "step": 110270, "train_speed(iter/s)": 0.410394 }, { "acc": 0.92796326, "epoch": 2.9858121463190104, "grad_norm": 6.629067897796631, "learning_rate": 1.6578160070770196e-06, "loss": 0.35548286, "memory(GiB)": 34.88, "step": 110275, "train_speed(iter/s)": 0.410395 }, { "acc": 0.94071045, "epoch": 2.9859475266022257, "grad_norm": 4.5506367683410645, "learning_rate": 1.6573999258691295e-06, "loss": 0.36518736, "memory(GiB)": 34.88, "step": 110280, "train_speed(iter/s)": 0.410396 }, { "acc": 0.93391399, "epoch": 2.986082906885441, "grad_norm": 8.63323974609375, "learning_rate": 1.6569838865401361e-06, "loss": 0.43289156, "memory(GiB)": 34.88, "step": 110285, "train_speed(iter/s)": 0.410397 }, { "acc": 0.93366776, "epoch": 2.986218287168657, "grad_norm": 13.361817359924316, "learning_rate": 1.6565678890952514e-06, "loss": 0.38291919, "memory(GiB)": 34.88, "step": 110290, "train_speed(iter/s)": 0.410398 }, { "acc": 0.93600616, "epoch": 2.9863536674518723, "grad_norm": 8.450129508972168, "learning_rate": 1.6561519335396889e-06, "loss": 0.3414088, "memory(GiB)": 34.88, "step": 110295, "train_speed(iter/s)": 0.410399 }, { "acc": 0.94818859, "epoch": 2.9864890477350876, "grad_norm": 5.710073471069336, "learning_rate": 1.6557360198786565e-06, "loss": 0.29389265, "memory(GiB)": 34.88, "step": 110300, "train_speed(iter/s)": 0.4104 }, { "acc": 0.93155994, "epoch": 2.9866244280183034, "grad_norm": 5.53201961517334, "learning_rate": 1.655320148117366e-06, "loss": 0.36146441, "memory(GiB)": 34.88, "step": 110305, "train_speed(iter/s)": 0.410401 }, { "acc": 0.93734989, "epoch": 2.986759808301519, "grad_norm": 13.526040077209473, "learning_rate": 1.6549043182610242e-06, "loss": 0.38367982, "memory(GiB)": 34.88, "step": 110310, "train_speed(iter/s)": 0.410402 }, { "acc": 0.94793606, "epoch": 2.9868951885847346, "grad_norm": 12.038570404052734, "learning_rate": 1.6544885303148452e-06, "loss": 0.28399658, "memory(GiB)": 34.88, "step": 110315, "train_speed(iter/s)": 0.410403 }, { "acc": 0.94548931, "epoch": 2.98703056886795, "grad_norm": 9.023269653320312, "learning_rate": 1.6540727842840325e-06, "loss": 0.33592563, "memory(GiB)": 34.88, "step": 110320, "train_speed(iter/s)": 0.410404 }, { "acc": 0.93979092, "epoch": 2.9871659491511657, "grad_norm": 17.634307861328125, "learning_rate": 1.6536570801737975e-06, "loss": 0.3373034, "memory(GiB)": 34.88, "step": 110325, "train_speed(iter/s)": 0.410405 }, { "acc": 0.94636469, "epoch": 2.987301329434381, "grad_norm": 4.675527572631836, "learning_rate": 1.653241417989346e-06, "loss": 0.32713127, "memory(GiB)": 34.88, "step": 110330, "train_speed(iter/s)": 0.410406 }, { "acc": 0.94165373, "epoch": 2.987436709717597, "grad_norm": 12.058615684509277, "learning_rate": 1.6528257977358878e-06, "loss": 0.31242423, "memory(GiB)": 34.88, "step": 110335, "train_speed(iter/s)": 0.410407 }, { "acc": 0.92717257, "epoch": 2.9875720900008123, "grad_norm": 4.5133376121521, "learning_rate": 1.652410219418626e-06, "loss": 0.42887578, "memory(GiB)": 34.88, "step": 110340, "train_speed(iter/s)": 0.410408 }, { "acc": 0.93989019, "epoch": 2.987707470284028, "grad_norm": 8.491533279418945, "learning_rate": 1.6519946830427655e-06, "loss": 0.33817377, "memory(GiB)": 34.88, "step": 110345, "train_speed(iter/s)": 0.410409 }, { "acc": 0.93355007, "epoch": 2.9878428505672434, "grad_norm": 14.27933406829834, "learning_rate": 1.6515791886135166e-06, "loss": 0.40928335, "memory(GiB)": 34.88, "step": 110350, "train_speed(iter/s)": 0.41041 }, { "acc": 0.9390173, "epoch": 2.9879782308504588, "grad_norm": 3.0646679401397705, "learning_rate": 1.6511637361360802e-06, "loss": 0.32362814, "memory(GiB)": 34.88, "step": 110355, "train_speed(iter/s)": 0.410411 }, { "acc": 0.94081383, "epoch": 2.9881136111336746, "grad_norm": 8.264148712158203, "learning_rate": 1.650748325615662e-06, "loss": 0.39073858, "memory(GiB)": 34.88, "step": 110360, "train_speed(iter/s)": 0.410412 }, { "acc": 0.94678612, "epoch": 2.98824899141689, "grad_norm": 8.749750137329102, "learning_rate": 1.6503329570574656e-06, "loss": 0.32563355, "memory(GiB)": 34.88, "step": 110365, "train_speed(iter/s)": 0.410413 }, { "acc": 0.94047585, "epoch": 2.9883843717001057, "grad_norm": 5.267924785614014, "learning_rate": 1.6499176304666955e-06, "loss": 0.34015419, "memory(GiB)": 34.88, "step": 110370, "train_speed(iter/s)": 0.410414 }, { "acc": 0.93753014, "epoch": 2.988519751983321, "grad_norm": 4.633865833282471, "learning_rate": 1.649502345848553e-06, "loss": 0.36349258, "memory(GiB)": 34.88, "step": 110375, "train_speed(iter/s)": 0.410415 }, { "acc": 0.940732, "epoch": 2.988655132266537, "grad_norm": 4.515456676483154, "learning_rate": 1.6490871032082398e-06, "loss": 0.40623827, "memory(GiB)": 34.88, "step": 110380, "train_speed(iter/s)": 0.410417 }, { "acc": 0.92692022, "epoch": 2.9887905125497523, "grad_norm": 5.655021667480469, "learning_rate": 1.6486719025509608e-06, "loss": 0.45781188, "memory(GiB)": 34.88, "step": 110385, "train_speed(iter/s)": 0.410418 }, { "acc": 0.93564062, "epoch": 2.9889258928329676, "grad_norm": 10.534854888916016, "learning_rate": 1.6482567438819134e-06, "loss": 0.38507707, "memory(GiB)": 34.88, "step": 110390, "train_speed(iter/s)": 0.410419 }, { "acc": 0.93699589, "epoch": 2.9890612731161834, "grad_norm": 5.2273125648498535, "learning_rate": 1.6478416272062994e-06, "loss": 0.34363801, "memory(GiB)": 34.88, "step": 110395, "train_speed(iter/s)": 0.410419 }, { "acc": 0.94460707, "epoch": 2.9891966533993988, "grad_norm": 9.149271965026855, "learning_rate": 1.6474265525293203e-06, "loss": 0.42051601, "memory(GiB)": 34.88, "step": 110400, "train_speed(iter/s)": 0.41042 }, { "acc": 0.94995689, "epoch": 2.9893320336826146, "grad_norm": 2.5684661865234375, "learning_rate": 1.6470115198561759e-06, "loss": 0.30149188, "memory(GiB)": 34.88, "step": 110405, "train_speed(iter/s)": 0.410421 }, { "acc": 0.96039028, "epoch": 2.98946741396583, "grad_norm": 3.1223151683807373, "learning_rate": 1.6465965291920633e-06, "loss": 0.22928586, "memory(GiB)": 34.88, "step": 110410, "train_speed(iter/s)": 0.410422 }, { "acc": 0.94007387, "epoch": 2.9896027942490457, "grad_norm": 9.340338706970215, "learning_rate": 1.6461815805421817e-06, "loss": 0.35635033, "memory(GiB)": 34.88, "step": 110415, "train_speed(iter/s)": 0.410424 }, { "acc": 0.94333553, "epoch": 2.989738174532261, "grad_norm": 4.545822620391846, "learning_rate": 1.6457666739117294e-06, "loss": 0.30257196, "memory(GiB)": 34.88, "step": 110420, "train_speed(iter/s)": 0.410425 }, { "acc": 0.94391975, "epoch": 2.9898735548154765, "grad_norm": 8.259934425354004, "learning_rate": 1.6453518093059059e-06, "loss": 0.31181643, "memory(GiB)": 34.88, "step": 110425, "train_speed(iter/s)": 0.410426 }, { "acc": 0.925735, "epoch": 2.9900089350986923, "grad_norm": 7.45237922668457, "learning_rate": 1.6449369867299063e-06, "loss": 0.39642549, "memory(GiB)": 34.88, "step": 110430, "train_speed(iter/s)": 0.410427 }, { "acc": 0.92696323, "epoch": 2.9901443153819076, "grad_norm": 9.289963722229004, "learning_rate": 1.644522206188924e-06, "loss": 0.49815669, "memory(GiB)": 34.88, "step": 110435, "train_speed(iter/s)": 0.410427 }, { "acc": 0.92353878, "epoch": 2.9902796956651234, "grad_norm": 13.32606315612793, "learning_rate": 1.6441074676881604e-06, "loss": 0.49405403, "memory(GiB)": 34.88, "step": 110440, "train_speed(iter/s)": 0.410428 }, { "acc": 0.93336124, "epoch": 2.9904150759483388, "grad_norm": 14.315951347351074, "learning_rate": 1.6436927712328082e-06, "loss": 0.38366566, "memory(GiB)": 34.88, "step": 110445, "train_speed(iter/s)": 0.41043 }, { "acc": 0.94428701, "epoch": 2.9905504562315546, "grad_norm": 4.3989577293396, "learning_rate": 1.6432781168280624e-06, "loss": 0.31719596, "memory(GiB)": 34.88, "step": 110450, "train_speed(iter/s)": 0.410431 }, { "acc": 0.93297005, "epoch": 2.99068583651477, "grad_norm": 7.495877742767334, "learning_rate": 1.642863504479117e-06, "loss": 0.36717319, "memory(GiB)": 34.88, "step": 110455, "train_speed(iter/s)": 0.410432 }, { "acc": 0.92787933, "epoch": 2.9908212167979853, "grad_norm": 32.46124267578125, "learning_rate": 1.6424489341911683e-06, "loss": 0.43891754, "memory(GiB)": 34.88, "step": 110460, "train_speed(iter/s)": 0.410433 }, { "acc": 0.95435257, "epoch": 2.990956597081201, "grad_norm": 5.813667297363281, "learning_rate": 1.642034405969408e-06, "loss": 0.23689337, "memory(GiB)": 34.88, "step": 110465, "train_speed(iter/s)": 0.410434 }, { "acc": 0.94290524, "epoch": 2.991091977364417, "grad_norm": 4.263205051422119, "learning_rate": 1.6416199198190252e-06, "loss": 0.35720487, "memory(GiB)": 34.88, "step": 110470, "train_speed(iter/s)": 0.410435 }, { "acc": 0.93458939, "epoch": 2.9912273576476323, "grad_norm": 5.5125041007995605, "learning_rate": 1.6412054757452189e-06, "loss": 0.35063426, "memory(GiB)": 34.88, "step": 110475, "train_speed(iter/s)": 0.410436 }, { "acc": 0.93770409, "epoch": 2.9913627379308476, "grad_norm": 4.2097086906433105, "learning_rate": 1.6407910737531757e-06, "loss": 0.34774501, "memory(GiB)": 34.88, "step": 110480, "train_speed(iter/s)": 0.410437 }, { "acc": 0.94402838, "epoch": 2.9914981182140634, "grad_norm": 8.126248359680176, "learning_rate": 1.6403767138480891e-06, "loss": 0.3465606, "memory(GiB)": 34.88, "step": 110485, "train_speed(iter/s)": 0.410438 }, { "acc": 0.91831923, "epoch": 2.9916334984972788, "grad_norm": 10.553006172180176, "learning_rate": 1.6399623960351484e-06, "loss": 0.42498045, "memory(GiB)": 34.88, "step": 110490, "train_speed(iter/s)": 0.410439 }, { "acc": 0.94837723, "epoch": 2.991768878780494, "grad_norm": 7.854288578033447, "learning_rate": 1.6395481203195474e-06, "loss": 0.33041315, "memory(GiB)": 34.88, "step": 110495, "train_speed(iter/s)": 0.41044 }, { "acc": 0.93745499, "epoch": 2.99190425906371, "grad_norm": 6.920015811920166, "learning_rate": 1.6391338867064704e-06, "loss": 0.33143101, "memory(GiB)": 34.88, "step": 110500, "train_speed(iter/s)": 0.410441 }, { "acc": 0.94015942, "epoch": 2.9920396393469257, "grad_norm": 4.657181739807129, "learning_rate": 1.6387196952011094e-06, "loss": 0.30560229, "memory(GiB)": 34.88, "step": 110505, "train_speed(iter/s)": 0.410442 }, { "acc": 0.93981323, "epoch": 2.992175019630141, "grad_norm": 5.242971897125244, "learning_rate": 1.6383055458086545e-06, "loss": 0.35222542, "memory(GiB)": 34.88, "step": 110510, "train_speed(iter/s)": 0.410443 }, { "acc": 0.93829079, "epoch": 2.9923103999133565, "grad_norm": 7.764580726623535, "learning_rate": 1.6378914385342898e-06, "loss": 0.3582494, "memory(GiB)": 34.88, "step": 110515, "train_speed(iter/s)": 0.410444 }, { "acc": 0.93370676, "epoch": 2.9924457801965723, "grad_norm": 7.358953475952148, "learning_rate": 1.6374773733832053e-06, "loss": 0.39054003, "memory(GiB)": 34.88, "step": 110520, "train_speed(iter/s)": 0.410445 }, { "acc": 0.94170218, "epoch": 2.9925811604797876, "grad_norm": 12.018329620361328, "learning_rate": 1.6370633503605868e-06, "loss": 0.35902722, "memory(GiB)": 34.88, "step": 110525, "train_speed(iter/s)": 0.410446 }, { "acc": 0.92270069, "epoch": 2.9927165407630034, "grad_norm": 17.484479904174805, "learning_rate": 1.6366493694716235e-06, "loss": 0.49499598, "memory(GiB)": 34.88, "step": 110530, "train_speed(iter/s)": 0.410447 }, { "acc": 0.95664139, "epoch": 2.9928519210462188, "grad_norm": 5.421092510223389, "learning_rate": 1.6362354307214979e-06, "loss": 0.27565737, "memory(GiB)": 34.88, "step": 110535, "train_speed(iter/s)": 0.410448 }, { "acc": 0.95158501, "epoch": 2.9929873013294346, "grad_norm": 4.769205570220947, "learning_rate": 1.6358215341153962e-06, "loss": 0.25646424, "memory(GiB)": 34.88, "step": 110540, "train_speed(iter/s)": 0.410449 }, { "acc": 0.92502069, "epoch": 2.99312268161265, "grad_norm": 7.720022678375244, "learning_rate": 1.635407679658505e-06, "loss": 0.43264246, "memory(GiB)": 34.88, "step": 110545, "train_speed(iter/s)": 0.41045 }, { "acc": 0.9300457, "epoch": 2.9932580618958653, "grad_norm": 3.5857415199279785, "learning_rate": 1.634993867356008e-06, "loss": 0.34700251, "memory(GiB)": 34.88, "step": 110550, "train_speed(iter/s)": 0.410451 }, { "acc": 0.93734055, "epoch": 2.993393442179081, "grad_norm": 15.635001182556152, "learning_rate": 1.6345800972130873e-06, "loss": 0.41873322, "memory(GiB)": 34.88, "step": 110555, "train_speed(iter/s)": 0.410452 }, { "acc": 0.95416641, "epoch": 2.9935288224622965, "grad_norm": 5.949113368988037, "learning_rate": 1.6341663692349274e-06, "loss": 0.26738238, "memory(GiB)": 34.88, "step": 110560, "train_speed(iter/s)": 0.410453 }, { "acc": 0.94499626, "epoch": 2.9936642027455123, "grad_norm": 14.889608383178711, "learning_rate": 1.6337526834267125e-06, "loss": 0.33071508, "memory(GiB)": 34.88, "step": 110565, "train_speed(iter/s)": 0.410454 }, { "acc": 0.9474494, "epoch": 2.9937995830287276, "grad_norm": 14.871297836303711, "learning_rate": 1.6333390397936222e-06, "loss": 0.31843195, "memory(GiB)": 34.88, "step": 110570, "train_speed(iter/s)": 0.410455 }, { "acc": 0.93447123, "epoch": 2.9939349633119434, "grad_norm": 10.725038528442383, "learning_rate": 1.632925438340839e-06, "loss": 0.38911247, "memory(GiB)": 34.88, "step": 110575, "train_speed(iter/s)": 0.410456 }, { "acc": 0.9291728, "epoch": 2.994070343595159, "grad_norm": 6.430381774902344, "learning_rate": 1.6325118790735447e-06, "loss": 0.3962074, "memory(GiB)": 34.88, "step": 110580, "train_speed(iter/s)": 0.410457 }, { "acc": 0.94463568, "epoch": 2.994205723878374, "grad_norm": 3.1990325450897217, "learning_rate": 1.6320983619969217e-06, "loss": 0.30183082, "memory(GiB)": 34.88, "step": 110585, "train_speed(iter/s)": 0.410458 }, { "acc": 0.94850864, "epoch": 2.99434110416159, "grad_norm": 10.974516868591309, "learning_rate": 1.6316848871161457e-06, "loss": 0.27599244, "memory(GiB)": 34.88, "step": 110590, "train_speed(iter/s)": 0.410459 }, { "acc": 0.92555389, "epoch": 2.9944764844448053, "grad_norm": 7.609178066253662, "learning_rate": 1.6312714544363985e-06, "loss": 0.43442392, "memory(GiB)": 34.88, "step": 110595, "train_speed(iter/s)": 0.41046 }, { "acc": 0.92976847, "epoch": 2.994611864728021, "grad_norm": 3.172412157058716, "learning_rate": 1.6308580639628619e-06, "loss": 0.44461594, "memory(GiB)": 34.88, "step": 110600, "train_speed(iter/s)": 0.410461 }, { "acc": 0.93762655, "epoch": 2.9947472450112365, "grad_norm": 8.14970588684082, "learning_rate": 1.6304447157007097e-06, "loss": 0.37079401, "memory(GiB)": 34.88, "step": 110605, "train_speed(iter/s)": 0.410462 }, { "acc": 0.94086533, "epoch": 2.9948826252944523, "grad_norm": 3.1027278900146484, "learning_rate": 1.6300314096551224e-06, "loss": 0.37285032, "memory(GiB)": 34.88, "step": 110610, "train_speed(iter/s)": 0.410463 }, { "acc": 0.92770653, "epoch": 2.9950180055776676, "grad_norm": 4.256231307983398, "learning_rate": 1.6296181458312772e-06, "loss": 0.37839379, "memory(GiB)": 34.88, "step": 110615, "train_speed(iter/s)": 0.410464 }, { "acc": 0.93749638, "epoch": 2.995153385860883, "grad_norm": 23.404212951660156, "learning_rate": 1.6292049242343519e-06, "loss": 0.37595751, "memory(GiB)": 34.88, "step": 110620, "train_speed(iter/s)": 0.410465 }, { "acc": 0.95127144, "epoch": 2.995288766144099, "grad_norm": 7.066459655761719, "learning_rate": 1.6287917448695211e-06, "loss": 0.28732142, "memory(GiB)": 34.88, "step": 110625, "train_speed(iter/s)": 0.410466 }, { "acc": 0.94191723, "epoch": 2.9954241464273146, "grad_norm": 10.686768531799316, "learning_rate": 1.6283786077419614e-06, "loss": 0.32059805, "memory(GiB)": 34.88, "step": 110630, "train_speed(iter/s)": 0.410467 }, { "acc": 0.94152374, "epoch": 2.99555952671053, "grad_norm": 9.864811897277832, "learning_rate": 1.62796551285685e-06, "loss": 0.34300058, "memory(GiB)": 34.88, "step": 110635, "train_speed(iter/s)": 0.410468 }, { "acc": 0.93570414, "epoch": 2.9956949069937453, "grad_norm": 10.112260818481445, "learning_rate": 1.6275524602193578e-06, "loss": 0.34805279, "memory(GiB)": 34.88, "step": 110640, "train_speed(iter/s)": 0.410469 }, { "acc": 0.93106346, "epoch": 2.995830287276961, "grad_norm": 3.9659430980682373, "learning_rate": 1.6271394498346621e-06, "loss": 0.37074964, "memory(GiB)": 34.88, "step": 110645, "train_speed(iter/s)": 0.41047 }, { "acc": 0.92868052, "epoch": 2.9959656675601765, "grad_norm": 25.030366897583008, "learning_rate": 1.6267264817079362e-06, "loss": 0.41471949, "memory(GiB)": 34.88, "step": 110650, "train_speed(iter/s)": 0.410471 }, { "acc": 0.95588913, "epoch": 2.996101047843392, "grad_norm": 10.23346996307373, "learning_rate": 1.6263135558443541e-06, "loss": 0.27973025, "memory(GiB)": 34.88, "step": 110655, "train_speed(iter/s)": 0.410472 }, { "acc": 0.94541798, "epoch": 2.9962364281266076, "grad_norm": 5.50471830368042, "learning_rate": 1.625900672249086e-06, "loss": 0.32185543, "memory(GiB)": 34.88, "step": 110660, "train_speed(iter/s)": 0.410473 }, { "acc": 0.95503044, "epoch": 2.9963718084098234, "grad_norm": 7.559676170349121, "learning_rate": 1.6254878309273059e-06, "loss": 0.26822653, "memory(GiB)": 34.88, "step": 110665, "train_speed(iter/s)": 0.410474 }, { "acc": 0.94328823, "epoch": 2.996507188693039, "grad_norm": 7.470510482788086, "learning_rate": 1.6250750318841843e-06, "loss": 0.31240947, "memory(GiB)": 34.88, "step": 110670, "train_speed(iter/s)": 0.410475 }, { "acc": 0.93691072, "epoch": 2.996642568976254, "grad_norm": 16.594390869140625, "learning_rate": 1.6246622751248956e-06, "loss": 0.33341088, "memory(GiB)": 34.88, "step": 110675, "train_speed(iter/s)": 0.410476 }, { "acc": 0.93923588, "epoch": 2.99677794925947, "grad_norm": 6.399104595184326, "learning_rate": 1.6242495606546057e-06, "loss": 0.37772133, "memory(GiB)": 34.88, "step": 110680, "train_speed(iter/s)": 0.410477 }, { "acc": 0.93461895, "epoch": 2.9969133295426853, "grad_norm": 9.350090980529785, "learning_rate": 1.623836888478487e-06, "loss": 0.39353101, "memory(GiB)": 34.88, "step": 110685, "train_speed(iter/s)": 0.410478 }, { "acc": 0.94594822, "epoch": 2.997048709825901, "grad_norm": 9.961446762084961, "learning_rate": 1.6234242586017107e-06, "loss": 0.29010553, "memory(GiB)": 34.88, "step": 110690, "train_speed(iter/s)": 0.41048 }, { "acc": 0.92497635, "epoch": 2.9971840901091165, "grad_norm": 7.160745620727539, "learning_rate": 1.6230116710294423e-06, "loss": 0.45741529, "memory(GiB)": 34.88, "step": 110695, "train_speed(iter/s)": 0.410481 }, { "acc": 0.94007778, "epoch": 2.9973194703923323, "grad_norm": 5.076314449310303, "learning_rate": 1.622599125766852e-06, "loss": 0.32580101, "memory(GiB)": 34.88, "step": 110700, "train_speed(iter/s)": 0.410482 }, { "acc": 0.9510025, "epoch": 2.9974548506755476, "grad_norm": 6.052025318145752, "learning_rate": 1.6221866228191076e-06, "loss": 0.28285785, "memory(GiB)": 34.88, "step": 110705, "train_speed(iter/s)": 0.410483 }, { "acc": 0.9344347, "epoch": 2.997590230958763, "grad_norm": 11.6202392578125, "learning_rate": 1.6217741621913782e-06, "loss": 0.33832207, "memory(GiB)": 34.88, "step": 110710, "train_speed(iter/s)": 0.410484 }, { "acc": 0.93551579, "epoch": 2.997725611241979, "grad_norm": 7.86057186126709, "learning_rate": 1.6213617438888273e-06, "loss": 0.36171646, "memory(GiB)": 34.88, "step": 110715, "train_speed(iter/s)": 0.410485 }, { "acc": 0.95058002, "epoch": 2.997860991525194, "grad_norm": 21.00679588317871, "learning_rate": 1.6209493679166235e-06, "loss": 0.24760489, "memory(GiB)": 34.88, "step": 110720, "train_speed(iter/s)": 0.410486 }, { "acc": 0.93928633, "epoch": 2.99799637180841, "grad_norm": 8.888440132141113, "learning_rate": 1.6205370342799343e-06, "loss": 0.36337049, "memory(GiB)": 34.88, "step": 110725, "train_speed(iter/s)": 0.410487 }, { "acc": 0.94214258, "epoch": 2.9981317520916253, "grad_norm": 11.353857040405273, "learning_rate": 1.6201247429839202e-06, "loss": 0.37257376, "memory(GiB)": 34.88, "step": 110730, "train_speed(iter/s)": 0.410488 }, { "acc": 0.95072165, "epoch": 2.998267132374841, "grad_norm": 7.848679065704346, "learning_rate": 1.6197124940337493e-06, "loss": 0.25614614, "memory(GiB)": 34.88, "step": 110735, "train_speed(iter/s)": 0.410489 }, { "acc": 0.95142746, "epoch": 2.9984025126580565, "grad_norm": 7.423421382904053, "learning_rate": 1.6193002874345844e-06, "loss": 0.28141108, "memory(GiB)": 34.88, "step": 110740, "train_speed(iter/s)": 0.410489 }, { "acc": 0.93492203, "epoch": 2.998537892941272, "grad_norm": 6.8795976638793945, "learning_rate": 1.6188881231915924e-06, "loss": 0.36106846, "memory(GiB)": 34.88, "step": 110745, "train_speed(iter/s)": 0.41049 }, { "acc": 0.93618898, "epoch": 2.9986732732244876, "grad_norm": 7.381380081176758, "learning_rate": 1.6184760013099301e-06, "loss": 0.3793992, "memory(GiB)": 34.88, "step": 110750, "train_speed(iter/s)": 0.410491 }, { "acc": 0.93646078, "epoch": 2.998808653507703, "grad_norm": 15.680730819702148, "learning_rate": 1.6180639217947677e-06, "loss": 0.35265818, "memory(GiB)": 34.88, "step": 110755, "train_speed(iter/s)": 0.410492 }, { "acc": 0.93350639, "epoch": 2.998944033790919, "grad_norm": 8.896382331848145, "learning_rate": 1.6176518846512635e-06, "loss": 0.37039766, "memory(GiB)": 34.88, "step": 110760, "train_speed(iter/s)": 0.410493 }, { "acc": 0.92687187, "epoch": 2.999079414074134, "grad_norm": 3.9569687843322754, "learning_rate": 1.6172398898845774e-06, "loss": 0.45641928, "memory(GiB)": 34.88, "step": 110765, "train_speed(iter/s)": 0.410494 }, { "acc": 0.94443054, "epoch": 2.99921479435735, "grad_norm": 4.3377251625061035, "learning_rate": 1.616827937499873e-06, "loss": 0.32306449, "memory(GiB)": 34.88, "step": 110770, "train_speed(iter/s)": 0.410495 }, { "acc": 0.93917856, "epoch": 2.9993501746405653, "grad_norm": 7.309320449829102, "learning_rate": 1.6164160275023097e-06, "loss": 0.34773071, "memory(GiB)": 34.88, "step": 110775, "train_speed(iter/s)": 0.410496 }, { "acc": 0.92700653, "epoch": 2.9994855549237807, "grad_norm": 8.211775779724121, "learning_rate": 1.6160041598970492e-06, "loss": 0.3974648, "memory(GiB)": 34.88, "step": 110780, "train_speed(iter/s)": 0.410498 }, { "acc": 0.95271797, "epoch": 2.9996209352069965, "grad_norm": 4.681776523590088, "learning_rate": 1.6155923346892466e-06, "loss": 0.25477109, "memory(GiB)": 34.88, "step": 110785, "train_speed(iter/s)": 0.410499 }, { "acc": 0.93618507, "epoch": 2.9997563154902123, "grad_norm": 9.384389877319336, "learning_rate": 1.6151805518840668e-06, "loss": 0.32338762, "memory(GiB)": 34.88, "step": 110790, "train_speed(iter/s)": 0.4105 }, { "acc": 0.94546585, "epoch": 2.9998916957734276, "grad_norm": 14.128491401672363, "learning_rate": 1.6147688114866639e-06, "loss": 0.3144052, "memory(GiB)": 34.88, "step": 110795, "train_speed(iter/s)": 0.410501 }, { "acc": 0.9451273, "epoch": 3.000027076056643, "grad_norm": 5.936593532562256, "learning_rate": 1.6143571135021985e-06, "loss": 0.30689468, "memory(GiB)": 34.88, "step": 110800, "train_speed(iter/s)": 0.410499 }, { "acc": 0.95005493, "epoch": 3.000162456339859, "grad_norm": 5.024670124053955, "learning_rate": 1.6139454579358252e-06, "loss": 0.31329594, "memory(GiB)": 34.88, "step": 110805, "train_speed(iter/s)": 0.4105 }, { "acc": 0.94575863, "epoch": 3.000297836623074, "grad_norm": 9.36400032043457, "learning_rate": 1.6135338447927022e-06, "loss": 0.2999217, "memory(GiB)": 34.88, "step": 110810, "train_speed(iter/s)": 0.410501 }, { "acc": 0.93121262, "epoch": 3.00043321690629, "grad_norm": 7.205721855163574, "learning_rate": 1.6131222740779867e-06, "loss": 0.39460928, "memory(GiB)": 34.88, "step": 110815, "train_speed(iter/s)": 0.410503 }, { "acc": 0.93436966, "epoch": 3.0005685971895053, "grad_norm": 6.165472984313965, "learning_rate": 1.6127107457968307e-06, "loss": 0.3931813, "memory(GiB)": 34.88, "step": 110820, "train_speed(iter/s)": 0.410504 }, { "acc": 0.95379105, "epoch": 3.0007039774727207, "grad_norm": 7.336496829986572, "learning_rate": 1.612299259954395e-06, "loss": 0.28072462, "memory(GiB)": 34.88, "step": 110825, "train_speed(iter/s)": 0.410505 }, { "acc": 0.94105053, "epoch": 3.0008393577559365, "grad_norm": 7.209446430206299, "learning_rate": 1.6118878165558303e-06, "loss": 0.3706068, "memory(GiB)": 34.88, "step": 110830, "train_speed(iter/s)": 0.410506 }, { "acc": 0.92758493, "epoch": 3.000974738039152, "grad_norm": 10.284165382385254, "learning_rate": 1.6114764156062929e-06, "loss": 0.38990998, "memory(GiB)": 34.88, "step": 110835, "train_speed(iter/s)": 0.410507 }, { "acc": 0.93472252, "epoch": 3.0011101183223676, "grad_norm": 9.097795486450195, "learning_rate": 1.6110650571109337e-06, "loss": 0.40853252, "memory(GiB)": 34.88, "step": 110840, "train_speed(iter/s)": 0.410508 }, { "acc": 0.93119965, "epoch": 3.001245498605583, "grad_norm": 4.421783924102783, "learning_rate": 1.6106537410749073e-06, "loss": 0.3978385, "memory(GiB)": 34.88, "step": 110845, "train_speed(iter/s)": 0.410509 }, { "acc": 0.94758873, "epoch": 3.001380878888799, "grad_norm": 6.47208833694458, "learning_rate": 1.6102424675033684e-06, "loss": 0.36602902, "memory(GiB)": 34.88, "step": 110850, "train_speed(iter/s)": 0.41051 }, { "acc": 0.9521409, "epoch": 3.001516259172014, "grad_norm": 2.2747645378112793, "learning_rate": 1.6098312364014634e-06, "loss": 0.29726257, "memory(GiB)": 34.88, "step": 110855, "train_speed(iter/s)": 0.410511 }, { "acc": 0.9438385, "epoch": 3.00165163945523, "grad_norm": 7.806044578552246, "learning_rate": 1.6094200477743505e-06, "loss": 0.34366751, "memory(GiB)": 34.88, "step": 110860, "train_speed(iter/s)": 0.410512 }, { "acc": 0.95279064, "epoch": 3.0017870197384453, "grad_norm": 4.047074317932129, "learning_rate": 1.6090089016271764e-06, "loss": 0.26988845, "memory(GiB)": 34.88, "step": 110865, "train_speed(iter/s)": 0.410513 }, { "acc": 0.94277754, "epoch": 3.0019224000216607, "grad_norm": 8.677925109863281, "learning_rate": 1.6085977979650938e-06, "loss": 0.34025431, "memory(GiB)": 34.88, "step": 110870, "train_speed(iter/s)": 0.410514 }, { "acc": 0.95049019, "epoch": 3.0020577803048765, "grad_norm": 4.567361354827881, "learning_rate": 1.6081867367932477e-06, "loss": 0.34284329, "memory(GiB)": 34.88, "step": 110875, "train_speed(iter/s)": 0.410515 }, { "acc": 0.94474354, "epoch": 3.002193160588092, "grad_norm": 9.176656723022461, "learning_rate": 1.6077757181167947e-06, "loss": 0.36150045, "memory(GiB)": 34.88, "step": 110880, "train_speed(iter/s)": 0.410516 }, { "acc": 0.93850183, "epoch": 3.0023285408713076, "grad_norm": 6.001633644104004, "learning_rate": 1.6073647419408803e-06, "loss": 0.39953775, "memory(GiB)": 34.88, "step": 110885, "train_speed(iter/s)": 0.410517 }, { "acc": 0.94488354, "epoch": 3.002463921154523, "grad_norm": 6.156196117401123, "learning_rate": 1.6069538082706484e-06, "loss": 0.250617, "memory(GiB)": 34.88, "step": 110890, "train_speed(iter/s)": 0.410518 }, { "acc": 0.94121008, "epoch": 3.002599301437739, "grad_norm": 14.172852516174316, "learning_rate": 1.6065429171112554e-06, "loss": 0.33994269, "memory(GiB)": 34.88, "step": 110895, "train_speed(iter/s)": 0.410519 }, { "acc": 0.93913631, "epoch": 3.002734681720954, "grad_norm": 8.09077262878418, "learning_rate": 1.6061320684678411e-06, "loss": 0.40420303, "memory(GiB)": 34.88, "step": 110900, "train_speed(iter/s)": 0.41052 }, { "acc": 0.93105698, "epoch": 3.0028700620041695, "grad_norm": 9.299474716186523, "learning_rate": 1.6057212623455579e-06, "loss": 0.47166796, "memory(GiB)": 34.88, "step": 110905, "train_speed(iter/s)": 0.410521 }, { "acc": 0.93963623, "epoch": 3.0030054422873853, "grad_norm": 7.90201473236084, "learning_rate": 1.6053104987495448e-06, "loss": 0.35910258, "memory(GiB)": 34.88, "step": 110910, "train_speed(iter/s)": 0.410522 }, { "acc": 0.92980518, "epoch": 3.0031408225706007, "grad_norm": 8.570483207702637, "learning_rate": 1.6048997776849555e-06, "loss": 0.4018961, "memory(GiB)": 34.88, "step": 110915, "train_speed(iter/s)": 0.410523 }, { "acc": 0.94536276, "epoch": 3.0032762028538165, "grad_norm": 9.067890167236328, "learning_rate": 1.6044890991569298e-06, "loss": 0.3178041, "memory(GiB)": 34.88, "step": 110920, "train_speed(iter/s)": 0.410524 }, { "acc": 0.9375536, "epoch": 3.003411583137032, "grad_norm": 6.782474994659424, "learning_rate": 1.6040784631706133e-06, "loss": 0.48432565, "memory(GiB)": 34.88, "step": 110925, "train_speed(iter/s)": 0.410525 }, { "acc": 0.93522091, "epoch": 3.0035469634202476, "grad_norm": 15.226301193237305, "learning_rate": 1.6036678697311525e-06, "loss": 0.37973018, "memory(GiB)": 34.88, "step": 110930, "train_speed(iter/s)": 0.410527 }, { "acc": 0.92811031, "epoch": 3.003682343703463, "grad_norm": 7.311668395996094, "learning_rate": 1.603257318843687e-06, "loss": 0.37297206, "memory(GiB)": 34.88, "step": 110935, "train_speed(iter/s)": 0.410527 }, { "acc": 0.94599447, "epoch": 3.003817723986679, "grad_norm": 7.740264892578125, "learning_rate": 1.6028468105133637e-06, "loss": 0.28591614, "memory(GiB)": 34.88, "step": 110940, "train_speed(iter/s)": 0.410528 }, { "acc": 0.94293423, "epoch": 3.003953104269894, "grad_norm": 8.686772346496582, "learning_rate": 1.6024363447453196e-06, "loss": 0.31608014, "memory(GiB)": 34.88, "step": 110945, "train_speed(iter/s)": 0.410529 }, { "acc": 0.94683609, "epoch": 3.0040884845531095, "grad_norm": 5.263424396514893, "learning_rate": 1.602025921544703e-06, "loss": 0.24370408, "memory(GiB)": 34.88, "step": 110950, "train_speed(iter/s)": 0.41053 }, { "acc": 0.93874283, "epoch": 3.0042238648363253, "grad_norm": 6.882538318634033, "learning_rate": 1.6016155409166503e-06, "loss": 0.38281536, "memory(GiB)": 34.88, "step": 110955, "train_speed(iter/s)": 0.410531 }, { "acc": 0.95542049, "epoch": 3.0043592451195407, "grad_norm": 13.622044563293457, "learning_rate": 1.6012052028663049e-06, "loss": 0.2946105, "memory(GiB)": 34.88, "step": 110960, "train_speed(iter/s)": 0.410532 }, { "acc": 0.9608984, "epoch": 3.0044946254027565, "grad_norm": 4.1864542961120605, "learning_rate": 1.6007949073988062e-06, "loss": 0.22036381, "memory(GiB)": 34.88, "step": 110965, "train_speed(iter/s)": 0.410533 }, { "acc": 0.95538864, "epoch": 3.004630005685972, "grad_norm": 6.364385604858398, "learning_rate": 1.6003846545192962e-06, "loss": 0.35127084, "memory(GiB)": 34.88, "step": 110970, "train_speed(iter/s)": 0.410534 }, { "acc": 0.93298435, "epoch": 3.0047653859691876, "grad_norm": 8.612198829650879, "learning_rate": 1.5999744442329116e-06, "loss": 0.3394316, "memory(GiB)": 34.88, "step": 110975, "train_speed(iter/s)": 0.410535 }, { "acc": 0.94521675, "epoch": 3.004900766252403, "grad_norm": 4.381752967834473, "learning_rate": 1.5995642765447882e-06, "loss": 0.31452017, "memory(GiB)": 34.88, "step": 110980, "train_speed(iter/s)": 0.410536 }, { "acc": 0.94280224, "epoch": 3.0050361465356183, "grad_norm": 4.983355522155762, "learning_rate": 1.5991541514600711e-06, "loss": 0.31869688, "memory(GiB)": 34.88, "step": 110985, "train_speed(iter/s)": 0.410537 }, { "acc": 0.94658566, "epoch": 3.005171526818834, "grad_norm": 5.1350836753845215, "learning_rate": 1.598744068983893e-06, "loss": 0.31657248, "memory(GiB)": 34.88, "step": 110990, "train_speed(iter/s)": 0.410539 }, { "acc": 0.94042377, "epoch": 3.0053069071020495, "grad_norm": 5.004647254943848, "learning_rate": 1.5983340291213933e-06, "loss": 0.32785616, "memory(GiB)": 34.88, "step": 110995, "train_speed(iter/s)": 0.41054 }, { "acc": 0.94994621, "epoch": 3.0054422873852653, "grad_norm": 3.3087825775146484, "learning_rate": 1.597924031877705e-06, "loss": 0.29003377, "memory(GiB)": 34.88, "step": 111000, "train_speed(iter/s)": 0.410541 }, { "acc": 0.93324385, "epoch": 3.0055776676684807, "grad_norm": 8.128571510314941, "learning_rate": 1.5975140772579697e-06, "loss": 0.30001807, "memory(GiB)": 34.88, "step": 111005, "train_speed(iter/s)": 0.410542 }, { "acc": 0.94609575, "epoch": 3.0057130479516965, "grad_norm": 10.143853187561035, "learning_rate": 1.5971041652673205e-06, "loss": 0.27284396, "memory(GiB)": 34.88, "step": 111010, "train_speed(iter/s)": 0.410543 }, { "acc": 0.94356899, "epoch": 3.005848428234912, "grad_norm": 2.500354528427124, "learning_rate": 1.5966942959108879e-06, "loss": 0.33630681, "memory(GiB)": 34.88, "step": 111015, "train_speed(iter/s)": 0.410544 }, { "acc": 0.93797855, "epoch": 3.0059838085181276, "grad_norm": 5.798437595367432, "learning_rate": 1.5962844691938136e-06, "loss": 0.32963963, "memory(GiB)": 34.88, "step": 111020, "train_speed(iter/s)": 0.410545 }, { "acc": 0.94916992, "epoch": 3.006119188801343, "grad_norm": 10.643084526062012, "learning_rate": 1.5958746851212265e-06, "loss": 0.3070693, "memory(GiB)": 34.88, "step": 111025, "train_speed(iter/s)": 0.410546 }, { "acc": 0.94360561, "epoch": 3.0062545690845583, "grad_norm": 7.468898773193359, "learning_rate": 1.595464943698263e-06, "loss": 0.38545558, "memory(GiB)": 34.88, "step": 111030, "train_speed(iter/s)": 0.410547 }, { "acc": 0.93899126, "epoch": 3.006389949367774, "grad_norm": 8.272037506103516, "learning_rate": 1.5950552449300513e-06, "loss": 0.40759897, "memory(GiB)": 34.88, "step": 111035, "train_speed(iter/s)": 0.410548 }, { "acc": 0.94429159, "epoch": 3.0065253296509895, "grad_norm": 11.427374839782715, "learning_rate": 1.5946455888217305e-06, "loss": 0.37094522, "memory(GiB)": 34.88, "step": 111040, "train_speed(iter/s)": 0.410549 }, { "acc": 0.93633738, "epoch": 3.0066607099342053, "grad_norm": 5.088961124420166, "learning_rate": 1.5942359753784267e-06, "loss": 0.4288702, "memory(GiB)": 34.88, "step": 111045, "train_speed(iter/s)": 0.41055 }, { "acc": 0.94603138, "epoch": 3.0067960902174207, "grad_norm": 4.051671981811523, "learning_rate": 1.5938264046052732e-06, "loss": 0.31273522, "memory(GiB)": 34.88, "step": 111050, "train_speed(iter/s)": 0.410551 }, { "acc": 0.94330149, "epoch": 3.0069314705006365, "grad_norm": 7.025436878204346, "learning_rate": 1.5934168765074023e-06, "loss": 0.39893513, "memory(GiB)": 34.88, "step": 111055, "train_speed(iter/s)": 0.410552 }, { "acc": 0.94976635, "epoch": 3.007066850783852, "grad_norm": 9.75017261505127, "learning_rate": 1.5930073910899408e-06, "loss": 0.3028044, "memory(GiB)": 34.88, "step": 111060, "train_speed(iter/s)": 0.410553 }, { "acc": 0.95478897, "epoch": 3.007202231067067, "grad_norm": 4.265255928039551, "learning_rate": 1.5925979483580211e-06, "loss": 0.21693561, "memory(GiB)": 34.88, "step": 111065, "train_speed(iter/s)": 0.410554 }, { "acc": 0.94010887, "epoch": 3.007337611350283, "grad_norm": 8.541059494018555, "learning_rate": 1.5921885483167685e-06, "loss": 0.32214565, "memory(GiB)": 34.88, "step": 111070, "train_speed(iter/s)": 0.410555 }, { "acc": 0.94211235, "epoch": 3.0074729916334983, "grad_norm": 7.775313377380371, "learning_rate": 1.5917791909713173e-06, "loss": 0.37254062, "memory(GiB)": 34.88, "step": 111075, "train_speed(iter/s)": 0.410556 }, { "acc": 0.92214813, "epoch": 3.007608371916714, "grad_norm": 4.673927307128906, "learning_rate": 1.5913698763267903e-06, "loss": 0.49241672, "memory(GiB)": 34.88, "step": 111080, "train_speed(iter/s)": 0.410557 }, { "acc": 0.9369751, "epoch": 3.0077437521999295, "grad_norm": 4.636883735656738, "learning_rate": 1.590960604388317e-06, "loss": 0.32031412, "memory(GiB)": 34.88, "step": 111085, "train_speed(iter/s)": 0.410558 }, { "acc": 0.9397541, "epoch": 3.0078791324831453, "grad_norm": 14.138124465942383, "learning_rate": 1.5905513751610247e-06, "loss": 0.35677903, "memory(GiB)": 34.88, "step": 111090, "train_speed(iter/s)": 0.410559 }, { "acc": 0.9444602, "epoch": 3.0080145127663607, "grad_norm": 4.271028995513916, "learning_rate": 1.590142188650041e-06, "loss": 0.36703653, "memory(GiB)": 34.88, "step": 111095, "train_speed(iter/s)": 0.41056 }, { "acc": 0.93537016, "epoch": 3.0081498930495765, "grad_norm": 3.6989645957946777, "learning_rate": 1.5897330448604906e-06, "loss": 0.37518148, "memory(GiB)": 34.88, "step": 111100, "train_speed(iter/s)": 0.410561 }, { "acc": 0.93211603, "epoch": 3.008285273332792, "grad_norm": 12.686211585998535, "learning_rate": 1.589323943797495e-06, "loss": 0.47637033, "memory(GiB)": 34.88, "step": 111105, "train_speed(iter/s)": 0.410562 }, { "acc": 0.94166937, "epoch": 3.008420653616007, "grad_norm": 5.592038631439209, "learning_rate": 1.5889148854661854e-06, "loss": 0.29871345, "memory(GiB)": 34.88, "step": 111110, "train_speed(iter/s)": 0.410563 }, { "acc": 0.94229336, "epoch": 3.008556033899223, "grad_norm": 3.261099100112915, "learning_rate": 1.5885058698716813e-06, "loss": 0.26164572, "memory(GiB)": 34.88, "step": 111115, "train_speed(iter/s)": 0.410564 }, { "acc": 0.95009174, "epoch": 3.0086914141824384, "grad_norm": 10.312600135803223, "learning_rate": 1.5880968970191086e-06, "loss": 0.3025176, "memory(GiB)": 34.88, "step": 111120, "train_speed(iter/s)": 0.410565 }, { "acc": 0.93513546, "epoch": 3.008826794465654, "grad_norm": 12.552830696105957, "learning_rate": 1.5876879669135905e-06, "loss": 0.32966285, "memory(GiB)": 34.88, "step": 111125, "train_speed(iter/s)": 0.410566 }, { "acc": 0.94946795, "epoch": 3.0089621747488695, "grad_norm": 13.055011749267578, "learning_rate": 1.5872790795602499e-06, "loss": 0.29838243, "memory(GiB)": 34.88, "step": 111130, "train_speed(iter/s)": 0.410567 }, { "acc": 0.9304554, "epoch": 3.0090975550320853, "grad_norm": 24.16932487487793, "learning_rate": 1.5868702349642096e-06, "loss": 0.39633214, "memory(GiB)": 34.88, "step": 111135, "train_speed(iter/s)": 0.410568 }, { "acc": 0.93834267, "epoch": 3.0092329353153007, "grad_norm": 7.1735453605651855, "learning_rate": 1.5864614331305857e-06, "loss": 0.37874894, "memory(GiB)": 34.88, "step": 111140, "train_speed(iter/s)": 0.410569 }, { "acc": 0.93643017, "epoch": 3.009368315598516, "grad_norm": 4.009777069091797, "learning_rate": 1.5860526740645069e-06, "loss": 0.37458375, "memory(GiB)": 34.88, "step": 111145, "train_speed(iter/s)": 0.41057 }, { "acc": 0.93172035, "epoch": 3.009503695881732, "grad_norm": 8.431647300720215, "learning_rate": 1.5856439577710886e-06, "loss": 0.39649405, "memory(GiB)": 34.88, "step": 111150, "train_speed(iter/s)": 0.410571 }, { "acc": 0.94870157, "epoch": 3.009639076164947, "grad_norm": 7.586237907409668, "learning_rate": 1.5852352842554522e-06, "loss": 0.27737145, "memory(GiB)": 34.88, "step": 111155, "train_speed(iter/s)": 0.410572 }, { "acc": 0.94793634, "epoch": 3.009774456448163, "grad_norm": 5.436235427856445, "learning_rate": 1.5848266535227174e-06, "loss": 0.32552862, "memory(GiB)": 34.88, "step": 111160, "train_speed(iter/s)": 0.410573 }, { "acc": 0.94555731, "epoch": 3.0099098367313784, "grad_norm": 5.297762393951416, "learning_rate": 1.5844180655780051e-06, "loss": 0.34311461, "memory(GiB)": 34.88, "step": 111165, "train_speed(iter/s)": 0.410574 }, { "acc": 0.93303442, "epoch": 3.010045217014594, "grad_norm": 6.297247409820557, "learning_rate": 1.58400952042643e-06, "loss": 0.45902929, "memory(GiB)": 34.88, "step": 111170, "train_speed(iter/s)": 0.410575 }, { "acc": 0.94147396, "epoch": 3.0101805972978095, "grad_norm": 7.192718505859375, "learning_rate": 1.5836010180731113e-06, "loss": 0.3710542, "memory(GiB)": 34.88, "step": 111175, "train_speed(iter/s)": 0.410576 }, { "acc": 0.95094681, "epoch": 3.0103159775810253, "grad_norm": 5.5130295753479, "learning_rate": 1.583192558523168e-06, "loss": 0.24822185, "memory(GiB)": 34.88, "step": 111180, "train_speed(iter/s)": 0.410577 }, { "acc": 0.94433441, "epoch": 3.0104513578642407, "grad_norm": 10.177637100219727, "learning_rate": 1.5827841417817137e-06, "loss": 0.33979363, "memory(GiB)": 34.88, "step": 111185, "train_speed(iter/s)": 0.410578 }, { "acc": 0.93984737, "epoch": 3.010586738147456, "grad_norm": 7.998037338256836, "learning_rate": 1.5823757678538666e-06, "loss": 0.34001379, "memory(GiB)": 34.88, "step": 111190, "train_speed(iter/s)": 0.410579 }, { "acc": 0.92874002, "epoch": 3.010722118430672, "grad_norm": 12.00409984588623, "learning_rate": 1.5819674367447424e-06, "loss": 0.39532647, "memory(GiB)": 34.88, "step": 111195, "train_speed(iter/s)": 0.41058 }, { "acc": 0.94343777, "epoch": 3.010857498713887, "grad_norm": 8.855137825012207, "learning_rate": 1.581559148459457e-06, "loss": 0.27243481, "memory(GiB)": 34.88, "step": 111200, "train_speed(iter/s)": 0.410581 }, { "acc": 0.92646008, "epoch": 3.010992878997103, "grad_norm": 7.920615196228027, "learning_rate": 1.5811509030031225e-06, "loss": 0.50273209, "memory(GiB)": 34.88, "step": 111205, "train_speed(iter/s)": 0.410582 }, { "acc": 0.92905521, "epoch": 3.0111282592803184, "grad_norm": 5.462869167327881, "learning_rate": 1.5807427003808549e-06, "loss": 0.35106616, "memory(GiB)": 34.88, "step": 111210, "train_speed(iter/s)": 0.410583 }, { "acc": 0.94913874, "epoch": 3.011263639563534, "grad_norm": 10.329078674316406, "learning_rate": 1.5803345405977663e-06, "loss": 0.27015138, "memory(GiB)": 34.88, "step": 111215, "train_speed(iter/s)": 0.410584 }, { "acc": 0.94606695, "epoch": 3.0113990198467495, "grad_norm": 6.532536506652832, "learning_rate": 1.5799264236589734e-06, "loss": 0.31444507, "memory(GiB)": 34.88, "step": 111220, "train_speed(iter/s)": 0.410585 }, { "acc": 0.93130713, "epoch": 3.011534400129965, "grad_norm": 13.712488174438477, "learning_rate": 1.5795183495695837e-06, "loss": 0.44166656, "memory(GiB)": 34.88, "step": 111225, "train_speed(iter/s)": 0.410586 }, { "acc": 0.9452939, "epoch": 3.0116697804131807, "grad_norm": 7.072145462036133, "learning_rate": 1.5791103183347114e-06, "loss": 0.30443087, "memory(GiB)": 34.88, "step": 111230, "train_speed(iter/s)": 0.410587 }, { "acc": 0.93612804, "epoch": 3.011805160696396, "grad_norm": 7.321054458618164, "learning_rate": 1.5787023299594701e-06, "loss": 0.35138302, "memory(GiB)": 34.88, "step": 111235, "train_speed(iter/s)": 0.410588 }, { "acc": 0.94677372, "epoch": 3.011940540979612, "grad_norm": 2.0260493755340576, "learning_rate": 1.5782943844489666e-06, "loss": 0.31184835, "memory(GiB)": 34.88, "step": 111240, "train_speed(iter/s)": 0.410589 }, { "acc": 0.93791857, "epoch": 3.012075921262827, "grad_norm": 6.033419609069824, "learning_rate": 1.5778864818083125e-06, "loss": 0.371421, "memory(GiB)": 34.88, "step": 111245, "train_speed(iter/s)": 0.41059 }, { "acc": 0.93123493, "epoch": 3.012211301546043, "grad_norm": 15.708596229553223, "learning_rate": 1.5774786220426186e-06, "loss": 0.39164915, "memory(GiB)": 34.88, "step": 111250, "train_speed(iter/s)": 0.410591 }, { "acc": 0.94611626, "epoch": 3.0123466818292584, "grad_norm": 4.564674377441406, "learning_rate": 1.5770708051569952e-06, "loss": 0.28443792, "memory(GiB)": 34.88, "step": 111255, "train_speed(iter/s)": 0.410592 }, { "acc": 0.93838453, "epoch": 3.012482062112474, "grad_norm": 17.74481964111328, "learning_rate": 1.5766630311565475e-06, "loss": 0.35731637, "memory(GiB)": 34.88, "step": 111260, "train_speed(iter/s)": 0.410593 }, { "acc": 0.91040916, "epoch": 3.0126174423956895, "grad_norm": 14.397043228149414, "learning_rate": 1.576255300046386e-06, "loss": 0.55185442, "memory(GiB)": 34.88, "step": 111265, "train_speed(iter/s)": 0.410594 }, { "acc": 0.9366807, "epoch": 3.012752822678905, "grad_norm": 9.963850021362305, "learning_rate": 1.5758476118316202e-06, "loss": 0.3713202, "memory(GiB)": 34.88, "step": 111270, "train_speed(iter/s)": 0.410595 }, { "acc": 0.93905392, "epoch": 3.0128882029621207, "grad_norm": 19.90505599975586, "learning_rate": 1.5754399665173522e-06, "loss": 0.38461583, "memory(GiB)": 34.88, "step": 111275, "train_speed(iter/s)": 0.410596 }, { "acc": 0.96224937, "epoch": 3.013023583245336, "grad_norm": 3.6647114753723145, "learning_rate": 1.5750323641086924e-06, "loss": 0.21676755, "memory(GiB)": 34.88, "step": 111280, "train_speed(iter/s)": 0.410597 }, { "acc": 0.94553299, "epoch": 3.013158963528552, "grad_norm": 7.938684940338135, "learning_rate": 1.5746248046107454e-06, "loss": 0.35382974, "memory(GiB)": 34.88, "step": 111285, "train_speed(iter/s)": 0.410598 }, { "acc": 0.94378624, "epoch": 3.013294343811767, "grad_norm": 10.368697166442871, "learning_rate": 1.5742172880286182e-06, "loss": 0.36829829, "memory(GiB)": 34.88, "step": 111290, "train_speed(iter/s)": 0.410599 }, { "acc": 0.93172083, "epoch": 3.013429724094983, "grad_norm": 8.253653526306152, "learning_rate": 1.5738098143674138e-06, "loss": 0.40473366, "memory(GiB)": 34.88, "step": 111295, "train_speed(iter/s)": 0.4106 }, { "acc": 0.9397337, "epoch": 3.0135651043781984, "grad_norm": 3.9778265953063965, "learning_rate": 1.5734023836322372e-06, "loss": 0.40281172, "memory(GiB)": 34.88, "step": 111300, "train_speed(iter/s)": 0.410601 }, { "acc": 0.93897905, "epoch": 3.0137004846614137, "grad_norm": 8.508708953857422, "learning_rate": 1.5729949958281936e-06, "loss": 0.35824137, "memory(GiB)": 34.88, "step": 111305, "train_speed(iter/s)": 0.410602 }, { "acc": 0.94667416, "epoch": 3.0138358649446295, "grad_norm": 6.113931655883789, "learning_rate": 1.5725876509603836e-06, "loss": 0.24697151, "memory(GiB)": 34.88, "step": 111310, "train_speed(iter/s)": 0.410603 }, { "acc": 0.95348034, "epoch": 3.013971245227845, "grad_norm": 6.3988823890686035, "learning_rate": 1.5721803490339122e-06, "loss": 0.27803106, "memory(GiB)": 34.88, "step": 111315, "train_speed(iter/s)": 0.410604 }, { "acc": 0.95168505, "epoch": 3.0141066255110607, "grad_norm": 3.2854199409484863, "learning_rate": 1.5717730900538805e-06, "loss": 0.21225007, "memory(GiB)": 34.88, "step": 111320, "train_speed(iter/s)": 0.410605 }, { "acc": 0.94591351, "epoch": 3.014242005794276, "grad_norm": 5.287873268127441, "learning_rate": 1.5713658740253925e-06, "loss": 0.28703542, "memory(GiB)": 34.88, "step": 111325, "train_speed(iter/s)": 0.410606 }, { "acc": 0.93557224, "epoch": 3.014377386077492, "grad_norm": 18.974498748779297, "learning_rate": 1.5709587009535465e-06, "loss": 0.33765075, "memory(GiB)": 34.88, "step": 111330, "train_speed(iter/s)": 0.410607 }, { "acc": 0.9428009, "epoch": 3.014512766360707, "grad_norm": 7.532454967498779, "learning_rate": 1.5705515708434438e-06, "loss": 0.31697674, "memory(GiB)": 34.88, "step": 111335, "train_speed(iter/s)": 0.410608 }, { "acc": 0.95445042, "epoch": 3.014648146643923, "grad_norm": 5.4169111251831055, "learning_rate": 1.5701444837001853e-06, "loss": 0.21838017, "memory(GiB)": 34.88, "step": 111340, "train_speed(iter/s)": 0.410609 }, { "acc": 0.94074659, "epoch": 3.0147835269271384, "grad_norm": 9.75269603729248, "learning_rate": 1.5697374395288716e-06, "loss": 0.36946952, "memory(GiB)": 34.88, "step": 111345, "train_speed(iter/s)": 0.41061 }, { "acc": 0.94184351, "epoch": 3.0149189072103537, "grad_norm": 7.99392557144165, "learning_rate": 1.5693304383345992e-06, "loss": 0.35906806, "memory(GiB)": 34.88, "step": 111350, "train_speed(iter/s)": 0.410611 }, { "acc": 0.9393549, "epoch": 3.0150542874935695, "grad_norm": 4.463257789611816, "learning_rate": 1.5689234801224678e-06, "loss": 0.28864927, "memory(GiB)": 34.88, "step": 111355, "train_speed(iter/s)": 0.410612 }, { "acc": 0.94646902, "epoch": 3.015189667776785, "grad_norm": 8.133099555969238, "learning_rate": 1.5685165648975765e-06, "loss": 0.35034227, "memory(GiB)": 34.88, "step": 111360, "train_speed(iter/s)": 0.410613 }, { "acc": 0.93530741, "epoch": 3.0153250480600007, "grad_norm": 6.223297595977783, "learning_rate": 1.5681096926650203e-06, "loss": 0.37284899, "memory(GiB)": 34.88, "step": 111365, "train_speed(iter/s)": 0.410614 }, { "acc": 0.94897022, "epoch": 3.015460428343216, "grad_norm": 9.642614364624023, "learning_rate": 1.5677028634298973e-06, "loss": 0.34357519, "memory(GiB)": 34.88, "step": 111370, "train_speed(iter/s)": 0.410614 }, { "acc": 0.94016247, "epoch": 3.015595808626432, "grad_norm": 19.112539291381836, "learning_rate": 1.5672960771973037e-06, "loss": 0.35375948, "memory(GiB)": 34.88, "step": 111375, "train_speed(iter/s)": 0.410615 }, { "acc": 0.94394665, "epoch": 3.015731188909647, "grad_norm": 4.441709041595459, "learning_rate": 1.5668893339723374e-06, "loss": 0.29204695, "memory(GiB)": 34.88, "step": 111380, "train_speed(iter/s)": 0.410616 }, { "acc": 0.93059731, "epoch": 3.0158665691928626, "grad_norm": 7.262806415557861, "learning_rate": 1.56648263376009e-06, "loss": 0.42810287, "memory(GiB)": 34.88, "step": 111385, "train_speed(iter/s)": 0.410617 }, { "acc": 0.94374104, "epoch": 3.0160019494760784, "grad_norm": 5.637604236602783, "learning_rate": 1.5660759765656577e-06, "loss": 0.34248693, "memory(GiB)": 34.88, "step": 111390, "train_speed(iter/s)": 0.410618 }, { "acc": 0.94133558, "epoch": 3.0161373297592937, "grad_norm": 5.228850364685059, "learning_rate": 1.5656693623941366e-06, "loss": 0.35208216, "memory(GiB)": 34.88, "step": 111395, "train_speed(iter/s)": 0.410619 }, { "acc": 0.92406883, "epoch": 3.0162727100425095, "grad_norm": 8.70010757446289, "learning_rate": 1.5652627912506168e-06, "loss": 0.48252935, "memory(GiB)": 34.88, "step": 111400, "train_speed(iter/s)": 0.41062 }, { "acc": 0.94765882, "epoch": 3.016408090325725, "grad_norm": 5.941638469696045, "learning_rate": 1.5648562631401942e-06, "loss": 0.272646, "memory(GiB)": 34.88, "step": 111405, "train_speed(iter/s)": 0.410621 }, { "acc": 0.93484011, "epoch": 3.0165434706089407, "grad_norm": 6.844160556793213, "learning_rate": 1.5644497780679595e-06, "loss": 0.39306056, "memory(GiB)": 34.88, "step": 111410, "train_speed(iter/s)": 0.410622 }, { "acc": 0.94545593, "epoch": 3.016678850892156, "grad_norm": 5.339404582977295, "learning_rate": 1.5640433360390076e-06, "loss": 0.2931078, "memory(GiB)": 34.88, "step": 111415, "train_speed(iter/s)": 0.410623 }, { "acc": 0.94638939, "epoch": 3.0168142311753714, "grad_norm": 6.204569339752197, "learning_rate": 1.5636369370584247e-06, "loss": 0.36255851, "memory(GiB)": 34.88, "step": 111420, "train_speed(iter/s)": 0.410624 }, { "acc": 0.94343233, "epoch": 3.016949611458587, "grad_norm": 7.430582523345947, "learning_rate": 1.5632305811313087e-06, "loss": 0.3192625, "memory(GiB)": 34.88, "step": 111425, "train_speed(iter/s)": 0.410625 }, { "acc": 0.93243599, "epoch": 3.0170849917418026, "grad_norm": 15.68230152130127, "learning_rate": 1.5628242682627465e-06, "loss": 0.4516058, "memory(GiB)": 34.88, "step": 111430, "train_speed(iter/s)": 0.410626 }, { "acc": 0.95831566, "epoch": 3.0172203720250184, "grad_norm": 5.05075216293335, "learning_rate": 1.5624179984578253e-06, "loss": 0.21724472, "memory(GiB)": 34.88, "step": 111435, "train_speed(iter/s)": 0.410627 }, { "acc": 0.94106359, "epoch": 3.0173557523082337, "grad_norm": 11.2290620803833, "learning_rate": 1.5620117717216374e-06, "loss": 0.38346436, "memory(GiB)": 34.88, "step": 111440, "train_speed(iter/s)": 0.410628 }, { "acc": 0.94304256, "epoch": 3.0174911325914495, "grad_norm": 13.98841667175293, "learning_rate": 1.5616055880592717e-06, "loss": 0.26565137, "memory(GiB)": 34.88, "step": 111445, "train_speed(iter/s)": 0.410628 }, { "acc": 0.93967018, "epoch": 3.017626512874665, "grad_norm": 5.2654290199279785, "learning_rate": 1.5611994474758171e-06, "loss": 0.40147371, "memory(GiB)": 34.88, "step": 111450, "train_speed(iter/s)": 0.410629 }, { "acc": 0.9349762, "epoch": 3.0177618931578807, "grad_norm": 5.667820930480957, "learning_rate": 1.560793349976357e-06, "loss": 0.38138556, "memory(GiB)": 34.88, "step": 111455, "train_speed(iter/s)": 0.410631 }, { "acc": 0.93027906, "epoch": 3.017897273441096, "grad_norm": 13.54802417755127, "learning_rate": 1.560387295565985e-06, "loss": 0.43385301, "memory(GiB)": 34.88, "step": 111460, "train_speed(iter/s)": 0.410631 }, { "acc": 0.9366787, "epoch": 3.0180326537243114, "grad_norm": 8.17491340637207, "learning_rate": 1.5599812842497824e-06, "loss": 0.4390028, "memory(GiB)": 34.88, "step": 111465, "train_speed(iter/s)": 0.410632 }, { "acc": 0.94437141, "epoch": 3.018168034007527, "grad_norm": 10.93575382232666, "learning_rate": 1.5595753160328394e-06, "loss": 0.36895065, "memory(GiB)": 34.88, "step": 111470, "train_speed(iter/s)": 0.410633 }, { "acc": 0.9383954, "epoch": 3.0183034142907426, "grad_norm": 11.913822174072266, "learning_rate": 1.559169390920238e-06, "loss": 0.37519624, "memory(GiB)": 34.88, "step": 111475, "train_speed(iter/s)": 0.410633 }, { "acc": 0.94658384, "epoch": 3.0184387945739584, "grad_norm": 4.210690498352051, "learning_rate": 1.5587635089170648e-06, "loss": 0.34275489, "memory(GiB)": 34.88, "step": 111480, "train_speed(iter/s)": 0.410634 }, { "acc": 0.94802818, "epoch": 3.0185741748571737, "grad_norm": 4.951550483703613, "learning_rate": 1.5583576700284046e-06, "loss": 0.32252493, "memory(GiB)": 34.88, "step": 111485, "train_speed(iter/s)": 0.410635 }, { "acc": 0.94215431, "epoch": 3.0187095551403895, "grad_norm": 5.023133754730225, "learning_rate": 1.557951874259339e-06, "loss": 0.34524875, "memory(GiB)": 34.88, "step": 111490, "train_speed(iter/s)": 0.410636 }, { "acc": 0.94206839, "epoch": 3.018844935423605, "grad_norm": 3.761251449584961, "learning_rate": 1.5575461216149557e-06, "loss": 0.28465199, "memory(GiB)": 34.88, "step": 111495, "train_speed(iter/s)": 0.410637 }, { "acc": 0.95220375, "epoch": 3.0189803157068202, "grad_norm": 4.821367263793945, "learning_rate": 1.5571404121003335e-06, "loss": 0.23672855, "memory(GiB)": 34.88, "step": 111500, "train_speed(iter/s)": 0.410639 }, { "acc": 0.92999821, "epoch": 3.019115695990036, "grad_norm": 8.438321113586426, "learning_rate": 1.5567347457205584e-06, "loss": 0.39432993, "memory(GiB)": 34.88, "step": 111505, "train_speed(iter/s)": 0.410639 }, { "acc": 0.93360901, "epoch": 3.0192510762732514, "grad_norm": 15.25818920135498, "learning_rate": 1.5563291224807067e-06, "loss": 0.36903138, "memory(GiB)": 34.88, "step": 111510, "train_speed(iter/s)": 0.41064 }, { "acc": 0.94378395, "epoch": 3.019386456556467, "grad_norm": 5.624917984008789, "learning_rate": 1.555923542385866e-06, "loss": 0.28644171, "memory(GiB)": 34.88, "step": 111515, "train_speed(iter/s)": 0.410641 }, { "acc": 0.9504302, "epoch": 3.0195218368396826, "grad_norm": 2.779287576675415, "learning_rate": 1.5555180054411142e-06, "loss": 0.2004993, "memory(GiB)": 34.88, "step": 111520, "train_speed(iter/s)": 0.410642 }, { "acc": 0.94653177, "epoch": 3.0196572171228984, "grad_norm": 16.478904724121094, "learning_rate": 1.5551125116515283e-06, "loss": 0.27611542, "memory(GiB)": 34.88, "step": 111525, "train_speed(iter/s)": 0.410643 }, { "acc": 0.92615051, "epoch": 3.0197925974061137, "grad_norm": 10.466776847839355, "learning_rate": 1.5547070610221938e-06, "loss": 0.41392174, "memory(GiB)": 34.88, "step": 111530, "train_speed(iter/s)": 0.410645 }, { "acc": 0.92967434, "epoch": 3.0199279776893295, "grad_norm": 18.359697341918945, "learning_rate": 1.554301653558185e-06, "loss": 0.44035816, "memory(GiB)": 34.88, "step": 111535, "train_speed(iter/s)": 0.410645 }, { "acc": 0.94586849, "epoch": 3.020063357972545, "grad_norm": 5.300137042999268, "learning_rate": 1.553896289264584e-06, "loss": 0.31714509, "memory(GiB)": 34.88, "step": 111540, "train_speed(iter/s)": 0.410646 }, { "acc": 0.94048195, "epoch": 3.0201987382557602, "grad_norm": 9.60272216796875, "learning_rate": 1.5534909681464642e-06, "loss": 0.29653163, "memory(GiB)": 34.88, "step": 111545, "train_speed(iter/s)": 0.410647 }, { "acc": 0.93502226, "epoch": 3.020334118538976, "grad_norm": 4.719212532043457, "learning_rate": 1.5530856902089084e-06, "loss": 0.36479278, "memory(GiB)": 34.88, "step": 111550, "train_speed(iter/s)": 0.410648 }, { "acc": 0.93696804, "epoch": 3.0204694988221914, "grad_norm": 8.43932056427002, "learning_rate": 1.5526804554569915e-06, "loss": 0.35751045, "memory(GiB)": 34.88, "step": 111555, "train_speed(iter/s)": 0.410649 }, { "acc": 0.95089245, "epoch": 3.020604879105407, "grad_norm": 8.053727149963379, "learning_rate": 1.5522752638957878e-06, "loss": 0.30333464, "memory(GiB)": 34.88, "step": 111560, "train_speed(iter/s)": 0.41065 }, { "acc": 0.94874353, "epoch": 3.0207402593886226, "grad_norm": 7.954862594604492, "learning_rate": 1.5518701155303748e-06, "loss": 0.39543157, "memory(GiB)": 34.88, "step": 111565, "train_speed(iter/s)": 0.41065 }, { "acc": 0.94251099, "epoch": 3.0208756396718384, "grad_norm": 14.860795021057129, "learning_rate": 1.5514650103658272e-06, "loss": 0.37523055, "memory(GiB)": 34.88, "step": 111570, "train_speed(iter/s)": 0.410651 }, { "acc": 0.93558111, "epoch": 3.0210110199550537, "grad_norm": 8.047825813293457, "learning_rate": 1.551059948407222e-06, "loss": 0.34707994, "memory(GiB)": 34.88, "step": 111575, "train_speed(iter/s)": 0.410652 }, { "acc": 0.94488945, "epoch": 3.021146400238269, "grad_norm": 10.3504056930542, "learning_rate": 1.5506549296596285e-06, "loss": 0.32774901, "memory(GiB)": 34.88, "step": 111580, "train_speed(iter/s)": 0.410653 }, { "acc": 0.94067326, "epoch": 3.021281780521485, "grad_norm": 5.3925604820251465, "learning_rate": 1.5502499541281264e-06, "loss": 0.3843292, "memory(GiB)": 34.88, "step": 111585, "train_speed(iter/s)": 0.410654 }, { "acc": 0.91727486, "epoch": 3.0214171608047002, "grad_norm": 11.175618171691895, "learning_rate": 1.5498450218177844e-06, "loss": 0.51092701, "memory(GiB)": 34.88, "step": 111590, "train_speed(iter/s)": 0.410655 }, { "acc": 0.93060989, "epoch": 3.021552541087916, "grad_norm": 6.99436092376709, "learning_rate": 1.549440132733678e-06, "loss": 0.3981672, "memory(GiB)": 34.88, "step": 111595, "train_speed(iter/s)": 0.410655 }, { "acc": 0.94163818, "epoch": 3.0216879213711314, "grad_norm": 7.768819808959961, "learning_rate": 1.5490352868808764e-06, "loss": 0.36751337, "memory(GiB)": 34.88, "step": 111600, "train_speed(iter/s)": 0.410657 }, { "acc": 0.93993511, "epoch": 3.021823301654347, "grad_norm": 13.856030464172363, "learning_rate": 1.5486304842644517e-06, "loss": 0.29314723, "memory(GiB)": 34.88, "step": 111605, "train_speed(iter/s)": 0.410658 }, { "acc": 0.92874212, "epoch": 3.0219586819375626, "grad_norm": 17.745075225830078, "learning_rate": 1.5482257248894784e-06, "loss": 0.44627442, "memory(GiB)": 34.88, "step": 111610, "train_speed(iter/s)": 0.410659 }, { "acc": 0.94686747, "epoch": 3.0220940622207784, "grad_norm": 3.1737921237945557, "learning_rate": 1.54782100876102e-06, "loss": 0.36894062, "memory(GiB)": 34.88, "step": 111615, "train_speed(iter/s)": 0.410659 }, { "acc": 0.95211353, "epoch": 3.0222294425039937, "grad_norm": 9.218480110168457, "learning_rate": 1.5474163358841538e-06, "loss": 0.2719717, "memory(GiB)": 34.88, "step": 111620, "train_speed(iter/s)": 0.41066 }, { "acc": 0.92481918, "epoch": 3.022364822787209, "grad_norm": 7.706552028656006, "learning_rate": 1.5470117062639444e-06, "loss": 0.38617628, "memory(GiB)": 34.88, "step": 111625, "train_speed(iter/s)": 0.410661 }, { "acc": 0.95071392, "epoch": 3.022500203070425, "grad_norm": 6.643819808959961, "learning_rate": 1.5466071199054632e-06, "loss": 0.37734179, "memory(GiB)": 34.88, "step": 111630, "train_speed(iter/s)": 0.410662 }, { "acc": 0.94561977, "epoch": 3.0226355833536402, "grad_norm": 15.137911796569824, "learning_rate": 1.5462025768137746e-06, "loss": 0.30529838, "memory(GiB)": 34.88, "step": 111635, "train_speed(iter/s)": 0.410663 }, { "acc": 0.94858093, "epoch": 3.022770963636856, "grad_norm": 5.244466781616211, "learning_rate": 1.5457980769939518e-06, "loss": 0.2773479, "memory(GiB)": 34.88, "step": 111640, "train_speed(iter/s)": 0.410664 }, { "acc": 0.95111427, "epoch": 3.0229063439200714, "grad_norm": 6.207235336303711, "learning_rate": 1.5453936204510592e-06, "loss": 0.29081993, "memory(GiB)": 34.88, "step": 111645, "train_speed(iter/s)": 0.410665 }, { "acc": 0.92836056, "epoch": 3.023041724203287, "grad_norm": 3.9968008995056152, "learning_rate": 1.5449892071901601e-06, "loss": 0.42833195, "memory(GiB)": 34.88, "step": 111650, "train_speed(iter/s)": 0.410666 }, { "acc": 0.93363171, "epoch": 3.0231771044865026, "grad_norm": 12.615424156188965, "learning_rate": 1.5445848372163267e-06, "loss": 0.33853183, "memory(GiB)": 34.88, "step": 111655, "train_speed(iter/s)": 0.410667 }, { "acc": 0.94121742, "epoch": 3.023312484769718, "grad_norm": 10.703736305236816, "learning_rate": 1.5441805105346197e-06, "loss": 0.39942877, "memory(GiB)": 34.88, "step": 111660, "train_speed(iter/s)": 0.410668 }, { "acc": 0.94828396, "epoch": 3.0234478650529337, "grad_norm": 17.259084701538086, "learning_rate": 1.543776227150108e-06, "loss": 0.29440627, "memory(GiB)": 34.88, "step": 111665, "train_speed(iter/s)": 0.410669 }, { "acc": 0.93988276, "epoch": 3.023583245336149, "grad_norm": 8.635140419006348, "learning_rate": 1.5433719870678508e-06, "loss": 0.30751655, "memory(GiB)": 34.88, "step": 111670, "train_speed(iter/s)": 0.41067 }, { "acc": 0.94373627, "epoch": 3.023718625619365, "grad_norm": 4.215952396392822, "learning_rate": 1.5429677902929183e-06, "loss": 0.3139153, "memory(GiB)": 34.88, "step": 111675, "train_speed(iter/s)": 0.410671 }, { "acc": 0.94148598, "epoch": 3.0238540059025802, "grad_norm": 5.438102722167969, "learning_rate": 1.5425636368303708e-06, "loss": 0.31408112, "memory(GiB)": 34.88, "step": 111680, "train_speed(iter/s)": 0.410671 }, { "acc": 0.93986206, "epoch": 3.023989386185796, "grad_norm": 11.053051948547363, "learning_rate": 1.5421595266852676e-06, "loss": 0.34029105, "memory(GiB)": 34.88, "step": 111685, "train_speed(iter/s)": 0.410672 }, { "acc": 0.94153032, "epoch": 3.0241247664690114, "grad_norm": 7.585350036621094, "learning_rate": 1.5417554598626782e-06, "loss": 0.29409053, "memory(GiB)": 34.88, "step": 111690, "train_speed(iter/s)": 0.410673 }, { "acc": 0.93924427, "epoch": 3.024260146752227, "grad_norm": 6.56079626083374, "learning_rate": 1.5413514363676589e-06, "loss": 0.33200605, "memory(GiB)": 34.88, "step": 111695, "train_speed(iter/s)": 0.410674 }, { "acc": 0.93881664, "epoch": 3.0243955270354426, "grad_norm": 12.411544799804688, "learning_rate": 1.540947456205274e-06, "loss": 0.35834517, "memory(GiB)": 34.88, "step": 111700, "train_speed(iter/s)": 0.410675 }, { "acc": 0.92665281, "epoch": 3.024530907318658, "grad_norm": 9.021032333374023, "learning_rate": 1.5405435193805796e-06, "loss": 0.43690138, "memory(GiB)": 34.88, "step": 111705, "train_speed(iter/s)": 0.410676 }, { "acc": 0.93271904, "epoch": 3.0246662876018737, "grad_norm": 8.452967643737793, "learning_rate": 1.5401396258986424e-06, "loss": 0.38787537, "memory(GiB)": 34.88, "step": 111710, "train_speed(iter/s)": 0.410677 }, { "acc": 0.93927603, "epoch": 3.024801667885089, "grad_norm": 6.9222002029418945, "learning_rate": 1.539735775764516e-06, "loss": 0.37182865, "memory(GiB)": 34.88, "step": 111715, "train_speed(iter/s)": 0.410678 }, { "acc": 0.92895784, "epoch": 3.024937048168305, "grad_norm": 6.329554557800293, "learning_rate": 1.5393319689832625e-06, "loss": 0.42778225, "memory(GiB)": 34.88, "step": 111720, "train_speed(iter/s)": 0.410679 }, { "acc": 0.94955912, "epoch": 3.0250724284515202, "grad_norm": 16.336898803710938, "learning_rate": 1.538928205559941e-06, "loss": 0.30403471, "memory(GiB)": 34.88, "step": 111725, "train_speed(iter/s)": 0.41068 }, { "acc": 0.94394999, "epoch": 3.025207808734736, "grad_norm": 3.3765079975128174, "learning_rate": 1.538524485499607e-06, "loss": 0.32649131, "memory(GiB)": 34.88, "step": 111730, "train_speed(iter/s)": 0.410681 }, { "acc": 0.95569439, "epoch": 3.0253431890179514, "grad_norm": 3.7360458374023438, "learning_rate": 1.5381208088073207e-06, "loss": 0.29757559, "memory(GiB)": 34.88, "step": 111735, "train_speed(iter/s)": 0.410682 }, { "acc": 0.94038181, "epoch": 3.0254785693011668, "grad_norm": 8.578707695007324, "learning_rate": 1.5377171754881343e-06, "loss": 0.28706236, "memory(GiB)": 34.88, "step": 111740, "train_speed(iter/s)": 0.410682 }, { "acc": 0.93870487, "epoch": 3.0256139495843826, "grad_norm": 13.214825630187988, "learning_rate": 1.5373135855471095e-06, "loss": 0.33265653, "memory(GiB)": 34.88, "step": 111745, "train_speed(iter/s)": 0.410683 }, { "acc": 0.93857746, "epoch": 3.025749329867598, "grad_norm": 6.536441326141357, "learning_rate": 1.5369100389892992e-06, "loss": 0.38428929, "memory(GiB)": 34.88, "step": 111750, "train_speed(iter/s)": 0.410684 }, { "acc": 0.95146227, "epoch": 3.0258847101508137, "grad_norm": 9.131722450256348, "learning_rate": 1.5365065358197586e-06, "loss": 0.24727786, "memory(GiB)": 34.88, "step": 111755, "train_speed(iter/s)": 0.410685 }, { "acc": 0.94217453, "epoch": 3.026020090434029, "grad_norm": 5.863286018371582, "learning_rate": 1.5361030760435425e-06, "loss": 0.3076189, "memory(GiB)": 34.88, "step": 111760, "train_speed(iter/s)": 0.410686 }, { "acc": 0.94446039, "epoch": 3.026155470717245, "grad_norm": 4.557762622833252, "learning_rate": 1.5356996596657077e-06, "loss": 0.3527411, "memory(GiB)": 34.88, "step": 111765, "train_speed(iter/s)": 0.410687 }, { "acc": 0.92309513, "epoch": 3.0262908510004602, "grad_norm": 21.82975959777832, "learning_rate": 1.535296286691305e-06, "loss": 0.44097853, "memory(GiB)": 34.88, "step": 111770, "train_speed(iter/s)": 0.410688 }, { "acc": 0.93135405, "epoch": 3.026426231283676, "grad_norm": 13.827828407287598, "learning_rate": 1.5348929571253845e-06, "loss": 0.38930264, "memory(GiB)": 34.88, "step": 111775, "train_speed(iter/s)": 0.410689 }, { "acc": 0.93757639, "epoch": 3.0265616115668914, "grad_norm": 5.332611083984375, "learning_rate": 1.534489670973006e-06, "loss": 0.35957117, "memory(GiB)": 34.88, "step": 111780, "train_speed(iter/s)": 0.41069 }, { "acc": 0.94792519, "epoch": 3.0266969918501068, "grad_norm": 4.570611476898193, "learning_rate": 1.5340864282392152e-06, "loss": 0.2585556, "memory(GiB)": 34.88, "step": 111785, "train_speed(iter/s)": 0.410691 }, { "acc": 0.93382931, "epoch": 3.0268323721333226, "grad_norm": 3.598311185836792, "learning_rate": 1.5336832289290666e-06, "loss": 0.35859005, "memory(GiB)": 34.88, "step": 111790, "train_speed(iter/s)": 0.410692 }, { "acc": 0.93032007, "epoch": 3.026967752416538, "grad_norm": 9.127425193786621, "learning_rate": 1.5332800730476096e-06, "loss": 0.44132919, "memory(GiB)": 34.88, "step": 111795, "train_speed(iter/s)": 0.410693 }, { "acc": 0.93410006, "epoch": 3.0271031326997537, "grad_norm": 10.418538093566895, "learning_rate": 1.5328769605998976e-06, "loss": 0.3966012, "memory(GiB)": 34.88, "step": 111800, "train_speed(iter/s)": 0.410694 }, { "acc": 0.94311638, "epoch": 3.027238512982969, "grad_norm": 6.417623043060303, "learning_rate": 1.5324738915909776e-06, "loss": 0.35051782, "memory(GiB)": 34.88, "step": 111805, "train_speed(iter/s)": 0.410695 }, { "acc": 0.9535099, "epoch": 3.027373893266185, "grad_norm": 5.7463202476501465, "learning_rate": 1.5320708660258962e-06, "loss": 0.2392015, "memory(GiB)": 34.88, "step": 111810, "train_speed(iter/s)": 0.410696 }, { "acc": 0.94738865, "epoch": 3.0275092735494002, "grad_norm": 4.291506767272949, "learning_rate": 1.5316678839097083e-06, "loss": 0.27343915, "memory(GiB)": 34.88, "step": 111815, "train_speed(iter/s)": 0.410697 }, { "acc": 0.95217924, "epoch": 3.0276446538326156, "grad_norm": 14.130000114440918, "learning_rate": 1.5312649452474573e-06, "loss": 0.28427465, "memory(GiB)": 34.88, "step": 111820, "train_speed(iter/s)": 0.410698 }, { "acc": 0.9410511, "epoch": 3.0277800341158314, "grad_norm": 9.864712715148926, "learning_rate": 1.5308620500441923e-06, "loss": 0.32836397, "memory(GiB)": 34.88, "step": 111825, "train_speed(iter/s)": 0.410699 }, { "acc": 0.93952723, "epoch": 3.0279154143990468, "grad_norm": 18.596040725708008, "learning_rate": 1.5304591983049607e-06, "loss": 0.34070649, "memory(GiB)": 34.88, "step": 111830, "train_speed(iter/s)": 0.4107 }, { "acc": 0.92072639, "epoch": 3.0280507946822626, "grad_norm": 7.889705181121826, "learning_rate": 1.53005639003481e-06, "loss": 0.43102341, "memory(GiB)": 34.88, "step": 111835, "train_speed(iter/s)": 0.410701 }, { "acc": 0.94222336, "epoch": 3.028186174965478, "grad_norm": 13.332195281982422, "learning_rate": 1.5296536252387838e-06, "loss": 0.30124073, "memory(GiB)": 34.88, "step": 111840, "train_speed(iter/s)": 0.410702 }, { "acc": 0.94763412, "epoch": 3.0283215552486937, "grad_norm": 2.500810384750366, "learning_rate": 1.5292509039219287e-06, "loss": 0.30649581, "memory(GiB)": 34.88, "step": 111845, "train_speed(iter/s)": 0.410703 }, { "acc": 0.93442593, "epoch": 3.028456935531909, "grad_norm": 7.128863334655762, "learning_rate": 1.5288482260892907e-06, "loss": 0.43245745, "memory(GiB)": 34.88, "step": 111850, "train_speed(iter/s)": 0.410704 }, { "acc": 0.94967976, "epoch": 3.028592315815125, "grad_norm": 13.260031700134277, "learning_rate": 1.5284455917459111e-06, "loss": 0.26031127, "memory(GiB)": 34.88, "step": 111855, "train_speed(iter/s)": 0.410705 }, { "acc": 0.92806969, "epoch": 3.0287276960983402, "grad_norm": 6.089513301849365, "learning_rate": 1.528043000896836e-06, "loss": 0.49868627, "memory(GiB)": 34.88, "step": 111860, "train_speed(iter/s)": 0.410706 }, { "acc": 0.938381, "epoch": 3.0288630763815556, "grad_norm": 7.55923318862915, "learning_rate": 1.5276404535471084e-06, "loss": 0.38051615, "memory(GiB)": 34.88, "step": 111865, "train_speed(iter/s)": 0.410707 }, { "acc": 0.93018703, "epoch": 3.0289984566647714, "grad_norm": 10.94311237335205, "learning_rate": 1.5272379497017718e-06, "loss": 0.40630484, "memory(GiB)": 34.88, "step": 111870, "train_speed(iter/s)": 0.410708 }, { "acc": 0.95254135, "epoch": 3.0291338369479868, "grad_norm": 6.603788375854492, "learning_rate": 1.526835489365866e-06, "loss": 0.279075, "memory(GiB)": 34.88, "step": 111875, "train_speed(iter/s)": 0.410709 }, { "acc": 0.9414896, "epoch": 3.0292692172312026, "grad_norm": 6.657074451446533, "learning_rate": 1.5264330725444343e-06, "loss": 0.37529368, "memory(GiB)": 34.88, "step": 111880, "train_speed(iter/s)": 0.41071 }, { "acc": 0.94000597, "epoch": 3.029404597514418, "grad_norm": 5.409117221832275, "learning_rate": 1.526030699242517e-06, "loss": 0.3446497, "memory(GiB)": 34.88, "step": 111885, "train_speed(iter/s)": 0.410711 }, { "acc": 0.94010401, "epoch": 3.0295399777976337, "grad_norm": 6.769021034240723, "learning_rate": 1.5256283694651563e-06, "loss": 0.36143627, "memory(GiB)": 34.88, "step": 111890, "train_speed(iter/s)": 0.410712 }, { "acc": 0.94718809, "epoch": 3.029675358080849, "grad_norm": 4.1873016357421875, "learning_rate": 1.5252260832173896e-06, "loss": 0.2831346, "memory(GiB)": 34.88, "step": 111895, "train_speed(iter/s)": 0.410713 }, { "acc": 0.94829731, "epoch": 3.0298107383640644, "grad_norm": 6.4953837394714355, "learning_rate": 1.524823840504258e-06, "loss": 0.32660632, "memory(GiB)": 34.88, "step": 111900, "train_speed(iter/s)": 0.410714 }, { "acc": 0.94103556, "epoch": 3.0299461186472803, "grad_norm": 13.095005989074707, "learning_rate": 1.524421641330801e-06, "loss": 0.374018, "memory(GiB)": 34.88, "step": 111905, "train_speed(iter/s)": 0.410715 }, { "acc": 0.94482126, "epoch": 3.0300814989304956, "grad_norm": 9.84396743774414, "learning_rate": 1.5240194857020544e-06, "loss": 0.2978188, "memory(GiB)": 34.88, "step": 111910, "train_speed(iter/s)": 0.410716 }, { "acc": 0.94417114, "epoch": 3.0302168792137114, "grad_norm": 7.141833305358887, "learning_rate": 1.5236173736230579e-06, "loss": 0.31074677, "memory(GiB)": 34.88, "step": 111915, "train_speed(iter/s)": 0.410717 }, { "acc": 0.93841572, "epoch": 3.0303522594969268, "grad_norm": 11.749130249023438, "learning_rate": 1.5232153050988483e-06, "loss": 0.3870841, "memory(GiB)": 34.88, "step": 111920, "train_speed(iter/s)": 0.410718 }, { "acc": 0.95006752, "epoch": 3.0304876397801426, "grad_norm": 7.8036112785339355, "learning_rate": 1.5228132801344637e-06, "loss": 0.30013947, "memory(GiB)": 34.88, "step": 111925, "train_speed(iter/s)": 0.410718 }, { "acc": 0.94670486, "epoch": 3.030623020063358, "grad_norm": 4.039303302764893, "learning_rate": 1.5224112987349387e-06, "loss": 0.29875612, "memory(GiB)": 34.88, "step": 111930, "train_speed(iter/s)": 0.410719 }, { "acc": 0.933988, "epoch": 3.0307584003465737, "grad_norm": 4.4425272941589355, "learning_rate": 1.5220093609053061e-06, "loss": 0.37364347, "memory(GiB)": 34.88, "step": 111935, "train_speed(iter/s)": 0.41072 }, { "acc": 0.93575134, "epoch": 3.030893780629789, "grad_norm": 20.615806579589844, "learning_rate": 1.5216074666506076e-06, "loss": 0.36947794, "memory(GiB)": 34.88, "step": 111940, "train_speed(iter/s)": 0.410721 }, { "acc": 0.94134159, "epoch": 3.0310291609130044, "grad_norm": 7.851988792419434, "learning_rate": 1.521205615975872e-06, "loss": 0.31867747, "memory(GiB)": 34.88, "step": 111945, "train_speed(iter/s)": 0.410722 }, { "acc": 0.93814697, "epoch": 3.0311645411962203, "grad_norm": 2.411186456680298, "learning_rate": 1.5208038088861358e-06, "loss": 0.36103568, "memory(GiB)": 34.88, "step": 111950, "train_speed(iter/s)": 0.410723 }, { "acc": 0.94638119, "epoch": 3.0312999214794356, "grad_norm": 5.904223442077637, "learning_rate": 1.5204020453864312e-06, "loss": 0.28975532, "memory(GiB)": 34.88, "step": 111955, "train_speed(iter/s)": 0.410724 }, { "acc": 0.94856386, "epoch": 3.0314353017626514, "grad_norm": 10.756706237792969, "learning_rate": 1.5200003254817943e-06, "loss": 0.32707393, "memory(GiB)": 34.88, "step": 111960, "train_speed(iter/s)": 0.410725 }, { "acc": 0.95319567, "epoch": 3.0315706820458668, "grad_norm": 5.571157932281494, "learning_rate": 1.5195986491772532e-06, "loss": 0.29490771, "memory(GiB)": 34.88, "step": 111965, "train_speed(iter/s)": 0.410726 }, { "acc": 0.93866234, "epoch": 3.0317060623290826, "grad_norm": 6.223231315612793, "learning_rate": 1.5191970164778411e-06, "loss": 0.35779448, "memory(GiB)": 34.88, "step": 111970, "train_speed(iter/s)": 0.410727 }, { "acc": 0.93121939, "epoch": 3.031841442612298, "grad_norm": 4.9389729499816895, "learning_rate": 1.5187954273885916e-06, "loss": 0.42343321, "memory(GiB)": 34.88, "step": 111975, "train_speed(iter/s)": 0.410728 }, { "acc": 0.94668713, "epoch": 3.0319768228955133, "grad_norm": 5.027649879455566, "learning_rate": 1.5183938819145314e-06, "loss": 0.34295094, "memory(GiB)": 34.88, "step": 111980, "train_speed(iter/s)": 0.410729 }, { "acc": 0.94341841, "epoch": 3.032112203178729, "grad_norm": 5.819934368133545, "learning_rate": 1.5179923800606927e-06, "loss": 0.28580952, "memory(GiB)": 34.88, "step": 111985, "train_speed(iter/s)": 0.41073 }, { "acc": 0.9475523, "epoch": 3.0322475834619445, "grad_norm": 9.42902660369873, "learning_rate": 1.5175909218321056e-06, "loss": 0.2801697, "memory(GiB)": 34.88, "step": 111990, "train_speed(iter/s)": 0.410731 }, { "acc": 0.95683413, "epoch": 3.0323829637451603, "grad_norm": 6.143803119659424, "learning_rate": 1.5171895072337997e-06, "loss": 0.17276766, "memory(GiB)": 34.88, "step": 111995, "train_speed(iter/s)": 0.410732 }, { "acc": 0.95626812, "epoch": 3.0325183440283756, "grad_norm": 6.561227798461914, "learning_rate": 1.5167881362708013e-06, "loss": 0.2638387, "memory(GiB)": 34.88, "step": 112000, "train_speed(iter/s)": 0.410733 }, { "acc": 0.94096355, "epoch": 3.0326537243115914, "grad_norm": 10.064831733703613, "learning_rate": 1.5163868089481392e-06, "loss": 0.36661611, "memory(GiB)": 34.88, "step": 112005, "train_speed(iter/s)": 0.410734 }, { "acc": 0.95426121, "epoch": 3.0327891045948068, "grad_norm": 6.605043411254883, "learning_rate": 1.5159855252708408e-06, "loss": 0.21934185, "memory(GiB)": 34.88, "step": 112010, "train_speed(iter/s)": 0.410735 }, { "acc": 0.95742941, "epoch": 3.0329244848780226, "grad_norm": 6.093297958374023, "learning_rate": 1.5155842852439353e-06, "loss": 0.23459332, "memory(GiB)": 34.88, "step": 112015, "train_speed(iter/s)": 0.410736 }, { "acc": 0.95927277, "epoch": 3.033059865161238, "grad_norm": 6.812227725982666, "learning_rate": 1.515183088872445e-06, "loss": 0.21549587, "memory(GiB)": 34.88, "step": 112020, "train_speed(iter/s)": 0.410737 }, { "acc": 0.9451231, "epoch": 3.0331952454444533, "grad_norm": 3.336775779724121, "learning_rate": 1.514781936161398e-06, "loss": 0.28224607, "memory(GiB)": 34.88, "step": 112025, "train_speed(iter/s)": 0.410738 }, { "acc": 0.94306583, "epoch": 3.033330625727669, "grad_norm": 6.180850982666016, "learning_rate": 1.5143808271158204e-06, "loss": 0.34439788, "memory(GiB)": 34.88, "step": 112030, "train_speed(iter/s)": 0.410739 }, { "acc": 0.93732157, "epoch": 3.0334660060108845, "grad_norm": 6.911907196044922, "learning_rate": 1.5139797617407343e-06, "loss": 0.33349395, "memory(GiB)": 34.88, "step": 112035, "train_speed(iter/s)": 0.41074 }, { "acc": 0.93921223, "epoch": 3.0336013862941003, "grad_norm": 8.884037971496582, "learning_rate": 1.513578740041165e-06, "loss": 0.34169919, "memory(GiB)": 34.88, "step": 112040, "train_speed(iter/s)": 0.410741 }, { "acc": 0.95627251, "epoch": 3.0337367665773156, "grad_norm": 7.937986850738525, "learning_rate": 1.513177762022137e-06, "loss": 0.23654771, "memory(GiB)": 34.88, "step": 112045, "train_speed(iter/s)": 0.410742 }, { "acc": 0.94111853, "epoch": 3.0338721468605314, "grad_norm": 8.463716506958008, "learning_rate": 1.5127768276886732e-06, "loss": 0.38518658, "memory(GiB)": 34.88, "step": 112050, "train_speed(iter/s)": 0.410743 }, { "acc": 0.93520737, "epoch": 3.0340075271437468, "grad_norm": 2.482924222946167, "learning_rate": 1.512375937045793e-06, "loss": 0.36750975, "memory(GiB)": 34.88, "step": 112055, "train_speed(iter/s)": 0.410744 }, { "acc": 0.9476944, "epoch": 3.034142907426962, "grad_norm": 4.164012432098389, "learning_rate": 1.5119750900985236e-06, "loss": 0.30609455, "memory(GiB)": 34.88, "step": 112060, "train_speed(iter/s)": 0.410745 }, { "acc": 0.93408012, "epoch": 3.034278287710178, "grad_norm": 8.683207511901855, "learning_rate": 1.5115742868518834e-06, "loss": 0.40397868, "memory(GiB)": 34.88, "step": 112065, "train_speed(iter/s)": 0.410746 }, { "acc": 0.94838791, "epoch": 3.0344136679933933, "grad_norm": 4.4472336769104, "learning_rate": 1.5111735273108925e-06, "loss": 0.30749378, "memory(GiB)": 34.88, "step": 112070, "train_speed(iter/s)": 0.410747 }, { "acc": 0.93820829, "epoch": 3.034549048276609, "grad_norm": 4.987944602966309, "learning_rate": 1.510772811480572e-06, "loss": 0.33323145, "memory(GiB)": 34.88, "step": 112075, "train_speed(iter/s)": 0.410748 }, { "acc": 0.943046, "epoch": 3.0346844285598245, "grad_norm": 9.958589553833008, "learning_rate": 1.5103721393659415e-06, "loss": 0.32586052, "memory(GiB)": 34.88, "step": 112080, "train_speed(iter/s)": 0.410749 }, { "acc": 0.94423685, "epoch": 3.0348198088430403, "grad_norm": 7.972431659698486, "learning_rate": 1.5099715109720225e-06, "loss": 0.30376754, "memory(GiB)": 34.88, "step": 112085, "train_speed(iter/s)": 0.41075 }, { "acc": 0.92218513, "epoch": 3.0349551891262556, "grad_norm": 17.397293090820312, "learning_rate": 1.509570926303829e-06, "loss": 0.52385845, "memory(GiB)": 34.88, "step": 112090, "train_speed(iter/s)": 0.410751 }, { "acc": 0.94831038, "epoch": 3.0350905694094714, "grad_norm": 3.611339569091797, "learning_rate": 1.5091703853663838e-06, "loss": 0.32018485, "memory(GiB)": 34.88, "step": 112095, "train_speed(iter/s)": 0.410752 }, { "acc": 0.93218269, "epoch": 3.0352259496926868, "grad_norm": 10.630322456359863, "learning_rate": 1.5087698881647033e-06, "loss": 0.40764761, "memory(GiB)": 34.88, "step": 112100, "train_speed(iter/s)": 0.410753 }, { "acc": 0.93826666, "epoch": 3.035361329975902, "grad_norm": 5.854145526885986, "learning_rate": 1.508369434703802e-06, "loss": 0.34287395, "memory(GiB)": 34.88, "step": 112105, "train_speed(iter/s)": 0.410754 }, { "acc": 0.93863964, "epoch": 3.035496710259118, "grad_norm": 7.385596752166748, "learning_rate": 1.5079690249886978e-06, "loss": 0.4444787, "memory(GiB)": 34.88, "step": 112110, "train_speed(iter/s)": 0.410754 }, { "acc": 0.92859058, "epoch": 3.0356320905423333, "grad_norm": 9.946745872497559, "learning_rate": 1.5075686590244071e-06, "loss": 0.4390594, "memory(GiB)": 34.88, "step": 112115, "train_speed(iter/s)": 0.410756 }, { "acc": 0.93720226, "epoch": 3.035767470825549, "grad_norm": 9.112297058105469, "learning_rate": 1.5071683368159467e-06, "loss": 0.38472061, "memory(GiB)": 34.88, "step": 112120, "train_speed(iter/s)": 0.410757 }, { "acc": 0.94525547, "epoch": 3.0359028511087645, "grad_norm": 10.529064178466797, "learning_rate": 1.5067680583683283e-06, "loss": 0.25949385, "memory(GiB)": 34.88, "step": 112125, "train_speed(iter/s)": 0.410758 }, { "acc": 0.92003784, "epoch": 3.0360382313919803, "grad_norm": 4.311827659606934, "learning_rate": 1.5063678236865672e-06, "loss": 0.42511053, "memory(GiB)": 34.88, "step": 112130, "train_speed(iter/s)": 0.410759 }, { "acc": 0.95151329, "epoch": 3.0361736116751956, "grad_norm": 2.873866081237793, "learning_rate": 1.5059676327756778e-06, "loss": 0.32725415, "memory(GiB)": 34.88, "step": 112135, "train_speed(iter/s)": 0.41076 }, { "acc": 0.95230131, "epoch": 3.036308991958411, "grad_norm": 2.977468967437744, "learning_rate": 1.505567485640674e-06, "loss": 0.20255055, "memory(GiB)": 34.88, "step": 112140, "train_speed(iter/s)": 0.410761 }, { "acc": 0.9327507, "epoch": 3.0364443722416268, "grad_norm": 8.22591495513916, "learning_rate": 1.5051673822865668e-06, "loss": 0.36892653, "memory(GiB)": 34.88, "step": 112145, "train_speed(iter/s)": 0.410762 }, { "acc": 0.93168268, "epoch": 3.036579752524842, "grad_norm": 5.142805576324463, "learning_rate": 1.5047673227183685e-06, "loss": 0.38653812, "memory(GiB)": 34.88, "step": 112150, "train_speed(iter/s)": 0.410762 }, { "acc": 0.93923988, "epoch": 3.036715132808058, "grad_norm": 7.229573726654053, "learning_rate": 1.504367306941093e-06, "loss": 0.34406826, "memory(GiB)": 34.88, "step": 112155, "train_speed(iter/s)": 0.410764 }, { "acc": 0.94373798, "epoch": 3.0368505130912733, "grad_norm": 24.545589447021484, "learning_rate": 1.5039673349597474e-06, "loss": 0.3987365, "memory(GiB)": 34.88, "step": 112160, "train_speed(iter/s)": 0.410765 }, { "acc": 0.93439064, "epoch": 3.036985893374489, "grad_norm": 9.461416244506836, "learning_rate": 1.5035674067793442e-06, "loss": 0.38157959, "memory(GiB)": 34.88, "step": 112165, "train_speed(iter/s)": 0.410766 }, { "acc": 0.95141182, "epoch": 3.0371212736577045, "grad_norm": 4.917668342590332, "learning_rate": 1.5031675224048938e-06, "loss": 0.30521505, "memory(GiB)": 34.88, "step": 112170, "train_speed(iter/s)": 0.410767 }, { "acc": 0.93360252, "epoch": 3.0372566539409203, "grad_norm": 10.915669441223145, "learning_rate": 1.502767681841406e-06, "loss": 0.40517416, "memory(GiB)": 34.88, "step": 112175, "train_speed(iter/s)": 0.410768 }, { "acc": 0.94314632, "epoch": 3.0373920342241356, "grad_norm": 14.77368450164795, "learning_rate": 1.5023678850938852e-06, "loss": 0.37043574, "memory(GiB)": 34.88, "step": 112180, "train_speed(iter/s)": 0.410769 }, { "acc": 0.94774122, "epoch": 3.037527414507351, "grad_norm": 5.196448802947998, "learning_rate": 1.5019681321673462e-06, "loss": 0.30642209, "memory(GiB)": 34.88, "step": 112185, "train_speed(iter/s)": 0.41077 }, { "acc": 0.93327732, "epoch": 3.037662794790567, "grad_norm": 4.674039840698242, "learning_rate": 1.5015684230667935e-06, "loss": 0.3644165, "memory(GiB)": 34.88, "step": 112190, "train_speed(iter/s)": 0.410771 }, { "acc": 0.94014959, "epoch": 3.037798175073782, "grad_norm": 12.066947937011719, "learning_rate": 1.5011687577972323e-06, "loss": 0.39800537, "memory(GiB)": 34.88, "step": 112195, "train_speed(iter/s)": 0.410772 }, { "acc": 0.93618078, "epoch": 3.037933555356998, "grad_norm": 10.508185386657715, "learning_rate": 1.5007691363636714e-06, "loss": 0.35296693, "memory(GiB)": 34.88, "step": 112200, "train_speed(iter/s)": 0.410773 }, { "acc": 0.93557549, "epoch": 3.0380689356402133, "grad_norm": 12.492157936096191, "learning_rate": 1.5003695587711165e-06, "loss": 0.43498735, "memory(GiB)": 34.88, "step": 112205, "train_speed(iter/s)": 0.410773 }, { "acc": 0.93563423, "epoch": 3.038204315923429, "grad_norm": 8.032963752746582, "learning_rate": 1.499970025024575e-06, "loss": 0.33137941, "memory(GiB)": 34.88, "step": 112210, "train_speed(iter/s)": 0.410774 }, { "acc": 0.93181019, "epoch": 3.0383396962066445, "grad_norm": 6.277591705322266, "learning_rate": 1.4995705351290465e-06, "loss": 0.39872794, "memory(GiB)": 34.88, "step": 112215, "train_speed(iter/s)": 0.410775 }, { "acc": 0.93641529, "epoch": 3.03847507648986, "grad_norm": 6.048477649688721, "learning_rate": 1.499171089089542e-06, "loss": 0.36406946, "memory(GiB)": 34.88, "step": 112220, "train_speed(iter/s)": 0.410776 }, { "acc": 0.94236193, "epoch": 3.0386104567730756, "grad_norm": 7.4900712966918945, "learning_rate": 1.4987716869110618e-06, "loss": 0.32628517, "memory(GiB)": 34.88, "step": 112225, "train_speed(iter/s)": 0.410777 }, { "acc": 0.94287701, "epoch": 3.038745837056291, "grad_norm": 5.423055648803711, "learning_rate": 1.4983723285986091e-06, "loss": 0.29622488, "memory(GiB)": 34.88, "step": 112230, "train_speed(iter/s)": 0.410778 }, { "acc": 0.94247665, "epoch": 3.038881217339507, "grad_norm": 3.869662046432495, "learning_rate": 1.4979730141571863e-06, "loss": 0.35195494, "memory(GiB)": 34.88, "step": 112235, "train_speed(iter/s)": 0.410779 }, { "acc": 0.9332634, "epoch": 3.039016597622722, "grad_norm": 6.5808329582214355, "learning_rate": 1.4975737435917965e-06, "loss": 0.40218725, "memory(GiB)": 34.88, "step": 112240, "train_speed(iter/s)": 0.41078 }, { "acc": 0.93762379, "epoch": 3.039151977905938, "grad_norm": 4.062489986419678, "learning_rate": 1.4971745169074433e-06, "loss": 0.47822237, "memory(GiB)": 34.88, "step": 112245, "train_speed(iter/s)": 0.410781 }, { "acc": 0.94797668, "epoch": 3.0392873581891533, "grad_norm": 4.354601860046387, "learning_rate": 1.4967753341091227e-06, "loss": 0.3493984, "memory(GiB)": 34.88, "step": 112250, "train_speed(iter/s)": 0.410782 }, { "acc": 0.94779701, "epoch": 3.039422738472369, "grad_norm": 10.901705741882324, "learning_rate": 1.4963761952018405e-06, "loss": 0.33309796, "memory(GiB)": 34.88, "step": 112255, "train_speed(iter/s)": 0.410783 }, { "acc": 0.93451614, "epoch": 3.0395581187555845, "grad_norm": 8.18992805480957, "learning_rate": 1.495977100190594e-06, "loss": 0.38469942, "memory(GiB)": 34.88, "step": 112260, "train_speed(iter/s)": 0.410784 }, { "acc": 0.9444643, "epoch": 3.0396934990388, "grad_norm": 5.977258682250977, "learning_rate": 1.4955780490803845e-06, "loss": 0.33578534, "memory(GiB)": 34.88, "step": 112265, "train_speed(iter/s)": 0.410785 }, { "acc": 0.92230339, "epoch": 3.0398288793220156, "grad_norm": 33.37701416015625, "learning_rate": 1.4951790418762077e-06, "loss": 0.46581421, "memory(GiB)": 34.88, "step": 112270, "train_speed(iter/s)": 0.410786 }, { "acc": 0.93526258, "epoch": 3.039964259605231, "grad_norm": 10.0567045211792, "learning_rate": 1.494780078583064e-06, "loss": 0.40026503, "memory(GiB)": 34.88, "step": 112275, "train_speed(iter/s)": 0.410787 }, { "acc": 0.94245338, "epoch": 3.040099639888447, "grad_norm": 4.379419326782227, "learning_rate": 1.4943811592059529e-06, "loss": 0.38853159, "memory(GiB)": 34.88, "step": 112280, "train_speed(iter/s)": 0.410788 }, { "acc": 0.93267879, "epoch": 3.040235020171662, "grad_norm": 9.609929084777832, "learning_rate": 1.4939822837498666e-06, "loss": 0.392378, "memory(GiB)": 34.88, "step": 112285, "train_speed(iter/s)": 0.410789 }, { "acc": 0.93354053, "epoch": 3.040370400454878, "grad_norm": 12.293591499328613, "learning_rate": 1.4935834522198081e-06, "loss": 0.41825242, "memory(GiB)": 34.88, "step": 112290, "train_speed(iter/s)": 0.41079 }, { "acc": 0.93094339, "epoch": 3.0405057807380933, "grad_norm": 7.323278427124023, "learning_rate": 1.4931846646207684e-06, "loss": 0.36577592, "memory(GiB)": 34.88, "step": 112295, "train_speed(iter/s)": 0.410791 }, { "acc": 0.94307652, "epoch": 3.0406411610213087, "grad_norm": 7.879047393798828, "learning_rate": 1.492785920957747e-06, "loss": 0.36961741, "memory(GiB)": 34.88, "step": 112300, "train_speed(iter/s)": 0.410792 }, { "acc": 0.93196487, "epoch": 3.0407765413045245, "grad_norm": 8.2457857131958, "learning_rate": 1.492387221235734e-06, "loss": 0.40723653, "memory(GiB)": 34.88, "step": 112305, "train_speed(iter/s)": 0.410793 }, { "acc": 0.93883629, "epoch": 3.04091192158774, "grad_norm": 13.55976390838623, "learning_rate": 1.4919885654597305e-06, "loss": 0.34735131, "memory(GiB)": 34.88, "step": 112310, "train_speed(iter/s)": 0.410794 }, { "acc": 0.93220253, "epoch": 3.0410473018709556, "grad_norm": 18.264707565307617, "learning_rate": 1.4915899536347266e-06, "loss": 0.46543608, "memory(GiB)": 34.88, "step": 112315, "train_speed(iter/s)": 0.410796 }, { "acc": 0.9430541, "epoch": 3.041182682154171, "grad_norm": 7.72118616104126, "learning_rate": 1.491191385765713e-06, "loss": 0.29462152, "memory(GiB)": 34.88, "step": 112320, "train_speed(iter/s)": 0.410797 }, { "acc": 0.95416794, "epoch": 3.041318062437387, "grad_norm": 5.3427534103393555, "learning_rate": 1.4907928618576884e-06, "loss": 0.26024446, "memory(GiB)": 34.88, "step": 112325, "train_speed(iter/s)": 0.410798 }, { "acc": 0.94428539, "epoch": 3.041453442720602, "grad_norm": 12.60251522064209, "learning_rate": 1.4903943819156405e-06, "loss": 0.29539213, "memory(GiB)": 34.88, "step": 112330, "train_speed(iter/s)": 0.410798 }, { "acc": 0.94780064, "epoch": 3.041588823003818, "grad_norm": 7.563193321228027, "learning_rate": 1.489995945944565e-06, "loss": 0.34836996, "memory(GiB)": 34.88, "step": 112335, "train_speed(iter/s)": 0.4108 }, { "acc": 0.94867258, "epoch": 3.0417242032870333, "grad_norm": 12.026717185974121, "learning_rate": 1.4895975539494475e-06, "loss": 0.29485621, "memory(GiB)": 34.88, "step": 112340, "train_speed(iter/s)": 0.410801 }, { "acc": 0.93921318, "epoch": 3.0418595835702487, "grad_norm": 4.721395969390869, "learning_rate": 1.489199205935285e-06, "loss": 0.2666573, "memory(GiB)": 34.88, "step": 112345, "train_speed(iter/s)": 0.410802 }, { "acc": 0.94285793, "epoch": 3.0419949638534645, "grad_norm": 6.448751926422119, "learning_rate": 1.4888009019070647e-06, "loss": 0.29968832, "memory(GiB)": 34.88, "step": 112350, "train_speed(iter/s)": 0.410802 }, { "acc": 0.94398012, "epoch": 3.04213034413668, "grad_norm": 13.459033966064453, "learning_rate": 1.4884026418697736e-06, "loss": 0.38656592, "memory(GiB)": 34.88, "step": 112355, "train_speed(iter/s)": 0.410803 }, { "acc": 0.94741306, "epoch": 3.0422657244198956, "grad_norm": 5.152488708496094, "learning_rate": 1.4880044258284057e-06, "loss": 0.34168029, "memory(GiB)": 34.88, "step": 112360, "train_speed(iter/s)": 0.410804 }, { "acc": 0.94147758, "epoch": 3.042401104703111, "grad_norm": 16.866153717041016, "learning_rate": 1.4876062537879455e-06, "loss": 0.36749988, "memory(GiB)": 34.88, "step": 112365, "train_speed(iter/s)": 0.410805 }, { "acc": 0.93561115, "epoch": 3.042536484986327, "grad_norm": 4.90716028213501, "learning_rate": 1.4872081257533836e-06, "loss": 0.42165356, "memory(GiB)": 34.88, "step": 112370, "train_speed(iter/s)": 0.410806 }, { "acc": 0.950138, "epoch": 3.042671865269542, "grad_norm": 5.266702175140381, "learning_rate": 1.4868100417297032e-06, "loss": 0.31394973, "memory(GiB)": 34.88, "step": 112375, "train_speed(iter/s)": 0.410807 }, { "acc": 0.95187092, "epoch": 3.0428072455527575, "grad_norm": 4.814398288726807, "learning_rate": 1.4864120017218973e-06, "loss": 0.32015836, "memory(GiB)": 34.88, "step": 112380, "train_speed(iter/s)": 0.410808 }, { "acc": 0.92163372, "epoch": 3.0429426258359733, "grad_norm": 8.842202186584473, "learning_rate": 1.4860140057349465e-06, "loss": 0.45984879, "memory(GiB)": 34.88, "step": 112385, "train_speed(iter/s)": 0.410809 }, { "acc": 0.94204941, "epoch": 3.0430780061191887, "grad_norm": 8.677490234375, "learning_rate": 1.4856160537738386e-06, "loss": 0.31873615, "memory(GiB)": 34.88, "step": 112390, "train_speed(iter/s)": 0.41081 }, { "acc": 0.93729906, "epoch": 3.0432133864024045, "grad_norm": 7.5788655281066895, "learning_rate": 1.4852181458435614e-06, "loss": 0.43776603, "memory(GiB)": 34.88, "step": 112395, "train_speed(iter/s)": 0.410811 }, { "acc": 0.9425024, "epoch": 3.04334876668562, "grad_norm": 8.925924301147461, "learning_rate": 1.4848202819490948e-06, "loss": 0.33582506, "memory(GiB)": 34.88, "step": 112400, "train_speed(iter/s)": 0.410812 }, { "acc": 0.93647137, "epoch": 3.0434841469688356, "grad_norm": 4.745939254760742, "learning_rate": 1.4844224620954262e-06, "loss": 0.37852764, "memory(GiB)": 34.88, "step": 112405, "train_speed(iter/s)": 0.410813 }, { "acc": 0.94470329, "epoch": 3.043619527252051, "grad_norm": 7.079735279083252, "learning_rate": 1.484024686287535e-06, "loss": 0.30338373, "memory(GiB)": 34.88, "step": 112410, "train_speed(iter/s)": 0.410814 }, { "acc": 0.93846989, "epoch": 3.0437549075352663, "grad_norm": 8.967217445373535, "learning_rate": 1.4836269545304106e-06, "loss": 0.37091033, "memory(GiB)": 34.88, "step": 112415, "train_speed(iter/s)": 0.410815 }, { "acc": 0.93895817, "epoch": 3.043890287818482, "grad_norm": 15.010456085205078, "learning_rate": 1.4832292668290294e-06, "loss": 0.42670851, "memory(GiB)": 34.88, "step": 112420, "train_speed(iter/s)": 0.410816 }, { "acc": 0.9399456, "epoch": 3.0440256681016975, "grad_norm": 5.746703147888184, "learning_rate": 1.4828316231883764e-06, "loss": 0.38766625, "memory(GiB)": 34.88, "step": 112425, "train_speed(iter/s)": 0.410817 }, { "acc": 0.94286976, "epoch": 3.0441610483849133, "grad_norm": 5.514863967895508, "learning_rate": 1.4824340236134313e-06, "loss": 0.32686656, "memory(GiB)": 34.88, "step": 112430, "train_speed(iter/s)": 0.410818 }, { "acc": 0.94285831, "epoch": 3.0442964286681287, "grad_norm": 4.875946044921875, "learning_rate": 1.4820364681091774e-06, "loss": 0.30666118, "memory(GiB)": 34.88, "step": 112435, "train_speed(iter/s)": 0.410819 }, { "acc": 0.93902473, "epoch": 3.0444318089513445, "grad_norm": 5.244241714477539, "learning_rate": 1.4816389566805927e-06, "loss": 0.31672502, "memory(GiB)": 34.88, "step": 112440, "train_speed(iter/s)": 0.41082 }, { "acc": 0.92791605, "epoch": 3.04456718923456, "grad_norm": 10.435641288757324, "learning_rate": 1.4812414893326543e-06, "loss": 0.39781635, "memory(GiB)": 34.88, "step": 112445, "train_speed(iter/s)": 0.410821 }, { "acc": 0.93289165, "epoch": 3.0447025695177756, "grad_norm": 10.298877716064453, "learning_rate": 1.4808440660703474e-06, "loss": 0.34297216, "memory(GiB)": 34.88, "step": 112450, "train_speed(iter/s)": 0.410822 }, { "acc": 0.93533974, "epoch": 3.044837949800991, "grad_norm": 11.169832229614258, "learning_rate": 1.480446686898645e-06, "loss": 0.40963554, "memory(GiB)": 34.88, "step": 112455, "train_speed(iter/s)": 0.410823 }, { "acc": 0.93805704, "epoch": 3.0449733300842063, "grad_norm": 17.84612464904785, "learning_rate": 1.4800493518225276e-06, "loss": 0.32099745, "memory(GiB)": 34.88, "step": 112460, "train_speed(iter/s)": 0.410824 }, { "acc": 0.93147297, "epoch": 3.045108710367422, "grad_norm": 9.479154586791992, "learning_rate": 1.4796520608469721e-06, "loss": 0.39115577, "memory(GiB)": 34.88, "step": 112465, "train_speed(iter/s)": 0.410825 }, { "acc": 0.9322506, "epoch": 3.0452440906506375, "grad_norm": 8.283499717712402, "learning_rate": 1.4792548139769575e-06, "loss": 0.35387559, "memory(GiB)": 34.88, "step": 112470, "train_speed(iter/s)": 0.410826 }, { "acc": 0.942449, "epoch": 3.0453794709338533, "grad_norm": 4.891341209411621, "learning_rate": 1.478857611217458e-06, "loss": 0.36234167, "memory(GiB)": 34.88, "step": 112475, "train_speed(iter/s)": 0.410827 }, { "acc": 0.93298588, "epoch": 3.0455148512170687, "grad_norm": 5.266227722167969, "learning_rate": 1.4784604525734462e-06, "loss": 0.374576, "memory(GiB)": 34.88, "step": 112480, "train_speed(iter/s)": 0.410828 }, { "acc": 0.94674158, "epoch": 3.0456502315002845, "grad_norm": 19.199920654296875, "learning_rate": 1.478063338049903e-06, "loss": 0.32523787, "memory(GiB)": 34.88, "step": 112485, "train_speed(iter/s)": 0.410829 }, { "acc": 0.93800097, "epoch": 3.0457856117835, "grad_norm": 13.575556755065918, "learning_rate": 1.4776662676517995e-06, "loss": 0.3846333, "memory(GiB)": 34.88, "step": 112490, "train_speed(iter/s)": 0.41083 }, { "acc": 0.93517008, "epoch": 3.0459209920667156, "grad_norm": 6.521595478057861, "learning_rate": 1.4772692413841129e-06, "loss": 0.33351481, "memory(GiB)": 34.88, "step": 112495, "train_speed(iter/s)": 0.410831 }, { "acc": 0.924189, "epoch": 3.046056372349931, "grad_norm": 22.50750732421875, "learning_rate": 1.476872259251811e-06, "loss": 0.48187771, "memory(GiB)": 34.88, "step": 112500, "train_speed(iter/s)": 0.410832 }, { "acc": 0.94733582, "epoch": 3.0461917526331463, "grad_norm": 5.1092529296875, "learning_rate": 1.4764753212598734e-06, "loss": 0.28450108, "memory(GiB)": 34.88, "step": 112505, "train_speed(iter/s)": 0.410833 }, { "acc": 0.92365208, "epoch": 3.046327132916362, "grad_norm": 13.355175971984863, "learning_rate": 1.4760784274132684e-06, "loss": 0.45763807, "memory(GiB)": 34.88, "step": 112510, "train_speed(iter/s)": 0.410834 }, { "acc": 0.9303833, "epoch": 3.0464625131995775, "grad_norm": 4.786769866943359, "learning_rate": 1.4756815777169688e-06, "loss": 0.4333518, "memory(GiB)": 34.88, "step": 112515, "train_speed(iter/s)": 0.410835 }, { "acc": 0.94028225, "epoch": 3.0465978934827933, "grad_norm": 6.131227493286133, "learning_rate": 1.4752847721759483e-06, "loss": 0.33016338, "memory(GiB)": 34.88, "step": 112520, "train_speed(iter/s)": 0.410836 }, { "acc": 0.93433504, "epoch": 3.0467332737660087, "grad_norm": 6.075316905975342, "learning_rate": 1.4748880107951738e-06, "loss": 0.3915364, "memory(GiB)": 34.88, "step": 112525, "train_speed(iter/s)": 0.410837 }, { "acc": 0.9376379, "epoch": 3.0468686540492245, "grad_norm": 4.336094856262207, "learning_rate": 1.4744912935796186e-06, "loss": 0.36225348, "memory(GiB)": 34.88, "step": 112530, "train_speed(iter/s)": 0.410838 }, { "acc": 0.94849081, "epoch": 3.04700403433244, "grad_norm": 6.85790491104126, "learning_rate": 1.4740946205342482e-06, "loss": 0.30074754, "memory(GiB)": 34.88, "step": 112535, "train_speed(iter/s)": 0.410839 }, { "acc": 0.93253546, "epoch": 3.047139414615655, "grad_norm": 16.642658233642578, "learning_rate": 1.4736979916640376e-06, "loss": 0.38185751, "memory(GiB)": 34.88, "step": 112540, "train_speed(iter/s)": 0.41084 }, { "acc": 0.96784897, "epoch": 3.047274794898871, "grad_norm": 6.146676063537598, "learning_rate": 1.4733014069739504e-06, "loss": 0.17906728, "memory(GiB)": 34.88, "step": 112545, "train_speed(iter/s)": 0.410841 }, { "acc": 0.93128872, "epoch": 3.0474101751820863, "grad_norm": 5.677731513977051, "learning_rate": 1.4729048664689566e-06, "loss": 0.34457321, "memory(GiB)": 34.88, "step": 112550, "train_speed(iter/s)": 0.410842 }, { "acc": 0.9405817, "epoch": 3.047545555465302, "grad_norm": 5.038843631744385, "learning_rate": 1.4725083701540244e-06, "loss": 0.37864904, "memory(GiB)": 34.88, "step": 112555, "train_speed(iter/s)": 0.410843 }, { "acc": 0.93372517, "epoch": 3.0476809357485175, "grad_norm": 9.255257606506348, "learning_rate": 1.4721119180341203e-06, "loss": 0.3788934, "memory(GiB)": 34.88, "step": 112560, "train_speed(iter/s)": 0.410844 }, { "acc": 0.94332075, "epoch": 3.0478163160317333, "grad_norm": 8.47775936126709, "learning_rate": 1.471715510114211e-06, "loss": 0.38230901, "memory(GiB)": 34.88, "step": 112565, "train_speed(iter/s)": 0.410845 }, { "acc": 0.94595566, "epoch": 3.0479516963149487, "grad_norm": 6.433221340179443, "learning_rate": 1.4713191463992579e-06, "loss": 0.26891067, "memory(GiB)": 34.88, "step": 112570, "train_speed(iter/s)": 0.410846 }, { "acc": 0.95310373, "epoch": 3.048087076598164, "grad_norm": 17.145769119262695, "learning_rate": 1.470922826894233e-06, "loss": 0.29340649, "memory(GiB)": 34.88, "step": 112575, "train_speed(iter/s)": 0.410847 }, { "acc": 0.94288568, "epoch": 3.04822245688138, "grad_norm": 7.6413750648498535, "learning_rate": 1.4705265516040963e-06, "loss": 0.34762793, "memory(GiB)": 34.88, "step": 112580, "train_speed(iter/s)": 0.410848 }, { "acc": 0.95445461, "epoch": 3.048357837164595, "grad_norm": 8.425519943237305, "learning_rate": 1.4701303205338133e-06, "loss": 0.21535068, "memory(GiB)": 34.88, "step": 112585, "train_speed(iter/s)": 0.410849 }, { "acc": 0.95967426, "epoch": 3.048493217447811, "grad_norm": 5.550544261932373, "learning_rate": 1.4697341336883478e-06, "loss": 0.2433636, "memory(GiB)": 34.88, "step": 112590, "train_speed(iter/s)": 0.41085 }, { "acc": 0.93626003, "epoch": 3.0486285977310263, "grad_norm": 7.829226493835449, "learning_rate": 1.4693379910726645e-06, "loss": 0.33631845, "memory(GiB)": 34.88, "step": 112595, "train_speed(iter/s)": 0.410851 }, { "acc": 0.93043709, "epoch": 3.048763978014242, "grad_norm": 8.73603343963623, "learning_rate": 1.468941892691722e-06, "loss": 0.36192274, "memory(GiB)": 34.88, "step": 112600, "train_speed(iter/s)": 0.410852 }, { "acc": 0.93399973, "epoch": 3.0488993582974575, "grad_norm": 23.582664489746094, "learning_rate": 1.4685458385504844e-06, "loss": 0.42239304, "memory(GiB)": 34.88, "step": 112605, "train_speed(iter/s)": 0.410853 }, { "acc": 0.94203682, "epoch": 3.0490347385806733, "grad_norm": 10.002209663391113, "learning_rate": 1.4681498286539148e-06, "loss": 0.26500793, "memory(GiB)": 34.88, "step": 112610, "train_speed(iter/s)": 0.410854 }, { "acc": 0.92763519, "epoch": 3.0491701188638887, "grad_norm": 17.351743698120117, "learning_rate": 1.4677538630069706e-06, "loss": 0.42083931, "memory(GiB)": 34.88, "step": 112615, "train_speed(iter/s)": 0.410855 }, { "acc": 0.91189098, "epoch": 3.049305499147104, "grad_norm": 7.0466156005859375, "learning_rate": 1.4673579416146138e-06, "loss": 0.52007942, "memory(GiB)": 34.88, "step": 112620, "train_speed(iter/s)": 0.410856 }, { "acc": 0.95778904, "epoch": 3.04944087943032, "grad_norm": 3.2732369899749756, "learning_rate": 1.4669620644818037e-06, "loss": 0.25447934, "memory(GiB)": 34.88, "step": 112625, "train_speed(iter/s)": 0.410857 }, { "acc": 0.9488327, "epoch": 3.049576259713535, "grad_norm": 7.922903060913086, "learning_rate": 1.466566231613501e-06, "loss": 0.28338103, "memory(GiB)": 34.88, "step": 112630, "train_speed(iter/s)": 0.410858 }, { "acc": 0.93092442, "epoch": 3.049711639996751, "grad_norm": 9.941991806030273, "learning_rate": 1.4661704430146614e-06, "loss": 0.38666112, "memory(GiB)": 34.88, "step": 112635, "train_speed(iter/s)": 0.410859 }, { "acc": 0.95049105, "epoch": 3.0498470202799663, "grad_norm": 3.602449417114258, "learning_rate": 1.4657746986902443e-06, "loss": 0.29920094, "memory(GiB)": 34.88, "step": 112640, "train_speed(iter/s)": 0.41086 }, { "acc": 0.94579029, "epoch": 3.049982400563182, "grad_norm": 14.941535949707031, "learning_rate": 1.4653789986452094e-06, "loss": 0.34281566, "memory(GiB)": 34.88, "step": 112645, "train_speed(iter/s)": 0.410861 }, { "acc": 0.93420248, "epoch": 3.0501177808463975, "grad_norm": 3.9676809310913086, "learning_rate": 1.4649833428845092e-06, "loss": 0.38998926, "memory(GiB)": 34.88, "step": 112650, "train_speed(iter/s)": 0.410862 }, { "acc": 0.94744892, "epoch": 3.050253161129613, "grad_norm": 5.3915534019470215, "learning_rate": 1.464587731413103e-06, "loss": 0.28240304, "memory(GiB)": 34.88, "step": 112655, "train_speed(iter/s)": 0.410863 }, { "acc": 0.94755812, "epoch": 3.0503885414128287, "grad_norm": 8.147385597229004, "learning_rate": 1.4641921642359457e-06, "loss": 0.31456251, "memory(GiB)": 34.88, "step": 112660, "train_speed(iter/s)": 0.410864 }, { "acc": 0.94555283, "epoch": 3.050523921696044, "grad_norm": 9.355768203735352, "learning_rate": 1.463796641357995e-06, "loss": 0.34886894, "memory(GiB)": 34.88, "step": 112665, "train_speed(iter/s)": 0.410865 }, { "acc": 0.94749908, "epoch": 3.05065930197926, "grad_norm": 8.33630657196045, "learning_rate": 1.4634011627842015e-06, "loss": 0.34450469, "memory(GiB)": 34.88, "step": 112670, "train_speed(iter/s)": 0.410866 }, { "acc": 0.94642115, "epoch": 3.050794682262475, "grad_norm": 8.819941520690918, "learning_rate": 1.4630057285195211e-06, "loss": 0.23054426, "memory(GiB)": 34.88, "step": 112675, "train_speed(iter/s)": 0.410867 }, { "acc": 0.94783535, "epoch": 3.050930062545691, "grad_norm": 4.747750282287598, "learning_rate": 1.4626103385689078e-06, "loss": 0.2696224, "memory(GiB)": 34.88, "step": 112680, "train_speed(iter/s)": 0.410868 }, { "acc": 0.95254269, "epoch": 3.0510654428289063, "grad_norm": 4.433883190155029, "learning_rate": 1.4622149929373166e-06, "loss": 0.28429551, "memory(GiB)": 34.88, "step": 112685, "train_speed(iter/s)": 0.410869 }, { "acc": 0.95794611, "epoch": 3.051200823112122, "grad_norm": 7.894105911254883, "learning_rate": 1.4618196916296956e-06, "loss": 0.24848607, "memory(GiB)": 34.88, "step": 112690, "train_speed(iter/s)": 0.41087 }, { "acc": 0.93937588, "epoch": 3.0513362033953375, "grad_norm": 8.782194137573242, "learning_rate": 1.461424434650999e-06, "loss": 0.35690758, "memory(GiB)": 34.88, "step": 112695, "train_speed(iter/s)": 0.410871 }, { "acc": 0.94551172, "epoch": 3.051471583678553, "grad_norm": 3.586712598800659, "learning_rate": 1.4610292220061797e-06, "loss": 0.2956625, "memory(GiB)": 34.88, "step": 112700, "train_speed(iter/s)": 0.410872 }, { "acc": 0.93363104, "epoch": 3.0516069639617687, "grad_norm": 6.786685943603516, "learning_rate": 1.4606340537001856e-06, "loss": 0.38825707, "memory(GiB)": 34.88, "step": 112705, "train_speed(iter/s)": 0.410873 }, { "acc": 0.93364983, "epoch": 3.051742344244984, "grad_norm": 5.07558012008667, "learning_rate": 1.4602389297379682e-06, "loss": 0.41460319, "memory(GiB)": 34.88, "step": 112710, "train_speed(iter/s)": 0.410874 }, { "acc": 0.93388214, "epoch": 3.0518777245282, "grad_norm": 6.040514945983887, "learning_rate": 1.4598438501244772e-06, "loss": 0.43848004, "memory(GiB)": 34.88, "step": 112715, "train_speed(iter/s)": 0.410875 }, { "acc": 0.93912325, "epoch": 3.052013104811415, "grad_norm": 7.826782703399658, "learning_rate": 1.4594488148646635e-06, "loss": 0.35959005, "memory(GiB)": 34.88, "step": 112720, "train_speed(iter/s)": 0.410876 }, { "acc": 0.93179016, "epoch": 3.052148485094631, "grad_norm": 5.127954483032227, "learning_rate": 1.4590538239634727e-06, "loss": 0.38327765, "memory(GiB)": 34.88, "step": 112725, "train_speed(iter/s)": 0.410877 }, { "acc": 0.93517008, "epoch": 3.0522838653778464, "grad_norm": 11.664052963256836, "learning_rate": 1.458658877425854e-06, "loss": 0.38196101, "memory(GiB)": 34.88, "step": 112730, "train_speed(iter/s)": 0.410878 }, { "acc": 0.92263222, "epoch": 3.0524192456610617, "grad_norm": 5.707948684692383, "learning_rate": 1.4582639752567566e-06, "loss": 0.50358925, "memory(GiB)": 34.88, "step": 112735, "train_speed(iter/s)": 0.410879 }, { "acc": 0.94254532, "epoch": 3.0525546259442775, "grad_norm": 6.076093673706055, "learning_rate": 1.457869117461124e-06, "loss": 0.30238364, "memory(GiB)": 34.88, "step": 112740, "train_speed(iter/s)": 0.41088 }, { "acc": 0.94071951, "epoch": 3.052690006227493, "grad_norm": 3.9027481079101562, "learning_rate": 1.4574743040439049e-06, "loss": 0.38144889, "memory(GiB)": 34.88, "step": 112745, "train_speed(iter/s)": 0.41088 }, { "acc": 0.95867081, "epoch": 3.0528253865107087, "grad_norm": 2.8723793029785156, "learning_rate": 1.4570795350100447e-06, "loss": 0.20673394, "memory(GiB)": 34.88, "step": 112750, "train_speed(iter/s)": 0.410881 }, { "acc": 0.9503191, "epoch": 3.052960766793924, "grad_norm": 6.826610088348389, "learning_rate": 1.4566848103644898e-06, "loss": 0.31654134, "memory(GiB)": 34.88, "step": 112755, "train_speed(iter/s)": 0.410882 }, { "acc": 0.94777479, "epoch": 3.05309614707714, "grad_norm": 3.9484875202178955, "learning_rate": 1.4562901301121824e-06, "loss": 0.23018627, "memory(GiB)": 34.88, "step": 112760, "train_speed(iter/s)": 0.410883 }, { "acc": 0.94694376, "epoch": 3.053231527360355, "grad_norm": 6.447192668914795, "learning_rate": 1.455895494258068e-06, "loss": 0.30476923, "memory(GiB)": 34.88, "step": 112765, "train_speed(iter/s)": 0.410884 }, { "acc": 0.95555592, "epoch": 3.053366907643571, "grad_norm": 6.661599636077881, "learning_rate": 1.4555009028070921e-06, "loss": 0.26554365, "memory(GiB)": 34.88, "step": 112770, "train_speed(iter/s)": 0.410885 }, { "acc": 0.95303421, "epoch": 3.0535022879267864, "grad_norm": 5.108063220977783, "learning_rate": 1.4551063557641937e-06, "loss": 0.26141653, "memory(GiB)": 34.88, "step": 112775, "train_speed(iter/s)": 0.410886 }, { "acc": 0.94023647, "epoch": 3.0536376682100017, "grad_norm": 14.367576599121094, "learning_rate": 1.4547118531343173e-06, "loss": 0.31253097, "memory(GiB)": 34.88, "step": 112780, "train_speed(iter/s)": 0.410887 }, { "acc": 0.93736324, "epoch": 3.0537730484932175, "grad_norm": 8.218728065490723, "learning_rate": 1.4543173949224055e-06, "loss": 0.35564098, "memory(GiB)": 34.88, "step": 112785, "train_speed(iter/s)": 0.410888 }, { "acc": 0.95123129, "epoch": 3.053908428776433, "grad_norm": 7.674715518951416, "learning_rate": 1.4539229811334003e-06, "loss": 0.2634788, "memory(GiB)": 34.88, "step": 112790, "train_speed(iter/s)": 0.410889 }, { "acc": 0.94583645, "epoch": 3.0540438090596487, "grad_norm": 7.290525913238525, "learning_rate": 1.4535286117722398e-06, "loss": 0.2633703, "memory(GiB)": 34.88, "step": 112795, "train_speed(iter/s)": 0.41089 }, { "acc": 0.95331221, "epoch": 3.054179189342864, "grad_norm": 3.1996355056762695, "learning_rate": 1.4531342868438652e-06, "loss": 0.26784372, "memory(GiB)": 34.88, "step": 112800, "train_speed(iter/s)": 0.410891 }, { "acc": 0.94735584, "epoch": 3.05431456962608, "grad_norm": 45.280757904052734, "learning_rate": 1.4527400063532173e-06, "loss": 0.28093331, "memory(GiB)": 34.88, "step": 112805, "train_speed(iter/s)": 0.410892 }, { "acc": 0.94028912, "epoch": 3.054449949909295, "grad_norm": 10.864473342895508, "learning_rate": 1.4523457703052362e-06, "loss": 0.2820457, "memory(GiB)": 34.88, "step": 112810, "train_speed(iter/s)": 0.410893 }, { "acc": 0.93695726, "epoch": 3.0545853301925106, "grad_norm": 16.011062622070312, "learning_rate": 1.451951578704857e-06, "loss": 0.38709521, "memory(GiB)": 34.88, "step": 112815, "train_speed(iter/s)": 0.410894 }, { "acc": 0.94460611, "epoch": 3.0547207104757264, "grad_norm": 5.216592311859131, "learning_rate": 1.45155743155702e-06, "loss": 0.38997536, "memory(GiB)": 34.88, "step": 112820, "train_speed(iter/s)": 0.410895 }, { "acc": 0.93207617, "epoch": 3.0548560907589417, "grad_norm": 14.699166297912598, "learning_rate": 1.4511633288666637e-06, "loss": 0.39647882, "memory(GiB)": 34.88, "step": 112825, "train_speed(iter/s)": 0.410896 }, { "acc": 0.95099888, "epoch": 3.0549914710421575, "grad_norm": 5.8235368728637695, "learning_rate": 1.4507692706387222e-06, "loss": 0.31904335, "memory(GiB)": 34.88, "step": 112830, "train_speed(iter/s)": 0.410897 }, { "acc": 0.94472322, "epoch": 3.055126851325373, "grad_norm": 4.182260990142822, "learning_rate": 1.4503752568781334e-06, "loss": 0.32777925, "memory(GiB)": 34.88, "step": 112835, "train_speed(iter/s)": 0.410898 }, { "acc": 0.94460096, "epoch": 3.0552622316085887, "grad_norm": 6.828815937042236, "learning_rate": 1.4499812875898327e-06, "loss": 0.35093367, "memory(GiB)": 34.88, "step": 112840, "train_speed(iter/s)": 0.410899 }, { "acc": 0.93733711, "epoch": 3.055397611891804, "grad_norm": 4.464057445526123, "learning_rate": 1.4495873627787572e-06, "loss": 0.35393372, "memory(GiB)": 34.88, "step": 112845, "train_speed(iter/s)": 0.4109 }, { "acc": 0.95299501, "epoch": 3.05553299217502, "grad_norm": 5.467289447784424, "learning_rate": 1.4491934824498369e-06, "loss": 0.30454979, "memory(GiB)": 34.88, "step": 112850, "train_speed(iter/s)": 0.410901 }, { "acc": 0.93771009, "epoch": 3.055668372458235, "grad_norm": 12.260083198547363, "learning_rate": 1.4487996466080123e-06, "loss": 0.45698395, "memory(GiB)": 34.88, "step": 112855, "train_speed(iter/s)": 0.410902 }, { "acc": 0.95623188, "epoch": 3.0558037527414506, "grad_norm": 3.796259880065918, "learning_rate": 1.448405855258214e-06, "loss": 0.24336762, "memory(GiB)": 34.88, "step": 112860, "train_speed(iter/s)": 0.410903 }, { "acc": 0.95364532, "epoch": 3.0559391330246664, "grad_norm": 5.218354225158691, "learning_rate": 1.4480121084053724e-06, "loss": 0.25921469, "memory(GiB)": 34.88, "step": 112865, "train_speed(iter/s)": 0.410904 }, { "acc": 0.94746485, "epoch": 3.0560745133078817, "grad_norm": 4.097927570343018, "learning_rate": 1.447618406054423e-06, "loss": 0.27972341, "memory(GiB)": 34.88, "step": 112870, "train_speed(iter/s)": 0.410905 }, { "acc": 0.95155563, "epoch": 3.0562098935910975, "grad_norm": 2.7745308876037598, "learning_rate": 1.4472247482102963e-06, "loss": 0.21323347, "memory(GiB)": 34.88, "step": 112875, "train_speed(iter/s)": 0.410906 }, { "acc": 0.93017979, "epoch": 3.056345273874313, "grad_norm": 6.042102813720703, "learning_rate": 1.446831134877927e-06, "loss": 0.45982542, "memory(GiB)": 34.88, "step": 112880, "train_speed(iter/s)": 0.410907 }, { "acc": 0.95413113, "epoch": 3.0564806541575287, "grad_norm": 5.667418956756592, "learning_rate": 1.446437566062239e-06, "loss": 0.25467024, "memory(GiB)": 34.88, "step": 112885, "train_speed(iter/s)": 0.410908 }, { "acc": 0.94807091, "epoch": 3.056616034440744, "grad_norm": 5.853381633758545, "learning_rate": 1.4460440417681701e-06, "loss": 0.3726727, "memory(GiB)": 34.88, "step": 112890, "train_speed(iter/s)": 0.410909 }, { "acc": 0.94528122, "epoch": 3.0567514147239594, "grad_norm": 2.702833890914917, "learning_rate": 1.4456505620006467e-06, "loss": 0.34717586, "memory(GiB)": 34.88, "step": 112895, "train_speed(iter/s)": 0.41091 }, { "acc": 0.93376923, "epoch": 3.056886795007175, "grad_norm": 4.495290279388428, "learning_rate": 1.4452571267645957e-06, "loss": 0.34966202, "memory(GiB)": 34.88, "step": 112900, "train_speed(iter/s)": 0.410911 }, { "acc": 0.93947697, "epoch": 3.0570221752903906, "grad_norm": 17.220951080322266, "learning_rate": 1.4448637360649485e-06, "loss": 0.35940578, "memory(GiB)": 34.88, "step": 112905, "train_speed(iter/s)": 0.410912 }, { "acc": 0.93430481, "epoch": 3.0571575555736064, "grad_norm": 14.058635711669922, "learning_rate": 1.4444703899066318e-06, "loss": 0.39397779, "memory(GiB)": 34.88, "step": 112910, "train_speed(iter/s)": 0.410913 }, { "acc": 0.94388247, "epoch": 3.0572929358568217, "grad_norm": 6.7129621505737305, "learning_rate": 1.4440770882945754e-06, "loss": 0.34747367, "memory(GiB)": 34.88, "step": 112915, "train_speed(iter/s)": 0.410914 }, { "acc": 0.94226151, "epoch": 3.0574283161400375, "grad_norm": 9.333649635314941, "learning_rate": 1.443683831233701e-06, "loss": 0.37910428, "memory(GiB)": 34.88, "step": 112920, "train_speed(iter/s)": 0.410914 }, { "acc": 0.9334549, "epoch": 3.057563696423253, "grad_norm": 6.863068580627441, "learning_rate": 1.4432906187289414e-06, "loss": 0.4670464, "memory(GiB)": 34.88, "step": 112925, "train_speed(iter/s)": 0.410915 }, { "acc": 0.93948002, "epoch": 3.0576990767064687, "grad_norm": 5.894991397857666, "learning_rate": 1.4428974507852176e-06, "loss": 0.42873454, "memory(GiB)": 34.88, "step": 112930, "train_speed(iter/s)": 0.410916 }, { "acc": 0.93709078, "epoch": 3.057834456989684, "grad_norm": 5.952359676361084, "learning_rate": 1.4425043274074579e-06, "loss": 0.37485929, "memory(GiB)": 34.88, "step": 112935, "train_speed(iter/s)": 0.410917 }, { "acc": 0.94544325, "epoch": 3.0579698372728994, "grad_norm": 4.219554901123047, "learning_rate": 1.4421112486005841e-06, "loss": 0.30121329, "memory(GiB)": 34.88, "step": 112940, "train_speed(iter/s)": 0.410918 }, { "acc": 0.95137653, "epoch": 3.058105217556115, "grad_norm": 3.7490592002868652, "learning_rate": 1.4417182143695218e-06, "loss": 0.27321532, "memory(GiB)": 34.88, "step": 112945, "train_speed(iter/s)": 0.410919 }, { "acc": 0.93962946, "epoch": 3.0582405978393306, "grad_norm": 6.42650032043457, "learning_rate": 1.4413252247191957e-06, "loss": 0.34521804, "memory(GiB)": 34.88, "step": 112950, "train_speed(iter/s)": 0.41092 }, { "acc": 0.94368677, "epoch": 3.0583759781225464, "grad_norm": 7.112250804901123, "learning_rate": 1.440932279654524e-06, "loss": 0.33135164, "memory(GiB)": 34.88, "step": 112955, "train_speed(iter/s)": 0.410921 }, { "acc": 0.93434925, "epoch": 3.0585113584057617, "grad_norm": 18.456668853759766, "learning_rate": 1.4405393791804363e-06, "loss": 0.42422705, "memory(GiB)": 34.88, "step": 112960, "train_speed(iter/s)": 0.410922 }, { "acc": 0.94226036, "epoch": 3.0586467386889775, "grad_norm": 7.254223823547363, "learning_rate": 1.4401465233018487e-06, "loss": 0.35111721, "memory(GiB)": 34.88, "step": 112965, "train_speed(iter/s)": 0.410923 }, { "acc": 0.95327454, "epoch": 3.058782118972193, "grad_norm": 6.544200897216797, "learning_rate": 1.4397537120236862e-06, "loss": 0.26160808, "memory(GiB)": 34.88, "step": 112970, "train_speed(iter/s)": 0.410924 }, { "acc": 0.94920282, "epoch": 3.0589174992554082, "grad_norm": 10.752913475036621, "learning_rate": 1.4393609453508644e-06, "loss": 0.37772865, "memory(GiB)": 34.88, "step": 112975, "train_speed(iter/s)": 0.410925 }, { "acc": 0.93815203, "epoch": 3.059052879538624, "grad_norm": 7.197993755340576, "learning_rate": 1.43896822328831e-06, "loss": 0.28047976, "memory(GiB)": 34.88, "step": 112980, "train_speed(iter/s)": 0.410926 }, { "acc": 0.92732925, "epoch": 3.0591882598218394, "grad_norm": 6.798643112182617, "learning_rate": 1.4385755458409398e-06, "loss": 0.40343246, "memory(GiB)": 34.88, "step": 112985, "train_speed(iter/s)": 0.410927 }, { "acc": 0.94933586, "epoch": 3.059323640105055, "grad_norm": 6.574371814727783, "learning_rate": 1.4381829130136696e-06, "loss": 0.30726531, "memory(GiB)": 34.88, "step": 112990, "train_speed(iter/s)": 0.410928 }, { "acc": 0.94223127, "epoch": 3.0594590203882706, "grad_norm": 5.483584403991699, "learning_rate": 1.437790324811424e-06, "loss": 0.35965567, "memory(GiB)": 34.88, "step": 112995, "train_speed(iter/s)": 0.410929 }, { "acc": 0.94118385, "epoch": 3.0595944006714864, "grad_norm": 5.7966461181640625, "learning_rate": 1.4373977812391158e-06, "loss": 0.31306801, "memory(GiB)": 34.88, "step": 113000, "train_speed(iter/s)": 0.41093 }, { "acc": 0.94363213, "epoch": 3.0597297809547017, "grad_norm": 6.095720291137695, "learning_rate": 1.4370052823016658e-06, "loss": 0.30475597, "memory(GiB)": 34.88, "step": 113005, "train_speed(iter/s)": 0.410931 }, { "acc": 0.92859602, "epoch": 3.0598651612379175, "grad_norm": 7.758685111999512, "learning_rate": 1.4366128280039864e-06, "loss": 0.36186681, "memory(GiB)": 34.88, "step": 113010, "train_speed(iter/s)": 0.410932 }, { "acc": 0.94756622, "epoch": 3.060000541521133, "grad_norm": 3.2913966178894043, "learning_rate": 1.4362204183510001e-06, "loss": 0.32417254, "memory(GiB)": 34.88, "step": 113015, "train_speed(iter/s)": 0.410933 }, { "acc": 0.9570859, "epoch": 3.0601359218043482, "grad_norm": 6.5660624504089355, "learning_rate": 1.4358280533476199e-06, "loss": 0.21493344, "memory(GiB)": 34.88, "step": 113020, "train_speed(iter/s)": 0.410934 }, { "acc": 0.945788, "epoch": 3.060271302087564, "grad_norm": 4.107687473297119, "learning_rate": 1.4354357329987561e-06, "loss": 0.28242011, "memory(GiB)": 34.88, "step": 113025, "train_speed(iter/s)": 0.410935 }, { "acc": 0.94654875, "epoch": 3.0604066823707794, "grad_norm": 25.87057113647461, "learning_rate": 1.4350434573093316e-06, "loss": 0.35993147, "memory(GiB)": 34.88, "step": 113030, "train_speed(iter/s)": 0.410936 }, { "acc": 0.93414907, "epoch": 3.060542062653995, "grad_norm": 4.8490986824035645, "learning_rate": 1.4346512262842546e-06, "loss": 0.43723907, "memory(GiB)": 34.88, "step": 113035, "train_speed(iter/s)": 0.410937 }, { "acc": 0.93566532, "epoch": 3.0606774429372106, "grad_norm": 4.519296169281006, "learning_rate": 1.434259039928442e-06, "loss": 0.38750348, "memory(GiB)": 34.88, "step": 113040, "train_speed(iter/s)": 0.410938 }, { "acc": 0.94874105, "epoch": 3.0608128232204264, "grad_norm": 15.502090454101562, "learning_rate": 1.433866898246802e-06, "loss": 0.25725579, "memory(GiB)": 34.88, "step": 113045, "train_speed(iter/s)": 0.410939 }, { "acc": 0.93780899, "epoch": 3.0609482035036417, "grad_norm": 10.14250659942627, "learning_rate": 1.433474801244253e-06, "loss": 0.34731998, "memory(GiB)": 34.88, "step": 113050, "train_speed(iter/s)": 0.41094 }, { "acc": 0.94789124, "epoch": 3.061083583786857, "grad_norm": 8.199180603027344, "learning_rate": 1.4330827489257024e-06, "loss": 0.27003722, "memory(GiB)": 34.88, "step": 113055, "train_speed(iter/s)": 0.410941 }, { "acc": 0.93871632, "epoch": 3.061218964070073, "grad_norm": 12.135091781616211, "learning_rate": 1.432690741296064e-06, "loss": 0.37150617, "memory(GiB)": 34.88, "step": 113060, "train_speed(iter/s)": 0.410942 }, { "acc": 0.93929691, "epoch": 3.0613543443532882, "grad_norm": 6.359902858734131, "learning_rate": 1.4322987783602463e-06, "loss": 0.35180249, "memory(GiB)": 34.88, "step": 113065, "train_speed(iter/s)": 0.410943 }, { "acc": 0.94548264, "epoch": 3.061489724636504, "grad_norm": 9.809983253479004, "learning_rate": 1.4319068601231598e-06, "loss": 0.284006, "memory(GiB)": 34.88, "step": 113070, "train_speed(iter/s)": 0.410944 }, { "acc": 0.93024635, "epoch": 3.0616251049197194, "grad_norm": 7.198392391204834, "learning_rate": 1.4315149865897167e-06, "loss": 0.39238172, "memory(GiB)": 34.88, "step": 113075, "train_speed(iter/s)": 0.410945 }, { "acc": 0.92864265, "epoch": 3.061760485202935, "grad_norm": 18.112756729125977, "learning_rate": 1.4311231577648205e-06, "loss": 0.47134299, "memory(GiB)": 34.88, "step": 113080, "train_speed(iter/s)": 0.410946 }, { "acc": 0.94553413, "epoch": 3.0618958654861506, "grad_norm": 35.282108306884766, "learning_rate": 1.4307313736533865e-06, "loss": 0.28601, "memory(GiB)": 34.88, "step": 113085, "train_speed(iter/s)": 0.410947 }, { "acc": 0.9441143, "epoch": 3.0620312457693664, "grad_norm": 5.856301307678223, "learning_rate": 1.4303396342603174e-06, "loss": 0.32832828, "memory(GiB)": 34.88, "step": 113090, "train_speed(iter/s)": 0.410948 }, { "acc": 0.94570503, "epoch": 3.0621666260525817, "grad_norm": 4.510166645050049, "learning_rate": 1.4299479395905236e-06, "loss": 0.29731464, "memory(GiB)": 34.88, "step": 113095, "train_speed(iter/s)": 0.410949 }, { "acc": 0.94003286, "epoch": 3.062302006335797, "grad_norm": 3.0548670291900635, "learning_rate": 1.4295562896489077e-06, "loss": 0.29718232, "memory(GiB)": 34.88, "step": 113100, "train_speed(iter/s)": 0.41095 }, { "acc": 0.94324045, "epoch": 3.062437386619013, "grad_norm": 4.235318183898926, "learning_rate": 1.4291646844403813e-06, "loss": 0.32090688, "memory(GiB)": 34.88, "step": 113105, "train_speed(iter/s)": 0.410951 }, { "acc": 0.9459528, "epoch": 3.0625727669022282, "grad_norm": 5.375917434692383, "learning_rate": 1.428773123969848e-06, "loss": 0.33702888, "memory(GiB)": 34.88, "step": 113110, "train_speed(iter/s)": 0.410952 }, { "acc": 0.93960609, "epoch": 3.062708147185444, "grad_norm": 4.337634086608887, "learning_rate": 1.4283816082422088e-06, "loss": 0.29535913, "memory(GiB)": 34.88, "step": 113115, "train_speed(iter/s)": 0.410953 }, { "acc": 0.94162931, "epoch": 3.0628435274686594, "grad_norm": 8.964167594909668, "learning_rate": 1.4279901372623746e-06, "loss": 0.37183366, "memory(GiB)": 34.88, "step": 113120, "train_speed(iter/s)": 0.410954 }, { "acc": 0.93188953, "epoch": 3.062978907751875, "grad_norm": 8.759726524353027, "learning_rate": 1.4275987110352452e-06, "loss": 0.34960437, "memory(GiB)": 34.88, "step": 113125, "train_speed(iter/s)": 0.410955 }, { "acc": 0.94929485, "epoch": 3.0631142880350906, "grad_norm": 5.282036304473877, "learning_rate": 1.4272073295657268e-06, "loss": 0.34493802, "memory(GiB)": 34.88, "step": 113130, "train_speed(iter/s)": 0.410956 }, { "acc": 0.94164314, "epoch": 3.063249668318306, "grad_norm": 5.7847747802734375, "learning_rate": 1.4268159928587174e-06, "loss": 0.32807879, "memory(GiB)": 34.88, "step": 113135, "train_speed(iter/s)": 0.410957 }, { "acc": 0.93660574, "epoch": 3.0633850486015217, "grad_norm": 7.4513421058654785, "learning_rate": 1.426424700919125e-06, "loss": 0.38549142, "memory(GiB)": 34.88, "step": 113140, "train_speed(iter/s)": 0.410958 }, { "acc": 0.93440742, "epoch": 3.063520428884737, "grad_norm": 10.012287139892578, "learning_rate": 1.4260334537518484e-06, "loss": 0.36613536, "memory(GiB)": 34.88, "step": 113145, "train_speed(iter/s)": 0.410959 }, { "acc": 0.94600048, "epoch": 3.063655809167953, "grad_norm": 15.408584594726562, "learning_rate": 1.4256422513617881e-06, "loss": 0.3544147, "memory(GiB)": 34.88, "step": 113150, "train_speed(iter/s)": 0.41096 }, { "acc": 0.94228592, "epoch": 3.0637911894511682, "grad_norm": 6.424625873565674, "learning_rate": 1.4252510937538478e-06, "loss": 0.30543008, "memory(GiB)": 34.88, "step": 113155, "train_speed(iter/s)": 0.410961 }, { "acc": 0.94416676, "epoch": 3.063926569734384, "grad_norm": 7.603592872619629, "learning_rate": 1.4248599809329231e-06, "loss": 0.30587826, "memory(GiB)": 34.88, "step": 113160, "train_speed(iter/s)": 0.410962 }, { "acc": 0.93523293, "epoch": 3.0640619500175994, "grad_norm": 6.943734169006348, "learning_rate": 1.424468912903918e-06, "loss": 0.36271186, "memory(GiB)": 34.88, "step": 113165, "train_speed(iter/s)": 0.410963 }, { "acc": 0.96222668, "epoch": 3.064197330300815, "grad_norm": 3.387989044189453, "learning_rate": 1.4240778896717255e-06, "loss": 0.24785457, "memory(GiB)": 34.88, "step": 113170, "train_speed(iter/s)": 0.410964 }, { "acc": 0.94453306, "epoch": 3.0643327105840306, "grad_norm": 2.912315845489502, "learning_rate": 1.4236869112412514e-06, "loss": 0.31266768, "memory(GiB)": 34.88, "step": 113175, "train_speed(iter/s)": 0.410965 }, { "acc": 0.93310814, "epoch": 3.064468090867246, "grad_norm": 12.601785659790039, "learning_rate": 1.4232959776173874e-06, "loss": 0.36990981, "memory(GiB)": 34.88, "step": 113180, "train_speed(iter/s)": 0.410965 }, { "acc": 0.94379396, "epoch": 3.0646034711504617, "grad_norm": 3.175420045852661, "learning_rate": 1.4229050888050337e-06, "loss": 0.37484498, "memory(GiB)": 34.88, "step": 113185, "train_speed(iter/s)": 0.410966 }, { "acc": 0.94980545, "epoch": 3.064738851433677, "grad_norm": 13.417220115661621, "learning_rate": 1.4225142448090873e-06, "loss": 0.32341807, "memory(GiB)": 34.88, "step": 113190, "train_speed(iter/s)": 0.410967 }, { "acc": 0.93579807, "epoch": 3.064874231716893, "grad_norm": 9.79881477355957, "learning_rate": 1.4221234456344416e-06, "loss": 0.31258154, "memory(GiB)": 34.88, "step": 113195, "train_speed(iter/s)": 0.410969 }, { "acc": 0.94090137, "epoch": 3.0650096120001082, "grad_norm": 4.611570358276367, "learning_rate": 1.4217326912859958e-06, "loss": 0.36456795, "memory(GiB)": 34.88, "step": 113200, "train_speed(iter/s)": 0.41097 }, { "acc": 0.94120846, "epoch": 3.065144992283324, "grad_norm": 5.868973731994629, "learning_rate": 1.4213419817686388e-06, "loss": 0.31612511, "memory(GiB)": 34.88, "step": 113205, "train_speed(iter/s)": 0.41097 }, { "acc": 0.94055042, "epoch": 3.0652803725665394, "grad_norm": 16.30512046813965, "learning_rate": 1.420951317087273e-06, "loss": 0.32260995, "memory(GiB)": 34.88, "step": 113210, "train_speed(iter/s)": 0.410972 }, { "acc": 0.94848995, "epoch": 3.0654157528497548, "grad_norm": 6.060846328735352, "learning_rate": 1.4205606972467865e-06, "loss": 0.2623508, "memory(GiB)": 34.88, "step": 113215, "train_speed(iter/s)": 0.410973 }, { "acc": 0.94043999, "epoch": 3.0655511331329706, "grad_norm": 5.811164855957031, "learning_rate": 1.4201701222520736e-06, "loss": 0.33635061, "memory(GiB)": 34.88, "step": 113220, "train_speed(iter/s)": 0.410973 }, { "acc": 0.93315582, "epoch": 3.065686513416186, "grad_norm": 11.069795608520508, "learning_rate": 1.4197795921080285e-06, "loss": 0.45924101, "memory(GiB)": 34.88, "step": 113225, "train_speed(iter/s)": 0.410974 }, { "acc": 0.9306448, "epoch": 3.0658218936994017, "grad_norm": 19.049158096313477, "learning_rate": 1.419389106819544e-06, "loss": 0.36835003, "memory(GiB)": 34.88, "step": 113230, "train_speed(iter/s)": 0.410975 }, { "acc": 0.93467035, "epoch": 3.065957273982617, "grad_norm": 5.224974632263184, "learning_rate": 1.4189986663915104e-06, "loss": 0.41560392, "memory(GiB)": 34.88, "step": 113235, "train_speed(iter/s)": 0.410976 }, { "acc": 0.94246292, "epoch": 3.066092654265833, "grad_norm": 10.43021297454834, "learning_rate": 1.4186082708288156e-06, "loss": 0.36870427, "memory(GiB)": 34.88, "step": 113240, "train_speed(iter/s)": 0.410977 }, { "acc": 0.94561653, "epoch": 3.0662280345490482, "grad_norm": 4.927795886993408, "learning_rate": 1.418217920136356e-06, "loss": 0.35521078, "memory(GiB)": 34.88, "step": 113245, "train_speed(iter/s)": 0.410978 }, { "acc": 0.94247265, "epoch": 3.0663634148322636, "grad_norm": 17.34464454650879, "learning_rate": 1.417827614319017e-06, "loss": 0.32232046, "memory(GiB)": 34.88, "step": 113250, "train_speed(iter/s)": 0.410979 }, { "acc": 0.9395812, "epoch": 3.0664987951154794, "grad_norm": 37.79024124145508, "learning_rate": 1.4174373533816898e-06, "loss": 0.38979464, "memory(GiB)": 34.88, "step": 113255, "train_speed(iter/s)": 0.41098 }, { "acc": 0.93934746, "epoch": 3.0666341753986948, "grad_norm": 3.5002686977386475, "learning_rate": 1.4170471373292632e-06, "loss": 0.30499468, "memory(GiB)": 34.88, "step": 113260, "train_speed(iter/s)": 0.410981 }, { "acc": 0.93788671, "epoch": 3.0667695556819106, "grad_norm": 6.213687419891357, "learning_rate": 1.416656966166626e-06, "loss": 0.37129443, "memory(GiB)": 34.88, "step": 113265, "train_speed(iter/s)": 0.410982 }, { "acc": 0.9318594, "epoch": 3.066904935965126, "grad_norm": 5.887427806854248, "learning_rate": 1.4162668398986642e-06, "loss": 0.37253027, "memory(GiB)": 34.88, "step": 113270, "train_speed(iter/s)": 0.410983 }, { "acc": 0.94043064, "epoch": 3.0670403162483417, "grad_norm": 5.586676597595215, "learning_rate": 1.4158767585302655e-06, "loss": 0.34216378, "memory(GiB)": 34.88, "step": 113275, "train_speed(iter/s)": 0.410984 }, { "acc": 0.94400635, "epoch": 3.067175696531557, "grad_norm": 5.8390212059021, "learning_rate": 1.415486722066318e-06, "loss": 0.33219445, "memory(GiB)": 34.88, "step": 113280, "train_speed(iter/s)": 0.410985 }, { "acc": 0.96435738, "epoch": 3.067311076814773, "grad_norm": 2.6580824851989746, "learning_rate": 1.415096730511705e-06, "loss": 0.24552004, "memory(GiB)": 34.88, "step": 113285, "train_speed(iter/s)": 0.410986 }, { "acc": 0.94390516, "epoch": 3.0674464570979882, "grad_norm": 16.655807495117188, "learning_rate": 1.4147067838713126e-06, "loss": 0.30783219, "memory(GiB)": 34.88, "step": 113290, "train_speed(iter/s)": 0.410987 }, { "acc": 0.93083267, "epoch": 3.0675818373812036, "grad_norm": 6.905264377593994, "learning_rate": 1.4143168821500275e-06, "loss": 0.38774438, "memory(GiB)": 34.88, "step": 113295, "train_speed(iter/s)": 0.410988 }, { "acc": 0.92171717, "epoch": 3.0677172176644194, "grad_norm": 10.86551570892334, "learning_rate": 1.4139270253527338e-06, "loss": 0.41926813, "memory(GiB)": 34.88, "step": 113300, "train_speed(iter/s)": 0.410989 }, { "acc": 0.94988127, "epoch": 3.0678525979476348, "grad_norm": 3.208080291748047, "learning_rate": 1.4135372134843124e-06, "loss": 0.31330321, "memory(GiB)": 34.88, "step": 113305, "train_speed(iter/s)": 0.41099 }, { "acc": 0.93991404, "epoch": 3.0679879782308506, "grad_norm": 8.23299503326416, "learning_rate": 1.4131474465496482e-06, "loss": 0.33340366, "memory(GiB)": 34.88, "step": 113310, "train_speed(iter/s)": 0.410991 }, { "acc": 0.93175497, "epoch": 3.068123358514066, "grad_norm": 13.117274284362793, "learning_rate": 1.412757724553626e-06, "loss": 0.43361626, "memory(GiB)": 34.88, "step": 113315, "train_speed(iter/s)": 0.410992 }, { "acc": 0.94480896, "epoch": 3.0682587387972817, "grad_norm": 8.199853897094727, "learning_rate": 1.4123680475011234e-06, "loss": 0.31168711, "memory(GiB)": 34.88, "step": 113320, "train_speed(iter/s)": 0.410993 }, { "acc": 0.93398781, "epoch": 3.068394119080497, "grad_norm": 4.019661903381348, "learning_rate": 1.4119784153970246e-06, "loss": 0.37584181, "memory(GiB)": 34.88, "step": 113325, "train_speed(iter/s)": 0.410994 }, { "acc": 0.94649887, "epoch": 3.068529499363713, "grad_norm": 7.941824913024902, "learning_rate": 1.4115888282462103e-06, "loss": 0.30729036, "memory(GiB)": 34.88, "step": 113330, "train_speed(iter/s)": 0.410995 }, { "acc": 0.94367313, "epoch": 3.0686648796469282, "grad_norm": 5.5062994956970215, "learning_rate": 1.4111992860535618e-06, "loss": 0.35748754, "memory(GiB)": 34.88, "step": 113335, "train_speed(iter/s)": 0.410996 }, { "acc": 0.93544197, "epoch": 3.0688002599301436, "grad_norm": 10.129673957824707, "learning_rate": 1.4108097888239566e-06, "loss": 0.41694989, "memory(GiB)": 34.88, "step": 113340, "train_speed(iter/s)": 0.410997 }, { "acc": 0.9298954, "epoch": 3.0689356402133594, "grad_norm": 7.108686447143555, "learning_rate": 1.4104203365622742e-06, "loss": 0.39641726, "memory(GiB)": 34.88, "step": 113345, "train_speed(iter/s)": 0.410998 }, { "acc": 0.94950838, "epoch": 3.0690710204965748, "grad_norm": 6.043685436248779, "learning_rate": 1.4100309292733946e-06, "loss": 0.26422305, "memory(GiB)": 34.88, "step": 113350, "train_speed(iter/s)": 0.410999 }, { "acc": 0.94255295, "epoch": 3.0692064007797906, "grad_norm": 5.431974411010742, "learning_rate": 1.409641566962197e-06, "loss": 0.29203763, "memory(GiB)": 34.88, "step": 113355, "train_speed(iter/s)": 0.411 }, { "acc": 0.94631414, "epoch": 3.069341781063006, "grad_norm": 9.157539367675781, "learning_rate": 1.409252249633556e-06, "loss": 0.33378048, "memory(GiB)": 34.88, "step": 113360, "train_speed(iter/s)": 0.411 }, { "acc": 0.95106983, "epoch": 3.0694771613462217, "grad_norm": 3.5923430919647217, "learning_rate": 1.4088629772923496e-06, "loss": 0.28038657, "memory(GiB)": 34.88, "step": 113365, "train_speed(iter/s)": 0.411002 }, { "acc": 0.94412737, "epoch": 3.069612541629437, "grad_norm": 11.409589767456055, "learning_rate": 1.408473749943456e-06, "loss": 0.28719182, "memory(GiB)": 34.88, "step": 113370, "train_speed(iter/s)": 0.411003 }, { "acc": 0.95352287, "epoch": 3.0697479219126524, "grad_norm": 6.890645503997803, "learning_rate": 1.408084567591748e-06, "loss": 0.2849278, "memory(GiB)": 34.88, "step": 113375, "train_speed(iter/s)": 0.411004 }, { "acc": 0.94229841, "epoch": 3.0698833021958682, "grad_norm": 3.4649581909179688, "learning_rate": 1.407695430242102e-06, "loss": 0.38072271, "memory(GiB)": 34.88, "step": 113380, "train_speed(iter/s)": 0.411005 }, { "acc": 0.95255375, "epoch": 3.0700186824790836, "grad_norm": 5.044070720672607, "learning_rate": 1.4073063378993936e-06, "loss": 0.27618446, "memory(GiB)": 34.88, "step": 113385, "train_speed(iter/s)": 0.411006 }, { "acc": 0.95758467, "epoch": 3.0701540627622994, "grad_norm": 3.58028244972229, "learning_rate": 1.4069172905684975e-06, "loss": 0.28217282, "memory(GiB)": 34.88, "step": 113390, "train_speed(iter/s)": 0.411007 }, { "acc": 0.93632364, "epoch": 3.0702894430455148, "grad_norm": 10.093711853027344, "learning_rate": 1.406528288254285e-06, "loss": 0.31787562, "memory(GiB)": 34.88, "step": 113395, "train_speed(iter/s)": 0.411008 }, { "acc": 0.94527912, "epoch": 3.0704248233287306, "grad_norm": 5.024101734161377, "learning_rate": 1.4061393309616296e-06, "loss": 0.28930078, "memory(GiB)": 34.88, "step": 113400, "train_speed(iter/s)": 0.411009 }, { "acc": 0.94059114, "epoch": 3.070560203611946, "grad_norm": 5.275765895843506, "learning_rate": 1.4057504186954074e-06, "loss": 0.40323086, "memory(GiB)": 34.88, "step": 113405, "train_speed(iter/s)": 0.41101 }, { "acc": 0.93217354, "epoch": 3.0706955838951613, "grad_norm": 6.4914870262146, "learning_rate": 1.4053615514604847e-06, "loss": 0.4147305, "memory(GiB)": 34.88, "step": 113410, "train_speed(iter/s)": 0.411011 }, { "acc": 0.94629593, "epoch": 3.070830964178377, "grad_norm": 7.0724778175354, "learning_rate": 1.4049727292617357e-06, "loss": 0.28156719, "memory(GiB)": 34.88, "step": 113415, "train_speed(iter/s)": 0.411012 }, { "acc": 0.93453674, "epoch": 3.0709663444615924, "grad_norm": 8.31938648223877, "learning_rate": 1.4045839521040314e-06, "loss": 0.3448688, "memory(GiB)": 34.88, "step": 113420, "train_speed(iter/s)": 0.411013 }, { "acc": 0.95081863, "epoch": 3.0711017247448082, "grad_norm": 7.101583003997803, "learning_rate": 1.404195219992243e-06, "loss": 0.24111102, "memory(GiB)": 34.88, "step": 113425, "train_speed(iter/s)": 0.411014 }, { "acc": 0.92100363, "epoch": 3.0712371050280236, "grad_norm": 8.288946151733398, "learning_rate": 1.403806532931237e-06, "loss": 0.424189, "memory(GiB)": 34.88, "step": 113430, "train_speed(iter/s)": 0.411015 }, { "acc": 0.95667534, "epoch": 3.0713724853112394, "grad_norm": 16.897605895996094, "learning_rate": 1.4034178909258844e-06, "loss": 0.2439213, "memory(GiB)": 34.88, "step": 113435, "train_speed(iter/s)": 0.411016 }, { "acc": 0.93735552, "epoch": 3.0715078655944548, "grad_norm": 9.904699325561523, "learning_rate": 1.403029293981055e-06, "loss": 0.31061983, "memory(GiB)": 34.88, "step": 113440, "train_speed(iter/s)": 0.411017 }, { "acc": 0.94636803, "epoch": 3.0716432458776706, "grad_norm": 2.0720622539520264, "learning_rate": 1.4026407421016134e-06, "loss": 0.35390863, "memory(GiB)": 34.88, "step": 113445, "train_speed(iter/s)": 0.411018 }, { "acc": 0.94265633, "epoch": 3.071778626160886, "grad_norm": 4.650178909301758, "learning_rate": 1.4022522352924281e-06, "loss": 0.35195601, "memory(GiB)": 34.88, "step": 113450, "train_speed(iter/s)": 0.411019 }, { "acc": 0.92053757, "epoch": 3.0719140064441013, "grad_norm": 10.461601257324219, "learning_rate": 1.401863773558368e-06, "loss": 0.45168862, "memory(GiB)": 34.88, "step": 113455, "train_speed(iter/s)": 0.41102 }, { "acc": 0.94425926, "epoch": 3.072049386727317, "grad_norm": 6.315578937530518, "learning_rate": 1.4014753569042987e-06, "loss": 0.37820687, "memory(GiB)": 34.88, "step": 113460, "train_speed(iter/s)": 0.411021 }, { "acc": 0.92563686, "epoch": 3.0721847670105324, "grad_norm": 9.274442672729492, "learning_rate": 1.4010869853350834e-06, "loss": 0.46113625, "memory(GiB)": 34.88, "step": 113465, "train_speed(iter/s)": 0.411022 }, { "acc": 0.93367109, "epoch": 3.0723201472937482, "grad_norm": 11.00695514678955, "learning_rate": 1.400698658855589e-06, "loss": 0.36592908, "memory(GiB)": 34.88, "step": 113470, "train_speed(iter/s)": 0.411023 }, { "acc": 0.94684238, "epoch": 3.0724555275769636, "grad_norm": 7.241222858428955, "learning_rate": 1.4003103774706806e-06, "loss": 0.33176725, "memory(GiB)": 34.88, "step": 113475, "train_speed(iter/s)": 0.411024 }, { "acc": 0.93928261, "epoch": 3.0725909078601794, "grad_norm": 6.094945430755615, "learning_rate": 1.399922141185223e-06, "loss": 0.39156005, "memory(GiB)": 34.88, "step": 113480, "train_speed(iter/s)": 0.411025 }, { "acc": 0.94407654, "epoch": 3.0727262881433948, "grad_norm": 4.2507758140563965, "learning_rate": 1.3995339500040764e-06, "loss": 0.36463428, "memory(GiB)": 34.88, "step": 113485, "train_speed(iter/s)": 0.411026 }, { "acc": 0.9308363, "epoch": 3.0728616684266106, "grad_norm": 10.91821575164795, "learning_rate": 1.3991458039321052e-06, "loss": 0.43743896, "memory(GiB)": 34.88, "step": 113490, "train_speed(iter/s)": 0.411026 }, { "acc": 0.93438835, "epoch": 3.072997048709826, "grad_norm": 9.33452320098877, "learning_rate": 1.3987577029741738e-06, "loss": 0.3447304, "memory(GiB)": 34.88, "step": 113495, "train_speed(iter/s)": 0.411027 }, { "acc": 0.94841194, "epoch": 3.0731324289930413, "grad_norm": 7.645750999450684, "learning_rate": 1.3983696471351406e-06, "loss": 0.27374763, "memory(GiB)": 34.88, "step": 113500, "train_speed(iter/s)": 0.411028 }, { "acc": 0.94256067, "epoch": 3.073267809276257, "grad_norm": 3.3034026622772217, "learning_rate": 1.3979816364198682e-06, "loss": 0.34369178, "memory(GiB)": 34.88, "step": 113505, "train_speed(iter/s)": 0.411029 }, { "acc": 0.94481125, "epoch": 3.0734031895594724, "grad_norm": 9.756095886230469, "learning_rate": 1.397593670833217e-06, "loss": 0.29744563, "memory(GiB)": 34.88, "step": 113510, "train_speed(iter/s)": 0.41103 }, { "acc": 0.95128202, "epoch": 3.0735385698426883, "grad_norm": 9.579144477844238, "learning_rate": 1.3972057503800491e-06, "loss": 0.29284458, "memory(GiB)": 34.88, "step": 113515, "train_speed(iter/s)": 0.411031 }, { "acc": 0.94325695, "epoch": 3.0736739501259036, "grad_norm": 7.096970558166504, "learning_rate": 1.3968178750652188e-06, "loss": 0.31268597, "memory(GiB)": 34.88, "step": 113520, "train_speed(iter/s)": 0.411032 }, { "acc": 0.94671812, "epoch": 3.0738093304091194, "grad_norm": 10.636995315551758, "learning_rate": 1.3964300448935918e-06, "loss": 0.3015619, "memory(GiB)": 34.88, "step": 113525, "train_speed(iter/s)": 0.411033 }, { "acc": 0.94213476, "epoch": 3.0739447106923348, "grad_norm": 3.599421977996826, "learning_rate": 1.396042259870023e-06, "loss": 0.28746662, "memory(GiB)": 34.88, "step": 113530, "train_speed(iter/s)": 0.411034 }, { "acc": 0.95614271, "epoch": 3.07408009097555, "grad_norm": 7.079782009124756, "learning_rate": 1.3956545199993681e-06, "loss": 0.26033804, "memory(GiB)": 34.88, "step": 113535, "train_speed(iter/s)": 0.411035 }, { "acc": 0.94321232, "epoch": 3.074215471258766, "grad_norm": 5.317623138427734, "learning_rate": 1.3952668252864867e-06, "loss": 0.28920987, "memory(GiB)": 34.88, "step": 113540, "train_speed(iter/s)": 0.411036 }, { "acc": 0.93814316, "epoch": 3.0743508515419813, "grad_norm": 12.923426628112793, "learning_rate": 1.394879175736235e-06, "loss": 0.4310473, "memory(GiB)": 34.88, "step": 113545, "train_speed(iter/s)": 0.411037 }, { "acc": 0.93729458, "epoch": 3.074486231825197, "grad_norm": 12.719635009765625, "learning_rate": 1.394491571353471e-06, "loss": 0.37870846, "memory(GiB)": 34.88, "step": 113550, "train_speed(iter/s)": 0.411038 }, { "acc": 0.94461622, "epoch": 3.0746216121084124, "grad_norm": 8.32583999633789, "learning_rate": 1.3941040121430447e-06, "loss": 0.35839658, "memory(GiB)": 34.88, "step": 113555, "train_speed(iter/s)": 0.411039 }, { "acc": 0.94824429, "epoch": 3.0747569923916283, "grad_norm": 4.8237690925598145, "learning_rate": 1.3937164981098183e-06, "loss": 0.29991822, "memory(GiB)": 34.88, "step": 113560, "train_speed(iter/s)": 0.41104 }, { "acc": 0.9397007, "epoch": 3.0748923726748436, "grad_norm": 10.982160568237305, "learning_rate": 1.3933290292586417e-06, "loss": 0.42761087, "memory(GiB)": 34.88, "step": 113565, "train_speed(iter/s)": 0.411041 }, { "acc": 0.94227829, "epoch": 3.075027752958059, "grad_norm": 6.516124248504639, "learning_rate": 1.3929416055943692e-06, "loss": 0.39604373, "memory(GiB)": 34.88, "step": 113570, "train_speed(iter/s)": 0.411042 }, { "acc": 0.9435524, "epoch": 3.0751631332412748, "grad_norm": 5.06416654586792, "learning_rate": 1.3925542271218543e-06, "loss": 0.35472963, "memory(GiB)": 34.88, "step": 113575, "train_speed(iter/s)": 0.411042 }, { "acc": 0.94948692, "epoch": 3.07529851352449, "grad_norm": 4.637569904327393, "learning_rate": 1.392166893845949e-06, "loss": 0.35213244, "memory(GiB)": 34.88, "step": 113580, "train_speed(iter/s)": 0.411043 }, { "acc": 0.93417091, "epoch": 3.075433893807706, "grad_norm": 12.232006072998047, "learning_rate": 1.3917796057715083e-06, "loss": 0.43431063, "memory(GiB)": 34.88, "step": 113585, "train_speed(iter/s)": 0.411044 }, { "acc": 0.93915071, "epoch": 3.0755692740909213, "grad_norm": 6.682344913482666, "learning_rate": 1.3913923629033782e-06, "loss": 0.43188295, "memory(GiB)": 34.88, "step": 113590, "train_speed(iter/s)": 0.411045 }, { "acc": 0.93208923, "epoch": 3.075704654374137, "grad_norm": 8.548681259155273, "learning_rate": 1.3910051652464168e-06, "loss": 0.42100115, "memory(GiB)": 34.88, "step": 113595, "train_speed(iter/s)": 0.411046 }, { "acc": 0.94371347, "epoch": 3.0758400346573525, "grad_norm": 3.815530776977539, "learning_rate": 1.3906180128054688e-06, "loss": 0.30753565, "memory(GiB)": 34.88, "step": 113600, "train_speed(iter/s)": 0.411047 }, { "acc": 0.94151697, "epoch": 3.0759754149405683, "grad_norm": 5.504889488220215, "learning_rate": 1.3902309055853879e-06, "loss": 0.37990453, "memory(GiB)": 34.88, "step": 113605, "train_speed(iter/s)": 0.411048 }, { "acc": 0.94275856, "epoch": 3.0761107952237836, "grad_norm": 3.1231625080108643, "learning_rate": 1.3898438435910197e-06, "loss": 0.3157856, "memory(GiB)": 34.88, "step": 113610, "train_speed(iter/s)": 0.411049 }, { "acc": 0.93146515, "epoch": 3.076246175506999, "grad_norm": 6.666194438934326, "learning_rate": 1.3894568268272152e-06, "loss": 0.39537601, "memory(GiB)": 34.88, "step": 113615, "train_speed(iter/s)": 0.41105 }, { "acc": 0.92642841, "epoch": 3.0763815557902148, "grad_norm": 11.986964225769043, "learning_rate": 1.3890698552988235e-06, "loss": 0.44808817, "memory(GiB)": 34.88, "step": 113620, "train_speed(iter/s)": 0.411051 }, { "acc": 0.94458752, "epoch": 3.07651693607343, "grad_norm": 11.975709915161133, "learning_rate": 1.3886829290106893e-06, "loss": 0.30998178, "memory(GiB)": 34.88, "step": 113625, "train_speed(iter/s)": 0.411052 }, { "acc": 0.93958559, "epoch": 3.076652316356646, "grad_norm": 4.159153938293457, "learning_rate": 1.3882960479676607e-06, "loss": 0.38994741, "memory(GiB)": 34.88, "step": 113630, "train_speed(iter/s)": 0.411053 }, { "acc": 0.93122902, "epoch": 3.0767876966398613, "grad_norm": 14.927471160888672, "learning_rate": 1.3879092121745854e-06, "loss": 0.43363886, "memory(GiB)": 34.88, "step": 113635, "train_speed(iter/s)": 0.411054 }, { "acc": 0.9273962, "epoch": 3.076923076923077, "grad_norm": 5.612542152404785, "learning_rate": 1.3875224216363095e-06, "loss": 0.39923294, "memory(GiB)": 34.88, "step": 113640, "train_speed(iter/s)": 0.411055 }, { "acc": 0.94509315, "epoch": 3.0770584572062925, "grad_norm": 7.710806846618652, "learning_rate": 1.3871356763576743e-06, "loss": 0.38766792, "memory(GiB)": 34.88, "step": 113645, "train_speed(iter/s)": 0.411056 }, { "acc": 0.94063435, "epoch": 3.0771938374895083, "grad_norm": 4.9670000076293945, "learning_rate": 1.38674897634353e-06, "loss": 0.37443066, "memory(GiB)": 34.88, "step": 113650, "train_speed(iter/s)": 0.411057 }, { "acc": 0.94444237, "epoch": 3.0773292177727236, "grad_norm": 3.4918150901794434, "learning_rate": 1.3863623215987193e-06, "loss": 0.27596595, "memory(GiB)": 34.88, "step": 113655, "train_speed(iter/s)": 0.411058 }, { "acc": 0.94691515, "epoch": 3.077464598055939, "grad_norm": 23.938425064086914, "learning_rate": 1.3859757121280825e-06, "loss": 0.31622555, "memory(GiB)": 34.88, "step": 113660, "train_speed(iter/s)": 0.411059 }, { "acc": 0.94896975, "epoch": 3.0775999783391548, "grad_norm": 20.729602813720703, "learning_rate": 1.3855891479364655e-06, "loss": 0.34853487, "memory(GiB)": 34.88, "step": 113665, "train_speed(iter/s)": 0.41106 }, { "acc": 0.92947664, "epoch": 3.07773535862237, "grad_norm": 8.646366119384766, "learning_rate": 1.38520262902871e-06, "loss": 0.40162301, "memory(GiB)": 34.88, "step": 113670, "train_speed(iter/s)": 0.411061 }, { "acc": 0.94128704, "epoch": 3.077870738905586, "grad_norm": 7.374509811401367, "learning_rate": 1.384816155409659e-06, "loss": 0.34383934, "memory(GiB)": 34.88, "step": 113675, "train_speed(iter/s)": 0.411062 }, { "acc": 0.93999176, "epoch": 3.0780061191888013, "grad_norm": 6.457287788391113, "learning_rate": 1.3844297270841504e-06, "loss": 0.36013224, "memory(GiB)": 34.88, "step": 113680, "train_speed(iter/s)": 0.411063 }, { "acc": 0.95451756, "epoch": 3.078141499472017, "grad_norm": 5.57932186126709, "learning_rate": 1.3840433440570306e-06, "loss": 0.25461571, "memory(GiB)": 34.88, "step": 113685, "train_speed(iter/s)": 0.411063 }, { "acc": 0.94080791, "epoch": 3.0782768797552325, "grad_norm": 14.004080772399902, "learning_rate": 1.3836570063331354e-06, "loss": 0.33009877, "memory(GiB)": 34.88, "step": 113690, "train_speed(iter/s)": 0.411064 }, { "acc": 0.94525814, "epoch": 3.078412260038448, "grad_norm": 18.36967658996582, "learning_rate": 1.3832707139173072e-06, "loss": 0.26778235, "memory(GiB)": 34.88, "step": 113695, "train_speed(iter/s)": 0.411065 }, { "acc": 0.94158144, "epoch": 3.0785476403216636, "grad_norm": 11.538782119750977, "learning_rate": 1.3828844668143823e-06, "loss": 0.33839242, "memory(GiB)": 34.88, "step": 113700, "train_speed(iter/s)": 0.411066 }, { "acc": 0.93887701, "epoch": 3.078683020604879, "grad_norm": 12.299508094787598, "learning_rate": 1.3824982650292002e-06, "loss": 0.34382267, "memory(GiB)": 34.88, "step": 113705, "train_speed(iter/s)": 0.411067 }, { "acc": 0.96383362, "epoch": 3.0788184008880948, "grad_norm": 8.402595520019531, "learning_rate": 1.3821121085666006e-06, "loss": 0.20045857, "memory(GiB)": 34.88, "step": 113710, "train_speed(iter/s)": 0.411068 }, { "acc": 0.94294071, "epoch": 3.07895378117131, "grad_norm": 15.691164016723633, "learning_rate": 1.3817259974314168e-06, "loss": 0.28842735, "memory(GiB)": 34.88, "step": 113715, "train_speed(iter/s)": 0.411069 }, { "acc": 0.94416161, "epoch": 3.079089161454526, "grad_norm": 3.5640311241149902, "learning_rate": 1.3813399316284912e-06, "loss": 0.3283433, "memory(GiB)": 34.88, "step": 113720, "train_speed(iter/s)": 0.41107 }, { "acc": 0.93412409, "epoch": 3.0792245417377413, "grad_norm": 10.77487850189209, "learning_rate": 1.3809539111626553e-06, "loss": 0.41206074, "memory(GiB)": 34.88, "step": 113725, "train_speed(iter/s)": 0.411071 }, { "acc": 0.94647303, "epoch": 3.0793599220209567, "grad_norm": 14.574946403503418, "learning_rate": 1.3805679360387487e-06, "loss": 0.33223214, "memory(GiB)": 34.88, "step": 113730, "train_speed(iter/s)": 0.411072 }, { "acc": 0.93580952, "epoch": 3.0794953023041725, "grad_norm": 5.362003803253174, "learning_rate": 1.3801820062616022e-06, "loss": 0.38900294, "memory(GiB)": 34.88, "step": 113735, "train_speed(iter/s)": 0.411073 }, { "acc": 0.93964367, "epoch": 3.079630682587388, "grad_norm": 8.546565055847168, "learning_rate": 1.3797961218360526e-06, "loss": 0.36152756, "memory(GiB)": 34.88, "step": 113740, "train_speed(iter/s)": 0.411074 }, { "acc": 0.94766502, "epoch": 3.0797660628706036, "grad_norm": 4.899301052093506, "learning_rate": 1.3794102827669353e-06, "loss": 0.30090723, "memory(GiB)": 34.88, "step": 113745, "train_speed(iter/s)": 0.411075 }, { "acc": 0.93401737, "epoch": 3.079901443153819, "grad_norm": 6.786352634429932, "learning_rate": 1.379024489059079e-06, "loss": 0.34268696, "memory(GiB)": 34.88, "step": 113750, "train_speed(iter/s)": 0.411076 }, { "acc": 0.93633022, "epoch": 3.0800368234370348, "grad_norm": 8.087064743041992, "learning_rate": 1.3786387407173236e-06, "loss": 0.34055741, "memory(GiB)": 34.88, "step": 113755, "train_speed(iter/s)": 0.411077 }, { "acc": 0.93920193, "epoch": 3.08017220372025, "grad_norm": 5.479073524475098, "learning_rate": 1.3782530377464948e-06, "loss": 0.31034868, "memory(GiB)": 34.88, "step": 113760, "train_speed(iter/s)": 0.411078 }, { "acc": 0.94869013, "epoch": 3.080307584003466, "grad_norm": 6.503515720367432, "learning_rate": 1.3778673801514285e-06, "loss": 0.28951049, "memory(GiB)": 34.88, "step": 113765, "train_speed(iter/s)": 0.411079 }, { "acc": 0.93771152, "epoch": 3.0804429642866813, "grad_norm": 3.861327648162842, "learning_rate": 1.3774817679369514e-06, "loss": 0.31979289, "memory(GiB)": 34.88, "step": 113770, "train_speed(iter/s)": 0.41108 }, { "acc": 0.9529747, "epoch": 3.0805783445698967, "grad_norm": 5.149493217468262, "learning_rate": 1.3770962011079004e-06, "loss": 0.27948451, "memory(GiB)": 34.88, "step": 113775, "train_speed(iter/s)": 0.411081 }, { "acc": 0.91930828, "epoch": 3.0807137248531125, "grad_norm": 21.617721557617188, "learning_rate": 1.3767106796691016e-06, "loss": 0.55255661, "memory(GiB)": 34.88, "step": 113780, "train_speed(iter/s)": 0.411082 }, { "acc": 0.93599834, "epoch": 3.080849105136328, "grad_norm": 6.2348313331604, "learning_rate": 1.376325203625382e-06, "loss": 0.38916061, "memory(GiB)": 34.88, "step": 113785, "train_speed(iter/s)": 0.411082 }, { "acc": 0.945963, "epoch": 3.0809844854195436, "grad_norm": 9.37933349609375, "learning_rate": 1.3759397729815758e-06, "loss": 0.37634108, "memory(GiB)": 34.88, "step": 113790, "train_speed(iter/s)": 0.411083 }, { "acc": 0.94070511, "epoch": 3.081119865702759, "grad_norm": 11.7332763671875, "learning_rate": 1.3755543877425068e-06, "loss": 0.36186948, "memory(GiB)": 34.88, "step": 113795, "train_speed(iter/s)": 0.411084 }, { "acc": 0.9481245, "epoch": 3.0812552459859748, "grad_norm": 4.929451942443848, "learning_rate": 1.3751690479130067e-06, "loss": 0.31156249, "memory(GiB)": 34.88, "step": 113800, "train_speed(iter/s)": 0.411085 }, { "acc": 0.95709324, "epoch": 3.08139062626919, "grad_norm": 3.244035243988037, "learning_rate": 1.3747837534978978e-06, "loss": 0.29402757, "memory(GiB)": 34.88, "step": 113805, "train_speed(iter/s)": 0.411086 }, { "acc": 0.95055313, "epoch": 3.0815260065524055, "grad_norm": 7.070907115936279, "learning_rate": 1.3743985045020117e-06, "loss": 0.26550417, "memory(GiB)": 34.88, "step": 113810, "train_speed(iter/s)": 0.411087 }, { "acc": 0.92936659, "epoch": 3.0816613868356213, "grad_norm": 10.74158763885498, "learning_rate": 1.374013300930171e-06, "loss": 0.40448303, "memory(GiB)": 34.88, "step": 113815, "train_speed(iter/s)": 0.411088 }, { "acc": 0.95642986, "epoch": 3.0817967671188367, "grad_norm": 6.149036407470703, "learning_rate": 1.373628142787202e-06, "loss": 0.24315276, "memory(GiB)": 34.88, "step": 113820, "train_speed(iter/s)": 0.411088 }, { "acc": 0.93619957, "epoch": 3.0819321474020525, "grad_norm": 2.9827075004577637, "learning_rate": 1.373243030077932e-06, "loss": 0.30049217, "memory(GiB)": 34.88, "step": 113825, "train_speed(iter/s)": 0.41109 }, { "acc": 0.95321112, "epoch": 3.082067527685268, "grad_norm": 12.689178466796875, "learning_rate": 1.372857962807181e-06, "loss": 0.30646992, "memory(GiB)": 34.88, "step": 113830, "train_speed(iter/s)": 0.41109 }, { "acc": 0.95167522, "epoch": 3.0822029079684836, "grad_norm": 3.4780704975128174, "learning_rate": 1.372472940979776e-06, "loss": 0.30155079, "memory(GiB)": 34.88, "step": 113835, "train_speed(iter/s)": 0.411091 }, { "acc": 0.95533352, "epoch": 3.082338288251699, "grad_norm": 6.165801048278809, "learning_rate": 1.3720879646005363e-06, "loss": 0.2651732, "memory(GiB)": 34.88, "step": 113840, "train_speed(iter/s)": 0.411092 }, { "acc": 0.9535161, "epoch": 3.082473668534915, "grad_norm": 9.013012886047363, "learning_rate": 1.3717030336742903e-06, "loss": 0.27269254, "memory(GiB)": 34.88, "step": 113845, "train_speed(iter/s)": 0.411093 }, { "acc": 0.94369068, "epoch": 3.08260904881813, "grad_norm": 5.794149875640869, "learning_rate": 1.371318148205856e-06, "loss": 0.28963094, "memory(GiB)": 34.88, "step": 113850, "train_speed(iter/s)": 0.411094 }, { "acc": 0.9400774, "epoch": 3.0827444291013455, "grad_norm": 6.965818405151367, "learning_rate": 1.3709333082000548e-06, "loss": 0.31756523, "memory(GiB)": 34.88, "step": 113855, "train_speed(iter/s)": 0.411095 }, { "acc": 0.92845049, "epoch": 3.0828798093845613, "grad_norm": 4.925443172454834, "learning_rate": 1.3705485136617104e-06, "loss": 0.46100597, "memory(GiB)": 34.88, "step": 113860, "train_speed(iter/s)": 0.411096 }, { "acc": 0.94733734, "epoch": 3.0830151896677767, "grad_norm": 4.1726179122924805, "learning_rate": 1.3701637645956396e-06, "loss": 0.26846504, "memory(GiB)": 34.88, "step": 113865, "train_speed(iter/s)": 0.411097 }, { "acc": 0.95015678, "epoch": 3.0831505699509925, "grad_norm": 4.233050346374512, "learning_rate": 1.369779061006665e-06, "loss": 0.26970897, "memory(GiB)": 34.88, "step": 113870, "train_speed(iter/s)": 0.411097 }, { "acc": 0.95098648, "epoch": 3.083285950234208, "grad_norm": 2.5379767417907715, "learning_rate": 1.369394402899602e-06, "loss": 0.32514529, "memory(GiB)": 34.88, "step": 113875, "train_speed(iter/s)": 0.411098 }, { "acc": 0.94893036, "epoch": 3.0834213305174236, "grad_norm": 3.8030197620391846, "learning_rate": 1.3690097902792743e-06, "loss": 0.28041198, "memory(GiB)": 34.88, "step": 113880, "train_speed(iter/s)": 0.411099 }, { "acc": 0.93003302, "epoch": 3.083556710800639, "grad_norm": 13.1370849609375, "learning_rate": 1.368625223150496e-06, "loss": 0.37161877, "memory(GiB)": 34.88, "step": 113885, "train_speed(iter/s)": 0.4111 }, { "acc": 0.94774952, "epoch": 3.0836920910838543, "grad_norm": 8.714722633361816, "learning_rate": 1.3682407015180856e-06, "loss": 0.30338736, "memory(GiB)": 34.88, "step": 113890, "train_speed(iter/s)": 0.411101 }, { "acc": 0.94899979, "epoch": 3.08382747136707, "grad_norm": 6.748578071594238, "learning_rate": 1.3678562253868605e-06, "loss": 0.27756364, "memory(GiB)": 34.88, "step": 113895, "train_speed(iter/s)": 0.411102 }, { "acc": 0.93980217, "epoch": 3.0839628516502855, "grad_norm": 12.46251106262207, "learning_rate": 1.3674717947616387e-06, "loss": 0.38173335, "memory(GiB)": 34.88, "step": 113900, "train_speed(iter/s)": 0.411103 }, { "acc": 0.93549538, "epoch": 3.0840982319335013, "grad_norm": 5.529202938079834, "learning_rate": 1.3670874096472337e-06, "loss": 0.36505535, "memory(GiB)": 34.88, "step": 113905, "train_speed(iter/s)": 0.411104 }, { "acc": 0.95483665, "epoch": 3.0842336122167167, "grad_norm": 2.2507238388061523, "learning_rate": 1.3667030700484582e-06, "loss": 0.24175754, "memory(GiB)": 34.88, "step": 113910, "train_speed(iter/s)": 0.411105 }, { "acc": 0.95183334, "epoch": 3.0843689924999325, "grad_norm": 5.004607200622559, "learning_rate": 1.3663187759701322e-06, "loss": 0.29073958, "memory(GiB)": 34.88, "step": 113915, "train_speed(iter/s)": 0.411106 }, { "acc": 0.95146017, "epoch": 3.084504372783148, "grad_norm": 3.1201534271240234, "learning_rate": 1.365934527417066e-06, "loss": 0.2636385, "memory(GiB)": 34.88, "step": 113920, "train_speed(iter/s)": 0.411107 }, { "acc": 0.91695557, "epoch": 3.0846397530663636, "grad_norm": 13.554709434509277, "learning_rate": 1.365550324394074e-06, "loss": 0.48304863, "memory(GiB)": 34.88, "step": 113925, "train_speed(iter/s)": 0.411108 }, { "acc": 0.93724747, "epoch": 3.084775133349579, "grad_norm": 5.889186382293701, "learning_rate": 1.3651661669059699e-06, "loss": 0.43328915, "memory(GiB)": 34.88, "step": 113930, "train_speed(iter/s)": 0.411109 }, { "acc": 0.93116999, "epoch": 3.0849105136327943, "grad_norm": 7.37724494934082, "learning_rate": 1.3647820549575667e-06, "loss": 0.38853297, "memory(GiB)": 34.88, "step": 113935, "train_speed(iter/s)": 0.411109 }, { "acc": 0.94353628, "epoch": 3.08504589391601, "grad_norm": 7.394562721252441, "learning_rate": 1.3643979885536732e-06, "loss": 0.33905945, "memory(GiB)": 34.88, "step": 113940, "train_speed(iter/s)": 0.41111 }, { "acc": 0.94524364, "epoch": 3.0851812741992255, "grad_norm": 8.909218788146973, "learning_rate": 1.364013967699103e-06, "loss": 0.27316129, "memory(GiB)": 34.88, "step": 113945, "train_speed(iter/s)": 0.411111 }, { "acc": 0.95355883, "epoch": 3.0853166544824413, "grad_norm": 9.133207321166992, "learning_rate": 1.3636299923986668e-06, "loss": 0.23146133, "memory(GiB)": 34.88, "step": 113950, "train_speed(iter/s)": 0.411112 }, { "acc": 0.94587002, "epoch": 3.0854520347656567, "grad_norm": 7.109808921813965, "learning_rate": 1.3632460626571728e-06, "loss": 0.30351253, "memory(GiB)": 34.88, "step": 113955, "train_speed(iter/s)": 0.411112 }, { "acc": 0.9329855, "epoch": 3.0855874150488725, "grad_norm": 6.654788970947266, "learning_rate": 1.3628621784794314e-06, "loss": 0.38930957, "memory(GiB)": 34.88, "step": 113960, "train_speed(iter/s)": 0.411113 }, { "acc": 0.94159985, "epoch": 3.085722795332088, "grad_norm": 22.838726043701172, "learning_rate": 1.3624783398702516e-06, "loss": 0.40320845, "memory(GiB)": 34.88, "step": 113965, "train_speed(iter/s)": 0.411114 }, { "acc": 0.9371027, "epoch": 3.085858175615303, "grad_norm": 2.9068405628204346, "learning_rate": 1.3620945468344428e-06, "loss": 0.36055379, "memory(GiB)": 34.88, "step": 113970, "train_speed(iter/s)": 0.411115 }, { "acc": 0.94960184, "epoch": 3.085993555898519, "grad_norm": 5.702120780944824, "learning_rate": 1.3617107993768112e-06, "loss": 0.27391608, "memory(GiB)": 34.88, "step": 113975, "train_speed(iter/s)": 0.411116 }, { "acc": 0.94335546, "epoch": 3.0861289361817343, "grad_norm": 5.718430995941162, "learning_rate": 1.3613270975021645e-06, "loss": 0.34204607, "memory(GiB)": 34.88, "step": 113980, "train_speed(iter/s)": 0.411117 }, { "acc": 0.93942127, "epoch": 3.08626431646495, "grad_norm": 9.148859024047852, "learning_rate": 1.3609434412153102e-06, "loss": 0.37301686, "memory(GiB)": 34.88, "step": 113985, "train_speed(iter/s)": 0.411118 }, { "acc": 0.94477329, "epoch": 3.0863996967481655, "grad_norm": 4.949523448944092, "learning_rate": 1.360559830521052e-06, "loss": 0.3403687, "memory(GiB)": 34.88, "step": 113990, "train_speed(iter/s)": 0.411119 }, { "acc": 0.92637205, "epoch": 3.0865350770313813, "grad_norm": 11.726156234741211, "learning_rate": 1.3601762654241993e-06, "loss": 0.43138599, "memory(GiB)": 34.88, "step": 113995, "train_speed(iter/s)": 0.41112 }, { "acc": 0.95829659, "epoch": 3.0866704573145967, "grad_norm": 10.640177726745605, "learning_rate": 1.359792745929551e-06, "loss": 0.22403302, "memory(GiB)": 34.88, "step": 114000, "train_speed(iter/s)": 0.411121 }, { "acc": 0.93511791, "epoch": 3.0868058375978125, "grad_norm": 9.948322296142578, "learning_rate": 1.359409272041918e-06, "loss": 0.43778348, "memory(GiB)": 34.88, "step": 114005, "train_speed(iter/s)": 0.411122 }, { "acc": 0.95184603, "epoch": 3.086941217881028, "grad_norm": 7.05704402923584, "learning_rate": 1.3590258437660997e-06, "loss": 0.22522933, "memory(GiB)": 34.88, "step": 114010, "train_speed(iter/s)": 0.411123 }, { "acc": 0.94876652, "epoch": 3.087076598164243, "grad_norm": 6.299100875854492, "learning_rate": 1.3586424611069004e-06, "loss": 0.23776579, "memory(GiB)": 34.88, "step": 114015, "train_speed(iter/s)": 0.411123 }, { "acc": 0.93426542, "epoch": 3.087211978447459, "grad_norm": 6.37408971786499, "learning_rate": 1.3582591240691233e-06, "loss": 0.39685585, "memory(GiB)": 34.88, "step": 114020, "train_speed(iter/s)": 0.411125 }, { "acc": 0.93830662, "epoch": 3.0873473587306743, "grad_norm": 9.822979927062988, "learning_rate": 1.3578758326575717e-06, "loss": 0.41711936, "memory(GiB)": 34.88, "step": 114025, "train_speed(iter/s)": 0.411125 }, { "acc": 0.95981617, "epoch": 3.08748273901389, "grad_norm": 7.16314172744751, "learning_rate": 1.3574925868770454e-06, "loss": 0.24927049, "memory(GiB)": 34.88, "step": 114030, "train_speed(iter/s)": 0.411126 }, { "acc": 0.94702244, "epoch": 3.0876181192971055, "grad_norm": 5.574942111968994, "learning_rate": 1.3571093867323428e-06, "loss": 0.28289685, "memory(GiB)": 34.88, "step": 114035, "train_speed(iter/s)": 0.411127 }, { "acc": 0.93738289, "epoch": 3.0877534995803213, "grad_norm": 11.208324432373047, "learning_rate": 1.3567262322282696e-06, "loss": 0.39058049, "memory(GiB)": 34.88, "step": 114040, "train_speed(iter/s)": 0.411129 }, { "acc": 0.9413969, "epoch": 3.0878888798635367, "grad_norm": 9.750370025634766, "learning_rate": 1.3563431233696213e-06, "loss": 0.4359777, "memory(GiB)": 34.88, "step": 114045, "train_speed(iter/s)": 0.411129 }, { "acc": 0.92144108, "epoch": 3.088024260146752, "grad_norm": 14.219579696655273, "learning_rate": 1.3559600601611986e-06, "loss": 0.50274067, "memory(GiB)": 34.88, "step": 114050, "train_speed(iter/s)": 0.41113 }, { "acc": 0.93612499, "epoch": 3.088159640429968, "grad_norm": 15.92323112487793, "learning_rate": 1.3555770426077999e-06, "loss": 0.40927191, "memory(GiB)": 34.88, "step": 114055, "train_speed(iter/s)": 0.411131 }, { "acc": 0.93621397, "epoch": 3.088295020713183, "grad_norm": 5.388108253479004, "learning_rate": 1.3551940707142256e-06, "loss": 0.37023041, "memory(GiB)": 34.88, "step": 114060, "train_speed(iter/s)": 0.411132 }, { "acc": 0.94432278, "epoch": 3.088430400996399, "grad_norm": 7.683138847351074, "learning_rate": 1.3548111444852691e-06, "loss": 0.3443079, "memory(GiB)": 34.88, "step": 114065, "train_speed(iter/s)": 0.411133 }, { "acc": 0.95445948, "epoch": 3.0885657812796143, "grad_norm": 7.551278591156006, "learning_rate": 1.3544282639257295e-06, "loss": 0.28727088, "memory(GiB)": 34.88, "step": 114070, "train_speed(iter/s)": 0.411134 }, { "acc": 0.94219532, "epoch": 3.08870116156283, "grad_norm": 12.033926963806152, "learning_rate": 1.3540454290404043e-06, "loss": 0.40913296, "memory(GiB)": 34.88, "step": 114075, "train_speed(iter/s)": 0.411135 }, { "acc": 0.93846102, "epoch": 3.0888365418460455, "grad_norm": 5.054342746734619, "learning_rate": 1.3536626398340866e-06, "loss": 0.36478031, "memory(GiB)": 34.88, "step": 114080, "train_speed(iter/s)": 0.411136 }, { "acc": 0.94523525, "epoch": 3.0889719221292613, "grad_norm": 7.400118350982666, "learning_rate": 1.3532798963115725e-06, "loss": 0.36121638, "memory(GiB)": 34.88, "step": 114085, "train_speed(iter/s)": 0.411137 }, { "acc": 0.93769503, "epoch": 3.0891073024124767, "grad_norm": 8.620585441589355, "learning_rate": 1.3528971984776575e-06, "loss": 0.41102638, "memory(GiB)": 34.88, "step": 114090, "train_speed(iter/s)": 0.411138 }, { "acc": 0.9319418, "epoch": 3.089242682695692, "grad_norm": 6.100100994110107, "learning_rate": 1.352514546337136e-06, "loss": 0.33284826, "memory(GiB)": 34.88, "step": 114095, "train_speed(iter/s)": 0.411139 }, { "acc": 0.93020687, "epoch": 3.089378062978908, "grad_norm": 11.38870906829834, "learning_rate": 1.3521319398947995e-06, "loss": 0.4454967, "memory(GiB)": 34.88, "step": 114100, "train_speed(iter/s)": 0.41114 }, { "acc": 0.93606024, "epoch": 3.089513443262123, "grad_norm": 5.23445463180542, "learning_rate": 1.3517493791554424e-06, "loss": 0.34122539, "memory(GiB)": 34.88, "step": 114105, "train_speed(iter/s)": 0.411141 }, { "acc": 0.93517952, "epoch": 3.089648823545339, "grad_norm": 14.287769317626953, "learning_rate": 1.351366864123857e-06, "loss": 0.38598392, "memory(GiB)": 34.88, "step": 114110, "train_speed(iter/s)": 0.411142 }, { "acc": 0.94179325, "epoch": 3.0897842038285543, "grad_norm": 4.827113628387451, "learning_rate": 1.350984394804834e-06, "loss": 0.32123218, "memory(GiB)": 34.88, "step": 114115, "train_speed(iter/s)": 0.411143 }, { "acc": 0.93679924, "epoch": 3.08991958411177, "grad_norm": 4.225745677947998, "learning_rate": 1.3506019712031652e-06, "loss": 0.32498717, "memory(GiB)": 34.88, "step": 114120, "train_speed(iter/s)": 0.411144 }, { "acc": 0.91820812, "epoch": 3.0900549643949855, "grad_norm": 11.853490829467773, "learning_rate": 1.350219593323641e-06, "loss": 0.42813692, "memory(GiB)": 34.88, "step": 114125, "train_speed(iter/s)": 0.411145 }, { "acc": 0.9441061, "epoch": 3.090190344678201, "grad_norm": 17.14686393737793, "learning_rate": 1.3498372611710534e-06, "loss": 0.32857339, "memory(GiB)": 34.88, "step": 114130, "train_speed(iter/s)": 0.411146 }, { "acc": 0.93987417, "epoch": 3.0903257249614167, "grad_norm": 5.586422920227051, "learning_rate": 1.3494549747501884e-06, "loss": 0.32337284, "memory(GiB)": 34.88, "step": 114135, "train_speed(iter/s)": 0.411147 }, { "acc": 0.93865271, "epoch": 3.090461105244632, "grad_norm": 9.072029113769531, "learning_rate": 1.3490727340658364e-06, "loss": 0.39108176, "memory(GiB)": 34.88, "step": 114140, "train_speed(iter/s)": 0.411148 }, { "acc": 0.9406682, "epoch": 3.090596485527848, "grad_norm": 6.502862930297852, "learning_rate": 1.3486905391227865e-06, "loss": 0.37704408, "memory(GiB)": 34.88, "step": 114145, "train_speed(iter/s)": 0.411148 }, { "acc": 0.93019848, "epoch": 3.090731865811063, "grad_norm": 11.88435173034668, "learning_rate": 1.3483083899258268e-06, "loss": 0.3640723, "memory(GiB)": 34.88, "step": 114150, "train_speed(iter/s)": 0.411149 }, { "acc": 0.92883167, "epoch": 3.090867246094279, "grad_norm": 5.554751873016357, "learning_rate": 1.3479262864797426e-06, "loss": 0.43434854, "memory(GiB)": 34.88, "step": 114155, "train_speed(iter/s)": 0.41115 }, { "acc": 0.95209732, "epoch": 3.0910026263774943, "grad_norm": 3.9244110584259033, "learning_rate": 1.347544228789321e-06, "loss": 0.31065364, "memory(GiB)": 34.88, "step": 114160, "train_speed(iter/s)": 0.411151 }, { "acc": 0.936063, "epoch": 3.09113800666071, "grad_norm": 3.9333088397979736, "learning_rate": 1.34716221685935e-06, "loss": 0.35362768, "memory(GiB)": 34.88, "step": 114165, "train_speed(iter/s)": 0.411152 }, { "acc": 0.94802094, "epoch": 3.0912733869439255, "grad_norm": 6.20276403427124, "learning_rate": 1.3467802506946122e-06, "loss": 0.34418683, "memory(GiB)": 34.88, "step": 114170, "train_speed(iter/s)": 0.411153 }, { "acc": 0.93526497, "epoch": 3.091408767227141, "grad_norm": 19.862003326416016, "learning_rate": 1.3463983302998941e-06, "loss": 0.40061474, "memory(GiB)": 34.88, "step": 114175, "train_speed(iter/s)": 0.411154 }, { "acc": 0.94713497, "epoch": 3.0915441475103567, "grad_norm": 5.201719760894775, "learning_rate": 1.3460164556799798e-06, "loss": 0.25227365, "memory(GiB)": 34.88, "step": 114180, "train_speed(iter/s)": 0.411155 }, { "acc": 0.93835468, "epoch": 3.091679527793572, "grad_norm": 5.707187175750732, "learning_rate": 1.3456346268396537e-06, "loss": 0.37399182, "memory(GiB)": 34.88, "step": 114185, "train_speed(iter/s)": 0.411156 }, { "acc": 0.94474001, "epoch": 3.091814908076788, "grad_norm": 8.909757614135742, "learning_rate": 1.3452528437836973e-06, "loss": 0.33023679, "memory(GiB)": 34.88, "step": 114190, "train_speed(iter/s)": 0.411157 }, { "acc": 0.93918839, "epoch": 3.091950288360003, "grad_norm": 7.1171770095825195, "learning_rate": 1.3448711065168936e-06, "loss": 0.39134376, "memory(GiB)": 34.88, "step": 114195, "train_speed(iter/s)": 0.411158 }, { "acc": 0.9496088, "epoch": 3.092085668643219, "grad_norm": 7.170846939086914, "learning_rate": 1.3444894150440273e-06, "loss": 0.25881624, "memory(GiB)": 34.88, "step": 114200, "train_speed(iter/s)": 0.411159 }, { "acc": 0.95005131, "epoch": 3.0922210489264343, "grad_norm": 4.58812141418457, "learning_rate": 1.3441077693698755e-06, "loss": 0.25718162, "memory(GiB)": 34.88, "step": 114205, "train_speed(iter/s)": 0.41116 }, { "acc": 0.95917263, "epoch": 3.0923564292096497, "grad_norm": 5.331755638122559, "learning_rate": 1.3437261694992214e-06, "loss": 0.20501471, "memory(GiB)": 34.88, "step": 114210, "train_speed(iter/s)": 0.411161 }, { "acc": 0.95251446, "epoch": 3.0924918094928655, "grad_norm": 6.144821643829346, "learning_rate": 1.3433446154368455e-06, "loss": 0.30942347, "memory(GiB)": 34.88, "step": 114215, "train_speed(iter/s)": 0.411162 }, { "acc": 0.94960251, "epoch": 3.092627189776081, "grad_norm": 7.164431095123291, "learning_rate": 1.3429631071875276e-06, "loss": 0.2794311, "memory(GiB)": 34.88, "step": 114220, "train_speed(iter/s)": 0.411162 }, { "acc": 0.93048706, "epoch": 3.0927625700592967, "grad_norm": 6.349354267120361, "learning_rate": 1.3425816447560453e-06, "loss": 0.43990788, "memory(GiB)": 34.88, "step": 114225, "train_speed(iter/s)": 0.411163 }, { "acc": 0.9424181, "epoch": 3.092897950342512, "grad_norm": 7.318518161773682, "learning_rate": 1.3422002281471786e-06, "loss": 0.32059736, "memory(GiB)": 34.88, "step": 114230, "train_speed(iter/s)": 0.411164 }, { "acc": 0.95099974, "epoch": 3.093033330625728, "grad_norm": 5.650075435638428, "learning_rate": 1.341818857365705e-06, "loss": 0.29936039, "memory(GiB)": 34.88, "step": 114235, "train_speed(iter/s)": 0.411165 }, { "acc": 0.94011126, "epoch": 3.093168710908943, "grad_norm": 10.069748878479004, "learning_rate": 1.341437532416403e-06, "loss": 0.34348292, "memory(GiB)": 34.88, "step": 114240, "train_speed(iter/s)": 0.411166 }, { "acc": 0.9448782, "epoch": 3.093304091192159, "grad_norm": 10.223001480102539, "learning_rate": 1.3410562533040471e-06, "loss": 0.31433427, "memory(GiB)": 34.88, "step": 114245, "train_speed(iter/s)": 0.411168 }, { "acc": 0.95269213, "epoch": 3.0934394714753743, "grad_norm": 5.187868595123291, "learning_rate": 1.3406750200334152e-06, "loss": 0.30134141, "memory(GiB)": 34.88, "step": 114250, "train_speed(iter/s)": 0.411169 }, { "acc": 0.93795052, "epoch": 3.0935748517585897, "grad_norm": 6.979177474975586, "learning_rate": 1.3402938326092841e-06, "loss": 0.34902995, "memory(GiB)": 34.88, "step": 114255, "train_speed(iter/s)": 0.41117 }, { "acc": 0.94166565, "epoch": 3.0937102320418055, "grad_norm": 28.07522964477539, "learning_rate": 1.3399126910364254e-06, "loss": 0.35945253, "memory(GiB)": 34.88, "step": 114260, "train_speed(iter/s)": 0.411171 }, { "acc": 0.91559772, "epoch": 3.093845612325021, "grad_norm": 11.801085472106934, "learning_rate": 1.3395315953196167e-06, "loss": 0.50027847, "memory(GiB)": 34.88, "step": 114265, "train_speed(iter/s)": 0.411172 }, { "acc": 0.94774599, "epoch": 3.0939809926082367, "grad_norm": 5.859585762023926, "learning_rate": 1.3391505454636308e-06, "loss": 0.27922995, "memory(GiB)": 34.88, "step": 114270, "train_speed(iter/s)": 0.411173 }, { "acc": 0.94264755, "epoch": 3.094116372891452, "grad_norm": 5.3493266105651855, "learning_rate": 1.3387695414732432e-06, "loss": 0.34130998, "memory(GiB)": 34.88, "step": 114275, "train_speed(iter/s)": 0.411173 }, { "acc": 0.94668608, "epoch": 3.094251753174668, "grad_norm": 7.548247814178467, "learning_rate": 1.3383885833532222e-06, "loss": 0.33085091, "memory(GiB)": 34.88, "step": 114280, "train_speed(iter/s)": 0.411174 }, { "acc": 0.93004885, "epoch": 3.094387133457883, "grad_norm": 11.376943588256836, "learning_rate": 1.3380076711083437e-06, "loss": 0.41011863, "memory(GiB)": 34.88, "step": 114285, "train_speed(iter/s)": 0.411175 }, { "acc": 0.94213486, "epoch": 3.0945225137410985, "grad_norm": 7.039170265197754, "learning_rate": 1.33762680474338e-06, "loss": 0.34807272, "memory(GiB)": 34.88, "step": 114290, "train_speed(iter/s)": 0.411176 }, { "acc": 0.93679953, "epoch": 3.0946578940243143, "grad_norm": 7.929417610168457, "learning_rate": 1.3372459842630986e-06, "loss": 0.40291309, "memory(GiB)": 34.88, "step": 114295, "train_speed(iter/s)": 0.411177 }, { "acc": 0.94428616, "epoch": 3.0947932743075297, "grad_norm": 5.803959846496582, "learning_rate": 1.3368652096722722e-06, "loss": 0.28168514, "memory(GiB)": 34.88, "step": 114300, "train_speed(iter/s)": 0.411178 }, { "acc": 0.94012394, "epoch": 3.0949286545907455, "grad_norm": 5.848424911499023, "learning_rate": 1.3364844809756704e-06, "loss": 0.38018789, "memory(GiB)": 34.88, "step": 114305, "train_speed(iter/s)": 0.41118 }, { "acc": 0.9477541, "epoch": 3.095064034873961, "grad_norm": 3.8784656524658203, "learning_rate": 1.336103798178065e-06, "loss": 0.28483925, "memory(GiB)": 34.88, "step": 114310, "train_speed(iter/s)": 0.41118 }, { "acc": 0.92992687, "epoch": 3.0951994151571767, "grad_norm": 12.277599334716797, "learning_rate": 1.335723161284219e-06, "loss": 0.42905836, "memory(GiB)": 34.88, "step": 114315, "train_speed(iter/s)": 0.411181 }, { "acc": 0.94708652, "epoch": 3.095334795440392, "grad_norm": 3.9556729793548584, "learning_rate": 1.335342570298907e-06, "loss": 0.28568039, "memory(GiB)": 34.88, "step": 114320, "train_speed(iter/s)": 0.411182 }, { "acc": 0.92557163, "epoch": 3.095470175723608, "grad_norm": 6.165842533111572, "learning_rate": 1.3349620252268938e-06, "loss": 0.47666183, "memory(GiB)": 34.88, "step": 114325, "train_speed(iter/s)": 0.411183 }, { "acc": 0.93464794, "epoch": 3.095605556006823, "grad_norm": 7.5075364112854, "learning_rate": 1.3345815260729456e-06, "loss": 0.39889638, "memory(GiB)": 34.88, "step": 114330, "train_speed(iter/s)": 0.411184 }, { "acc": 0.93308239, "epoch": 3.0957409362900385, "grad_norm": 7.407493591308594, "learning_rate": 1.3342010728418296e-06, "loss": 0.37581167, "memory(GiB)": 34.88, "step": 114335, "train_speed(iter/s)": 0.411185 }, { "acc": 0.94734154, "epoch": 3.0958763165732543, "grad_norm": 11.695988655090332, "learning_rate": 1.3338206655383116e-06, "loss": 0.33704796, "memory(GiB)": 34.88, "step": 114340, "train_speed(iter/s)": 0.411186 }, { "acc": 0.94199123, "epoch": 3.0960116968564697, "grad_norm": 14.895869255065918, "learning_rate": 1.3334403041671589e-06, "loss": 0.39672804, "memory(GiB)": 34.88, "step": 114345, "train_speed(iter/s)": 0.411187 }, { "acc": 0.93353319, "epoch": 3.0961470771396855, "grad_norm": 9.395944595336914, "learning_rate": 1.333059988733132e-06, "loss": 0.36214352, "memory(GiB)": 34.88, "step": 114350, "train_speed(iter/s)": 0.411188 }, { "acc": 0.93818083, "epoch": 3.096282457422901, "grad_norm": 28.552518844604492, "learning_rate": 1.332679719241e-06, "loss": 0.39158423, "memory(GiB)": 34.88, "step": 114355, "train_speed(iter/s)": 0.411189 }, { "acc": 0.93557816, "epoch": 3.0964178377061167, "grad_norm": 8.050697326660156, "learning_rate": 1.3322994956955234e-06, "loss": 0.3837024, "memory(GiB)": 34.88, "step": 114360, "train_speed(iter/s)": 0.41119 }, { "acc": 0.95535927, "epoch": 3.096553217989332, "grad_norm": 4.730288982391357, "learning_rate": 1.3319193181014672e-06, "loss": 0.29449883, "memory(GiB)": 34.88, "step": 114365, "train_speed(iter/s)": 0.411191 }, { "acc": 0.93718109, "epoch": 3.0966885982725474, "grad_norm": 8.235555648803711, "learning_rate": 1.3315391864635914e-06, "loss": 0.39642291, "memory(GiB)": 34.88, "step": 114370, "train_speed(iter/s)": 0.411192 }, { "acc": 0.93966551, "epoch": 3.096823978555763, "grad_norm": 6.795279502868652, "learning_rate": 1.3311591007866595e-06, "loss": 0.32435892, "memory(GiB)": 34.88, "step": 114375, "train_speed(iter/s)": 0.411193 }, { "acc": 0.94896049, "epoch": 3.0969593588389785, "grad_norm": 2.846174955368042, "learning_rate": 1.3307790610754342e-06, "loss": 0.30085187, "memory(GiB)": 34.88, "step": 114380, "train_speed(iter/s)": 0.411194 }, { "acc": 0.94777975, "epoch": 3.0970947391221944, "grad_norm": 9.298978805541992, "learning_rate": 1.3303990673346715e-06, "loss": 0.28517616, "memory(GiB)": 34.88, "step": 114385, "train_speed(iter/s)": 0.411195 }, { "acc": 0.93827648, "epoch": 3.0972301194054097, "grad_norm": 8.267114639282227, "learning_rate": 1.3300191195691375e-06, "loss": 0.37620101, "memory(GiB)": 34.88, "step": 114390, "train_speed(iter/s)": 0.411196 }, { "acc": 0.9378849, "epoch": 3.0973654996886255, "grad_norm": 12.189568519592285, "learning_rate": 1.3296392177835876e-06, "loss": 0.35984089, "memory(GiB)": 34.88, "step": 114395, "train_speed(iter/s)": 0.411196 }, { "acc": 0.94530411, "epoch": 3.097500879971841, "grad_norm": 7.285943031311035, "learning_rate": 1.329259361982784e-06, "loss": 0.33000836, "memory(GiB)": 34.88, "step": 114400, "train_speed(iter/s)": 0.411197 }, { "acc": 0.94400606, "epoch": 3.0976362602550562, "grad_norm": 6.1509108543396, "learning_rate": 1.3288795521714814e-06, "loss": 0.30288124, "memory(GiB)": 34.88, "step": 114405, "train_speed(iter/s)": 0.411198 }, { "acc": 0.93279285, "epoch": 3.097771640538272, "grad_norm": 5.117362022399902, "learning_rate": 1.32849978835444e-06, "loss": 0.40491252, "memory(GiB)": 34.88, "step": 114410, "train_speed(iter/s)": 0.411199 }, { "acc": 0.9381773, "epoch": 3.0979070208214874, "grad_norm": 6.460292816162109, "learning_rate": 1.3281200705364181e-06, "loss": 0.36712689, "memory(GiB)": 34.88, "step": 114415, "train_speed(iter/s)": 0.4112 }, { "acc": 0.94428225, "epoch": 3.098042401104703, "grad_norm": 9.852910995483398, "learning_rate": 1.327740398722168e-06, "loss": 0.31816978, "memory(GiB)": 34.88, "step": 114420, "train_speed(iter/s)": 0.411201 }, { "acc": 0.93377323, "epoch": 3.0981777813879186, "grad_norm": 5.1676459312438965, "learning_rate": 1.327360772916452e-06, "loss": 0.36243072, "memory(GiB)": 34.88, "step": 114425, "train_speed(iter/s)": 0.411202 }, { "acc": 0.92717886, "epoch": 3.0983131616711344, "grad_norm": 6.516120910644531, "learning_rate": 1.326981193124021e-06, "loss": 0.44684114, "memory(GiB)": 34.88, "step": 114430, "train_speed(iter/s)": 0.411203 }, { "acc": 0.95613518, "epoch": 3.0984485419543497, "grad_norm": 3.861729860305786, "learning_rate": 1.3266016593496325e-06, "loss": 0.23010979, "memory(GiB)": 34.88, "step": 114435, "train_speed(iter/s)": 0.411204 }, { "acc": 0.95692978, "epoch": 3.0985839222375655, "grad_norm": 3.5315682888031006, "learning_rate": 1.3262221715980374e-06, "loss": 0.22624705, "memory(GiB)": 34.88, "step": 114440, "train_speed(iter/s)": 0.411205 }, { "acc": 0.95584602, "epoch": 3.098719302520781, "grad_norm": 7.568702220916748, "learning_rate": 1.325842729873995e-06, "loss": 0.26420522, "memory(GiB)": 34.88, "step": 114445, "train_speed(iter/s)": 0.411206 }, { "acc": 0.93111591, "epoch": 3.0988546828039962, "grad_norm": 43.44503402709961, "learning_rate": 1.325463334182256e-06, "loss": 0.40776262, "memory(GiB)": 34.88, "step": 114450, "train_speed(iter/s)": 0.411207 }, { "acc": 0.94419098, "epoch": 3.098990063087212, "grad_norm": 6.248453140258789, "learning_rate": 1.3250839845275693e-06, "loss": 0.33295045, "memory(GiB)": 34.88, "step": 114455, "train_speed(iter/s)": 0.411207 }, { "acc": 0.94813128, "epoch": 3.0991254433704274, "grad_norm": 10.995180130004883, "learning_rate": 1.324704680914693e-06, "loss": 0.31284459, "memory(GiB)": 34.88, "step": 114460, "train_speed(iter/s)": 0.411208 }, { "acc": 0.93911448, "epoch": 3.099260823653643, "grad_norm": 6.973723888397217, "learning_rate": 1.3243254233483749e-06, "loss": 0.34844582, "memory(GiB)": 34.88, "step": 114465, "train_speed(iter/s)": 0.411209 }, { "acc": 0.93463802, "epoch": 3.0993962039368586, "grad_norm": 9.383262634277344, "learning_rate": 1.3239462118333688e-06, "loss": 0.37397819, "memory(GiB)": 34.88, "step": 114470, "train_speed(iter/s)": 0.41121 }, { "acc": 0.95388031, "epoch": 3.0995315842200744, "grad_norm": 10.182234764099121, "learning_rate": 1.3235670463744197e-06, "loss": 0.26345034, "memory(GiB)": 34.88, "step": 114475, "train_speed(iter/s)": 0.411211 }, { "acc": 0.93470688, "epoch": 3.0996669645032897, "grad_norm": 5.387143611907959, "learning_rate": 1.3231879269762844e-06, "loss": 0.38198628, "memory(GiB)": 34.88, "step": 114480, "train_speed(iter/s)": 0.411212 }, { "acc": 0.94823647, "epoch": 3.0998023447865055, "grad_norm": 9.937442779541016, "learning_rate": 1.3228088536437066e-06, "loss": 0.33920774, "memory(GiB)": 34.88, "step": 114485, "train_speed(iter/s)": 0.411213 }, { "acc": 0.95222492, "epoch": 3.099937725069721, "grad_norm": 5.282724857330322, "learning_rate": 1.322429826381438e-06, "loss": 0.25812366, "memory(GiB)": 34.88, "step": 114490, "train_speed(iter/s)": 0.411214 }, { "acc": 0.95144024, "epoch": 3.1000731053529362, "grad_norm": 3.342320203781128, "learning_rate": 1.3220508451942269e-06, "loss": 0.28251805, "memory(GiB)": 34.88, "step": 114495, "train_speed(iter/s)": 0.411215 }, { "acc": 0.94922962, "epoch": 3.100208485636152, "grad_norm": 5.479586601257324, "learning_rate": 1.321671910086818e-06, "loss": 0.34882486, "memory(GiB)": 34.88, "step": 114500, "train_speed(iter/s)": 0.411216 }, { "acc": 0.94119205, "epoch": 3.1003438659193674, "grad_norm": 2.9646823406219482, "learning_rate": 1.321293021063961e-06, "loss": 0.33448138, "memory(GiB)": 34.88, "step": 114505, "train_speed(iter/s)": 0.411217 }, { "acc": 0.92962198, "epoch": 3.100479246202583, "grad_norm": 8.350083351135254, "learning_rate": 1.3209141781303983e-06, "loss": 0.43678632, "memory(GiB)": 34.88, "step": 114510, "train_speed(iter/s)": 0.411218 }, { "acc": 0.93925152, "epoch": 3.1006146264857986, "grad_norm": 7.2696733474731445, "learning_rate": 1.3205353812908817e-06, "loss": 0.34479432, "memory(GiB)": 34.88, "step": 114515, "train_speed(iter/s)": 0.411219 }, { "acc": 0.95877504, "epoch": 3.1007500067690144, "grad_norm": 8.319096565246582, "learning_rate": 1.320156630550151e-06, "loss": 0.21085768, "memory(GiB)": 34.88, "step": 114520, "train_speed(iter/s)": 0.41122 }, { "acc": 0.94979134, "epoch": 3.1008853870522297, "grad_norm": 5.356194972991943, "learning_rate": 1.3197779259129534e-06, "loss": 0.25840986, "memory(GiB)": 34.88, "step": 114525, "train_speed(iter/s)": 0.41122 }, { "acc": 0.95051937, "epoch": 3.101020767335445, "grad_norm": 4.319075584411621, "learning_rate": 1.3193992673840335e-06, "loss": 0.35221708, "memory(GiB)": 34.88, "step": 114530, "train_speed(iter/s)": 0.411221 }, { "acc": 0.9493042, "epoch": 3.101156147618661, "grad_norm": 2.4068171977996826, "learning_rate": 1.3190206549681324e-06, "loss": 0.29105875, "memory(GiB)": 34.88, "step": 114535, "train_speed(iter/s)": 0.411222 }, { "acc": 0.9419302, "epoch": 3.1012915279018762, "grad_norm": 13.412538528442383, "learning_rate": 1.3186420886699958e-06, "loss": 0.33666909, "memory(GiB)": 34.88, "step": 114540, "train_speed(iter/s)": 0.411223 }, { "acc": 0.93676481, "epoch": 3.101426908185092, "grad_norm": 11.262236595153809, "learning_rate": 1.3182635684943607e-06, "loss": 0.3685703, "memory(GiB)": 34.88, "step": 114545, "train_speed(iter/s)": 0.411224 }, { "acc": 0.94351683, "epoch": 3.1015622884683074, "grad_norm": 6.435089588165283, "learning_rate": 1.3178850944459758e-06, "loss": 0.33521402, "memory(GiB)": 34.88, "step": 114550, "train_speed(iter/s)": 0.411225 }, { "acc": 0.93967419, "epoch": 3.101697668751523, "grad_norm": 14.619538307189941, "learning_rate": 1.3175066665295776e-06, "loss": 0.3595922, "memory(GiB)": 34.88, "step": 114555, "train_speed(iter/s)": 0.411226 }, { "acc": 0.95371647, "epoch": 3.1018330490347386, "grad_norm": 7.2876176834106445, "learning_rate": 1.3171282847499098e-06, "loss": 0.27081966, "memory(GiB)": 34.88, "step": 114560, "train_speed(iter/s)": 0.411227 }, { "acc": 0.94080772, "epoch": 3.101968429317954, "grad_norm": 9.365352630615234, "learning_rate": 1.3167499491117072e-06, "loss": 0.35148959, "memory(GiB)": 34.88, "step": 114565, "train_speed(iter/s)": 0.411228 }, { "acc": 0.93492584, "epoch": 3.1021038096011697, "grad_norm": 21.229354858398438, "learning_rate": 1.3163716596197154e-06, "loss": 0.44192228, "memory(GiB)": 34.88, "step": 114570, "train_speed(iter/s)": 0.411229 }, { "acc": 0.94777126, "epoch": 3.102239189884385, "grad_norm": 5.71585750579834, "learning_rate": 1.3159934162786705e-06, "loss": 0.28533587, "memory(GiB)": 34.88, "step": 114575, "train_speed(iter/s)": 0.41123 }, { "acc": 0.94492826, "epoch": 3.102374570167601, "grad_norm": 10.069245338439941, "learning_rate": 1.3156152190933072e-06, "loss": 0.35909877, "memory(GiB)": 34.88, "step": 114580, "train_speed(iter/s)": 0.411231 }, { "acc": 0.9472023, "epoch": 3.1025099504508162, "grad_norm": 8.750829696655273, "learning_rate": 1.31523706806837e-06, "loss": 0.29517663, "memory(GiB)": 34.88, "step": 114585, "train_speed(iter/s)": 0.411232 }, { "acc": 0.95813007, "epoch": 3.102645330734032, "grad_norm": 7.404302597045898, "learning_rate": 1.3148589632085912e-06, "loss": 0.22388389, "memory(GiB)": 34.88, "step": 114590, "train_speed(iter/s)": 0.411233 }, { "acc": 0.94014301, "epoch": 3.1027807110172474, "grad_norm": 5.5850510597229, "learning_rate": 1.3144809045187095e-06, "loss": 0.35482781, "memory(GiB)": 34.88, "step": 114595, "train_speed(iter/s)": 0.411234 }, { "acc": 0.93450947, "epoch": 3.102916091300463, "grad_norm": 6.3297014236450195, "learning_rate": 1.314102892003458e-06, "loss": 0.34749141, "memory(GiB)": 34.88, "step": 114600, "train_speed(iter/s)": 0.411235 }, { "acc": 0.92697811, "epoch": 3.1030514715836786, "grad_norm": 6.663386344909668, "learning_rate": 1.313724925667576e-06, "loss": 0.43815074, "memory(GiB)": 34.88, "step": 114605, "train_speed(iter/s)": 0.411236 }, { "acc": 0.95015421, "epoch": 3.103186851866894, "grad_norm": 5.171216011047363, "learning_rate": 1.3133470055157953e-06, "loss": 0.27034428, "memory(GiB)": 34.88, "step": 114610, "train_speed(iter/s)": 0.411237 }, { "acc": 0.95199766, "epoch": 3.1033222321501097, "grad_norm": 10.68659496307373, "learning_rate": 1.3129691315528511e-06, "loss": 0.32453566, "memory(GiB)": 34.88, "step": 114615, "train_speed(iter/s)": 0.411238 }, { "acc": 0.95011654, "epoch": 3.103457612433325, "grad_norm": 55.421443939208984, "learning_rate": 1.3125913037834788e-06, "loss": 0.28782351, "memory(GiB)": 34.88, "step": 114620, "train_speed(iter/s)": 0.411239 }, { "acc": 0.96154871, "epoch": 3.103592992716541, "grad_norm": 9.328173637390137, "learning_rate": 1.3122135222124077e-06, "loss": 0.24833016, "memory(GiB)": 34.88, "step": 114625, "train_speed(iter/s)": 0.41124 }, { "acc": 0.95125751, "epoch": 3.1037283729997562, "grad_norm": 5.013834476470947, "learning_rate": 1.3118357868443738e-06, "loss": 0.30220618, "memory(GiB)": 34.88, "step": 114630, "train_speed(iter/s)": 0.411241 }, { "acc": 0.94663229, "epoch": 3.103863753282972, "grad_norm": 4.09633207321167, "learning_rate": 1.3114580976841047e-06, "loss": 0.32031517, "memory(GiB)": 34.88, "step": 114635, "train_speed(iter/s)": 0.411242 }, { "acc": 0.94912052, "epoch": 3.1039991335661874, "grad_norm": 10.041373252868652, "learning_rate": 1.3110804547363368e-06, "loss": 0.34653978, "memory(GiB)": 34.88, "step": 114640, "train_speed(iter/s)": 0.411243 }, { "acc": 0.93890476, "epoch": 3.104134513849403, "grad_norm": 8.990928649902344, "learning_rate": 1.3107028580057973e-06, "loss": 0.33525772, "memory(GiB)": 34.88, "step": 114645, "train_speed(iter/s)": 0.411244 }, { "acc": 0.9331501, "epoch": 3.1042698941326186, "grad_norm": 4.560215473175049, "learning_rate": 1.3103253074972172e-06, "loss": 0.39905291, "memory(GiB)": 34.88, "step": 114650, "train_speed(iter/s)": 0.411245 }, { "acc": 0.940168, "epoch": 3.104405274415834, "grad_norm": 14.89166259765625, "learning_rate": 1.3099478032153279e-06, "loss": 0.35914342, "memory(GiB)": 34.88, "step": 114655, "train_speed(iter/s)": 0.411246 }, { "acc": 0.93351555, "epoch": 3.1045406546990497, "grad_norm": 5.4432477951049805, "learning_rate": 1.3095703451648555e-06, "loss": 0.42727861, "memory(GiB)": 34.88, "step": 114660, "train_speed(iter/s)": 0.411247 }, { "acc": 0.94181051, "epoch": 3.104676034982265, "grad_norm": 3.856886863708496, "learning_rate": 1.3091929333505308e-06, "loss": 0.33454044, "memory(GiB)": 34.88, "step": 114665, "train_speed(iter/s)": 0.411248 }, { "acc": 0.93534603, "epoch": 3.104811415265481, "grad_norm": 8.599254608154297, "learning_rate": 1.3088155677770779e-06, "loss": 0.40191469, "memory(GiB)": 34.88, "step": 114670, "train_speed(iter/s)": 0.411249 }, { "acc": 0.94386339, "epoch": 3.1049467955486962, "grad_norm": 8.05618953704834, "learning_rate": 1.308438248449229e-06, "loss": 0.27664194, "memory(GiB)": 34.88, "step": 114675, "train_speed(iter/s)": 0.41125 }, { "acc": 0.93396683, "epoch": 3.105082175831912, "grad_norm": 11.222668647766113, "learning_rate": 1.3080609753717075e-06, "loss": 0.35237658, "memory(GiB)": 34.88, "step": 114680, "train_speed(iter/s)": 0.411251 }, { "acc": 0.93140335, "epoch": 3.1052175561151274, "grad_norm": 13.726628303527832, "learning_rate": 1.3076837485492404e-06, "loss": 0.39969168, "memory(GiB)": 34.88, "step": 114685, "train_speed(iter/s)": 0.411252 }, { "acc": 0.93181343, "epoch": 3.1053529363983428, "grad_norm": 8.705431938171387, "learning_rate": 1.3073065679865532e-06, "loss": 0.45555959, "memory(GiB)": 34.88, "step": 114690, "train_speed(iter/s)": 0.411253 }, { "acc": 0.94790192, "epoch": 3.1054883166815586, "grad_norm": 5.637821674346924, "learning_rate": 1.3069294336883723e-06, "loss": 0.30610991, "memory(GiB)": 34.88, "step": 114695, "train_speed(iter/s)": 0.411254 }, { "acc": 0.93406372, "epoch": 3.105623696964774, "grad_norm": 4.632043361663818, "learning_rate": 1.3065523456594206e-06, "loss": 0.41679459, "memory(GiB)": 34.88, "step": 114700, "train_speed(iter/s)": 0.411255 }, { "acc": 0.94176121, "epoch": 3.1057590772479897, "grad_norm": 9.022456169128418, "learning_rate": 1.306175303904419e-06, "loss": 0.30542433, "memory(GiB)": 34.88, "step": 114705, "train_speed(iter/s)": 0.411256 }, { "acc": 0.95023003, "epoch": 3.105894457531205, "grad_norm": 3.806396961212158, "learning_rate": 1.3057983084280964e-06, "loss": 0.35652719, "memory(GiB)": 34.88, "step": 114710, "train_speed(iter/s)": 0.411257 }, { "acc": 0.94731579, "epoch": 3.106029837814421, "grad_norm": 8.484269142150879, "learning_rate": 1.3054213592351708e-06, "loss": 0.36815195, "memory(GiB)": 34.88, "step": 114715, "train_speed(iter/s)": 0.411258 }, { "acc": 0.95392361, "epoch": 3.1061652180976362, "grad_norm": 4.653091907501221, "learning_rate": 1.3050444563303667e-06, "loss": 0.28903637, "memory(GiB)": 34.88, "step": 114720, "train_speed(iter/s)": 0.411258 }, { "acc": 0.94881191, "epoch": 3.1063005983808516, "grad_norm": 3.8093953132629395, "learning_rate": 1.304667599718404e-06, "loss": 0.28632903, "memory(GiB)": 34.88, "step": 114725, "train_speed(iter/s)": 0.411259 }, { "acc": 0.95315285, "epoch": 3.1064359786640674, "grad_norm": 5.465158462524414, "learning_rate": 1.304290789404006e-06, "loss": 0.21401687, "memory(GiB)": 34.88, "step": 114730, "train_speed(iter/s)": 0.41126 }, { "acc": 0.9476655, "epoch": 3.1065713589472828, "grad_norm": 4.048654556274414, "learning_rate": 1.3039140253918909e-06, "loss": 0.35868649, "memory(GiB)": 34.88, "step": 114735, "train_speed(iter/s)": 0.411261 }, { "acc": 0.94363651, "epoch": 3.1067067392304986, "grad_norm": 12.686568260192871, "learning_rate": 1.303537307686778e-06, "loss": 0.30540974, "memory(GiB)": 34.88, "step": 114740, "train_speed(iter/s)": 0.411262 }, { "acc": 0.95886421, "epoch": 3.106842119513714, "grad_norm": 7.144284725189209, "learning_rate": 1.3031606362933891e-06, "loss": 0.2459342, "memory(GiB)": 34.88, "step": 114745, "train_speed(iter/s)": 0.411263 }, { "acc": 0.9376585, "epoch": 3.1069774997969297, "grad_norm": 14.830711364746094, "learning_rate": 1.3027840112164395e-06, "loss": 0.39124007, "memory(GiB)": 34.88, "step": 114750, "train_speed(iter/s)": 0.411264 }, { "acc": 0.94826965, "epoch": 3.107112880080145, "grad_norm": 6.332167625427246, "learning_rate": 1.3024074324606484e-06, "loss": 0.24571099, "memory(GiB)": 34.88, "step": 114755, "train_speed(iter/s)": 0.411265 }, { "acc": 0.94973278, "epoch": 3.107248260363361, "grad_norm": 8.99355411529541, "learning_rate": 1.302030900030734e-06, "loss": 0.33415327, "memory(GiB)": 34.88, "step": 114760, "train_speed(iter/s)": 0.411266 }, { "acc": 0.93112984, "epoch": 3.1073836406465762, "grad_norm": 4.471668243408203, "learning_rate": 1.3016544139314136e-06, "loss": 0.41932492, "memory(GiB)": 34.88, "step": 114765, "train_speed(iter/s)": 0.411267 }, { "acc": 0.94553795, "epoch": 3.1075190209297916, "grad_norm": 6.771517276763916, "learning_rate": 1.301277974167401e-06, "loss": 0.32191057, "memory(GiB)": 34.88, "step": 114770, "train_speed(iter/s)": 0.411268 }, { "acc": 0.94866724, "epoch": 3.1076544012130074, "grad_norm": 19.711170196533203, "learning_rate": 1.3009015807434134e-06, "loss": 0.22424352, "memory(GiB)": 34.88, "step": 114775, "train_speed(iter/s)": 0.411269 }, { "acc": 0.94863653, "epoch": 3.1077897814962228, "grad_norm": 7.917459011077881, "learning_rate": 1.3005252336641656e-06, "loss": 0.30881505, "memory(GiB)": 34.88, "step": 114780, "train_speed(iter/s)": 0.41127 }, { "acc": 0.94937439, "epoch": 3.1079251617794386, "grad_norm": 4.031818389892578, "learning_rate": 1.3001489329343734e-06, "loss": 0.27642725, "memory(GiB)": 34.88, "step": 114785, "train_speed(iter/s)": 0.411271 }, { "acc": 0.94065657, "epoch": 3.108060542062654, "grad_norm": 4.089335918426514, "learning_rate": 1.299772678558748e-06, "loss": 0.35190568, "memory(GiB)": 34.88, "step": 114790, "train_speed(iter/s)": 0.411272 }, { "acc": 0.95227327, "epoch": 3.1081959223458697, "grad_norm": 6.077321529388428, "learning_rate": 1.2993964705420042e-06, "loss": 0.25089216, "memory(GiB)": 34.88, "step": 114795, "train_speed(iter/s)": 0.411273 }, { "acc": 0.94563484, "epoch": 3.108331302629085, "grad_norm": 20.425613403320312, "learning_rate": 1.2990203088888564e-06, "loss": 0.34354348, "memory(GiB)": 34.88, "step": 114800, "train_speed(iter/s)": 0.411274 }, { "acc": 0.93575344, "epoch": 3.1084666829123004, "grad_norm": 6.941268444061279, "learning_rate": 1.2986441936040142e-06, "loss": 0.39179082, "memory(GiB)": 34.88, "step": 114805, "train_speed(iter/s)": 0.411275 }, { "acc": 0.94967623, "epoch": 3.1086020631955162, "grad_norm": 5.852370738983154, "learning_rate": 1.2982681246921893e-06, "loss": 0.26093211, "memory(GiB)": 34.88, "step": 114810, "train_speed(iter/s)": 0.411276 }, { "acc": 0.93900137, "epoch": 3.1087374434787316, "grad_norm": 5.98564338684082, "learning_rate": 1.2978921021580939e-06, "loss": 0.36005058, "memory(GiB)": 34.88, "step": 114815, "train_speed(iter/s)": 0.411277 }, { "acc": 0.94289055, "epoch": 3.1088728237619474, "grad_norm": 8.589726448059082, "learning_rate": 1.2975161260064393e-06, "loss": 0.34861159, "memory(GiB)": 34.88, "step": 114820, "train_speed(iter/s)": 0.411278 }, { "acc": 0.95051975, "epoch": 3.1090082040451628, "grad_norm": 10.999591827392578, "learning_rate": 1.297140196241933e-06, "loss": 0.23279545, "memory(GiB)": 34.88, "step": 114825, "train_speed(iter/s)": 0.411279 }, { "acc": 0.95709343, "epoch": 3.1091435843283786, "grad_norm": 4.980147838592529, "learning_rate": 1.2967643128692847e-06, "loss": 0.27370448, "memory(GiB)": 34.88, "step": 114830, "train_speed(iter/s)": 0.41128 }, { "acc": 0.95223141, "epoch": 3.109278964611594, "grad_norm": 5.648825645446777, "learning_rate": 1.2963884758932058e-06, "loss": 0.24003563, "memory(GiB)": 34.88, "step": 114835, "train_speed(iter/s)": 0.411281 }, { "acc": 0.95046558, "epoch": 3.1094143448948097, "grad_norm": 13.760933876037598, "learning_rate": 1.2960126853184004e-06, "loss": 0.22953115, "memory(GiB)": 34.88, "step": 114840, "train_speed(iter/s)": 0.411282 }, { "acc": 0.91877871, "epoch": 3.109549725178025, "grad_norm": 8.976967811584473, "learning_rate": 1.2956369411495784e-06, "loss": 0.46691628, "memory(GiB)": 34.88, "step": 114845, "train_speed(iter/s)": 0.411283 }, { "acc": 0.94391403, "epoch": 3.1096851054612404, "grad_norm": 5.608002662658691, "learning_rate": 1.2952612433914457e-06, "loss": 0.33709931, "memory(GiB)": 34.88, "step": 114850, "train_speed(iter/s)": 0.411284 }, { "acc": 0.93358002, "epoch": 3.1098204857444562, "grad_norm": 5.770788192749023, "learning_rate": 1.2948855920487113e-06, "loss": 0.37919981, "memory(GiB)": 34.88, "step": 114855, "train_speed(iter/s)": 0.411285 }, { "acc": 0.93971138, "epoch": 3.1099558660276716, "grad_norm": 7.463163375854492, "learning_rate": 1.2945099871260766e-06, "loss": 0.32740223, "memory(GiB)": 34.88, "step": 114860, "train_speed(iter/s)": 0.411286 }, { "acc": 0.94477539, "epoch": 3.1100912463108874, "grad_norm": 5.263079643249512, "learning_rate": 1.2941344286282498e-06, "loss": 0.36821718, "memory(GiB)": 34.88, "step": 114865, "train_speed(iter/s)": 0.411286 }, { "acc": 0.94075298, "epoch": 3.1102266265941028, "grad_norm": 4.415920257568359, "learning_rate": 1.293758916559936e-06, "loss": 0.35464046, "memory(GiB)": 34.88, "step": 114870, "train_speed(iter/s)": 0.411287 }, { "acc": 0.93985786, "epoch": 3.1103620068773186, "grad_norm": 7.785943984985352, "learning_rate": 1.2933834509258363e-06, "loss": 0.33263588, "memory(GiB)": 34.88, "step": 114875, "train_speed(iter/s)": 0.411288 }, { "acc": 0.9439724, "epoch": 3.110497387160534, "grad_norm": 6.397315502166748, "learning_rate": 1.2930080317306563e-06, "loss": 0.31244226, "memory(GiB)": 34.88, "step": 114880, "train_speed(iter/s)": 0.411289 }, { "acc": 0.93215046, "epoch": 3.1106327674437493, "grad_norm": 6.449047088623047, "learning_rate": 1.292632658979098e-06, "loss": 0.44838743, "memory(GiB)": 34.88, "step": 114885, "train_speed(iter/s)": 0.41129 }, { "acc": 0.92819901, "epoch": 3.110768147726965, "grad_norm": 11.06002140045166, "learning_rate": 1.2922573326758655e-06, "loss": 0.44495988, "memory(GiB)": 34.88, "step": 114890, "train_speed(iter/s)": 0.411291 }, { "acc": 0.9472703, "epoch": 3.1109035280101804, "grad_norm": 5.285184860229492, "learning_rate": 1.2918820528256576e-06, "loss": 0.3425988, "memory(GiB)": 34.88, "step": 114895, "train_speed(iter/s)": 0.411292 }, { "acc": 0.93601646, "epoch": 3.1110389082933962, "grad_norm": 8.659151077270508, "learning_rate": 1.2915068194331776e-06, "loss": 0.33391294, "memory(GiB)": 34.88, "step": 114900, "train_speed(iter/s)": 0.411293 }, { "acc": 0.93303394, "epoch": 3.1111742885766116, "grad_norm": 3.495811700820923, "learning_rate": 1.291131632503125e-06, "loss": 0.38867793, "memory(GiB)": 34.88, "step": 114905, "train_speed(iter/s)": 0.411294 }, { "acc": 0.94033604, "epoch": 3.1113096688598274, "grad_norm": 5.288567066192627, "learning_rate": 1.290756492040201e-06, "loss": 0.35369887, "memory(GiB)": 34.88, "step": 114910, "train_speed(iter/s)": 0.411295 }, { "acc": 0.9481926, "epoch": 3.1114450491430428, "grad_norm": 9.994889259338379, "learning_rate": 1.2903813980491032e-06, "loss": 0.25503788, "memory(GiB)": 34.88, "step": 114915, "train_speed(iter/s)": 0.411296 }, { "acc": 0.9437068, "epoch": 3.1115804294262586, "grad_norm": 18.347660064697266, "learning_rate": 1.2900063505345312e-06, "loss": 0.28438785, "memory(GiB)": 34.88, "step": 114920, "train_speed(iter/s)": 0.411297 }, { "acc": 0.95444813, "epoch": 3.111715809709474, "grad_norm": 3.051626682281494, "learning_rate": 1.2896313495011845e-06, "loss": 0.25707355, "memory(GiB)": 34.88, "step": 114925, "train_speed(iter/s)": 0.411298 }, { "acc": 0.94487791, "epoch": 3.1118511899926893, "grad_norm": 6.690197467803955, "learning_rate": 1.2892563949537583e-06, "loss": 0.28714991, "memory(GiB)": 34.88, "step": 114930, "train_speed(iter/s)": 0.411299 }, { "acc": 0.95172043, "epoch": 3.111986570275905, "grad_norm": 10.581984519958496, "learning_rate": 1.2888814868969507e-06, "loss": 0.28510389, "memory(GiB)": 34.88, "step": 114935, "train_speed(iter/s)": 0.4113 }, { "acc": 0.93667145, "epoch": 3.1121219505591204, "grad_norm": 13.721325874328613, "learning_rate": 1.2885066253354583e-06, "loss": 0.42566986, "memory(GiB)": 34.88, "step": 114940, "train_speed(iter/s)": 0.411301 }, { "acc": 0.9569169, "epoch": 3.1122573308423362, "grad_norm": 3.1976983547210693, "learning_rate": 1.2881318102739785e-06, "loss": 0.20302577, "memory(GiB)": 34.88, "step": 114945, "train_speed(iter/s)": 0.411302 }, { "acc": 0.93871937, "epoch": 3.1123927111255516, "grad_norm": 6.613318920135498, "learning_rate": 1.2877570417172037e-06, "loss": 0.39297771, "memory(GiB)": 34.88, "step": 114950, "train_speed(iter/s)": 0.411302 }, { "acc": 0.94854736, "epoch": 3.1125280914087674, "grad_norm": 19.322351455688477, "learning_rate": 1.28738231966983e-06, "loss": 0.24060133, "memory(GiB)": 34.88, "step": 114955, "train_speed(iter/s)": 0.411303 }, { "acc": 0.94170208, "epoch": 3.1126634716919828, "grad_norm": 8.193181991577148, "learning_rate": 1.2870076441365526e-06, "loss": 0.3498045, "memory(GiB)": 34.88, "step": 114960, "train_speed(iter/s)": 0.411304 }, { "acc": 0.95342426, "epoch": 3.112798851975198, "grad_norm": 7.857431411743164, "learning_rate": 1.2866330151220624e-06, "loss": 0.26358426, "memory(GiB)": 34.88, "step": 114965, "train_speed(iter/s)": 0.411305 }, { "acc": 0.95100994, "epoch": 3.112934232258414, "grad_norm": 15.697131156921387, "learning_rate": 1.286258432631054e-06, "loss": 0.2917181, "memory(GiB)": 34.88, "step": 114970, "train_speed(iter/s)": 0.411306 }, { "acc": 0.94242096, "epoch": 3.1130696125416293, "grad_norm": 5.701385974884033, "learning_rate": 1.28588389666822e-06, "loss": 0.34948149, "memory(GiB)": 34.88, "step": 114975, "train_speed(iter/s)": 0.411307 }, { "acc": 0.93781624, "epoch": 3.113204992824845, "grad_norm": 9.100457191467285, "learning_rate": 1.2855094072382534e-06, "loss": 0.37552352, "memory(GiB)": 34.88, "step": 114980, "train_speed(iter/s)": 0.411308 }, { "acc": 0.9537775, "epoch": 3.1133403731080604, "grad_norm": 7.918715000152588, "learning_rate": 1.2851349643458408e-06, "loss": 0.25761797, "memory(GiB)": 34.88, "step": 114985, "train_speed(iter/s)": 0.411309 }, { "acc": 0.93874683, "epoch": 3.1134757533912762, "grad_norm": 6.13551664352417, "learning_rate": 1.284760567995679e-06, "loss": 0.40383587, "memory(GiB)": 34.88, "step": 114990, "train_speed(iter/s)": 0.41131 }, { "acc": 0.94062443, "epoch": 3.1136111336744916, "grad_norm": 5.165663242340088, "learning_rate": 1.284386218192455e-06, "loss": 0.3642436, "memory(GiB)": 34.88, "step": 114995, "train_speed(iter/s)": 0.411311 }, { "acc": 0.95025043, "epoch": 3.1137465139577074, "grad_norm": 4.401540279388428, "learning_rate": 1.2840119149408564e-06, "loss": 0.295507, "memory(GiB)": 34.88, "step": 115000, "train_speed(iter/s)": 0.411312 }, { "acc": 0.94491749, "epoch": 3.1138818942409228, "grad_norm": 8.927390098571777, "learning_rate": 1.2836376582455744e-06, "loss": 0.28858523, "memory(GiB)": 34.88, "step": 115005, "train_speed(iter/s)": 0.411313 }, { "acc": 0.93403397, "epoch": 3.114017274524138, "grad_norm": 10.24297046661377, "learning_rate": 1.2832634481112972e-06, "loss": 0.39494276, "memory(GiB)": 34.88, "step": 115010, "train_speed(iter/s)": 0.411313 }, { "acc": 0.93693171, "epoch": 3.114152654807354, "grad_norm": 5.003336429595947, "learning_rate": 1.282889284542713e-06, "loss": 0.31966238, "memory(GiB)": 34.88, "step": 115015, "train_speed(iter/s)": 0.411314 }, { "acc": 0.94173546, "epoch": 3.1142880350905693, "grad_norm": 14.741474151611328, "learning_rate": 1.2825151675445055e-06, "loss": 0.33788176, "memory(GiB)": 34.88, "step": 115020, "train_speed(iter/s)": 0.411315 }, { "acc": 0.94975147, "epoch": 3.114423415373785, "grad_norm": 3.956334352493286, "learning_rate": 1.2821410971213663e-06, "loss": 0.29881604, "memory(GiB)": 34.88, "step": 115025, "train_speed(iter/s)": 0.411316 }, { "acc": 0.94830179, "epoch": 3.1145587956570004, "grad_norm": 9.480438232421875, "learning_rate": 1.2817670732779776e-06, "loss": 0.35098853, "memory(GiB)": 34.88, "step": 115030, "train_speed(iter/s)": 0.411317 }, { "acc": 0.9218421, "epoch": 3.1146941759402162, "grad_norm": 28.219558715820312, "learning_rate": 1.281393096019028e-06, "loss": 0.56127634, "memory(GiB)": 34.88, "step": 115035, "train_speed(iter/s)": 0.411318 }, { "acc": 0.94443245, "epoch": 3.1148295562234316, "grad_norm": 4.269364833831787, "learning_rate": 1.2810191653491987e-06, "loss": 0.3053772, "memory(GiB)": 34.88, "step": 115040, "train_speed(iter/s)": 0.411319 }, { "acc": 0.94954824, "epoch": 3.114964936506647, "grad_norm": 7.894834041595459, "learning_rate": 1.2806452812731756e-06, "loss": 0.28971395, "memory(GiB)": 34.88, "step": 115045, "train_speed(iter/s)": 0.41132 }, { "acc": 0.95945845, "epoch": 3.1151003167898628, "grad_norm": 5.18971061706543, "learning_rate": 1.280271443795644e-06, "loss": 0.27019262, "memory(GiB)": 34.88, "step": 115050, "train_speed(iter/s)": 0.411321 }, { "acc": 0.94133835, "epoch": 3.115235697073078, "grad_norm": 7.57088041305542, "learning_rate": 1.2798976529212823e-06, "loss": 0.37295032, "memory(GiB)": 34.88, "step": 115055, "train_speed(iter/s)": 0.411321 }, { "acc": 0.95359097, "epoch": 3.115371077356294, "grad_norm": 4.562067031860352, "learning_rate": 1.2795239086547786e-06, "loss": 0.23586786, "memory(GiB)": 34.88, "step": 115060, "train_speed(iter/s)": 0.411322 }, { "acc": 0.94341574, "epoch": 3.1155064576395093, "grad_norm": 7.690242767333984, "learning_rate": 1.2791502110008105e-06, "loss": 0.34585147, "memory(GiB)": 34.88, "step": 115065, "train_speed(iter/s)": 0.411323 }, { "acc": 0.94796906, "epoch": 3.115641837922725, "grad_norm": 8.341560363769531, "learning_rate": 1.2787765599640628e-06, "loss": 0.29429126, "memory(GiB)": 34.88, "step": 115070, "train_speed(iter/s)": 0.411324 }, { "acc": 0.94110146, "epoch": 3.1157772182059404, "grad_norm": 18.304367065429688, "learning_rate": 1.2784029555492127e-06, "loss": 0.44640579, "memory(GiB)": 34.88, "step": 115075, "train_speed(iter/s)": 0.411325 }, { "acc": 0.95601921, "epoch": 3.1159125984891562, "grad_norm": 11.755087852478027, "learning_rate": 1.2780293977609425e-06, "loss": 0.22985792, "memory(GiB)": 34.88, "step": 115080, "train_speed(iter/s)": 0.411326 }, { "acc": 0.93554068, "epoch": 3.1160479787723716, "grad_norm": 9.756895065307617, "learning_rate": 1.2776558866039327e-06, "loss": 0.3979068, "memory(GiB)": 34.88, "step": 115085, "train_speed(iter/s)": 0.411327 }, { "acc": 0.94023962, "epoch": 3.116183359055587, "grad_norm": 6.116494655609131, "learning_rate": 1.2772824220828572e-06, "loss": 0.37844961, "memory(GiB)": 34.88, "step": 115090, "train_speed(iter/s)": 0.411328 }, { "acc": 0.94563999, "epoch": 3.1163187393388028, "grad_norm": 13.425599098205566, "learning_rate": 1.2769090042024015e-06, "loss": 0.29218953, "memory(GiB)": 34.88, "step": 115095, "train_speed(iter/s)": 0.411329 }, { "acc": 0.93828621, "epoch": 3.116454119622018, "grad_norm": 12.955885887145996, "learning_rate": 1.2765356329672382e-06, "loss": 0.42248487, "memory(GiB)": 34.88, "step": 115100, "train_speed(iter/s)": 0.41133 }, { "acc": 0.94070616, "epoch": 3.116589499905234, "grad_norm": 7.1154961585998535, "learning_rate": 1.2761623083820475e-06, "loss": 0.35952494, "memory(GiB)": 34.88, "step": 115105, "train_speed(iter/s)": 0.411331 }, { "acc": 0.94971466, "epoch": 3.1167248801884493, "grad_norm": 7.032128810882568, "learning_rate": 1.275789030451502e-06, "loss": 0.28512874, "memory(GiB)": 34.88, "step": 115110, "train_speed(iter/s)": 0.411332 }, { "acc": 0.94880333, "epoch": 3.116860260471665, "grad_norm": 13.532488822937012, "learning_rate": 1.2754157991802828e-06, "loss": 0.28275445, "memory(GiB)": 34.88, "step": 115115, "train_speed(iter/s)": 0.411333 }, { "acc": 0.93616734, "epoch": 3.1169956407548804, "grad_norm": 10.027725219726562, "learning_rate": 1.2750426145730636e-06, "loss": 0.36433091, "memory(GiB)": 34.88, "step": 115120, "train_speed(iter/s)": 0.411334 }, { "acc": 0.93491611, "epoch": 3.117131021038096, "grad_norm": 9.111781120300293, "learning_rate": 1.2746694766345171e-06, "loss": 0.37811656, "memory(GiB)": 34.88, "step": 115125, "train_speed(iter/s)": 0.411335 }, { "acc": 0.95518837, "epoch": 3.1172664013213116, "grad_norm": 7.228271484375, "learning_rate": 1.2742963853693188e-06, "loss": 0.24056702, "memory(GiB)": 34.88, "step": 115130, "train_speed(iter/s)": 0.411336 }, { "acc": 0.93625278, "epoch": 3.117401781604527, "grad_norm": 13.291751861572266, "learning_rate": 1.273923340782143e-06, "loss": 0.40704622, "memory(GiB)": 34.88, "step": 115135, "train_speed(iter/s)": 0.411336 }, { "acc": 0.95117455, "epoch": 3.1175371618877428, "grad_norm": 11.165155410766602, "learning_rate": 1.2735503428776645e-06, "loss": 0.33717027, "memory(GiB)": 34.88, "step": 115140, "train_speed(iter/s)": 0.411337 }, { "acc": 0.95183296, "epoch": 3.117672542170958, "grad_norm": 5.680181980133057, "learning_rate": 1.2731773916605504e-06, "loss": 0.29715505, "memory(GiB)": 34.88, "step": 115145, "train_speed(iter/s)": 0.411338 }, { "acc": 0.94931889, "epoch": 3.117807922454174, "grad_norm": 6.708283424377441, "learning_rate": 1.2728044871354789e-06, "loss": 0.29635925, "memory(GiB)": 34.88, "step": 115150, "train_speed(iter/s)": 0.411339 }, { "acc": 0.94679222, "epoch": 3.1179433027373893, "grad_norm": 9.062219619750977, "learning_rate": 1.2724316293071174e-06, "loss": 0.32478909, "memory(GiB)": 34.88, "step": 115155, "train_speed(iter/s)": 0.41134 }, { "acc": 0.95378675, "epoch": 3.118078683020605, "grad_norm": 6.0995049476623535, "learning_rate": 1.2720588181801397e-06, "loss": 0.2967814, "memory(GiB)": 34.88, "step": 115160, "train_speed(iter/s)": 0.411341 }, { "acc": 0.93931503, "epoch": 3.1182140633038204, "grad_norm": 6.325929641723633, "learning_rate": 1.2716860537592119e-06, "loss": 0.30337186, "memory(GiB)": 34.88, "step": 115165, "train_speed(iter/s)": 0.411342 }, { "acc": 0.94796457, "epoch": 3.118349443587036, "grad_norm": 13.457725524902344, "learning_rate": 1.2713133360490065e-06, "loss": 0.30644321, "memory(GiB)": 34.88, "step": 115170, "train_speed(iter/s)": 0.411343 }, { "acc": 0.93852482, "epoch": 3.1184848238702516, "grad_norm": 4.870894432067871, "learning_rate": 1.2709406650541934e-06, "loss": 0.30094271, "memory(GiB)": 34.88, "step": 115175, "train_speed(iter/s)": 0.411344 }, { "acc": 0.93990126, "epoch": 3.118620204153467, "grad_norm": 12.111213684082031, "learning_rate": 1.2705680407794364e-06, "loss": 0.39868166, "memory(GiB)": 34.88, "step": 115180, "train_speed(iter/s)": 0.411345 }, { "acc": 0.94689741, "epoch": 3.1187555844366828, "grad_norm": 8.357613563537598, "learning_rate": 1.2701954632294094e-06, "loss": 0.27877998, "memory(GiB)": 34.88, "step": 115185, "train_speed(iter/s)": 0.411346 }, { "acc": 0.9435358, "epoch": 3.118890964719898, "grad_norm": 6.096899032592773, "learning_rate": 1.2698229324087754e-06, "loss": 0.2670877, "memory(GiB)": 34.88, "step": 115190, "train_speed(iter/s)": 0.411347 }, { "acc": 0.93544588, "epoch": 3.119026345003114, "grad_norm": 5.115170478820801, "learning_rate": 1.2694504483222043e-06, "loss": 0.40104246, "memory(GiB)": 34.88, "step": 115195, "train_speed(iter/s)": 0.411348 }, { "acc": 0.95012922, "epoch": 3.1191617252863293, "grad_norm": 17.620454788208008, "learning_rate": 1.2690780109743584e-06, "loss": 0.28882976, "memory(GiB)": 34.88, "step": 115200, "train_speed(iter/s)": 0.411349 }, { "acc": 0.93670235, "epoch": 3.1192971055695446, "grad_norm": 22.747203826904297, "learning_rate": 1.2687056203699047e-06, "loss": 0.42198114, "memory(GiB)": 34.88, "step": 115205, "train_speed(iter/s)": 0.41135 }, { "acc": 0.9557827, "epoch": 3.1194324858527605, "grad_norm": 7.455583572387695, "learning_rate": 1.2683332765135105e-06, "loss": 0.24866145, "memory(GiB)": 34.88, "step": 115210, "train_speed(iter/s)": 0.41135 }, { "acc": 0.92618999, "epoch": 3.119567866135976, "grad_norm": 17.196992874145508, "learning_rate": 1.2679609794098352e-06, "loss": 0.46714172, "memory(GiB)": 34.88, "step": 115215, "train_speed(iter/s)": 0.411351 }, { "acc": 0.93893175, "epoch": 3.1197032464191916, "grad_norm": 8.78569221496582, "learning_rate": 1.2675887290635489e-06, "loss": 0.37106106, "memory(GiB)": 34.88, "step": 115220, "train_speed(iter/s)": 0.411352 }, { "acc": 0.95769615, "epoch": 3.119838626702407, "grad_norm": 5.317576885223389, "learning_rate": 1.2672165254793095e-06, "loss": 0.22855799, "memory(GiB)": 34.88, "step": 115225, "train_speed(iter/s)": 0.411353 }, { "acc": 0.95041332, "epoch": 3.1199740069856228, "grad_norm": 3.416377305984497, "learning_rate": 1.2668443686617831e-06, "loss": 0.24557133, "memory(GiB)": 34.88, "step": 115230, "train_speed(iter/s)": 0.411354 }, { "acc": 0.94165096, "epoch": 3.120109387268838, "grad_norm": 3.1697230339050293, "learning_rate": 1.2664722586156272e-06, "loss": 0.39757247, "memory(GiB)": 34.88, "step": 115235, "train_speed(iter/s)": 0.411355 }, { "acc": 0.92703571, "epoch": 3.120244767552054, "grad_norm": 5.875863075256348, "learning_rate": 1.2661001953455088e-06, "loss": 0.46696529, "memory(GiB)": 34.88, "step": 115240, "train_speed(iter/s)": 0.411356 }, { "acc": 0.94100161, "epoch": 3.1203801478352693, "grad_norm": 6.541200160980225, "learning_rate": 1.2657281788560866e-06, "loss": 0.34483221, "memory(GiB)": 34.88, "step": 115245, "train_speed(iter/s)": 0.411357 }, { "acc": 0.9396019, "epoch": 3.1205155281184846, "grad_norm": 19.423986434936523, "learning_rate": 1.2653562091520169e-06, "loss": 0.35975628, "memory(GiB)": 34.88, "step": 115250, "train_speed(iter/s)": 0.411358 }, { "acc": 0.94614525, "epoch": 3.1206509084017005, "grad_norm": 8.493191719055176, "learning_rate": 1.2649842862379658e-06, "loss": 0.37616818, "memory(GiB)": 34.88, "step": 115255, "train_speed(iter/s)": 0.411359 }, { "acc": 0.94573154, "epoch": 3.120786288684916, "grad_norm": 5.980177402496338, "learning_rate": 1.2646124101185874e-06, "loss": 0.30688565, "memory(GiB)": 34.88, "step": 115260, "train_speed(iter/s)": 0.41136 }, { "acc": 0.93180389, "epoch": 3.1209216689681316, "grad_norm": 19.394271850585938, "learning_rate": 1.2642405807985445e-06, "loss": 0.42025623, "memory(GiB)": 34.88, "step": 115265, "train_speed(iter/s)": 0.411361 }, { "acc": 0.94190979, "epoch": 3.121057049251347, "grad_norm": 12.747849464416504, "learning_rate": 1.2638687982824884e-06, "loss": 0.34909768, "memory(GiB)": 34.88, "step": 115270, "train_speed(iter/s)": 0.411362 }, { "acc": 0.93235397, "epoch": 3.1211924295345628, "grad_norm": 5.511237144470215, "learning_rate": 1.2634970625750845e-06, "loss": 0.34508083, "memory(GiB)": 34.88, "step": 115275, "train_speed(iter/s)": 0.411363 }, { "acc": 0.95213757, "epoch": 3.121327809817778, "grad_norm": 6.3378753662109375, "learning_rate": 1.2631253736809836e-06, "loss": 0.32687323, "memory(GiB)": 34.88, "step": 115280, "train_speed(iter/s)": 0.411364 }, { "acc": 0.94059496, "epoch": 3.1214631901009935, "grad_norm": 5.603893280029297, "learning_rate": 1.2627537316048442e-06, "loss": 0.39532733, "memory(GiB)": 34.88, "step": 115285, "train_speed(iter/s)": 0.411365 }, { "acc": 0.95128937, "epoch": 3.1215985703842093, "grad_norm": 8.704472541809082, "learning_rate": 1.2623821363513226e-06, "loss": 0.31533976, "memory(GiB)": 34.88, "step": 115290, "train_speed(iter/s)": 0.411366 }, { "acc": 0.93189411, "epoch": 3.1217339506674247, "grad_norm": 11.481193542480469, "learning_rate": 1.2620105879250713e-06, "loss": 0.4278285, "memory(GiB)": 34.88, "step": 115295, "train_speed(iter/s)": 0.411367 }, { "acc": 0.94590483, "epoch": 3.1218693309506405, "grad_norm": 8.045907020568848, "learning_rate": 1.2616390863307476e-06, "loss": 0.30481515, "memory(GiB)": 34.88, "step": 115300, "train_speed(iter/s)": 0.411368 }, { "acc": 0.94860029, "epoch": 3.122004711233856, "grad_norm": 8.745963096618652, "learning_rate": 1.2612676315730001e-06, "loss": 0.29514546, "memory(GiB)": 34.88, "step": 115305, "train_speed(iter/s)": 0.411369 }, { "acc": 0.93932333, "epoch": 3.1221400915170716, "grad_norm": 6.430263042449951, "learning_rate": 1.2608962236564893e-06, "loss": 0.3808018, "memory(GiB)": 34.88, "step": 115310, "train_speed(iter/s)": 0.41137 }, { "acc": 0.93344212, "epoch": 3.122275471800287, "grad_norm": 10.473610877990723, "learning_rate": 1.2605248625858624e-06, "loss": 0.35841606, "memory(GiB)": 34.88, "step": 115315, "train_speed(iter/s)": 0.411371 }, { "acc": 0.94761715, "epoch": 3.1224108520835028, "grad_norm": 2.5993590354919434, "learning_rate": 1.2601535483657735e-06, "loss": 0.25159366, "memory(GiB)": 34.88, "step": 115320, "train_speed(iter/s)": 0.411372 }, { "acc": 0.94480553, "epoch": 3.122546232366718, "grad_norm": 5.834105968475342, "learning_rate": 1.2597822810008733e-06, "loss": 0.36074328, "memory(GiB)": 34.88, "step": 115325, "train_speed(iter/s)": 0.411373 }, { "acc": 0.95682774, "epoch": 3.1226816126499335, "grad_norm": 9.252921104431152, "learning_rate": 1.259411060495814e-06, "loss": 0.26467309, "memory(GiB)": 34.88, "step": 115330, "train_speed(iter/s)": 0.411374 }, { "acc": 0.95151558, "epoch": 3.1228169929331493, "grad_norm": 19.771944046020508, "learning_rate": 1.2590398868552458e-06, "loss": 0.2760021, "memory(GiB)": 34.88, "step": 115335, "train_speed(iter/s)": 0.411375 }, { "acc": 0.94440079, "epoch": 3.1229523732163647, "grad_norm": 4.254266738891602, "learning_rate": 1.2586687600838138e-06, "loss": 0.30240355, "memory(GiB)": 34.88, "step": 115340, "train_speed(iter/s)": 0.411376 }, { "acc": 0.94669933, "epoch": 3.1230877534995805, "grad_norm": 5.305547714233398, "learning_rate": 1.258297680186174e-06, "loss": 0.37922654, "memory(GiB)": 34.88, "step": 115345, "train_speed(iter/s)": 0.411376 }, { "acc": 0.95393839, "epoch": 3.123223133782796, "grad_norm": 8.670902252197266, "learning_rate": 1.2579266471669702e-06, "loss": 0.27639208, "memory(GiB)": 34.88, "step": 115350, "train_speed(iter/s)": 0.411377 }, { "acc": 0.94902258, "epoch": 3.1233585140660116, "grad_norm": 10.870965003967285, "learning_rate": 1.257555661030852e-06, "loss": 0.29652817, "memory(GiB)": 34.88, "step": 115355, "train_speed(iter/s)": 0.411378 }, { "acc": 0.94136734, "epoch": 3.123493894349227, "grad_norm": 11.627037048339844, "learning_rate": 1.2571847217824665e-06, "loss": 0.35489869, "memory(GiB)": 34.88, "step": 115360, "train_speed(iter/s)": 0.411379 }, { "acc": 0.93191423, "epoch": 3.1236292746324423, "grad_norm": 5.4952168464660645, "learning_rate": 1.2568138294264612e-06, "loss": 0.41666541, "memory(GiB)": 34.88, "step": 115365, "train_speed(iter/s)": 0.41138 }, { "acc": 0.93015213, "epoch": 3.123764654915658, "grad_norm": 6.737975120544434, "learning_rate": 1.2564429839674822e-06, "loss": 0.44745088, "memory(GiB)": 34.88, "step": 115370, "train_speed(iter/s)": 0.411381 }, { "acc": 0.94951782, "epoch": 3.1239000351988735, "grad_norm": 4.574621677398682, "learning_rate": 1.2560721854101716e-06, "loss": 0.30792542, "memory(GiB)": 34.88, "step": 115375, "train_speed(iter/s)": 0.411382 }, { "acc": 0.95498009, "epoch": 3.1240354154820893, "grad_norm": 7.010219573974609, "learning_rate": 1.2557014337591797e-06, "loss": 0.2803791, "memory(GiB)": 34.88, "step": 115380, "train_speed(iter/s)": 0.411383 }, { "acc": 0.94836674, "epoch": 3.1241707957653047, "grad_norm": 13.73936653137207, "learning_rate": 1.2553307290191468e-06, "loss": 0.36547899, "memory(GiB)": 34.88, "step": 115385, "train_speed(iter/s)": 0.411384 }, { "acc": 0.93970013, "epoch": 3.1243061760485205, "grad_norm": 9.641470909118652, "learning_rate": 1.2549600711947188e-06, "loss": 0.3505878, "memory(GiB)": 34.88, "step": 115390, "train_speed(iter/s)": 0.411385 }, { "acc": 0.94328594, "epoch": 3.124441556331736, "grad_norm": 4.605344295501709, "learning_rate": 1.254589460290538e-06, "loss": 0.29884923, "memory(GiB)": 34.88, "step": 115395, "train_speed(iter/s)": 0.411386 }, { "acc": 0.94703999, "epoch": 3.124576936614951, "grad_norm": 3.2094266414642334, "learning_rate": 1.254218896311249e-06, "loss": 0.30628104, "memory(GiB)": 34.88, "step": 115400, "train_speed(iter/s)": 0.411387 }, { "acc": 0.94314041, "epoch": 3.124712316898167, "grad_norm": 4.4529290199279785, "learning_rate": 1.2538483792614903e-06, "loss": 0.25828917, "memory(GiB)": 34.88, "step": 115405, "train_speed(iter/s)": 0.411388 }, { "acc": 0.9429554, "epoch": 3.1248476971813823, "grad_norm": 5.907423973083496, "learning_rate": 1.2534779091459056e-06, "loss": 0.27120674, "memory(GiB)": 34.88, "step": 115410, "train_speed(iter/s)": 0.411389 }, { "acc": 0.96252899, "epoch": 3.124983077464598, "grad_norm": 5.067464828491211, "learning_rate": 1.2531074859691375e-06, "loss": 0.26080256, "memory(GiB)": 34.88, "step": 115415, "train_speed(iter/s)": 0.41139 }, { "acc": 0.95492172, "epoch": 3.1251184577478135, "grad_norm": 10.739564895629883, "learning_rate": 1.252737109735822e-06, "loss": 0.3048955, "memory(GiB)": 34.88, "step": 115420, "train_speed(iter/s)": 0.411391 }, { "acc": 0.94048777, "epoch": 3.1252538380310293, "grad_norm": 8.539557456970215, "learning_rate": 1.2523667804506017e-06, "loss": 0.30463781, "memory(GiB)": 34.88, "step": 115425, "train_speed(iter/s)": 0.411392 }, { "acc": 0.95043278, "epoch": 3.1253892183142447, "grad_norm": 8.946495056152344, "learning_rate": 1.2519964981181148e-06, "loss": 0.33404129, "memory(GiB)": 34.88, "step": 115430, "train_speed(iter/s)": 0.411393 }, { "acc": 0.93131866, "epoch": 3.1255245985974605, "grad_norm": 7.1161675453186035, "learning_rate": 1.251626262743001e-06, "loss": 0.41882892, "memory(GiB)": 34.88, "step": 115435, "train_speed(iter/s)": 0.411394 }, { "acc": 0.95423279, "epoch": 3.125659978880676, "grad_norm": 12.911648750305176, "learning_rate": 1.2512560743298966e-06, "loss": 0.20910783, "memory(GiB)": 34.88, "step": 115440, "train_speed(iter/s)": 0.411395 }, { "acc": 0.95355463, "epoch": 3.125795359163891, "grad_norm": 3.796623468399048, "learning_rate": 1.250885932883439e-06, "loss": 0.2743818, "memory(GiB)": 34.88, "step": 115445, "train_speed(iter/s)": 0.411396 }, { "acc": 0.93527689, "epoch": 3.125930739447107, "grad_norm": 7.63888692855835, "learning_rate": 1.250515838408266e-06, "loss": 0.35807223, "memory(GiB)": 34.88, "step": 115450, "train_speed(iter/s)": 0.411397 }, { "acc": 0.94429188, "epoch": 3.1260661197303223, "grad_norm": 9.31552791595459, "learning_rate": 1.2501457909090148e-06, "loss": 0.36193793, "memory(GiB)": 34.88, "step": 115455, "train_speed(iter/s)": 0.411398 }, { "acc": 0.93331995, "epoch": 3.126201500013538, "grad_norm": 7.114445686340332, "learning_rate": 1.2497757903903178e-06, "loss": 0.40333862, "memory(GiB)": 34.88, "step": 115460, "train_speed(iter/s)": 0.411399 }, { "acc": 0.94109592, "epoch": 3.1263368802967535, "grad_norm": 8.884893417358398, "learning_rate": 1.2494058368568118e-06, "loss": 0.32974079, "memory(GiB)": 34.88, "step": 115465, "train_speed(iter/s)": 0.4114 }, { "acc": 0.94930382, "epoch": 3.1264722605799693, "grad_norm": 5.571058750152588, "learning_rate": 1.2490359303131328e-06, "loss": 0.28414662, "memory(GiB)": 34.88, "step": 115470, "train_speed(iter/s)": 0.411401 }, { "acc": 0.94111958, "epoch": 3.1266076408631847, "grad_norm": 11.115756034851074, "learning_rate": 1.2486660707639112e-06, "loss": 0.37891927, "memory(GiB)": 34.88, "step": 115475, "train_speed(iter/s)": 0.411402 }, { "acc": 0.94985523, "epoch": 3.1267430211464005, "grad_norm": 6.910693168640137, "learning_rate": 1.2482962582137818e-06, "loss": 0.30003662, "memory(GiB)": 34.88, "step": 115480, "train_speed(iter/s)": 0.411403 }, { "acc": 0.93727465, "epoch": 3.126878401429616, "grad_norm": 12.341802597045898, "learning_rate": 1.2479264926673772e-06, "loss": 0.38821802, "memory(GiB)": 34.88, "step": 115485, "train_speed(iter/s)": 0.411404 }, { "acc": 0.93745975, "epoch": 3.127013781712831, "grad_norm": 14.937949180603027, "learning_rate": 1.2475567741293312e-06, "loss": 0.33985636, "memory(GiB)": 34.88, "step": 115490, "train_speed(iter/s)": 0.411405 }, { "acc": 0.93580208, "epoch": 3.127149161996047, "grad_norm": 6.2935590744018555, "learning_rate": 1.2471871026042738e-06, "loss": 0.3582413, "memory(GiB)": 34.88, "step": 115495, "train_speed(iter/s)": 0.411405 }, { "acc": 0.94675713, "epoch": 3.1272845422792623, "grad_norm": 11.305120468139648, "learning_rate": 1.2468174780968323e-06, "loss": 0.33399391, "memory(GiB)": 34.88, "step": 115500, "train_speed(iter/s)": 0.411406 }, { "acc": 0.95428553, "epoch": 3.127419922562478, "grad_norm": 4.505958557128906, "learning_rate": 1.246447900611644e-06, "loss": 0.2515749, "memory(GiB)": 34.88, "step": 115505, "train_speed(iter/s)": 0.411407 }, { "acc": 0.93341179, "epoch": 3.1275553028456935, "grad_norm": 5.566835880279541, "learning_rate": 1.2460783701533328e-06, "loss": 0.39752207, "memory(GiB)": 34.88, "step": 115510, "train_speed(iter/s)": 0.411408 }, { "acc": 0.93184929, "epoch": 3.1276906831289093, "grad_norm": 10.203583717346191, "learning_rate": 1.2457088867265305e-06, "loss": 0.41010733, "memory(GiB)": 34.88, "step": 115515, "train_speed(iter/s)": 0.411409 }, { "acc": 0.94911404, "epoch": 3.1278260634121247, "grad_norm": 4.1549224853515625, "learning_rate": 1.2453394503358644e-06, "loss": 0.3393379, "memory(GiB)": 34.88, "step": 115520, "train_speed(iter/s)": 0.41141 }, { "acc": 0.94985943, "epoch": 3.12796144369534, "grad_norm": 8.447393417358398, "learning_rate": 1.244970060985965e-06, "loss": 0.33575931, "memory(GiB)": 34.88, "step": 115525, "train_speed(iter/s)": 0.411411 }, { "acc": 0.93310795, "epoch": 3.128096823978556, "grad_norm": 12.673256874084473, "learning_rate": 1.2446007186814562e-06, "loss": 0.3940371, "memory(GiB)": 34.88, "step": 115530, "train_speed(iter/s)": 0.411412 }, { "acc": 0.92958908, "epoch": 3.128232204261771, "grad_norm": 7.1739678382873535, "learning_rate": 1.2442314234269658e-06, "loss": 0.44266596, "memory(GiB)": 34.88, "step": 115535, "train_speed(iter/s)": 0.411413 }, { "acc": 0.92862997, "epoch": 3.128367584544987, "grad_norm": 18.3870792388916, "learning_rate": 1.2438621752271221e-06, "loss": 0.42261729, "memory(GiB)": 34.88, "step": 115540, "train_speed(iter/s)": 0.411414 }, { "acc": 0.95316973, "epoch": 3.1285029648282023, "grad_norm": 10.110486030578613, "learning_rate": 1.2434929740865477e-06, "loss": 0.21808496, "memory(GiB)": 34.88, "step": 115545, "train_speed(iter/s)": 0.411415 }, { "acc": 0.94541397, "epoch": 3.128638345111418, "grad_norm": 12.731431007385254, "learning_rate": 1.2431238200098684e-06, "loss": 0.31965024, "memory(GiB)": 34.88, "step": 115550, "train_speed(iter/s)": 0.411416 }, { "acc": 0.94308414, "epoch": 3.1287737253946335, "grad_norm": 9.853626251220703, "learning_rate": 1.2427547130017094e-06, "loss": 0.34829984, "memory(GiB)": 34.88, "step": 115555, "train_speed(iter/s)": 0.411417 }, { "acc": 0.95148087, "epoch": 3.128909105677849, "grad_norm": 15.024888038635254, "learning_rate": 1.2423856530666955e-06, "loss": 0.32364173, "memory(GiB)": 34.88, "step": 115560, "train_speed(iter/s)": 0.411417 }, { "acc": 0.95129356, "epoch": 3.1290444859610647, "grad_norm": 4.228488922119141, "learning_rate": 1.2420166402094468e-06, "loss": 0.29174504, "memory(GiB)": 34.88, "step": 115565, "train_speed(iter/s)": 0.411418 }, { "acc": 0.95060225, "epoch": 3.12917986624428, "grad_norm": 5.275363922119141, "learning_rate": 1.2416476744345874e-06, "loss": 0.27344508, "memory(GiB)": 34.88, "step": 115570, "train_speed(iter/s)": 0.411419 }, { "acc": 0.94319448, "epoch": 3.129315246527496, "grad_norm": 10.252728462219238, "learning_rate": 1.24127875574674e-06, "loss": 0.33830721, "memory(GiB)": 34.88, "step": 115575, "train_speed(iter/s)": 0.41142 }, { "acc": 0.93666925, "epoch": 3.129450626810711, "grad_norm": 4.312109470367432, "learning_rate": 1.240909884150527e-06, "loss": 0.38503184, "memory(GiB)": 34.88, "step": 115580, "train_speed(iter/s)": 0.411421 }, { "acc": 0.95303984, "epoch": 3.129586007093927, "grad_norm": 5.130548000335693, "learning_rate": 1.2405410596505658e-06, "loss": 0.26716952, "memory(GiB)": 34.88, "step": 115585, "train_speed(iter/s)": 0.411422 }, { "acc": 0.94418449, "epoch": 3.1297213873771423, "grad_norm": 20.152387619018555, "learning_rate": 1.2401722822514793e-06, "loss": 0.30689335, "memory(GiB)": 34.88, "step": 115590, "train_speed(iter/s)": 0.411423 }, { "acc": 0.94179211, "epoch": 3.129856767660358, "grad_norm": 3.5846338272094727, "learning_rate": 1.2398035519578874e-06, "loss": 0.34213042, "memory(GiB)": 34.88, "step": 115595, "train_speed(iter/s)": 0.411424 }, { "acc": 0.93326511, "epoch": 3.1299921479435735, "grad_norm": 5.460762977600098, "learning_rate": 1.239434868774407e-06, "loss": 0.39910886, "memory(GiB)": 34.88, "step": 115600, "train_speed(iter/s)": 0.411425 }, { "acc": 0.95005779, "epoch": 3.130127528226789, "grad_norm": 5.215874195098877, "learning_rate": 1.2390662327056575e-06, "loss": 0.32504058, "memory(GiB)": 34.88, "step": 115605, "train_speed(iter/s)": 0.411426 }, { "acc": 0.94960976, "epoch": 3.1302629085100047, "grad_norm": 9.607478141784668, "learning_rate": 1.2386976437562576e-06, "loss": 0.32476397, "memory(GiB)": 34.88, "step": 115610, "train_speed(iter/s)": 0.411427 }, { "acc": 0.92071428, "epoch": 3.13039828879322, "grad_norm": 5.004888534545898, "learning_rate": 1.2383291019308258e-06, "loss": 0.46561337, "memory(GiB)": 34.88, "step": 115615, "train_speed(iter/s)": 0.411428 }, { "acc": 0.95266457, "epoch": 3.130533669076436, "grad_norm": 5.240689277648926, "learning_rate": 1.237960607233975e-06, "loss": 0.24842553, "memory(GiB)": 34.88, "step": 115620, "train_speed(iter/s)": 0.411429 }, { "acc": 0.95076008, "epoch": 3.130669049359651, "grad_norm": 5.473550319671631, "learning_rate": 1.2375921596703241e-06, "loss": 0.32949615, "memory(GiB)": 34.88, "step": 115625, "train_speed(iter/s)": 0.41143 }, { "acc": 0.94859104, "epoch": 3.130804429642867, "grad_norm": 6.056789398193359, "learning_rate": 1.2372237592444887e-06, "loss": 0.30418258, "memory(GiB)": 34.88, "step": 115630, "train_speed(iter/s)": 0.411431 }, { "acc": 0.93442059, "epoch": 3.1309398099260823, "grad_norm": 5.042393684387207, "learning_rate": 1.2368554059610822e-06, "loss": 0.41472378, "memory(GiB)": 34.88, "step": 115635, "train_speed(iter/s)": 0.411432 }, { "acc": 0.93850365, "epoch": 3.131075190209298, "grad_norm": 13.449243545532227, "learning_rate": 1.23648709982472e-06, "loss": 0.33589802, "memory(GiB)": 34.88, "step": 115640, "train_speed(iter/s)": 0.411432 }, { "acc": 0.93970184, "epoch": 3.1312105704925135, "grad_norm": 8.027094841003418, "learning_rate": 1.2361188408400151e-06, "loss": 0.29820466, "memory(GiB)": 34.88, "step": 115645, "train_speed(iter/s)": 0.411433 }, { "acc": 0.94282703, "epoch": 3.131345950775729, "grad_norm": 5.208310604095459, "learning_rate": 1.235750629011583e-06, "loss": 0.29596567, "memory(GiB)": 34.88, "step": 115650, "train_speed(iter/s)": 0.411434 }, { "acc": 0.945502, "epoch": 3.1314813310589447, "grad_norm": 10.346515655517578, "learning_rate": 1.2353824643440313e-06, "loss": 0.31006236, "memory(GiB)": 34.88, "step": 115655, "train_speed(iter/s)": 0.411435 }, { "acc": 0.95135765, "epoch": 3.13161671134216, "grad_norm": 3.376070022583008, "learning_rate": 1.235014346841978e-06, "loss": 0.31975739, "memory(GiB)": 34.88, "step": 115660, "train_speed(iter/s)": 0.411436 }, { "acc": 0.92913074, "epoch": 3.131752091625376, "grad_norm": 5.214447975158691, "learning_rate": 1.2346462765100322e-06, "loss": 0.34345598, "memory(GiB)": 34.88, "step": 115665, "train_speed(iter/s)": 0.411437 }, { "acc": 0.93807831, "epoch": 3.131887471908591, "grad_norm": 11.399694442749023, "learning_rate": 1.2342782533528023e-06, "loss": 0.35015659, "memory(GiB)": 34.88, "step": 115670, "train_speed(iter/s)": 0.411438 }, { "acc": 0.94420099, "epoch": 3.132022852191807, "grad_norm": 8.476682662963867, "learning_rate": 1.2339102773749009e-06, "loss": 0.29210548, "memory(GiB)": 34.88, "step": 115675, "train_speed(iter/s)": 0.411439 }, { "acc": 0.96372519, "epoch": 3.1321582324750223, "grad_norm": 7.527622699737549, "learning_rate": 1.233542348580937e-06, "loss": 0.20836668, "memory(GiB)": 34.88, "step": 115680, "train_speed(iter/s)": 0.41144 }, { "acc": 0.94268456, "epoch": 3.1322936127582377, "grad_norm": 7.044132709503174, "learning_rate": 1.2331744669755205e-06, "loss": 0.32307129, "memory(GiB)": 34.88, "step": 115685, "train_speed(iter/s)": 0.411441 }, { "acc": 0.96320362, "epoch": 3.1324289930414535, "grad_norm": 4.371480941772461, "learning_rate": 1.2328066325632583e-06, "loss": 0.2356607, "memory(GiB)": 34.88, "step": 115690, "train_speed(iter/s)": 0.411442 }, { "acc": 0.94119835, "epoch": 3.132564373324669, "grad_norm": 4.553900718688965, "learning_rate": 1.2324388453487584e-06, "loss": 0.33981705, "memory(GiB)": 34.88, "step": 115695, "train_speed(iter/s)": 0.411443 }, { "acc": 0.93166695, "epoch": 3.1326997536078847, "grad_norm": 5.539641857147217, "learning_rate": 1.2320711053366295e-06, "loss": 0.39501774, "memory(GiB)": 34.88, "step": 115700, "train_speed(iter/s)": 0.411444 }, { "acc": 0.94825115, "epoch": 3.1328351338911, "grad_norm": 6.090845108032227, "learning_rate": 1.2317034125314781e-06, "loss": 0.34781723, "memory(GiB)": 34.88, "step": 115705, "train_speed(iter/s)": 0.411444 }, { "acc": 0.94694538, "epoch": 3.132970514174316, "grad_norm": 7.597537994384766, "learning_rate": 1.231335766937909e-06, "loss": 0.25917525, "memory(GiB)": 34.88, "step": 115710, "train_speed(iter/s)": 0.411445 }, { "acc": 0.94595509, "epoch": 3.133105894457531, "grad_norm": 6.312422275543213, "learning_rate": 1.2309681685605275e-06, "loss": 0.29129925, "memory(GiB)": 34.88, "step": 115715, "train_speed(iter/s)": 0.411446 }, { "acc": 0.93573761, "epoch": 3.1332412747407465, "grad_norm": 7.727264404296875, "learning_rate": 1.230600617403941e-06, "loss": 0.42425466, "memory(GiB)": 34.88, "step": 115720, "train_speed(iter/s)": 0.411447 }, { "acc": 0.94291477, "epoch": 3.1333766550239623, "grad_norm": 8.855886459350586, "learning_rate": 1.2302331134727507e-06, "loss": 0.3028487, "memory(GiB)": 34.88, "step": 115725, "train_speed(iter/s)": 0.411448 }, { "acc": 0.94466743, "epoch": 3.1335120353071777, "grad_norm": 6.196475982666016, "learning_rate": 1.229865656771562e-06, "loss": 0.35422041, "memory(GiB)": 34.88, "step": 115730, "train_speed(iter/s)": 0.411449 }, { "acc": 0.92950039, "epoch": 3.1336474155903935, "grad_norm": 8.866673469543457, "learning_rate": 1.2294982473049773e-06, "loss": 0.43469553, "memory(GiB)": 34.88, "step": 115735, "train_speed(iter/s)": 0.41145 }, { "acc": 0.94474583, "epoch": 3.133782795873609, "grad_norm": 3.0865890979766846, "learning_rate": 1.2291308850776015e-06, "loss": 0.3244849, "memory(GiB)": 34.88, "step": 115740, "train_speed(iter/s)": 0.411451 }, { "acc": 0.94370775, "epoch": 3.1339181761568247, "grad_norm": 7.111426830291748, "learning_rate": 1.2287635700940331e-06, "loss": 0.31269922, "memory(GiB)": 34.88, "step": 115745, "train_speed(iter/s)": 0.411452 }, { "acc": 0.93101025, "epoch": 3.13405355644004, "grad_norm": 11.39690113067627, "learning_rate": 1.2283963023588748e-06, "loss": 0.4090374, "memory(GiB)": 34.88, "step": 115750, "train_speed(iter/s)": 0.411453 }, { "acc": 0.94687099, "epoch": 3.134188936723256, "grad_norm": 4.70632791519165, "learning_rate": 1.228029081876729e-06, "loss": 0.26965175, "memory(GiB)": 34.88, "step": 115755, "train_speed(iter/s)": 0.411454 }, { "acc": 0.94171867, "epoch": 3.134324317006471, "grad_norm": 5.964385986328125, "learning_rate": 1.2276619086521925e-06, "loss": 0.33238997, "memory(GiB)": 34.88, "step": 115760, "train_speed(iter/s)": 0.411455 }, { "acc": 0.93235207, "epoch": 3.1344596972896865, "grad_norm": 8.910579681396484, "learning_rate": 1.2272947826898668e-06, "loss": 0.433357, "memory(GiB)": 34.88, "step": 115765, "train_speed(iter/s)": 0.411456 }, { "acc": 0.94913082, "epoch": 3.1345950775729023, "grad_norm": 9.760139465332031, "learning_rate": 1.226927703994351e-06, "loss": 0.3679023, "memory(GiB)": 34.88, "step": 115770, "train_speed(iter/s)": 0.411457 }, { "acc": 0.93557663, "epoch": 3.1347304578561177, "grad_norm": 10.014039993286133, "learning_rate": 1.226560672570245e-06, "loss": 0.44165063, "memory(GiB)": 34.88, "step": 115775, "train_speed(iter/s)": 0.411457 }, { "acc": 0.93544836, "epoch": 3.1348658381393335, "grad_norm": 8.859853744506836, "learning_rate": 1.226193688422141e-06, "loss": 0.42052264, "memory(GiB)": 34.88, "step": 115780, "train_speed(iter/s)": 0.411458 }, { "acc": 0.94532804, "epoch": 3.135001218422549, "grad_norm": 7.214088439941406, "learning_rate": 1.2258267515546437e-06, "loss": 0.32037692, "memory(GiB)": 34.88, "step": 115785, "train_speed(iter/s)": 0.411459 }, { "acc": 0.94322147, "epoch": 3.1351365987057647, "grad_norm": 2.861264705657959, "learning_rate": 1.2254598619723463e-06, "loss": 0.35759811, "memory(GiB)": 34.88, "step": 115790, "train_speed(iter/s)": 0.41146 }, { "acc": 0.93485413, "epoch": 3.13527197898898, "grad_norm": 28.424577713012695, "learning_rate": 1.2250930196798425e-06, "loss": 0.41716604, "memory(GiB)": 34.88, "step": 115795, "train_speed(iter/s)": 0.411461 }, { "acc": 0.95105801, "epoch": 3.135407359272196, "grad_norm": 5.894168376922607, "learning_rate": 1.2247262246817302e-06, "loss": 0.3342993, "memory(GiB)": 34.88, "step": 115800, "train_speed(iter/s)": 0.411462 }, { "acc": 0.9539237, "epoch": 3.135542739555411, "grad_norm": 3.180647373199463, "learning_rate": 1.2243594769826034e-06, "loss": 0.24348869, "memory(GiB)": 34.88, "step": 115805, "train_speed(iter/s)": 0.411463 }, { "acc": 0.95004854, "epoch": 3.1356781198386265, "grad_norm": 16.286216735839844, "learning_rate": 1.2239927765870585e-06, "loss": 0.26961012, "memory(GiB)": 34.88, "step": 115810, "train_speed(iter/s)": 0.411464 }, { "acc": 0.94913406, "epoch": 3.1358135001218423, "grad_norm": 15.243717193603516, "learning_rate": 1.2236261234996845e-06, "loss": 0.36046474, "memory(GiB)": 34.88, "step": 115815, "train_speed(iter/s)": 0.411465 }, { "acc": 0.93749428, "epoch": 3.1359488804050577, "grad_norm": 15.898963928222656, "learning_rate": 1.22325951772508e-06, "loss": 0.32945011, "memory(GiB)": 34.88, "step": 115820, "train_speed(iter/s)": 0.411466 }, { "acc": 0.93693361, "epoch": 3.1360842606882735, "grad_norm": 3.882272720336914, "learning_rate": 1.2228929592678337e-06, "loss": 0.32895162, "memory(GiB)": 34.88, "step": 115825, "train_speed(iter/s)": 0.411467 }, { "acc": 0.94309711, "epoch": 3.136219640971489, "grad_norm": 7.7276835441589355, "learning_rate": 1.2225264481325399e-06, "loss": 0.27549067, "memory(GiB)": 34.88, "step": 115830, "train_speed(iter/s)": 0.411468 }, { "acc": 0.95593882, "epoch": 3.1363550212547047, "grad_norm": 6.480495929718018, "learning_rate": 1.222159984323787e-06, "loss": 0.27197385, "memory(GiB)": 34.88, "step": 115835, "train_speed(iter/s)": 0.411468 }, { "acc": 0.9428916, "epoch": 3.13649040153792, "grad_norm": 6.851790428161621, "learning_rate": 1.2217935678461668e-06, "loss": 0.36128898, "memory(GiB)": 34.88, "step": 115840, "train_speed(iter/s)": 0.411469 }, { "acc": 0.95271835, "epoch": 3.1366257818211354, "grad_norm": 16.63722801208496, "learning_rate": 1.221427198704272e-06, "loss": 0.27392335, "memory(GiB)": 34.88, "step": 115845, "train_speed(iter/s)": 0.41147 }, { "acc": 0.93469763, "epoch": 3.136761162104351, "grad_norm": 15.182523727416992, "learning_rate": 1.2210608769026864e-06, "loss": 0.40947218, "memory(GiB)": 34.88, "step": 115850, "train_speed(iter/s)": 0.411471 }, { "acc": 0.95255852, "epoch": 3.1368965423875665, "grad_norm": 6.048579216003418, "learning_rate": 1.220694602446006e-06, "loss": 0.32206414, "memory(GiB)": 34.88, "step": 115855, "train_speed(iter/s)": 0.411472 }, { "acc": 0.93145275, "epoch": 3.1370319226707823, "grad_norm": 9.199501991271973, "learning_rate": 1.2203283753388137e-06, "loss": 0.38454168, "memory(GiB)": 34.88, "step": 115860, "train_speed(iter/s)": 0.411473 }, { "acc": 0.95132275, "epoch": 3.1371673029539977, "grad_norm": 4.410810470581055, "learning_rate": 1.2199621955857004e-06, "loss": 0.28860981, "memory(GiB)": 34.88, "step": 115865, "train_speed(iter/s)": 0.411474 }, { "acc": 0.95244179, "epoch": 3.1373026832372135, "grad_norm": 2.9588513374328613, "learning_rate": 1.2195960631912488e-06, "loss": 0.22516308, "memory(GiB)": 34.88, "step": 115870, "train_speed(iter/s)": 0.411475 }, { "acc": 0.95109234, "epoch": 3.137438063520429, "grad_norm": 15.482598304748535, "learning_rate": 1.2192299781600518e-06, "loss": 0.29471149, "memory(GiB)": 34.88, "step": 115875, "train_speed(iter/s)": 0.411476 }, { "acc": 0.9340538, "epoch": 3.1375734438036442, "grad_norm": 6.24154806137085, "learning_rate": 1.218863940496692e-06, "loss": 0.41004314, "memory(GiB)": 34.88, "step": 115880, "train_speed(iter/s)": 0.411477 }, { "acc": 0.94080811, "epoch": 3.13770882408686, "grad_norm": 4.089641571044922, "learning_rate": 1.2184979502057513e-06, "loss": 0.32282307, "memory(GiB)": 34.88, "step": 115885, "train_speed(iter/s)": 0.411478 }, { "acc": 0.94315634, "epoch": 3.1378442043700754, "grad_norm": 11.119644165039062, "learning_rate": 1.218132007291821e-06, "loss": 0.32059407, "memory(GiB)": 34.88, "step": 115890, "train_speed(iter/s)": 0.411479 }, { "acc": 0.95434113, "epoch": 3.137979584653291, "grad_norm": 9.634580612182617, "learning_rate": 1.2177661117594813e-06, "loss": 0.24391236, "memory(GiB)": 34.88, "step": 115895, "train_speed(iter/s)": 0.41148 }, { "acc": 0.94414291, "epoch": 3.1381149649365065, "grad_norm": 6.347550392150879, "learning_rate": 1.217400263613317e-06, "loss": 0.34519925, "memory(GiB)": 34.88, "step": 115900, "train_speed(iter/s)": 0.411481 }, { "acc": 0.94213543, "epoch": 3.1382503452197223, "grad_norm": 8.900957107543945, "learning_rate": 1.2170344628579085e-06, "loss": 0.37979538, "memory(GiB)": 34.88, "step": 115905, "train_speed(iter/s)": 0.411482 }, { "acc": 0.94597549, "epoch": 3.1383857255029377, "grad_norm": 10.298111915588379, "learning_rate": 1.2166687094978432e-06, "loss": 0.36125746, "memory(GiB)": 34.88, "step": 115910, "train_speed(iter/s)": 0.411483 }, { "acc": 0.94500799, "epoch": 3.1385211057861535, "grad_norm": 9.72082805633545, "learning_rate": 1.2163030035377e-06, "loss": 0.32190614, "memory(GiB)": 34.88, "step": 115915, "train_speed(iter/s)": 0.411484 }, { "acc": 0.93462143, "epoch": 3.138656486069369, "grad_norm": 3.862305164337158, "learning_rate": 1.2159373449820568e-06, "loss": 0.37913957, "memory(GiB)": 34.88, "step": 115920, "train_speed(iter/s)": 0.411485 }, { "acc": 0.9338583, "epoch": 3.1387918663525842, "grad_norm": 5.6426825523376465, "learning_rate": 1.2155717338355011e-06, "loss": 0.40111489, "memory(GiB)": 34.88, "step": 115925, "train_speed(iter/s)": 0.411486 }, { "acc": 0.93687162, "epoch": 3.1389272466358, "grad_norm": 10.74612045288086, "learning_rate": 1.2152061701026074e-06, "loss": 0.32995071, "memory(GiB)": 34.88, "step": 115930, "train_speed(iter/s)": 0.411487 }, { "acc": 0.95076103, "epoch": 3.1390626269190154, "grad_norm": 5.605968475341797, "learning_rate": 1.2148406537879585e-06, "loss": 0.25662036, "memory(GiB)": 34.88, "step": 115935, "train_speed(iter/s)": 0.411488 }, { "acc": 0.94703579, "epoch": 3.139198007202231, "grad_norm": 6.25450325012207, "learning_rate": 1.2144751848961288e-06, "loss": 0.27120223, "memory(GiB)": 34.88, "step": 115940, "train_speed(iter/s)": 0.411489 }, { "acc": 0.94025764, "epoch": 3.1393333874854465, "grad_norm": 18.60090446472168, "learning_rate": 1.2141097634317023e-06, "loss": 0.32528889, "memory(GiB)": 34.88, "step": 115945, "train_speed(iter/s)": 0.41149 }, { "acc": 0.94824753, "epoch": 3.1394687677686623, "grad_norm": 7.123212814331055, "learning_rate": 1.213744389399253e-06, "loss": 0.29455068, "memory(GiB)": 34.88, "step": 115950, "train_speed(iter/s)": 0.41149 }, { "acc": 0.95171108, "epoch": 3.1396041480518777, "grad_norm": 6.965781211853027, "learning_rate": 1.2133790628033578e-06, "loss": 0.27411051, "memory(GiB)": 34.88, "step": 115955, "train_speed(iter/s)": 0.411491 }, { "acc": 0.94361105, "epoch": 3.1397395283350935, "grad_norm": 8.3383150100708, "learning_rate": 1.2130137836485956e-06, "loss": 0.36237552, "memory(GiB)": 34.88, "step": 115960, "train_speed(iter/s)": 0.411492 }, { "acc": 0.94442091, "epoch": 3.139874908618309, "grad_norm": 8.268048286437988, "learning_rate": 1.2126485519395396e-06, "loss": 0.26069417, "memory(GiB)": 34.88, "step": 115965, "train_speed(iter/s)": 0.411493 }, { "acc": 0.95580759, "epoch": 3.1400102889015242, "grad_norm": 7.779880046844482, "learning_rate": 1.2122833676807669e-06, "loss": 0.23477712, "memory(GiB)": 34.88, "step": 115970, "train_speed(iter/s)": 0.411494 }, { "acc": 0.93871021, "epoch": 3.14014566918474, "grad_norm": 5.381343364715576, "learning_rate": 1.211918230876849e-06, "loss": 0.39821672, "memory(GiB)": 34.88, "step": 115975, "train_speed(iter/s)": 0.411495 }, { "acc": 0.94961615, "epoch": 3.1402810494679554, "grad_norm": 22.60805320739746, "learning_rate": 1.211553141532365e-06, "loss": 0.33392677, "memory(GiB)": 34.88, "step": 115980, "train_speed(iter/s)": 0.411496 }, { "acc": 0.94487629, "epoch": 3.140416429751171, "grad_norm": 6.020902633666992, "learning_rate": 1.2111880996518847e-06, "loss": 0.31365552, "memory(GiB)": 34.88, "step": 115985, "train_speed(iter/s)": 0.411497 }, { "acc": 0.95179634, "epoch": 3.1405518100343865, "grad_norm": 4.307159423828125, "learning_rate": 1.2108231052399811e-06, "loss": 0.31318364, "memory(GiB)": 34.88, "step": 115990, "train_speed(iter/s)": 0.411498 }, { "acc": 0.96188011, "epoch": 3.1406871903176024, "grad_norm": 14.916147232055664, "learning_rate": 1.2104581583012284e-06, "loss": 0.24346919, "memory(GiB)": 34.88, "step": 115995, "train_speed(iter/s)": 0.411499 }, { "acc": 0.95222397, "epoch": 3.1408225706008177, "grad_norm": 5.70045280456543, "learning_rate": 1.2100932588401978e-06, "loss": 0.26140842, "memory(GiB)": 34.88, "step": 116000, "train_speed(iter/s)": 0.4115 }, { "acc": 0.92353382, "epoch": 3.140957950884033, "grad_norm": 8.854701042175293, "learning_rate": 1.2097284068614596e-06, "loss": 0.40947905, "memory(GiB)": 34.88, "step": 116005, "train_speed(iter/s)": 0.411501 }, { "acc": 0.93707905, "epoch": 3.141093331167249, "grad_norm": 8.777352333068848, "learning_rate": 1.2093636023695824e-06, "loss": 0.41912727, "memory(GiB)": 34.88, "step": 116010, "train_speed(iter/s)": 0.411502 }, { "acc": 0.95062752, "epoch": 3.1412287114504642, "grad_norm": 5.5217108726501465, "learning_rate": 1.20899884536914e-06, "loss": 0.28159912, "memory(GiB)": 34.88, "step": 116015, "train_speed(iter/s)": 0.411503 }, { "acc": 0.92497921, "epoch": 3.14136409173368, "grad_norm": 13.465141296386719, "learning_rate": 1.208634135864699e-06, "loss": 0.47355509, "memory(GiB)": 34.88, "step": 116020, "train_speed(iter/s)": 0.411504 }, { "acc": 0.92634792, "epoch": 3.1414994720168954, "grad_norm": 6.996499538421631, "learning_rate": 1.2082694738608293e-06, "loss": 0.42999148, "memory(GiB)": 34.88, "step": 116025, "train_speed(iter/s)": 0.411504 }, { "acc": 0.95073833, "epoch": 3.141634852300111, "grad_norm": 6.737989902496338, "learning_rate": 1.207904859362098e-06, "loss": 0.32061677, "memory(GiB)": 34.88, "step": 116030, "train_speed(iter/s)": 0.411505 }, { "acc": 0.95597534, "epoch": 3.1417702325833265, "grad_norm": 6.311367511749268, "learning_rate": 1.2075402923730752e-06, "loss": 0.22098975, "memory(GiB)": 34.88, "step": 116035, "train_speed(iter/s)": 0.411506 }, { "acc": 0.94833708, "epoch": 3.141905612866542, "grad_norm": 5.000535011291504, "learning_rate": 1.2071757728983255e-06, "loss": 0.27546029, "memory(GiB)": 34.88, "step": 116040, "train_speed(iter/s)": 0.411507 }, { "acc": 0.96298161, "epoch": 3.1420409931497577, "grad_norm": 14.860284805297852, "learning_rate": 1.2068113009424128e-06, "loss": 0.18619187, "memory(GiB)": 34.88, "step": 116045, "train_speed(iter/s)": 0.411508 }, { "acc": 0.94222527, "epoch": 3.142176373432973, "grad_norm": 6.822541236877441, "learning_rate": 1.2064468765099088e-06, "loss": 0.34811234, "memory(GiB)": 34.88, "step": 116050, "train_speed(iter/s)": 0.411509 }, { "acc": 0.94130077, "epoch": 3.142311753716189, "grad_norm": 7.109817028045654, "learning_rate": 1.2060824996053737e-06, "loss": 0.36701906, "memory(GiB)": 34.88, "step": 116055, "train_speed(iter/s)": 0.41151 }, { "acc": 0.93733292, "epoch": 3.1424471339994042, "grad_norm": 10.950242042541504, "learning_rate": 1.2057181702333758e-06, "loss": 0.37309859, "memory(GiB)": 34.88, "step": 116060, "train_speed(iter/s)": 0.411511 }, { "acc": 0.95008335, "epoch": 3.14258251428262, "grad_norm": 11.208638191223145, "learning_rate": 1.2053538883984733e-06, "loss": 0.33156137, "memory(GiB)": 34.88, "step": 116065, "train_speed(iter/s)": 0.411512 }, { "acc": 0.92049999, "epoch": 3.1427178945658354, "grad_norm": 11.96521282196045, "learning_rate": 1.2049896541052371e-06, "loss": 0.5206408, "memory(GiB)": 34.88, "step": 116070, "train_speed(iter/s)": 0.411513 }, { "acc": 0.95019445, "epoch": 3.1428532748490507, "grad_norm": 5.778404235839844, "learning_rate": 1.2046254673582238e-06, "loss": 0.24924886, "memory(GiB)": 34.88, "step": 116075, "train_speed(iter/s)": 0.411514 }, { "acc": 0.94438572, "epoch": 3.1429886551322666, "grad_norm": 11.669181823730469, "learning_rate": 1.2042613281619984e-06, "loss": 0.30798388, "memory(GiB)": 34.88, "step": 116080, "train_speed(iter/s)": 0.411515 }, { "acc": 0.94290695, "epoch": 3.143124035415482, "grad_norm": 4.3013081550598145, "learning_rate": 1.2038972365211234e-06, "loss": 0.25453246, "memory(GiB)": 34.88, "step": 116085, "train_speed(iter/s)": 0.411516 }, { "acc": 0.94333477, "epoch": 3.1432594156986977, "grad_norm": 7.601227283477783, "learning_rate": 1.203533192440157e-06, "loss": 0.30377421, "memory(GiB)": 34.88, "step": 116090, "train_speed(iter/s)": 0.411517 }, { "acc": 0.94069271, "epoch": 3.143394795981913, "grad_norm": 8.532804489135742, "learning_rate": 1.2031691959236623e-06, "loss": 0.36881962, "memory(GiB)": 34.88, "step": 116095, "train_speed(iter/s)": 0.411518 }, { "acc": 0.94798203, "epoch": 3.143530176265129, "grad_norm": 7.437159061431885, "learning_rate": 1.2028052469761944e-06, "loss": 0.27159739, "memory(GiB)": 34.88, "step": 116100, "train_speed(iter/s)": 0.411519 }, { "acc": 0.95362816, "epoch": 3.1436655565483442, "grad_norm": 3.409707546234131, "learning_rate": 1.2024413456023186e-06, "loss": 0.19295573, "memory(GiB)": 34.88, "step": 116105, "train_speed(iter/s)": 0.41152 }, { "acc": 0.945718, "epoch": 3.14380093683156, "grad_norm": 8.862354278564453, "learning_rate": 1.2020774918065898e-06, "loss": 0.31902461, "memory(GiB)": 34.88, "step": 116110, "train_speed(iter/s)": 0.411521 }, { "acc": 0.9536829, "epoch": 3.1439363171147754, "grad_norm": 5.83359956741333, "learning_rate": 1.2017136855935664e-06, "loss": 0.28518739, "memory(GiB)": 34.88, "step": 116115, "train_speed(iter/s)": 0.411521 }, { "acc": 0.95560837, "epoch": 3.1440716973979908, "grad_norm": 2.2813234329223633, "learning_rate": 1.2013499269678062e-06, "loss": 0.20853546, "memory(GiB)": 34.88, "step": 116120, "train_speed(iter/s)": 0.411522 }, { "acc": 0.93186378, "epoch": 3.1442070776812066, "grad_norm": 9.449980735778809, "learning_rate": 1.2009862159338677e-06, "loss": 0.4021009, "memory(GiB)": 34.88, "step": 116125, "train_speed(iter/s)": 0.411523 }, { "acc": 0.93926849, "epoch": 3.144342457964422, "grad_norm": 6.88524055480957, "learning_rate": 1.200622552496305e-06, "loss": 0.35821552, "memory(GiB)": 34.88, "step": 116130, "train_speed(iter/s)": 0.411524 }, { "acc": 0.94590397, "epoch": 3.1444778382476377, "grad_norm": 9.683549880981445, "learning_rate": 1.200258936659671e-06, "loss": 0.32700615, "memory(GiB)": 34.88, "step": 116135, "train_speed(iter/s)": 0.411525 }, { "acc": 0.93185616, "epoch": 3.144613218530853, "grad_norm": 9.901189804077148, "learning_rate": 1.1998953684285276e-06, "loss": 0.43960671, "memory(GiB)": 34.88, "step": 116140, "train_speed(iter/s)": 0.411526 }, { "acc": 0.95236721, "epoch": 3.144748598814069, "grad_norm": 3.3800342082977295, "learning_rate": 1.1995318478074229e-06, "loss": 0.36586545, "memory(GiB)": 34.88, "step": 116145, "train_speed(iter/s)": 0.411527 }, { "acc": 0.93842068, "epoch": 3.1448839790972842, "grad_norm": 6.544695854187012, "learning_rate": 1.1991683748009134e-06, "loss": 0.33553271, "memory(GiB)": 34.88, "step": 116150, "train_speed(iter/s)": 0.411528 }, { "acc": 0.94677734, "epoch": 3.1450193593805, "grad_norm": 7.994531154632568, "learning_rate": 1.198804949413553e-06, "loss": 0.30442133, "memory(GiB)": 34.88, "step": 116155, "train_speed(iter/s)": 0.411529 }, { "acc": 0.95379572, "epoch": 3.1451547396637154, "grad_norm": 3.910297155380249, "learning_rate": 1.1984415716498938e-06, "loss": 0.30492187, "memory(GiB)": 34.88, "step": 116160, "train_speed(iter/s)": 0.41153 }, { "acc": 0.92929096, "epoch": 3.1452901199469308, "grad_norm": 5.82739782333374, "learning_rate": 1.1980782415144878e-06, "loss": 0.39124956, "memory(GiB)": 34.88, "step": 116165, "train_speed(iter/s)": 0.411531 }, { "acc": 0.94214478, "epoch": 3.1454255002301466, "grad_norm": 7.816900253295898, "learning_rate": 1.1977149590118836e-06, "loss": 0.28348961, "memory(GiB)": 34.88, "step": 116170, "train_speed(iter/s)": 0.411532 }, { "acc": 0.94179325, "epoch": 3.145560880513362, "grad_norm": 3.6356394290924072, "learning_rate": 1.1973517241466366e-06, "loss": 0.33771193, "memory(GiB)": 34.88, "step": 116175, "train_speed(iter/s)": 0.411532 }, { "acc": 0.92418156, "epoch": 3.1456962607965777, "grad_norm": 7.3747968673706055, "learning_rate": 1.1969885369232944e-06, "loss": 0.42975006, "memory(GiB)": 34.88, "step": 116180, "train_speed(iter/s)": 0.411533 }, { "acc": 0.94116802, "epoch": 3.145831641079793, "grad_norm": 8.235875129699707, "learning_rate": 1.1966253973464066e-06, "loss": 0.29578745, "memory(GiB)": 34.88, "step": 116185, "train_speed(iter/s)": 0.411534 }, { "acc": 0.94400921, "epoch": 3.145967021363009, "grad_norm": 8.851386070251465, "learning_rate": 1.1962623054205227e-06, "loss": 0.31478136, "memory(GiB)": 34.88, "step": 116190, "train_speed(iter/s)": 0.411535 }, { "acc": 0.93034439, "epoch": 3.1461024016462242, "grad_norm": 7.765681266784668, "learning_rate": 1.1958992611501927e-06, "loss": 0.48948898, "memory(GiB)": 34.88, "step": 116195, "train_speed(iter/s)": 0.411536 }, { "acc": 0.94458189, "epoch": 3.1462377819294396, "grad_norm": 7.075214385986328, "learning_rate": 1.1955362645399616e-06, "loss": 0.30970378, "memory(GiB)": 34.88, "step": 116200, "train_speed(iter/s)": 0.411537 }, { "acc": 0.94389153, "epoch": 3.1463731622126554, "grad_norm": 11.1925687789917, "learning_rate": 1.1951733155943784e-06, "loss": 0.34564486, "memory(GiB)": 34.88, "step": 116205, "train_speed(iter/s)": 0.411538 }, { "acc": 0.9471981, "epoch": 3.1465085424958708, "grad_norm": 33.947994232177734, "learning_rate": 1.1948104143179906e-06, "loss": 0.32240949, "memory(GiB)": 34.88, "step": 116210, "train_speed(iter/s)": 0.411539 }, { "acc": 0.92500057, "epoch": 3.1466439227790866, "grad_norm": 12.608474731445312, "learning_rate": 1.1944475607153412e-06, "loss": 0.4055079, "memory(GiB)": 34.88, "step": 116215, "train_speed(iter/s)": 0.411539 }, { "acc": 0.94281197, "epoch": 3.146779303062302, "grad_norm": 9.145695686340332, "learning_rate": 1.1940847547909777e-06, "loss": 0.34119301, "memory(GiB)": 34.88, "step": 116220, "train_speed(iter/s)": 0.41154 }, { "acc": 0.95109863, "epoch": 3.1469146833455177, "grad_norm": 21.878097534179688, "learning_rate": 1.1937219965494454e-06, "loss": 0.3057575, "memory(GiB)": 34.88, "step": 116225, "train_speed(iter/s)": 0.411541 }, { "acc": 0.94197197, "epoch": 3.147050063628733, "grad_norm": 8.80807876586914, "learning_rate": 1.1933592859952884e-06, "loss": 0.31311951, "memory(GiB)": 34.88, "step": 116230, "train_speed(iter/s)": 0.411542 }, { "acc": 0.94179668, "epoch": 3.1471854439119484, "grad_norm": 7.196295738220215, "learning_rate": 1.1929966231330492e-06, "loss": 0.38711026, "memory(GiB)": 34.88, "step": 116235, "train_speed(iter/s)": 0.411543 }, { "acc": 0.94632473, "epoch": 3.1473208241951642, "grad_norm": 9.306206703186035, "learning_rate": 1.1926340079672708e-06, "loss": 0.33632402, "memory(GiB)": 34.88, "step": 116240, "train_speed(iter/s)": 0.411544 }, { "acc": 0.94450188, "epoch": 3.1474562044783796, "grad_norm": 2.917837619781494, "learning_rate": 1.1922714405024963e-06, "loss": 0.28104191, "memory(GiB)": 34.88, "step": 116245, "train_speed(iter/s)": 0.411545 }, { "acc": 0.93391895, "epoch": 3.1475915847615954, "grad_norm": 8.410682678222656, "learning_rate": 1.19190892074327e-06, "loss": 0.41677403, "memory(GiB)": 34.88, "step": 116250, "train_speed(iter/s)": 0.411546 }, { "acc": 0.94424257, "epoch": 3.1477269650448108, "grad_norm": 5.017004013061523, "learning_rate": 1.1915464486941285e-06, "loss": 0.3061429, "memory(GiB)": 34.88, "step": 116255, "train_speed(iter/s)": 0.411546 }, { "acc": 0.9598177, "epoch": 3.1478623453280266, "grad_norm": 10.791626930236816, "learning_rate": 1.1911840243596152e-06, "loss": 0.22593613, "memory(GiB)": 34.88, "step": 116260, "train_speed(iter/s)": 0.411547 }, { "acc": 0.942869, "epoch": 3.147997725611242, "grad_norm": 8.550834655761719, "learning_rate": 1.1908216477442717e-06, "loss": 0.34005995, "memory(GiB)": 34.88, "step": 116265, "train_speed(iter/s)": 0.411548 }, { "acc": 0.94588223, "epoch": 3.1481331058944577, "grad_norm": 3.0472612380981445, "learning_rate": 1.1904593188526334e-06, "loss": 0.31153884, "memory(GiB)": 34.88, "step": 116270, "train_speed(iter/s)": 0.411549 }, { "acc": 0.93936787, "epoch": 3.148268486177673, "grad_norm": 5.589937686920166, "learning_rate": 1.190097037689242e-06, "loss": 0.37488639, "memory(GiB)": 34.88, "step": 116275, "train_speed(iter/s)": 0.41155 }, { "acc": 0.94896746, "epoch": 3.1484038664608884, "grad_norm": 12.192925453186035, "learning_rate": 1.1897348042586344e-06, "loss": 0.26310031, "memory(GiB)": 34.88, "step": 116280, "train_speed(iter/s)": 0.411551 }, { "acc": 0.95389156, "epoch": 3.1485392467441042, "grad_norm": 10.167092323303223, "learning_rate": 1.1893726185653513e-06, "loss": 0.35273261, "memory(GiB)": 34.88, "step": 116285, "train_speed(iter/s)": 0.411552 }, { "acc": 0.94872561, "epoch": 3.1486746270273196, "grad_norm": 4.7114481925964355, "learning_rate": 1.189010480613925e-06, "loss": 0.31988654, "memory(GiB)": 34.88, "step": 116290, "train_speed(iter/s)": 0.411553 }, { "acc": 0.95542259, "epoch": 3.1488100073105354, "grad_norm": 11.29472541809082, "learning_rate": 1.188648390408895e-06, "loss": 0.24195962, "memory(GiB)": 34.88, "step": 116295, "train_speed(iter/s)": 0.411554 }, { "acc": 0.95751934, "epoch": 3.1489453875937508, "grad_norm": 5.498472690582275, "learning_rate": 1.1882863479547982e-06, "loss": 0.27662261, "memory(GiB)": 34.88, "step": 116300, "train_speed(iter/s)": 0.411555 }, { "acc": 0.94439754, "epoch": 3.1490807678769666, "grad_norm": 9.44593620300293, "learning_rate": 1.1879243532561674e-06, "loss": 0.32942474, "memory(GiB)": 34.88, "step": 116305, "train_speed(iter/s)": 0.411556 }, { "acc": 0.94864426, "epoch": 3.149216148160182, "grad_norm": 6.0929999351501465, "learning_rate": 1.1875624063175377e-06, "loss": 0.29843616, "memory(GiB)": 34.88, "step": 116310, "train_speed(iter/s)": 0.411557 }, { "acc": 0.9347086, "epoch": 3.1493515284433977, "grad_norm": 10.798422813415527, "learning_rate": 1.1872005071434434e-06, "loss": 0.38727589, "memory(GiB)": 34.88, "step": 116315, "train_speed(iter/s)": 0.411557 }, { "acc": 0.94115334, "epoch": 3.149486908726613, "grad_norm": 3.409817695617676, "learning_rate": 1.1868386557384206e-06, "loss": 0.34129303, "memory(GiB)": 34.88, "step": 116320, "train_speed(iter/s)": 0.411558 }, { "acc": 0.93168526, "epoch": 3.1496222890098284, "grad_norm": 8.530426979064941, "learning_rate": 1.186476852106998e-06, "loss": 0.36835568, "memory(GiB)": 34.88, "step": 116325, "train_speed(iter/s)": 0.411559 }, { "acc": 0.95623379, "epoch": 3.1497576692930442, "grad_norm": 4.195878982543945, "learning_rate": 1.18611509625371e-06, "loss": 0.27608356, "memory(GiB)": 34.88, "step": 116330, "train_speed(iter/s)": 0.41156 }, { "acc": 0.93856926, "epoch": 3.1498930495762596, "grad_norm": 13.13721752166748, "learning_rate": 1.1857533881830898e-06, "loss": 0.45520954, "memory(GiB)": 34.88, "step": 116335, "train_speed(iter/s)": 0.411561 }, { "acc": 0.94807844, "epoch": 3.1500284298594754, "grad_norm": 3.652132987976074, "learning_rate": 1.1853917278996655e-06, "loss": 0.29867125, "memory(GiB)": 34.88, "step": 116340, "train_speed(iter/s)": 0.411562 }, { "acc": 0.94186649, "epoch": 3.1501638101426908, "grad_norm": 6.473381042480469, "learning_rate": 1.1850301154079691e-06, "loss": 0.32771916, "memory(GiB)": 34.88, "step": 116345, "train_speed(iter/s)": 0.411562 }, { "acc": 0.93858833, "epoch": 3.1502991904259066, "grad_norm": 8.628554344177246, "learning_rate": 1.18466855071253e-06, "loss": 0.37510374, "memory(GiB)": 34.88, "step": 116350, "train_speed(iter/s)": 0.411563 }, { "acc": 0.9455904, "epoch": 3.150434570709122, "grad_norm": 8.211512565612793, "learning_rate": 1.1843070338178794e-06, "loss": 0.34676809, "memory(GiB)": 34.88, "step": 116355, "train_speed(iter/s)": 0.411564 }, { "acc": 0.94815559, "epoch": 3.1505699509923373, "grad_norm": 5.131237030029297, "learning_rate": 1.183945564728543e-06, "loss": 0.35430231, "memory(GiB)": 34.88, "step": 116360, "train_speed(iter/s)": 0.411565 }, { "acc": 0.93435316, "epoch": 3.150705331275553, "grad_norm": 5.883212089538574, "learning_rate": 1.18358414344905e-06, "loss": 0.36569633, "memory(GiB)": 34.88, "step": 116365, "train_speed(iter/s)": 0.411566 }, { "acc": 0.94346848, "epoch": 3.1508407115587684, "grad_norm": 4.756065845489502, "learning_rate": 1.183222769983929e-06, "loss": 0.25822427, "memory(GiB)": 34.88, "step": 116370, "train_speed(iter/s)": 0.411567 }, { "acc": 0.95159655, "epoch": 3.1509760918419842, "grad_norm": 5.089731216430664, "learning_rate": 1.1828614443377078e-06, "loss": 0.20713916, "memory(GiB)": 34.88, "step": 116375, "train_speed(iter/s)": 0.411568 }, { "acc": 0.94544144, "epoch": 3.1511114721251996, "grad_norm": 4.592984199523926, "learning_rate": 1.1825001665149093e-06, "loss": 0.31149735, "memory(GiB)": 34.88, "step": 116380, "train_speed(iter/s)": 0.411569 }, { "acc": 0.93804092, "epoch": 3.1512468524084154, "grad_norm": 6.2923383712768555, "learning_rate": 1.1821389365200612e-06, "loss": 0.34673734, "memory(GiB)": 34.88, "step": 116385, "train_speed(iter/s)": 0.41157 }, { "acc": 0.93405514, "epoch": 3.1513822326916308, "grad_norm": 6.295688629150391, "learning_rate": 1.1817777543576902e-06, "loss": 0.44712334, "memory(GiB)": 34.88, "step": 116390, "train_speed(iter/s)": 0.411571 }, { "acc": 0.94727345, "epoch": 3.151517612974846, "grad_norm": 6.12484884262085, "learning_rate": 1.181416620032317e-06, "loss": 0.28507829, "memory(GiB)": 34.88, "step": 116395, "train_speed(iter/s)": 0.411572 }, { "acc": 0.9318119, "epoch": 3.151652993258062, "grad_norm": 4.5841827392578125, "learning_rate": 1.181055533548468e-06, "loss": 0.4119863, "memory(GiB)": 34.88, "step": 116400, "train_speed(iter/s)": 0.411573 }, { "acc": 0.94126453, "epoch": 3.1517883735412773, "grad_norm": 4.992959022521973, "learning_rate": 1.1806944949106666e-06, "loss": 0.39308178, "memory(GiB)": 34.88, "step": 116405, "train_speed(iter/s)": 0.411574 }, { "acc": 0.93641052, "epoch": 3.151923753824493, "grad_norm": 6.33773136138916, "learning_rate": 1.1803335041234361e-06, "loss": 0.36470466, "memory(GiB)": 34.88, "step": 116410, "train_speed(iter/s)": 0.411575 }, { "acc": 0.93521194, "epoch": 3.1520591341077084, "grad_norm": 7.388062953948975, "learning_rate": 1.1799725611912944e-06, "loss": 0.38422813, "memory(GiB)": 34.88, "step": 116415, "train_speed(iter/s)": 0.411576 }, { "acc": 0.9344862, "epoch": 3.1521945143909242, "grad_norm": 3.251166582107544, "learning_rate": 1.1796116661187693e-06, "loss": 0.4400423, "memory(GiB)": 34.88, "step": 116420, "train_speed(iter/s)": 0.411576 }, { "acc": 0.93871727, "epoch": 3.1523298946741396, "grad_norm": 6.288413047790527, "learning_rate": 1.1792508189103785e-06, "loss": 0.28869865, "memory(GiB)": 34.88, "step": 116425, "train_speed(iter/s)": 0.411577 }, { "acc": 0.93522139, "epoch": 3.1524652749573554, "grad_norm": 11.892373085021973, "learning_rate": 1.178890019570641e-06, "loss": 0.32314, "memory(GiB)": 34.88, "step": 116430, "train_speed(iter/s)": 0.411578 }, { "acc": 0.94250755, "epoch": 3.1526006552405708, "grad_norm": 9.883463859558105, "learning_rate": 1.1785292681040784e-06, "loss": 0.3130446, "memory(GiB)": 34.88, "step": 116435, "train_speed(iter/s)": 0.411579 }, { "acc": 0.92671595, "epoch": 3.152736035523786, "grad_norm": 4.083057403564453, "learning_rate": 1.178168564515209e-06, "loss": 0.4081418, "memory(GiB)": 34.88, "step": 116440, "train_speed(iter/s)": 0.41158 }, { "acc": 0.93312969, "epoch": 3.152871415807002, "grad_norm": 7.753925323486328, "learning_rate": 1.1778079088085534e-06, "loss": 0.37705572, "memory(GiB)": 34.88, "step": 116445, "train_speed(iter/s)": 0.41158 }, { "acc": 0.95274963, "epoch": 3.1530067960902173, "grad_norm": 3.798246383666992, "learning_rate": 1.1774473009886245e-06, "loss": 0.25534391, "memory(GiB)": 34.88, "step": 116450, "train_speed(iter/s)": 0.411581 }, { "acc": 0.92156935, "epoch": 3.153142176373433, "grad_norm": 11.05787181854248, "learning_rate": 1.177086741059946e-06, "loss": 0.40507312, "memory(GiB)": 34.88, "step": 116455, "train_speed(iter/s)": 0.411582 }, { "acc": 0.93991594, "epoch": 3.1532775566566484, "grad_norm": 5.288904190063477, "learning_rate": 1.1767262290270318e-06, "loss": 0.4148922, "memory(GiB)": 34.88, "step": 116460, "train_speed(iter/s)": 0.411583 }, { "acc": 0.95623932, "epoch": 3.1534129369398642, "grad_norm": 6.940585613250732, "learning_rate": 1.1763657648943963e-06, "loss": 0.27448006, "memory(GiB)": 34.88, "step": 116465, "train_speed(iter/s)": 0.411584 }, { "acc": 0.94699917, "epoch": 3.1535483172230796, "grad_norm": 6.223932266235352, "learning_rate": 1.1760053486665565e-06, "loss": 0.27565508, "memory(GiB)": 34.88, "step": 116470, "train_speed(iter/s)": 0.411584 }, { "acc": 0.93207998, "epoch": 3.1536836975062954, "grad_norm": 6.395752906799316, "learning_rate": 1.1756449803480267e-06, "loss": 0.35798657, "memory(GiB)": 34.88, "step": 116475, "train_speed(iter/s)": 0.411586 }, { "acc": 0.94771986, "epoch": 3.1538190777895108, "grad_norm": 4.07954216003418, "learning_rate": 1.1752846599433238e-06, "loss": 0.35203784, "memory(GiB)": 34.88, "step": 116480, "train_speed(iter/s)": 0.411586 }, { "acc": 0.93870363, "epoch": 3.153954458072726, "grad_norm": 8.133879661560059, "learning_rate": 1.1749243874569565e-06, "loss": 0.35684392, "memory(GiB)": 34.88, "step": 116485, "train_speed(iter/s)": 0.411587 }, { "acc": 0.95069008, "epoch": 3.154089838355942, "grad_norm": 12.369906425476074, "learning_rate": 1.1745641628934442e-06, "loss": 0.28207972, "memory(GiB)": 34.88, "step": 116490, "train_speed(iter/s)": 0.411588 }, { "acc": 0.95490427, "epoch": 3.1542252186391573, "grad_norm": 4.252717971801758, "learning_rate": 1.1742039862572945e-06, "loss": 0.28473635, "memory(GiB)": 34.88, "step": 116495, "train_speed(iter/s)": 0.411589 }, { "acc": 0.93447208, "epoch": 3.154360598922373, "grad_norm": 4.313098907470703, "learning_rate": 1.1738438575530226e-06, "loss": 0.3460433, "memory(GiB)": 34.88, "step": 116500, "train_speed(iter/s)": 0.41159 }, { "acc": 0.93475666, "epoch": 3.1544959792055884, "grad_norm": 3.6330442428588867, "learning_rate": 1.173483776785137e-06, "loss": 0.37470758, "memory(GiB)": 34.88, "step": 116505, "train_speed(iter/s)": 0.411591 }, { "acc": 0.95251465, "epoch": 3.1546313594888042, "grad_norm": 4.85272741317749, "learning_rate": 1.1731237439581491e-06, "loss": 0.29151692, "memory(GiB)": 34.88, "step": 116510, "train_speed(iter/s)": 0.411592 }, { "acc": 0.96488142, "epoch": 3.1547667397720196, "grad_norm": 3.248974323272705, "learning_rate": 1.1727637590765717e-06, "loss": 0.18065398, "memory(GiB)": 34.88, "step": 116515, "train_speed(iter/s)": 0.411593 }, { "acc": 0.93528233, "epoch": 3.154902120055235, "grad_norm": 5.524534225463867, "learning_rate": 1.172403822144909e-06, "loss": 0.4115499, "memory(GiB)": 34.88, "step": 116520, "train_speed(iter/s)": 0.411594 }, { "acc": 0.95329933, "epoch": 3.1550375003384508, "grad_norm": 8.330841064453125, "learning_rate": 1.172043933167676e-06, "loss": 0.22405863, "memory(GiB)": 34.88, "step": 116525, "train_speed(iter/s)": 0.411595 }, { "acc": 0.96019001, "epoch": 3.155172880621666, "grad_norm": 3.344050645828247, "learning_rate": 1.1716840921493768e-06, "loss": 0.21229534, "memory(GiB)": 34.88, "step": 116530, "train_speed(iter/s)": 0.411596 }, { "acc": 0.95004616, "epoch": 3.155308260904882, "grad_norm": 5.75408411026001, "learning_rate": 1.171324299094522e-06, "loss": 0.34312882, "memory(GiB)": 34.88, "step": 116535, "train_speed(iter/s)": 0.411597 }, { "acc": 0.93591576, "epoch": 3.1554436411880973, "grad_norm": 31.155502319335938, "learning_rate": 1.1709645540076136e-06, "loss": 0.4266593, "memory(GiB)": 34.88, "step": 116540, "train_speed(iter/s)": 0.411598 }, { "acc": 0.93695965, "epoch": 3.155579021471313, "grad_norm": 9.05888557434082, "learning_rate": 1.170604856893165e-06, "loss": 0.37743154, "memory(GiB)": 34.88, "step": 116545, "train_speed(iter/s)": 0.411598 }, { "acc": 0.93238525, "epoch": 3.1557144017545284, "grad_norm": 4.816989898681641, "learning_rate": 1.170245207755679e-06, "loss": 0.40266638, "memory(GiB)": 34.88, "step": 116550, "train_speed(iter/s)": 0.411599 }, { "acc": 0.94728403, "epoch": 3.155849782037744, "grad_norm": 9.196627616882324, "learning_rate": 1.1698856065996581e-06, "loss": 0.35826523, "memory(GiB)": 34.88, "step": 116555, "train_speed(iter/s)": 0.4116 }, { "acc": 0.95470562, "epoch": 3.1559851623209596, "grad_norm": 5.7225775718688965, "learning_rate": 1.1695260534296117e-06, "loss": 0.26403387, "memory(GiB)": 34.88, "step": 116560, "train_speed(iter/s)": 0.411601 }, { "acc": 0.94370708, "epoch": 3.156120542604175, "grad_norm": 9.092876434326172, "learning_rate": 1.1691665482500407e-06, "loss": 0.33098769, "memory(GiB)": 34.88, "step": 116565, "train_speed(iter/s)": 0.411602 }, { "acc": 0.93569193, "epoch": 3.1562559228873908, "grad_norm": 18.956947326660156, "learning_rate": 1.1688070910654512e-06, "loss": 0.38872809, "memory(GiB)": 34.88, "step": 116570, "train_speed(iter/s)": 0.411603 }, { "acc": 0.9578661, "epoch": 3.156391303170606, "grad_norm": 5.11350679397583, "learning_rate": 1.1684476818803416e-06, "loss": 0.21527209, "memory(GiB)": 34.88, "step": 116575, "train_speed(iter/s)": 0.411604 }, { "acc": 0.94657631, "epoch": 3.156526683453822, "grad_norm": 4.471937656402588, "learning_rate": 1.1680883206992207e-06, "loss": 0.28280952, "memory(GiB)": 34.88, "step": 116580, "train_speed(iter/s)": 0.411605 }, { "acc": 0.95181999, "epoch": 3.1566620637370373, "grad_norm": 7.050179481506348, "learning_rate": 1.1677290075265866e-06, "loss": 0.30340996, "memory(GiB)": 34.88, "step": 116585, "train_speed(iter/s)": 0.411606 }, { "acc": 0.93959646, "epoch": 3.156797444020253, "grad_norm": 7.680400848388672, "learning_rate": 1.1673697423669376e-06, "loss": 0.35916402, "memory(GiB)": 34.88, "step": 116590, "train_speed(iter/s)": 0.411606 }, { "acc": 0.94965935, "epoch": 3.1569328243034684, "grad_norm": 9.160852432250977, "learning_rate": 1.1670105252247796e-06, "loss": 0.28246527, "memory(GiB)": 34.88, "step": 116595, "train_speed(iter/s)": 0.411607 }, { "acc": 0.94969635, "epoch": 3.157068204586684, "grad_norm": 4.763954162597656, "learning_rate": 1.1666513561046091e-06, "loss": 0.28279467, "memory(GiB)": 34.88, "step": 116600, "train_speed(iter/s)": 0.411608 }, { "acc": 0.94862041, "epoch": 3.1572035848698996, "grad_norm": 7.679755210876465, "learning_rate": 1.1662922350109274e-06, "loss": 0.30869586, "memory(GiB)": 34.88, "step": 116605, "train_speed(iter/s)": 0.411609 }, { "acc": 0.935042, "epoch": 3.157338965153115, "grad_norm": 6.082271575927734, "learning_rate": 1.16593316194823e-06, "loss": 0.34493639, "memory(GiB)": 34.88, "step": 116610, "train_speed(iter/s)": 0.41161 }, { "acc": 0.93378792, "epoch": 3.1574743454363308, "grad_norm": 9.192596435546875, "learning_rate": 1.1655741369210195e-06, "loss": 0.31614225, "memory(GiB)": 34.88, "step": 116615, "train_speed(iter/s)": 0.411611 }, { "acc": 0.94239225, "epoch": 3.157609725719546, "grad_norm": 2.478101968765259, "learning_rate": 1.1652151599337896e-06, "loss": 0.33289988, "memory(GiB)": 34.88, "step": 116620, "train_speed(iter/s)": 0.411612 }, { "acc": 0.95145435, "epoch": 3.157745106002762, "grad_norm": 7.738214492797852, "learning_rate": 1.16485623099104e-06, "loss": 0.31974971, "memory(GiB)": 34.88, "step": 116625, "train_speed(iter/s)": 0.411613 }, { "acc": 0.94247627, "epoch": 3.1578804862859773, "grad_norm": 7.241184234619141, "learning_rate": 1.1644973500972647e-06, "loss": 0.31251805, "memory(GiB)": 34.88, "step": 116630, "train_speed(iter/s)": 0.411613 }, { "acc": 0.92963619, "epoch": 3.158015866569193, "grad_norm": 5.892845630645752, "learning_rate": 1.1641385172569598e-06, "loss": 0.40251355, "memory(GiB)": 34.88, "step": 116635, "train_speed(iter/s)": 0.411614 }, { "acc": 0.93034525, "epoch": 3.1581512468524084, "grad_norm": 9.605283737182617, "learning_rate": 1.1637797324746221e-06, "loss": 0.41293602, "memory(GiB)": 34.88, "step": 116640, "train_speed(iter/s)": 0.411615 }, { "acc": 0.95170059, "epoch": 3.158286627135624, "grad_norm": 8.22826862335205, "learning_rate": 1.1634209957547425e-06, "loss": 0.31365674, "memory(GiB)": 34.88, "step": 116645, "train_speed(iter/s)": 0.411616 }, { "acc": 0.92022562, "epoch": 3.1584220074188396, "grad_norm": 15.879738807678223, "learning_rate": 1.1630623071018207e-06, "loss": 0.4536869, "memory(GiB)": 34.88, "step": 116650, "train_speed(iter/s)": 0.411617 }, { "acc": 0.94789009, "epoch": 3.158557387702055, "grad_norm": 6.099061489105225, "learning_rate": 1.162703666520344e-06, "loss": 0.31757765, "memory(GiB)": 34.88, "step": 116655, "train_speed(iter/s)": 0.411618 }, { "acc": 0.94939213, "epoch": 3.1586927679852708, "grad_norm": 2.3508870601654053, "learning_rate": 1.16234507401481e-06, "loss": 0.31617332, "memory(GiB)": 34.88, "step": 116660, "train_speed(iter/s)": 0.411619 }, { "acc": 0.94819107, "epoch": 3.158828148268486, "grad_norm": 6.888943672180176, "learning_rate": 1.1619865295897048e-06, "loss": 0.21359937, "memory(GiB)": 34.88, "step": 116665, "train_speed(iter/s)": 0.41162 }, { "acc": 0.95277462, "epoch": 3.158963528551702, "grad_norm": 9.555177688598633, "learning_rate": 1.1616280332495268e-06, "loss": 0.32153792, "memory(GiB)": 34.88, "step": 116670, "train_speed(iter/s)": 0.411621 }, { "acc": 0.9399478, "epoch": 3.1590989088349173, "grad_norm": 12.365703582763672, "learning_rate": 1.1612695849987632e-06, "loss": 0.37581367, "memory(GiB)": 34.88, "step": 116675, "train_speed(iter/s)": 0.411622 }, { "acc": 0.96885662, "epoch": 3.1592342891181326, "grad_norm": 5.557831287384033, "learning_rate": 1.1609111848419024e-06, "loss": 0.19616321, "memory(GiB)": 34.88, "step": 116680, "train_speed(iter/s)": 0.411623 }, { "acc": 0.96136675, "epoch": 3.1593696694013484, "grad_norm": 5.316304683685303, "learning_rate": 1.1605528327834385e-06, "loss": 0.23601584, "memory(GiB)": 34.88, "step": 116685, "train_speed(iter/s)": 0.411624 }, { "acc": 0.92012424, "epoch": 3.159505049684564, "grad_norm": 9.39434814453125, "learning_rate": 1.1601945288278574e-06, "loss": 0.46537309, "memory(GiB)": 34.88, "step": 116690, "train_speed(iter/s)": 0.411625 }, { "acc": 0.94626141, "epoch": 3.1596404299677796, "grad_norm": 8.607404708862305, "learning_rate": 1.1598362729796497e-06, "loss": 0.32105246, "memory(GiB)": 34.88, "step": 116695, "train_speed(iter/s)": 0.411626 }, { "acc": 0.95433331, "epoch": 3.159775810250995, "grad_norm": 4.135648727416992, "learning_rate": 1.159478065243299e-06, "loss": 0.23353972, "memory(GiB)": 34.88, "step": 116700, "train_speed(iter/s)": 0.411626 }, { "acc": 0.95801544, "epoch": 3.1599111905342108, "grad_norm": 5.5726847648620605, "learning_rate": 1.1591199056232987e-06, "loss": 0.2585022, "memory(GiB)": 34.88, "step": 116705, "train_speed(iter/s)": 0.411627 }, { "acc": 0.92760124, "epoch": 3.160046570817426, "grad_norm": 6.08608341217041, "learning_rate": 1.1587617941241324e-06, "loss": 0.37840014, "memory(GiB)": 34.88, "step": 116710, "train_speed(iter/s)": 0.411628 }, { "acc": 0.92585564, "epoch": 3.1601819511006415, "grad_norm": 4.733512878417969, "learning_rate": 1.1584037307502831e-06, "loss": 0.46891546, "memory(GiB)": 34.88, "step": 116715, "train_speed(iter/s)": 0.411629 }, { "acc": 0.94308224, "epoch": 3.1603173313838573, "grad_norm": 8.264744758605957, "learning_rate": 1.1580457155062425e-06, "loss": 0.37338989, "memory(GiB)": 34.88, "step": 116720, "train_speed(iter/s)": 0.41163 }, { "acc": 0.93999538, "epoch": 3.1604527116670726, "grad_norm": 9.668625831604004, "learning_rate": 1.1576877483964904e-06, "loss": 0.33883004, "memory(GiB)": 34.88, "step": 116725, "train_speed(iter/s)": 0.411631 }, { "acc": 0.94355946, "epoch": 3.1605880919502884, "grad_norm": 10.352089881896973, "learning_rate": 1.1573298294255149e-06, "loss": 0.36270108, "memory(GiB)": 34.88, "step": 116730, "train_speed(iter/s)": 0.411632 }, { "acc": 0.93300772, "epoch": 3.160723472233504, "grad_norm": 10.469576835632324, "learning_rate": 1.1569719585977942e-06, "loss": 0.42015023, "memory(GiB)": 34.88, "step": 116735, "train_speed(iter/s)": 0.411633 }, { "acc": 0.95181589, "epoch": 3.1608588525167196, "grad_norm": 10.67402458190918, "learning_rate": 1.1566141359178176e-06, "loss": 0.28269734, "memory(GiB)": 34.88, "step": 116740, "train_speed(iter/s)": 0.411634 }, { "acc": 0.95416031, "epoch": 3.160994232799935, "grad_norm": 6.541703224182129, "learning_rate": 1.1562563613900632e-06, "loss": 0.26552482, "memory(GiB)": 34.88, "step": 116745, "train_speed(iter/s)": 0.411635 }, { "acc": 0.94182606, "epoch": 3.1611296130831508, "grad_norm": 9.5392484664917, "learning_rate": 1.1558986350190147e-06, "loss": 0.34561214, "memory(GiB)": 34.88, "step": 116750, "train_speed(iter/s)": 0.411636 }, { "acc": 0.94026794, "epoch": 3.161264993366366, "grad_norm": 7.402912139892578, "learning_rate": 1.1555409568091537e-06, "loss": 0.3234406, "memory(GiB)": 34.88, "step": 116755, "train_speed(iter/s)": 0.411637 }, { "acc": 0.93877201, "epoch": 3.1614003736495815, "grad_norm": 13.528666496276855, "learning_rate": 1.1551833267649593e-06, "loss": 0.42307644, "memory(GiB)": 34.88, "step": 116760, "train_speed(iter/s)": 0.411638 }, { "acc": 0.94400282, "epoch": 3.1615357539327973, "grad_norm": 9.419768333435059, "learning_rate": 1.1548257448909134e-06, "loss": 0.30348406, "memory(GiB)": 34.88, "step": 116765, "train_speed(iter/s)": 0.411639 }, { "acc": 0.94720802, "epoch": 3.1616711342160126, "grad_norm": 5.700157642364502, "learning_rate": 1.1544682111914918e-06, "loss": 0.32784863, "memory(GiB)": 34.88, "step": 116770, "train_speed(iter/s)": 0.41164 }, { "acc": 0.94312344, "epoch": 3.1618065144992284, "grad_norm": 9.58617115020752, "learning_rate": 1.154110725671178e-06, "loss": 0.35113511, "memory(GiB)": 34.88, "step": 116775, "train_speed(iter/s)": 0.41164 }, { "acc": 0.94344368, "epoch": 3.161941894782444, "grad_norm": 8.67011547088623, "learning_rate": 1.1537532883344468e-06, "loss": 0.32474132, "memory(GiB)": 34.88, "step": 116780, "train_speed(iter/s)": 0.411641 }, { "acc": 0.94175243, "epoch": 3.1620772750656596, "grad_norm": 2.412001848220825, "learning_rate": 1.1533958991857767e-06, "loss": 0.40282135, "memory(GiB)": 34.88, "step": 116785, "train_speed(iter/s)": 0.411642 }, { "acc": 0.94513721, "epoch": 3.162212655348875, "grad_norm": 7.339171409606934, "learning_rate": 1.153038558229646e-06, "loss": 0.33346064, "memory(GiB)": 34.88, "step": 116790, "train_speed(iter/s)": 0.411643 }, { "acc": 0.93546057, "epoch": 3.1623480356320908, "grad_norm": 7.161612033843994, "learning_rate": 1.1526812654705307e-06, "loss": 0.2954689, "memory(GiB)": 34.88, "step": 116795, "train_speed(iter/s)": 0.411644 }, { "acc": 0.948353, "epoch": 3.162483415915306, "grad_norm": 8.903919219970703, "learning_rate": 1.1523240209129065e-06, "loss": 0.24796336, "memory(GiB)": 34.88, "step": 116800, "train_speed(iter/s)": 0.411645 }, { "acc": 0.95959988, "epoch": 3.1626187961985215, "grad_norm": 4.118229866027832, "learning_rate": 1.151966824561245e-06, "loss": 0.26739984, "memory(GiB)": 34.88, "step": 116805, "train_speed(iter/s)": 0.411646 }, { "acc": 0.93387146, "epoch": 3.1627541764817373, "grad_norm": 7.771075248718262, "learning_rate": 1.1516096764200263e-06, "loss": 0.43566484, "memory(GiB)": 34.88, "step": 116810, "train_speed(iter/s)": 0.411647 }, { "acc": 0.93216267, "epoch": 3.1628895567649526, "grad_norm": 4.520243167877197, "learning_rate": 1.1512525764937207e-06, "loss": 0.36791043, "memory(GiB)": 34.88, "step": 116815, "train_speed(iter/s)": 0.411648 }, { "acc": 0.95143337, "epoch": 3.1630249370481685, "grad_norm": 18.437114715576172, "learning_rate": 1.1508955247868033e-06, "loss": 0.33340156, "memory(GiB)": 34.88, "step": 116820, "train_speed(iter/s)": 0.411649 }, { "acc": 0.94490709, "epoch": 3.163160317331384, "grad_norm": 6.354610443115234, "learning_rate": 1.150538521303746e-06, "loss": 0.33893013, "memory(GiB)": 34.88, "step": 116825, "train_speed(iter/s)": 0.41165 }, { "acc": 0.93279619, "epoch": 3.1632956976145996, "grad_norm": 5.287130355834961, "learning_rate": 1.1501815660490232e-06, "loss": 0.38211935, "memory(GiB)": 34.88, "step": 116830, "train_speed(iter/s)": 0.411651 }, { "acc": 0.95202847, "epoch": 3.163431077897815, "grad_norm": 8.030465126037598, "learning_rate": 1.1498246590271046e-06, "loss": 0.28548305, "memory(GiB)": 34.88, "step": 116835, "train_speed(iter/s)": 0.411651 }, { "acc": 0.93315086, "epoch": 3.1635664581810303, "grad_norm": 6.674051284790039, "learning_rate": 1.149467800242458e-06, "loss": 0.39832101, "memory(GiB)": 34.88, "step": 116840, "train_speed(iter/s)": 0.411652 }, { "acc": 0.94083977, "epoch": 3.163701838464246, "grad_norm": 11.708048820495605, "learning_rate": 1.1491109896995595e-06, "loss": 0.36288543, "memory(GiB)": 34.88, "step": 116845, "train_speed(iter/s)": 0.411653 }, { "acc": 0.96756697, "epoch": 3.1638372187474615, "grad_norm": 9.378734588623047, "learning_rate": 1.1487542274028755e-06, "loss": 0.20912132, "memory(GiB)": 34.88, "step": 116850, "train_speed(iter/s)": 0.411654 }, { "acc": 0.94840546, "epoch": 3.1639725990306773, "grad_norm": 21.656267166137695, "learning_rate": 1.1483975133568758e-06, "loss": 0.30687768, "memory(GiB)": 34.88, "step": 116855, "train_speed(iter/s)": 0.411655 }, { "acc": 0.94564819, "epoch": 3.1641079793138926, "grad_norm": 6.657719612121582, "learning_rate": 1.1480408475660297e-06, "loss": 0.3229696, "memory(GiB)": 34.88, "step": 116860, "train_speed(iter/s)": 0.411656 }, { "acc": 0.94812641, "epoch": 3.1642433595971085, "grad_norm": 5.216877460479736, "learning_rate": 1.147684230034805e-06, "loss": 0.34362431, "memory(GiB)": 34.88, "step": 116865, "train_speed(iter/s)": 0.411657 }, { "acc": 0.94247761, "epoch": 3.164378739880324, "grad_norm": 5.241128444671631, "learning_rate": 1.1473276607676678e-06, "loss": 0.39037127, "memory(GiB)": 34.88, "step": 116870, "train_speed(iter/s)": 0.411658 }, { "acc": 0.964147, "epoch": 3.164514120163539, "grad_norm": 3.809671640396118, "learning_rate": 1.1469711397690854e-06, "loss": 0.20568571, "memory(GiB)": 34.88, "step": 116875, "train_speed(iter/s)": 0.411659 }, { "acc": 0.94363928, "epoch": 3.164649500446755, "grad_norm": 10.379622459411621, "learning_rate": 1.1466146670435255e-06, "loss": 0.26993012, "memory(GiB)": 34.88, "step": 116880, "train_speed(iter/s)": 0.41166 }, { "acc": 0.94553661, "epoch": 3.1647848807299703, "grad_norm": 14.125800132751465, "learning_rate": 1.146258242595451e-06, "loss": 0.32264581, "memory(GiB)": 34.88, "step": 116885, "train_speed(iter/s)": 0.411661 }, { "acc": 0.95172644, "epoch": 3.164920261013186, "grad_norm": 3.726020574569702, "learning_rate": 1.145901866429328e-06, "loss": 0.28185644, "memory(GiB)": 34.88, "step": 116890, "train_speed(iter/s)": 0.411662 }, { "acc": 0.9413456, "epoch": 3.1650556412964015, "grad_norm": 7.4076738357543945, "learning_rate": 1.1455455385496211e-06, "loss": 0.2735436, "memory(GiB)": 34.88, "step": 116895, "train_speed(iter/s)": 0.411663 }, { "acc": 0.95835619, "epoch": 3.1651910215796173, "grad_norm": 7.155093193054199, "learning_rate": 1.1451892589607948e-06, "loss": 0.31645188, "memory(GiB)": 34.88, "step": 116900, "train_speed(iter/s)": 0.411664 }, { "acc": 0.9336153, "epoch": 3.1653264018628327, "grad_norm": 4.741881847381592, "learning_rate": 1.14483302766731e-06, "loss": 0.43979578, "memory(GiB)": 34.88, "step": 116905, "train_speed(iter/s)": 0.411665 }, { "acc": 0.94516945, "epoch": 3.1654617821460485, "grad_norm": 11.571866989135742, "learning_rate": 1.1444768446736308e-06, "loss": 0.31719866, "memory(GiB)": 34.88, "step": 116910, "train_speed(iter/s)": 0.411666 }, { "acc": 0.93837624, "epoch": 3.165597162429264, "grad_norm": 5.655649185180664, "learning_rate": 1.1441207099842182e-06, "loss": 0.39084375, "memory(GiB)": 34.88, "step": 116915, "train_speed(iter/s)": 0.411667 }, { "acc": 0.96364698, "epoch": 3.165732542712479, "grad_norm": 8.519224166870117, "learning_rate": 1.1437646236035354e-06, "loss": 0.21413648, "memory(GiB)": 34.88, "step": 116920, "train_speed(iter/s)": 0.411668 }, { "acc": 0.95320034, "epoch": 3.165867922995695, "grad_norm": 2.9449610710144043, "learning_rate": 1.1434085855360402e-06, "loss": 0.2647758, "memory(GiB)": 34.88, "step": 116925, "train_speed(iter/s)": 0.411668 }, { "acc": 0.93373823, "epoch": 3.1660033032789103, "grad_norm": 11.069287300109863, "learning_rate": 1.1430525957861944e-06, "loss": 0.325102, "memory(GiB)": 34.88, "step": 116930, "train_speed(iter/s)": 0.411669 }, { "acc": 0.93387203, "epoch": 3.166138683562126, "grad_norm": 9.878409385681152, "learning_rate": 1.1426966543584584e-06, "loss": 0.42283492, "memory(GiB)": 34.88, "step": 116935, "train_speed(iter/s)": 0.41167 }, { "acc": 0.9528429, "epoch": 3.1662740638453415, "grad_norm": 4.099017143249512, "learning_rate": 1.1423407612572888e-06, "loss": 0.2572432, "memory(GiB)": 34.88, "step": 116940, "train_speed(iter/s)": 0.411671 }, { "acc": 0.95453062, "epoch": 3.1664094441285573, "grad_norm": 6.856090545654297, "learning_rate": 1.1419849164871447e-06, "loss": 0.22158492, "memory(GiB)": 34.88, "step": 116945, "train_speed(iter/s)": 0.411672 }, { "acc": 0.95294476, "epoch": 3.1665448244117727, "grad_norm": 5.613617420196533, "learning_rate": 1.1416291200524838e-06, "loss": 0.27429943, "memory(GiB)": 34.88, "step": 116950, "train_speed(iter/s)": 0.411673 }, { "acc": 0.94865742, "epoch": 3.1666802046949885, "grad_norm": 20.546361923217773, "learning_rate": 1.1412733719577652e-06, "loss": 0.33410687, "memory(GiB)": 34.88, "step": 116955, "train_speed(iter/s)": 0.411674 }, { "acc": 0.93831997, "epoch": 3.166815584978204, "grad_norm": 7.825664520263672, "learning_rate": 1.1409176722074417e-06, "loss": 0.31367252, "memory(GiB)": 34.88, "step": 116960, "train_speed(iter/s)": 0.411675 }, { "acc": 0.95884228, "epoch": 3.166950965261419, "grad_norm": 6.282392978668213, "learning_rate": 1.1405620208059711e-06, "loss": 0.22379713, "memory(GiB)": 34.88, "step": 116965, "train_speed(iter/s)": 0.411676 }, { "acc": 0.95031338, "epoch": 3.167086345544635, "grad_norm": 10.542338371276855, "learning_rate": 1.1402064177578099e-06, "loss": 0.34953475, "memory(GiB)": 34.88, "step": 116970, "train_speed(iter/s)": 0.411677 }, { "acc": 0.93464336, "epoch": 3.1672217258278503, "grad_norm": 8.561079978942871, "learning_rate": 1.1398508630674097e-06, "loss": 0.33905108, "memory(GiB)": 34.88, "step": 116975, "train_speed(iter/s)": 0.411677 }, { "acc": 0.95432796, "epoch": 3.167357106111066, "grad_norm": 12.399690628051758, "learning_rate": 1.139495356739226e-06, "loss": 0.25117788, "memory(GiB)": 34.88, "step": 116980, "train_speed(iter/s)": 0.411678 }, { "acc": 0.93990803, "epoch": 3.1674924863942815, "grad_norm": 7.458199977874756, "learning_rate": 1.1391398987777123e-06, "loss": 0.32127428, "memory(GiB)": 34.88, "step": 116985, "train_speed(iter/s)": 0.411679 }, { "acc": 0.95286942, "epoch": 3.1676278666774973, "grad_norm": 4.350050926208496, "learning_rate": 1.1387844891873228e-06, "loss": 0.25918055, "memory(GiB)": 34.88, "step": 116990, "train_speed(iter/s)": 0.41168 }, { "acc": 0.9562727, "epoch": 3.1677632469607127, "grad_norm": 6.206615924835205, "learning_rate": 1.1384291279725076e-06, "loss": 0.2835535, "memory(GiB)": 34.88, "step": 116995, "train_speed(iter/s)": 0.411681 }, { "acc": 0.95264854, "epoch": 3.167898627243928, "grad_norm": 3.8201282024383545, "learning_rate": 1.1380738151377186e-06, "loss": 0.25625193, "memory(GiB)": 34.88, "step": 117000, "train_speed(iter/s)": 0.411682 }, { "acc": 0.95013771, "epoch": 3.168034007527144, "grad_norm": 7.135578632354736, "learning_rate": 1.1377185506874084e-06, "loss": 0.33858347, "memory(GiB)": 34.88, "step": 117005, "train_speed(iter/s)": 0.411683 }, { "acc": 0.94732256, "epoch": 3.168169387810359, "grad_norm": 9.355311393737793, "learning_rate": 1.1373633346260248e-06, "loss": 0.36266196, "memory(GiB)": 34.88, "step": 117010, "train_speed(iter/s)": 0.411684 }, { "acc": 0.94050941, "epoch": 3.168304768093575, "grad_norm": 5.154936790466309, "learning_rate": 1.1370081669580195e-06, "loss": 0.35777793, "memory(GiB)": 34.88, "step": 117015, "train_speed(iter/s)": 0.411685 }, { "acc": 0.93615208, "epoch": 3.1684401483767903, "grad_norm": 8.090279579162598, "learning_rate": 1.1366530476878406e-06, "loss": 0.39957626, "memory(GiB)": 34.88, "step": 117020, "train_speed(iter/s)": 0.411686 }, { "acc": 0.95013294, "epoch": 3.168575528660006, "grad_norm": 6.7895941734313965, "learning_rate": 1.136297976819939e-06, "loss": 0.24027102, "memory(GiB)": 34.88, "step": 117025, "train_speed(iter/s)": 0.411686 }, { "acc": 0.94133902, "epoch": 3.1687109089432215, "grad_norm": 19.309307098388672, "learning_rate": 1.1359429543587595e-06, "loss": 0.42154436, "memory(GiB)": 34.88, "step": 117030, "train_speed(iter/s)": 0.411687 }, { "acc": 0.94648399, "epoch": 3.168846289226437, "grad_norm": 5.63029670715332, "learning_rate": 1.1355879803087503e-06, "loss": 0.34074972, "memory(GiB)": 34.88, "step": 117035, "train_speed(iter/s)": 0.411688 }, { "acc": 0.94550533, "epoch": 3.1689816695096527, "grad_norm": 4.168134689331055, "learning_rate": 1.1352330546743591e-06, "loss": 0.31766233, "memory(GiB)": 34.88, "step": 117040, "train_speed(iter/s)": 0.411689 }, { "acc": 0.94392214, "epoch": 3.169117049792868, "grad_norm": 6.223016262054443, "learning_rate": 1.1348781774600334e-06, "loss": 0.34890375, "memory(GiB)": 34.88, "step": 117045, "train_speed(iter/s)": 0.41169 }, { "acc": 0.96140461, "epoch": 3.169252430076084, "grad_norm": 3.4105682373046875, "learning_rate": 1.1345233486702147e-06, "loss": 0.21182745, "memory(GiB)": 34.88, "step": 117050, "train_speed(iter/s)": 0.411691 }, { "acc": 0.94416351, "epoch": 3.169387810359299, "grad_norm": 5.811272144317627, "learning_rate": 1.1341685683093508e-06, "loss": 0.3626986, "memory(GiB)": 34.88, "step": 117055, "train_speed(iter/s)": 0.411692 }, { "acc": 0.93276672, "epoch": 3.169523190642515, "grad_norm": 10.14543342590332, "learning_rate": 1.1338138363818868e-06, "loss": 0.38788781, "memory(GiB)": 34.88, "step": 117060, "train_speed(iter/s)": 0.411693 }, { "acc": 0.95520458, "epoch": 3.1696585709257303, "grad_norm": 4.832751750946045, "learning_rate": 1.1334591528922631e-06, "loss": 0.25504727, "memory(GiB)": 34.88, "step": 117065, "train_speed(iter/s)": 0.411694 }, { "acc": 0.94887314, "epoch": 3.169793951208946, "grad_norm": 12.743269920349121, "learning_rate": 1.1331045178449253e-06, "loss": 0.32666249, "memory(GiB)": 34.88, "step": 117070, "train_speed(iter/s)": 0.411695 }, { "acc": 0.94040232, "epoch": 3.1699293314921615, "grad_norm": 8.537797927856445, "learning_rate": 1.1327499312443152e-06, "loss": 0.30614138, "memory(GiB)": 34.88, "step": 117075, "train_speed(iter/s)": 0.411695 }, { "acc": 0.93543968, "epoch": 3.170064711775377, "grad_norm": 17.356287002563477, "learning_rate": 1.1323953930948764e-06, "loss": 0.42087412, "memory(GiB)": 34.88, "step": 117080, "train_speed(iter/s)": 0.411696 }, { "acc": 0.94254189, "epoch": 3.1702000920585927, "grad_norm": 8.350663185119629, "learning_rate": 1.1320409034010455e-06, "loss": 0.29831638, "memory(GiB)": 34.88, "step": 117085, "train_speed(iter/s)": 0.411697 }, { "acc": 0.94897423, "epoch": 3.170335472341808, "grad_norm": 6.648813724517822, "learning_rate": 1.131686462167269e-06, "loss": 0.29639742, "memory(GiB)": 34.88, "step": 117090, "train_speed(iter/s)": 0.411698 }, { "acc": 0.9520669, "epoch": 3.170470852625024, "grad_norm": 9.526266098022461, "learning_rate": 1.1313320693979844e-06, "loss": 0.30527084, "memory(GiB)": 34.88, "step": 117095, "train_speed(iter/s)": 0.411699 }, { "acc": 0.94846096, "epoch": 3.170606232908239, "grad_norm": 8.12003231048584, "learning_rate": 1.1309777250976295e-06, "loss": 0.35588963, "memory(GiB)": 34.88, "step": 117100, "train_speed(iter/s)": 0.4117 }, { "acc": 0.94129019, "epoch": 3.170741613191455, "grad_norm": 7.294321060180664, "learning_rate": 1.1306234292706448e-06, "loss": 0.36742916, "memory(GiB)": 34.88, "step": 117105, "train_speed(iter/s)": 0.411701 }, { "acc": 0.94504652, "epoch": 3.1708769934746703, "grad_norm": 6.148138046264648, "learning_rate": 1.1302691819214686e-06, "loss": 0.30594559, "memory(GiB)": 34.88, "step": 117110, "train_speed(iter/s)": 0.411702 }, { "acc": 0.94728098, "epoch": 3.1710123737578857, "grad_norm": 6.866785526275635, "learning_rate": 1.1299149830545398e-06, "loss": 0.28210988, "memory(GiB)": 34.88, "step": 117115, "train_speed(iter/s)": 0.411703 }, { "acc": 0.93824158, "epoch": 3.1711477540411015, "grad_norm": 2.736887216567993, "learning_rate": 1.129560832674291e-06, "loss": 0.39120817, "memory(GiB)": 34.88, "step": 117120, "train_speed(iter/s)": 0.411704 }, { "acc": 0.94301329, "epoch": 3.171283134324317, "grad_norm": 4.956617832183838, "learning_rate": 1.1292067307851648e-06, "loss": 0.38500905, "memory(GiB)": 34.88, "step": 117125, "train_speed(iter/s)": 0.411705 }, { "acc": 0.93978539, "epoch": 3.1714185146075327, "grad_norm": 10.406518936157227, "learning_rate": 1.1288526773915934e-06, "loss": 0.33829002, "memory(GiB)": 34.88, "step": 117130, "train_speed(iter/s)": 0.411706 }, { "acc": 0.94056091, "epoch": 3.171553894890748, "grad_norm": 5.213208198547363, "learning_rate": 1.1284986724980117e-06, "loss": 0.34097004, "memory(GiB)": 34.88, "step": 117135, "train_speed(iter/s)": 0.411707 }, { "acc": 0.94068403, "epoch": 3.171689275173964, "grad_norm": 5.815193176269531, "learning_rate": 1.1281447161088544e-06, "loss": 0.3782182, "memory(GiB)": 34.88, "step": 117140, "train_speed(iter/s)": 0.411708 }, { "acc": 0.94394684, "epoch": 3.171824655457179, "grad_norm": 13.613211631774902, "learning_rate": 1.1277908082285565e-06, "loss": 0.29546576, "memory(GiB)": 34.88, "step": 117145, "train_speed(iter/s)": 0.411708 }, { "acc": 0.95391932, "epoch": 3.171960035740395, "grad_norm": 8.51413631439209, "learning_rate": 1.1274369488615526e-06, "loss": 0.25798812, "memory(GiB)": 34.88, "step": 117150, "train_speed(iter/s)": 0.411709 }, { "acc": 0.9299346, "epoch": 3.1720954160236103, "grad_norm": 9.55956745147705, "learning_rate": 1.1270831380122708e-06, "loss": 0.39829466, "memory(GiB)": 34.88, "step": 117155, "train_speed(iter/s)": 0.41171 }, { "acc": 0.94590855, "epoch": 3.1722307963068257, "grad_norm": 7.363542556762695, "learning_rate": 1.1267293756851496e-06, "loss": 0.33102055, "memory(GiB)": 34.88, "step": 117160, "train_speed(iter/s)": 0.411711 }, { "acc": 0.93920078, "epoch": 3.1723661765900415, "grad_norm": 14.040224075317383, "learning_rate": 1.1263756618846158e-06, "loss": 0.38276119, "memory(GiB)": 34.88, "step": 117165, "train_speed(iter/s)": 0.411712 }, { "acc": 0.95142803, "epoch": 3.172501556873257, "grad_norm": 2.7455992698669434, "learning_rate": 1.1260219966151037e-06, "loss": 0.255018, "memory(GiB)": 34.88, "step": 117170, "train_speed(iter/s)": 0.411713 }, { "acc": 0.96108179, "epoch": 3.1726369371564727, "grad_norm": 8.981283187866211, "learning_rate": 1.12566837988104e-06, "loss": 0.26488242, "memory(GiB)": 34.88, "step": 117175, "train_speed(iter/s)": 0.411714 }, { "acc": 0.92459707, "epoch": 3.172772317439688, "grad_norm": 5.890214920043945, "learning_rate": 1.1253148116868568e-06, "loss": 0.41707292, "memory(GiB)": 34.88, "step": 117180, "train_speed(iter/s)": 0.411715 }, { "acc": 0.92976084, "epoch": 3.172907697722904, "grad_norm": 6.492290019989014, "learning_rate": 1.1249612920369844e-06, "loss": 0.45022731, "memory(GiB)": 34.88, "step": 117185, "train_speed(iter/s)": 0.411715 }, { "acc": 0.96002941, "epoch": 3.173043078006119, "grad_norm": 7.771445274353027, "learning_rate": 1.1246078209358477e-06, "loss": 0.26328518, "memory(GiB)": 34.88, "step": 117190, "train_speed(iter/s)": 0.411716 }, { "acc": 0.94590855, "epoch": 3.1731784582893345, "grad_norm": 16.136550903320312, "learning_rate": 1.1242543983878775e-06, "loss": 0.25183563, "memory(GiB)": 34.88, "step": 117195, "train_speed(iter/s)": 0.411717 }, { "acc": 0.94342861, "epoch": 3.1733138385725503, "grad_norm": 19.365859985351562, "learning_rate": 1.1239010243974995e-06, "loss": 0.38467374, "memory(GiB)": 34.88, "step": 117200, "train_speed(iter/s)": 0.411718 }, { "acc": 0.93407135, "epoch": 3.1734492188557657, "grad_norm": 8.337824821472168, "learning_rate": 1.123547698969143e-06, "loss": 0.4182416, "memory(GiB)": 34.88, "step": 117205, "train_speed(iter/s)": 0.411719 }, { "acc": 0.93679276, "epoch": 3.1735845991389815, "grad_norm": 5.133426189422607, "learning_rate": 1.123194422107229e-06, "loss": 0.40050993, "memory(GiB)": 34.88, "step": 117210, "train_speed(iter/s)": 0.41172 }, { "acc": 0.95285883, "epoch": 3.173719979422197, "grad_norm": 4.53798770904541, "learning_rate": 1.1228411938161894e-06, "loss": 0.25096111, "memory(GiB)": 34.88, "step": 117215, "train_speed(iter/s)": 0.411721 }, { "acc": 0.94470978, "epoch": 3.1738553597054127, "grad_norm": 3.4496307373046875, "learning_rate": 1.1224880141004462e-06, "loss": 0.24357121, "memory(GiB)": 34.88, "step": 117220, "train_speed(iter/s)": 0.411722 }, { "acc": 0.94067993, "epoch": 3.173990739988628, "grad_norm": 7.943231582641602, "learning_rate": 1.1221348829644217e-06, "loss": 0.34683716, "memory(GiB)": 34.88, "step": 117225, "train_speed(iter/s)": 0.411723 }, { "acc": 0.94646244, "epoch": 3.1741261202718434, "grad_norm": 6.311285972595215, "learning_rate": 1.1217818004125413e-06, "loss": 0.30638571, "memory(GiB)": 34.88, "step": 117230, "train_speed(iter/s)": 0.411724 }, { "acc": 0.94813662, "epoch": 3.174261500555059, "grad_norm": 6.043930530548096, "learning_rate": 1.1214287664492274e-06, "loss": 0.33418458, "memory(GiB)": 34.88, "step": 117235, "train_speed(iter/s)": 0.411725 }, { "acc": 0.92448893, "epoch": 3.1743968808382745, "grad_norm": 10.787113189697266, "learning_rate": 1.1210757810789052e-06, "loss": 0.44330187, "memory(GiB)": 34.88, "step": 117240, "train_speed(iter/s)": 0.411725 }, { "acc": 0.93181896, "epoch": 3.1745322611214903, "grad_norm": 9.775163650512695, "learning_rate": 1.1207228443059917e-06, "loss": 0.37172475, "memory(GiB)": 34.88, "step": 117245, "train_speed(iter/s)": 0.411726 }, { "acc": 0.94121685, "epoch": 3.1746676414047057, "grad_norm": 7.568111419677734, "learning_rate": 1.1203699561349133e-06, "loss": 0.3041873, "memory(GiB)": 34.88, "step": 117250, "train_speed(iter/s)": 0.411727 }, { "acc": 0.94469738, "epoch": 3.1748030216879215, "grad_norm": 7.705620765686035, "learning_rate": 1.1200171165700884e-06, "loss": 0.30434012, "memory(GiB)": 34.88, "step": 117255, "train_speed(iter/s)": 0.411728 }, { "acc": 0.93892479, "epoch": 3.174938401971137, "grad_norm": 12.78408432006836, "learning_rate": 1.1196643256159352e-06, "loss": 0.39233961, "memory(GiB)": 34.88, "step": 117260, "train_speed(iter/s)": 0.411729 }, { "acc": 0.94810772, "epoch": 3.1750737822543527, "grad_norm": 6.683556079864502, "learning_rate": 1.1193115832768745e-06, "loss": 0.34272702, "memory(GiB)": 34.88, "step": 117265, "train_speed(iter/s)": 0.41173 }, { "acc": 0.9306181, "epoch": 3.175209162537568, "grad_norm": 15.270938873291016, "learning_rate": 1.118958889557325e-06, "loss": 0.43873386, "memory(GiB)": 34.88, "step": 117270, "train_speed(iter/s)": 0.411731 }, { "acc": 0.94776859, "epoch": 3.1753445428207834, "grad_norm": 5.600725173950195, "learning_rate": 1.118606244461707e-06, "loss": 0.37757874, "memory(GiB)": 34.88, "step": 117275, "train_speed(iter/s)": 0.411732 }, { "acc": 0.9492363, "epoch": 3.175479923103999, "grad_norm": 5.468749523162842, "learning_rate": 1.1182536479944337e-06, "loss": 0.32901461, "memory(GiB)": 34.88, "step": 117280, "train_speed(iter/s)": 0.411733 }, { "acc": 0.93703089, "epoch": 3.1756153033872145, "grad_norm": 7.731664180755615, "learning_rate": 1.1179011001599267e-06, "loss": 0.3731426, "memory(GiB)": 34.88, "step": 117285, "train_speed(iter/s)": 0.411734 }, { "acc": 0.93644772, "epoch": 3.1757506836704303, "grad_norm": 7.413939952850342, "learning_rate": 1.117548600962599e-06, "loss": 0.33074207, "memory(GiB)": 34.88, "step": 117290, "train_speed(iter/s)": 0.411735 }, { "acc": 0.94291744, "epoch": 3.1758860639536457, "grad_norm": 8.550132751464844, "learning_rate": 1.1171961504068692e-06, "loss": 0.30976381, "memory(GiB)": 34.88, "step": 117295, "train_speed(iter/s)": 0.411736 }, { "acc": 0.94035645, "epoch": 3.1760214442368615, "grad_norm": 7.993850231170654, "learning_rate": 1.1168437484971488e-06, "loss": 0.35061374, "memory(GiB)": 34.88, "step": 117300, "train_speed(iter/s)": 0.411737 }, { "acc": 0.9469676, "epoch": 3.176156824520077, "grad_norm": 5.853277206420898, "learning_rate": 1.1164913952378547e-06, "loss": 0.27739983, "memory(GiB)": 34.88, "step": 117305, "train_speed(iter/s)": 0.411738 }, { "acc": 0.94776955, "epoch": 3.1762922048032927, "grad_norm": 8.027223587036133, "learning_rate": 1.116139090633402e-06, "loss": 0.30332863, "memory(GiB)": 34.88, "step": 117310, "train_speed(iter/s)": 0.411739 }, { "acc": 0.94531994, "epoch": 3.176427585086508, "grad_norm": 15.90789794921875, "learning_rate": 1.1157868346881993e-06, "loss": 0.29387007, "memory(GiB)": 34.88, "step": 117315, "train_speed(iter/s)": 0.41174 }, { "acc": 0.94996414, "epoch": 3.1765629653697234, "grad_norm": 7.3151984214782715, "learning_rate": 1.1154346274066653e-06, "loss": 0.29936457, "memory(GiB)": 34.88, "step": 117320, "train_speed(iter/s)": 0.411741 }, { "acc": 0.9351222, "epoch": 3.176698345652939, "grad_norm": 7.617043495178223, "learning_rate": 1.115082468793208e-06, "loss": 0.34838519, "memory(GiB)": 34.88, "step": 117325, "train_speed(iter/s)": 0.411742 }, { "acc": 0.94843941, "epoch": 3.1768337259361545, "grad_norm": 3.080706834793091, "learning_rate": 1.1147303588522415e-06, "loss": 0.31879218, "memory(GiB)": 34.88, "step": 117330, "train_speed(iter/s)": 0.411743 }, { "acc": 0.9436676, "epoch": 3.1769691062193703, "grad_norm": 5.170910358428955, "learning_rate": 1.1143782975881725e-06, "loss": 0.39410884, "memory(GiB)": 34.88, "step": 117335, "train_speed(iter/s)": 0.411744 }, { "acc": 0.94571047, "epoch": 3.1771044865025857, "grad_norm": 8.058496475219727, "learning_rate": 1.1140262850054172e-06, "loss": 0.3225714, "memory(GiB)": 34.88, "step": 117340, "train_speed(iter/s)": 0.411745 }, { "acc": 0.93570271, "epoch": 3.1772398667858015, "grad_norm": 18.620031356811523, "learning_rate": 1.1136743211083816e-06, "loss": 0.41353259, "memory(GiB)": 34.88, "step": 117345, "train_speed(iter/s)": 0.411746 }, { "acc": 0.95260601, "epoch": 3.177375247069017, "grad_norm": 6.220515251159668, "learning_rate": 1.1133224059014722e-06, "loss": 0.27866955, "memory(GiB)": 34.88, "step": 117350, "train_speed(iter/s)": 0.411746 }, { "acc": 0.93076286, "epoch": 3.177510627352232, "grad_norm": 10.069829940795898, "learning_rate": 1.1129705393891038e-06, "loss": 0.43062701, "memory(GiB)": 34.88, "step": 117355, "train_speed(iter/s)": 0.411747 }, { "acc": 0.93296909, "epoch": 3.177646007635448, "grad_norm": 7.896615982055664, "learning_rate": 1.1126187215756792e-06, "loss": 0.40800047, "memory(GiB)": 34.88, "step": 117360, "train_speed(iter/s)": 0.411748 }, { "acc": 0.9279355, "epoch": 3.1777813879186634, "grad_norm": 19.871034622192383, "learning_rate": 1.1122669524656088e-06, "loss": 0.46986847, "memory(GiB)": 34.88, "step": 117365, "train_speed(iter/s)": 0.411749 }, { "acc": 0.95409756, "epoch": 3.177916768201879, "grad_norm": 5.290027618408203, "learning_rate": 1.1119152320632946e-06, "loss": 0.29918268, "memory(GiB)": 34.88, "step": 117370, "train_speed(iter/s)": 0.41175 }, { "acc": 0.9344945, "epoch": 3.1780521484850945, "grad_norm": 11.365028381347656, "learning_rate": 1.1115635603731492e-06, "loss": 0.35421977, "memory(GiB)": 34.88, "step": 117375, "train_speed(iter/s)": 0.411751 }, { "acc": 0.93937874, "epoch": 3.1781875287683103, "grad_norm": 5.211996555328369, "learning_rate": 1.1112119373995737e-06, "loss": 0.31379409, "memory(GiB)": 34.88, "step": 117380, "train_speed(iter/s)": 0.411752 }, { "acc": 0.93821058, "epoch": 3.1783229090515257, "grad_norm": 10.394207954406738, "learning_rate": 1.110860363146971e-06, "loss": 0.36212714, "memory(GiB)": 34.88, "step": 117385, "train_speed(iter/s)": 0.411753 }, { "acc": 0.93756027, "epoch": 3.178458289334741, "grad_norm": 24.938617706298828, "learning_rate": 1.1105088376197505e-06, "loss": 0.45298738, "memory(GiB)": 34.88, "step": 117390, "train_speed(iter/s)": 0.411754 }, { "acc": 0.93583183, "epoch": 3.178593669617957, "grad_norm": 5.268261909484863, "learning_rate": 1.1101573608223117e-06, "loss": 0.34072969, "memory(GiB)": 34.88, "step": 117395, "train_speed(iter/s)": 0.411755 }, { "acc": 0.95114956, "epoch": 3.178729049901172, "grad_norm": 2.397049903869629, "learning_rate": 1.1098059327590608e-06, "loss": 0.24533262, "memory(GiB)": 34.88, "step": 117400, "train_speed(iter/s)": 0.411756 }, { "acc": 0.95482559, "epoch": 3.178864430184388, "grad_norm": 6.052910327911377, "learning_rate": 1.1094545534343953e-06, "loss": 0.24879718, "memory(GiB)": 34.88, "step": 117405, "train_speed(iter/s)": 0.411756 }, { "acc": 0.93298779, "epoch": 3.1789998104676034, "grad_norm": 8.996484756469727, "learning_rate": 1.109103222852723e-06, "loss": 0.36432815, "memory(GiB)": 34.88, "step": 117410, "train_speed(iter/s)": 0.411757 }, { "acc": 0.94727163, "epoch": 3.179135190750819, "grad_norm": 4.328580856323242, "learning_rate": 1.10875194101844e-06, "loss": 0.34792869, "memory(GiB)": 34.88, "step": 117415, "train_speed(iter/s)": 0.411758 }, { "acc": 0.94114037, "epoch": 3.1792705710340345, "grad_norm": 7.0515828132629395, "learning_rate": 1.1084007079359494e-06, "loss": 0.34329696, "memory(GiB)": 34.88, "step": 117420, "train_speed(iter/s)": 0.411759 }, { "acc": 0.94246521, "epoch": 3.1794059513172503, "grad_norm": 6.795634746551514, "learning_rate": 1.1080495236096513e-06, "loss": 0.33016725, "memory(GiB)": 34.88, "step": 117425, "train_speed(iter/s)": 0.41176 }, { "acc": 0.94629917, "epoch": 3.1795413316004657, "grad_norm": 7.010820388793945, "learning_rate": 1.1076983880439427e-06, "loss": 0.3182847, "memory(GiB)": 34.88, "step": 117430, "train_speed(iter/s)": 0.411761 }, { "acc": 0.94586983, "epoch": 3.179676711883681, "grad_norm": 9.59533977508545, "learning_rate": 1.1073473012432246e-06, "loss": 0.33103075, "memory(GiB)": 34.88, "step": 117435, "train_speed(iter/s)": 0.411762 }, { "acc": 0.94490919, "epoch": 3.179812092166897, "grad_norm": 3.8854873180389404, "learning_rate": 1.1069962632118921e-06, "loss": 0.31386738, "memory(GiB)": 34.88, "step": 117440, "train_speed(iter/s)": 0.411763 }, { "acc": 0.94688835, "epoch": 3.1799474724501122, "grad_norm": 0.9978991150856018, "learning_rate": 1.1066452739543467e-06, "loss": 0.316079, "memory(GiB)": 34.88, "step": 117445, "train_speed(iter/s)": 0.411764 }, { "acc": 0.95121403, "epoch": 3.180082852733328, "grad_norm": 8.918558120727539, "learning_rate": 1.1062943334749824e-06, "loss": 0.31248677, "memory(GiB)": 34.88, "step": 117450, "train_speed(iter/s)": 0.411765 }, { "acc": 0.9535244, "epoch": 3.1802182330165434, "grad_norm": 8.735486030578613, "learning_rate": 1.105943441778196e-06, "loss": 0.32937331, "memory(GiB)": 34.88, "step": 117455, "train_speed(iter/s)": 0.411766 }, { "acc": 0.93145618, "epoch": 3.180353613299759, "grad_norm": 8.390303611755371, "learning_rate": 1.1055925988683827e-06, "loss": 0.44043894, "memory(GiB)": 34.88, "step": 117460, "train_speed(iter/s)": 0.411767 }, { "acc": 0.95836544, "epoch": 3.1804889935829745, "grad_norm": 6.1763916015625, "learning_rate": 1.1052418047499401e-06, "loss": 0.20661826, "memory(GiB)": 34.88, "step": 117465, "train_speed(iter/s)": 0.411767 }, { "acc": 0.93488474, "epoch": 3.1806243738661903, "grad_norm": 6.615274429321289, "learning_rate": 1.1048910594272606e-06, "loss": 0.38426373, "memory(GiB)": 34.88, "step": 117470, "train_speed(iter/s)": 0.411768 }, { "acc": 0.94738007, "epoch": 3.1807597541494057, "grad_norm": 5.71644401550293, "learning_rate": 1.104540362904735e-06, "loss": 0.38765178, "memory(GiB)": 34.88, "step": 117475, "train_speed(iter/s)": 0.411769 }, { "acc": 0.95019569, "epoch": 3.180895134432621, "grad_norm": 7.675408363342285, "learning_rate": 1.104189715186762e-06, "loss": 0.29025462, "memory(GiB)": 34.88, "step": 117480, "train_speed(iter/s)": 0.41177 }, { "acc": 0.93729935, "epoch": 3.181030514715837, "grad_norm": 6.6147284507751465, "learning_rate": 1.1038391162777302e-06, "loss": 0.37942798, "memory(GiB)": 34.88, "step": 117485, "train_speed(iter/s)": 0.411771 }, { "acc": 0.94603357, "epoch": 3.1811658949990522, "grad_norm": 6.014730453491211, "learning_rate": 1.1034885661820328e-06, "loss": 0.37549992, "memory(GiB)": 34.88, "step": 117490, "train_speed(iter/s)": 0.411772 }, { "acc": 0.94165211, "epoch": 3.181301275282268, "grad_norm": 4.924221515655518, "learning_rate": 1.1031380649040616e-06, "loss": 0.32107911, "memory(GiB)": 34.88, "step": 117495, "train_speed(iter/s)": 0.411773 }, { "acc": 0.94795094, "epoch": 3.1814366555654834, "grad_norm": 5.0113935470581055, "learning_rate": 1.1027876124482081e-06, "loss": 0.29493811, "memory(GiB)": 34.88, "step": 117500, "train_speed(iter/s)": 0.411774 }, { "acc": 0.93184834, "epoch": 3.181572035848699, "grad_norm": 7.614994525909424, "learning_rate": 1.1024372088188599e-06, "loss": 0.44528952, "memory(GiB)": 34.88, "step": 117505, "train_speed(iter/s)": 0.411775 }, { "acc": 0.93293705, "epoch": 3.1817074161319145, "grad_norm": 4.533740043640137, "learning_rate": 1.102086854020408e-06, "loss": 0.3707994, "memory(GiB)": 34.88, "step": 117510, "train_speed(iter/s)": 0.411776 }, { "acc": 0.9358139, "epoch": 3.18184279641513, "grad_norm": 3.7259345054626465, "learning_rate": 1.101736548057242e-06, "loss": 0.35050845, "memory(GiB)": 34.88, "step": 117515, "train_speed(iter/s)": 0.411777 }, { "acc": 0.94435587, "epoch": 3.1819781766983457, "grad_norm": 4.651581287384033, "learning_rate": 1.1013862909337486e-06, "loss": 0.3435029, "memory(GiB)": 34.88, "step": 117520, "train_speed(iter/s)": 0.411778 }, { "acc": 0.94242001, "epoch": 3.182113556981561, "grad_norm": 7.663272380828857, "learning_rate": 1.1010360826543157e-06, "loss": 0.34833777, "memory(GiB)": 34.88, "step": 117525, "train_speed(iter/s)": 0.411779 }, { "acc": 0.95631266, "epoch": 3.182248937264777, "grad_norm": 6.9565629959106445, "learning_rate": 1.100685923223331e-06, "loss": 0.24093246, "memory(GiB)": 34.88, "step": 117530, "train_speed(iter/s)": 0.41178 }, { "acc": 0.94844198, "epoch": 3.1823843175479922, "grad_norm": 3.834592580795288, "learning_rate": 1.1003358126451818e-06, "loss": 0.27642138, "memory(GiB)": 34.88, "step": 117535, "train_speed(iter/s)": 0.411781 }, { "acc": 0.951548, "epoch": 3.182519697831208, "grad_norm": 7.889179706573486, "learning_rate": 1.0999857509242519e-06, "loss": 0.32809563, "memory(GiB)": 34.88, "step": 117540, "train_speed(iter/s)": 0.411782 }, { "acc": 0.94867601, "epoch": 3.1826550781144234, "grad_norm": 4.771352291107178, "learning_rate": 1.099635738064927e-06, "loss": 0.26824198, "memory(GiB)": 34.88, "step": 117545, "train_speed(iter/s)": 0.411782 }, { "acc": 0.94121351, "epoch": 3.1827904583976387, "grad_norm": 5.901045322418213, "learning_rate": 1.0992857740715938e-06, "loss": 0.3896848, "memory(GiB)": 34.88, "step": 117550, "train_speed(iter/s)": 0.411783 }, { "acc": 0.94223385, "epoch": 3.1829258386808545, "grad_norm": 6.709790229797363, "learning_rate": 1.0989358589486335e-06, "loss": 0.2845856, "memory(GiB)": 34.88, "step": 117555, "train_speed(iter/s)": 0.411784 }, { "acc": 0.93705578, "epoch": 3.18306121896407, "grad_norm": 10.820374488830566, "learning_rate": 1.0985859927004316e-06, "loss": 0.32069213, "memory(GiB)": 34.88, "step": 117560, "train_speed(iter/s)": 0.411785 }, { "acc": 0.95026579, "epoch": 3.1831965992472857, "grad_norm": 4.51210880279541, "learning_rate": 1.098236175331367e-06, "loss": 0.26168499, "memory(GiB)": 34.88, "step": 117565, "train_speed(iter/s)": 0.411786 }, { "acc": 0.94943218, "epoch": 3.183331979530501, "grad_norm": 11.23669719696045, "learning_rate": 1.0978864068458282e-06, "loss": 0.31253543, "memory(GiB)": 34.88, "step": 117570, "train_speed(iter/s)": 0.411787 }, { "acc": 0.94465151, "epoch": 3.183467359813717, "grad_norm": 5.01608419418335, "learning_rate": 1.0975366872481915e-06, "loss": 0.26415315, "memory(GiB)": 34.88, "step": 117575, "train_speed(iter/s)": 0.411788 }, { "acc": 0.93277168, "epoch": 3.1836027400969322, "grad_norm": 8.691905975341797, "learning_rate": 1.0971870165428397e-06, "loss": 0.4049583, "memory(GiB)": 34.88, "step": 117580, "train_speed(iter/s)": 0.411789 }, { "acc": 0.92635136, "epoch": 3.183738120380148, "grad_norm": 13.881617546081543, "learning_rate": 1.0968373947341526e-06, "loss": 0.40558729, "memory(GiB)": 34.88, "step": 117585, "train_speed(iter/s)": 0.41179 }, { "acc": 0.9466711, "epoch": 3.1838735006633634, "grad_norm": 7.686915397644043, "learning_rate": 1.0964878218265123e-06, "loss": 0.32458763, "memory(GiB)": 34.88, "step": 117590, "train_speed(iter/s)": 0.411791 }, { "acc": 0.94846115, "epoch": 3.1840088809465787, "grad_norm": 5.813509941101074, "learning_rate": 1.0961382978242955e-06, "loss": 0.32873149, "memory(GiB)": 34.88, "step": 117595, "train_speed(iter/s)": 0.411791 }, { "acc": 0.95917568, "epoch": 3.1841442612297945, "grad_norm": 5.02349328994751, "learning_rate": 1.0957888227318782e-06, "loss": 0.31069531, "memory(GiB)": 34.88, "step": 117600, "train_speed(iter/s)": 0.411792 }, { "acc": 0.93869963, "epoch": 3.18427964151301, "grad_norm": 16.515792846679688, "learning_rate": 1.0954393965536443e-06, "loss": 0.36733923, "memory(GiB)": 34.88, "step": 117605, "train_speed(iter/s)": 0.411793 }, { "acc": 0.94621716, "epoch": 3.1844150217962257, "grad_norm": 4.562386512756348, "learning_rate": 1.0950900192939664e-06, "loss": 0.28594422, "memory(GiB)": 34.88, "step": 117610, "train_speed(iter/s)": 0.411794 }, { "acc": 0.94017754, "epoch": 3.184550402079441, "grad_norm": 6.014512062072754, "learning_rate": 1.0947406909572227e-06, "loss": 0.34053268, "memory(GiB)": 34.88, "step": 117615, "train_speed(iter/s)": 0.411795 }, { "acc": 0.93707104, "epoch": 3.184685782362657, "grad_norm": 4.980857849121094, "learning_rate": 1.0943914115477888e-06, "loss": 0.39385698, "memory(GiB)": 34.88, "step": 117620, "train_speed(iter/s)": 0.411796 }, { "acc": 0.94102345, "epoch": 3.1848211626458722, "grad_norm": 9.103116035461426, "learning_rate": 1.0940421810700418e-06, "loss": 0.38665521, "memory(GiB)": 34.88, "step": 117625, "train_speed(iter/s)": 0.411797 }, { "acc": 0.95078287, "epoch": 3.184956542929088, "grad_norm": 3.6895956993103027, "learning_rate": 1.0936929995283543e-06, "loss": 0.29648151, "memory(GiB)": 34.88, "step": 117630, "train_speed(iter/s)": 0.411798 }, { "acc": 0.92352982, "epoch": 3.1850919232123034, "grad_norm": 12.964385986328125, "learning_rate": 1.093343866927101e-06, "loss": 0.43065481, "memory(GiB)": 34.88, "step": 117635, "train_speed(iter/s)": 0.411799 }, { "acc": 0.95044575, "epoch": 3.1852273034955187, "grad_norm": 8.71643352508545, "learning_rate": 1.0929947832706569e-06, "loss": 0.27908897, "memory(GiB)": 34.88, "step": 117640, "train_speed(iter/s)": 0.4118 }, { "acc": 0.93738308, "epoch": 3.1853626837787345, "grad_norm": 4.6204423904418945, "learning_rate": 1.0926457485633928e-06, "loss": 0.34959469, "memory(GiB)": 34.88, "step": 117645, "train_speed(iter/s)": 0.411801 }, { "acc": 0.94465094, "epoch": 3.18549806406195, "grad_norm": 13.534997940063477, "learning_rate": 1.0922967628096824e-06, "loss": 0.36504014, "memory(GiB)": 34.88, "step": 117650, "train_speed(iter/s)": 0.411801 }, { "acc": 0.94214296, "epoch": 3.1856334443451657, "grad_norm": 9.133406639099121, "learning_rate": 1.0919478260138968e-06, "loss": 0.3100966, "memory(GiB)": 34.88, "step": 117655, "train_speed(iter/s)": 0.411802 }, { "acc": 0.93470259, "epoch": 3.185768824628381, "grad_norm": 9.16625690460205, "learning_rate": 1.091598938180409e-06, "loss": 0.41279888, "memory(GiB)": 34.88, "step": 117660, "train_speed(iter/s)": 0.411803 }, { "acc": 0.95676804, "epoch": 3.185904204911597, "grad_norm": 6.423371315002441, "learning_rate": 1.0912500993135866e-06, "loss": 0.27304325, "memory(GiB)": 34.88, "step": 117665, "train_speed(iter/s)": 0.411804 }, { "acc": 0.94004059, "epoch": 3.1860395851948122, "grad_norm": 6.786235809326172, "learning_rate": 1.090901309417801e-06, "loss": 0.3739408, "memory(GiB)": 34.88, "step": 117670, "train_speed(iter/s)": 0.411805 }, { "acc": 0.95175953, "epoch": 3.1861749654780276, "grad_norm": 3.6532793045043945, "learning_rate": 1.0905525684974225e-06, "loss": 0.26851974, "memory(GiB)": 34.88, "step": 117675, "train_speed(iter/s)": 0.411806 }, { "acc": 0.95999641, "epoch": 3.1863103457612434, "grad_norm": 3.671727418899536, "learning_rate": 1.090203876556817e-06, "loss": 0.2011703, "memory(GiB)": 34.88, "step": 117680, "train_speed(iter/s)": 0.411807 }, { "acc": 0.9449461, "epoch": 3.1864457260444587, "grad_norm": 7.306274890899658, "learning_rate": 1.0898552336003544e-06, "loss": 0.32335758, "memory(GiB)": 34.88, "step": 117685, "train_speed(iter/s)": 0.411808 }, { "acc": 0.95421848, "epoch": 3.1865811063276746, "grad_norm": 6.641595840454102, "learning_rate": 1.0895066396324024e-06, "loss": 0.30227091, "memory(GiB)": 34.88, "step": 117690, "train_speed(iter/s)": 0.411809 }, { "acc": 0.95124397, "epoch": 3.18671648661089, "grad_norm": 6.1188554763793945, "learning_rate": 1.089158094657328e-06, "loss": 0.24013166, "memory(GiB)": 34.88, "step": 117695, "train_speed(iter/s)": 0.41181 }, { "acc": 0.94712372, "epoch": 3.1868518668941057, "grad_norm": 9.751847267150879, "learning_rate": 1.0888095986794957e-06, "loss": 0.29081979, "memory(GiB)": 34.88, "step": 117700, "train_speed(iter/s)": 0.411811 }, { "acc": 0.95876045, "epoch": 3.186987247177321, "grad_norm": 3.2855515480041504, "learning_rate": 1.0884611517032722e-06, "loss": 0.26667786, "memory(GiB)": 34.88, "step": 117705, "train_speed(iter/s)": 0.411811 }, { "acc": 0.95266914, "epoch": 3.1871226274605364, "grad_norm": 3.518810510635376, "learning_rate": 1.0881127537330225e-06, "loss": 0.29878488, "memory(GiB)": 34.88, "step": 117710, "train_speed(iter/s)": 0.411812 }, { "acc": 0.93839588, "epoch": 3.1872580077437522, "grad_norm": 4.138917446136475, "learning_rate": 1.0877644047731118e-06, "loss": 0.3529181, "memory(GiB)": 34.88, "step": 117715, "train_speed(iter/s)": 0.411813 }, { "acc": 0.95557833, "epoch": 3.1873933880269676, "grad_norm": 6.584298133850098, "learning_rate": 1.0874161048279016e-06, "loss": 0.26390166, "memory(GiB)": 34.88, "step": 117720, "train_speed(iter/s)": 0.411814 }, { "acc": 0.95555754, "epoch": 3.1875287683101834, "grad_norm": 4.102628231048584, "learning_rate": 1.0870678539017567e-06, "loss": 0.2526, "memory(GiB)": 34.88, "step": 117725, "train_speed(iter/s)": 0.411815 }, { "acc": 0.94685764, "epoch": 3.1876641485933987, "grad_norm": 2.2293341159820557, "learning_rate": 1.0867196519990408e-06, "loss": 0.32387619, "memory(GiB)": 34.88, "step": 117730, "train_speed(iter/s)": 0.411816 }, { "acc": 0.94589148, "epoch": 3.1877995288766146, "grad_norm": 7.1677470207214355, "learning_rate": 1.0863714991241123e-06, "loss": 0.35676031, "memory(GiB)": 34.88, "step": 117735, "train_speed(iter/s)": 0.411817 }, { "acc": 0.91822376, "epoch": 3.18793490915983, "grad_norm": 13.794976234436035, "learning_rate": 1.0860233952813351e-06, "loss": 0.57594409, "memory(GiB)": 34.88, "step": 117740, "train_speed(iter/s)": 0.411818 }, { "acc": 0.91245804, "epoch": 3.1880702894430457, "grad_norm": 10.032500267028809, "learning_rate": 1.0856753404750692e-06, "loss": 0.6016572, "memory(GiB)": 34.88, "step": 117745, "train_speed(iter/s)": 0.411819 }, { "acc": 0.9216877, "epoch": 3.188205669726261, "grad_norm": 7.6994757652282715, "learning_rate": 1.085327334709676e-06, "loss": 0.47214646, "memory(GiB)": 34.88, "step": 117750, "train_speed(iter/s)": 0.41182 }, { "acc": 0.93853035, "epoch": 3.1883410500094764, "grad_norm": 7.638034820556641, "learning_rate": 1.0849793779895126e-06, "loss": 0.41769567, "memory(GiB)": 34.88, "step": 117755, "train_speed(iter/s)": 0.411821 }, { "acc": 0.95414934, "epoch": 3.1884764302926922, "grad_norm": 3.5646257400512695, "learning_rate": 1.084631470318939e-06, "loss": 0.22286096, "memory(GiB)": 34.88, "step": 117760, "train_speed(iter/s)": 0.411822 }, { "acc": 0.94604273, "epoch": 3.1886118105759076, "grad_norm": 6.984598159790039, "learning_rate": 1.0842836117023143e-06, "loss": 0.27220609, "memory(GiB)": 34.88, "step": 117765, "train_speed(iter/s)": 0.411823 }, { "acc": 0.93366337, "epoch": 3.1887471908591234, "grad_norm": 6.167909145355225, "learning_rate": 1.0839358021439939e-06, "loss": 0.38970726, "memory(GiB)": 34.88, "step": 117770, "train_speed(iter/s)": 0.411823 }, { "acc": 0.94694271, "epoch": 3.1888825711423388, "grad_norm": 7.952370643615723, "learning_rate": 1.0835880416483368e-06, "loss": 0.3144074, "memory(GiB)": 34.88, "step": 117775, "train_speed(iter/s)": 0.411824 }, { "acc": 0.93415298, "epoch": 3.1890179514255546, "grad_norm": 5.665996074676514, "learning_rate": 1.083240330219698e-06, "loss": 0.37953289, "memory(GiB)": 34.88, "step": 117780, "train_speed(iter/s)": 0.411825 }, { "acc": 0.94524956, "epoch": 3.18915333170877, "grad_norm": 5.419041633605957, "learning_rate": 1.0828926678624356e-06, "loss": 0.31473598, "memory(GiB)": 34.88, "step": 117785, "train_speed(iter/s)": 0.411826 }, { "acc": 0.93470831, "epoch": 3.1892887119919857, "grad_norm": 5.5240325927734375, "learning_rate": 1.082545054580902e-06, "loss": 0.37849777, "memory(GiB)": 34.88, "step": 117790, "train_speed(iter/s)": 0.411827 }, { "acc": 0.94327641, "epoch": 3.189424092275201, "grad_norm": 3.216549873352051, "learning_rate": 1.0821974903794521e-06, "loss": 0.30732555, "memory(GiB)": 34.88, "step": 117795, "train_speed(iter/s)": 0.411828 }, { "acc": 0.94541779, "epoch": 3.1895594725584164, "grad_norm": 8.82929515838623, "learning_rate": 1.0818499752624427e-06, "loss": 0.36355264, "memory(GiB)": 34.88, "step": 117800, "train_speed(iter/s)": 0.411829 }, { "acc": 0.95326672, "epoch": 3.1896948528416322, "grad_norm": 11.371726036071777, "learning_rate": 1.0815025092342233e-06, "loss": 0.25740633, "memory(GiB)": 34.88, "step": 117805, "train_speed(iter/s)": 0.41183 }, { "acc": 0.95072517, "epoch": 3.1898302331248476, "grad_norm": 3.143462896347046, "learning_rate": 1.0811550922991486e-06, "loss": 0.30579762, "memory(GiB)": 34.88, "step": 117810, "train_speed(iter/s)": 0.411831 }, { "acc": 0.92690172, "epoch": 3.1899656134080634, "grad_norm": 8.550950050354004, "learning_rate": 1.0808077244615698e-06, "loss": 0.41924, "memory(GiB)": 34.88, "step": 117815, "train_speed(iter/s)": 0.411832 }, { "acc": 0.9403223, "epoch": 3.1901009936912788, "grad_norm": 4.027647495269775, "learning_rate": 1.0804604057258404e-06, "loss": 0.33186045, "memory(GiB)": 34.88, "step": 117820, "train_speed(iter/s)": 0.411833 }, { "acc": 0.95129042, "epoch": 3.1902363739744946, "grad_norm": 4.220094203948975, "learning_rate": 1.0801131360963088e-06, "loss": 0.25235014, "memory(GiB)": 34.88, "step": 117825, "train_speed(iter/s)": 0.411834 }, { "acc": 0.94154253, "epoch": 3.19037175425771, "grad_norm": 7.813335418701172, "learning_rate": 1.079765915577326e-06, "loss": 0.32943537, "memory(GiB)": 34.88, "step": 117830, "train_speed(iter/s)": 0.411835 }, { "acc": 0.93930626, "epoch": 3.1905071345409253, "grad_norm": 6.885515213012695, "learning_rate": 1.0794187441732424e-06, "loss": 0.37920508, "memory(GiB)": 34.88, "step": 117835, "train_speed(iter/s)": 0.411836 }, { "acc": 0.94500704, "epoch": 3.190642514824141, "grad_norm": 5.450296401977539, "learning_rate": 1.079071621888407e-06, "loss": 0.31032293, "memory(GiB)": 34.88, "step": 117840, "train_speed(iter/s)": 0.411837 }, { "acc": 0.95222807, "epoch": 3.1907778951073564, "grad_norm": 10.674476623535156, "learning_rate": 1.0787245487271668e-06, "loss": 0.30970194, "memory(GiB)": 34.88, "step": 117845, "train_speed(iter/s)": 0.411838 }, { "acc": 0.94154034, "epoch": 3.1909132753905722, "grad_norm": 11.717445373535156, "learning_rate": 1.07837752469387e-06, "loss": 0.38533502, "memory(GiB)": 34.88, "step": 117850, "train_speed(iter/s)": 0.411838 }, { "acc": 0.94192867, "epoch": 3.1910486556737876, "grad_norm": 3.4197442531585693, "learning_rate": 1.0780305497928655e-06, "loss": 0.36863451, "memory(GiB)": 34.88, "step": 117855, "train_speed(iter/s)": 0.411839 }, { "acc": 0.94942598, "epoch": 3.1911840359570034, "grad_norm": 8.696109771728516, "learning_rate": 1.0776836240284975e-06, "loss": 0.27178183, "memory(GiB)": 34.88, "step": 117860, "train_speed(iter/s)": 0.41184 }, { "acc": 0.94470892, "epoch": 3.1913194162402188, "grad_norm": 4.09161901473999, "learning_rate": 1.077336747405113e-06, "loss": 0.32362597, "memory(GiB)": 34.88, "step": 117865, "train_speed(iter/s)": 0.411841 }, { "acc": 0.94490509, "epoch": 3.191454796523434, "grad_norm": 3.4723849296569824, "learning_rate": 1.0769899199270568e-06, "loss": 0.30564308, "memory(GiB)": 34.88, "step": 117870, "train_speed(iter/s)": 0.411842 }, { "acc": 0.94088707, "epoch": 3.19159017680665, "grad_norm": 9.265398025512695, "learning_rate": 1.0766431415986754e-06, "loss": 0.37947669, "memory(GiB)": 34.88, "step": 117875, "train_speed(iter/s)": 0.411843 }, { "acc": 0.93240738, "epoch": 3.1917255570898653, "grad_norm": 11.314467430114746, "learning_rate": 1.076296412424309e-06, "loss": 0.45676355, "memory(GiB)": 34.88, "step": 117880, "train_speed(iter/s)": 0.411844 }, { "acc": 0.93979797, "epoch": 3.191860937373081, "grad_norm": 5.769869327545166, "learning_rate": 1.0759497324083062e-06, "loss": 0.34269013, "memory(GiB)": 34.88, "step": 117885, "train_speed(iter/s)": 0.411845 }, { "acc": 0.9357584, "epoch": 3.1919963176562964, "grad_norm": 6.911643981933594, "learning_rate": 1.0756031015550074e-06, "loss": 0.34482238, "memory(GiB)": 34.88, "step": 117890, "train_speed(iter/s)": 0.411846 }, { "acc": 0.9418026, "epoch": 3.1921316979395122, "grad_norm": 10.712616920471191, "learning_rate": 1.0752565198687534e-06, "loss": 0.35108447, "memory(GiB)": 34.88, "step": 117895, "train_speed(iter/s)": 0.411847 }, { "acc": 0.95809793, "epoch": 3.1922670782227276, "grad_norm": 4.6345391273498535, "learning_rate": 1.0749099873538872e-06, "loss": 0.25859861, "memory(GiB)": 34.88, "step": 117900, "train_speed(iter/s)": 0.411848 }, { "acc": 0.95662422, "epoch": 3.1924024585059434, "grad_norm": 4.035269260406494, "learning_rate": 1.0745635040147488e-06, "loss": 0.2179235, "memory(GiB)": 34.88, "step": 117905, "train_speed(iter/s)": 0.411848 }, { "acc": 0.94740553, "epoch": 3.1925378387891588, "grad_norm": 16.029712677001953, "learning_rate": 1.0742170698556816e-06, "loss": 0.26139121, "memory(GiB)": 34.88, "step": 117910, "train_speed(iter/s)": 0.411849 }, { "acc": 0.95628281, "epoch": 3.192673219072374, "grad_norm": 2.284766912460327, "learning_rate": 1.07387068488102e-06, "loss": 0.23812625, "memory(GiB)": 34.88, "step": 117915, "train_speed(iter/s)": 0.41185 }, { "acc": 0.95707722, "epoch": 3.19280859935559, "grad_norm": 7.509084224700928, "learning_rate": 1.0735243490951096e-06, "loss": 0.22972157, "memory(GiB)": 34.88, "step": 117920, "train_speed(iter/s)": 0.411851 }, { "acc": 0.95175886, "epoch": 3.1929439796388053, "grad_norm": 6.9298481941223145, "learning_rate": 1.0731780625022854e-06, "loss": 0.31641464, "memory(GiB)": 34.88, "step": 117925, "train_speed(iter/s)": 0.411852 }, { "acc": 0.94903564, "epoch": 3.193079359922021, "grad_norm": 6.644323825836182, "learning_rate": 1.072831825106885e-06, "loss": 0.26054025, "memory(GiB)": 34.88, "step": 117930, "train_speed(iter/s)": 0.411853 }, { "acc": 0.9354023, "epoch": 3.1932147402052364, "grad_norm": 4.616804122924805, "learning_rate": 1.0724856369132454e-06, "loss": 0.41243424, "memory(GiB)": 34.88, "step": 117935, "train_speed(iter/s)": 0.411854 }, { "acc": 0.93385239, "epoch": 3.1933501204884522, "grad_norm": 45.61742401123047, "learning_rate": 1.0721394979257048e-06, "loss": 0.41281252, "memory(GiB)": 34.88, "step": 117940, "train_speed(iter/s)": 0.411855 }, { "acc": 0.93546562, "epoch": 3.1934855007716676, "grad_norm": 4.955466270446777, "learning_rate": 1.0717934081485997e-06, "loss": 0.4448432, "memory(GiB)": 34.88, "step": 117945, "train_speed(iter/s)": 0.411855 }, { "acc": 0.94134178, "epoch": 3.1936208810548834, "grad_norm": 19.89908218383789, "learning_rate": 1.0714473675862618e-06, "loss": 0.3380312, "memory(GiB)": 34.88, "step": 117950, "train_speed(iter/s)": 0.411856 }, { "acc": 0.94811583, "epoch": 3.1937562613380988, "grad_norm": 5.690194129943848, "learning_rate": 1.0711013762430315e-06, "loss": 0.30432942, "memory(GiB)": 34.88, "step": 117955, "train_speed(iter/s)": 0.411857 }, { "acc": 0.95107574, "epoch": 3.193891641621314, "grad_norm": 3.751054048538208, "learning_rate": 1.0707554341232394e-06, "loss": 0.29735615, "memory(GiB)": 34.88, "step": 117960, "train_speed(iter/s)": 0.411858 }, { "acc": 0.93164501, "epoch": 3.19402702190453, "grad_norm": 4.744179725646973, "learning_rate": 1.0704095412312207e-06, "loss": 0.40153484, "memory(GiB)": 34.88, "step": 117965, "train_speed(iter/s)": 0.411859 }, { "acc": 0.95516462, "epoch": 3.1941624021877453, "grad_norm": 6.563209533691406, "learning_rate": 1.0700636975713064e-06, "loss": 0.22806034, "memory(GiB)": 34.88, "step": 117970, "train_speed(iter/s)": 0.41186 }, { "acc": 0.94672813, "epoch": 3.194297782470961, "grad_norm": 4.2310919761657715, "learning_rate": 1.0697179031478303e-06, "loss": 0.34274127, "memory(GiB)": 34.88, "step": 117975, "train_speed(iter/s)": 0.411861 }, { "acc": 0.94557657, "epoch": 3.1944331627541764, "grad_norm": 4.905436038970947, "learning_rate": 1.0693721579651248e-06, "loss": 0.32874494, "memory(GiB)": 34.88, "step": 117980, "train_speed(iter/s)": 0.411862 }, { "acc": 0.93309498, "epoch": 3.1945685430373922, "grad_norm": 6.245237350463867, "learning_rate": 1.0690264620275179e-06, "loss": 0.37208035, "memory(GiB)": 34.88, "step": 117985, "train_speed(iter/s)": 0.411863 }, { "acc": 0.94970198, "epoch": 3.1947039233206076, "grad_norm": 4.28019905090332, "learning_rate": 1.068680815339345e-06, "loss": 0.30091445, "memory(GiB)": 34.88, "step": 117990, "train_speed(iter/s)": 0.411864 }, { "acc": 0.9422142, "epoch": 3.194839303603823, "grad_norm": 7.127232551574707, "learning_rate": 1.0683352179049316e-06, "loss": 0.38618562, "memory(GiB)": 34.88, "step": 117995, "train_speed(iter/s)": 0.411865 }, { "acc": 0.93830595, "epoch": 3.1949746838870388, "grad_norm": 7.917514801025391, "learning_rate": 1.0679896697286106e-06, "loss": 0.41526899, "memory(GiB)": 34.88, "step": 118000, "train_speed(iter/s)": 0.411866 }, { "acc": 0.9569767, "epoch": 3.195110064170254, "grad_norm": 2.8133385181427, "learning_rate": 1.067644170814706e-06, "loss": 0.27232435, "memory(GiB)": 34.88, "step": 118005, "train_speed(iter/s)": 0.411867 }, { "acc": 0.93854752, "epoch": 3.19524544445347, "grad_norm": 10.697493553161621, "learning_rate": 1.0672987211675514e-06, "loss": 0.37798243, "memory(GiB)": 34.88, "step": 118010, "train_speed(iter/s)": 0.411868 }, { "acc": 0.93277855, "epoch": 3.1953808247366853, "grad_norm": 13.02302360534668, "learning_rate": 1.066953320791472e-06, "loss": 0.41371469, "memory(GiB)": 34.88, "step": 118015, "train_speed(iter/s)": 0.411869 }, { "acc": 0.94203377, "epoch": 3.195516205019901, "grad_norm": 9.856539726257324, "learning_rate": 1.066607969690791e-06, "loss": 0.33116391, "memory(GiB)": 34.88, "step": 118020, "train_speed(iter/s)": 0.41187 }, { "acc": 0.94954195, "epoch": 3.1956515853031164, "grad_norm": 10.478612899780273, "learning_rate": 1.0662626678698406e-06, "loss": 0.31719, "memory(GiB)": 34.88, "step": 118025, "train_speed(iter/s)": 0.411871 }, { "acc": 0.92690735, "epoch": 3.195786965586332, "grad_norm": 16.445280075073242, "learning_rate": 1.0659174153329422e-06, "loss": 0.50354929, "memory(GiB)": 34.88, "step": 118030, "train_speed(iter/s)": 0.411871 }, { "acc": 0.93647671, "epoch": 3.1959223458695476, "grad_norm": 12.614791870117188, "learning_rate": 1.0655722120844231e-06, "loss": 0.35538161, "memory(GiB)": 34.88, "step": 118035, "train_speed(iter/s)": 0.411872 }, { "acc": 0.93137636, "epoch": 3.196057726152763, "grad_norm": 9.264817237854004, "learning_rate": 1.0652270581286043e-06, "loss": 0.45223951, "memory(GiB)": 34.88, "step": 118040, "train_speed(iter/s)": 0.411873 }, { "acc": 0.95238724, "epoch": 3.1961931064359788, "grad_norm": 8.352364540100098, "learning_rate": 1.064881953469815e-06, "loss": 0.26460612, "memory(GiB)": 34.88, "step": 118045, "train_speed(iter/s)": 0.411874 }, { "acc": 0.95084372, "epoch": 3.196328486719194, "grad_norm": 13.285568237304688, "learning_rate": 1.0645368981123728e-06, "loss": 0.24777303, "memory(GiB)": 34.88, "step": 118050, "train_speed(iter/s)": 0.411875 }, { "acc": 0.94237642, "epoch": 3.19646386700241, "grad_norm": 4.377272605895996, "learning_rate": 1.0641918920606037e-06, "loss": 0.39168229, "memory(GiB)": 34.88, "step": 118055, "train_speed(iter/s)": 0.411876 }, { "acc": 0.95035114, "epoch": 3.1965992472856253, "grad_norm": 9.796825408935547, "learning_rate": 1.0638469353188291e-06, "loss": 0.35428872, "memory(GiB)": 34.88, "step": 118060, "train_speed(iter/s)": 0.411877 }, { "acc": 0.94267921, "epoch": 3.196734627568841, "grad_norm": 4.067866802215576, "learning_rate": 1.0635020278913685e-06, "loss": 0.35262752, "memory(GiB)": 34.88, "step": 118065, "train_speed(iter/s)": 0.411878 }, { "acc": 0.93841915, "epoch": 3.1968700078520564, "grad_norm": 8.581523895263672, "learning_rate": 1.0631571697825452e-06, "loss": 0.36628144, "memory(GiB)": 34.88, "step": 118070, "train_speed(iter/s)": 0.411879 }, { "acc": 0.92901287, "epoch": 3.197005388135272, "grad_norm": 8.866948127746582, "learning_rate": 1.0628123609966748e-06, "loss": 0.45246954, "memory(GiB)": 34.88, "step": 118075, "train_speed(iter/s)": 0.41188 }, { "acc": 0.94265652, "epoch": 3.1971407684184876, "grad_norm": 7.131237506866455, "learning_rate": 1.0624676015380829e-06, "loss": 0.31676636, "memory(GiB)": 34.88, "step": 118080, "train_speed(iter/s)": 0.41188 }, { "acc": 0.93484173, "epoch": 3.197276148701703, "grad_norm": 19.795228958129883, "learning_rate": 1.062122891411083e-06, "loss": 0.37937815, "memory(GiB)": 34.88, "step": 118085, "train_speed(iter/s)": 0.411881 }, { "acc": 0.95612755, "epoch": 3.1974115289849188, "grad_norm": 5.109465599060059, "learning_rate": 1.0617782306199952e-06, "loss": 0.23864157, "memory(GiB)": 34.88, "step": 118090, "train_speed(iter/s)": 0.411882 }, { "acc": 0.93058329, "epoch": 3.197546909268134, "grad_norm": 2.9506983757019043, "learning_rate": 1.061433619169139e-06, "loss": 0.43043141, "memory(GiB)": 34.88, "step": 118095, "train_speed(iter/s)": 0.411883 }, { "acc": 0.9500556, "epoch": 3.19768228955135, "grad_norm": 2.5963964462280273, "learning_rate": 1.061089057062828e-06, "loss": 0.27672944, "memory(GiB)": 34.88, "step": 118100, "train_speed(iter/s)": 0.411884 }, { "acc": 0.94752674, "epoch": 3.1978176698345653, "grad_norm": 3.2638704776763916, "learning_rate": 1.0607445443053809e-06, "loss": 0.27349584, "memory(GiB)": 34.88, "step": 118105, "train_speed(iter/s)": 0.411885 }, { "acc": 0.95537691, "epoch": 3.197953050117781, "grad_norm": 10.19631576538086, "learning_rate": 1.06040008090111e-06, "loss": 0.26279502, "memory(GiB)": 34.88, "step": 118110, "train_speed(iter/s)": 0.411885 }, { "acc": 0.93410606, "epoch": 3.1980884304009964, "grad_norm": 4.960686206817627, "learning_rate": 1.060055666854336e-06, "loss": 0.40694046, "memory(GiB)": 34.88, "step": 118115, "train_speed(iter/s)": 0.411886 }, { "acc": 0.93468256, "epoch": 3.198223810684212, "grad_norm": 11.683716773986816, "learning_rate": 1.0597113021693682e-06, "loss": 0.35362988, "memory(GiB)": 34.88, "step": 118120, "train_speed(iter/s)": 0.411887 }, { "acc": 0.93223, "epoch": 3.1983591909674276, "grad_norm": 17.082735061645508, "learning_rate": 1.0593669868505245e-06, "loss": 0.4046515, "memory(GiB)": 34.88, "step": 118125, "train_speed(iter/s)": 0.411888 }, { "acc": 0.95121994, "epoch": 3.198494571250643, "grad_norm": 4.027713298797607, "learning_rate": 1.0590227209021132e-06, "loss": 0.28810983, "memory(GiB)": 34.88, "step": 118130, "train_speed(iter/s)": 0.411889 }, { "acc": 0.93935432, "epoch": 3.1986299515338588, "grad_norm": 9.649843215942383, "learning_rate": 1.058678504328452e-06, "loss": 0.3322628, "memory(GiB)": 34.88, "step": 118135, "train_speed(iter/s)": 0.41189 }, { "acc": 0.95950336, "epoch": 3.198765331817074, "grad_norm": 11.859471321105957, "learning_rate": 1.0583343371338514e-06, "loss": 0.22388468, "memory(GiB)": 34.88, "step": 118140, "train_speed(iter/s)": 0.411891 }, { "acc": 0.93261967, "epoch": 3.19890071210029, "grad_norm": 8.000665664672852, "learning_rate": 1.0579902193226188e-06, "loss": 0.33722737, "memory(GiB)": 34.88, "step": 118145, "train_speed(iter/s)": 0.411892 }, { "acc": 0.93298187, "epoch": 3.1990360923835053, "grad_norm": 10.139742851257324, "learning_rate": 1.0576461508990714e-06, "loss": 0.49221334, "memory(GiB)": 34.88, "step": 118150, "train_speed(iter/s)": 0.411893 }, { "acc": 0.95277147, "epoch": 3.1991714726667206, "grad_norm": 13.86258316040039, "learning_rate": 1.0573021318675147e-06, "loss": 0.27654166, "memory(GiB)": 34.88, "step": 118155, "train_speed(iter/s)": 0.411894 }, { "acc": 0.95543308, "epoch": 3.1993068529499364, "grad_norm": 8.743046760559082, "learning_rate": 1.0569581622322603e-06, "loss": 0.29554899, "memory(GiB)": 34.88, "step": 118160, "train_speed(iter/s)": 0.411894 }, { "acc": 0.95247421, "epoch": 3.199442233233152, "grad_norm": 10.1862154006958, "learning_rate": 1.0566142419976147e-06, "loss": 0.28024244, "memory(GiB)": 34.88, "step": 118165, "train_speed(iter/s)": 0.411895 }, { "acc": 0.95168839, "epoch": 3.1995776135163676, "grad_norm": 11.39676570892334, "learning_rate": 1.0562703711678903e-06, "loss": 0.2713563, "memory(GiB)": 34.88, "step": 118170, "train_speed(iter/s)": 0.411896 }, { "acc": 0.94682646, "epoch": 3.199712993799583, "grad_norm": 9.44648551940918, "learning_rate": 1.0559265497473911e-06, "loss": 0.26571791, "memory(GiB)": 34.88, "step": 118175, "train_speed(iter/s)": 0.411897 }, { "acc": 0.94636497, "epoch": 3.1998483740827988, "grad_norm": 6.255939960479736, "learning_rate": 1.055582777740425e-06, "loss": 0.31715493, "memory(GiB)": 34.88, "step": 118180, "train_speed(iter/s)": 0.411898 }, { "acc": 0.95278416, "epoch": 3.199983754366014, "grad_norm": 6.714641094207764, "learning_rate": 1.0552390551513008e-06, "loss": 0.22170956, "memory(GiB)": 34.88, "step": 118185, "train_speed(iter/s)": 0.411899 }, { "acc": 0.94124737, "epoch": 3.2001191346492295, "grad_norm": 9.702167510986328, "learning_rate": 1.0548953819843208e-06, "loss": 0.32077384, "memory(GiB)": 34.88, "step": 118190, "train_speed(iter/s)": 0.4119 }, { "acc": 0.94543667, "epoch": 3.2002545149324453, "grad_norm": 5.13954496383667, "learning_rate": 1.0545517582437932e-06, "loss": 0.28572083, "memory(GiB)": 34.88, "step": 118195, "train_speed(iter/s)": 0.411901 }, { "acc": 0.94763432, "epoch": 3.2003898952156606, "grad_norm": 8.27252197265625, "learning_rate": 1.0542081839340183e-06, "loss": 0.32912507, "memory(GiB)": 34.88, "step": 118200, "train_speed(iter/s)": 0.411902 }, { "acc": 0.94494724, "epoch": 3.2005252754988764, "grad_norm": 8.074130058288574, "learning_rate": 1.0538646590593056e-06, "loss": 0.34723806, "memory(GiB)": 34.88, "step": 118205, "train_speed(iter/s)": 0.411903 }, { "acc": 0.93725386, "epoch": 3.200660655782092, "grad_norm": 3.680294990539551, "learning_rate": 1.0535211836239541e-06, "loss": 0.36228228, "memory(GiB)": 34.88, "step": 118210, "train_speed(iter/s)": 0.411904 }, { "acc": 0.95191765, "epoch": 3.2007960360653076, "grad_norm": 4.453493118286133, "learning_rate": 1.0531777576322684e-06, "loss": 0.3023221, "memory(GiB)": 34.88, "step": 118215, "train_speed(iter/s)": 0.411904 }, { "acc": 0.94627256, "epoch": 3.200931416348523, "grad_norm": 7.40738582611084, "learning_rate": 1.0528343810885514e-06, "loss": 0.31401772, "memory(GiB)": 34.88, "step": 118220, "train_speed(iter/s)": 0.411905 }, { "acc": 0.93415394, "epoch": 3.2010667966317383, "grad_norm": 7.553997993469238, "learning_rate": 1.052491053997102e-06, "loss": 0.33595457, "memory(GiB)": 34.88, "step": 118225, "train_speed(iter/s)": 0.411906 }, { "acc": 0.94225187, "epoch": 3.201202176914954, "grad_norm": 5.330636024475098, "learning_rate": 1.052147776362224e-06, "loss": 0.35582268, "memory(GiB)": 34.88, "step": 118230, "train_speed(iter/s)": 0.411907 }, { "acc": 0.93653107, "epoch": 3.2013375571981695, "grad_norm": 7.712029457092285, "learning_rate": 1.0518045481882133e-06, "loss": 0.35321536, "memory(GiB)": 34.88, "step": 118235, "train_speed(iter/s)": 0.411908 }, { "acc": 0.94134254, "epoch": 3.2014729374813853, "grad_norm": 4.314072132110596, "learning_rate": 1.051461369479375e-06, "loss": 0.34873643, "memory(GiB)": 34.88, "step": 118240, "train_speed(iter/s)": 0.411909 }, { "acc": 0.94976187, "epoch": 3.2016083177646006, "grad_norm": 5.476651191711426, "learning_rate": 1.051118240240004e-06, "loss": 0.32666378, "memory(GiB)": 34.88, "step": 118245, "train_speed(iter/s)": 0.41191 }, { "acc": 0.94790821, "epoch": 3.2017436980478164, "grad_norm": 7.639745712280273, "learning_rate": 1.0507751604744001e-06, "loss": 0.27168856, "memory(GiB)": 34.88, "step": 118250, "train_speed(iter/s)": 0.411911 }, { "acc": 0.95201178, "epoch": 3.201879078331032, "grad_norm": 11.335431098937988, "learning_rate": 1.0504321301868612e-06, "loss": 0.33899643, "memory(GiB)": 34.88, "step": 118255, "train_speed(iter/s)": 0.411912 }, { "acc": 0.95927887, "epoch": 3.2020144586142476, "grad_norm": 3.1362359523773193, "learning_rate": 1.0500891493816854e-06, "loss": 0.23377674, "memory(GiB)": 34.88, "step": 118260, "train_speed(iter/s)": 0.411912 }, { "acc": 0.93771782, "epoch": 3.202149838897463, "grad_norm": 5.707433700561523, "learning_rate": 1.0497462180631681e-06, "loss": 0.37504468, "memory(GiB)": 34.88, "step": 118265, "train_speed(iter/s)": 0.411913 }, { "acc": 0.94043789, "epoch": 3.2022852191806783, "grad_norm": 5.515219211578369, "learning_rate": 1.0494033362356025e-06, "loss": 0.41049542, "memory(GiB)": 34.88, "step": 118270, "train_speed(iter/s)": 0.411914 }, { "acc": 0.94153805, "epoch": 3.202420599463894, "grad_norm": 15.38891887664795, "learning_rate": 1.049060503903289e-06, "loss": 0.35426228, "memory(GiB)": 34.88, "step": 118275, "train_speed(iter/s)": 0.411915 }, { "acc": 0.95130844, "epoch": 3.2025559797471095, "grad_norm": 12.647809982299805, "learning_rate": 1.0487177210705185e-06, "loss": 0.28106723, "memory(GiB)": 34.88, "step": 118280, "train_speed(iter/s)": 0.411916 }, { "acc": 0.93326511, "epoch": 3.2026913600303253, "grad_norm": 17.58262062072754, "learning_rate": 1.0483749877415864e-06, "loss": 0.35845838, "memory(GiB)": 34.88, "step": 118285, "train_speed(iter/s)": 0.411917 }, { "acc": 0.94222164, "epoch": 3.2028267403135406, "grad_norm": 6.44257926940918, "learning_rate": 1.048032303920786e-06, "loss": 0.36329269, "memory(GiB)": 34.88, "step": 118290, "train_speed(iter/s)": 0.411918 }, { "acc": 0.92709026, "epoch": 3.2029621205967564, "grad_norm": 10.315450668334961, "learning_rate": 1.0476896696124112e-06, "loss": 0.42348013, "memory(GiB)": 34.88, "step": 118295, "train_speed(iter/s)": 0.411919 }, { "acc": 0.94475784, "epoch": 3.203097500879972, "grad_norm": 12.653124809265137, "learning_rate": 1.0473470848207517e-06, "loss": 0.35101247, "memory(GiB)": 34.88, "step": 118300, "train_speed(iter/s)": 0.41192 }, { "acc": 0.94234257, "epoch": 3.2032328811631876, "grad_norm": 17.639907836914062, "learning_rate": 1.0470045495501007e-06, "loss": 0.31086392, "memory(GiB)": 34.88, "step": 118305, "train_speed(iter/s)": 0.411921 }, { "acc": 0.94040651, "epoch": 3.203368261446403, "grad_norm": 2.6416893005371094, "learning_rate": 1.0466620638047503e-06, "loss": 0.36953988, "memory(GiB)": 34.88, "step": 118310, "train_speed(iter/s)": 0.411922 }, { "acc": 0.94730864, "epoch": 3.2035036417296183, "grad_norm": 3.9144814014434814, "learning_rate": 1.0463196275889875e-06, "loss": 0.27484066, "memory(GiB)": 34.88, "step": 118315, "train_speed(iter/s)": 0.411923 }, { "acc": 0.93329487, "epoch": 3.203639022012834, "grad_norm": 5.314810752868652, "learning_rate": 1.0459772409071037e-06, "loss": 0.39692454, "memory(GiB)": 34.88, "step": 118320, "train_speed(iter/s)": 0.411924 }, { "acc": 0.95264606, "epoch": 3.2037744022960495, "grad_norm": 6.982507228851318, "learning_rate": 1.0456349037633887e-06, "loss": 0.27174337, "memory(GiB)": 34.88, "step": 118325, "train_speed(iter/s)": 0.411925 }, { "acc": 0.94108582, "epoch": 3.2039097825792653, "grad_norm": 5.630784034729004, "learning_rate": 1.0452926161621313e-06, "loss": 0.36284349, "memory(GiB)": 34.88, "step": 118330, "train_speed(iter/s)": 0.411926 }, { "acc": 0.93364182, "epoch": 3.2040451628624806, "grad_norm": 5.981929779052734, "learning_rate": 1.044950378107617e-06, "loss": 0.41893587, "memory(GiB)": 34.88, "step": 118335, "train_speed(iter/s)": 0.411927 }, { "acc": 0.9508749, "epoch": 3.2041805431456964, "grad_norm": 9.797879219055176, "learning_rate": 1.0446081896041347e-06, "loss": 0.28228447, "memory(GiB)": 34.88, "step": 118340, "train_speed(iter/s)": 0.411928 }, { "acc": 0.95238247, "epoch": 3.204315923428912, "grad_norm": 6.531575679779053, "learning_rate": 1.0442660506559715e-06, "loss": 0.31771417, "memory(GiB)": 34.88, "step": 118345, "train_speed(iter/s)": 0.411928 }, { "acc": 0.94348736, "epoch": 3.204451303712127, "grad_norm": 4.749427795410156, "learning_rate": 1.0439239612674113e-06, "loss": 0.38435864, "memory(GiB)": 34.88, "step": 118350, "train_speed(iter/s)": 0.411929 }, { "acc": 0.96188774, "epoch": 3.204586683995343, "grad_norm": 6.173473834991455, "learning_rate": 1.0435819214427407e-06, "loss": 0.24184918, "memory(GiB)": 34.88, "step": 118355, "train_speed(iter/s)": 0.41193 }, { "acc": 0.95405617, "epoch": 3.2047220642785583, "grad_norm": 10.693285942077637, "learning_rate": 1.043239931186244e-06, "loss": 0.25946288, "memory(GiB)": 34.88, "step": 118360, "train_speed(iter/s)": 0.411931 }, { "acc": 0.95640135, "epoch": 3.204857444561774, "grad_norm": 6.817018985748291, "learning_rate": 1.0428979905022077e-06, "loss": 0.2538496, "memory(GiB)": 34.88, "step": 118365, "train_speed(iter/s)": 0.411932 }, { "acc": 0.94068451, "epoch": 3.2049928248449895, "grad_norm": 4.9925103187561035, "learning_rate": 1.0425560993949114e-06, "loss": 0.38919921, "memory(GiB)": 34.88, "step": 118370, "train_speed(iter/s)": 0.411933 }, { "acc": 0.94668083, "epoch": 3.2051282051282053, "grad_norm": 6.059595584869385, "learning_rate": 1.04221425786864e-06, "loss": 0.30973516, "memory(GiB)": 34.88, "step": 118375, "train_speed(iter/s)": 0.411934 }, { "acc": 0.94310579, "epoch": 3.2052635854114206, "grad_norm": 18.16240119934082, "learning_rate": 1.0418724659276757e-06, "loss": 0.3467252, "memory(GiB)": 34.88, "step": 118380, "train_speed(iter/s)": 0.411935 }, { "acc": 0.93628292, "epoch": 3.205398965694636, "grad_norm": 6.479126930236816, "learning_rate": 1.0415307235763012e-06, "loss": 0.39615355, "memory(GiB)": 34.88, "step": 118385, "train_speed(iter/s)": 0.411936 }, { "acc": 0.93584328, "epoch": 3.205534345977852, "grad_norm": 8.044246673583984, "learning_rate": 1.0411890308187945e-06, "loss": 0.33434577, "memory(GiB)": 34.88, "step": 118390, "train_speed(iter/s)": 0.411937 }, { "acc": 0.9467392, "epoch": 3.205669726261067, "grad_norm": 6.655898571014404, "learning_rate": 1.040847387659438e-06, "loss": 0.36257424, "memory(GiB)": 34.88, "step": 118395, "train_speed(iter/s)": 0.411938 }, { "acc": 0.93941746, "epoch": 3.205805106544283, "grad_norm": 18.860397338867188, "learning_rate": 1.0405057941025127e-06, "loss": 0.34558849, "memory(GiB)": 34.88, "step": 118400, "train_speed(iter/s)": 0.411939 }, { "acc": 0.95950012, "epoch": 3.2059404868274983, "grad_norm": 7.029662132263184, "learning_rate": 1.0401642501522945e-06, "loss": 0.25082657, "memory(GiB)": 34.88, "step": 118405, "train_speed(iter/s)": 0.411939 }, { "acc": 0.940135, "epoch": 3.206075867110714, "grad_norm": 14.85181713104248, "learning_rate": 1.0398227558130638e-06, "loss": 0.38538575, "memory(GiB)": 34.88, "step": 118410, "train_speed(iter/s)": 0.41194 }, { "acc": 0.94852753, "epoch": 3.2062112473939295, "grad_norm": 10.102643013000488, "learning_rate": 1.0394813110890984e-06, "loss": 0.2902142, "memory(GiB)": 34.88, "step": 118415, "train_speed(iter/s)": 0.411941 }, { "acc": 0.94468136, "epoch": 3.2063466276771453, "grad_norm": 12.469136238098145, "learning_rate": 1.0391399159846766e-06, "loss": 0.35704331, "memory(GiB)": 34.88, "step": 118420, "train_speed(iter/s)": 0.411942 }, { "acc": 0.94103813, "epoch": 3.2064820079603606, "grad_norm": 36.385311126708984, "learning_rate": 1.0387985705040727e-06, "loss": 0.3728312, "memory(GiB)": 34.88, "step": 118425, "train_speed(iter/s)": 0.411943 }, { "acc": 0.93721571, "epoch": 3.206617388243576, "grad_norm": 6.231442928314209, "learning_rate": 1.0384572746515637e-06, "loss": 0.40894594, "memory(GiB)": 34.88, "step": 118430, "train_speed(iter/s)": 0.411944 }, { "acc": 0.95857611, "epoch": 3.206752768526792, "grad_norm": 3.4990971088409424, "learning_rate": 1.0381160284314269e-06, "loss": 0.27842038, "memory(GiB)": 34.88, "step": 118435, "train_speed(iter/s)": 0.411945 }, { "acc": 0.93488054, "epoch": 3.206888148810007, "grad_norm": 8.972901344299316, "learning_rate": 1.0377748318479337e-06, "loss": 0.38008699, "memory(GiB)": 34.88, "step": 118440, "train_speed(iter/s)": 0.411945 }, { "acc": 0.93598642, "epoch": 3.207023529093223, "grad_norm": 7.00230598449707, "learning_rate": 1.0374336849053607e-06, "loss": 0.4422081, "memory(GiB)": 34.88, "step": 118445, "train_speed(iter/s)": 0.411946 }, { "acc": 0.93623276, "epoch": 3.2071589093764383, "grad_norm": 10.029056549072266, "learning_rate": 1.0370925876079802e-06, "loss": 0.40459976, "memory(GiB)": 34.88, "step": 118450, "train_speed(iter/s)": 0.411947 }, { "acc": 0.93234816, "epoch": 3.207294289659654, "grad_norm": 13.139924049377441, "learning_rate": 1.0367515399600668e-06, "loss": 0.38889844, "memory(GiB)": 34.88, "step": 118455, "train_speed(iter/s)": 0.411948 }, { "acc": 0.94718304, "epoch": 3.2074296699428695, "grad_norm": 8.964015007019043, "learning_rate": 1.0364105419658903e-06, "loss": 0.33590784, "memory(GiB)": 34.88, "step": 118460, "train_speed(iter/s)": 0.411949 }, { "acc": 0.94112644, "epoch": 3.2075650502260853, "grad_norm": 7.512369632720947, "learning_rate": 1.0360695936297241e-06, "loss": 0.28410151, "memory(GiB)": 34.88, "step": 118465, "train_speed(iter/s)": 0.41195 }, { "acc": 0.95067253, "epoch": 3.2077004305093006, "grad_norm": 10.091889381408691, "learning_rate": 1.0357286949558403e-06, "loss": 0.25835176, "memory(GiB)": 34.88, "step": 118470, "train_speed(iter/s)": 0.411951 }, { "acc": 0.93063316, "epoch": 3.207835810792516, "grad_norm": 13.90443229675293, "learning_rate": 1.0353878459485055e-06, "loss": 0.40688825, "memory(GiB)": 34.88, "step": 118475, "train_speed(iter/s)": 0.411952 }, { "acc": 0.94942131, "epoch": 3.207971191075732, "grad_norm": 22.708251953125, "learning_rate": 1.035047046611993e-06, "loss": 0.28122332, "memory(GiB)": 34.88, "step": 118480, "train_speed(iter/s)": 0.411953 }, { "acc": 0.94413481, "epoch": 3.208106571358947, "grad_norm": 5.782163619995117, "learning_rate": 1.0347062969505702e-06, "loss": 0.33187752, "memory(GiB)": 34.88, "step": 118485, "train_speed(iter/s)": 0.411953 }, { "acc": 0.93947086, "epoch": 3.208241951642163, "grad_norm": 9.360297203063965, "learning_rate": 1.034365596968508e-06, "loss": 0.41476007, "memory(GiB)": 34.88, "step": 118490, "train_speed(iter/s)": 0.411954 }, { "acc": 0.93994007, "epoch": 3.2083773319253783, "grad_norm": 4.145654201507568, "learning_rate": 1.0340249466700704e-06, "loss": 0.36930463, "memory(GiB)": 34.88, "step": 118495, "train_speed(iter/s)": 0.411955 }, { "acc": 0.93189487, "epoch": 3.208512712208594, "grad_norm": 77.93897247314453, "learning_rate": 1.0336843460595281e-06, "loss": 0.32670734, "memory(GiB)": 34.88, "step": 118500, "train_speed(iter/s)": 0.411956 }, { "acc": 0.94751377, "epoch": 3.2086480924918095, "grad_norm": 4.231572151184082, "learning_rate": 1.0333437951411465e-06, "loss": 0.32440915, "memory(GiB)": 34.88, "step": 118505, "train_speed(iter/s)": 0.411957 }, { "acc": 0.94101686, "epoch": 3.208783472775025, "grad_norm": 5.842535495758057, "learning_rate": 1.0330032939191924e-06, "loss": 0.33827419, "memory(GiB)": 34.88, "step": 118510, "train_speed(iter/s)": 0.411958 }, { "acc": 0.94866238, "epoch": 3.2089188530582407, "grad_norm": 12.837714195251465, "learning_rate": 1.0326628423979297e-06, "loss": 0.29104061, "memory(GiB)": 34.88, "step": 118515, "train_speed(iter/s)": 0.411959 }, { "acc": 0.93666573, "epoch": 3.209054233341456, "grad_norm": 4.673152923583984, "learning_rate": 1.0323224405816244e-06, "loss": 0.37996318, "memory(GiB)": 34.88, "step": 118520, "train_speed(iter/s)": 0.411959 }, { "acc": 0.93752289, "epoch": 3.209189613624672, "grad_norm": 4.878568649291992, "learning_rate": 1.0319820884745416e-06, "loss": 0.34077032, "memory(GiB)": 34.88, "step": 118525, "train_speed(iter/s)": 0.41196 }, { "acc": 0.95146332, "epoch": 3.209324993907887, "grad_norm": 4.469172477722168, "learning_rate": 1.0316417860809422e-06, "loss": 0.28490598, "memory(GiB)": 34.88, "step": 118530, "train_speed(iter/s)": 0.411961 }, { "acc": 0.93934135, "epoch": 3.209460374191103, "grad_norm": 24.400054931640625, "learning_rate": 1.031301533405091e-06, "loss": 0.39280555, "memory(GiB)": 34.88, "step": 118535, "train_speed(iter/s)": 0.411962 }, { "acc": 0.94976759, "epoch": 3.2095957544743183, "grad_norm": 6.015069961547852, "learning_rate": 1.0309613304512505e-06, "loss": 0.32479498, "memory(GiB)": 34.88, "step": 118540, "train_speed(iter/s)": 0.411963 }, { "acc": 0.93602877, "epoch": 3.2097311347575337, "grad_norm": 6.967789649963379, "learning_rate": 1.030621177223683e-06, "loss": 0.42472014, "memory(GiB)": 34.88, "step": 118545, "train_speed(iter/s)": 0.411964 }, { "acc": 0.94280319, "epoch": 3.2098665150407495, "grad_norm": 2.9005162715911865, "learning_rate": 1.0302810737266454e-06, "loss": 0.28475075, "memory(GiB)": 34.88, "step": 118550, "train_speed(iter/s)": 0.411965 }, { "acc": 0.95427265, "epoch": 3.210001895323965, "grad_norm": 3.6191022396087646, "learning_rate": 1.0299410199644045e-06, "loss": 0.29243217, "memory(GiB)": 34.88, "step": 118555, "train_speed(iter/s)": 0.411966 }, { "acc": 0.95529499, "epoch": 3.2101372756071807, "grad_norm": 7.432587146759033, "learning_rate": 1.029601015941217e-06, "loss": 0.26831303, "memory(GiB)": 34.88, "step": 118560, "train_speed(iter/s)": 0.411967 }, { "acc": 0.948382, "epoch": 3.210272655890396, "grad_norm": 3.734801769256592, "learning_rate": 1.0292610616613402e-06, "loss": 0.23646281, "memory(GiB)": 34.88, "step": 118565, "train_speed(iter/s)": 0.411968 }, { "acc": 0.94530716, "epoch": 3.210408036173612, "grad_norm": 5.966175556182861, "learning_rate": 1.028921157129035e-06, "loss": 0.33520119, "memory(GiB)": 34.88, "step": 118570, "train_speed(iter/s)": 0.411968 }, { "acc": 0.93743362, "epoch": 3.210543416456827, "grad_norm": 5.50625467300415, "learning_rate": 1.028581302348559e-06, "loss": 0.34991536, "memory(GiB)": 34.88, "step": 118575, "train_speed(iter/s)": 0.411969 }, { "acc": 0.95574589, "epoch": 3.210678796740043, "grad_norm": 4.379730224609375, "learning_rate": 1.0282414973241708e-06, "loss": 0.22398267, "memory(GiB)": 34.88, "step": 118580, "train_speed(iter/s)": 0.41197 }, { "acc": 0.94761038, "epoch": 3.2108141770232583, "grad_norm": 6.079967498779297, "learning_rate": 1.0279017420601225e-06, "loss": 0.30532937, "memory(GiB)": 34.88, "step": 118585, "train_speed(iter/s)": 0.411971 }, { "acc": 0.94473724, "epoch": 3.2109495573064737, "grad_norm": 5.348811149597168, "learning_rate": 1.027562036560677e-06, "loss": 0.32180791, "memory(GiB)": 34.88, "step": 118590, "train_speed(iter/s)": 0.411972 }, { "acc": 0.93990183, "epoch": 3.2110849375896895, "grad_norm": 2.98606276512146, "learning_rate": 1.0272223808300848e-06, "loss": 0.34243283, "memory(GiB)": 34.88, "step": 118595, "train_speed(iter/s)": 0.411972 }, { "acc": 0.94259624, "epoch": 3.211220317872905, "grad_norm": 8.945172309875488, "learning_rate": 1.0268827748726037e-06, "loss": 0.33767357, "memory(GiB)": 34.88, "step": 118600, "train_speed(iter/s)": 0.411973 }, { "acc": 0.94494438, "epoch": 3.2113556981561207, "grad_norm": 12.002473831176758, "learning_rate": 1.0265432186924847e-06, "loss": 0.30913558, "memory(GiB)": 34.88, "step": 118605, "train_speed(iter/s)": 0.411974 }, { "acc": 0.9464468, "epoch": 3.211491078439336, "grad_norm": 4.602940559387207, "learning_rate": 1.0262037122939835e-06, "loss": 0.30543447, "memory(GiB)": 34.88, "step": 118610, "train_speed(iter/s)": 0.411975 }, { "acc": 0.94095125, "epoch": 3.211626458722552, "grad_norm": 5.115725994110107, "learning_rate": 1.0258642556813542e-06, "loss": 0.33651958, "memory(GiB)": 34.88, "step": 118615, "train_speed(iter/s)": 0.411975 }, { "acc": 0.95780268, "epoch": 3.211761839005767, "grad_norm": 7.826663494110107, "learning_rate": 1.0255248488588445e-06, "loss": 0.23311529, "memory(GiB)": 34.88, "step": 118620, "train_speed(iter/s)": 0.411976 }, { "acc": 0.94029675, "epoch": 3.211897219288983, "grad_norm": 5.0926103591918945, "learning_rate": 1.0251854918307122e-06, "loss": 0.36282046, "memory(GiB)": 34.88, "step": 118625, "train_speed(iter/s)": 0.411977 }, { "acc": 0.94426079, "epoch": 3.2120325995721983, "grad_norm": 5.09468412399292, "learning_rate": 1.0248461846012049e-06, "loss": 0.3635051, "memory(GiB)": 34.88, "step": 118630, "train_speed(iter/s)": 0.411978 }, { "acc": 0.95230312, "epoch": 3.2121679798554137, "grad_norm": 3.8719940185546875, "learning_rate": 1.0245069271745747e-06, "loss": 0.33211339, "memory(GiB)": 34.88, "step": 118635, "train_speed(iter/s)": 0.411979 }, { "acc": 0.94789543, "epoch": 3.2123033601386295, "grad_norm": 5.350306987762451, "learning_rate": 1.0241677195550691e-06, "loss": 0.39620762, "memory(GiB)": 34.88, "step": 118640, "train_speed(iter/s)": 0.411979 }, { "acc": 0.94234161, "epoch": 3.212438740421845, "grad_norm": 5.332607269287109, "learning_rate": 1.023828561746939e-06, "loss": 0.3925431, "memory(GiB)": 34.88, "step": 118645, "train_speed(iter/s)": 0.41198 }, { "acc": 0.94437523, "epoch": 3.2125741207050607, "grad_norm": 9.021441459655762, "learning_rate": 1.0234894537544342e-06, "loss": 0.35219686, "memory(GiB)": 34.88, "step": 118650, "train_speed(iter/s)": 0.411981 }, { "acc": 0.95611553, "epoch": 3.212709500988276, "grad_norm": 4.301481246948242, "learning_rate": 1.0231503955817987e-06, "loss": 0.30303097, "memory(GiB)": 34.88, "step": 118655, "train_speed(iter/s)": 0.411982 }, { "acc": 0.9440155, "epoch": 3.212844881271492, "grad_norm": 5.451543807983398, "learning_rate": 1.0228113872332846e-06, "loss": 0.31194227, "memory(GiB)": 34.88, "step": 118660, "train_speed(iter/s)": 0.411983 }, { "acc": 0.94224491, "epoch": 3.212980261554707, "grad_norm": 2.4298899173736572, "learning_rate": 1.0224724287131358e-06, "loss": 0.34975019, "memory(GiB)": 34.88, "step": 118665, "train_speed(iter/s)": 0.411984 }, { "acc": 0.93637028, "epoch": 3.2131156418379225, "grad_norm": 5.685987949371338, "learning_rate": 1.0221335200256005e-06, "loss": 0.39656539, "memory(GiB)": 34.88, "step": 118670, "train_speed(iter/s)": 0.411984 }, { "acc": 0.94281378, "epoch": 3.2132510221211383, "grad_norm": 16.399559020996094, "learning_rate": 1.0217946611749196e-06, "loss": 0.30869007, "memory(GiB)": 34.88, "step": 118675, "train_speed(iter/s)": 0.411985 }, { "acc": 0.94319239, "epoch": 3.2133864024043537, "grad_norm": 7.4091477394104, "learning_rate": 1.0214558521653445e-06, "loss": 0.29039295, "memory(GiB)": 34.88, "step": 118680, "train_speed(iter/s)": 0.411986 }, { "acc": 0.94421358, "epoch": 3.2135217826875695, "grad_norm": 4.279969215393066, "learning_rate": 1.0211170930011155e-06, "loss": 0.30162163, "memory(GiB)": 34.88, "step": 118685, "train_speed(iter/s)": 0.411987 }, { "acc": 0.94934311, "epoch": 3.213657162970785, "grad_norm": 10.410896301269531, "learning_rate": 1.020778383686476e-06, "loss": 0.25483971, "memory(GiB)": 34.88, "step": 118690, "train_speed(iter/s)": 0.411988 }, { "acc": 0.93854456, "epoch": 3.2137925432540007, "grad_norm": 9.09390926361084, "learning_rate": 1.0204397242256698e-06, "loss": 0.39947569, "memory(GiB)": 34.88, "step": 118695, "train_speed(iter/s)": 0.411989 }, { "acc": 0.94369392, "epoch": 3.213927923537216, "grad_norm": 8.978086471557617, "learning_rate": 1.0201011146229396e-06, "loss": 0.30238266, "memory(GiB)": 34.88, "step": 118700, "train_speed(iter/s)": 0.41199 }, { "acc": 0.94574661, "epoch": 3.2140633038204314, "grad_norm": 4.907046318054199, "learning_rate": 1.0197625548825275e-06, "loss": 0.33814311, "memory(GiB)": 34.88, "step": 118705, "train_speed(iter/s)": 0.411991 }, { "acc": 0.94546452, "epoch": 3.214198684103647, "grad_norm": 3.1870298385620117, "learning_rate": 1.019424045008672e-06, "loss": 0.29696703, "memory(GiB)": 34.88, "step": 118710, "train_speed(iter/s)": 0.411992 }, { "acc": 0.94671574, "epoch": 3.2143340643868625, "grad_norm": 2.426758050918579, "learning_rate": 1.019085585005618e-06, "loss": 0.36084733, "memory(GiB)": 34.88, "step": 118715, "train_speed(iter/s)": 0.411992 }, { "acc": 0.94215069, "epoch": 3.2144694446700783, "grad_norm": 5.650994777679443, "learning_rate": 1.0187471748776018e-06, "loss": 0.25974784, "memory(GiB)": 34.88, "step": 118720, "train_speed(iter/s)": 0.411993 }, { "acc": 0.94061518, "epoch": 3.2146048249532937, "grad_norm": 23.17957305908203, "learning_rate": 1.0184088146288653e-06, "loss": 0.34987693, "memory(GiB)": 34.88, "step": 118725, "train_speed(iter/s)": 0.411994 }, { "acc": 0.94544439, "epoch": 3.2147402052365095, "grad_norm": 6.9534711837768555, "learning_rate": 1.0180705042636447e-06, "loss": 0.37976234, "memory(GiB)": 34.88, "step": 118730, "train_speed(iter/s)": 0.411995 }, { "acc": 0.93823166, "epoch": 3.214875585519725, "grad_norm": 4.357977390289307, "learning_rate": 1.0177322437861785e-06, "loss": 0.3635123, "memory(GiB)": 34.88, "step": 118735, "train_speed(iter/s)": 0.411996 }, { "acc": 0.9244586, "epoch": 3.2150109658029407, "grad_norm": 16.551103591918945, "learning_rate": 1.0173940332007058e-06, "loss": 0.53826785, "memory(GiB)": 34.88, "step": 118740, "train_speed(iter/s)": 0.411996 }, { "acc": 0.94476652, "epoch": 3.215146346086156, "grad_norm": 10.615496635437012, "learning_rate": 1.0170558725114592e-06, "loss": 0.33570783, "memory(GiB)": 34.88, "step": 118745, "train_speed(iter/s)": 0.411997 }, { "acc": 0.93688679, "epoch": 3.2152817263693714, "grad_norm": 4.156003952026367, "learning_rate": 1.0167177617226812e-06, "loss": 0.42764893, "memory(GiB)": 34.88, "step": 118750, "train_speed(iter/s)": 0.411998 }, { "acc": 0.9484045, "epoch": 3.215417106652587, "grad_norm": 5.301733493804932, "learning_rate": 1.0163797008386022e-06, "loss": 0.29683867, "memory(GiB)": 34.88, "step": 118755, "train_speed(iter/s)": 0.411999 }, { "acc": 0.96001949, "epoch": 3.2155524869358025, "grad_norm": 5.004881858825684, "learning_rate": 1.01604168986346e-06, "loss": 0.24412346, "memory(GiB)": 34.88, "step": 118760, "train_speed(iter/s)": 0.411999 }, { "acc": 0.95016022, "epoch": 3.2156878672190183, "grad_norm": 6.892862319946289, "learning_rate": 1.0157037288014865e-06, "loss": 0.25168459, "memory(GiB)": 34.88, "step": 118765, "train_speed(iter/s)": 0.412 }, { "acc": 0.94382935, "epoch": 3.2158232475022337, "grad_norm": 5.0769267082214355, "learning_rate": 1.0153658176569158e-06, "loss": 0.39580078, "memory(GiB)": 34.88, "step": 118770, "train_speed(iter/s)": 0.412001 }, { "acc": 0.94329185, "epoch": 3.2159586277854495, "grad_norm": 18.825035095214844, "learning_rate": 1.0150279564339833e-06, "loss": 0.35720029, "memory(GiB)": 34.88, "step": 118775, "train_speed(iter/s)": 0.412002 }, { "acc": 0.94528046, "epoch": 3.216094008068665, "grad_norm": 5.505795001983643, "learning_rate": 1.014690145136917e-06, "loss": 0.31442487, "memory(GiB)": 34.88, "step": 118780, "train_speed(iter/s)": 0.412003 }, { "acc": 0.94605322, "epoch": 3.2162293883518807, "grad_norm": 4.234976768493652, "learning_rate": 1.014352383769954e-06, "loss": 0.31797891, "memory(GiB)": 34.88, "step": 118785, "train_speed(iter/s)": 0.412004 }, { "acc": 0.95054951, "epoch": 3.216364768635096, "grad_norm": 6.784307956695557, "learning_rate": 1.0140146723373211e-06, "loss": 0.31703219, "memory(GiB)": 34.88, "step": 118790, "train_speed(iter/s)": 0.412005 }, { "acc": 0.94970503, "epoch": 3.2165001489183114, "grad_norm": 8.159198760986328, "learning_rate": 1.0136770108432515e-06, "loss": 0.25217161, "memory(GiB)": 34.88, "step": 118795, "train_speed(iter/s)": 0.412005 }, { "acc": 0.94798641, "epoch": 3.216635529201527, "grad_norm": 12.67556381225586, "learning_rate": 1.013339399291971e-06, "loss": 0.32701707, "memory(GiB)": 34.88, "step": 118800, "train_speed(iter/s)": 0.412006 }, { "acc": 0.94164305, "epoch": 3.2167709094847425, "grad_norm": 5.154560565948486, "learning_rate": 1.013001837687715e-06, "loss": 0.32337193, "memory(GiB)": 34.88, "step": 118805, "train_speed(iter/s)": 0.412007 }, { "acc": 0.92105236, "epoch": 3.2169062897679583, "grad_norm": 25.13119888305664, "learning_rate": 1.0126643260347093e-06, "loss": 0.48367968, "memory(GiB)": 34.88, "step": 118810, "train_speed(iter/s)": 0.412008 }, { "acc": 0.94050865, "epoch": 3.2170416700511737, "grad_norm": 5.553609371185303, "learning_rate": 1.012326864337178e-06, "loss": 0.33937767, "memory(GiB)": 34.88, "step": 118815, "train_speed(iter/s)": 0.412009 }, { "acc": 0.95059967, "epoch": 3.2171770503343895, "grad_norm": 10.179515838623047, "learning_rate": 1.011989452599355e-06, "loss": 0.32570155, "memory(GiB)": 34.88, "step": 118820, "train_speed(iter/s)": 0.41201 }, { "acc": 0.94468832, "epoch": 3.217312430617605, "grad_norm": 15.822674751281738, "learning_rate": 1.0116520908254625e-06, "loss": 0.30542107, "memory(GiB)": 34.88, "step": 118825, "train_speed(iter/s)": 0.412011 }, { "acc": 0.94656401, "epoch": 3.21744781090082, "grad_norm": 9.656423568725586, "learning_rate": 1.0113147790197298e-06, "loss": 0.32736034, "memory(GiB)": 34.88, "step": 118830, "train_speed(iter/s)": 0.412012 }, { "acc": 0.9433567, "epoch": 3.217583191184036, "grad_norm": 5.337278842926025, "learning_rate": 1.0109775171863778e-06, "loss": 0.33175149, "memory(GiB)": 34.88, "step": 118835, "train_speed(iter/s)": 0.412012 }, { "acc": 0.94026165, "epoch": 3.2177185714672514, "grad_norm": 7.400100231170654, "learning_rate": 1.0106403053296372e-06, "loss": 0.32484097, "memory(GiB)": 34.88, "step": 118840, "train_speed(iter/s)": 0.412013 }, { "acc": 0.9426157, "epoch": 3.217853951750467, "grad_norm": 2.4493343830108643, "learning_rate": 1.0103031434537276e-06, "loss": 0.38754716, "memory(GiB)": 34.88, "step": 118845, "train_speed(iter/s)": 0.412014 }, { "acc": 0.95840883, "epoch": 3.2179893320336825, "grad_norm": 6.157840251922607, "learning_rate": 1.0099660315628748e-06, "loss": 0.21840153, "memory(GiB)": 34.88, "step": 118850, "train_speed(iter/s)": 0.412015 }, { "acc": 0.95003195, "epoch": 3.2181247123168983, "grad_norm": 2.7823240756988525, "learning_rate": 1.0096289696613021e-06, "loss": 0.2108633, "memory(GiB)": 34.88, "step": 118855, "train_speed(iter/s)": 0.412016 }, { "acc": 0.92943983, "epoch": 3.2182600926001137, "grad_norm": 7.305574417114258, "learning_rate": 1.00929195775323e-06, "loss": 0.43973513, "memory(GiB)": 34.88, "step": 118860, "train_speed(iter/s)": 0.412017 }, { "acc": 0.94100885, "epoch": 3.218395472883329, "grad_norm": 8.793992042541504, "learning_rate": 1.008954995842883e-06, "loss": 0.31240032, "memory(GiB)": 34.88, "step": 118865, "train_speed(iter/s)": 0.412018 }, { "acc": 0.9513298, "epoch": 3.218530853166545, "grad_norm": 4.824399948120117, "learning_rate": 1.0086180839344777e-06, "loss": 0.32030215, "memory(GiB)": 34.88, "step": 118870, "train_speed(iter/s)": 0.412018 }, { "acc": 0.93151741, "epoch": 3.21866623344976, "grad_norm": 9.775040626525879, "learning_rate": 1.0082812220322396e-06, "loss": 0.44357815, "memory(GiB)": 34.88, "step": 118875, "train_speed(iter/s)": 0.412019 }, { "acc": 0.93522081, "epoch": 3.218801613732976, "grad_norm": 7.978743076324463, "learning_rate": 1.007944410140386e-06, "loss": 0.42447057, "memory(GiB)": 34.88, "step": 118880, "train_speed(iter/s)": 0.41202 }, { "acc": 0.93725586, "epoch": 3.2189369940161914, "grad_norm": 6.550238609313965, "learning_rate": 1.0076076482631362e-06, "loss": 0.40578394, "memory(GiB)": 34.88, "step": 118885, "train_speed(iter/s)": 0.412021 }, { "acc": 0.95053291, "epoch": 3.219072374299407, "grad_norm": 3.0511088371276855, "learning_rate": 1.00727093640471e-06, "loss": 0.28545504, "memory(GiB)": 34.88, "step": 118890, "train_speed(iter/s)": 0.412022 }, { "acc": 0.93348589, "epoch": 3.2192077545826225, "grad_norm": 23.443132400512695, "learning_rate": 1.0069342745693237e-06, "loss": 0.37847805, "memory(GiB)": 34.88, "step": 118895, "train_speed(iter/s)": 0.412023 }, { "acc": 0.95392876, "epoch": 3.2193431348658383, "grad_norm": 3.3262689113616943, "learning_rate": 1.0065976627611966e-06, "loss": 0.25200987, "memory(GiB)": 34.88, "step": 118900, "train_speed(iter/s)": 0.412024 }, { "acc": 0.93912697, "epoch": 3.2194785151490537, "grad_norm": 5.24251127243042, "learning_rate": 1.0062611009845413e-06, "loss": 0.38495257, "memory(GiB)": 34.88, "step": 118905, "train_speed(iter/s)": 0.412025 }, { "acc": 0.96251488, "epoch": 3.219613895432269, "grad_norm": 6.789374351501465, "learning_rate": 1.0059245892435797e-06, "loss": 0.20707231, "memory(GiB)": 34.88, "step": 118910, "train_speed(iter/s)": 0.412026 }, { "acc": 0.94018383, "epoch": 3.219749275715485, "grad_norm": 16.033843994140625, "learning_rate": 1.0055881275425233e-06, "loss": 0.34975882, "memory(GiB)": 34.88, "step": 118915, "train_speed(iter/s)": 0.412026 }, { "acc": 0.94196701, "epoch": 3.2198846559987, "grad_norm": 4.483493804931641, "learning_rate": 1.0052517158855882e-06, "loss": 0.38792953, "memory(GiB)": 34.88, "step": 118920, "train_speed(iter/s)": 0.412027 }, { "acc": 0.94106169, "epoch": 3.220020036281916, "grad_norm": 6.461889743804932, "learning_rate": 1.004915354276988e-06, "loss": 0.3255568, "memory(GiB)": 34.88, "step": 118925, "train_speed(iter/s)": 0.412027 }, { "acc": 0.94130878, "epoch": 3.2201554165651314, "grad_norm": 5.350047588348389, "learning_rate": 1.0045790427209384e-06, "loss": 0.33218136, "memory(GiB)": 34.88, "step": 118930, "train_speed(iter/s)": 0.412028 }, { "acc": 0.95184765, "epoch": 3.220290796848347, "grad_norm": 4.535136699676514, "learning_rate": 1.0042427812216509e-06, "loss": 0.28529515, "memory(GiB)": 34.88, "step": 118935, "train_speed(iter/s)": 0.412029 }, { "acc": 0.93207207, "epoch": 3.2204261771315625, "grad_norm": 18.369544982910156, "learning_rate": 1.003906569783335e-06, "loss": 0.45390811, "memory(GiB)": 34.88, "step": 118940, "train_speed(iter/s)": 0.41203 }, { "acc": 0.9382206, "epoch": 3.2205615574147783, "grad_norm": 7.5575995445251465, "learning_rate": 1.0035704084102077e-06, "loss": 0.42209024, "memory(GiB)": 34.88, "step": 118945, "train_speed(iter/s)": 0.412031 }, { "acc": 0.9317852, "epoch": 3.2206969376979937, "grad_norm": 6.654635906219482, "learning_rate": 1.0032342971064765e-06, "loss": 0.40270443, "memory(GiB)": 34.88, "step": 118950, "train_speed(iter/s)": 0.412032 }, { "acc": 0.93429432, "epoch": 3.220832317981209, "grad_norm": 7.017824172973633, "learning_rate": 1.0028982358763522e-06, "loss": 0.41179309, "memory(GiB)": 34.88, "step": 118955, "train_speed(iter/s)": 0.412033 }, { "acc": 0.92626638, "epoch": 3.220967698264425, "grad_norm": 6.919806003570557, "learning_rate": 1.0025622247240454e-06, "loss": 0.3875972, "memory(GiB)": 34.88, "step": 118960, "train_speed(iter/s)": 0.412034 }, { "acc": 0.94528551, "epoch": 3.22110307854764, "grad_norm": 3.8898658752441406, "learning_rate": 1.0022262636537667e-06, "loss": 0.37585757, "memory(GiB)": 34.88, "step": 118965, "train_speed(iter/s)": 0.412034 }, { "acc": 0.93786545, "epoch": 3.221238458830856, "grad_norm": 9.715975761413574, "learning_rate": 1.0018903526697219e-06, "loss": 0.34299378, "memory(GiB)": 34.88, "step": 118970, "train_speed(iter/s)": 0.412035 }, { "acc": 0.9417057, "epoch": 3.2213738391140714, "grad_norm": 8.497828483581543, "learning_rate": 1.00155449177612e-06, "loss": 0.35808511, "memory(GiB)": 34.88, "step": 118975, "train_speed(iter/s)": 0.412036 }, { "acc": 0.95409164, "epoch": 3.221509219397287, "grad_norm": 8.807340621948242, "learning_rate": 1.00121868097717e-06, "loss": 0.23893561, "memory(GiB)": 34.88, "step": 118980, "train_speed(iter/s)": 0.412037 }, { "acc": 0.94618797, "epoch": 3.2216445996805025, "grad_norm": 11.655784606933594, "learning_rate": 1.0008829202770756e-06, "loss": 0.37029212, "memory(GiB)": 34.88, "step": 118985, "train_speed(iter/s)": 0.412038 }, { "acc": 0.93536015, "epoch": 3.221779979963718, "grad_norm": 7.745519638061523, "learning_rate": 1.0005472096800447e-06, "loss": 0.36969075, "memory(GiB)": 34.88, "step": 118990, "train_speed(iter/s)": 0.412039 }, { "acc": 0.94893379, "epoch": 3.2219153602469337, "grad_norm": 7.618381977081299, "learning_rate": 1.000211549190282e-06, "loss": 0.30349693, "memory(GiB)": 34.88, "step": 118995, "train_speed(iter/s)": 0.41204 }, { "acc": 0.95394487, "epoch": 3.222050740530149, "grad_norm": 3.4856929779052734, "learning_rate": 9.99875938811994e-07, "loss": 0.2437331, "memory(GiB)": 34.88, "step": 119000, "train_speed(iter/s)": 0.412041 }, { "acc": 0.94362965, "epoch": 3.222186120813365, "grad_norm": 10.872730255126953, "learning_rate": 9.995403785493825e-07, "loss": 0.2871223, "memory(GiB)": 34.88, "step": 119005, "train_speed(iter/s)": 0.412042 }, { "acc": 0.94214783, "epoch": 3.22232150109658, "grad_norm": 12.638921737670898, "learning_rate": 9.992048684066518e-07, "loss": 0.33814554, "memory(GiB)": 34.88, "step": 119010, "train_speed(iter/s)": 0.412043 }, { "acc": 0.94232178, "epoch": 3.222456881379796, "grad_norm": 8.607209205627441, "learning_rate": 9.988694083880068e-07, "loss": 0.37447076, "memory(GiB)": 34.88, "step": 119015, "train_speed(iter/s)": 0.412044 }, { "acc": 0.94550037, "epoch": 3.2225922616630114, "grad_norm": 7.447561740875244, "learning_rate": 9.985339984976468e-07, "loss": 0.2846508, "memory(GiB)": 34.88, "step": 119020, "train_speed(iter/s)": 0.412044 }, { "acc": 0.94211111, "epoch": 3.2227276419462267, "grad_norm": 7.782288074493408, "learning_rate": 9.981986387397748e-07, "loss": 0.3682476, "memory(GiB)": 34.88, "step": 119025, "train_speed(iter/s)": 0.412045 }, { "acc": 0.94044371, "epoch": 3.2228630222294425, "grad_norm": 7.040654182434082, "learning_rate": 9.978633291185917e-07, "loss": 0.35945549, "memory(GiB)": 34.88, "step": 119030, "train_speed(iter/s)": 0.412046 }, { "acc": 0.94904652, "epoch": 3.222998402512658, "grad_norm": 3.5164458751678467, "learning_rate": 9.975280696382997e-07, "loss": 0.35360827, "memory(GiB)": 34.88, "step": 119035, "train_speed(iter/s)": 0.412047 }, { "acc": 0.92610569, "epoch": 3.2231337827958737, "grad_norm": 13.593364715576172, "learning_rate": 9.971928603030952e-07, "loss": 0.51606426, "memory(GiB)": 34.88, "step": 119040, "train_speed(iter/s)": 0.412048 }, { "acc": 0.92587337, "epoch": 3.223269163079089, "grad_norm": 17.24121856689453, "learning_rate": 9.968577011171793e-07, "loss": 0.47966557, "memory(GiB)": 34.88, "step": 119045, "train_speed(iter/s)": 0.412049 }, { "acc": 0.95109272, "epoch": 3.223404543362305, "grad_norm": 9.88824462890625, "learning_rate": 9.965225920847504e-07, "loss": 0.26441846, "memory(GiB)": 34.88, "step": 119050, "train_speed(iter/s)": 0.41205 }, { "acc": 0.94941235, "epoch": 3.2235399236455202, "grad_norm": 6.881123065948486, "learning_rate": 9.961875332100075e-07, "loss": 0.328021, "memory(GiB)": 34.88, "step": 119055, "train_speed(iter/s)": 0.412051 }, { "acc": 0.93921547, "epoch": 3.223675303928736, "grad_norm": 8.230792045593262, "learning_rate": 9.95852524497147e-07, "loss": 0.34756458, "memory(GiB)": 34.88, "step": 119060, "train_speed(iter/s)": 0.412052 }, { "acc": 0.94950161, "epoch": 3.2238106842119514, "grad_norm": 3.9330759048461914, "learning_rate": 9.955175659503622e-07, "loss": 0.29424253, "memory(GiB)": 34.88, "step": 119065, "train_speed(iter/s)": 0.412053 }, { "acc": 0.94166565, "epoch": 3.2239460644951667, "grad_norm": 6.55296516418457, "learning_rate": 9.951826575738554e-07, "loss": 0.35544176, "memory(GiB)": 34.88, "step": 119070, "train_speed(iter/s)": 0.412054 }, { "acc": 0.93702278, "epoch": 3.2240814447783825, "grad_norm": 8.036140441894531, "learning_rate": 9.948477993718168e-07, "loss": 0.39766083, "memory(GiB)": 34.88, "step": 119075, "train_speed(iter/s)": 0.412055 }, { "acc": 0.95372286, "epoch": 3.224216825061598, "grad_norm": 7.632842540740967, "learning_rate": 9.945129913484433e-07, "loss": 0.25532031, "memory(GiB)": 34.88, "step": 119080, "train_speed(iter/s)": 0.412056 }, { "acc": 0.93579311, "epoch": 3.2243522053448137, "grad_norm": 3.6158602237701416, "learning_rate": 9.94178233507929e-07, "loss": 0.34556673, "memory(GiB)": 34.88, "step": 119085, "train_speed(iter/s)": 0.412057 }, { "acc": 0.95018997, "epoch": 3.224487585628029, "grad_norm": 17.149227142333984, "learning_rate": 9.938435258544686e-07, "loss": 0.32180324, "memory(GiB)": 34.88, "step": 119090, "train_speed(iter/s)": 0.412057 }, { "acc": 0.9253912, "epoch": 3.224622965911245, "grad_norm": 3.861079692840576, "learning_rate": 9.935088683922521e-07, "loss": 0.50427008, "memory(GiB)": 34.88, "step": 119095, "train_speed(iter/s)": 0.412058 }, { "acc": 0.94705906, "epoch": 3.2247583461944602, "grad_norm": 9.183196067810059, "learning_rate": 9.931742611254733e-07, "loss": 0.37595873, "memory(GiB)": 34.88, "step": 119100, "train_speed(iter/s)": 0.412059 }, { "acc": 0.93653889, "epoch": 3.224893726477676, "grad_norm": 14.136999130249023, "learning_rate": 9.92839704058325e-07, "loss": 0.38102398, "memory(GiB)": 34.88, "step": 119105, "train_speed(iter/s)": 0.41206 }, { "acc": 0.94054394, "epoch": 3.2250291067608914, "grad_norm": 6.102731704711914, "learning_rate": 9.925051971949958e-07, "loss": 0.35178049, "memory(GiB)": 34.88, "step": 119110, "train_speed(iter/s)": 0.412061 }, { "acc": 0.93274822, "epoch": 3.2251644870441067, "grad_norm": 1.9478023052215576, "learning_rate": 9.921707405396773e-07, "loss": 0.39274347, "memory(GiB)": 34.88, "step": 119115, "train_speed(iter/s)": 0.412062 }, { "acc": 0.94085941, "epoch": 3.2252998673273225, "grad_norm": 3.6208152770996094, "learning_rate": 9.918363340965584e-07, "loss": 0.34240193, "memory(GiB)": 34.88, "step": 119120, "train_speed(iter/s)": 0.412062 }, { "acc": 0.94900627, "epoch": 3.225435247610538, "grad_norm": 3.7055892944335938, "learning_rate": 9.915019778698313e-07, "loss": 0.31911774, "memory(GiB)": 34.88, "step": 119125, "train_speed(iter/s)": 0.412063 }, { "acc": 0.93605032, "epoch": 3.2255706278937537, "grad_norm": 10.344950675964355, "learning_rate": 9.911676718636802e-07, "loss": 0.35430496, "memory(GiB)": 34.88, "step": 119130, "train_speed(iter/s)": 0.412064 }, { "acc": 0.92753296, "epoch": 3.225706008176969, "grad_norm": 11.718132972717285, "learning_rate": 9.908334160822947e-07, "loss": 0.43695536, "memory(GiB)": 34.88, "step": 119135, "train_speed(iter/s)": 0.412065 }, { "acc": 0.9563509, "epoch": 3.225841388460185, "grad_norm": 5.14773416519165, "learning_rate": 9.904992105298625e-07, "loss": 0.26189504, "memory(GiB)": 34.88, "step": 119140, "train_speed(iter/s)": 0.412066 }, { "acc": 0.94414597, "epoch": 3.2259767687434002, "grad_norm": 8.827818870544434, "learning_rate": 9.90165055210571e-07, "loss": 0.32654114, "memory(GiB)": 34.88, "step": 119145, "train_speed(iter/s)": 0.412067 }, { "acc": 0.94221077, "epoch": 3.2261121490266156, "grad_norm": 10.289772033691406, "learning_rate": 9.898309501286034e-07, "loss": 0.34578846, "memory(GiB)": 34.88, "step": 119150, "train_speed(iter/s)": 0.412068 }, { "acc": 0.94980412, "epoch": 3.2262475293098314, "grad_norm": 8.372639656066895, "learning_rate": 9.89496895288147e-07, "loss": 0.33707628, "memory(GiB)": 34.88, "step": 119155, "train_speed(iter/s)": 0.412069 }, { "acc": 0.94182205, "epoch": 3.2263829095930467, "grad_norm": 10.392807006835938, "learning_rate": 9.891628906933874e-07, "loss": 0.33385618, "memory(GiB)": 34.88, "step": 119160, "train_speed(iter/s)": 0.41207 }, { "acc": 0.94910336, "epoch": 3.2265182898762625, "grad_norm": 7.5259246826171875, "learning_rate": 9.888289363485059e-07, "loss": 0.30575528, "memory(GiB)": 34.88, "step": 119165, "train_speed(iter/s)": 0.412071 }, { "acc": 0.94784842, "epoch": 3.226653670159478, "grad_norm": 5.420731544494629, "learning_rate": 9.88495032257687e-07, "loss": 0.30143876, "memory(GiB)": 34.88, "step": 119170, "train_speed(iter/s)": 0.412071 }, { "acc": 0.93824081, "epoch": 3.2267890504426937, "grad_norm": 21.184532165527344, "learning_rate": 9.88161178425114e-07, "loss": 0.37021685, "memory(GiB)": 34.88, "step": 119175, "train_speed(iter/s)": 0.412072 }, { "acc": 0.94847641, "epoch": 3.226924430725909, "grad_norm": 4.868313789367676, "learning_rate": 9.87827374854971e-07, "loss": 0.29960747, "memory(GiB)": 34.88, "step": 119180, "train_speed(iter/s)": 0.412073 }, { "acc": 0.94927034, "epoch": 3.2270598110091244, "grad_norm": 11.631246566772461, "learning_rate": 9.874936215514357e-07, "loss": 0.34511771, "memory(GiB)": 34.88, "step": 119185, "train_speed(iter/s)": 0.412074 }, { "acc": 0.94377203, "epoch": 3.2271951912923402, "grad_norm": 6.218040466308594, "learning_rate": 9.87159918518691e-07, "loss": 0.3099417, "memory(GiB)": 34.88, "step": 119190, "train_speed(iter/s)": 0.412075 }, { "acc": 0.95722551, "epoch": 3.2273305715755556, "grad_norm": 7.647611141204834, "learning_rate": 9.868262657609178e-07, "loss": 0.29204657, "memory(GiB)": 34.88, "step": 119195, "train_speed(iter/s)": 0.412076 }, { "acc": 0.95496798, "epoch": 3.2274659518587714, "grad_norm": 9.854825973510742, "learning_rate": 9.86492663282294e-07, "loss": 0.28388383, "memory(GiB)": 34.88, "step": 119200, "train_speed(iter/s)": 0.412077 }, { "acc": 0.94835167, "epoch": 3.2276013321419867, "grad_norm": 7.9000091552734375, "learning_rate": 9.86159111087e-07, "loss": 0.28521647, "memory(GiB)": 34.88, "step": 119205, "train_speed(iter/s)": 0.412077 }, { "acc": 0.9459341, "epoch": 3.2277367124252025, "grad_norm": 7.473300457000732, "learning_rate": 9.858256091792135e-07, "loss": 0.32079353, "memory(GiB)": 34.88, "step": 119210, "train_speed(iter/s)": 0.412078 }, { "acc": 0.95095873, "epoch": 3.227872092708418, "grad_norm": 4.267604351043701, "learning_rate": 9.854921575631136e-07, "loss": 0.28586814, "memory(GiB)": 34.88, "step": 119215, "train_speed(iter/s)": 0.412079 }, { "acc": 0.95650043, "epoch": 3.2280074729916333, "grad_norm": 5.77260684967041, "learning_rate": 9.851587562428742e-07, "loss": 0.2468519, "memory(GiB)": 34.88, "step": 119220, "train_speed(iter/s)": 0.41208 }, { "acc": 0.94799137, "epoch": 3.228142853274849, "grad_norm": 7.67979621887207, "learning_rate": 9.848254052226765e-07, "loss": 0.3211648, "memory(GiB)": 34.88, "step": 119225, "train_speed(iter/s)": 0.412081 }, { "acc": 0.9247076, "epoch": 3.2282782335580644, "grad_norm": 5.383551120758057, "learning_rate": 9.844921045066938e-07, "loss": 0.49247274, "memory(GiB)": 34.88, "step": 119230, "train_speed(iter/s)": 0.412082 }, { "acc": 0.94220257, "epoch": 3.2284136138412802, "grad_norm": 10.68132495880127, "learning_rate": 9.841588540991004e-07, "loss": 0.3481225, "memory(GiB)": 34.88, "step": 119235, "train_speed(iter/s)": 0.412083 }, { "acc": 0.94099703, "epoch": 3.2285489941244956, "grad_norm": 5.478313446044922, "learning_rate": 9.838256540040714e-07, "loss": 0.38470306, "memory(GiB)": 34.88, "step": 119240, "train_speed(iter/s)": 0.412083 }, { "acc": 0.94619112, "epoch": 3.2286843744077114, "grad_norm": 2.938981771469116, "learning_rate": 9.834925042257818e-07, "loss": 0.29010115, "memory(GiB)": 34.88, "step": 119245, "train_speed(iter/s)": 0.412084 }, { "acc": 0.94896173, "epoch": 3.2288197546909267, "grad_norm": 11.311047554016113, "learning_rate": 9.831594047684053e-07, "loss": 0.32774017, "memory(GiB)": 34.88, "step": 119250, "train_speed(iter/s)": 0.412085 }, { "acc": 0.94701862, "epoch": 3.2289551349741425, "grad_norm": 7.484805107116699, "learning_rate": 9.828263556361132e-07, "loss": 0.31070387, "memory(GiB)": 34.88, "step": 119255, "train_speed(iter/s)": 0.412086 }, { "acc": 0.93592644, "epoch": 3.229090515257358, "grad_norm": 18.226572036743164, "learning_rate": 9.824933568330778e-07, "loss": 0.38721766, "memory(GiB)": 34.88, "step": 119260, "train_speed(iter/s)": 0.412087 }, { "acc": 0.93848076, "epoch": 3.2292258955405733, "grad_norm": 12.872076988220215, "learning_rate": 9.821604083634712e-07, "loss": 0.39475198, "memory(GiB)": 34.88, "step": 119265, "train_speed(iter/s)": 0.412088 }, { "acc": 0.94942913, "epoch": 3.229361275823789, "grad_norm": 5.14909029006958, "learning_rate": 9.81827510231465e-07, "loss": 0.31598899, "memory(GiB)": 34.88, "step": 119270, "train_speed(iter/s)": 0.412088 }, { "acc": 0.95142765, "epoch": 3.2294966561070044, "grad_norm": 12.098550796508789, "learning_rate": 9.814946624412273e-07, "loss": 0.25619891, "memory(GiB)": 34.88, "step": 119275, "train_speed(iter/s)": 0.412089 }, { "acc": 0.94313984, "epoch": 3.2296320363902202, "grad_norm": 8.004918098449707, "learning_rate": 9.811618649969287e-07, "loss": 0.33771634, "memory(GiB)": 34.88, "step": 119280, "train_speed(iter/s)": 0.41209 }, { "acc": 0.92864742, "epoch": 3.2297674166734356, "grad_norm": 11.969691276550293, "learning_rate": 9.808291179027394e-07, "loss": 0.39965606, "memory(GiB)": 34.88, "step": 119285, "train_speed(iter/s)": 0.412091 }, { "acc": 0.94395046, "epoch": 3.2299027969566514, "grad_norm": 6.649717807769775, "learning_rate": 9.804964211628256e-07, "loss": 0.3516984, "memory(GiB)": 34.88, "step": 119290, "train_speed(iter/s)": 0.412092 }, { "acc": 0.95651245, "epoch": 3.2300381772398667, "grad_norm": 19.74331283569336, "learning_rate": 9.80163774781356e-07, "loss": 0.22547269, "memory(GiB)": 34.88, "step": 119295, "train_speed(iter/s)": 0.412093 }, { "acc": 0.95377426, "epoch": 3.2301735575230826, "grad_norm": 4.032104015350342, "learning_rate": 9.798311787624973e-07, "loss": 0.25662081, "memory(GiB)": 34.88, "step": 119300, "train_speed(iter/s)": 0.412093 }, { "acc": 0.9217555, "epoch": 3.230308937806298, "grad_norm": 21.571029663085938, "learning_rate": 9.794986331104172e-07, "loss": 0.49327884, "memory(GiB)": 34.88, "step": 119305, "train_speed(iter/s)": 0.412094 }, { "acc": 0.95945415, "epoch": 3.2304443180895133, "grad_norm": 11.84221076965332, "learning_rate": 9.791661378292793e-07, "loss": 0.21196954, "memory(GiB)": 34.88, "step": 119310, "train_speed(iter/s)": 0.412095 }, { "acc": 0.94209871, "epoch": 3.230579698372729, "grad_norm": 3.96956729888916, "learning_rate": 9.788336929232503e-07, "loss": 0.3219718, "memory(GiB)": 34.88, "step": 119315, "train_speed(iter/s)": 0.412096 }, { "acc": 0.94726543, "epoch": 3.2307150786559444, "grad_norm": 4.476871967315674, "learning_rate": 9.785012983964951e-07, "loss": 0.33003182, "memory(GiB)": 34.88, "step": 119320, "train_speed(iter/s)": 0.412097 }, { "acc": 0.93860273, "epoch": 3.2308504589391602, "grad_norm": 12.383020401000977, "learning_rate": 9.781689542531763e-07, "loss": 0.34029479, "memory(GiB)": 34.88, "step": 119325, "train_speed(iter/s)": 0.412098 }, { "acc": 0.95294571, "epoch": 3.2309858392223756, "grad_norm": 9.362727165222168, "learning_rate": 9.778366604974576e-07, "loss": 0.25353789, "memory(GiB)": 34.88, "step": 119330, "train_speed(iter/s)": 0.412099 }, { "acc": 0.95011616, "epoch": 3.2311212195055914, "grad_norm": 12.367955207824707, "learning_rate": 9.775044171335009e-07, "loss": 0.26374695, "memory(GiB)": 34.88, "step": 119335, "train_speed(iter/s)": 0.4121 }, { "acc": 0.95471745, "epoch": 3.2312565997888067, "grad_norm": 3.2114768028259277, "learning_rate": 9.771722241654712e-07, "loss": 0.38870897, "memory(GiB)": 34.88, "step": 119340, "train_speed(iter/s)": 0.4121 }, { "acc": 0.93290215, "epoch": 3.231391980072022, "grad_norm": 5.702515125274658, "learning_rate": 9.768400815975247e-07, "loss": 0.433109, "memory(GiB)": 34.88, "step": 119345, "train_speed(iter/s)": 0.412101 }, { "acc": 0.94355297, "epoch": 3.231527360355238, "grad_norm": 7.034152984619141, "learning_rate": 9.76507989433828e-07, "loss": 0.33154047, "memory(GiB)": 34.88, "step": 119350, "train_speed(iter/s)": 0.412102 }, { "acc": 0.94961739, "epoch": 3.2316627406384533, "grad_norm": 13.07955265045166, "learning_rate": 9.761759476785381e-07, "loss": 0.31523659, "memory(GiB)": 34.88, "step": 119355, "train_speed(iter/s)": 0.412103 }, { "acc": 0.93947468, "epoch": 3.231798120921669, "grad_norm": 9.434235572814941, "learning_rate": 9.758439563358139e-07, "loss": 0.3156713, "memory(GiB)": 34.88, "step": 119360, "train_speed(iter/s)": 0.412104 }, { "acc": 0.95162497, "epoch": 3.2319335012048844, "grad_norm": 6.166903495788574, "learning_rate": 9.755120154098147e-07, "loss": 0.29269004, "memory(GiB)": 34.88, "step": 119365, "train_speed(iter/s)": 0.412105 }, { "acc": 0.94226351, "epoch": 3.2320688814881002, "grad_norm": 19.181943893432617, "learning_rate": 9.751801249046986e-07, "loss": 0.32954316, "memory(GiB)": 34.88, "step": 119370, "train_speed(iter/s)": 0.412106 }, { "acc": 0.94990606, "epoch": 3.2322042617713156, "grad_norm": 7.869252681732178, "learning_rate": 9.748482848246257e-07, "loss": 0.28716063, "memory(GiB)": 34.88, "step": 119375, "train_speed(iter/s)": 0.412107 }, { "acc": 0.94589567, "epoch": 3.232339642054531, "grad_norm": 7.675436496734619, "learning_rate": 9.74516495173748e-07, "loss": 0.34883275, "memory(GiB)": 34.88, "step": 119380, "train_speed(iter/s)": 0.412107 }, { "acc": 0.95148697, "epoch": 3.2324750223377468, "grad_norm": 10.372146606445312, "learning_rate": 9.741847559562276e-07, "loss": 0.29872365, "memory(GiB)": 34.88, "step": 119385, "train_speed(iter/s)": 0.412108 }, { "acc": 0.93976736, "epoch": 3.232610402620962, "grad_norm": 11.15722370147705, "learning_rate": 9.738530671762157e-07, "loss": 0.40289092, "memory(GiB)": 34.88, "step": 119390, "train_speed(iter/s)": 0.412109 }, { "acc": 0.94909725, "epoch": 3.232745782904178, "grad_norm": 14.043299674987793, "learning_rate": 9.735214288378705e-07, "loss": 0.33785224, "memory(GiB)": 34.88, "step": 119395, "train_speed(iter/s)": 0.41211 }, { "acc": 0.9614562, "epoch": 3.2328811631873933, "grad_norm": 4.434630393981934, "learning_rate": 9.731898409453439e-07, "loss": 0.19659324, "memory(GiB)": 34.88, "step": 119400, "train_speed(iter/s)": 0.412111 }, { "acc": 0.94522266, "epoch": 3.233016543470609, "grad_norm": 5.372217655181885, "learning_rate": 9.728583035027906e-07, "loss": 0.27526565, "memory(GiB)": 34.88, "step": 119405, "train_speed(iter/s)": 0.412112 }, { "acc": 0.96309071, "epoch": 3.2331519237538244, "grad_norm": 5.10276985168457, "learning_rate": 9.725268165143649e-07, "loss": 0.231252, "memory(GiB)": 34.88, "step": 119410, "train_speed(iter/s)": 0.412113 }, { "acc": 0.93989601, "epoch": 3.2332873040370402, "grad_norm": 7.04444694519043, "learning_rate": 9.721953799842155e-07, "loss": 0.34656091, "memory(GiB)": 34.88, "step": 119415, "train_speed(iter/s)": 0.412114 }, { "acc": 0.93972187, "epoch": 3.2334226843202556, "grad_norm": 5.66358757019043, "learning_rate": 9.718639939164997e-07, "loss": 0.32841053, "memory(GiB)": 34.88, "step": 119420, "train_speed(iter/s)": 0.412115 }, { "acc": 0.93917961, "epoch": 3.233558064603471, "grad_norm": 5.442448139190674, "learning_rate": 9.71532658315365e-07, "loss": 0.36312754, "memory(GiB)": 34.88, "step": 119425, "train_speed(iter/s)": 0.412115 }, { "acc": 0.93467255, "epoch": 3.2336934448866868, "grad_norm": 4.112907409667969, "learning_rate": 9.712013731849646e-07, "loss": 0.40265884, "memory(GiB)": 34.88, "step": 119430, "train_speed(iter/s)": 0.412116 }, { "acc": 0.93540401, "epoch": 3.233828825169902, "grad_norm": 9.644457817077637, "learning_rate": 9.708701385294458e-07, "loss": 0.33402805, "memory(GiB)": 34.88, "step": 119435, "train_speed(iter/s)": 0.412117 }, { "acc": 0.96040478, "epoch": 3.233964205453118, "grad_norm": 5.273707866668701, "learning_rate": 9.70538954352959e-07, "loss": 0.22072768, "memory(GiB)": 34.88, "step": 119440, "train_speed(iter/s)": 0.412118 }, { "acc": 0.95325823, "epoch": 3.2340995857363333, "grad_norm": 4.081540584564209, "learning_rate": 9.702078206596547e-07, "loss": 0.24836359, "memory(GiB)": 34.88, "step": 119445, "train_speed(iter/s)": 0.412119 }, { "acc": 0.94550152, "epoch": 3.234234966019549, "grad_norm": 6.198538303375244, "learning_rate": 9.698767374536764e-07, "loss": 0.2921169, "memory(GiB)": 34.88, "step": 119450, "train_speed(iter/s)": 0.41212 }, { "acc": 0.94096966, "epoch": 3.2343703463027644, "grad_norm": 3.6496424674987793, "learning_rate": 9.695457047391775e-07, "loss": 0.32969041, "memory(GiB)": 34.88, "step": 119455, "train_speed(iter/s)": 0.412121 }, { "acc": 0.94857998, "epoch": 3.2345057265859802, "grad_norm": 4.763247966766357, "learning_rate": 9.692147225203006e-07, "loss": 0.2610281, "memory(GiB)": 34.88, "step": 119460, "train_speed(iter/s)": 0.412122 }, { "acc": 0.94358482, "epoch": 3.2346411068691956, "grad_norm": 10.854222297668457, "learning_rate": 9.688837908011947e-07, "loss": 0.33475697, "memory(GiB)": 34.88, "step": 119465, "train_speed(iter/s)": 0.412123 }, { "acc": 0.93569803, "epoch": 3.234776487152411, "grad_norm": 8.28624153137207, "learning_rate": 9.68552909586001e-07, "loss": 0.34477024, "memory(GiB)": 34.88, "step": 119470, "train_speed(iter/s)": 0.412124 }, { "acc": 0.94116631, "epoch": 3.2349118674356268, "grad_norm": 4.97096061706543, "learning_rate": 9.682220788788703e-07, "loss": 0.32406135, "memory(GiB)": 34.88, "step": 119475, "train_speed(iter/s)": 0.412124 }, { "acc": 0.94593086, "epoch": 3.235047247718842, "grad_norm": 23.775344848632812, "learning_rate": 9.678912986839444e-07, "loss": 0.32580872, "memory(GiB)": 34.88, "step": 119480, "train_speed(iter/s)": 0.412125 }, { "acc": 0.94192276, "epoch": 3.235182628002058, "grad_norm": 7.563480854034424, "learning_rate": 9.675605690053633e-07, "loss": 0.35622585, "memory(GiB)": 34.88, "step": 119485, "train_speed(iter/s)": 0.412126 }, { "acc": 0.95290489, "epoch": 3.2353180082852733, "grad_norm": 4.184000015258789, "learning_rate": 9.672298898472758e-07, "loss": 0.27316456, "memory(GiB)": 34.88, "step": 119490, "train_speed(iter/s)": 0.412127 }, { "acc": 0.93520184, "epoch": 3.235453388568489, "grad_norm": 5.406991004943848, "learning_rate": 9.66899261213821e-07, "loss": 0.36172745, "memory(GiB)": 34.88, "step": 119495, "train_speed(iter/s)": 0.412128 }, { "acc": 0.94358234, "epoch": 3.2355887688517044, "grad_norm": 4.308624744415283, "learning_rate": 9.665686831091423e-07, "loss": 0.29186008, "memory(GiB)": 34.88, "step": 119500, "train_speed(iter/s)": 0.412129 }, { "acc": 0.93990269, "epoch": 3.23572414913492, "grad_norm": 2.5790457725524902, "learning_rate": 9.662381555373776e-07, "loss": 0.30831089, "memory(GiB)": 34.88, "step": 119505, "train_speed(iter/s)": 0.41213 }, { "acc": 0.94948273, "epoch": 3.2358595294181356, "grad_norm": 4.46041202545166, "learning_rate": 9.659076785026724e-07, "loss": 0.27986031, "memory(GiB)": 34.88, "step": 119510, "train_speed(iter/s)": 0.412131 }, { "acc": 0.94663725, "epoch": 3.235994909701351, "grad_norm": 4.943734169006348, "learning_rate": 9.655772520091625e-07, "loss": 0.28127241, "memory(GiB)": 34.88, "step": 119515, "train_speed(iter/s)": 0.412132 }, { "acc": 0.95396671, "epoch": 3.2361302899845668, "grad_norm": 10.201696395874023, "learning_rate": 9.652468760609888e-07, "loss": 0.29892654, "memory(GiB)": 34.88, "step": 119520, "train_speed(iter/s)": 0.412133 }, { "acc": 0.9614789, "epoch": 3.236265670267782, "grad_norm": 4.488424777984619, "learning_rate": 9.649165506622914e-07, "loss": 0.21366386, "memory(GiB)": 34.88, "step": 119525, "train_speed(iter/s)": 0.412133 }, { "acc": 0.94345741, "epoch": 3.236401050550998, "grad_norm": 5.714992523193359, "learning_rate": 9.645862758172046e-07, "loss": 0.3714406, "memory(GiB)": 34.88, "step": 119530, "train_speed(iter/s)": 0.412134 }, { "acc": 0.94611015, "epoch": 3.2365364308342133, "grad_norm": 6.665555953979492, "learning_rate": 9.642560515298695e-07, "loss": 0.37870393, "memory(GiB)": 34.88, "step": 119535, "train_speed(iter/s)": 0.412135 }, { "acc": 0.93005409, "epoch": 3.2366718111174286, "grad_norm": 7.878835201263428, "learning_rate": 9.639258778044182e-07, "loss": 0.41499786, "memory(GiB)": 34.88, "step": 119540, "train_speed(iter/s)": 0.412136 }, { "acc": 0.94733276, "epoch": 3.2368071914006444, "grad_norm": 10.182538986206055, "learning_rate": 9.635957546449923e-07, "loss": 0.29652996, "memory(GiB)": 34.88, "step": 119545, "train_speed(iter/s)": 0.412137 }, { "acc": 0.93137665, "epoch": 3.23694257168386, "grad_norm": 14.13627815246582, "learning_rate": 9.63265682055724e-07, "loss": 0.42589111, "memory(GiB)": 34.88, "step": 119550, "train_speed(iter/s)": 0.412138 }, { "acc": 0.92868261, "epoch": 3.2370779519670756, "grad_norm": 6.886261940002441, "learning_rate": 9.629356600407477e-07, "loss": 0.42957869, "memory(GiB)": 34.88, "step": 119555, "train_speed(iter/s)": 0.412139 }, { "acc": 0.93996115, "epoch": 3.237213332250291, "grad_norm": 3.389312982559204, "learning_rate": 9.626056886042e-07, "loss": 0.33774304, "memory(GiB)": 34.88, "step": 119560, "train_speed(iter/s)": 0.41214 }, { "acc": 0.94867496, "epoch": 3.2373487125335068, "grad_norm": 9.72293758392334, "learning_rate": 9.62275767750212e-07, "loss": 0.326758, "memory(GiB)": 34.88, "step": 119565, "train_speed(iter/s)": 0.412141 }, { "acc": 0.94199009, "epoch": 3.237484092816722, "grad_norm": 7.769237041473389, "learning_rate": 9.619458974829191e-07, "loss": 0.30703747, "memory(GiB)": 34.88, "step": 119570, "train_speed(iter/s)": 0.412142 }, { "acc": 0.96061945, "epoch": 3.237619473099938, "grad_norm": 3.316218137741089, "learning_rate": 9.61616077806449e-07, "loss": 0.25431337, "memory(GiB)": 34.88, "step": 119575, "train_speed(iter/s)": 0.412143 }, { "acc": 0.94232836, "epoch": 3.2377548533831533, "grad_norm": 2.902949571609497, "learning_rate": 9.612863087249392e-07, "loss": 0.35342188, "memory(GiB)": 34.88, "step": 119580, "train_speed(iter/s)": 0.412144 }, { "acc": 0.93744164, "epoch": 3.2378902336663686, "grad_norm": 4.683130741119385, "learning_rate": 9.609565902425166e-07, "loss": 0.40234289, "memory(GiB)": 34.88, "step": 119585, "train_speed(iter/s)": 0.412144 }, { "acc": 0.93104172, "epoch": 3.2380256139495844, "grad_norm": 14.23076057434082, "learning_rate": 9.60626922363313e-07, "loss": 0.44835629, "memory(GiB)": 34.88, "step": 119590, "train_speed(iter/s)": 0.412145 }, { "acc": 0.9542141, "epoch": 3.2381609942328, "grad_norm": 3.9912779331207275, "learning_rate": 9.602973050914578e-07, "loss": 0.23671746, "memory(GiB)": 34.88, "step": 119595, "train_speed(iter/s)": 0.412146 }, { "acc": 0.95210409, "epoch": 3.2382963745160156, "grad_norm": 10.792458534240723, "learning_rate": 9.59967738431082e-07, "loss": 0.26188242, "memory(GiB)": 34.88, "step": 119600, "train_speed(iter/s)": 0.412147 }, { "acc": 0.93790436, "epoch": 3.238431754799231, "grad_norm": 6.846380233764648, "learning_rate": 9.596382223863123e-07, "loss": 0.35316944, "memory(GiB)": 34.88, "step": 119605, "train_speed(iter/s)": 0.412148 }, { "acc": 0.92453251, "epoch": 3.2385671350824468, "grad_norm": 5.504394054412842, "learning_rate": 9.593087569612734e-07, "loss": 0.38199568, "memory(GiB)": 34.88, "step": 119610, "train_speed(iter/s)": 0.412149 }, { "acc": 0.93535166, "epoch": 3.238702515365662, "grad_norm": 3.678133726119995, "learning_rate": 9.589793421600993e-07, "loss": 0.41318693, "memory(GiB)": 34.88, "step": 119615, "train_speed(iter/s)": 0.41215 }, { "acc": 0.94205303, "epoch": 3.238837895648878, "grad_norm": 8.501132011413574, "learning_rate": 9.586499779869113e-07, "loss": 0.30877132, "memory(GiB)": 34.88, "step": 119620, "train_speed(iter/s)": 0.41215 }, { "acc": 0.94291935, "epoch": 3.2389732759320933, "grad_norm": 13.967303276062012, "learning_rate": 9.583206644458381e-07, "loss": 0.30645299, "memory(GiB)": 34.88, "step": 119625, "train_speed(iter/s)": 0.412151 }, { "acc": 0.9524087, "epoch": 3.2391086562153086, "grad_norm": 5.54380464553833, "learning_rate": 9.579914015410017e-07, "loss": 0.27114027, "memory(GiB)": 34.88, "step": 119630, "train_speed(iter/s)": 0.412152 }, { "acc": 0.93872585, "epoch": 3.2392440364985244, "grad_norm": 4.9072699546813965, "learning_rate": 9.576621892765314e-07, "loss": 0.36130996, "memory(GiB)": 34.88, "step": 119635, "train_speed(iter/s)": 0.412153 }, { "acc": 0.93160133, "epoch": 3.23937941678174, "grad_norm": 6.0237579345703125, "learning_rate": 9.573330276565481e-07, "loss": 0.45182281, "memory(GiB)": 34.88, "step": 119640, "train_speed(iter/s)": 0.412154 }, { "acc": 0.94457998, "epoch": 3.2395147970649556, "grad_norm": 8.746914863586426, "learning_rate": 9.57003916685175e-07, "loss": 0.30333891, "memory(GiB)": 34.88, "step": 119645, "train_speed(iter/s)": 0.412155 }, { "acc": 0.95349483, "epoch": 3.239650177348171, "grad_norm": 4.89607048034668, "learning_rate": 9.56674856366538e-07, "loss": 0.27500629, "memory(GiB)": 34.88, "step": 119650, "train_speed(iter/s)": 0.412156 }, { "acc": 0.9432745, "epoch": 3.2397855576313868, "grad_norm": 4.696980953216553, "learning_rate": 9.563458467047548e-07, "loss": 0.29309134, "memory(GiB)": 34.88, "step": 119655, "train_speed(iter/s)": 0.412157 }, { "acc": 0.93885841, "epoch": 3.239920937914602, "grad_norm": 37.48660659790039, "learning_rate": 9.56016887703951e-07, "loss": 0.39287701, "memory(GiB)": 34.88, "step": 119660, "train_speed(iter/s)": 0.412158 }, { "acc": 0.94262943, "epoch": 3.2400563181978175, "grad_norm": 11.187032699584961, "learning_rate": 9.556879793682428e-07, "loss": 0.31807733, "memory(GiB)": 34.88, "step": 119665, "train_speed(iter/s)": 0.412158 }, { "acc": 0.93862486, "epoch": 3.2401916984810333, "grad_norm": 13.051998138427734, "learning_rate": 9.553591217017562e-07, "loss": 0.39145012, "memory(GiB)": 34.88, "step": 119670, "train_speed(iter/s)": 0.412159 }, { "acc": 0.94017239, "epoch": 3.2403270787642486, "grad_norm": 6.996893882751465, "learning_rate": 9.550303147086062e-07, "loss": 0.35658822, "memory(GiB)": 34.88, "step": 119675, "train_speed(iter/s)": 0.41216 }, { "acc": 0.94110289, "epoch": 3.2404624590474644, "grad_norm": 8.613439559936523, "learning_rate": 9.54701558392913e-07, "loss": 0.39115131, "memory(GiB)": 34.88, "step": 119680, "train_speed(iter/s)": 0.412161 }, { "acc": 0.94163475, "epoch": 3.24059783933068, "grad_norm": 5.810648441314697, "learning_rate": 9.54372852758796e-07, "loss": 0.30291374, "memory(GiB)": 34.88, "step": 119685, "train_speed(iter/s)": 0.412162 }, { "acc": 0.93517094, "epoch": 3.2407332196138956, "grad_norm": 17.54920768737793, "learning_rate": 9.540441978103727e-07, "loss": 0.45237513, "memory(GiB)": 34.88, "step": 119690, "train_speed(iter/s)": 0.412163 }, { "acc": 0.94191322, "epoch": 3.240868599897111, "grad_norm": 5.6798415184021, "learning_rate": 9.5371559355176e-07, "loss": 0.36678114, "memory(GiB)": 34.88, "step": 119695, "train_speed(iter/s)": 0.412164 }, { "acc": 0.92838793, "epoch": 3.2410039801803263, "grad_norm": 6.576321125030518, "learning_rate": 9.533870399870714e-07, "loss": 0.44448223, "memory(GiB)": 34.88, "step": 119700, "train_speed(iter/s)": 0.412165 }, { "acc": 0.9460434, "epoch": 3.241139360463542, "grad_norm": 8.196481704711914, "learning_rate": 9.530585371204275e-07, "loss": 0.34255018, "memory(GiB)": 34.88, "step": 119705, "train_speed(iter/s)": 0.412165 }, { "acc": 0.94685822, "epoch": 3.2412747407467575, "grad_norm": 3.0927557945251465, "learning_rate": 9.527300849559401e-07, "loss": 0.3369554, "memory(GiB)": 34.88, "step": 119710, "train_speed(iter/s)": 0.412166 }, { "acc": 0.95109482, "epoch": 3.2414101210299733, "grad_norm": 3.054403781890869, "learning_rate": 9.524016834977245e-07, "loss": 0.25956407, "memory(GiB)": 34.88, "step": 119715, "train_speed(iter/s)": 0.412167 }, { "acc": 0.93229294, "epoch": 3.2415455013131886, "grad_norm": 3.559278964996338, "learning_rate": 9.520733327498948e-07, "loss": 0.43683538, "memory(GiB)": 34.88, "step": 119720, "train_speed(iter/s)": 0.412168 }, { "acc": 0.93899994, "epoch": 3.2416808815964044, "grad_norm": 29.115022659301758, "learning_rate": 9.517450327165654e-07, "loss": 0.44976463, "memory(GiB)": 34.88, "step": 119725, "train_speed(iter/s)": 0.412169 }, { "acc": 0.95386648, "epoch": 3.24181626187962, "grad_norm": 4.4561238288879395, "learning_rate": 9.514167834018485e-07, "loss": 0.29398041, "memory(GiB)": 34.88, "step": 119730, "train_speed(iter/s)": 0.412169 }, { "acc": 0.94840517, "epoch": 3.2419516421628356, "grad_norm": 4.5710601806640625, "learning_rate": 9.51088584809852e-07, "loss": 0.28273544, "memory(GiB)": 34.88, "step": 119735, "train_speed(iter/s)": 0.41217 }, { "acc": 0.9450079, "epoch": 3.242087022446051, "grad_norm": 12.829025268554688, "learning_rate": 9.507604369446935e-07, "loss": 0.37674167, "memory(GiB)": 34.88, "step": 119740, "train_speed(iter/s)": 0.412171 }, { "acc": 0.94256735, "epoch": 3.2422224027292663, "grad_norm": 7.179133892059326, "learning_rate": 9.504323398104791e-07, "loss": 0.32922328, "memory(GiB)": 34.88, "step": 119745, "train_speed(iter/s)": 0.412172 }, { "acc": 0.93288975, "epoch": 3.242357783012482, "grad_norm": 7.979615211486816, "learning_rate": 9.501042934113212e-07, "loss": 0.41875682, "memory(GiB)": 34.88, "step": 119750, "train_speed(iter/s)": 0.412173 }, { "acc": 0.95844803, "epoch": 3.2424931632956975, "grad_norm": 3.240565538406372, "learning_rate": 9.497762977513276e-07, "loss": 0.21217318, "memory(GiB)": 34.88, "step": 119755, "train_speed(iter/s)": 0.412174 }, { "acc": 0.95542088, "epoch": 3.2426285435789133, "grad_norm": 4.570001125335693, "learning_rate": 9.494483528346103e-07, "loss": 0.23812177, "memory(GiB)": 34.88, "step": 119760, "train_speed(iter/s)": 0.412175 }, { "acc": 0.94432192, "epoch": 3.2427639238621286, "grad_norm": 11.371901512145996, "learning_rate": 9.491204586652733e-07, "loss": 0.35344005, "memory(GiB)": 34.88, "step": 119765, "train_speed(iter/s)": 0.412176 }, { "acc": 0.94297981, "epoch": 3.2428993041453444, "grad_norm": 4.743199348449707, "learning_rate": 9.487926152474263e-07, "loss": 0.34995522, "memory(GiB)": 34.88, "step": 119770, "train_speed(iter/s)": 0.412177 }, { "acc": 0.9391552, "epoch": 3.24303468442856, "grad_norm": 6.612402439117432, "learning_rate": 9.484648225851765e-07, "loss": 0.32838001, "memory(GiB)": 34.88, "step": 119775, "train_speed(iter/s)": 0.412178 }, { "acc": 0.94898405, "epoch": 3.2431700647117756, "grad_norm": 5.292807102203369, "learning_rate": 9.481370806826288e-07, "loss": 0.27758944, "memory(GiB)": 34.88, "step": 119780, "train_speed(iter/s)": 0.412178 }, { "acc": 0.93319731, "epoch": 3.243305444994991, "grad_norm": 6.21644401550293, "learning_rate": 9.478093895438896e-07, "loss": 0.41735821, "memory(GiB)": 34.88, "step": 119785, "train_speed(iter/s)": 0.412179 }, { "acc": 0.93649797, "epoch": 3.2434408252782063, "grad_norm": 8.47667407989502, "learning_rate": 9.474817491730637e-07, "loss": 0.40109663, "memory(GiB)": 34.88, "step": 119790, "train_speed(iter/s)": 0.41218 }, { "acc": 0.9558363, "epoch": 3.243576205561422, "grad_norm": 12.328730583190918, "learning_rate": 9.471541595742565e-07, "loss": 0.28977628, "memory(GiB)": 34.88, "step": 119795, "train_speed(iter/s)": 0.412181 }, { "acc": 0.9438694, "epoch": 3.2437115858446375, "grad_norm": 6.977553844451904, "learning_rate": 9.468266207515701e-07, "loss": 0.31134353, "memory(GiB)": 34.88, "step": 119800, "train_speed(iter/s)": 0.412182 }, { "acc": 0.94593182, "epoch": 3.2438469661278533, "grad_norm": 11.995627403259277, "learning_rate": 9.464991327091076e-07, "loss": 0.33754597, "memory(GiB)": 34.88, "step": 119805, "train_speed(iter/s)": 0.412183 }, { "acc": 0.95219622, "epoch": 3.2439823464110686, "grad_norm": 4.806017875671387, "learning_rate": 9.461716954509728e-07, "loss": 0.27061791, "memory(GiB)": 34.88, "step": 119810, "train_speed(iter/s)": 0.412184 }, { "acc": 0.94915447, "epoch": 3.2441177266942844, "grad_norm": 13.75229549407959, "learning_rate": 9.458443089812675e-07, "loss": 0.28440018, "memory(GiB)": 34.88, "step": 119815, "train_speed(iter/s)": 0.412184 }, { "acc": 0.94416685, "epoch": 3.2442531069775, "grad_norm": 5.113616943359375, "learning_rate": 9.455169733040907e-07, "loss": 0.31553252, "memory(GiB)": 34.88, "step": 119820, "train_speed(iter/s)": 0.412185 }, { "acc": 0.95519495, "epoch": 3.244388487260715, "grad_norm": 6.4295654296875, "learning_rate": 9.451896884235449e-07, "loss": 0.24849281, "memory(GiB)": 34.88, "step": 119825, "train_speed(iter/s)": 0.412186 }, { "acc": 0.93169365, "epoch": 3.244523867543931, "grad_norm": 5.624327659606934, "learning_rate": 9.448624543437304e-07, "loss": 0.3486325, "memory(GiB)": 34.88, "step": 119830, "train_speed(iter/s)": 0.412187 }, { "acc": 0.94517746, "epoch": 3.2446592478271463, "grad_norm": 4.92576789855957, "learning_rate": 9.44535271068744e-07, "loss": 0.34496055, "memory(GiB)": 34.88, "step": 119835, "train_speed(iter/s)": 0.412188 }, { "acc": 0.94316187, "epoch": 3.244794628110362, "grad_norm": 15.394293785095215, "learning_rate": 9.442081386026864e-07, "loss": 0.36532993, "memory(GiB)": 34.88, "step": 119840, "train_speed(iter/s)": 0.412189 }, { "acc": 0.93009558, "epoch": 3.2449300083935775, "grad_norm": 8.029090881347656, "learning_rate": 9.438810569496542e-07, "loss": 0.41559868, "memory(GiB)": 34.88, "step": 119845, "train_speed(iter/s)": 0.412189 }, { "acc": 0.9467988, "epoch": 3.2450653886767933, "grad_norm": 13.229318618774414, "learning_rate": 9.435540261137471e-07, "loss": 0.27305946, "memory(GiB)": 34.88, "step": 119850, "train_speed(iter/s)": 0.41219 }, { "acc": 0.94823875, "epoch": 3.2452007689600086, "grad_norm": 10.620621681213379, "learning_rate": 9.432270460990588e-07, "loss": 0.28552217, "memory(GiB)": 34.88, "step": 119855, "train_speed(iter/s)": 0.412191 }, { "acc": 0.95293217, "epoch": 3.245336149243224, "grad_norm": 6.3703932762146, "learning_rate": 9.429001169096872e-07, "loss": 0.28564138, "memory(GiB)": 34.88, "step": 119860, "train_speed(iter/s)": 0.412192 }, { "acc": 0.95216656, "epoch": 3.24547152952644, "grad_norm": 3.95794415473938, "learning_rate": 9.425732385497285e-07, "loss": 0.25359726, "memory(GiB)": 34.88, "step": 119865, "train_speed(iter/s)": 0.412193 }, { "acc": 0.95921078, "epoch": 3.245606909809655, "grad_norm": 2.631434202194214, "learning_rate": 9.422464110232747e-07, "loss": 0.25874708, "memory(GiB)": 34.88, "step": 119870, "train_speed(iter/s)": 0.412194 }, { "acc": 0.94653721, "epoch": 3.245742290092871, "grad_norm": 8.466771125793457, "learning_rate": 9.419196343344215e-07, "loss": 0.29180746, "memory(GiB)": 34.88, "step": 119875, "train_speed(iter/s)": 0.412195 }, { "acc": 0.93416662, "epoch": 3.2458776703760863, "grad_norm": 7.811756134033203, "learning_rate": 9.415929084872628e-07, "loss": 0.43174491, "memory(GiB)": 34.88, "step": 119880, "train_speed(iter/s)": 0.412196 }, { "acc": 0.95002995, "epoch": 3.246013050659302, "grad_norm": 4.779163360595703, "learning_rate": 9.412662334858921e-07, "loss": 0.29109521, "memory(GiB)": 34.88, "step": 119885, "train_speed(iter/s)": 0.412196 }, { "acc": 0.94716988, "epoch": 3.2461484309425175, "grad_norm": 4.910677909851074, "learning_rate": 9.409396093343999e-07, "loss": 0.26308289, "memory(GiB)": 34.88, "step": 119890, "train_speed(iter/s)": 0.412197 }, { "acc": 0.96133308, "epoch": 3.2462838112257333, "grad_norm": 6.414046287536621, "learning_rate": 9.406130360368781e-07, "loss": 0.24335599, "memory(GiB)": 34.88, "step": 119895, "train_speed(iter/s)": 0.412198 }, { "acc": 0.94073811, "epoch": 3.2464191915089486, "grad_norm": 13.326408386230469, "learning_rate": 9.4028651359742e-07, "loss": 0.42486219, "memory(GiB)": 34.88, "step": 119900, "train_speed(iter/s)": 0.412199 }, { "acc": 0.93610611, "epoch": 3.246554571792164, "grad_norm": 7.659241676330566, "learning_rate": 9.399600420201122e-07, "loss": 0.41286211, "memory(GiB)": 34.88, "step": 119905, "train_speed(iter/s)": 0.4122 }, { "acc": 0.94521408, "epoch": 3.24668995207538, "grad_norm": 4.710710525512695, "learning_rate": 9.396336213090465e-07, "loss": 0.30040174, "memory(GiB)": 34.88, "step": 119910, "train_speed(iter/s)": 0.4122 }, { "acc": 0.9338644, "epoch": 3.246825332358595, "grad_norm": 8.841435432434082, "learning_rate": 9.393072514683123e-07, "loss": 0.41540594, "memory(GiB)": 34.88, "step": 119915, "train_speed(iter/s)": 0.412201 }, { "acc": 0.94035702, "epoch": 3.246960712641811, "grad_norm": 10.264702796936035, "learning_rate": 9.389809325019984e-07, "loss": 0.34983468, "memory(GiB)": 34.88, "step": 119920, "train_speed(iter/s)": 0.412202 }, { "acc": 0.94048519, "epoch": 3.2470960929250263, "grad_norm": 36.475990295410156, "learning_rate": 9.386546644141902e-07, "loss": 0.37193737, "memory(GiB)": 34.88, "step": 119925, "train_speed(iter/s)": 0.412203 }, { "acc": 0.93824234, "epoch": 3.247231473208242, "grad_norm": 3.5245773792266846, "learning_rate": 9.383284472089768e-07, "loss": 0.40502234, "memory(GiB)": 34.88, "step": 119930, "train_speed(iter/s)": 0.412204 }, { "acc": 0.95338154, "epoch": 3.2473668534914575, "grad_norm": 2.909562826156616, "learning_rate": 9.380022808904438e-07, "loss": 0.24308872, "memory(GiB)": 34.88, "step": 119935, "train_speed(iter/s)": 0.412205 }, { "acc": 0.95047379, "epoch": 3.2475022337746733, "grad_norm": 7.449303150177002, "learning_rate": 9.376761654626794e-07, "loss": 0.2823544, "memory(GiB)": 34.88, "step": 119940, "train_speed(iter/s)": 0.412206 }, { "acc": 0.94118748, "epoch": 3.2476376140578886, "grad_norm": 11.18589973449707, "learning_rate": 9.373501009297653e-07, "loss": 0.42104235, "memory(GiB)": 34.88, "step": 119945, "train_speed(iter/s)": 0.412206 }, { "acc": 0.95343266, "epoch": 3.247772994341104, "grad_norm": 9.671931266784668, "learning_rate": 9.370240872957874e-07, "loss": 0.24363163, "memory(GiB)": 34.88, "step": 119950, "train_speed(iter/s)": 0.412207 }, { "acc": 0.94363031, "epoch": 3.24790837462432, "grad_norm": 5.505200386047363, "learning_rate": 9.36698124564832e-07, "loss": 0.34693725, "memory(GiB)": 34.88, "step": 119955, "train_speed(iter/s)": 0.412208 }, { "acc": 0.95698223, "epoch": 3.248043754907535, "grad_norm": 10.27418041229248, "learning_rate": 9.363722127409787e-07, "loss": 0.22700891, "memory(GiB)": 34.88, "step": 119960, "train_speed(iter/s)": 0.412209 }, { "acc": 0.94205284, "epoch": 3.248179135190751, "grad_norm": 6.15123987197876, "learning_rate": 9.360463518283121e-07, "loss": 0.3533987, "memory(GiB)": 34.88, "step": 119965, "train_speed(iter/s)": 0.41221 }, { "acc": 0.95253248, "epoch": 3.2483145154739663, "grad_norm": 6.333643913269043, "learning_rate": 9.357205418309143e-07, "loss": 0.27772512, "memory(GiB)": 34.88, "step": 119970, "train_speed(iter/s)": 0.412211 }, { "acc": 0.94002647, "epoch": 3.248449895757182, "grad_norm": 21.610843658447266, "learning_rate": 9.353947827528675e-07, "loss": 0.32860641, "memory(GiB)": 34.88, "step": 119975, "train_speed(iter/s)": 0.412211 }, { "acc": 0.95478439, "epoch": 3.2485852760403975, "grad_norm": 9.009593963623047, "learning_rate": 9.350690745982499e-07, "loss": 0.26329994, "memory(GiB)": 34.88, "step": 119980, "train_speed(iter/s)": 0.412212 }, { "acc": 0.93019009, "epoch": 3.248720656323613, "grad_norm": 7.6935553550720215, "learning_rate": 9.347434173711438e-07, "loss": 0.38327529, "memory(GiB)": 34.88, "step": 119985, "train_speed(iter/s)": 0.412213 }, { "acc": 0.95836811, "epoch": 3.2488560366068286, "grad_norm": 3.5023202896118164, "learning_rate": 9.344178110756288e-07, "loss": 0.27023304, "memory(GiB)": 34.88, "step": 119990, "train_speed(iter/s)": 0.412214 }, { "acc": 0.9531086, "epoch": 3.248991416890044, "grad_norm": 3.6684560775756836, "learning_rate": 9.340922557157821e-07, "loss": 0.27201066, "memory(GiB)": 34.88, "step": 119995, "train_speed(iter/s)": 0.412215 }, { "acc": 0.96010342, "epoch": 3.24912679717326, "grad_norm": 7.321425437927246, "learning_rate": 9.33766751295683e-07, "loss": 0.25988078, "memory(GiB)": 34.88, "step": 120000, "train_speed(iter/s)": 0.412216 }, { "epoch": 3.24912679717326, "eval_acc": 0.6250454034699982, "eval_loss": 1.219205379486084, "eval_runtime": 1297.6493, "eval_samples_per_second": 66.509, "eval_steps_per_second": 2.079, "step": 120000 }, { "acc": 0.93569326, "epoch": 3.249262177456475, "grad_norm": 8.078091621398926, "learning_rate": 9.334412978194085e-07, "loss": 0.39509616, "memory(GiB)": 34.88, "step": 120005, "train_speed(iter/s)": 0.410353 }, { "acc": 0.94004936, "epoch": 3.249397557739691, "grad_norm": 45.13002395629883, "learning_rate": 9.331158952910378e-07, "loss": 0.39825027, "memory(GiB)": 34.88, "step": 120010, "train_speed(iter/s)": 0.410354 }, { "acc": 0.95593252, "epoch": 3.2495329380229063, "grad_norm": 12.285816192626953, "learning_rate": 9.327905437146429e-07, "loss": 0.21610365, "memory(GiB)": 34.88, "step": 120015, "train_speed(iter/s)": 0.410355 }, { "acc": 0.94218388, "epoch": 3.2496683183061217, "grad_norm": 13.50877857208252, "learning_rate": 9.324652430943042e-07, "loss": 0.34493992, "memory(GiB)": 34.88, "step": 120020, "train_speed(iter/s)": 0.410356 }, { "acc": 0.95020914, "epoch": 3.2498036985893375, "grad_norm": 7.089505195617676, "learning_rate": 9.321399934340944e-07, "loss": 0.31193171, "memory(GiB)": 34.88, "step": 120025, "train_speed(iter/s)": 0.410357 }, { "acc": 0.95622902, "epoch": 3.249939078872553, "grad_norm": 9.939009666442871, "learning_rate": 9.318147947380869e-07, "loss": 0.28387613, "memory(GiB)": 34.88, "step": 120030, "train_speed(iter/s)": 0.410358 }, { "acc": 0.93271599, "epoch": 3.2500744591557686, "grad_norm": 3.5833539962768555, "learning_rate": 9.314896470103562e-07, "loss": 0.39338489, "memory(GiB)": 34.88, "step": 120035, "train_speed(iter/s)": 0.410359 }, { "acc": 0.93236599, "epoch": 3.250209839438984, "grad_norm": 4.157802104949951, "learning_rate": 9.311645502549764e-07, "loss": 0.38646643, "memory(GiB)": 34.88, "step": 120040, "train_speed(iter/s)": 0.41036 }, { "acc": 0.93173103, "epoch": 3.2503452197222, "grad_norm": 7.0562424659729, "learning_rate": 9.308395044760203e-07, "loss": 0.38170471, "memory(GiB)": 34.88, "step": 120045, "train_speed(iter/s)": 0.410361 }, { "acc": 0.96007252, "epoch": 3.250480600005415, "grad_norm": 11.025006294250488, "learning_rate": 9.305145096775559e-07, "loss": 0.27453494, "memory(GiB)": 34.88, "step": 120050, "train_speed(iter/s)": 0.410362 }, { "acc": 0.95136738, "epoch": 3.2506159802886305, "grad_norm": 9.2842378616333, "learning_rate": 9.3018956586366e-07, "loss": 0.25757766, "memory(GiB)": 34.88, "step": 120055, "train_speed(iter/s)": 0.410363 }, { "acc": 0.94300642, "epoch": 3.2507513605718463, "grad_norm": 8.434749603271484, "learning_rate": 9.298646730383996e-07, "loss": 0.35467017, "memory(GiB)": 34.88, "step": 120060, "train_speed(iter/s)": 0.410364 }, { "acc": 0.94188843, "epoch": 3.2508867408550617, "grad_norm": 5.61526346206665, "learning_rate": 9.295398312058464e-07, "loss": 0.38680086, "memory(GiB)": 34.88, "step": 120065, "train_speed(iter/s)": 0.410365 }, { "acc": 0.94500856, "epoch": 3.2510221211382775, "grad_norm": 4.034435749053955, "learning_rate": 9.292150403700673e-07, "loss": 0.35493634, "memory(GiB)": 34.88, "step": 120070, "train_speed(iter/s)": 0.410366 }, { "acc": 0.94687958, "epoch": 3.251157501421493, "grad_norm": 4.3447723388671875, "learning_rate": 9.288903005351328e-07, "loss": 0.31108084, "memory(GiB)": 34.88, "step": 120075, "train_speed(iter/s)": 0.410367 }, { "acc": 0.95810881, "epoch": 3.2512928817047086, "grad_norm": 9.558932304382324, "learning_rate": 9.285656117051112e-07, "loss": 0.274772, "memory(GiB)": 34.88, "step": 120080, "train_speed(iter/s)": 0.410368 }, { "acc": 0.94562645, "epoch": 3.251428261987924, "grad_norm": 3.580258369445801, "learning_rate": 9.282409738840674e-07, "loss": 0.2984314, "memory(GiB)": 34.88, "step": 120085, "train_speed(iter/s)": 0.410369 }, { "acc": 0.9447628, "epoch": 3.25156364227114, "grad_norm": 13.715275764465332, "learning_rate": 9.279163870760718e-07, "loss": 0.29816978, "memory(GiB)": 34.88, "step": 120090, "train_speed(iter/s)": 0.41037 }, { "acc": 0.95763264, "epoch": 3.251699022554355, "grad_norm": 8.286913871765137, "learning_rate": 9.275918512851874e-07, "loss": 0.26335721, "memory(GiB)": 34.88, "step": 120095, "train_speed(iter/s)": 0.410371 }, { "acc": 0.94496441, "epoch": 3.251834402837571, "grad_norm": 6.376849174499512, "learning_rate": 9.272673665154823e-07, "loss": 0.34555161, "memory(GiB)": 34.88, "step": 120100, "train_speed(iter/s)": 0.410372 }, { "acc": 0.93800974, "epoch": 3.2519697831207863, "grad_norm": 6.393220901489258, "learning_rate": 9.269429327710179e-07, "loss": 0.45543809, "memory(GiB)": 34.88, "step": 120105, "train_speed(iter/s)": 0.410373 }, { "acc": 0.95820227, "epoch": 3.2521051634040017, "grad_norm": 4.3240509033203125, "learning_rate": 9.266185500558609e-07, "loss": 0.23818898, "memory(GiB)": 34.88, "step": 120110, "train_speed(iter/s)": 0.410374 }, { "acc": 0.94753523, "epoch": 3.2522405436872175, "grad_norm": 16.169885635375977, "learning_rate": 9.262942183740754e-07, "loss": 0.31393766, "memory(GiB)": 34.88, "step": 120115, "train_speed(iter/s)": 0.410374 }, { "acc": 0.93691006, "epoch": 3.252375923970433, "grad_norm": 11.053263664245605, "learning_rate": 9.259699377297206e-07, "loss": 0.40155959, "memory(GiB)": 34.88, "step": 120120, "train_speed(iter/s)": 0.410375 }, { "acc": 0.95280457, "epoch": 3.2525113042536486, "grad_norm": 4.113354206085205, "learning_rate": 9.256457081268634e-07, "loss": 0.34297366, "memory(GiB)": 34.88, "step": 120125, "train_speed(iter/s)": 0.410376 }, { "acc": 0.94958153, "epoch": 3.252646684536864, "grad_norm": 3.211594820022583, "learning_rate": 9.253215295695628e-07, "loss": 0.30124233, "memory(GiB)": 34.88, "step": 120130, "train_speed(iter/s)": 0.410377 }, { "acc": 0.93625679, "epoch": 3.25278206482008, "grad_norm": 6.5884270668029785, "learning_rate": 9.24997402061881e-07, "loss": 0.32527561, "memory(GiB)": 34.88, "step": 120135, "train_speed(iter/s)": 0.410378 }, { "acc": 0.93859825, "epoch": 3.252917445103295, "grad_norm": 6.062251567840576, "learning_rate": 9.246733256078749e-07, "loss": 0.36838694, "memory(GiB)": 34.88, "step": 120140, "train_speed(iter/s)": 0.410379 }, { "acc": 0.952384, "epoch": 3.2530528253865105, "grad_norm": 4.893354892730713, "learning_rate": 9.243493002116098e-07, "loss": 0.2694701, "memory(GiB)": 34.88, "step": 120145, "train_speed(iter/s)": 0.41038 }, { "acc": 0.95248613, "epoch": 3.2531882056697263, "grad_norm": 4.093545436859131, "learning_rate": 9.240253258771416e-07, "loss": 0.23715544, "memory(GiB)": 34.88, "step": 120150, "train_speed(iter/s)": 0.410381 }, { "acc": 0.92702847, "epoch": 3.2533235859529417, "grad_norm": 6.362197399139404, "learning_rate": 9.237014026085265e-07, "loss": 0.46498241, "memory(GiB)": 34.88, "step": 120155, "train_speed(iter/s)": 0.410382 }, { "acc": 0.93255272, "epoch": 3.2534589662361575, "grad_norm": 6.925117492675781, "learning_rate": 9.233775304098276e-07, "loss": 0.36901774, "memory(GiB)": 34.88, "step": 120160, "train_speed(iter/s)": 0.410383 }, { "acc": 0.9501565, "epoch": 3.253594346519373, "grad_norm": 4.944283962249756, "learning_rate": 9.230537092850972e-07, "loss": 0.28964281, "memory(GiB)": 34.88, "step": 120165, "train_speed(iter/s)": 0.410384 }, { "acc": 0.94945335, "epoch": 3.2537297268025887, "grad_norm": 10.025279998779297, "learning_rate": 9.227299392383957e-07, "loss": 0.304421, "memory(GiB)": 34.88, "step": 120170, "train_speed(iter/s)": 0.410385 }, { "acc": 0.94024124, "epoch": 3.253865107085804, "grad_norm": 6.111739158630371, "learning_rate": 9.224062202737739e-07, "loss": 0.46967058, "memory(GiB)": 34.88, "step": 120175, "train_speed(iter/s)": 0.410386 }, { "acc": 0.95661602, "epoch": 3.2540004873690194, "grad_norm": 5.538678169250488, "learning_rate": 9.22082552395293e-07, "loss": 0.25157828, "memory(GiB)": 34.88, "step": 120180, "train_speed(iter/s)": 0.410387 }, { "acc": 0.94355221, "epoch": 3.254135867652235, "grad_norm": 6.462876796722412, "learning_rate": 9.217589356070033e-07, "loss": 0.40144053, "memory(GiB)": 34.88, "step": 120185, "train_speed(iter/s)": 0.410388 }, { "acc": 0.94111328, "epoch": 3.2542712479354505, "grad_norm": 14.827414512634277, "learning_rate": 9.214353699129614e-07, "loss": 0.39495845, "memory(GiB)": 34.88, "step": 120190, "train_speed(iter/s)": 0.410389 }, { "acc": 0.94133911, "epoch": 3.2544066282186663, "grad_norm": 5.045154094696045, "learning_rate": 9.211118553172183e-07, "loss": 0.37127223, "memory(GiB)": 34.88, "step": 120195, "train_speed(iter/s)": 0.41039 }, { "acc": 0.93195419, "epoch": 3.2545420085018817, "grad_norm": 9.737871170043945, "learning_rate": 9.207883918238273e-07, "loss": 0.36444066, "memory(GiB)": 34.88, "step": 120200, "train_speed(iter/s)": 0.410391 }, { "acc": 0.96070967, "epoch": 3.2546773887850975, "grad_norm": 6.177015781402588, "learning_rate": 9.204649794368427e-07, "loss": 0.27333634, "memory(GiB)": 34.88, "step": 120205, "train_speed(iter/s)": 0.410392 }, { "acc": 0.9529644, "epoch": 3.254812769068313, "grad_norm": 13.278182983398438, "learning_rate": 9.201416181603107e-07, "loss": 0.29430234, "memory(GiB)": 34.88, "step": 120210, "train_speed(iter/s)": 0.410393 }, { "acc": 0.95008755, "epoch": 3.254948149351528, "grad_norm": 11.485518455505371, "learning_rate": 9.198183079982885e-07, "loss": 0.27638268, "memory(GiB)": 34.88, "step": 120215, "train_speed(iter/s)": 0.410394 }, { "acc": 0.94537745, "epoch": 3.255083529634744, "grad_norm": 5.837399482727051, "learning_rate": 9.19495048954821e-07, "loss": 0.31924956, "memory(GiB)": 34.88, "step": 120220, "train_speed(iter/s)": 0.410394 }, { "acc": 0.94733562, "epoch": 3.2552189099179594, "grad_norm": 8.216324806213379, "learning_rate": 9.191718410339617e-07, "loss": 0.35570464, "memory(GiB)": 34.88, "step": 120225, "train_speed(iter/s)": 0.410395 }, { "acc": 0.93812962, "epoch": 3.255354290201175, "grad_norm": 10.821260452270508, "learning_rate": 9.188486842397547e-07, "loss": 0.36940441, "memory(GiB)": 34.88, "step": 120230, "train_speed(iter/s)": 0.410396 }, { "acc": 0.94454365, "epoch": 3.2554896704843905, "grad_norm": 6.859078884124756, "learning_rate": 9.18525578576253e-07, "loss": 0.33936565, "memory(GiB)": 34.88, "step": 120235, "train_speed(iter/s)": 0.410397 }, { "acc": 0.94819746, "epoch": 3.2556250507676063, "grad_norm": 6.197649002075195, "learning_rate": 9.182025240475023e-07, "loss": 0.33464088, "memory(GiB)": 34.88, "step": 120240, "train_speed(iter/s)": 0.410398 }, { "acc": 0.93906555, "epoch": 3.2557604310508217, "grad_norm": 5.08472204208374, "learning_rate": 9.178795206575466e-07, "loss": 0.39681463, "memory(GiB)": 34.88, "step": 120245, "train_speed(iter/s)": 0.410399 }, { "acc": 0.96091919, "epoch": 3.2558958113340375, "grad_norm": 5.287158966064453, "learning_rate": 9.175565684104373e-07, "loss": 0.24626698, "memory(GiB)": 34.88, "step": 120250, "train_speed(iter/s)": 0.4104 }, { "acc": 0.93875446, "epoch": 3.256031191617253, "grad_norm": 7.6588053703308105, "learning_rate": 9.17233667310216e-07, "loss": 0.41666088, "memory(GiB)": 34.88, "step": 120255, "train_speed(iter/s)": 0.410401 }, { "acc": 0.94955149, "epoch": 3.2561665719004687, "grad_norm": 5.791749954223633, "learning_rate": 9.169108173609307e-07, "loss": 0.27370262, "memory(GiB)": 34.88, "step": 120260, "train_speed(iter/s)": 0.410402 }, { "acc": 0.94876595, "epoch": 3.256301952183684, "grad_norm": 7.241164684295654, "learning_rate": 9.165880185666214e-07, "loss": 0.32341244, "memory(GiB)": 34.88, "step": 120265, "train_speed(iter/s)": 0.410403 }, { "acc": 0.94011536, "epoch": 3.2564373324668994, "grad_norm": 14.424235343933105, "learning_rate": 9.162652709313371e-07, "loss": 0.34516339, "memory(GiB)": 34.88, "step": 120270, "train_speed(iter/s)": 0.410404 }, { "acc": 0.94574509, "epoch": 3.256572712750115, "grad_norm": 4.757030010223389, "learning_rate": 9.159425744591187e-07, "loss": 0.28035588, "memory(GiB)": 34.88, "step": 120275, "train_speed(iter/s)": 0.410405 }, { "acc": 0.95503502, "epoch": 3.2567080930333305, "grad_norm": 3.864297866821289, "learning_rate": 9.15619929154005e-07, "loss": 0.22122383, "memory(GiB)": 34.88, "step": 120280, "train_speed(iter/s)": 0.410406 }, { "acc": 0.93626623, "epoch": 3.2568434733165463, "grad_norm": 12.502761840820312, "learning_rate": 9.152973350200437e-07, "loss": 0.33472168, "memory(GiB)": 34.88, "step": 120285, "train_speed(iter/s)": 0.410407 }, { "acc": 0.94494572, "epoch": 3.2569788535997617, "grad_norm": 12.02054214477539, "learning_rate": 9.149747920612719e-07, "loss": 0.35341978, "memory(GiB)": 34.88, "step": 120290, "train_speed(iter/s)": 0.410408 }, { "acc": 0.94549046, "epoch": 3.2571142338829775, "grad_norm": 6.766211986541748, "learning_rate": 9.14652300281733e-07, "loss": 0.29798751, "memory(GiB)": 34.88, "step": 120295, "train_speed(iter/s)": 0.410409 }, { "acc": 0.95560102, "epoch": 3.257249614166193, "grad_norm": 4.546875, "learning_rate": 9.143298596854628e-07, "loss": 0.26378479, "memory(GiB)": 34.88, "step": 120300, "train_speed(iter/s)": 0.41041 }, { "acc": 0.9618022, "epoch": 3.257384994449408, "grad_norm": 6.407511234283447, "learning_rate": 9.140074702765045e-07, "loss": 0.18027258, "memory(GiB)": 34.88, "step": 120305, "train_speed(iter/s)": 0.410411 }, { "acc": 0.93390446, "epoch": 3.257520374732624, "grad_norm": 3.8391289710998535, "learning_rate": 9.136851320588944e-07, "loss": 0.34121695, "memory(GiB)": 34.88, "step": 120310, "train_speed(iter/s)": 0.410412 }, { "acc": 0.94964466, "epoch": 3.2576557550158394, "grad_norm": 4.240707874298096, "learning_rate": 9.133628450366711e-07, "loss": 0.30158401, "memory(GiB)": 34.88, "step": 120315, "train_speed(iter/s)": 0.410413 }, { "acc": 0.94254646, "epoch": 3.257791135299055, "grad_norm": 7.260497570037842, "learning_rate": 9.130406092138733e-07, "loss": 0.33663836, "memory(GiB)": 34.88, "step": 120320, "train_speed(iter/s)": 0.410414 }, { "acc": 0.96516037, "epoch": 3.2579265155822705, "grad_norm": 12.492188453674316, "learning_rate": 9.127184245945346e-07, "loss": 0.20813394, "memory(GiB)": 34.88, "step": 120325, "train_speed(iter/s)": 0.410415 }, { "acc": 0.94398518, "epoch": 3.2580618958654863, "grad_norm": 24.317201614379883, "learning_rate": 9.123962911826945e-07, "loss": 0.3767704, "memory(GiB)": 34.88, "step": 120330, "train_speed(iter/s)": 0.410416 }, { "acc": 0.94030819, "epoch": 3.2581972761487017, "grad_norm": 6.221701145172119, "learning_rate": 9.120742089823829e-07, "loss": 0.40174007, "memory(GiB)": 34.88, "step": 120335, "train_speed(iter/s)": 0.410417 }, { "acc": 0.95265274, "epoch": 3.258332656431917, "grad_norm": 5.169654369354248, "learning_rate": 9.117521779976415e-07, "loss": 0.29858718, "memory(GiB)": 34.88, "step": 120340, "train_speed(iter/s)": 0.410418 }, { "acc": 0.94984283, "epoch": 3.258468036715133, "grad_norm": 8.131525039672852, "learning_rate": 9.114301982324992e-07, "loss": 0.30132227, "memory(GiB)": 34.88, "step": 120345, "train_speed(iter/s)": 0.410419 }, { "acc": 0.95656404, "epoch": 3.258603416998348, "grad_norm": 5.838190078735352, "learning_rate": 9.111082696909916e-07, "loss": 0.22760987, "memory(GiB)": 34.88, "step": 120350, "train_speed(iter/s)": 0.41042 }, { "acc": 0.92692795, "epoch": 3.258738797281564, "grad_norm": 16.638933181762695, "learning_rate": 9.107863923771508e-07, "loss": 0.46839333, "memory(GiB)": 34.88, "step": 120355, "train_speed(iter/s)": 0.410421 }, { "acc": 0.92429428, "epoch": 3.2588741775647794, "grad_norm": 9.473953247070312, "learning_rate": 9.104645662950106e-07, "loss": 0.42079611, "memory(GiB)": 34.88, "step": 120360, "train_speed(iter/s)": 0.410421 }, { "acc": 0.95100336, "epoch": 3.259009557847995, "grad_norm": 6.972515106201172, "learning_rate": 9.101427914486005e-07, "loss": 0.31290216, "memory(GiB)": 34.88, "step": 120365, "train_speed(iter/s)": 0.410422 }, { "acc": 0.95466404, "epoch": 3.2591449381312105, "grad_norm": 9.927979469299316, "learning_rate": 9.098210678419498e-07, "loss": 0.29845059, "memory(GiB)": 34.88, "step": 120370, "train_speed(iter/s)": 0.410423 }, { "acc": 0.95108719, "epoch": 3.259280318414426, "grad_norm": 10.173836708068848, "learning_rate": 9.09499395479094e-07, "loss": 0.29321737, "memory(GiB)": 34.88, "step": 120375, "train_speed(iter/s)": 0.410424 }, { "acc": 0.94567928, "epoch": 3.2594156986976417, "grad_norm": 7.143346309661865, "learning_rate": 9.091777743640575e-07, "loss": 0.29783933, "memory(GiB)": 34.88, "step": 120380, "train_speed(iter/s)": 0.410425 }, { "acc": 0.94740295, "epoch": 3.259551078980857, "grad_norm": 4.3355231285095215, "learning_rate": 9.088562045008716e-07, "loss": 0.30157356, "memory(GiB)": 34.88, "step": 120385, "train_speed(iter/s)": 0.410426 }, { "acc": 0.94460773, "epoch": 3.259686459264073, "grad_norm": 5.687736988067627, "learning_rate": 9.085346858935645e-07, "loss": 0.34986174, "memory(GiB)": 34.88, "step": 120390, "train_speed(iter/s)": 0.410427 }, { "acc": 0.93388634, "epoch": 3.259821839547288, "grad_norm": 8.112177848815918, "learning_rate": 9.08213218546165e-07, "loss": 0.40228772, "memory(GiB)": 34.88, "step": 120395, "train_speed(iter/s)": 0.410428 }, { "acc": 0.93188877, "epoch": 3.259957219830504, "grad_norm": 6.438024520874023, "learning_rate": 9.078918024626985e-07, "loss": 0.42104101, "memory(GiB)": 34.88, "step": 120400, "train_speed(iter/s)": 0.410429 }, { "acc": 0.94765778, "epoch": 3.2600926001137194, "grad_norm": 5.900852203369141, "learning_rate": 9.075704376471894e-07, "loss": 0.28309903, "memory(GiB)": 34.88, "step": 120405, "train_speed(iter/s)": 0.41043 }, { "acc": 0.94344482, "epoch": 3.260227980396935, "grad_norm": 8.396322250366211, "learning_rate": 9.072491241036679e-07, "loss": 0.34189954, "memory(GiB)": 34.88, "step": 120410, "train_speed(iter/s)": 0.410431 }, { "acc": 0.93806725, "epoch": 3.2603633606801505, "grad_norm": 8.971013069152832, "learning_rate": 9.069278618361555e-07, "loss": 0.41567073, "memory(GiB)": 34.88, "step": 120415, "train_speed(iter/s)": 0.410432 }, { "acc": 0.94684219, "epoch": 3.2604987409633663, "grad_norm": 3.19801926612854, "learning_rate": 9.066066508486778e-07, "loss": 0.28026416, "memory(GiB)": 34.88, "step": 120420, "train_speed(iter/s)": 0.410433 }, { "acc": 0.94993315, "epoch": 3.2606341212465817, "grad_norm": 5.975661277770996, "learning_rate": 9.062854911452592e-07, "loss": 0.24859781, "memory(GiB)": 34.88, "step": 120425, "train_speed(iter/s)": 0.410434 }, { "acc": 0.93101206, "epoch": 3.260769501529797, "grad_norm": 12.841628074645996, "learning_rate": 9.059643827299236e-07, "loss": 0.46246505, "memory(GiB)": 34.88, "step": 120430, "train_speed(iter/s)": 0.410435 }, { "acc": 0.95775232, "epoch": 3.260904881813013, "grad_norm": 5.7768988609313965, "learning_rate": 9.056433256066914e-07, "loss": 0.23671994, "memory(GiB)": 34.88, "step": 120435, "train_speed(iter/s)": 0.410436 }, { "acc": 0.93164873, "epoch": 3.261040262096228, "grad_norm": 7.0751824378967285, "learning_rate": 9.053223197795847e-07, "loss": 0.45941792, "memory(GiB)": 34.88, "step": 120440, "train_speed(iter/s)": 0.410437 }, { "acc": 0.93527431, "epoch": 3.261175642379444, "grad_norm": 8.337669372558594, "learning_rate": 9.050013652526274e-07, "loss": 0.34792838, "memory(GiB)": 34.88, "step": 120445, "train_speed(iter/s)": 0.410438 }, { "acc": 0.95589085, "epoch": 3.2613110226626594, "grad_norm": 9.877891540527344, "learning_rate": 9.046804620298365e-07, "loss": 0.2749877, "memory(GiB)": 34.88, "step": 120450, "train_speed(iter/s)": 0.410439 }, { "acc": 0.93598881, "epoch": 3.261446402945875, "grad_norm": 6.735393047332764, "learning_rate": 9.043596101152341e-07, "loss": 0.35899894, "memory(GiB)": 34.88, "step": 120455, "train_speed(iter/s)": 0.41044 }, { "acc": 0.94446678, "epoch": 3.2615817832290905, "grad_norm": 14.502828598022461, "learning_rate": 9.040388095128385e-07, "loss": 0.34190073, "memory(GiB)": 34.88, "step": 120460, "train_speed(iter/s)": 0.410441 }, { "acc": 0.95295496, "epoch": 3.261717163512306, "grad_norm": 4.613864421844482, "learning_rate": 9.037180602266697e-07, "loss": 0.23141418, "memory(GiB)": 34.88, "step": 120465, "train_speed(iter/s)": 0.410442 }, { "acc": 0.94144373, "epoch": 3.2618525437955217, "grad_norm": 5.345813751220703, "learning_rate": 9.033973622607444e-07, "loss": 0.29064691, "memory(GiB)": 34.88, "step": 120470, "train_speed(iter/s)": 0.410443 }, { "acc": 0.95484161, "epoch": 3.261987924078737, "grad_norm": 3.446349859237671, "learning_rate": 9.030767156190799e-07, "loss": 0.26321182, "memory(GiB)": 34.88, "step": 120475, "train_speed(iter/s)": 0.410443 }, { "acc": 0.92864752, "epoch": 3.262123304361953, "grad_norm": 8.029234886169434, "learning_rate": 9.027561203056939e-07, "loss": 0.39645557, "memory(GiB)": 34.88, "step": 120480, "train_speed(iter/s)": 0.410444 }, { "acc": 0.9360918, "epoch": 3.262258684645168, "grad_norm": 4.462831497192383, "learning_rate": 9.024355763246031e-07, "loss": 0.38572335, "memory(GiB)": 34.88, "step": 120485, "train_speed(iter/s)": 0.410445 }, { "acc": 0.94605103, "epoch": 3.262394064928384, "grad_norm": 11.463383674621582, "learning_rate": 9.021150836798207e-07, "loss": 0.33896215, "memory(GiB)": 34.88, "step": 120490, "train_speed(iter/s)": 0.410446 }, { "acc": 0.95968933, "epoch": 3.2625294452115994, "grad_norm": 11.641175270080566, "learning_rate": 9.017946423753623e-07, "loss": 0.253756, "memory(GiB)": 34.88, "step": 120495, "train_speed(iter/s)": 0.410447 }, { "acc": 0.94285736, "epoch": 3.2626648254948147, "grad_norm": 6.370880603790283, "learning_rate": 9.01474252415244e-07, "loss": 0.3297574, "memory(GiB)": 34.88, "step": 120500, "train_speed(iter/s)": 0.410448 }, { "acc": 0.93660698, "epoch": 3.2628002057780305, "grad_norm": 6.667839527130127, "learning_rate": 9.011539138034763e-07, "loss": 0.40669537, "memory(GiB)": 34.88, "step": 120505, "train_speed(iter/s)": 0.410449 }, { "acc": 0.94925556, "epoch": 3.262935586061246, "grad_norm": 5.96385383605957, "learning_rate": 9.00833626544074e-07, "loss": 0.23090994, "memory(GiB)": 34.88, "step": 120510, "train_speed(iter/s)": 0.41045 }, { "acc": 0.93251114, "epoch": 3.2630709663444617, "grad_norm": 6.53316593170166, "learning_rate": 9.005133906410484e-07, "loss": 0.35391221, "memory(GiB)": 34.88, "step": 120515, "train_speed(iter/s)": 0.41045 }, { "acc": 0.95846319, "epoch": 3.263206346627677, "grad_norm": 3.401930570602417, "learning_rate": 9.001932060984129e-07, "loss": 0.23876061, "memory(GiB)": 34.88, "step": 120520, "train_speed(iter/s)": 0.410451 }, { "acc": 0.93776283, "epoch": 3.263341726910893, "grad_norm": 4.831873416900635, "learning_rate": 8.998730729201758e-07, "loss": 0.33984075, "memory(GiB)": 34.88, "step": 120525, "train_speed(iter/s)": 0.410452 }, { "acc": 0.939785, "epoch": 3.263477107194108, "grad_norm": 5.896942138671875, "learning_rate": 8.995529911103491e-07, "loss": 0.29553041, "memory(GiB)": 34.88, "step": 120530, "train_speed(iter/s)": 0.410453 }, { "acc": 0.95152149, "epoch": 3.2636124874773236, "grad_norm": 11.192992210388184, "learning_rate": 8.992329606729429e-07, "loss": 0.29461632, "memory(GiB)": 34.88, "step": 120535, "train_speed(iter/s)": 0.410454 }, { "acc": 0.94310608, "epoch": 3.2637478677605394, "grad_norm": 5.004087448120117, "learning_rate": 8.989129816119642e-07, "loss": 0.31237612, "memory(GiB)": 34.88, "step": 120540, "train_speed(iter/s)": 0.410455 }, { "acc": 0.93856993, "epoch": 3.2638832480437547, "grad_norm": 6.314690113067627, "learning_rate": 8.985930539314228e-07, "loss": 0.38284843, "memory(GiB)": 34.88, "step": 120545, "train_speed(iter/s)": 0.410456 }, { "acc": 0.94912024, "epoch": 3.2640186283269705, "grad_norm": 5.413853645324707, "learning_rate": 8.982731776353256e-07, "loss": 0.28152361, "memory(GiB)": 34.88, "step": 120550, "train_speed(iter/s)": 0.410457 }, { "acc": 0.93705273, "epoch": 3.264154008610186, "grad_norm": 8.263657569885254, "learning_rate": 8.979533527276817e-07, "loss": 0.41117201, "memory(GiB)": 34.88, "step": 120555, "train_speed(iter/s)": 0.410457 }, { "acc": 0.93705606, "epoch": 3.2642893888934017, "grad_norm": 6.890859127044678, "learning_rate": 8.976335792124946e-07, "loss": 0.35050178, "memory(GiB)": 34.88, "step": 120560, "train_speed(iter/s)": 0.410458 }, { "acc": 0.93866673, "epoch": 3.264424769176617, "grad_norm": 7.28524112701416, "learning_rate": 8.973138570937716e-07, "loss": 0.38155556, "memory(GiB)": 34.88, "step": 120565, "train_speed(iter/s)": 0.410459 }, { "acc": 0.94340611, "epoch": 3.264560149459833, "grad_norm": 7.440781116485596, "learning_rate": 8.969941863755195e-07, "loss": 0.31251888, "memory(GiB)": 34.88, "step": 120570, "train_speed(iter/s)": 0.41046 }, { "acc": 0.94658852, "epoch": 3.264695529743048, "grad_norm": 8.855162620544434, "learning_rate": 8.966745670617391e-07, "loss": 0.33151412, "memory(GiB)": 34.88, "step": 120575, "train_speed(iter/s)": 0.410461 }, { "acc": 0.94423771, "epoch": 3.264830910026264, "grad_norm": 5.176028728485107, "learning_rate": 8.963549991564367e-07, "loss": 0.34255331, "memory(GiB)": 34.88, "step": 120580, "train_speed(iter/s)": 0.410462 }, { "acc": 0.94746304, "epoch": 3.2649662903094794, "grad_norm": 13.016580581665039, "learning_rate": 8.96035482663615e-07, "loss": 0.30635636, "memory(GiB)": 34.88, "step": 120585, "train_speed(iter/s)": 0.410463 }, { "acc": 0.95056667, "epoch": 3.2651016705926947, "grad_norm": 12.570438385009766, "learning_rate": 8.957160175872782e-07, "loss": 0.3313509, "memory(GiB)": 34.88, "step": 120590, "train_speed(iter/s)": 0.410463 }, { "acc": 0.95519905, "epoch": 3.2652370508759105, "grad_norm": 4.678739070892334, "learning_rate": 8.953966039314246e-07, "loss": 0.29435186, "memory(GiB)": 34.88, "step": 120595, "train_speed(iter/s)": 0.410465 }, { "acc": 0.93932476, "epoch": 3.265372431159126, "grad_norm": 23.023025512695312, "learning_rate": 8.950772417000582e-07, "loss": 0.34396603, "memory(GiB)": 34.88, "step": 120600, "train_speed(iter/s)": 0.410465 }, { "acc": 0.95728102, "epoch": 3.2655078114423417, "grad_norm": 6.348836898803711, "learning_rate": 8.947579308971791e-07, "loss": 0.26377964, "memory(GiB)": 34.88, "step": 120605, "train_speed(iter/s)": 0.410466 }, { "acc": 0.95145607, "epoch": 3.265643191725557, "grad_norm": 4.435183525085449, "learning_rate": 8.944386715267882e-07, "loss": 0.25254948, "memory(GiB)": 34.88, "step": 120610, "train_speed(iter/s)": 0.410467 }, { "acc": 0.94421654, "epoch": 3.265778572008773, "grad_norm": 4.294358253479004, "learning_rate": 8.941194635928823e-07, "loss": 0.33242073, "memory(GiB)": 34.88, "step": 120615, "train_speed(iter/s)": 0.410468 }, { "acc": 0.95467434, "epoch": 3.265913952291988, "grad_norm": 7.823472023010254, "learning_rate": 8.93800307099462e-07, "loss": 0.28868861, "memory(GiB)": 34.88, "step": 120620, "train_speed(iter/s)": 0.410469 }, { "acc": 0.95347595, "epoch": 3.2660493325752036, "grad_norm": 1.6557201147079468, "learning_rate": 8.934812020505261e-07, "loss": 0.32760396, "memory(GiB)": 34.88, "step": 120625, "train_speed(iter/s)": 0.41047 }, { "acc": 0.94754601, "epoch": 3.2661847128584194, "grad_norm": 6.335041522979736, "learning_rate": 8.931621484500697e-07, "loss": 0.29820662, "memory(GiB)": 34.88, "step": 120630, "train_speed(iter/s)": 0.410471 }, { "acc": 0.94987965, "epoch": 3.2663200931416347, "grad_norm": 11.581855773925781, "learning_rate": 8.92843146302091e-07, "loss": 0.33803098, "memory(GiB)": 34.88, "step": 120635, "train_speed(iter/s)": 0.410471 }, { "acc": 0.94230289, "epoch": 3.2664554734248505, "grad_norm": 10.351421356201172, "learning_rate": 8.925241956105858e-07, "loss": 0.33832002, "memory(GiB)": 34.88, "step": 120640, "train_speed(iter/s)": 0.410472 }, { "acc": 0.94116526, "epoch": 3.266590853708066, "grad_norm": 8.243739128112793, "learning_rate": 8.922052963795506e-07, "loss": 0.32911425, "memory(GiB)": 34.88, "step": 120645, "train_speed(iter/s)": 0.410473 }, { "acc": 0.94210014, "epoch": 3.2667262339912817, "grad_norm": 6.249955654144287, "learning_rate": 8.918864486129783e-07, "loss": 0.36628544, "memory(GiB)": 34.88, "step": 120650, "train_speed(iter/s)": 0.410474 }, { "acc": 0.94659023, "epoch": 3.266861614274497, "grad_norm": 8.350919723510742, "learning_rate": 8.915676523148646e-07, "loss": 0.32672939, "memory(GiB)": 34.88, "step": 120655, "train_speed(iter/s)": 0.410475 }, { "acc": 0.94480495, "epoch": 3.2669969945577124, "grad_norm": 67.00626373291016, "learning_rate": 8.912489074892029e-07, "loss": 0.36228445, "memory(GiB)": 34.88, "step": 120660, "train_speed(iter/s)": 0.410476 }, { "acc": 0.93991795, "epoch": 3.267132374840928, "grad_norm": 10.246543884277344, "learning_rate": 8.909302141399856e-07, "loss": 0.35901165, "memory(GiB)": 34.88, "step": 120665, "train_speed(iter/s)": 0.410477 }, { "acc": 0.92736702, "epoch": 3.2672677551241436, "grad_norm": 7.036537170410156, "learning_rate": 8.906115722712043e-07, "loss": 0.4218379, "memory(GiB)": 34.88, "step": 120670, "train_speed(iter/s)": 0.410478 }, { "acc": 0.94477863, "epoch": 3.2674031354073594, "grad_norm": 12.167933464050293, "learning_rate": 8.902929818868522e-07, "loss": 0.31985455, "memory(GiB)": 34.88, "step": 120675, "train_speed(iter/s)": 0.410478 }, { "acc": 0.94289989, "epoch": 3.2675385156905747, "grad_norm": 9.600250244140625, "learning_rate": 8.899744429909202e-07, "loss": 0.34535892, "memory(GiB)": 34.88, "step": 120680, "train_speed(iter/s)": 0.410479 }, { "acc": 0.92997322, "epoch": 3.2676738959737905, "grad_norm": 7.14644718170166, "learning_rate": 8.896559555873957e-07, "loss": 0.39473181, "memory(GiB)": 34.88, "step": 120685, "train_speed(iter/s)": 0.41048 }, { "acc": 0.93473082, "epoch": 3.267809276257006, "grad_norm": 7.111518383026123, "learning_rate": 8.893375196802735e-07, "loss": 0.43790126, "memory(GiB)": 34.88, "step": 120690, "train_speed(iter/s)": 0.410481 }, { "acc": 0.9351119, "epoch": 3.2679446565402213, "grad_norm": 12.562003135681152, "learning_rate": 8.890191352735391e-07, "loss": 0.44279881, "memory(GiB)": 34.88, "step": 120695, "train_speed(iter/s)": 0.410482 }, { "acc": 0.93975754, "epoch": 3.268080036823437, "grad_norm": 4.844443321228027, "learning_rate": 8.887008023711808e-07, "loss": 0.35035033, "memory(GiB)": 34.88, "step": 120700, "train_speed(iter/s)": 0.410483 }, { "acc": 0.95321016, "epoch": 3.2682154171066524, "grad_norm": 3.569894552230835, "learning_rate": 8.883825209771867e-07, "loss": 0.27252345, "memory(GiB)": 34.88, "step": 120705, "train_speed(iter/s)": 0.410484 }, { "acc": 0.94411097, "epoch": 3.2683507973898682, "grad_norm": 9.161338806152344, "learning_rate": 8.880642910955452e-07, "loss": 0.30117226, "memory(GiB)": 34.88, "step": 120710, "train_speed(iter/s)": 0.410485 }, { "acc": 0.94420242, "epoch": 3.2684861776730836, "grad_norm": 11.420541763305664, "learning_rate": 8.877461127302424e-07, "loss": 0.38272038, "memory(GiB)": 34.88, "step": 120715, "train_speed(iter/s)": 0.410485 }, { "acc": 0.94361382, "epoch": 3.2686215579562994, "grad_norm": 7.911701202392578, "learning_rate": 8.87427985885261e-07, "loss": 0.35891547, "memory(GiB)": 34.88, "step": 120720, "train_speed(iter/s)": 0.410486 }, { "acc": 0.93563766, "epoch": 3.2687569382395147, "grad_norm": 10.562230110168457, "learning_rate": 8.871099105645918e-07, "loss": 0.37807627, "memory(GiB)": 34.88, "step": 120725, "train_speed(iter/s)": 0.410487 }, { "acc": 0.93143711, "epoch": 3.26889231852273, "grad_norm": 5.331934452056885, "learning_rate": 8.867918867722144e-07, "loss": 0.40551176, "memory(GiB)": 34.88, "step": 120730, "train_speed(iter/s)": 0.410488 }, { "acc": 0.9552865, "epoch": 3.269027698805946, "grad_norm": 6.049151420593262, "learning_rate": 8.86473914512116e-07, "loss": 0.25301523, "memory(GiB)": 34.88, "step": 120735, "train_speed(iter/s)": 0.410489 }, { "acc": 0.94694176, "epoch": 3.2691630790891613, "grad_norm": 12.568306922912598, "learning_rate": 8.861559937882776e-07, "loss": 0.27322178, "memory(GiB)": 34.88, "step": 120740, "train_speed(iter/s)": 0.41049 }, { "acc": 0.93519325, "epoch": 3.269298459372377, "grad_norm": 18.38884162902832, "learning_rate": 8.858381246046819e-07, "loss": 0.37315483, "memory(GiB)": 34.88, "step": 120745, "train_speed(iter/s)": 0.410491 }, { "acc": 0.93660183, "epoch": 3.2694338396555924, "grad_norm": 4.870358467102051, "learning_rate": 8.855203069653131e-07, "loss": 0.38513896, "memory(GiB)": 34.88, "step": 120750, "train_speed(iter/s)": 0.410492 }, { "acc": 0.94883766, "epoch": 3.2695692199388082, "grad_norm": 11.324625968933105, "learning_rate": 8.852025408741495e-07, "loss": 0.29983883, "memory(GiB)": 34.88, "step": 120755, "train_speed(iter/s)": 0.410493 }, { "acc": 0.95053024, "epoch": 3.2697046002220236, "grad_norm": 6.045248031616211, "learning_rate": 8.848848263351731e-07, "loss": 0.27548819, "memory(GiB)": 34.88, "step": 120760, "train_speed(iter/s)": 0.410494 }, { "acc": 0.9465806, "epoch": 3.2698399805052394, "grad_norm": 15.446653366088867, "learning_rate": 8.845671633523641e-07, "loss": 0.40149488, "memory(GiB)": 34.88, "step": 120765, "train_speed(iter/s)": 0.410495 }, { "acc": 0.92021294, "epoch": 3.2699753607884547, "grad_norm": 11.223587989807129, "learning_rate": 8.842495519297027e-07, "loss": 0.51500483, "memory(GiB)": 34.88, "step": 120770, "train_speed(iter/s)": 0.410496 }, { "acc": 0.94114456, "epoch": 3.2701107410716705, "grad_norm": 7.913054466247559, "learning_rate": 8.839319920711639e-07, "loss": 0.34361408, "memory(GiB)": 34.88, "step": 120775, "train_speed(iter/s)": 0.410496 }, { "acc": 0.93699112, "epoch": 3.270246121354886, "grad_norm": 7.4249091148376465, "learning_rate": 8.836144837807311e-07, "loss": 0.38285956, "memory(GiB)": 34.88, "step": 120780, "train_speed(iter/s)": 0.410497 }, { "acc": 0.95005054, "epoch": 3.2703815016381013, "grad_norm": 6.223175048828125, "learning_rate": 8.832970270623793e-07, "loss": 0.23813946, "memory(GiB)": 34.88, "step": 120785, "train_speed(iter/s)": 0.410498 }, { "acc": 0.95674686, "epoch": 3.270516881921317, "grad_norm": 6.791045188903809, "learning_rate": 8.829796219200835e-07, "loss": 0.25087767, "memory(GiB)": 34.88, "step": 120790, "train_speed(iter/s)": 0.410499 }, { "acc": 0.94072132, "epoch": 3.2706522622045324, "grad_norm": 6.452314376831055, "learning_rate": 8.826622683578216e-07, "loss": 0.31754365, "memory(GiB)": 34.88, "step": 120795, "train_speed(iter/s)": 0.410499 }, { "acc": 0.94930611, "epoch": 3.2707876424877482, "grad_norm": 9.021490097045898, "learning_rate": 8.823449663795686e-07, "loss": 0.33673408, "memory(GiB)": 34.88, "step": 120800, "train_speed(iter/s)": 0.4105 }, { "acc": 0.94939327, "epoch": 3.2709230227709636, "grad_norm": 5.634845733642578, "learning_rate": 8.820277159893011e-07, "loss": 0.34916539, "memory(GiB)": 34.88, "step": 120805, "train_speed(iter/s)": 0.410501 }, { "acc": 0.94075136, "epoch": 3.2710584030541794, "grad_norm": 5.640315055847168, "learning_rate": 8.817105171909889e-07, "loss": 0.36022804, "memory(GiB)": 34.88, "step": 120810, "train_speed(iter/s)": 0.410502 }, { "acc": 0.95948801, "epoch": 3.2711937833373947, "grad_norm": 4.391772270202637, "learning_rate": 8.813933699886108e-07, "loss": 0.26238894, "memory(GiB)": 34.88, "step": 120815, "train_speed(iter/s)": 0.410503 }, { "acc": 0.93405113, "epoch": 3.27132916362061, "grad_norm": 20.918678283691406, "learning_rate": 8.810762743861377e-07, "loss": 0.40717592, "memory(GiB)": 34.88, "step": 120820, "train_speed(iter/s)": 0.410504 }, { "acc": 0.93915606, "epoch": 3.271464543903826, "grad_norm": 19.954849243164062, "learning_rate": 8.8075923038754e-07, "loss": 0.34565959, "memory(GiB)": 34.88, "step": 120825, "train_speed(iter/s)": 0.410505 }, { "acc": 0.94099541, "epoch": 3.2715999241870413, "grad_norm": 2.8057808876037598, "learning_rate": 8.804422379967906e-07, "loss": 0.32303188, "memory(GiB)": 34.88, "step": 120830, "train_speed(iter/s)": 0.410506 }, { "acc": 0.94691925, "epoch": 3.271735304470257, "grad_norm": 8.011054992675781, "learning_rate": 8.801252972178606e-07, "loss": 0.30713503, "memory(GiB)": 34.88, "step": 120835, "train_speed(iter/s)": 0.410507 }, { "acc": 0.95817528, "epoch": 3.2718706847534724, "grad_norm": 4.663036823272705, "learning_rate": 8.798084080547215e-07, "loss": 0.22423892, "memory(GiB)": 34.88, "step": 120840, "train_speed(iter/s)": 0.410508 }, { "acc": 0.95318985, "epoch": 3.2720060650366882, "grad_norm": 3.6757118701934814, "learning_rate": 8.794915705113395e-07, "loss": 0.25905905, "memory(GiB)": 34.88, "step": 120845, "train_speed(iter/s)": 0.410508 }, { "acc": 0.9481472, "epoch": 3.2721414453199036, "grad_norm": 11.434478759765625, "learning_rate": 8.791747845916879e-07, "loss": 0.29104671, "memory(GiB)": 34.88, "step": 120850, "train_speed(iter/s)": 0.410509 }, { "acc": 0.94150925, "epoch": 3.272276825603119, "grad_norm": 5.98240327835083, "learning_rate": 8.788580502997321e-07, "loss": 0.39979784, "memory(GiB)": 34.88, "step": 120855, "train_speed(iter/s)": 0.41051 }, { "acc": 0.93571043, "epoch": 3.2724122058863347, "grad_norm": 11.012076377868652, "learning_rate": 8.785413676394418e-07, "loss": 0.35626683, "memory(GiB)": 34.88, "step": 120860, "train_speed(iter/s)": 0.410511 }, { "acc": 0.93974724, "epoch": 3.27254758616955, "grad_norm": 7.74392032623291, "learning_rate": 8.782247366147819e-07, "loss": 0.41122837, "memory(GiB)": 34.88, "step": 120865, "train_speed(iter/s)": 0.410512 }, { "acc": 0.93508558, "epoch": 3.272682966452766, "grad_norm": 8.317327499389648, "learning_rate": 8.779081572297199e-07, "loss": 0.43592858, "memory(GiB)": 34.88, "step": 120870, "train_speed(iter/s)": 0.410513 }, { "acc": 0.94021416, "epoch": 3.2728183467359813, "grad_norm": 4.4371490478515625, "learning_rate": 8.775916294882227e-07, "loss": 0.34288542, "memory(GiB)": 34.88, "step": 120875, "train_speed(iter/s)": 0.410514 }, { "acc": 0.95352173, "epoch": 3.272953727019197, "grad_norm": 7.198389053344727, "learning_rate": 8.772751533942524e-07, "loss": 0.25979609, "memory(GiB)": 34.88, "step": 120880, "train_speed(iter/s)": 0.410515 }, { "acc": 0.94919033, "epoch": 3.2730891073024124, "grad_norm": 7.449596881866455, "learning_rate": 8.76958728951777e-07, "loss": 0.34488139, "memory(GiB)": 34.88, "step": 120885, "train_speed(iter/s)": 0.410516 }, { "acc": 0.94147377, "epoch": 3.273224487585628, "grad_norm": 3.3919200897216797, "learning_rate": 8.76642356164758e-07, "loss": 0.38434262, "memory(GiB)": 34.88, "step": 120890, "train_speed(iter/s)": 0.410517 }, { "acc": 0.94957991, "epoch": 3.2733598678688436, "grad_norm": 3.661780595779419, "learning_rate": 8.763260350371608e-07, "loss": 0.24761446, "memory(GiB)": 34.88, "step": 120895, "train_speed(iter/s)": 0.410517 }, { "acc": 0.94057159, "epoch": 3.273495248152059, "grad_norm": 23.092809677124023, "learning_rate": 8.760097655729438e-07, "loss": 0.34830563, "memory(GiB)": 34.88, "step": 120900, "train_speed(iter/s)": 0.410518 }, { "acc": 0.93932838, "epoch": 3.2736306284352747, "grad_norm": 8.033273696899414, "learning_rate": 8.756935477760743e-07, "loss": 0.35918198, "memory(GiB)": 34.88, "step": 120905, "train_speed(iter/s)": 0.410519 }, { "acc": 0.94568729, "epoch": 3.27376600871849, "grad_norm": 4.381811618804932, "learning_rate": 8.753773816505106e-07, "loss": 0.29477801, "memory(GiB)": 34.88, "step": 120910, "train_speed(iter/s)": 0.41052 }, { "acc": 0.94272709, "epoch": 3.273901389001706, "grad_norm": 7.006595611572266, "learning_rate": 8.750612672002106e-07, "loss": 0.3314245, "memory(GiB)": 34.88, "step": 120915, "train_speed(iter/s)": 0.410521 }, { "acc": 0.94725323, "epoch": 3.2740367692849213, "grad_norm": 2.567993402481079, "learning_rate": 8.747452044291407e-07, "loss": 0.27244594, "memory(GiB)": 34.88, "step": 120920, "train_speed(iter/s)": 0.410522 }, { "acc": 0.920504, "epoch": 3.274172149568137, "grad_norm": 14.936717987060547, "learning_rate": 8.744291933412549e-07, "loss": 0.51696339, "memory(GiB)": 34.88, "step": 120925, "train_speed(iter/s)": 0.410523 }, { "acc": 0.93688154, "epoch": 3.2743075298513524, "grad_norm": 6.06762170791626, "learning_rate": 8.741132339405147e-07, "loss": 0.37594194, "memory(GiB)": 34.88, "step": 120930, "train_speed(iter/s)": 0.410524 }, { "acc": 0.95849094, "epoch": 3.2744429101345682, "grad_norm": 5.936478137969971, "learning_rate": 8.737973262308751e-07, "loss": 0.26715198, "memory(GiB)": 34.88, "step": 120935, "train_speed(iter/s)": 0.410525 }, { "acc": 0.94391422, "epoch": 3.2745782904177836, "grad_norm": 3.4051156044006348, "learning_rate": 8.734814702162976e-07, "loss": 0.30194759, "memory(GiB)": 34.88, "step": 120940, "train_speed(iter/s)": 0.410526 }, { "acc": 0.93650589, "epoch": 3.274713670700999, "grad_norm": 4.076794624328613, "learning_rate": 8.731656659007369e-07, "loss": 0.40909448, "memory(GiB)": 34.88, "step": 120945, "train_speed(iter/s)": 0.410527 }, { "acc": 0.96020441, "epoch": 3.2748490509842147, "grad_norm": 33.652496337890625, "learning_rate": 8.728499132881466e-07, "loss": 0.26581283, "memory(GiB)": 34.88, "step": 120950, "train_speed(iter/s)": 0.410527 }, { "acc": 0.92662621, "epoch": 3.27498443126743, "grad_norm": 3.8918445110321045, "learning_rate": 8.725342123824868e-07, "loss": 0.40334883, "memory(GiB)": 34.88, "step": 120955, "train_speed(iter/s)": 0.410528 }, { "acc": 0.94919386, "epoch": 3.275119811550646, "grad_norm": 3.524261474609375, "learning_rate": 8.722185631877084e-07, "loss": 0.31764758, "memory(GiB)": 34.88, "step": 120960, "train_speed(iter/s)": 0.410529 }, { "acc": 0.94336605, "epoch": 3.2752551918338613, "grad_norm": 5.105875492095947, "learning_rate": 8.719029657077687e-07, "loss": 0.35130854, "memory(GiB)": 34.88, "step": 120965, "train_speed(iter/s)": 0.41053 }, { "acc": 0.94484024, "epoch": 3.275390572117077, "grad_norm": 3.8658947944641113, "learning_rate": 8.715874199466165e-07, "loss": 0.37947745, "memory(GiB)": 34.88, "step": 120970, "train_speed(iter/s)": 0.410531 }, { "acc": 0.94606667, "epoch": 3.2755259524002924, "grad_norm": 6.622001647949219, "learning_rate": 8.712719259082107e-07, "loss": 0.31446412, "memory(GiB)": 34.88, "step": 120975, "train_speed(iter/s)": 0.410532 }, { "acc": 0.94652815, "epoch": 3.275661332683508, "grad_norm": 9.740947723388672, "learning_rate": 8.709564835964989e-07, "loss": 0.27616379, "memory(GiB)": 34.88, "step": 120980, "train_speed(iter/s)": 0.410533 }, { "acc": 0.93952961, "epoch": 3.2757967129667236, "grad_norm": 6.197385787963867, "learning_rate": 8.706410930154343e-07, "loss": 0.39155574, "memory(GiB)": 34.88, "step": 120985, "train_speed(iter/s)": 0.410534 }, { "acc": 0.92587404, "epoch": 3.275932093249939, "grad_norm": 10.554542541503906, "learning_rate": 8.703257541689692e-07, "loss": 0.50719466, "memory(GiB)": 34.88, "step": 120990, "train_speed(iter/s)": 0.410535 }, { "acc": 0.94345627, "epoch": 3.2760674735331548, "grad_norm": 6.505859375, "learning_rate": 8.700104670610505e-07, "loss": 0.32927818, "memory(GiB)": 34.88, "step": 120995, "train_speed(iter/s)": 0.410536 }, { "acc": 0.94869461, "epoch": 3.27620285381637, "grad_norm": 2.8688321113586426, "learning_rate": 8.696952316956312e-07, "loss": 0.31009271, "memory(GiB)": 34.88, "step": 121000, "train_speed(iter/s)": 0.410536 }, { "acc": 0.93963118, "epoch": 3.276338234099586, "grad_norm": 7.953527927398682, "learning_rate": 8.693800480766558e-07, "loss": 0.37151825, "memory(GiB)": 34.88, "step": 121005, "train_speed(iter/s)": 0.410537 }, { "acc": 0.94266586, "epoch": 3.2764736143828013, "grad_norm": 7.69542121887207, "learning_rate": 8.690649162080785e-07, "loss": 0.31968164, "memory(GiB)": 34.88, "step": 121010, "train_speed(iter/s)": 0.410538 }, { "acc": 0.9397686, "epoch": 3.2766089946660166, "grad_norm": 16.09217071533203, "learning_rate": 8.687498360938419e-07, "loss": 0.34156079, "memory(GiB)": 34.88, "step": 121015, "train_speed(iter/s)": 0.410539 }, { "acc": 0.94921722, "epoch": 3.2767443749492324, "grad_norm": 10.373854637145996, "learning_rate": 8.684348077378958e-07, "loss": 0.27643232, "memory(GiB)": 34.88, "step": 121020, "train_speed(iter/s)": 0.41054 }, { "acc": 0.93518867, "epoch": 3.276879755232448, "grad_norm": 7.56223726272583, "learning_rate": 8.681198311441851e-07, "loss": 0.39184461, "memory(GiB)": 34.88, "step": 121025, "train_speed(iter/s)": 0.410541 }, { "acc": 0.94973679, "epoch": 3.2770151355156636, "grad_norm": 8.375870704650879, "learning_rate": 8.678049063166577e-07, "loss": 0.33299775, "memory(GiB)": 34.88, "step": 121030, "train_speed(iter/s)": 0.410542 }, { "acc": 0.94006615, "epoch": 3.277150515798879, "grad_norm": 9.823668479919434, "learning_rate": 8.674900332592569e-07, "loss": 0.40369062, "memory(GiB)": 34.88, "step": 121035, "train_speed(iter/s)": 0.410543 }, { "acc": 0.9521965, "epoch": 3.2772858960820948, "grad_norm": 7.6292290687561035, "learning_rate": 8.67175211975925e-07, "loss": 0.27817271, "memory(GiB)": 34.88, "step": 121040, "train_speed(iter/s)": 0.410544 }, { "acc": 0.94003258, "epoch": 3.27742127636531, "grad_norm": 5.507474422454834, "learning_rate": 8.668604424706105e-07, "loss": 0.34253287, "memory(GiB)": 34.88, "step": 121045, "train_speed(iter/s)": 0.410545 }, { "acc": 0.95571394, "epoch": 3.2775566566485255, "grad_norm": 3.012807846069336, "learning_rate": 8.665457247472522e-07, "loss": 0.28824058, "memory(GiB)": 34.88, "step": 121050, "train_speed(iter/s)": 0.410546 }, { "acc": 0.93919125, "epoch": 3.2776920369317413, "grad_norm": 6.295272350311279, "learning_rate": 8.662310588097952e-07, "loss": 0.37553697, "memory(GiB)": 34.88, "step": 121055, "train_speed(iter/s)": 0.410547 }, { "acc": 0.94106922, "epoch": 3.2778274172149566, "grad_norm": 16.970905303955078, "learning_rate": 8.659164446621804e-07, "loss": 0.34009302, "memory(GiB)": 34.88, "step": 121060, "train_speed(iter/s)": 0.410548 }, { "acc": 0.957514, "epoch": 3.2779627974981724, "grad_norm": 5.591878890991211, "learning_rate": 8.656018823083496e-07, "loss": 0.2518599, "memory(GiB)": 34.88, "step": 121065, "train_speed(iter/s)": 0.410548 }, { "acc": 0.94876862, "epoch": 3.278098177781388, "grad_norm": 4.594241619110107, "learning_rate": 8.652873717522433e-07, "loss": 0.31168821, "memory(GiB)": 34.88, "step": 121070, "train_speed(iter/s)": 0.410549 }, { "acc": 0.93289261, "epoch": 3.2782335580646036, "grad_norm": 23.125207901000977, "learning_rate": 8.649729129977985e-07, "loss": 0.4322238, "memory(GiB)": 34.88, "step": 121075, "train_speed(iter/s)": 0.41055 }, { "acc": 0.94065666, "epoch": 3.278368938347819, "grad_norm": 5.811807632446289, "learning_rate": 8.646585060489592e-07, "loss": 0.32178679, "memory(GiB)": 34.88, "step": 121080, "train_speed(iter/s)": 0.410551 }, { "acc": 0.94218445, "epoch": 3.2785043186310348, "grad_norm": 7.169393539428711, "learning_rate": 8.6434415090966e-07, "loss": 0.36494818, "memory(GiB)": 34.88, "step": 121085, "train_speed(iter/s)": 0.410552 }, { "acc": 0.94320889, "epoch": 3.27863969891425, "grad_norm": 24.520748138427734, "learning_rate": 8.640298475838406e-07, "loss": 0.31226511, "memory(GiB)": 34.88, "step": 121090, "train_speed(iter/s)": 0.410553 }, { "acc": 0.93684978, "epoch": 3.278775079197466, "grad_norm": 8.4805326461792, "learning_rate": 8.637155960754385e-07, "loss": 0.40329337, "memory(GiB)": 34.88, "step": 121095, "train_speed(iter/s)": 0.410554 }, { "acc": 0.94547167, "epoch": 3.2789104594806813, "grad_norm": 5.077686309814453, "learning_rate": 8.6340139638839e-07, "loss": 0.27111404, "memory(GiB)": 34.88, "step": 121100, "train_speed(iter/s)": 0.410555 }, { "acc": 0.94248486, "epoch": 3.2790458397638966, "grad_norm": 8.17896842956543, "learning_rate": 8.630872485266305e-07, "loss": 0.351052, "memory(GiB)": 34.88, "step": 121105, "train_speed(iter/s)": 0.410556 }, { "acc": 0.95664434, "epoch": 3.2791812200471124, "grad_norm": 3.9656009674072266, "learning_rate": 8.627731524940951e-07, "loss": 0.24361379, "memory(GiB)": 34.88, "step": 121110, "train_speed(iter/s)": 0.410557 }, { "acc": 0.94340172, "epoch": 3.279316600330328, "grad_norm": 2.8860461711883545, "learning_rate": 8.624591082947211e-07, "loss": 0.29187152, "memory(GiB)": 34.88, "step": 121115, "train_speed(iter/s)": 0.410557 }, { "acc": 0.94713936, "epoch": 3.2794519806135436, "grad_norm": 3.971566677093506, "learning_rate": 8.621451159324382e-07, "loss": 0.26227059, "memory(GiB)": 34.88, "step": 121120, "train_speed(iter/s)": 0.410558 }, { "acc": 0.9355648, "epoch": 3.279587360896759, "grad_norm": 7.381375312805176, "learning_rate": 8.618311754111841e-07, "loss": 0.34160786, "memory(GiB)": 34.88, "step": 121125, "train_speed(iter/s)": 0.410559 }, { "acc": 0.93634071, "epoch": 3.2797227411799748, "grad_norm": 10.463202476501465, "learning_rate": 8.615172867348866e-07, "loss": 0.37557313, "memory(GiB)": 34.88, "step": 121130, "train_speed(iter/s)": 0.41056 }, { "acc": 0.94361801, "epoch": 3.27985812146319, "grad_norm": 9.054655075073242, "learning_rate": 8.612034499074834e-07, "loss": 0.26540298, "memory(GiB)": 34.88, "step": 121135, "train_speed(iter/s)": 0.410561 }, { "acc": 0.94173956, "epoch": 3.2799935017464055, "grad_norm": 6.221932411193848, "learning_rate": 8.608896649329018e-07, "loss": 0.33190722, "memory(GiB)": 34.88, "step": 121140, "train_speed(iter/s)": 0.410562 }, { "acc": 0.94829683, "epoch": 3.2801288820296213, "grad_norm": 3.9214704036712646, "learning_rate": 8.605759318150744e-07, "loss": 0.28625011, "memory(GiB)": 34.88, "step": 121145, "train_speed(iter/s)": 0.410563 }, { "acc": 0.96169834, "epoch": 3.2802642623128366, "grad_norm": 4.31636381149292, "learning_rate": 8.602622505579302e-07, "loss": 0.22564583, "memory(GiB)": 34.88, "step": 121150, "train_speed(iter/s)": 0.410564 }, { "acc": 0.95111685, "epoch": 3.2803996425960524, "grad_norm": 5.119030475616455, "learning_rate": 8.59948621165401e-07, "loss": 0.31234331, "memory(GiB)": 34.88, "step": 121155, "train_speed(iter/s)": 0.410565 }, { "acc": 0.9227499, "epoch": 3.280535022879268, "grad_norm": 7.237326145172119, "learning_rate": 8.59635043641414e-07, "loss": 0.49815378, "memory(GiB)": 34.88, "step": 121160, "train_speed(iter/s)": 0.410566 }, { "acc": 0.95746822, "epoch": 3.2806704031624836, "grad_norm": 5.174391269683838, "learning_rate": 8.593215179898949e-07, "loss": 0.24191175, "memory(GiB)": 34.88, "step": 121165, "train_speed(iter/s)": 0.410567 }, { "acc": 0.94050722, "epoch": 3.280805783445699, "grad_norm": 12.502110481262207, "learning_rate": 8.590080442147759e-07, "loss": 0.36286209, "memory(GiB)": 34.88, "step": 121170, "train_speed(iter/s)": 0.410568 }, { "acc": 0.93638678, "epoch": 3.2809411637289143, "grad_norm": 10.369653701782227, "learning_rate": 8.586946223199812e-07, "loss": 0.35662847, "memory(GiB)": 34.88, "step": 121175, "train_speed(iter/s)": 0.410569 }, { "acc": 0.9431715, "epoch": 3.28107654401213, "grad_norm": 7.97952938079834, "learning_rate": 8.583812523094368e-07, "loss": 0.32217851, "memory(GiB)": 34.88, "step": 121180, "train_speed(iter/s)": 0.41057 }, { "acc": 0.94681892, "epoch": 3.2812119242953455, "grad_norm": 6.128743648529053, "learning_rate": 8.580679341870687e-07, "loss": 0.33928535, "memory(GiB)": 34.88, "step": 121185, "train_speed(iter/s)": 0.410571 }, { "acc": 0.9356122, "epoch": 3.2813473045785613, "grad_norm": 6.714751720428467, "learning_rate": 8.577546679568041e-07, "loss": 0.377018, "memory(GiB)": 34.88, "step": 121190, "train_speed(iter/s)": 0.410572 }, { "acc": 0.94047451, "epoch": 3.2814826848617766, "grad_norm": 6.2579755783081055, "learning_rate": 8.574414536225649e-07, "loss": 0.36733224, "memory(GiB)": 34.88, "step": 121195, "train_speed(iter/s)": 0.410573 }, { "acc": 0.93593531, "epoch": 3.2816180651449924, "grad_norm": 9.952506065368652, "learning_rate": 8.571282911882725e-07, "loss": 0.38155551, "memory(GiB)": 34.88, "step": 121200, "train_speed(iter/s)": 0.410574 }, { "acc": 0.94281702, "epoch": 3.281753445428208, "grad_norm": 12.97789478302002, "learning_rate": 8.568151806578547e-07, "loss": 0.33389957, "memory(GiB)": 34.88, "step": 121205, "train_speed(iter/s)": 0.410574 }, { "acc": 0.93371134, "epoch": 3.281888825711423, "grad_norm": 4.709872245788574, "learning_rate": 8.565021220352306e-07, "loss": 0.46136131, "memory(GiB)": 34.88, "step": 121210, "train_speed(iter/s)": 0.410575 }, { "acc": 0.93753204, "epoch": 3.282024205994639, "grad_norm": 7.49143648147583, "learning_rate": 8.561891153243234e-07, "loss": 0.35223694, "memory(GiB)": 34.88, "step": 121215, "train_speed(iter/s)": 0.410576 }, { "acc": 0.9480032, "epoch": 3.2821595862778543, "grad_norm": 3.7926485538482666, "learning_rate": 8.558761605290527e-07, "loss": 0.26250441, "memory(GiB)": 34.88, "step": 121220, "train_speed(iter/s)": 0.410577 }, { "acc": 0.94357262, "epoch": 3.28229496656107, "grad_norm": 5.503840446472168, "learning_rate": 8.555632576533414e-07, "loss": 0.33078938, "memory(GiB)": 34.88, "step": 121225, "train_speed(iter/s)": 0.410578 }, { "acc": 0.93485355, "epoch": 3.2824303468442855, "grad_norm": 14.70584487915039, "learning_rate": 8.552504067011064e-07, "loss": 0.34591579, "memory(GiB)": 34.88, "step": 121230, "train_speed(iter/s)": 0.410579 }, { "acc": 0.93035336, "epoch": 3.2825657271275013, "grad_norm": 19.016162872314453, "learning_rate": 8.549376076762679e-07, "loss": 0.3978971, "memory(GiB)": 34.88, "step": 121235, "train_speed(iter/s)": 0.41058 }, { "acc": 0.94856215, "epoch": 3.2827011074107166, "grad_norm": 8.297445297241211, "learning_rate": 8.546248605827459e-07, "loss": 0.25290346, "memory(GiB)": 34.88, "step": 121240, "train_speed(iter/s)": 0.410581 }, { "acc": 0.95246658, "epoch": 3.2828364876939324, "grad_norm": 9.428030014038086, "learning_rate": 8.543121654244552e-07, "loss": 0.26435175, "memory(GiB)": 34.88, "step": 121245, "train_speed(iter/s)": 0.410582 }, { "acc": 0.95833731, "epoch": 3.282971867977148, "grad_norm": 4.851905822753906, "learning_rate": 8.539995222053144e-07, "loss": 0.25776982, "memory(GiB)": 34.88, "step": 121250, "train_speed(iter/s)": 0.410583 }, { "acc": 0.93451986, "epoch": 3.2831072482603636, "grad_norm": 6.691351890563965, "learning_rate": 8.536869309292395e-07, "loss": 0.35198197, "memory(GiB)": 34.88, "step": 121255, "train_speed(iter/s)": 0.410584 }, { "acc": 0.95798235, "epoch": 3.283242628543579, "grad_norm": 2.376068592071533, "learning_rate": 8.533743916001486e-07, "loss": 0.22771559, "memory(GiB)": 34.88, "step": 121260, "train_speed(iter/s)": 0.410585 }, { "acc": 0.93572083, "epoch": 3.2833780088267943, "grad_norm": 5.923544406890869, "learning_rate": 8.530619042219538e-07, "loss": 0.36176934, "memory(GiB)": 34.88, "step": 121265, "train_speed(iter/s)": 0.410586 }, { "acc": 0.94622688, "epoch": 3.28351338911001, "grad_norm": 7.835172653198242, "learning_rate": 8.527494687985702e-07, "loss": 0.31222486, "memory(GiB)": 34.88, "step": 121270, "train_speed(iter/s)": 0.410587 }, { "acc": 0.96068029, "epoch": 3.2836487693932255, "grad_norm": 3.7720515727996826, "learning_rate": 8.52437085333913e-07, "loss": 0.23531234, "memory(GiB)": 34.88, "step": 121275, "train_speed(iter/s)": 0.410587 }, { "acc": 0.94353371, "epoch": 3.2837841496764413, "grad_norm": 17.059337615966797, "learning_rate": 8.521247538318957e-07, "loss": 0.31003292, "memory(GiB)": 34.88, "step": 121280, "train_speed(iter/s)": 0.410588 }, { "acc": 0.94038763, "epoch": 3.2839195299596566, "grad_norm": 10.835325241088867, "learning_rate": 8.518124742964286e-07, "loss": 0.30413761, "memory(GiB)": 34.88, "step": 121285, "train_speed(iter/s)": 0.410589 }, { "acc": 0.95069466, "epoch": 3.2840549102428724, "grad_norm": 9.90221118927002, "learning_rate": 8.51500246731425e-07, "loss": 0.28417916, "memory(GiB)": 34.88, "step": 121290, "train_speed(iter/s)": 0.41059 }, { "acc": 0.94266224, "epoch": 3.284190290526088, "grad_norm": 8.429900169372559, "learning_rate": 8.511880711407974e-07, "loss": 0.39553883, "memory(GiB)": 34.88, "step": 121295, "train_speed(iter/s)": 0.410591 }, { "acc": 0.93612804, "epoch": 3.284325670809303, "grad_norm": 6.974411487579346, "learning_rate": 8.508759475284533e-07, "loss": 0.39818957, "memory(GiB)": 34.88, "step": 121300, "train_speed(iter/s)": 0.410592 }, { "acc": 0.9311552, "epoch": 3.284461051092519, "grad_norm": 3.1047165393829346, "learning_rate": 8.505638758983051e-07, "loss": 0.43116989, "memory(GiB)": 34.88, "step": 121305, "train_speed(iter/s)": 0.410593 }, { "acc": 0.94355888, "epoch": 3.2845964313757343, "grad_norm": 10.731229782104492, "learning_rate": 8.502518562542611e-07, "loss": 0.31721504, "memory(GiB)": 34.88, "step": 121310, "train_speed(iter/s)": 0.410594 }, { "acc": 0.94941444, "epoch": 3.28473181165895, "grad_norm": 9.218107223510742, "learning_rate": 8.499398886002319e-07, "loss": 0.2881634, "memory(GiB)": 34.88, "step": 121315, "train_speed(iter/s)": 0.410595 }, { "acc": 0.94086838, "epoch": 3.2848671919421655, "grad_norm": 12.93157958984375, "learning_rate": 8.496279729401227e-07, "loss": 0.38484085, "memory(GiB)": 34.88, "step": 121320, "train_speed(iter/s)": 0.410596 }, { "acc": 0.95842495, "epoch": 3.2850025722253813, "grad_norm": 6.916228294372559, "learning_rate": 8.493161092778418e-07, "loss": 0.26557217, "memory(GiB)": 34.88, "step": 121325, "train_speed(iter/s)": 0.410597 }, { "acc": 0.93022156, "epoch": 3.2851379525085966, "grad_norm": 11.106362342834473, "learning_rate": 8.490042976172972e-07, "loss": 0.4729435, "memory(GiB)": 34.88, "step": 121330, "train_speed(iter/s)": 0.410597 }, { "acc": 0.94937944, "epoch": 3.285273332791812, "grad_norm": 4.881014823913574, "learning_rate": 8.486925379623935e-07, "loss": 0.31350758, "memory(GiB)": 34.88, "step": 121335, "train_speed(iter/s)": 0.410598 }, { "acc": 0.94517841, "epoch": 3.285408713075028, "grad_norm": 4.06664514541626, "learning_rate": 8.483808303170356e-07, "loss": 0.30465074, "memory(GiB)": 34.88, "step": 121340, "train_speed(iter/s)": 0.410599 }, { "acc": 0.93494892, "epoch": 3.285544093358243, "grad_norm": 16.273290634155273, "learning_rate": 8.480691746851298e-07, "loss": 0.35880923, "memory(GiB)": 34.88, "step": 121345, "train_speed(iter/s)": 0.4106 }, { "acc": 0.95192404, "epoch": 3.285679473641459, "grad_norm": 5.412729740142822, "learning_rate": 8.477575710705807e-07, "loss": 0.26404483, "memory(GiB)": 34.88, "step": 121350, "train_speed(iter/s)": 0.410601 }, { "acc": 0.94644318, "epoch": 3.2858148539246743, "grad_norm": 3.538975954055786, "learning_rate": 8.474460194772893e-07, "loss": 0.30614507, "memory(GiB)": 34.88, "step": 121355, "train_speed(iter/s)": 0.410602 }, { "acc": 0.93514805, "epoch": 3.28595023420789, "grad_norm": 15.858355522155762, "learning_rate": 8.4713451990916e-07, "loss": 0.45656939, "memory(GiB)": 34.88, "step": 121360, "train_speed(iter/s)": 0.410603 }, { "acc": 0.94633808, "epoch": 3.2860856144911055, "grad_norm": 3.945030689239502, "learning_rate": 8.468230723700958e-07, "loss": 0.2996737, "memory(GiB)": 34.88, "step": 121365, "train_speed(iter/s)": 0.410604 }, { "acc": 0.93784304, "epoch": 3.286220994774321, "grad_norm": 13.800997734069824, "learning_rate": 8.465116768639959e-07, "loss": 0.36534033, "memory(GiB)": 34.88, "step": 121370, "train_speed(iter/s)": 0.410605 }, { "acc": 0.95504169, "epoch": 3.2863563750575366, "grad_norm": 11.25358772277832, "learning_rate": 8.462003333947625e-07, "loss": 0.20211105, "memory(GiB)": 34.88, "step": 121375, "train_speed(iter/s)": 0.410606 }, { "acc": 0.91888905, "epoch": 3.286491755340752, "grad_norm": 15.592111587524414, "learning_rate": 8.458890419662955e-07, "loss": 0.47445841, "memory(GiB)": 34.88, "step": 121380, "train_speed(iter/s)": 0.410607 }, { "acc": 0.95091877, "epoch": 3.286627135623968, "grad_norm": 4.810795307159424, "learning_rate": 8.455778025824955e-07, "loss": 0.33497462, "memory(GiB)": 34.88, "step": 121385, "train_speed(iter/s)": 0.410608 }, { "acc": 0.94526682, "epoch": 3.286762515907183, "grad_norm": 10.288580894470215, "learning_rate": 8.452666152472593e-07, "loss": 0.30599625, "memory(GiB)": 34.88, "step": 121390, "train_speed(iter/s)": 0.410609 }, { "acc": 0.96092052, "epoch": 3.286897896190399, "grad_norm": 5.841273307800293, "learning_rate": 8.449554799644861e-07, "loss": 0.24605699, "memory(GiB)": 34.88, "step": 121395, "train_speed(iter/s)": 0.41061 }, { "acc": 0.95117874, "epoch": 3.2870332764736143, "grad_norm": 5.204979419708252, "learning_rate": 8.44644396738074e-07, "loss": 0.29260945, "memory(GiB)": 34.88, "step": 121400, "train_speed(iter/s)": 0.410611 }, { "acc": 0.94623203, "epoch": 3.28716865675683, "grad_norm": 6.4086222648620605, "learning_rate": 8.443333655719205e-07, "loss": 0.35751295, "memory(GiB)": 34.88, "step": 121405, "train_speed(iter/s)": 0.410612 }, { "acc": 0.93746567, "epoch": 3.2873040370400455, "grad_norm": 8.571959495544434, "learning_rate": 8.440223864699201e-07, "loss": 0.37359495, "memory(GiB)": 34.88, "step": 121410, "train_speed(iter/s)": 0.410613 }, { "acc": 0.93727303, "epoch": 3.2874394173232613, "grad_norm": 9.63107967376709, "learning_rate": 8.437114594359692e-07, "loss": 0.37543473, "memory(GiB)": 34.88, "step": 121415, "train_speed(iter/s)": 0.410614 }, { "acc": 0.95984449, "epoch": 3.2875747976064766, "grad_norm": 3.2314915657043457, "learning_rate": 8.434005844739637e-07, "loss": 0.26514316, "memory(GiB)": 34.88, "step": 121420, "train_speed(iter/s)": 0.410615 }, { "acc": 0.94482241, "epoch": 3.287710177889692, "grad_norm": 9.122960090637207, "learning_rate": 8.430897615877961e-07, "loss": 0.30682807, "memory(GiB)": 34.88, "step": 121425, "train_speed(iter/s)": 0.410616 }, { "acc": 0.94101696, "epoch": 3.287845558172908, "grad_norm": 10.570693016052246, "learning_rate": 8.42778990781361e-07, "loss": 0.34544458, "memory(GiB)": 34.88, "step": 121430, "train_speed(iter/s)": 0.410617 }, { "acc": 0.94096537, "epoch": 3.287980938456123, "grad_norm": 6.138326168060303, "learning_rate": 8.424682720585514e-07, "loss": 0.37518878, "memory(GiB)": 34.88, "step": 121435, "train_speed(iter/s)": 0.410618 }, { "acc": 0.93962708, "epoch": 3.288116318739339, "grad_norm": 3.6319429874420166, "learning_rate": 8.421576054232609e-07, "loss": 0.34757667, "memory(GiB)": 34.88, "step": 121440, "train_speed(iter/s)": 0.410619 }, { "acc": 0.95064144, "epoch": 3.2882516990225543, "grad_norm": 5.032264709472656, "learning_rate": 8.418469908793778e-07, "loss": 0.27134736, "memory(GiB)": 34.88, "step": 121445, "train_speed(iter/s)": 0.41062 }, { "acc": 0.93860168, "epoch": 3.28838707930577, "grad_norm": 7.066075325012207, "learning_rate": 8.415364284307974e-07, "loss": 0.33649092, "memory(GiB)": 34.88, "step": 121450, "train_speed(iter/s)": 0.410621 }, { "acc": 0.94327831, "epoch": 3.2885224595889855, "grad_norm": 10.229029655456543, "learning_rate": 8.412259180814085e-07, "loss": 0.34700408, "memory(GiB)": 34.88, "step": 121455, "train_speed(iter/s)": 0.410622 }, { "acc": 0.94724884, "epoch": 3.288657839872201, "grad_norm": 4.426126480102539, "learning_rate": 8.409154598350992e-07, "loss": 0.34359078, "memory(GiB)": 34.88, "step": 121460, "train_speed(iter/s)": 0.410622 }, { "acc": 0.96093063, "epoch": 3.2887932201554166, "grad_norm": 3.379405975341797, "learning_rate": 8.406050536957594e-07, "loss": 0.22578366, "memory(GiB)": 34.88, "step": 121465, "train_speed(iter/s)": 0.410623 }, { "acc": 0.94066782, "epoch": 3.288928600438632, "grad_norm": 19.271194458007812, "learning_rate": 8.402946996672784e-07, "loss": 0.32587912, "memory(GiB)": 34.88, "step": 121470, "train_speed(iter/s)": 0.410624 }, { "acc": 0.93971157, "epoch": 3.289063980721848, "grad_norm": 12.308639526367188, "learning_rate": 8.399843977535448e-07, "loss": 0.35912516, "memory(GiB)": 34.88, "step": 121475, "train_speed(iter/s)": 0.410625 }, { "acc": 0.93988428, "epoch": 3.289199361005063, "grad_norm": 22.873443603515625, "learning_rate": 8.396741479584418e-07, "loss": 0.41465764, "memory(GiB)": 34.88, "step": 121480, "train_speed(iter/s)": 0.410626 }, { "acc": 0.94556713, "epoch": 3.289334741288279, "grad_norm": 7.682858467102051, "learning_rate": 8.393639502858615e-07, "loss": 0.31143527, "memory(GiB)": 34.88, "step": 121485, "train_speed(iter/s)": 0.410627 }, { "acc": 0.9437891, "epoch": 3.2894701215714943, "grad_norm": 3.8294668197631836, "learning_rate": 8.390538047396865e-07, "loss": 0.31297443, "memory(GiB)": 34.88, "step": 121490, "train_speed(iter/s)": 0.410628 }, { "acc": 0.95400476, "epoch": 3.2896055018547097, "grad_norm": 8.843144416809082, "learning_rate": 8.387437113238012e-07, "loss": 0.23647108, "memory(GiB)": 34.88, "step": 121495, "train_speed(iter/s)": 0.410629 }, { "acc": 0.94156189, "epoch": 3.2897408821379255, "grad_norm": 6.106715202331543, "learning_rate": 8.384336700420907e-07, "loss": 0.38901863, "memory(GiB)": 34.88, "step": 121500, "train_speed(iter/s)": 0.41063 }, { "acc": 0.93364677, "epoch": 3.289876262421141, "grad_norm": 10.000842094421387, "learning_rate": 8.381236808984401e-07, "loss": 0.43196726, "memory(GiB)": 34.88, "step": 121505, "train_speed(iter/s)": 0.410631 }, { "acc": 0.95204954, "epoch": 3.2900116427043566, "grad_norm": 3.6491589546203613, "learning_rate": 8.378137438967331e-07, "loss": 0.19527532, "memory(GiB)": 34.88, "step": 121510, "train_speed(iter/s)": 0.410632 }, { "acc": 0.93979006, "epoch": 3.290147022987572, "grad_norm": 6.46059513092041, "learning_rate": 8.375038590408488e-07, "loss": 0.31552825, "memory(GiB)": 34.88, "step": 121515, "train_speed(iter/s)": 0.410633 }, { "acc": 0.94650917, "epoch": 3.290282403270788, "grad_norm": 7.397567272186279, "learning_rate": 8.371940263346733e-07, "loss": 0.29426475, "memory(GiB)": 34.88, "step": 121520, "train_speed(iter/s)": 0.410634 }, { "acc": 0.94273853, "epoch": 3.290417783554003, "grad_norm": 4.709095478057861, "learning_rate": 8.368842457820854e-07, "loss": 0.27507515, "memory(GiB)": 34.88, "step": 121525, "train_speed(iter/s)": 0.410635 }, { "acc": 0.91860695, "epoch": 3.2905531638372185, "grad_norm": 6.788115501403809, "learning_rate": 8.365745173869671e-07, "loss": 0.49350538, "memory(GiB)": 34.88, "step": 121530, "train_speed(iter/s)": 0.410635 }, { "acc": 0.94999008, "epoch": 3.2906885441204343, "grad_norm": 5.007284164428711, "learning_rate": 8.362648411531964e-07, "loss": 0.30030329, "memory(GiB)": 34.88, "step": 121535, "train_speed(iter/s)": 0.410636 }, { "acc": 0.94216022, "epoch": 3.2908239244036497, "grad_norm": 14.685528755187988, "learning_rate": 8.359552170846539e-07, "loss": 0.36988196, "memory(GiB)": 34.88, "step": 121540, "train_speed(iter/s)": 0.410637 }, { "acc": 0.95974741, "epoch": 3.2909593046868655, "grad_norm": 5.545079708099365, "learning_rate": 8.356456451852194e-07, "loss": 0.22566063, "memory(GiB)": 34.88, "step": 121545, "train_speed(iter/s)": 0.410638 }, { "acc": 0.94710579, "epoch": 3.291094684970081, "grad_norm": 14.111342430114746, "learning_rate": 8.353361254587672e-07, "loss": 0.32352929, "memory(GiB)": 34.88, "step": 121550, "train_speed(iter/s)": 0.410639 }, { "acc": 0.947925, "epoch": 3.2912300652532966, "grad_norm": 3.970906972885132, "learning_rate": 8.350266579091792e-07, "loss": 0.3116065, "memory(GiB)": 34.88, "step": 121555, "train_speed(iter/s)": 0.41064 }, { "acc": 0.94885483, "epoch": 3.291365445536512, "grad_norm": 5.043745517730713, "learning_rate": 8.347172425403288e-07, "loss": 0.30494008, "memory(GiB)": 34.88, "step": 121560, "train_speed(iter/s)": 0.410641 }, { "acc": 0.94524231, "epoch": 3.291500825819728, "grad_norm": 11.216851234436035, "learning_rate": 8.344078793560948e-07, "loss": 0.30127482, "memory(GiB)": 34.88, "step": 121565, "train_speed(iter/s)": 0.410642 }, { "acc": 0.94614325, "epoch": 3.291636206102943, "grad_norm": 10.759161949157715, "learning_rate": 8.340985683603483e-07, "loss": 0.30322952, "memory(GiB)": 34.88, "step": 121570, "train_speed(iter/s)": 0.410643 }, { "acc": 0.95408602, "epoch": 3.291771586386159, "grad_norm": 2.3681750297546387, "learning_rate": 8.337893095569691e-07, "loss": 0.3016463, "memory(GiB)": 34.88, "step": 121575, "train_speed(iter/s)": 0.410644 }, { "acc": 0.93780346, "epoch": 3.2919069666693743, "grad_norm": 18.40980339050293, "learning_rate": 8.334801029498288e-07, "loss": 0.3858242, "memory(GiB)": 34.88, "step": 121580, "train_speed(iter/s)": 0.410645 }, { "acc": 0.95558815, "epoch": 3.2920423469525897, "grad_norm": 5.006596088409424, "learning_rate": 8.331709485427987e-07, "loss": 0.26443553, "memory(GiB)": 34.88, "step": 121585, "train_speed(iter/s)": 0.410646 }, { "acc": 0.95524979, "epoch": 3.2921777272358055, "grad_norm": 7.632380485534668, "learning_rate": 8.328618463397557e-07, "loss": 0.27547455, "memory(GiB)": 34.88, "step": 121590, "train_speed(iter/s)": 0.410647 }, { "acc": 0.92646294, "epoch": 3.292313107519021, "grad_norm": 19.048002243041992, "learning_rate": 8.325527963445692e-07, "loss": 0.43264117, "memory(GiB)": 34.88, "step": 121595, "train_speed(iter/s)": 0.410648 }, { "acc": 0.95476265, "epoch": 3.2924484878022366, "grad_norm": 5.269140243530273, "learning_rate": 8.322437985611123e-07, "loss": 0.23650618, "memory(GiB)": 34.88, "step": 121600, "train_speed(iter/s)": 0.410649 }, { "acc": 0.95016727, "epoch": 3.292583868085452, "grad_norm": 4.6715192794799805, "learning_rate": 8.319348529932527e-07, "loss": 0.31508064, "memory(GiB)": 34.88, "step": 121605, "train_speed(iter/s)": 0.410649 }, { "acc": 0.94354668, "epoch": 3.292719248368668, "grad_norm": 10.375753402709961, "learning_rate": 8.316259596448651e-07, "loss": 0.37871335, "memory(GiB)": 34.88, "step": 121610, "train_speed(iter/s)": 0.41065 }, { "acc": 0.93889771, "epoch": 3.292854628651883, "grad_norm": 8.740267753601074, "learning_rate": 8.313171185198173e-07, "loss": 0.43045897, "memory(GiB)": 34.88, "step": 121615, "train_speed(iter/s)": 0.410651 }, { "acc": 0.95055962, "epoch": 3.2929900089350985, "grad_norm": 3.9915385246276855, "learning_rate": 8.310083296219745e-07, "loss": 0.23977308, "memory(GiB)": 34.88, "step": 121620, "train_speed(iter/s)": 0.410652 }, { "acc": 0.94405861, "epoch": 3.2931253892183143, "grad_norm": 9.969303131103516, "learning_rate": 8.306995929552101e-07, "loss": 0.34942298, "memory(GiB)": 34.88, "step": 121625, "train_speed(iter/s)": 0.410653 }, { "acc": 0.94269047, "epoch": 3.2932607695015297, "grad_norm": 11.635465621948242, "learning_rate": 8.303909085233884e-07, "loss": 0.39121573, "memory(GiB)": 34.88, "step": 121630, "train_speed(iter/s)": 0.410654 }, { "acc": 0.9541111, "epoch": 3.2933961497847455, "grad_norm": 3.0782206058502197, "learning_rate": 8.300822763303792e-07, "loss": 0.24226313, "memory(GiB)": 34.88, "step": 121635, "train_speed(iter/s)": 0.410655 }, { "acc": 0.94506178, "epoch": 3.293531530067961, "grad_norm": 4.7554192543029785, "learning_rate": 8.297736963800438e-07, "loss": 0.27184851, "memory(GiB)": 34.88, "step": 121640, "train_speed(iter/s)": 0.410656 }, { "acc": 0.93967781, "epoch": 3.2936669103511766, "grad_norm": 6.088489532470703, "learning_rate": 8.294651686762531e-07, "loss": 0.41871586, "memory(GiB)": 34.88, "step": 121645, "train_speed(iter/s)": 0.410657 }, { "acc": 0.95045414, "epoch": 3.293802290634392, "grad_norm": 16.179611206054688, "learning_rate": 8.291566932228687e-07, "loss": 0.33042307, "memory(GiB)": 34.88, "step": 121650, "train_speed(iter/s)": 0.410658 }, { "acc": 0.94306221, "epoch": 3.2939376709176074, "grad_norm": 6.482594966888428, "learning_rate": 8.288482700237562e-07, "loss": 0.33808825, "memory(GiB)": 34.88, "step": 121655, "train_speed(iter/s)": 0.410659 }, { "acc": 0.94278202, "epoch": 3.294073051200823, "grad_norm": 9.294642448425293, "learning_rate": 8.285398990827798e-07, "loss": 0.34223814, "memory(GiB)": 34.88, "step": 121660, "train_speed(iter/s)": 0.41066 }, { "acc": 0.95592651, "epoch": 3.2942084314840385, "grad_norm": 6.006424903869629, "learning_rate": 8.282315804038006e-07, "loss": 0.21909959, "memory(GiB)": 34.88, "step": 121665, "train_speed(iter/s)": 0.410661 }, { "acc": 0.93686562, "epoch": 3.2943438117672543, "grad_norm": 8.265957832336426, "learning_rate": 8.279233139906824e-07, "loss": 0.37338319, "memory(GiB)": 34.88, "step": 121670, "train_speed(iter/s)": 0.410661 }, { "acc": 0.95437717, "epoch": 3.2944791920504697, "grad_norm": 4.704987525939941, "learning_rate": 8.276150998472844e-07, "loss": 0.29441798, "memory(GiB)": 34.88, "step": 121675, "train_speed(iter/s)": 0.410662 }, { "acc": 0.93627195, "epoch": 3.2946145723336855, "grad_norm": 16.860105514526367, "learning_rate": 8.273069379774719e-07, "loss": 0.30284481, "memory(GiB)": 34.88, "step": 121680, "train_speed(iter/s)": 0.410663 }, { "acc": 0.95438118, "epoch": 3.294749952616901, "grad_norm": 7.69761323928833, "learning_rate": 8.269988283851016e-07, "loss": 0.28064344, "memory(GiB)": 34.88, "step": 121685, "train_speed(iter/s)": 0.410664 }, { "acc": 0.94085484, "epoch": 3.294885332900116, "grad_norm": 44.842342376708984, "learning_rate": 8.266907710740356e-07, "loss": 0.31348138, "memory(GiB)": 34.88, "step": 121690, "train_speed(iter/s)": 0.410665 }, { "acc": 0.93545599, "epoch": 3.295020713183332, "grad_norm": 9.236903190612793, "learning_rate": 8.263827660481293e-07, "loss": 0.41792903, "memory(GiB)": 34.88, "step": 121695, "train_speed(iter/s)": 0.410666 }, { "acc": 0.96312943, "epoch": 3.2951560934665474, "grad_norm": 2.291450262069702, "learning_rate": 8.260748133112456e-07, "loss": 0.25400376, "memory(GiB)": 34.88, "step": 121700, "train_speed(iter/s)": 0.410667 }, { "acc": 0.95520287, "epoch": 3.295291473749763, "grad_norm": 5.810335636138916, "learning_rate": 8.257669128672403e-07, "loss": 0.31612933, "memory(GiB)": 34.88, "step": 121705, "train_speed(iter/s)": 0.410668 }, { "acc": 0.95024986, "epoch": 3.2954268540329785, "grad_norm": 9.698803901672363, "learning_rate": 8.254590647199681e-07, "loss": 0.34002621, "memory(GiB)": 34.88, "step": 121710, "train_speed(iter/s)": 0.410668 }, { "acc": 0.9297039, "epoch": 3.2955622343161943, "grad_norm": 7.251666069030762, "learning_rate": 8.251512688732895e-07, "loss": 0.43397713, "memory(GiB)": 34.88, "step": 121715, "train_speed(iter/s)": 0.410669 }, { "acc": 0.93763475, "epoch": 3.2956976145994097, "grad_norm": 7.450311660766602, "learning_rate": 8.248435253310576e-07, "loss": 0.39811411, "memory(GiB)": 34.88, "step": 121720, "train_speed(iter/s)": 0.41067 }, { "acc": 0.92847404, "epoch": 3.2958329948826255, "grad_norm": 9.279281616210938, "learning_rate": 8.245358340971289e-07, "loss": 0.44185123, "memory(GiB)": 34.88, "step": 121725, "train_speed(iter/s)": 0.410671 }, { "acc": 0.95089931, "epoch": 3.295968375165841, "grad_norm": 8.790714263916016, "learning_rate": 8.24228195175355e-07, "loss": 0.25761352, "memory(GiB)": 34.88, "step": 121730, "train_speed(iter/s)": 0.410672 }, { "acc": 0.94454632, "epoch": 3.296103755449056, "grad_norm": 3.865680456161499, "learning_rate": 8.239206085695942e-07, "loss": 0.34901214, "memory(GiB)": 34.88, "step": 121735, "train_speed(iter/s)": 0.410673 }, { "acc": 0.93737164, "epoch": 3.296239135732272, "grad_norm": 6.784305095672607, "learning_rate": 8.236130742836973e-07, "loss": 0.36879573, "memory(GiB)": 34.88, "step": 121740, "train_speed(iter/s)": 0.410674 }, { "acc": 0.94925909, "epoch": 3.2963745160154874, "grad_norm": 5.11499547958374, "learning_rate": 8.233055923215141e-07, "loss": 0.31013498, "memory(GiB)": 34.88, "step": 121745, "train_speed(iter/s)": 0.410675 }, { "acc": 0.93439579, "epoch": 3.296509896298703, "grad_norm": 6.4847798347473145, "learning_rate": 8.229981626869015e-07, "loss": 0.41174207, "memory(GiB)": 34.88, "step": 121750, "train_speed(iter/s)": 0.410676 }, { "acc": 0.93796329, "epoch": 3.2966452765819185, "grad_norm": 10.908105850219727, "learning_rate": 8.226907853837069e-07, "loss": 0.38085079, "memory(GiB)": 34.88, "step": 121755, "train_speed(iter/s)": 0.410677 }, { "acc": 0.93760166, "epoch": 3.2967806568651343, "grad_norm": 60.451751708984375, "learning_rate": 8.223834604157834e-07, "loss": 0.39914899, "memory(GiB)": 34.88, "step": 121760, "train_speed(iter/s)": 0.410678 }, { "acc": 0.93739777, "epoch": 3.2969160371483497, "grad_norm": 5.602565765380859, "learning_rate": 8.220761877869771e-07, "loss": 0.38645434, "memory(GiB)": 34.88, "step": 121765, "train_speed(iter/s)": 0.410679 }, { "acc": 0.96819372, "epoch": 3.2970514174315655, "grad_norm": 5.6728925704956055, "learning_rate": 8.217689675011423e-07, "loss": 0.20580478, "memory(GiB)": 34.88, "step": 121770, "train_speed(iter/s)": 0.410679 }, { "acc": 0.94347095, "epoch": 3.297186797714781, "grad_norm": 6.230251312255859, "learning_rate": 8.214617995621233e-07, "loss": 0.29491622, "memory(GiB)": 34.88, "step": 121775, "train_speed(iter/s)": 0.41068 }, { "acc": 0.94179668, "epoch": 3.297322177997996, "grad_norm": 15.145971298217773, "learning_rate": 8.2115468397377e-07, "loss": 0.38193655, "memory(GiB)": 34.88, "step": 121780, "train_speed(iter/s)": 0.410681 }, { "acc": 0.93486519, "epoch": 3.297457558281212, "grad_norm": 8.976004600524902, "learning_rate": 8.208476207399301e-07, "loss": 0.44532032, "memory(GiB)": 34.88, "step": 121785, "train_speed(iter/s)": 0.410682 }, { "acc": 0.95474052, "epoch": 3.2975929385644274, "grad_norm": 7.754733085632324, "learning_rate": 8.205406098644478e-07, "loss": 0.2563385, "memory(GiB)": 34.88, "step": 121790, "train_speed(iter/s)": 0.410683 }, { "acc": 0.94588547, "epoch": 3.297728318847643, "grad_norm": 1.804890513420105, "learning_rate": 8.20233651351172e-07, "loss": 0.34193227, "memory(GiB)": 34.88, "step": 121795, "train_speed(iter/s)": 0.410684 }, { "acc": 0.97016621, "epoch": 3.2978636991308585, "grad_norm": 2.686319351196289, "learning_rate": 8.19926745203944e-07, "loss": 0.17776431, "memory(GiB)": 34.88, "step": 121800, "train_speed(iter/s)": 0.410685 }, { "acc": 0.93016891, "epoch": 3.2979990794140743, "grad_norm": 5.615833759307861, "learning_rate": 8.196198914266129e-07, "loss": 0.5220232, "memory(GiB)": 34.88, "step": 121805, "train_speed(iter/s)": 0.410686 }, { "acc": 0.936689, "epoch": 3.2981344596972897, "grad_norm": 11.551695823669434, "learning_rate": 8.193130900230199e-07, "loss": 0.32883711, "memory(GiB)": 34.88, "step": 121810, "train_speed(iter/s)": 0.410687 }, { "acc": 0.96055946, "epoch": 3.298269839980505, "grad_norm": 4.503632545471191, "learning_rate": 8.190063409970085e-07, "loss": 0.20784009, "memory(GiB)": 34.88, "step": 121815, "train_speed(iter/s)": 0.410687 }, { "acc": 0.94459629, "epoch": 3.298405220263721, "grad_norm": 8.183382034301758, "learning_rate": 8.186996443524223e-07, "loss": 0.32662191, "memory(GiB)": 34.88, "step": 121820, "train_speed(iter/s)": 0.410688 }, { "acc": 0.93682823, "epoch": 3.298540600546936, "grad_norm": 16.066017150878906, "learning_rate": 8.183930000931034e-07, "loss": 0.32897687, "memory(GiB)": 34.88, "step": 121825, "train_speed(iter/s)": 0.410689 }, { "acc": 0.95465908, "epoch": 3.298675980830152, "grad_norm": 2.900820732116699, "learning_rate": 8.180864082228929e-07, "loss": 0.20619936, "memory(GiB)": 34.88, "step": 121830, "train_speed(iter/s)": 0.41069 }, { "acc": 0.93915911, "epoch": 3.2988113611133674, "grad_norm": 6.092811107635498, "learning_rate": 8.177798687456289e-07, "loss": 0.41839085, "memory(GiB)": 34.88, "step": 121835, "train_speed(iter/s)": 0.410691 }, { "acc": 0.95051708, "epoch": 3.298946741396583, "grad_norm": 6.583409786224365, "learning_rate": 8.174733816651556e-07, "loss": 0.28418384, "memory(GiB)": 34.88, "step": 121840, "train_speed(iter/s)": 0.410692 }, { "acc": 0.93482523, "epoch": 3.2990821216797985, "grad_norm": 9.85170841217041, "learning_rate": 8.171669469853091e-07, "loss": 0.3884593, "memory(GiB)": 34.88, "step": 121845, "train_speed(iter/s)": 0.410693 }, { "acc": 0.94639444, "epoch": 3.299217501963014, "grad_norm": 4.598418235778809, "learning_rate": 8.16860564709929e-07, "loss": 0.33705981, "memory(GiB)": 34.88, "step": 121850, "train_speed(iter/s)": 0.410694 }, { "acc": 0.93891649, "epoch": 3.2993528822462297, "grad_norm": 7.974148750305176, "learning_rate": 8.165542348428543e-07, "loss": 0.33198943, "memory(GiB)": 34.88, "step": 121855, "train_speed(iter/s)": 0.410695 }, { "acc": 0.93635578, "epoch": 3.299488262529445, "grad_norm": 34.613216400146484, "learning_rate": 8.162479573879227e-07, "loss": 0.43853579, "memory(GiB)": 34.88, "step": 121860, "train_speed(iter/s)": 0.410696 }, { "acc": 0.95783863, "epoch": 3.299623642812661, "grad_norm": 2.474116802215576, "learning_rate": 8.159417323489685e-07, "loss": 0.23159721, "memory(GiB)": 34.88, "step": 121865, "train_speed(iter/s)": 0.410697 }, { "acc": 0.94399509, "epoch": 3.299759023095876, "grad_norm": 16.48855972290039, "learning_rate": 8.156355597298294e-07, "loss": 0.38020535, "memory(GiB)": 34.88, "step": 121870, "train_speed(iter/s)": 0.410697 }, { "acc": 0.92758865, "epoch": 3.299894403379092, "grad_norm": 6.59692907333374, "learning_rate": 8.153294395343421e-07, "loss": 0.46442003, "memory(GiB)": 34.88, "step": 121875, "train_speed(iter/s)": 0.410698 }, { "acc": 0.94680443, "epoch": 3.3000297836623074, "grad_norm": 5.431564807891846, "learning_rate": 8.150233717663387e-07, "loss": 0.35295143, "memory(GiB)": 34.88, "step": 121880, "train_speed(iter/s)": 0.410699 }, { "acc": 0.94988003, "epoch": 3.3001651639455227, "grad_norm": 14.90048599243164, "learning_rate": 8.147173564296543e-07, "loss": 0.295522, "memory(GiB)": 34.88, "step": 121885, "train_speed(iter/s)": 0.4107 }, { "acc": 0.95112915, "epoch": 3.3003005442287385, "grad_norm": 4.665185451507568, "learning_rate": 8.14411393528123e-07, "loss": 0.26111412, "memory(GiB)": 34.88, "step": 121890, "train_speed(iter/s)": 0.410701 }, { "acc": 0.93865814, "epoch": 3.300435924511954, "grad_norm": 13.431607246398926, "learning_rate": 8.141054830655777e-07, "loss": 0.37377274, "memory(GiB)": 34.88, "step": 121895, "train_speed(iter/s)": 0.410702 }, { "acc": 0.94004765, "epoch": 3.3005713047951697, "grad_norm": 5.626223087310791, "learning_rate": 8.137996250458492e-07, "loss": 0.33791561, "memory(GiB)": 34.88, "step": 121900, "train_speed(iter/s)": 0.410703 }, { "acc": 0.9343811, "epoch": 3.300706685078385, "grad_norm": 8.093403816223145, "learning_rate": 8.134938194727695e-07, "loss": 0.44565549, "memory(GiB)": 34.88, "step": 121905, "train_speed(iter/s)": 0.410704 }, { "acc": 0.94395771, "epoch": 3.300842065361601, "grad_norm": 3.0649666786193848, "learning_rate": 8.131880663501711e-07, "loss": 0.33049273, "memory(GiB)": 34.88, "step": 121910, "train_speed(iter/s)": 0.410704 }, { "acc": 0.93522987, "epoch": 3.300977445644816, "grad_norm": 7.848002910614014, "learning_rate": 8.128823656818813e-07, "loss": 0.33415928, "memory(GiB)": 34.88, "step": 121915, "train_speed(iter/s)": 0.410705 }, { "acc": 0.9367507, "epoch": 3.301112825928032, "grad_norm": 6.988274574279785, "learning_rate": 8.12576717471731e-07, "loss": 0.3689544, "memory(GiB)": 34.88, "step": 121920, "train_speed(iter/s)": 0.410706 }, { "acc": 0.96346035, "epoch": 3.3012482062112474, "grad_norm": 2.7593111991882324, "learning_rate": 8.122711217235495e-07, "loss": 0.22270973, "memory(GiB)": 34.88, "step": 121925, "train_speed(iter/s)": 0.410707 }, { "acc": 0.94550743, "epoch": 3.301383586494463, "grad_norm": 22.203575134277344, "learning_rate": 8.119655784411651e-07, "loss": 0.36565063, "memory(GiB)": 34.88, "step": 121930, "train_speed(iter/s)": 0.410708 }, { "acc": 0.95883341, "epoch": 3.3015189667776785, "grad_norm": 3.630613327026367, "learning_rate": 8.116600876284044e-07, "loss": 0.25698478, "memory(GiB)": 34.88, "step": 121935, "train_speed(iter/s)": 0.410709 }, { "acc": 0.92851858, "epoch": 3.301654347060894, "grad_norm": 6.850139617919922, "learning_rate": 8.113546492890937e-07, "loss": 0.37940893, "memory(GiB)": 34.88, "step": 121940, "train_speed(iter/s)": 0.41071 }, { "acc": 0.9577961, "epoch": 3.3017897273441097, "grad_norm": 3.3920843601226807, "learning_rate": 8.11049263427061e-07, "loss": 0.26124675, "memory(GiB)": 34.88, "step": 121945, "train_speed(iter/s)": 0.41071 }, { "acc": 0.94605598, "epoch": 3.301925107627325, "grad_norm": 10.024164199829102, "learning_rate": 8.107439300461321e-07, "loss": 0.31857154, "memory(GiB)": 34.88, "step": 121950, "train_speed(iter/s)": 0.410711 }, { "acc": 0.94873772, "epoch": 3.302060487910541, "grad_norm": 17.330307006835938, "learning_rate": 8.104386491501293e-07, "loss": 0.29999924, "memory(GiB)": 34.88, "step": 121955, "train_speed(iter/s)": 0.410712 }, { "acc": 0.93390884, "epoch": 3.302195868193756, "grad_norm": 3.9347596168518066, "learning_rate": 8.101334207428782e-07, "loss": 0.40931234, "memory(GiB)": 34.88, "step": 121960, "train_speed(iter/s)": 0.410713 }, { "acc": 0.95256929, "epoch": 3.302331248476972, "grad_norm": 5.711008071899414, "learning_rate": 8.098282448282043e-07, "loss": 0.25612998, "memory(GiB)": 34.88, "step": 121965, "train_speed(iter/s)": 0.410714 }, { "acc": 0.9408535, "epoch": 3.3024666287601874, "grad_norm": 4.688965320587158, "learning_rate": 8.095231214099274e-07, "loss": 0.32050824, "memory(GiB)": 34.88, "step": 121970, "train_speed(iter/s)": 0.410715 }, { "acc": 0.94020758, "epoch": 3.3026020090434027, "grad_norm": 11.423173904418945, "learning_rate": 8.092180504918711e-07, "loss": 0.37944496, "memory(GiB)": 34.88, "step": 121975, "train_speed(iter/s)": 0.410716 }, { "acc": 0.94785147, "epoch": 3.3027373893266185, "grad_norm": 6.185787677764893, "learning_rate": 8.08913032077857e-07, "loss": 0.34732888, "memory(GiB)": 34.88, "step": 121980, "train_speed(iter/s)": 0.410717 }, { "acc": 0.94293594, "epoch": 3.302872769609834, "grad_norm": 5.840883731842041, "learning_rate": 8.086080661717076e-07, "loss": 0.35050237, "memory(GiB)": 34.88, "step": 121985, "train_speed(iter/s)": 0.410718 }, { "acc": 0.94673643, "epoch": 3.3030081498930497, "grad_norm": 6.453812599182129, "learning_rate": 8.083031527772401e-07, "loss": 0.3599683, "memory(GiB)": 34.88, "step": 121990, "train_speed(iter/s)": 0.410718 }, { "acc": 0.93981133, "epoch": 3.303143530176265, "grad_norm": 6.985726833343506, "learning_rate": 8.079982918982761e-07, "loss": 0.31968958, "memory(GiB)": 34.88, "step": 121995, "train_speed(iter/s)": 0.410719 }, { "acc": 0.93834114, "epoch": 3.303278910459481, "grad_norm": 7.47872257232666, "learning_rate": 8.076934835386353e-07, "loss": 0.32246161, "memory(GiB)": 34.88, "step": 122000, "train_speed(iter/s)": 0.41072 }, { "acc": 0.95029354, "epoch": 3.303414290742696, "grad_norm": 3.103128433227539, "learning_rate": 8.073887277021341e-07, "loss": 0.31992133, "memory(GiB)": 34.88, "step": 122005, "train_speed(iter/s)": 0.410721 }, { "acc": 0.94324188, "epoch": 3.3035496710259116, "grad_norm": 7.849076747894287, "learning_rate": 8.070840243925903e-07, "loss": 0.32871654, "memory(GiB)": 34.88, "step": 122010, "train_speed(iter/s)": 0.410722 }, { "acc": 0.95680428, "epoch": 3.3036850513091274, "grad_norm": 7.059226989746094, "learning_rate": 8.067793736138223e-07, "loss": 0.23886199, "memory(GiB)": 34.88, "step": 122015, "train_speed(iter/s)": 0.410723 }, { "acc": 0.94595709, "epoch": 3.3038204315923427, "grad_norm": 18.058284759521484, "learning_rate": 8.064747753696467e-07, "loss": 0.30408535, "memory(GiB)": 34.88, "step": 122020, "train_speed(iter/s)": 0.410724 }, { "acc": 0.94413853, "epoch": 3.3039558118755585, "grad_norm": 6.627941608428955, "learning_rate": 8.061702296638773e-07, "loss": 0.28971701, "memory(GiB)": 34.88, "step": 122025, "train_speed(iter/s)": 0.410725 }, { "acc": 0.95157909, "epoch": 3.304091192158774, "grad_norm": 3.5443718433380127, "learning_rate": 8.058657365003302e-07, "loss": 0.26044986, "memory(GiB)": 34.88, "step": 122030, "train_speed(iter/s)": 0.410726 }, { "acc": 0.93244877, "epoch": 3.3042265724419897, "grad_norm": 6.037748336791992, "learning_rate": 8.055612958828205e-07, "loss": 0.52739029, "memory(GiB)": 34.88, "step": 122035, "train_speed(iter/s)": 0.410727 }, { "acc": 0.94081049, "epoch": 3.304361952725205, "grad_norm": 9.22761058807373, "learning_rate": 8.0525690781516e-07, "loss": 0.36004019, "memory(GiB)": 34.88, "step": 122040, "train_speed(iter/s)": 0.410728 }, { "acc": 0.94405403, "epoch": 3.3044973330084204, "grad_norm": 9.952710151672363, "learning_rate": 8.049525723011627e-07, "loss": 0.32207985, "memory(GiB)": 34.88, "step": 122045, "train_speed(iter/s)": 0.410729 }, { "acc": 0.94803305, "epoch": 3.304632713291636, "grad_norm": 8.09335994720459, "learning_rate": 8.046482893446415e-07, "loss": 0.32850564, "memory(GiB)": 34.88, "step": 122050, "train_speed(iter/s)": 0.41073 }, { "acc": 0.94013481, "epoch": 3.3047680935748516, "grad_norm": 10.00853443145752, "learning_rate": 8.043440589494092e-07, "loss": 0.34092338, "memory(GiB)": 34.88, "step": 122055, "train_speed(iter/s)": 0.41073 }, { "acc": 0.93205938, "epoch": 3.3049034738580674, "grad_norm": 5.968838214874268, "learning_rate": 8.040398811192736e-07, "loss": 0.43582106, "memory(GiB)": 34.88, "step": 122060, "train_speed(iter/s)": 0.410731 }, { "acc": 0.9430109, "epoch": 3.3050388541412827, "grad_norm": 8.972124099731445, "learning_rate": 8.037357558580476e-07, "loss": 0.40023241, "memory(GiB)": 34.88, "step": 122065, "train_speed(iter/s)": 0.410732 }, { "acc": 0.93595257, "epoch": 3.3051742344244985, "grad_norm": 8.176650047302246, "learning_rate": 8.034316831695405e-07, "loss": 0.40383101, "memory(GiB)": 34.88, "step": 122070, "train_speed(iter/s)": 0.410733 }, { "acc": 0.94753914, "epoch": 3.305309614707714, "grad_norm": 6.7609124183654785, "learning_rate": 8.031276630575617e-07, "loss": 0.37874515, "memory(GiB)": 34.88, "step": 122075, "train_speed(iter/s)": 0.410734 }, { "acc": 0.95074358, "epoch": 3.3054449949909297, "grad_norm": 8.819318771362305, "learning_rate": 8.028236955259186e-07, "loss": 0.28990643, "memory(GiB)": 34.88, "step": 122080, "train_speed(iter/s)": 0.410735 }, { "acc": 0.96894579, "epoch": 3.305580375274145, "grad_norm": 10.584217071533203, "learning_rate": 8.025197805784194e-07, "loss": 0.15886886, "memory(GiB)": 34.88, "step": 122085, "train_speed(iter/s)": 0.410736 }, { "acc": 0.93611917, "epoch": 3.305715755557361, "grad_norm": 5.464033603668213, "learning_rate": 8.02215918218873e-07, "loss": 0.38655813, "memory(GiB)": 34.88, "step": 122090, "train_speed(iter/s)": 0.410737 }, { "acc": 0.94055233, "epoch": 3.305851135840576, "grad_norm": 5.061710357666016, "learning_rate": 8.019121084510829e-07, "loss": 0.35398178, "memory(GiB)": 34.88, "step": 122095, "train_speed(iter/s)": 0.410738 }, { "acc": 0.9436161, "epoch": 3.3059865161237916, "grad_norm": 7.213939666748047, "learning_rate": 8.016083512788567e-07, "loss": 0.39865274, "memory(GiB)": 34.88, "step": 122100, "train_speed(iter/s)": 0.410739 }, { "acc": 0.95006599, "epoch": 3.3061218964070074, "grad_norm": 3.142033100128174, "learning_rate": 8.013046467059988e-07, "loss": 0.30562596, "memory(GiB)": 34.88, "step": 122105, "train_speed(iter/s)": 0.410739 }, { "acc": 0.93171387, "epoch": 3.3062572766902227, "grad_norm": 7.313014984130859, "learning_rate": 8.010009947363154e-07, "loss": 0.41408701, "memory(GiB)": 34.88, "step": 122110, "train_speed(iter/s)": 0.41074 }, { "acc": 0.95371838, "epoch": 3.3063926569734385, "grad_norm": 6.711677551269531, "learning_rate": 8.006973953736072e-07, "loss": 0.29977334, "memory(GiB)": 34.88, "step": 122115, "train_speed(iter/s)": 0.410741 }, { "acc": 0.94457016, "epoch": 3.306528037256654, "grad_norm": 7.372339725494385, "learning_rate": 8.00393848621681e-07, "loss": 0.37985239, "memory(GiB)": 34.88, "step": 122120, "train_speed(iter/s)": 0.410742 }, { "acc": 0.94565573, "epoch": 3.3066634175398697, "grad_norm": 7.336958885192871, "learning_rate": 8.000903544843375e-07, "loss": 0.40400257, "memory(GiB)": 34.88, "step": 122125, "train_speed(iter/s)": 0.410743 }, { "acc": 0.95608644, "epoch": 3.306798797823085, "grad_norm": 7.8775177001953125, "learning_rate": 7.997869129653775e-07, "loss": 0.25320296, "memory(GiB)": 34.88, "step": 122130, "train_speed(iter/s)": 0.410744 }, { "acc": 0.947752, "epoch": 3.3069341781063004, "grad_norm": 8.304543495178223, "learning_rate": 7.994835240686033e-07, "loss": 0.30109787, "memory(GiB)": 34.88, "step": 122135, "train_speed(iter/s)": 0.410745 }, { "acc": 0.93726606, "epoch": 3.307069558389516, "grad_norm": 10.18803596496582, "learning_rate": 7.991801877978155e-07, "loss": 0.40581923, "memory(GiB)": 34.88, "step": 122140, "train_speed(iter/s)": 0.410746 }, { "acc": 0.95467262, "epoch": 3.3072049386727316, "grad_norm": 5.914940357208252, "learning_rate": 7.988769041568152e-07, "loss": 0.21816912, "memory(GiB)": 34.88, "step": 122145, "train_speed(iter/s)": 0.410747 }, { "acc": 0.9443121, "epoch": 3.3073403189559474, "grad_norm": 23.502424240112305, "learning_rate": 7.985736731493978e-07, "loss": 0.29443755, "memory(GiB)": 34.88, "step": 122150, "train_speed(iter/s)": 0.410747 }, { "acc": 0.95266247, "epoch": 3.3074756992391627, "grad_norm": 3.564727306365967, "learning_rate": 7.98270494779367e-07, "loss": 0.23581319, "memory(GiB)": 34.88, "step": 122155, "train_speed(iter/s)": 0.410748 }, { "acc": 0.93072519, "epoch": 3.3076110795223785, "grad_norm": 12.063282012939453, "learning_rate": 7.979673690505176e-07, "loss": 0.47977715, "memory(GiB)": 34.88, "step": 122160, "train_speed(iter/s)": 0.410749 }, { "acc": 0.92566299, "epoch": 3.307746459805594, "grad_norm": 15.837834358215332, "learning_rate": 7.976642959666468e-07, "loss": 0.4247097, "memory(GiB)": 34.88, "step": 122165, "train_speed(iter/s)": 0.41075 }, { "acc": 0.95828915, "epoch": 3.3078818400888093, "grad_norm": 6.408899307250977, "learning_rate": 7.973612755315517e-07, "loss": 0.29095988, "memory(GiB)": 34.88, "step": 122170, "train_speed(iter/s)": 0.410751 }, { "acc": 0.94618874, "epoch": 3.308017220372025, "grad_norm": 7.352838516235352, "learning_rate": 7.970583077490278e-07, "loss": 0.35057282, "memory(GiB)": 34.88, "step": 122175, "train_speed(iter/s)": 0.410752 }, { "acc": 0.94786644, "epoch": 3.3081526006552404, "grad_norm": 6.958817005157471, "learning_rate": 7.967553926228721e-07, "loss": 0.28043523, "memory(GiB)": 34.88, "step": 122180, "train_speed(iter/s)": 0.410753 }, { "acc": 0.94168663, "epoch": 3.308287980938456, "grad_norm": 20.0656681060791, "learning_rate": 7.96452530156876e-07, "loss": 0.33352559, "memory(GiB)": 34.88, "step": 122185, "train_speed(iter/s)": 0.410754 }, { "acc": 0.94022074, "epoch": 3.3084233612216716, "grad_norm": 4.248373508453369, "learning_rate": 7.961497203548379e-07, "loss": 0.3210726, "memory(GiB)": 34.88, "step": 122190, "train_speed(iter/s)": 0.410755 }, { "acc": 0.95267181, "epoch": 3.3085587415048874, "grad_norm": 9.231979370117188, "learning_rate": 7.958469632205469e-07, "loss": 0.27036104, "memory(GiB)": 34.88, "step": 122195, "train_speed(iter/s)": 0.410755 }, { "acc": 0.94403429, "epoch": 3.3086941217881027, "grad_norm": 7.170443058013916, "learning_rate": 7.955442587577988e-07, "loss": 0.31568694, "memory(GiB)": 34.88, "step": 122200, "train_speed(iter/s)": 0.410756 }, { "acc": 0.94017887, "epoch": 3.308829502071318, "grad_norm": 12.531951904296875, "learning_rate": 7.952416069703826e-07, "loss": 0.34927506, "memory(GiB)": 34.88, "step": 122205, "train_speed(iter/s)": 0.410757 }, { "acc": 0.94644032, "epoch": 3.308964882354534, "grad_norm": 10.349675178527832, "learning_rate": 7.949390078620915e-07, "loss": 0.28456869, "memory(GiB)": 34.88, "step": 122210, "train_speed(iter/s)": 0.410758 }, { "acc": 0.94202061, "epoch": 3.3091002626377493, "grad_norm": 5.305726528167725, "learning_rate": 7.946364614367165e-07, "loss": 0.31743941, "memory(GiB)": 34.88, "step": 122215, "train_speed(iter/s)": 0.410759 }, { "acc": 0.94284401, "epoch": 3.309235642920965, "grad_norm": 6.553228855133057, "learning_rate": 7.943339676980447e-07, "loss": 0.32274384, "memory(GiB)": 34.88, "step": 122220, "train_speed(iter/s)": 0.41076 }, { "acc": 0.9508234, "epoch": 3.3093710232041804, "grad_norm": 12.053947448730469, "learning_rate": 7.940315266498699e-07, "loss": 0.3222321, "memory(GiB)": 34.88, "step": 122225, "train_speed(iter/s)": 0.410761 }, { "acc": 0.93282166, "epoch": 3.309506403487396, "grad_norm": 9.806325912475586, "learning_rate": 7.937291382959772e-07, "loss": 0.40474586, "memory(GiB)": 34.88, "step": 122230, "train_speed(iter/s)": 0.410762 }, { "acc": 0.95875921, "epoch": 3.3096417837706116, "grad_norm": 5.0183939933776855, "learning_rate": 7.934268026401569e-07, "loss": 0.21900907, "memory(GiB)": 34.88, "step": 122235, "train_speed(iter/s)": 0.410763 }, { "acc": 0.93678446, "epoch": 3.3097771640538274, "grad_norm": 17.361305236816406, "learning_rate": 7.931245196861929e-07, "loss": 0.37957146, "memory(GiB)": 34.88, "step": 122240, "train_speed(iter/s)": 0.410764 }, { "acc": 0.9297657, "epoch": 3.3099125443370427, "grad_norm": 13.023848533630371, "learning_rate": 7.928222894378769e-07, "loss": 0.37214859, "memory(GiB)": 34.88, "step": 122245, "train_speed(iter/s)": 0.410765 }, { "acc": 0.94237919, "epoch": 3.3100479246202585, "grad_norm": 5.479493618011475, "learning_rate": 7.925201118989918e-07, "loss": 0.3590847, "memory(GiB)": 34.88, "step": 122250, "train_speed(iter/s)": 0.410765 }, { "acc": 0.95416794, "epoch": 3.310183304903474, "grad_norm": 22.981313705444336, "learning_rate": 7.922179870733229e-07, "loss": 0.2929791, "memory(GiB)": 34.88, "step": 122255, "train_speed(iter/s)": 0.410766 }, { "acc": 0.95005512, "epoch": 3.3103186851866893, "grad_norm": 4.5218329429626465, "learning_rate": 7.91915914964655e-07, "loss": 0.27994769, "memory(GiB)": 34.88, "step": 122260, "train_speed(iter/s)": 0.410767 }, { "acc": 0.95208845, "epoch": 3.310454065469905, "grad_norm": 14.421073913574219, "learning_rate": 7.91613895576773e-07, "loss": 0.2954133, "memory(GiB)": 34.88, "step": 122265, "train_speed(iter/s)": 0.410768 }, { "acc": 0.94520779, "epoch": 3.3105894457531204, "grad_norm": 8.291635513305664, "learning_rate": 7.913119289134613e-07, "loss": 0.34529505, "memory(GiB)": 34.88, "step": 122270, "train_speed(iter/s)": 0.410769 }, { "acc": 0.95730181, "epoch": 3.310724826036336, "grad_norm": 11.663647651672363, "learning_rate": 7.910100149784981e-07, "loss": 0.23857067, "memory(GiB)": 34.88, "step": 122275, "train_speed(iter/s)": 0.41077 }, { "acc": 0.93671398, "epoch": 3.3108602063195516, "grad_norm": 5.662041187286377, "learning_rate": 7.907081537756711e-07, "loss": 0.32392805, "memory(GiB)": 34.88, "step": 122280, "train_speed(iter/s)": 0.410771 }, { "acc": 0.9431942, "epoch": 3.3109955866027674, "grad_norm": 12.966450691223145, "learning_rate": 7.904063453087598e-07, "loss": 0.35741677, "memory(GiB)": 34.88, "step": 122285, "train_speed(iter/s)": 0.410772 }, { "acc": 0.95002937, "epoch": 3.3111309668859827, "grad_norm": 4.1052470207214355, "learning_rate": 7.901045895815431e-07, "loss": 0.27886884, "memory(GiB)": 34.88, "step": 122290, "train_speed(iter/s)": 0.410773 }, { "acc": 0.95511675, "epoch": 3.311266347169198, "grad_norm": 5.621451377868652, "learning_rate": 7.898028865978016e-07, "loss": 0.24989681, "memory(GiB)": 34.88, "step": 122295, "train_speed(iter/s)": 0.410774 }, { "acc": 0.94959259, "epoch": 3.311401727452414, "grad_norm": 4.014608860015869, "learning_rate": 7.895012363613157e-07, "loss": 0.31000116, "memory(GiB)": 34.88, "step": 122300, "train_speed(iter/s)": 0.410775 }, { "acc": 0.93977413, "epoch": 3.3115371077356293, "grad_norm": 7.659118175506592, "learning_rate": 7.891996388758658e-07, "loss": 0.33812394, "memory(GiB)": 34.88, "step": 122305, "train_speed(iter/s)": 0.410776 }, { "acc": 0.94216461, "epoch": 3.311672488018845, "grad_norm": 3.490694999694824, "learning_rate": 7.888980941452248e-07, "loss": 0.31621146, "memory(GiB)": 34.88, "step": 122310, "train_speed(iter/s)": 0.410777 }, { "acc": 0.93585758, "epoch": 3.3118078683020604, "grad_norm": 6.503374099731445, "learning_rate": 7.885966021731765e-07, "loss": 0.43443756, "memory(GiB)": 34.88, "step": 122315, "train_speed(iter/s)": 0.410778 }, { "acc": 0.95448437, "epoch": 3.3119432485852762, "grad_norm": 7.444708347320557, "learning_rate": 7.882951629634933e-07, "loss": 0.26563101, "memory(GiB)": 34.88, "step": 122320, "train_speed(iter/s)": 0.410778 }, { "acc": 0.95415926, "epoch": 3.3120786288684916, "grad_norm": 6.251855373382568, "learning_rate": 7.879937765199534e-07, "loss": 0.28296895, "memory(GiB)": 34.88, "step": 122325, "train_speed(iter/s)": 0.410779 }, { "acc": 0.9420578, "epoch": 3.312214009151707, "grad_norm": 9.962882041931152, "learning_rate": 7.876924428463309e-07, "loss": 0.32153988, "memory(GiB)": 34.88, "step": 122330, "train_speed(iter/s)": 0.41078 }, { "acc": 0.9511198, "epoch": 3.3123493894349227, "grad_norm": 4.944418907165527, "learning_rate": 7.87391161946401e-07, "loss": 0.27519836, "memory(GiB)": 34.88, "step": 122335, "train_speed(iter/s)": 0.410781 }, { "acc": 0.95322857, "epoch": 3.312484769718138, "grad_norm": 6.7810540199279785, "learning_rate": 7.870899338239389e-07, "loss": 0.25771277, "memory(GiB)": 34.88, "step": 122340, "train_speed(iter/s)": 0.410782 }, { "acc": 0.95086632, "epoch": 3.312620150001354, "grad_norm": 5.010391712188721, "learning_rate": 7.867887584827155e-07, "loss": 0.329883, "memory(GiB)": 34.88, "step": 122345, "train_speed(iter/s)": 0.410782 }, { "acc": 0.94372492, "epoch": 3.3127555302845693, "grad_norm": 4.759700298309326, "learning_rate": 7.864876359265068e-07, "loss": 0.31895423, "memory(GiB)": 34.88, "step": 122350, "train_speed(iter/s)": 0.410783 }, { "acc": 0.91767502, "epoch": 3.312890910567785, "grad_norm": 16.976125717163086, "learning_rate": 7.86186566159083e-07, "loss": 0.51657515, "memory(GiB)": 34.88, "step": 122355, "train_speed(iter/s)": 0.410784 }, { "acc": 0.94906349, "epoch": 3.3130262908510004, "grad_norm": 3.623750686645508, "learning_rate": 7.85885549184217e-07, "loss": 0.29086492, "memory(GiB)": 34.88, "step": 122360, "train_speed(iter/s)": 0.410785 }, { "acc": 0.94786491, "epoch": 3.313161671134216, "grad_norm": 6.006185054779053, "learning_rate": 7.855845850056767e-07, "loss": 0.26695824, "memory(GiB)": 34.88, "step": 122365, "train_speed(iter/s)": 0.410786 }, { "acc": 0.95060568, "epoch": 3.3132970514174316, "grad_norm": 8.479498863220215, "learning_rate": 7.852836736272361e-07, "loss": 0.25071115, "memory(GiB)": 34.88, "step": 122370, "train_speed(iter/s)": 0.410787 }, { "acc": 0.947966, "epoch": 3.313432431700647, "grad_norm": 12.289166450500488, "learning_rate": 7.849828150526631e-07, "loss": 0.26458511, "memory(GiB)": 34.88, "step": 122375, "train_speed(iter/s)": 0.410788 }, { "acc": 0.95039339, "epoch": 3.3135678119838627, "grad_norm": 3.854806423187256, "learning_rate": 7.846820092857241e-07, "loss": 0.34913454, "memory(GiB)": 34.88, "step": 122380, "train_speed(iter/s)": 0.410789 }, { "acc": 0.94500809, "epoch": 3.313703192267078, "grad_norm": 9.56367301940918, "learning_rate": 7.843812563301916e-07, "loss": 0.30209363, "memory(GiB)": 34.88, "step": 122385, "train_speed(iter/s)": 0.41079 }, { "acc": 0.94246912, "epoch": 3.313838572550294, "grad_norm": 6.125353813171387, "learning_rate": 7.840805561898302e-07, "loss": 0.33579512, "memory(GiB)": 34.88, "step": 122390, "train_speed(iter/s)": 0.410791 }, { "acc": 0.94671841, "epoch": 3.3139739528335093, "grad_norm": 6.836028575897217, "learning_rate": 7.837799088684083e-07, "loss": 0.33528538, "memory(GiB)": 34.88, "step": 122395, "train_speed(iter/s)": 0.410792 }, { "acc": 0.95277042, "epoch": 3.314109333116725, "grad_norm": 15.634531021118164, "learning_rate": 7.834793143696892e-07, "loss": 0.24799404, "memory(GiB)": 34.88, "step": 122400, "train_speed(iter/s)": 0.410793 }, { "acc": 0.94549732, "epoch": 3.3142447133999404, "grad_norm": 10.199795722961426, "learning_rate": 7.831787726974431e-07, "loss": 0.32871804, "memory(GiB)": 34.88, "step": 122405, "train_speed(iter/s)": 0.410793 }, { "acc": 0.9438179, "epoch": 3.3143800936831562, "grad_norm": 4.023080348968506, "learning_rate": 7.828782838554309e-07, "loss": 0.38413916, "memory(GiB)": 34.88, "step": 122410, "train_speed(iter/s)": 0.410794 }, { "acc": 0.94670048, "epoch": 3.3145154739663716, "grad_norm": 3.5197715759277344, "learning_rate": 7.82577847847419e-07, "loss": 0.36596301, "memory(GiB)": 34.88, "step": 122415, "train_speed(iter/s)": 0.410795 }, { "acc": 0.92997322, "epoch": 3.314650854249587, "grad_norm": 6.734749794006348, "learning_rate": 7.822774646771711e-07, "loss": 0.41220732, "memory(GiB)": 34.88, "step": 122420, "train_speed(iter/s)": 0.410796 }, { "acc": 0.95036697, "epoch": 3.3147862345328027, "grad_norm": 4.256978988647461, "learning_rate": 7.819771343484475e-07, "loss": 0.26141996, "memory(GiB)": 34.88, "step": 122425, "train_speed(iter/s)": 0.410797 }, { "acc": 0.93985519, "epoch": 3.314921614816018, "grad_norm": 8.886109352111816, "learning_rate": 7.81676856865014e-07, "loss": 0.36720376, "memory(GiB)": 34.88, "step": 122430, "train_speed(iter/s)": 0.410798 }, { "acc": 0.93486452, "epoch": 3.315056995099234, "grad_norm": 6.044561386108398, "learning_rate": 7.813766322306275e-07, "loss": 0.40072293, "memory(GiB)": 34.88, "step": 122435, "train_speed(iter/s)": 0.410799 }, { "acc": 0.93257122, "epoch": 3.3151923753824493, "grad_norm": 3.2752034664154053, "learning_rate": 7.810764604490549e-07, "loss": 0.40253563, "memory(GiB)": 34.88, "step": 122440, "train_speed(iter/s)": 0.4108 }, { "acc": 0.94755859, "epoch": 3.315327755665665, "grad_norm": 6.0078606605529785, "learning_rate": 7.807763415240514e-07, "loss": 0.28009932, "memory(GiB)": 34.88, "step": 122445, "train_speed(iter/s)": 0.410801 }, { "acc": 0.93608093, "epoch": 3.3154631359488804, "grad_norm": 29.928503036499023, "learning_rate": 7.804762754593784e-07, "loss": 0.38962111, "memory(GiB)": 34.88, "step": 122450, "train_speed(iter/s)": 0.410801 }, { "acc": 0.95602436, "epoch": 3.315598516232096, "grad_norm": 5.2455902099609375, "learning_rate": 7.801762622587968e-07, "loss": 0.23713322, "memory(GiB)": 34.88, "step": 122455, "train_speed(iter/s)": 0.410803 }, { "acc": 0.94641724, "epoch": 3.3157338965153116, "grad_norm": 5.679935455322266, "learning_rate": 7.798763019260607e-07, "loss": 0.27955632, "memory(GiB)": 34.88, "step": 122460, "train_speed(iter/s)": 0.410803 }, { "acc": 0.9611742, "epoch": 3.315869276798527, "grad_norm": 5.049158573150635, "learning_rate": 7.795763944649318e-07, "loss": 0.18121905, "memory(GiB)": 34.88, "step": 122465, "train_speed(iter/s)": 0.410804 }, { "acc": 0.93520994, "epoch": 3.3160046570817427, "grad_norm": 66.71480560302734, "learning_rate": 7.792765398791628e-07, "loss": 0.45103393, "memory(GiB)": 34.88, "step": 122470, "train_speed(iter/s)": 0.410805 }, { "acc": 0.94697552, "epoch": 3.316140037364958, "grad_norm": 7.352604866027832, "learning_rate": 7.789767381725145e-07, "loss": 0.29617651, "memory(GiB)": 34.88, "step": 122475, "train_speed(iter/s)": 0.410806 }, { "acc": 0.94654102, "epoch": 3.316275417648174, "grad_norm": 3.236611843109131, "learning_rate": 7.786769893487396e-07, "loss": 0.28086052, "memory(GiB)": 34.88, "step": 122480, "train_speed(iter/s)": 0.410807 }, { "acc": 0.94228497, "epoch": 3.3164107979313893, "grad_norm": 5.342724800109863, "learning_rate": 7.783772934115929e-07, "loss": 0.41803055, "memory(GiB)": 34.88, "step": 122485, "train_speed(iter/s)": 0.410808 }, { "acc": 0.93974419, "epoch": 3.3165461782146046, "grad_norm": 7.2931318283081055, "learning_rate": 7.780776503648302e-07, "loss": 0.42502584, "memory(GiB)": 34.88, "step": 122490, "train_speed(iter/s)": 0.410809 }, { "acc": 0.9389184, "epoch": 3.3166815584978204, "grad_norm": 8.079293251037598, "learning_rate": 7.777780602122056e-07, "loss": 0.34911799, "memory(GiB)": 34.88, "step": 122495, "train_speed(iter/s)": 0.41081 }, { "acc": 0.93300743, "epoch": 3.316816938781036, "grad_norm": 6.7744364738464355, "learning_rate": 7.774785229574709e-07, "loss": 0.41406145, "memory(GiB)": 34.88, "step": 122500, "train_speed(iter/s)": 0.410811 }, { "acc": 0.93957539, "epoch": 3.3169523190642516, "grad_norm": 10.068798065185547, "learning_rate": 7.771790386043759e-07, "loss": 0.29952483, "memory(GiB)": 34.88, "step": 122505, "train_speed(iter/s)": 0.410812 }, { "acc": 0.95483522, "epoch": 3.317087699347467, "grad_norm": 5.133573532104492, "learning_rate": 7.76879607156677e-07, "loss": 0.31324949, "memory(GiB)": 34.88, "step": 122510, "train_speed(iter/s)": 0.410813 }, { "acc": 0.94807167, "epoch": 3.3172230796306827, "grad_norm": 10.712120056152344, "learning_rate": 7.765802286181222e-07, "loss": 0.31890512, "memory(GiB)": 34.88, "step": 122515, "train_speed(iter/s)": 0.410814 }, { "acc": 0.95501909, "epoch": 3.317358459913898, "grad_norm": 27.2357234954834, "learning_rate": 7.762809029924625e-07, "loss": 0.26054382, "memory(GiB)": 34.88, "step": 122520, "train_speed(iter/s)": 0.410814 }, { "acc": 0.93004274, "epoch": 3.3174938401971135, "grad_norm": 15.49213981628418, "learning_rate": 7.759816302834469e-07, "loss": 0.44051571, "memory(GiB)": 34.88, "step": 122525, "train_speed(iter/s)": 0.410815 }, { "acc": 0.9447607, "epoch": 3.3176292204803293, "grad_norm": 6.622549533843994, "learning_rate": 7.756824104948271e-07, "loss": 0.30913186, "memory(GiB)": 34.88, "step": 122530, "train_speed(iter/s)": 0.410816 }, { "acc": 0.95642681, "epoch": 3.3177646007635446, "grad_norm": 6.005172252655029, "learning_rate": 7.753832436303474e-07, "loss": 0.25883956, "memory(GiB)": 34.88, "step": 122535, "train_speed(iter/s)": 0.410817 }, { "acc": 0.94892731, "epoch": 3.3178999810467604, "grad_norm": 10.249146461486816, "learning_rate": 7.750841296937578e-07, "loss": 0.27605586, "memory(GiB)": 34.88, "step": 122540, "train_speed(iter/s)": 0.410818 }, { "acc": 0.93317451, "epoch": 3.318035361329976, "grad_norm": 11.051321029663086, "learning_rate": 7.74785068688806e-07, "loss": 0.40066414, "memory(GiB)": 34.88, "step": 122545, "train_speed(iter/s)": 0.410819 }, { "acc": 0.95692854, "epoch": 3.3181707416131916, "grad_norm": 8.323375701904297, "learning_rate": 7.744860606192362e-07, "loss": 0.2788121, "memory(GiB)": 34.88, "step": 122550, "train_speed(iter/s)": 0.41082 }, { "acc": 0.95337467, "epoch": 3.318306121896407, "grad_norm": 3.5274651050567627, "learning_rate": 7.74187105488795e-07, "loss": 0.269841, "memory(GiB)": 34.88, "step": 122555, "train_speed(iter/s)": 0.410821 }, { "acc": 0.93954268, "epoch": 3.3184415021796227, "grad_norm": 7.3879714012146, "learning_rate": 7.738882033012276e-07, "loss": 0.34989607, "memory(GiB)": 34.88, "step": 122560, "train_speed(iter/s)": 0.410822 }, { "acc": 0.94658203, "epoch": 3.318576882462838, "grad_norm": 13.245752334594727, "learning_rate": 7.735893540602793e-07, "loss": 0.37083364, "memory(GiB)": 34.88, "step": 122565, "train_speed(iter/s)": 0.410823 }, { "acc": 0.91247568, "epoch": 3.318712262746054, "grad_norm": 19.870885848999023, "learning_rate": 7.732905577696916e-07, "loss": 0.59705124, "memory(GiB)": 34.88, "step": 122570, "train_speed(iter/s)": 0.410824 }, { "acc": 0.9641942, "epoch": 3.3188476430292693, "grad_norm": 4.992355823516846, "learning_rate": 7.729918144332081e-07, "loss": 0.23279328, "memory(GiB)": 34.88, "step": 122575, "train_speed(iter/s)": 0.410825 }, { "acc": 0.93466949, "epoch": 3.3189830233124846, "grad_norm": 8.688287734985352, "learning_rate": 7.726931240545732e-07, "loss": 0.37254822, "memory(GiB)": 34.88, "step": 122580, "train_speed(iter/s)": 0.410826 }, { "acc": 0.95451031, "epoch": 3.3191184035957004, "grad_norm": 2.46541428565979, "learning_rate": 7.723944866375259e-07, "loss": 0.22910371, "memory(GiB)": 34.88, "step": 122585, "train_speed(iter/s)": 0.410827 }, { "acc": 0.95071507, "epoch": 3.319253783878916, "grad_norm": 6.090995788574219, "learning_rate": 7.720959021858089e-07, "loss": 0.26168628, "memory(GiB)": 34.88, "step": 122590, "train_speed(iter/s)": 0.410828 }, { "acc": 0.93961678, "epoch": 3.3193891641621316, "grad_norm": 13.805391311645508, "learning_rate": 7.717973707031615e-07, "loss": 0.37537081, "memory(GiB)": 34.88, "step": 122595, "train_speed(iter/s)": 0.410829 }, { "acc": 0.95810585, "epoch": 3.319524544445347, "grad_norm": 9.948843002319336, "learning_rate": 7.714988921933248e-07, "loss": 0.25805125, "memory(GiB)": 34.88, "step": 122600, "train_speed(iter/s)": 0.41083 }, { "acc": 0.94591217, "epoch": 3.3196599247285627, "grad_norm": 5.08396577835083, "learning_rate": 7.712004666600362e-07, "loss": 0.3064784, "memory(GiB)": 34.88, "step": 122605, "train_speed(iter/s)": 0.41083 }, { "acc": 0.95463676, "epoch": 3.319795305011778, "grad_norm": 7.263647556304932, "learning_rate": 7.709020941070349e-07, "loss": 0.32943933, "memory(GiB)": 34.88, "step": 122610, "train_speed(iter/s)": 0.410831 }, { "acc": 0.94782972, "epoch": 3.3199306852949935, "grad_norm": 7.259760856628418, "learning_rate": 7.70603774538058e-07, "loss": 0.28036387, "memory(GiB)": 34.88, "step": 122615, "train_speed(iter/s)": 0.410832 }, { "acc": 0.9516983, "epoch": 3.3200660655782093, "grad_norm": 6.095458507537842, "learning_rate": 7.703055079568448e-07, "loss": 0.30679951, "memory(GiB)": 34.88, "step": 122620, "train_speed(iter/s)": 0.410833 }, { "acc": 0.94510937, "epoch": 3.3202014458614246, "grad_norm": 10.433249473571777, "learning_rate": 7.700072943671293e-07, "loss": 0.3174123, "memory(GiB)": 34.88, "step": 122625, "train_speed(iter/s)": 0.410834 }, { "acc": 0.93542671, "epoch": 3.3203368261446404, "grad_norm": 9.438464164733887, "learning_rate": 7.697091337726459e-07, "loss": 0.36324203, "memory(GiB)": 34.88, "step": 122630, "train_speed(iter/s)": 0.410835 }, { "acc": 0.9525095, "epoch": 3.320472206427856, "grad_norm": 3.5659210681915283, "learning_rate": 7.694110261771343e-07, "loss": 0.27241018, "memory(GiB)": 34.88, "step": 122635, "train_speed(iter/s)": 0.410836 }, { "acc": 0.95233107, "epoch": 3.3206075867110716, "grad_norm": 6.59303617477417, "learning_rate": 7.691129715843243e-07, "loss": 0.31800983, "memory(GiB)": 34.88, "step": 122640, "train_speed(iter/s)": 0.410837 }, { "acc": 0.95860214, "epoch": 3.320742966994287, "grad_norm": 7.880124568939209, "learning_rate": 7.68814969997952e-07, "loss": 0.24494686, "memory(GiB)": 34.88, "step": 122645, "train_speed(iter/s)": 0.410838 }, { "acc": 0.93616104, "epoch": 3.3208783472775023, "grad_norm": 5.201085090637207, "learning_rate": 7.685170214217497e-07, "loss": 0.37791319, "memory(GiB)": 34.88, "step": 122650, "train_speed(iter/s)": 0.410839 }, { "acc": 0.95554132, "epoch": 3.321013727560718, "grad_norm": 5.795809745788574, "learning_rate": 7.682191258594511e-07, "loss": 0.23622582, "memory(GiB)": 34.88, "step": 122655, "train_speed(iter/s)": 0.41084 }, { "acc": 0.95468712, "epoch": 3.3211491078439335, "grad_norm": 7.696081161499023, "learning_rate": 7.679212833147856e-07, "loss": 0.25796962, "memory(GiB)": 34.88, "step": 122660, "train_speed(iter/s)": 0.410841 }, { "acc": 0.93773956, "epoch": 3.3212844881271493, "grad_norm": 5.027821063995361, "learning_rate": 7.676234937914862e-07, "loss": 0.36216042, "memory(GiB)": 34.88, "step": 122665, "train_speed(iter/s)": 0.410842 }, { "acc": 0.94725256, "epoch": 3.3214198684103646, "grad_norm": 6.5616302490234375, "learning_rate": 7.673257572932832e-07, "loss": 0.31964176, "memory(GiB)": 34.88, "step": 122670, "train_speed(iter/s)": 0.410842 }, { "acc": 0.95660038, "epoch": 3.3215552486935804, "grad_norm": 10.386774063110352, "learning_rate": 7.67028073823905e-07, "loss": 0.29625711, "memory(GiB)": 34.88, "step": 122675, "train_speed(iter/s)": 0.410843 }, { "acc": 0.93276558, "epoch": 3.321690628976796, "grad_norm": 25.068580627441406, "learning_rate": 7.66730443387081e-07, "loss": 0.45733128, "memory(GiB)": 34.88, "step": 122680, "train_speed(iter/s)": 0.410844 }, { "acc": 0.92874746, "epoch": 3.321826009260011, "grad_norm": 9.647214889526367, "learning_rate": 7.664328659865401e-07, "loss": 0.47816429, "memory(GiB)": 34.88, "step": 122685, "train_speed(iter/s)": 0.410845 }, { "acc": 0.94552059, "epoch": 3.321961389543227, "grad_norm": 3.449735164642334, "learning_rate": 7.661353416260112e-07, "loss": 0.32024598, "memory(GiB)": 34.88, "step": 122690, "train_speed(iter/s)": 0.410846 }, { "acc": 0.95486698, "epoch": 3.3220967698264423, "grad_norm": 5.810342311859131, "learning_rate": 7.658378703092191e-07, "loss": 0.29643583, "memory(GiB)": 34.88, "step": 122695, "train_speed(iter/s)": 0.410847 }, { "acc": 0.93877192, "epoch": 3.322232150109658, "grad_norm": 8.179126739501953, "learning_rate": 7.655404520398911e-07, "loss": 0.30546947, "memory(GiB)": 34.88, "step": 122700, "train_speed(iter/s)": 0.410848 }, { "acc": 0.9461113, "epoch": 3.3223675303928735, "grad_norm": 6.459776401519775, "learning_rate": 7.652430868217546e-07, "loss": 0.30656371, "memory(GiB)": 34.88, "step": 122705, "train_speed(iter/s)": 0.410849 }, { "acc": 0.93960695, "epoch": 3.3225029106760893, "grad_norm": 9.513303756713867, "learning_rate": 7.64945774658532e-07, "loss": 0.35291388, "memory(GiB)": 34.88, "step": 122710, "train_speed(iter/s)": 0.41085 }, { "acc": 0.91993103, "epoch": 3.3226382909593046, "grad_norm": 10.011058807373047, "learning_rate": 7.646485155539486e-07, "loss": 0.49588213, "memory(GiB)": 34.88, "step": 122715, "train_speed(iter/s)": 0.41085 }, { "acc": 0.95128546, "epoch": 3.3227736712425204, "grad_norm": 6.047263145446777, "learning_rate": 7.643513095117286e-07, "loss": 0.29733281, "memory(GiB)": 34.88, "step": 122720, "train_speed(iter/s)": 0.410851 }, { "acc": 0.9526844, "epoch": 3.322909051525736, "grad_norm": 7.619011878967285, "learning_rate": 7.640541565355966e-07, "loss": 0.292731, "memory(GiB)": 34.88, "step": 122725, "train_speed(iter/s)": 0.410852 }, { "acc": 0.9555584, "epoch": 3.3230444318089516, "grad_norm": 3.341350555419922, "learning_rate": 7.637570566292718e-07, "loss": 0.22996845, "memory(GiB)": 34.88, "step": 122730, "train_speed(iter/s)": 0.410853 }, { "acc": 0.95521708, "epoch": 3.323179812092167, "grad_norm": 6.105139255523682, "learning_rate": 7.634600097964773e-07, "loss": 0.25033433, "memory(GiB)": 34.88, "step": 122735, "train_speed(iter/s)": 0.410854 }, { "acc": 0.9464138, "epoch": 3.3233151923753823, "grad_norm": 7.718046188354492, "learning_rate": 7.631630160409349e-07, "loss": 0.31467459, "memory(GiB)": 34.88, "step": 122740, "train_speed(iter/s)": 0.410855 }, { "acc": 0.9493618, "epoch": 3.323450572658598, "grad_norm": 3.8962724208831787, "learning_rate": 7.628660753663656e-07, "loss": 0.32586575, "memory(GiB)": 34.88, "step": 122745, "train_speed(iter/s)": 0.410856 }, { "acc": 0.94308739, "epoch": 3.3235859529418135, "grad_norm": 3.826770782470703, "learning_rate": 7.625691877764868e-07, "loss": 0.29970536, "memory(GiB)": 34.88, "step": 122750, "train_speed(iter/s)": 0.410857 }, { "acc": 0.95595732, "epoch": 3.3237213332250293, "grad_norm": 16.40119743347168, "learning_rate": 7.622723532750191e-07, "loss": 0.28384159, "memory(GiB)": 34.88, "step": 122755, "train_speed(iter/s)": 0.410858 }, { "acc": 0.94098368, "epoch": 3.3238567135082446, "grad_norm": 6.581122875213623, "learning_rate": 7.619755718656821e-07, "loss": 0.37217255, "memory(GiB)": 34.88, "step": 122760, "train_speed(iter/s)": 0.410858 }, { "acc": 0.93858852, "epoch": 3.3239920937914604, "grad_norm": 11.996877670288086, "learning_rate": 7.616788435521906e-07, "loss": 0.32899346, "memory(GiB)": 34.88, "step": 122765, "train_speed(iter/s)": 0.410859 }, { "acc": 0.93853769, "epoch": 3.324127474074676, "grad_norm": 4.782962322235107, "learning_rate": 7.613821683382636e-07, "loss": 0.3328506, "memory(GiB)": 34.88, "step": 122770, "train_speed(iter/s)": 0.41086 }, { "acc": 0.94036465, "epoch": 3.324262854357891, "grad_norm": 4.915891647338867, "learning_rate": 7.610855462276176e-07, "loss": 0.34376726, "memory(GiB)": 34.88, "step": 122775, "train_speed(iter/s)": 0.410861 }, { "acc": 0.95210257, "epoch": 3.324398234641107, "grad_norm": 7.565480709075928, "learning_rate": 7.607889772239689e-07, "loss": 0.29610298, "memory(GiB)": 34.88, "step": 122780, "train_speed(iter/s)": 0.410862 }, { "acc": 0.94876795, "epoch": 3.3245336149243223, "grad_norm": 7.735111236572266, "learning_rate": 7.604924613310296e-07, "loss": 0.28074532, "memory(GiB)": 34.88, "step": 122785, "train_speed(iter/s)": 0.410863 }, { "acc": 0.95106716, "epoch": 3.324668995207538, "grad_norm": 6.066683769226074, "learning_rate": 7.601959985525181e-07, "loss": 0.32174008, "memory(GiB)": 34.88, "step": 122790, "train_speed(iter/s)": 0.410864 }, { "acc": 0.93939734, "epoch": 3.3248043754907535, "grad_norm": 10.260232925415039, "learning_rate": 7.598995888921471e-07, "loss": 0.37835016, "memory(GiB)": 34.88, "step": 122795, "train_speed(iter/s)": 0.410865 }, { "acc": 0.94498882, "epoch": 3.3249397557739693, "grad_norm": 3.684896945953369, "learning_rate": 7.596032323536275e-07, "loss": 0.32737598, "memory(GiB)": 34.88, "step": 122800, "train_speed(iter/s)": 0.410866 }, { "acc": 0.94265842, "epoch": 3.3250751360571846, "grad_norm": 6.2633748054504395, "learning_rate": 7.593069289406737e-07, "loss": 0.3690721, "memory(GiB)": 34.88, "step": 122805, "train_speed(iter/s)": 0.410867 }, { "acc": 0.9435997, "epoch": 3.3252105163404, "grad_norm": 14.88336181640625, "learning_rate": 7.590106786569965e-07, "loss": 0.37495055, "memory(GiB)": 34.88, "step": 122810, "train_speed(iter/s)": 0.410867 }, { "acc": 0.94249935, "epoch": 3.325345896623616, "grad_norm": 4.333620071411133, "learning_rate": 7.587144815063091e-07, "loss": 0.41428852, "memory(GiB)": 34.88, "step": 122815, "train_speed(iter/s)": 0.410868 }, { "acc": 0.94367828, "epoch": 3.325481276906831, "grad_norm": 2.638857126235962, "learning_rate": 7.584183374923189e-07, "loss": 0.32432866, "memory(GiB)": 34.88, "step": 122820, "train_speed(iter/s)": 0.410869 }, { "acc": 0.94173069, "epoch": 3.325616657190047, "grad_norm": 6.15000057220459, "learning_rate": 7.581222466187374e-07, "loss": 0.28884685, "memory(GiB)": 34.88, "step": 122825, "train_speed(iter/s)": 0.41087 }, { "acc": 0.94549189, "epoch": 3.3257520374732623, "grad_norm": 13.356935501098633, "learning_rate": 7.578262088892748e-07, "loss": 0.30642059, "memory(GiB)": 34.88, "step": 122830, "train_speed(iter/s)": 0.410871 }, { "acc": 0.94733105, "epoch": 3.325887417756478, "grad_norm": 5.233746528625488, "learning_rate": 7.575302243076371e-07, "loss": 0.30663438, "memory(GiB)": 34.88, "step": 122835, "train_speed(iter/s)": 0.410872 }, { "acc": 0.94485321, "epoch": 3.3260227980396935, "grad_norm": 4.599674701690674, "learning_rate": 7.57234292877533e-07, "loss": 0.30547099, "memory(GiB)": 34.88, "step": 122840, "train_speed(iter/s)": 0.410873 }, { "acc": 0.94301491, "epoch": 3.326158178322909, "grad_norm": 8.376001358032227, "learning_rate": 7.569384146026702e-07, "loss": 0.35383327, "memory(GiB)": 34.88, "step": 122845, "train_speed(iter/s)": 0.410874 }, { "acc": 0.94848509, "epoch": 3.3262935586061246, "grad_norm": 18.080177307128906, "learning_rate": 7.566425894867556e-07, "loss": 0.25355859, "memory(GiB)": 34.88, "step": 122850, "train_speed(iter/s)": 0.410874 }, { "acc": 0.95025196, "epoch": 3.32642893888934, "grad_norm": 7.570167064666748, "learning_rate": 7.563468175334936e-07, "loss": 0.29600313, "memory(GiB)": 34.88, "step": 122855, "train_speed(iter/s)": 0.410875 }, { "acc": 0.93787241, "epoch": 3.326564319172556, "grad_norm": 8.669357299804688, "learning_rate": 7.560510987465892e-07, "loss": 0.34266281, "memory(GiB)": 34.88, "step": 122860, "train_speed(iter/s)": 0.410876 }, { "acc": 0.94717922, "epoch": 3.326699699455771, "grad_norm": 5.246090888977051, "learning_rate": 7.55755433129748e-07, "loss": 0.24957972, "memory(GiB)": 34.88, "step": 122865, "train_speed(iter/s)": 0.410877 }, { "acc": 0.94863043, "epoch": 3.326835079738987, "grad_norm": 6.531306266784668, "learning_rate": 7.554598206866752e-07, "loss": 0.2649864, "memory(GiB)": 34.88, "step": 122870, "train_speed(iter/s)": 0.410878 }, { "acc": 0.93575754, "epoch": 3.3269704600222023, "grad_norm": 37.71951675415039, "learning_rate": 7.5516426142107e-07, "loss": 0.37473896, "memory(GiB)": 34.88, "step": 122875, "train_speed(iter/s)": 0.410879 }, { "acc": 0.94563131, "epoch": 3.3271058403054177, "grad_norm": 6.0183610916137695, "learning_rate": 7.548687553366374e-07, "loss": 0.28833857, "memory(GiB)": 34.88, "step": 122880, "train_speed(iter/s)": 0.410879 }, { "acc": 0.94500885, "epoch": 3.3272412205886335, "grad_norm": 4.630334377288818, "learning_rate": 7.545733024370803e-07, "loss": 0.38334215, "memory(GiB)": 34.88, "step": 122885, "train_speed(iter/s)": 0.41088 }, { "acc": 0.94602795, "epoch": 3.327376600871849, "grad_norm": 4.012860298156738, "learning_rate": 7.542779027260965e-07, "loss": 0.39582987, "memory(GiB)": 34.88, "step": 122890, "train_speed(iter/s)": 0.410881 }, { "acc": 0.93602667, "epoch": 3.3275119811550646, "grad_norm": 3.5799920558929443, "learning_rate": 7.539825562073886e-07, "loss": 0.38461061, "memory(GiB)": 34.88, "step": 122895, "train_speed(iter/s)": 0.410882 }, { "acc": 0.94740715, "epoch": 3.32764736143828, "grad_norm": 7.1290411949157715, "learning_rate": 7.536872628846563e-07, "loss": 0.30840151, "memory(GiB)": 34.88, "step": 122900, "train_speed(iter/s)": 0.410883 }, { "acc": 0.94298973, "epoch": 3.327782741721496, "grad_norm": 5.9800519943237305, "learning_rate": 7.533920227615994e-07, "loss": 0.32071342, "memory(GiB)": 34.88, "step": 122905, "train_speed(iter/s)": 0.410884 }, { "acc": 0.94129028, "epoch": 3.327918122004711, "grad_norm": 5.6202545166015625, "learning_rate": 7.53096835841913e-07, "loss": 0.32884221, "memory(GiB)": 34.88, "step": 122910, "train_speed(iter/s)": 0.410885 }, { "acc": 0.95146227, "epoch": 3.328053502287927, "grad_norm": 3.634373903274536, "learning_rate": 7.528017021292997e-07, "loss": 0.2879364, "memory(GiB)": 34.88, "step": 122915, "train_speed(iter/s)": 0.410886 }, { "acc": 0.94811764, "epoch": 3.3281888825711423, "grad_norm": 9.554296493530273, "learning_rate": 7.525066216274549e-07, "loss": 0.28724167, "memory(GiB)": 34.88, "step": 122920, "train_speed(iter/s)": 0.410886 }, { "acc": 0.94284687, "epoch": 3.328324262854358, "grad_norm": 7.549741744995117, "learning_rate": 7.522115943400726e-07, "loss": 0.31226053, "memory(GiB)": 34.88, "step": 122925, "train_speed(iter/s)": 0.410887 }, { "acc": 0.9442544, "epoch": 3.3284596431375735, "grad_norm": 7.038583278656006, "learning_rate": 7.51916620270851e-07, "loss": 0.4101006, "memory(GiB)": 34.88, "step": 122930, "train_speed(iter/s)": 0.410888 }, { "acc": 0.94553776, "epoch": 3.328595023420789, "grad_norm": 3.350264072418213, "learning_rate": 7.516216994234842e-07, "loss": 0.28445992, "memory(GiB)": 34.88, "step": 122935, "train_speed(iter/s)": 0.410889 }, { "acc": 0.94542217, "epoch": 3.3287304037040046, "grad_norm": 7.957009792327881, "learning_rate": 7.513268318016683e-07, "loss": 0.34221263, "memory(GiB)": 34.88, "step": 122940, "train_speed(iter/s)": 0.41089 }, { "acc": 0.9301013, "epoch": 3.32886578398722, "grad_norm": 5.2331438064575195, "learning_rate": 7.510320174090938e-07, "loss": 0.43503876, "memory(GiB)": 34.88, "step": 122945, "train_speed(iter/s)": 0.410891 }, { "acc": 0.9354496, "epoch": 3.329001164270436, "grad_norm": 5.468832969665527, "learning_rate": 7.507372562494586e-07, "loss": 0.41574979, "memory(GiB)": 34.88, "step": 122950, "train_speed(iter/s)": 0.410892 }, { "acc": 0.94416943, "epoch": 3.329136544553651, "grad_norm": 7.251224994659424, "learning_rate": 7.504425483264512e-07, "loss": 0.36676581, "memory(GiB)": 34.88, "step": 122955, "train_speed(iter/s)": 0.410893 }, { "acc": 0.95167036, "epoch": 3.329271924836867, "grad_norm": 9.9375638961792, "learning_rate": 7.501478936437654e-07, "loss": 0.24334712, "memory(GiB)": 34.88, "step": 122960, "train_speed(iter/s)": 0.410894 }, { "acc": 0.94949446, "epoch": 3.3294073051200823, "grad_norm": 9.494504928588867, "learning_rate": 7.498532922050912e-07, "loss": 0.24872694, "memory(GiB)": 34.88, "step": 122965, "train_speed(iter/s)": 0.410895 }, { "acc": 0.93591843, "epoch": 3.3295426854032977, "grad_norm": 6.584998607635498, "learning_rate": 7.495587440141187e-07, "loss": 0.37526226, "memory(GiB)": 34.88, "step": 122970, "train_speed(iter/s)": 0.410896 }, { "acc": 0.92904797, "epoch": 3.3296780656865135, "grad_norm": 15.352131843566895, "learning_rate": 7.492642490745403e-07, "loss": 0.44077692, "memory(GiB)": 34.88, "step": 122975, "train_speed(iter/s)": 0.410897 }, { "acc": 0.94770184, "epoch": 3.329813445969729, "grad_norm": 6.934630870819092, "learning_rate": 7.489698073900413e-07, "loss": 0.29456677, "memory(GiB)": 34.88, "step": 122980, "train_speed(iter/s)": 0.410897 }, { "acc": 0.94864902, "epoch": 3.3299488262529446, "grad_norm": 4.229971885681152, "learning_rate": 7.486754189643139e-07, "loss": 0.33473923, "memory(GiB)": 34.88, "step": 122985, "train_speed(iter/s)": 0.410898 }, { "acc": 0.93958015, "epoch": 3.33008420653616, "grad_norm": 8.113885879516602, "learning_rate": 7.483810838010434e-07, "loss": 0.38674042, "memory(GiB)": 34.88, "step": 122990, "train_speed(iter/s)": 0.410899 }, { "acc": 0.95023127, "epoch": 3.330219586819376, "grad_norm": 6.512790203094482, "learning_rate": 7.480868019039185e-07, "loss": 0.243208, "memory(GiB)": 34.88, "step": 122995, "train_speed(iter/s)": 0.4109 }, { "acc": 0.943925, "epoch": 3.330354967102591, "grad_norm": 12.274359703063965, "learning_rate": 7.477925732766241e-07, "loss": 0.30507271, "memory(GiB)": 34.88, "step": 123000, "train_speed(iter/s)": 0.410901 }, { "acc": 0.93922234, "epoch": 3.3304903473858065, "grad_norm": 8.517484664916992, "learning_rate": 7.474983979228467e-07, "loss": 0.32499099, "memory(GiB)": 34.88, "step": 123005, "train_speed(iter/s)": 0.410902 }, { "acc": 0.95268555, "epoch": 3.3306257276690223, "grad_norm": 5.998104572296143, "learning_rate": 7.472042758462722e-07, "loss": 0.24718926, "memory(GiB)": 34.88, "step": 123010, "train_speed(iter/s)": 0.410903 }, { "acc": 0.95099716, "epoch": 3.3307611079522377, "grad_norm": 11.645614624023438, "learning_rate": 7.469102070505828e-07, "loss": 0.29055068, "memory(GiB)": 34.88, "step": 123015, "train_speed(iter/s)": 0.410903 }, { "acc": 0.937815, "epoch": 3.3308964882354535, "grad_norm": 4.524351596832275, "learning_rate": 7.466161915394657e-07, "loss": 0.32978702, "memory(GiB)": 34.88, "step": 123020, "train_speed(iter/s)": 0.410904 }, { "acc": 0.94615192, "epoch": 3.331031868518669, "grad_norm": 17.97565460205078, "learning_rate": 7.463222293166011e-07, "loss": 0.35650122, "memory(GiB)": 34.88, "step": 123025, "train_speed(iter/s)": 0.410905 }, { "acc": 0.94332132, "epoch": 3.3311672488018846, "grad_norm": 10.14304256439209, "learning_rate": 7.460283203856734e-07, "loss": 0.36191485, "memory(GiB)": 34.88, "step": 123030, "train_speed(iter/s)": 0.410906 }, { "acc": 0.93762684, "epoch": 3.3313026290851, "grad_norm": 12.679966926574707, "learning_rate": 7.457344647503614e-07, "loss": 0.36460781, "memory(GiB)": 34.88, "step": 123035, "train_speed(iter/s)": 0.410907 }, { "acc": 0.94450312, "epoch": 3.3314380093683154, "grad_norm": 10.006965637207031, "learning_rate": 7.454406624143507e-07, "loss": 0.32588706, "memory(GiB)": 34.88, "step": 123040, "train_speed(iter/s)": 0.410908 }, { "acc": 0.94920197, "epoch": 3.331573389651531, "grad_norm": 5.646575450897217, "learning_rate": 7.451469133813194e-07, "loss": 0.32029405, "memory(GiB)": 34.88, "step": 123045, "train_speed(iter/s)": 0.410908 }, { "acc": 0.95003357, "epoch": 3.3317087699347465, "grad_norm": 4.660670757293701, "learning_rate": 7.448532176549449e-07, "loss": 0.28727221, "memory(GiB)": 34.88, "step": 123050, "train_speed(iter/s)": 0.410909 }, { "acc": 0.93942852, "epoch": 3.3318441502179623, "grad_norm": 7.7160115242004395, "learning_rate": 7.445595752389113e-07, "loss": 0.34518723, "memory(GiB)": 34.88, "step": 123055, "train_speed(iter/s)": 0.41091 }, { "acc": 0.94947634, "epoch": 3.3319795305011777, "grad_norm": 5.146138668060303, "learning_rate": 7.442659861368931e-07, "loss": 0.29758265, "memory(GiB)": 34.88, "step": 123060, "train_speed(iter/s)": 0.410911 }, { "acc": 0.94139662, "epoch": 3.3321149107843935, "grad_norm": 7.8250041007995605, "learning_rate": 7.439724503525702e-07, "loss": 0.30971906, "memory(GiB)": 34.88, "step": 123065, "train_speed(iter/s)": 0.410912 }, { "acc": 0.96474304, "epoch": 3.332250291067609, "grad_norm": 2.4334876537323, "learning_rate": 7.43678967889617e-07, "loss": 0.22019305, "memory(GiB)": 34.88, "step": 123070, "train_speed(iter/s)": 0.410913 }, { "acc": 0.95277615, "epoch": 3.3323856713508246, "grad_norm": 4.136410236358643, "learning_rate": 7.433855387517143e-07, "loss": 0.26640823, "memory(GiB)": 34.88, "step": 123075, "train_speed(iter/s)": 0.410914 }, { "acc": 0.95229082, "epoch": 3.33252105163404, "grad_norm": 10.331879615783691, "learning_rate": 7.430921629425338e-07, "loss": 0.34195409, "memory(GiB)": 34.88, "step": 123080, "train_speed(iter/s)": 0.410914 }, { "acc": 0.93745918, "epoch": 3.332656431917256, "grad_norm": 6.370981693267822, "learning_rate": 7.427988404657525e-07, "loss": 0.41015501, "memory(GiB)": 34.88, "step": 123085, "train_speed(iter/s)": 0.410915 }, { "acc": 0.94496002, "epoch": 3.332791812200471, "grad_norm": 8.964166641235352, "learning_rate": 7.425055713250457e-07, "loss": 0.35055225, "memory(GiB)": 34.88, "step": 123090, "train_speed(iter/s)": 0.410916 }, { "acc": 0.95011673, "epoch": 3.3329271924836865, "grad_norm": 5.088848114013672, "learning_rate": 7.422123555240848e-07, "loss": 0.37462487, "memory(GiB)": 34.88, "step": 123095, "train_speed(iter/s)": 0.410917 }, { "acc": 0.93705349, "epoch": 3.3330625727669023, "grad_norm": 4.943258285522461, "learning_rate": 7.419191930665457e-07, "loss": 0.336341, "memory(GiB)": 34.88, "step": 123100, "train_speed(iter/s)": 0.410918 }, { "acc": 0.95970364, "epoch": 3.3331979530501177, "grad_norm": 7.658000946044922, "learning_rate": 7.416260839560966e-07, "loss": 0.24426467, "memory(GiB)": 34.88, "step": 123105, "train_speed(iter/s)": 0.410919 }, { "acc": 0.93665295, "epoch": 3.3333333333333335, "grad_norm": 12.28552532196045, "learning_rate": 7.413330281964147e-07, "loss": 0.35079226, "memory(GiB)": 34.88, "step": 123110, "train_speed(iter/s)": 0.41092 }, { "acc": 0.94652596, "epoch": 3.333468713616549, "grad_norm": 6.394436836242676, "learning_rate": 7.410400257911667e-07, "loss": 0.33825796, "memory(GiB)": 34.88, "step": 123115, "train_speed(iter/s)": 0.41092 }, { "acc": 0.95205832, "epoch": 3.3336040938997646, "grad_norm": 7.426982879638672, "learning_rate": 7.40747076744025e-07, "loss": 0.19393233, "memory(GiB)": 34.88, "step": 123120, "train_speed(iter/s)": 0.410921 }, { "acc": 0.95197487, "epoch": 3.33373947418298, "grad_norm": 9.780657768249512, "learning_rate": 7.404541810586601e-07, "loss": 0.28267579, "memory(GiB)": 34.88, "step": 123125, "train_speed(iter/s)": 0.410922 }, { "acc": 0.93421783, "epoch": 3.3338748544661954, "grad_norm": 14.104402542114258, "learning_rate": 7.401613387387392e-07, "loss": 0.35264137, "memory(GiB)": 34.88, "step": 123130, "train_speed(iter/s)": 0.410923 }, { "acc": 0.93890438, "epoch": 3.334010234749411, "grad_norm": 5.535778522491455, "learning_rate": 7.398685497879323e-07, "loss": 0.36407056, "memory(GiB)": 34.88, "step": 123135, "train_speed(iter/s)": 0.410924 }, { "acc": 0.94431572, "epoch": 3.3341456150326265, "grad_norm": 3.8852427005767822, "learning_rate": 7.395758142099048e-07, "loss": 0.32026339, "memory(GiB)": 34.88, "step": 123140, "train_speed(iter/s)": 0.410925 }, { "acc": 0.93827801, "epoch": 3.3342809953158423, "grad_norm": 11.478839874267578, "learning_rate": 7.392831320083267e-07, "loss": 0.37997522, "memory(GiB)": 34.88, "step": 123145, "train_speed(iter/s)": 0.410926 }, { "acc": 0.93015366, "epoch": 3.3344163755990577, "grad_norm": 5.378833293914795, "learning_rate": 7.38990503186863e-07, "loss": 0.40265064, "memory(GiB)": 34.88, "step": 123150, "train_speed(iter/s)": 0.410927 }, { "acc": 0.94832611, "epoch": 3.3345517558822735, "grad_norm": 7.193368434906006, "learning_rate": 7.386979277491788e-07, "loss": 0.33613565, "memory(GiB)": 34.88, "step": 123155, "train_speed(iter/s)": 0.410928 }, { "acc": 0.92966862, "epoch": 3.334687136165489, "grad_norm": 11.158145904541016, "learning_rate": 7.384054056989403e-07, "loss": 0.48215294, "memory(GiB)": 34.88, "step": 123160, "train_speed(iter/s)": 0.410929 }, { "acc": 0.95229378, "epoch": 3.334822516448704, "grad_norm": 5.16804313659668, "learning_rate": 7.381129370398131e-07, "loss": 0.35206409, "memory(GiB)": 34.88, "step": 123165, "train_speed(iter/s)": 0.41093 }, { "acc": 0.94424229, "epoch": 3.33495789673192, "grad_norm": 8.402815818786621, "learning_rate": 7.378205217754591e-07, "loss": 0.30969486, "memory(GiB)": 34.88, "step": 123170, "train_speed(iter/s)": 0.410931 }, { "acc": 0.9516346, "epoch": 3.3350932770151354, "grad_norm": 5.3404459953308105, "learning_rate": 7.375281599095396e-07, "loss": 0.29807034, "memory(GiB)": 34.88, "step": 123175, "train_speed(iter/s)": 0.410931 }, { "acc": 0.93469067, "epoch": 3.335228657298351, "grad_norm": 7.231412410736084, "learning_rate": 7.372358514457211e-07, "loss": 0.42518845, "memory(GiB)": 34.88, "step": 123180, "train_speed(iter/s)": 0.410932 }, { "acc": 0.93655796, "epoch": 3.3353640375815665, "grad_norm": 5.385775566101074, "learning_rate": 7.369435963876624e-07, "loss": 0.34773772, "memory(GiB)": 34.88, "step": 123185, "train_speed(iter/s)": 0.410933 }, { "acc": 0.95545177, "epoch": 3.3354994178647823, "grad_norm": 7.378081321716309, "learning_rate": 7.366513947390268e-07, "loss": 0.25312262, "memory(GiB)": 34.88, "step": 123190, "train_speed(iter/s)": 0.410934 }, { "acc": 0.94603653, "epoch": 3.3356347981479977, "grad_norm": 3.8728675842285156, "learning_rate": 7.363592465034711e-07, "loss": 0.35796802, "memory(GiB)": 34.88, "step": 123195, "train_speed(iter/s)": 0.410935 }, { "acc": 0.93895473, "epoch": 3.335770178431213, "grad_norm": 12.20259952545166, "learning_rate": 7.360671516846593e-07, "loss": 0.36771777, "memory(GiB)": 34.88, "step": 123200, "train_speed(iter/s)": 0.410936 }, { "acc": 0.9410347, "epoch": 3.335905558714429, "grad_norm": 4.159080982208252, "learning_rate": 7.357751102862478e-07, "loss": 0.31572835, "memory(GiB)": 34.88, "step": 123205, "train_speed(iter/s)": 0.410937 }, { "acc": 0.93819141, "epoch": 3.336040938997644, "grad_norm": 8.604938507080078, "learning_rate": 7.354831223118954e-07, "loss": 0.37530255, "memory(GiB)": 34.88, "step": 123210, "train_speed(iter/s)": 0.410938 }, { "acc": 0.95391083, "epoch": 3.33617631928086, "grad_norm": 3.0676748752593994, "learning_rate": 7.351911877652613e-07, "loss": 0.22917073, "memory(GiB)": 34.88, "step": 123215, "train_speed(iter/s)": 0.410938 }, { "acc": 0.93209, "epoch": 3.3363116995640754, "grad_norm": 7.989710330963135, "learning_rate": 7.348993066500006e-07, "loss": 0.40886497, "memory(GiB)": 34.88, "step": 123220, "train_speed(iter/s)": 0.410939 }, { "acc": 0.94395485, "epoch": 3.336447079847291, "grad_norm": 5.564438819885254, "learning_rate": 7.346074789697711e-07, "loss": 0.34792519, "memory(GiB)": 34.88, "step": 123225, "train_speed(iter/s)": 0.41094 }, { "acc": 0.95516891, "epoch": 3.3365824601305065, "grad_norm": 4.105055332183838, "learning_rate": 7.343157047282259e-07, "loss": 0.24036789, "memory(GiB)": 34.88, "step": 123230, "train_speed(iter/s)": 0.410941 }, { "acc": 0.9543169, "epoch": 3.3367178404137223, "grad_norm": 7.440495491027832, "learning_rate": 7.340239839290244e-07, "loss": 0.31570611, "memory(GiB)": 34.88, "step": 123235, "train_speed(iter/s)": 0.410942 }, { "acc": 0.95294838, "epoch": 3.3368532206969377, "grad_norm": 5.678990364074707, "learning_rate": 7.337323165758173e-07, "loss": 0.29293716, "memory(GiB)": 34.88, "step": 123240, "train_speed(iter/s)": 0.410943 }, { "acc": 0.94413242, "epoch": 3.3369886009801535, "grad_norm": 4.872976303100586, "learning_rate": 7.334407026722595e-07, "loss": 0.3392426, "memory(GiB)": 34.88, "step": 123245, "train_speed(iter/s)": 0.410943 }, { "acc": 0.950173, "epoch": 3.337123981263369, "grad_norm": 3.4873218536376953, "learning_rate": 7.331491422220056e-07, "loss": 0.23339403, "memory(GiB)": 34.88, "step": 123250, "train_speed(iter/s)": 0.410944 }, { "acc": 0.92146139, "epoch": 3.337259361546584, "grad_norm": 8.145902633666992, "learning_rate": 7.328576352287051e-07, "loss": 0.54037461, "memory(GiB)": 34.88, "step": 123255, "train_speed(iter/s)": 0.410945 }, { "acc": 0.9468689, "epoch": 3.3373947418298, "grad_norm": 10.978581428527832, "learning_rate": 7.325661816960124e-07, "loss": 0.30276947, "memory(GiB)": 34.88, "step": 123260, "train_speed(iter/s)": 0.410946 }, { "acc": 0.95047855, "epoch": 3.3375301221130154, "grad_norm": 3.610422134399414, "learning_rate": 7.322747816275754e-07, "loss": 0.29364233, "memory(GiB)": 34.88, "step": 123265, "train_speed(iter/s)": 0.410947 }, { "acc": 0.94531727, "epoch": 3.337665502396231, "grad_norm": 15.24022102355957, "learning_rate": 7.319834350270479e-07, "loss": 0.36514783, "memory(GiB)": 34.88, "step": 123270, "train_speed(iter/s)": 0.410948 }, { "acc": 0.93802252, "epoch": 3.3378008826794465, "grad_norm": 11.108222961425781, "learning_rate": 7.316921418980774e-07, "loss": 0.36991231, "memory(GiB)": 34.88, "step": 123275, "train_speed(iter/s)": 0.410949 }, { "acc": 0.94191952, "epoch": 3.3379362629626623, "grad_norm": 11.727910995483398, "learning_rate": 7.314009022443132e-07, "loss": 0.29686761, "memory(GiB)": 34.88, "step": 123280, "train_speed(iter/s)": 0.41095 }, { "acc": 0.94472904, "epoch": 3.3380716432458777, "grad_norm": 7.97298002243042, "learning_rate": 7.311097160694037e-07, "loss": 0.31032579, "memory(GiB)": 34.88, "step": 123285, "train_speed(iter/s)": 0.410951 }, { "acc": 0.95225735, "epoch": 3.338207023529093, "grad_norm": 32.25009536743164, "learning_rate": 7.308185833769986e-07, "loss": 0.31464646, "memory(GiB)": 34.88, "step": 123290, "train_speed(iter/s)": 0.410952 }, { "acc": 0.9515502, "epoch": 3.338342403812309, "grad_norm": 11.499831199645996, "learning_rate": 7.305275041707429e-07, "loss": 0.25059748, "memory(GiB)": 34.88, "step": 123295, "train_speed(iter/s)": 0.410952 }, { "acc": 0.94034157, "epoch": 3.338477784095524, "grad_norm": 6.318661212921143, "learning_rate": 7.302364784542808e-07, "loss": 0.36301179, "memory(GiB)": 34.88, "step": 123300, "train_speed(iter/s)": 0.410953 }, { "acc": 0.94934263, "epoch": 3.33861316437874, "grad_norm": 7.399337291717529, "learning_rate": 7.299455062312625e-07, "loss": 0.33828738, "memory(GiB)": 34.88, "step": 123305, "train_speed(iter/s)": 0.410954 }, { "acc": 0.9400013, "epoch": 3.3387485446619554, "grad_norm": 11.675360679626465, "learning_rate": 7.296545875053299e-07, "loss": 0.34997301, "memory(GiB)": 34.88, "step": 123310, "train_speed(iter/s)": 0.410955 }, { "acc": 0.95070791, "epoch": 3.338883924945171, "grad_norm": 9.337949752807617, "learning_rate": 7.293637222801285e-07, "loss": 0.33469465, "memory(GiB)": 34.88, "step": 123315, "train_speed(iter/s)": 0.410956 }, { "acc": 0.93689957, "epoch": 3.3390193052283865, "grad_norm": 21.001693725585938, "learning_rate": 7.290729105593016e-07, "loss": 0.35456452, "memory(GiB)": 34.88, "step": 123320, "train_speed(iter/s)": 0.410957 }, { "acc": 0.94605808, "epoch": 3.339154685511602, "grad_norm": 5.799840927124023, "learning_rate": 7.287821523464932e-07, "loss": 0.34875226, "memory(GiB)": 34.88, "step": 123325, "train_speed(iter/s)": 0.410958 }, { "acc": 0.94973564, "epoch": 3.3392900657948177, "grad_norm": 6.037114143371582, "learning_rate": 7.284914476453439e-07, "loss": 0.30169802, "memory(GiB)": 34.88, "step": 123330, "train_speed(iter/s)": 0.410959 }, { "acc": 0.94060202, "epoch": 3.339425446078033, "grad_norm": 5.173839569091797, "learning_rate": 7.28200796459496e-07, "loss": 0.3905653, "memory(GiB)": 34.88, "step": 123335, "train_speed(iter/s)": 0.410959 }, { "acc": 0.94974747, "epoch": 3.339560826361249, "grad_norm": 5.414687156677246, "learning_rate": 7.279101987925918e-07, "loss": 0.23119016, "memory(GiB)": 34.88, "step": 123340, "train_speed(iter/s)": 0.41096 }, { "acc": 0.94571991, "epoch": 3.339696206644464, "grad_norm": 9.046660423278809, "learning_rate": 7.276196546482697e-07, "loss": 0.35212731, "memory(GiB)": 34.88, "step": 123345, "train_speed(iter/s)": 0.410961 }, { "acc": 0.94153385, "epoch": 3.33983158692768, "grad_norm": 5.556846618652344, "learning_rate": 7.273291640301699e-07, "loss": 0.34724185, "memory(GiB)": 34.88, "step": 123350, "train_speed(iter/s)": 0.410962 }, { "acc": 0.96604242, "epoch": 3.3399669672108954, "grad_norm": 2.7233006954193115, "learning_rate": 7.270387269419314e-07, "loss": 0.21958776, "memory(GiB)": 34.88, "step": 123355, "train_speed(iter/s)": 0.410963 }, { "acc": 0.95721016, "epoch": 3.3401023474941107, "grad_norm": 1.993444800376892, "learning_rate": 7.267483433871943e-07, "loss": 0.23699579, "memory(GiB)": 34.88, "step": 123360, "train_speed(iter/s)": 0.410964 }, { "acc": 0.94418449, "epoch": 3.3402377277773265, "grad_norm": 4.397522449493408, "learning_rate": 7.264580133695932e-07, "loss": 0.34242363, "memory(GiB)": 34.88, "step": 123365, "train_speed(iter/s)": 0.410964 }, { "acc": 0.94084749, "epoch": 3.340373108060542, "grad_norm": 4.084299087524414, "learning_rate": 7.261677368927663e-07, "loss": 0.2915987, "memory(GiB)": 34.88, "step": 123370, "train_speed(iter/s)": 0.410965 }, { "acc": 0.94455433, "epoch": 3.3405084883437577, "grad_norm": 8.370366096496582, "learning_rate": 7.258775139603516e-07, "loss": 0.32895534, "memory(GiB)": 34.88, "step": 123375, "train_speed(iter/s)": 0.410966 }, { "acc": 0.94706726, "epoch": 3.340643868626973, "grad_norm": 10.297327995300293, "learning_rate": 7.255873445759817e-07, "loss": 0.37638922, "memory(GiB)": 34.88, "step": 123380, "train_speed(iter/s)": 0.410967 }, { "acc": 0.93859568, "epoch": 3.340779248910189, "grad_norm": 7.947301864624023, "learning_rate": 7.252972287432928e-07, "loss": 0.37366905, "memory(GiB)": 34.88, "step": 123385, "train_speed(iter/s)": 0.410968 }, { "acc": 0.93822193, "epoch": 3.340914629193404, "grad_norm": 5.251718521118164, "learning_rate": 7.250071664659197e-07, "loss": 0.38559866, "memory(GiB)": 34.88, "step": 123390, "train_speed(iter/s)": 0.410969 }, { "acc": 0.94885387, "epoch": 3.34105000947662, "grad_norm": 7.816747188568115, "learning_rate": 7.247171577474965e-07, "loss": 0.32398691, "memory(GiB)": 34.88, "step": 123395, "train_speed(iter/s)": 0.41097 }, { "acc": 0.92300072, "epoch": 3.3411853897598354, "grad_norm": 8.224486351013184, "learning_rate": 7.244272025916545e-07, "loss": 0.50464478, "memory(GiB)": 34.88, "step": 123400, "train_speed(iter/s)": 0.410971 }, { "acc": 0.95199146, "epoch": 3.341320770043051, "grad_norm": 4.212733268737793, "learning_rate": 7.241373010020268e-07, "loss": 0.28948054, "memory(GiB)": 34.88, "step": 123405, "train_speed(iter/s)": 0.410972 }, { "acc": 0.93962555, "epoch": 3.3414561503262665, "grad_norm": 7.489274024963379, "learning_rate": 7.238474529822447e-07, "loss": 0.28477435, "memory(GiB)": 34.88, "step": 123410, "train_speed(iter/s)": 0.410973 }, { "acc": 0.96292973, "epoch": 3.341591530609482, "grad_norm": 5.367887020111084, "learning_rate": 7.235576585359408e-07, "loss": 0.23285527, "memory(GiB)": 34.88, "step": 123415, "train_speed(iter/s)": 0.410974 }, { "acc": 0.95917425, "epoch": 3.3417269108926977, "grad_norm": 4.607844829559326, "learning_rate": 7.232679176667424e-07, "loss": 0.23947814, "memory(GiB)": 34.88, "step": 123420, "train_speed(iter/s)": 0.410975 }, { "acc": 0.94109764, "epoch": 3.341862291175913, "grad_norm": 2.9453628063201904, "learning_rate": 7.229782303782806e-07, "loss": 0.3270633, "memory(GiB)": 34.88, "step": 123425, "train_speed(iter/s)": 0.410975 }, { "acc": 0.94889584, "epoch": 3.341997671459129, "grad_norm": 11.590843200683594, "learning_rate": 7.226885966741857e-07, "loss": 0.25584865, "memory(GiB)": 34.88, "step": 123430, "train_speed(iter/s)": 0.410976 }, { "acc": 0.94100409, "epoch": 3.342133051742344, "grad_norm": 5.173356056213379, "learning_rate": 7.22399016558083e-07, "loss": 0.35426359, "memory(GiB)": 34.88, "step": 123435, "train_speed(iter/s)": 0.410977 }, { "acc": 0.95318623, "epoch": 3.34226843202556, "grad_norm": 12.522238731384277, "learning_rate": 7.221094900336017e-07, "loss": 0.27440133, "memory(GiB)": 34.88, "step": 123440, "train_speed(iter/s)": 0.410978 }, { "acc": 0.9411293, "epoch": 3.3424038123087754, "grad_norm": 10.815176963806152, "learning_rate": 7.218200171043683e-07, "loss": 0.36749425, "memory(GiB)": 34.88, "step": 123445, "train_speed(iter/s)": 0.410979 }, { "acc": 0.94400997, "epoch": 3.3425391925919907, "grad_norm": 7.279734134674072, "learning_rate": 7.215305977740111e-07, "loss": 0.3295809, "memory(GiB)": 34.88, "step": 123450, "train_speed(iter/s)": 0.41098 }, { "acc": 0.95195627, "epoch": 3.3426745728752065, "grad_norm": 6.128235816955566, "learning_rate": 7.212412320461524e-07, "loss": 0.28714745, "memory(GiB)": 34.88, "step": 123455, "train_speed(iter/s)": 0.410981 }, { "acc": 0.95348253, "epoch": 3.342809953158422, "grad_norm": 7.76484489440918, "learning_rate": 7.209519199244183e-07, "loss": 0.31494484, "memory(GiB)": 34.88, "step": 123460, "train_speed(iter/s)": 0.410982 }, { "acc": 0.95433483, "epoch": 3.3429453334416377, "grad_norm": 4.4335036277771, "learning_rate": 7.206626614124345e-07, "loss": 0.23439677, "memory(GiB)": 34.88, "step": 123465, "train_speed(iter/s)": 0.410983 }, { "acc": 0.94011402, "epoch": 3.343080713724853, "grad_norm": 5.805050373077393, "learning_rate": 7.203734565138222e-07, "loss": 0.3746238, "memory(GiB)": 34.88, "step": 123470, "train_speed(iter/s)": 0.410983 }, { "acc": 0.92637959, "epoch": 3.343216094008069, "grad_norm": 8.844854354858398, "learning_rate": 7.200843052322047e-07, "loss": 0.44118032, "memory(GiB)": 34.88, "step": 123475, "train_speed(iter/s)": 0.410984 }, { "acc": 0.94195576, "epoch": 3.343351474291284, "grad_norm": 6.6848883628845215, "learning_rate": 7.197952075712056e-07, "loss": 0.36804328, "memory(GiB)": 34.88, "step": 123480, "train_speed(iter/s)": 0.410985 }, { "acc": 0.94723129, "epoch": 3.3434868545744996, "grad_norm": 2.878862142562866, "learning_rate": 7.195061635344465e-07, "loss": 0.29947534, "memory(GiB)": 34.88, "step": 123485, "train_speed(iter/s)": 0.410986 }, { "acc": 0.94860439, "epoch": 3.3436222348577154, "grad_norm": 11.278316497802734, "learning_rate": 7.192171731255457e-07, "loss": 0.24847088, "memory(GiB)": 34.88, "step": 123490, "train_speed(iter/s)": 0.410987 }, { "acc": 0.94508352, "epoch": 3.3437576151409307, "grad_norm": 4.611189365386963, "learning_rate": 7.189282363481258e-07, "loss": 0.28349891, "memory(GiB)": 34.88, "step": 123495, "train_speed(iter/s)": 0.410988 }, { "acc": 0.94438934, "epoch": 3.3438929954241465, "grad_norm": 7.037990093231201, "learning_rate": 7.186393532058049e-07, "loss": 0.32311053, "memory(GiB)": 34.88, "step": 123500, "train_speed(iter/s)": 0.410989 }, { "acc": 0.95214367, "epoch": 3.344028375707362, "grad_norm": 7.958112716674805, "learning_rate": 7.183505237022037e-07, "loss": 0.28604279, "memory(GiB)": 34.88, "step": 123505, "train_speed(iter/s)": 0.41099 }, { "acc": 0.94050531, "epoch": 3.3441637559905777, "grad_norm": 6.087304592132568, "learning_rate": 7.180617478409381e-07, "loss": 0.38452282, "memory(GiB)": 34.88, "step": 123510, "train_speed(iter/s)": 0.410991 }, { "acc": 0.93202477, "epoch": 3.344299136273793, "grad_norm": 14.975470542907715, "learning_rate": 7.177730256256262e-07, "loss": 0.46219592, "memory(GiB)": 34.88, "step": 123515, "train_speed(iter/s)": 0.410992 }, { "acc": 0.9470789, "epoch": 3.3444345165570084, "grad_norm": 5.681127548217773, "learning_rate": 7.174843570598865e-07, "loss": 0.34822369, "memory(GiB)": 34.88, "step": 123520, "train_speed(iter/s)": 0.410992 }, { "acc": 0.95539923, "epoch": 3.344569896840224, "grad_norm": 5.452512264251709, "learning_rate": 7.17195742147333e-07, "loss": 0.26123257, "memory(GiB)": 34.88, "step": 123525, "train_speed(iter/s)": 0.410993 }, { "acc": 0.9563549, "epoch": 3.3447052771234396, "grad_norm": 4.473747730255127, "learning_rate": 7.16907180891582e-07, "loss": 0.2678175, "memory(GiB)": 34.88, "step": 123530, "train_speed(iter/s)": 0.410994 }, { "acc": 0.95152931, "epoch": 3.3448406574066554, "grad_norm": 9.288752555847168, "learning_rate": 7.166186732962484e-07, "loss": 0.32528033, "memory(GiB)": 34.88, "step": 123535, "train_speed(iter/s)": 0.410995 }, { "acc": 0.9440134, "epoch": 3.3449760376898707, "grad_norm": 4.996462345123291, "learning_rate": 7.163302193649467e-07, "loss": 0.3400115, "memory(GiB)": 34.88, "step": 123540, "train_speed(iter/s)": 0.410996 }, { "acc": 0.95601406, "epoch": 3.3451114179730865, "grad_norm": 7.107100009918213, "learning_rate": 7.160418191012893e-07, "loss": 0.26892526, "memory(GiB)": 34.88, "step": 123545, "train_speed(iter/s)": 0.410997 }, { "acc": 0.93536663, "epoch": 3.345246798256302, "grad_norm": 9.358251571655273, "learning_rate": 7.157534725088896e-07, "loss": 0.34601974, "memory(GiB)": 34.88, "step": 123550, "train_speed(iter/s)": 0.410998 }, { "acc": 0.93417645, "epoch": 3.3453821785395177, "grad_norm": 9.365497589111328, "learning_rate": 7.154651795913606e-07, "loss": 0.40367074, "memory(GiB)": 34.88, "step": 123555, "train_speed(iter/s)": 0.410999 }, { "acc": 0.94317474, "epoch": 3.345517558822733, "grad_norm": 9.967903137207031, "learning_rate": 7.151769403523116e-07, "loss": 0.42746124, "memory(GiB)": 34.88, "step": 123560, "train_speed(iter/s)": 0.411 }, { "acc": 0.95005226, "epoch": 3.345652939105949, "grad_norm": 6.558238983154297, "learning_rate": 7.148887547953544e-07, "loss": 0.31602883, "memory(GiB)": 34.88, "step": 123565, "train_speed(iter/s)": 0.411001 }, { "acc": 0.94252367, "epoch": 3.345788319389164, "grad_norm": 8.35098648071289, "learning_rate": 7.146006229240999e-07, "loss": 0.3493319, "memory(GiB)": 34.88, "step": 123570, "train_speed(iter/s)": 0.411002 }, { "acc": 0.94667492, "epoch": 3.3459236996723796, "grad_norm": 14.132384300231934, "learning_rate": 7.143125447421577e-07, "loss": 0.25965171, "memory(GiB)": 34.88, "step": 123575, "train_speed(iter/s)": 0.411002 }, { "acc": 0.9393012, "epoch": 3.3460590799555954, "grad_norm": 4.230287075042725, "learning_rate": 7.140245202531332e-07, "loss": 0.34413238, "memory(GiB)": 34.88, "step": 123580, "train_speed(iter/s)": 0.411003 }, { "acc": 0.94711666, "epoch": 3.3461944602388107, "grad_norm": 4.428835391998291, "learning_rate": 7.137365494606393e-07, "loss": 0.3113893, "memory(GiB)": 34.88, "step": 123585, "train_speed(iter/s)": 0.411004 }, { "acc": 0.96237936, "epoch": 3.3463298405220265, "grad_norm": 3.777557134628296, "learning_rate": 7.134486323682815e-07, "loss": 0.25703359, "memory(GiB)": 34.88, "step": 123590, "train_speed(iter/s)": 0.411005 }, { "acc": 0.95691471, "epoch": 3.346465220805242, "grad_norm": 4.71608304977417, "learning_rate": 7.131607689796645e-07, "loss": 0.28145764, "memory(GiB)": 34.88, "step": 123595, "train_speed(iter/s)": 0.411006 }, { "acc": 0.9397481, "epoch": 3.3466006010884577, "grad_norm": 6.090651512145996, "learning_rate": 7.128729592983959e-07, "loss": 0.38385859, "memory(GiB)": 34.88, "step": 123600, "train_speed(iter/s)": 0.411007 }, { "acc": 0.94416523, "epoch": 3.346735981371673, "grad_norm": 11.566914558410645, "learning_rate": 7.125852033280816e-07, "loss": 0.33654823, "memory(GiB)": 34.88, "step": 123605, "train_speed(iter/s)": 0.411008 }, { "acc": 0.93976622, "epoch": 3.3468713616548884, "grad_norm": 11.343255043029785, "learning_rate": 7.122975010723271e-07, "loss": 0.33701692, "memory(GiB)": 34.88, "step": 123610, "train_speed(iter/s)": 0.411009 }, { "acc": 0.92991438, "epoch": 3.347006741938104, "grad_norm": 6.646332740783691, "learning_rate": 7.120098525347326e-07, "loss": 0.44490051, "memory(GiB)": 34.88, "step": 123615, "train_speed(iter/s)": 0.41101 }, { "acc": 0.95081863, "epoch": 3.3471421222213196, "grad_norm": 15.18244457244873, "learning_rate": 7.117222577189064e-07, "loss": 0.28921952, "memory(GiB)": 34.88, "step": 123620, "train_speed(iter/s)": 0.411011 }, { "acc": 0.95267849, "epoch": 3.3472775025045354, "grad_norm": 11.8978853225708, "learning_rate": 7.114347166284474e-07, "loss": 0.33162074, "memory(GiB)": 34.88, "step": 123625, "train_speed(iter/s)": 0.411012 }, { "acc": 0.96879768, "epoch": 3.3474128827877507, "grad_norm": 4.241684436798096, "learning_rate": 7.11147229266961e-07, "loss": 0.17945685, "memory(GiB)": 34.88, "step": 123630, "train_speed(iter/s)": 0.411013 }, { "acc": 0.94923353, "epoch": 3.3475482630709665, "grad_norm": 9.779497146606445, "learning_rate": 7.108597956380449e-07, "loss": 0.30782561, "memory(GiB)": 34.88, "step": 123635, "train_speed(iter/s)": 0.411014 }, { "acc": 0.94918995, "epoch": 3.347683643354182, "grad_norm": 11.504800796508789, "learning_rate": 7.105724157453014e-07, "loss": 0.29040856, "memory(GiB)": 34.88, "step": 123640, "train_speed(iter/s)": 0.411014 }, { "acc": 0.95116577, "epoch": 3.3478190236373973, "grad_norm": 5.120272636413574, "learning_rate": 7.102850895923323e-07, "loss": 0.26040323, "memory(GiB)": 34.88, "step": 123645, "train_speed(iter/s)": 0.411015 }, { "acc": 0.94356441, "epoch": 3.347954403920613, "grad_norm": 6.356788635253906, "learning_rate": 7.099978171827325e-07, "loss": 0.38726468, "memory(GiB)": 34.88, "step": 123650, "train_speed(iter/s)": 0.411016 }, { "acc": 0.94090862, "epoch": 3.3480897842038284, "grad_norm": 2.615327835083008, "learning_rate": 7.097105985201058e-07, "loss": 0.36737742, "memory(GiB)": 34.88, "step": 123655, "train_speed(iter/s)": 0.411017 }, { "acc": 0.9514307, "epoch": 3.348225164487044, "grad_norm": 8.637619018554688, "learning_rate": 7.094234336080464e-07, "loss": 0.31864023, "memory(GiB)": 34.88, "step": 123660, "train_speed(iter/s)": 0.411018 }, { "acc": 0.96518183, "epoch": 3.3483605447702596, "grad_norm": 6.232928276062012, "learning_rate": 7.09136322450154e-07, "loss": 0.18223424, "memory(GiB)": 34.88, "step": 123665, "train_speed(iter/s)": 0.411019 }, { "acc": 0.94946423, "epoch": 3.3484959250534754, "grad_norm": 24.406118392944336, "learning_rate": 7.088492650500232e-07, "loss": 0.28593745, "memory(GiB)": 34.88, "step": 123670, "train_speed(iter/s)": 0.41102 }, { "acc": 0.95309849, "epoch": 3.3486313053366907, "grad_norm": 4.807991027832031, "learning_rate": 7.085622614112512e-07, "loss": 0.22516057, "memory(GiB)": 34.88, "step": 123675, "train_speed(iter/s)": 0.411021 }, { "acc": 0.93836784, "epoch": 3.348766685619906, "grad_norm": 3.6091699600219727, "learning_rate": 7.082753115374337e-07, "loss": 0.38217151, "memory(GiB)": 34.88, "step": 123680, "train_speed(iter/s)": 0.411022 }, { "acc": 0.95278797, "epoch": 3.348902065903122, "grad_norm": 6.609189033508301, "learning_rate": 7.079884154321624e-07, "loss": 0.24138155, "memory(GiB)": 34.88, "step": 123685, "train_speed(iter/s)": 0.411022 }, { "acc": 0.94741459, "epoch": 3.3490374461863373, "grad_norm": 6.995180606842041, "learning_rate": 7.077015730990358e-07, "loss": 0.29050393, "memory(GiB)": 34.88, "step": 123690, "train_speed(iter/s)": 0.411023 }, { "acc": 0.95478897, "epoch": 3.349172826469553, "grad_norm": 3.7833352088928223, "learning_rate": 7.074147845416441e-07, "loss": 0.28979821, "memory(GiB)": 34.88, "step": 123695, "train_speed(iter/s)": 0.411024 }, { "acc": 0.95029316, "epoch": 3.3493082067527684, "grad_norm": 4.375296115875244, "learning_rate": 7.071280497635814e-07, "loss": 0.27120371, "memory(GiB)": 34.88, "step": 123700, "train_speed(iter/s)": 0.411025 }, { "acc": 0.93637676, "epoch": 3.349443587035984, "grad_norm": 9.473175048828125, "learning_rate": 7.068413687684368e-07, "loss": 0.39511578, "memory(GiB)": 34.88, "step": 123705, "train_speed(iter/s)": 0.411026 }, { "acc": 0.94088268, "epoch": 3.3495789673191996, "grad_norm": 5.175363063812256, "learning_rate": 7.065547415598055e-07, "loss": 0.32324305, "memory(GiB)": 34.88, "step": 123710, "train_speed(iter/s)": 0.411027 }, { "acc": 0.94201336, "epoch": 3.3497143476024154, "grad_norm": 10.989903450012207, "learning_rate": 7.062681681412766e-07, "loss": 0.30865426, "memory(GiB)": 34.88, "step": 123715, "train_speed(iter/s)": 0.411028 }, { "acc": 0.92991638, "epoch": 3.3498497278856307, "grad_norm": 10.846551895141602, "learning_rate": 7.059816485164369e-07, "loss": 0.37398973, "memory(GiB)": 34.88, "step": 123720, "train_speed(iter/s)": 0.411029 }, { "acc": 0.94644928, "epoch": 3.3499851081688465, "grad_norm": 11.936275482177734, "learning_rate": 7.056951826888804e-07, "loss": 0.29355843, "memory(GiB)": 34.88, "step": 123725, "train_speed(iter/s)": 0.41103 }, { "acc": 0.93905153, "epoch": 3.350120488452062, "grad_norm": 5.010049819946289, "learning_rate": 7.054087706621926e-07, "loss": 0.33312182, "memory(GiB)": 34.88, "step": 123730, "train_speed(iter/s)": 0.411031 }, { "acc": 0.95211849, "epoch": 3.3502558687352773, "grad_norm": 8.127335548400879, "learning_rate": 7.051224124399633e-07, "loss": 0.25798621, "memory(GiB)": 34.88, "step": 123735, "train_speed(iter/s)": 0.411032 }, { "acc": 0.94987345, "epoch": 3.350391249018493, "grad_norm": 6.655078411102295, "learning_rate": 7.048361080257769e-07, "loss": 0.29838214, "memory(GiB)": 34.88, "step": 123740, "train_speed(iter/s)": 0.411032 }, { "acc": 0.95350924, "epoch": 3.3505266293017084, "grad_norm": 5.563732147216797, "learning_rate": 7.045498574232235e-07, "loss": 0.32559514, "memory(GiB)": 34.88, "step": 123745, "train_speed(iter/s)": 0.411033 }, { "acc": 0.94583178, "epoch": 3.350662009584924, "grad_norm": 4.251684188842773, "learning_rate": 7.04263660635886e-07, "loss": 0.30836864, "memory(GiB)": 34.88, "step": 123750, "train_speed(iter/s)": 0.411034 }, { "acc": 0.93716555, "epoch": 3.3507973898681396, "grad_norm": 4.409885883331299, "learning_rate": 7.039775176673519e-07, "loss": 0.39662089, "memory(GiB)": 34.88, "step": 123755, "train_speed(iter/s)": 0.411035 }, { "acc": 0.95765228, "epoch": 3.3509327701513554, "grad_norm": 23.633798599243164, "learning_rate": 7.036914285212038e-07, "loss": 0.24155502, "memory(GiB)": 34.88, "step": 123760, "train_speed(iter/s)": 0.411036 }, { "acc": 0.94617863, "epoch": 3.3510681504345707, "grad_norm": 8.0615873336792, "learning_rate": 7.034053932010258e-07, "loss": 0.3252516, "memory(GiB)": 34.88, "step": 123765, "train_speed(iter/s)": 0.411037 }, { "acc": 0.93792267, "epoch": 3.351203530717786, "grad_norm": 18.791446685791016, "learning_rate": 7.031194117104031e-07, "loss": 0.4017724, "memory(GiB)": 34.88, "step": 123770, "train_speed(iter/s)": 0.411038 }, { "acc": 0.94819908, "epoch": 3.351338911001002, "grad_norm": 9.317987442016602, "learning_rate": 7.028334840529141e-07, "loss": 0.31141956, "memory(GiB)": 34.88, "step": 123775, "train_speed(iter/s)": 0.411039 }, { "acc": 0.95575247, "epoch": 3.3514742912842173, "grad_norm": 4.0939226150512695, "learning_rate": 7.025476102321456e-07, "loss": 0.27133384, "memory(GiB)": 34.88, "step": 123780, "train_speed(iter/s)": 0.41104 }, { "acc": 0.94131355, "epoch": 3.351609671567433, "grad_norm": 11.87617015838623, "learning_rate": 7.022617902516747e-07, "loss": 0.32222233, "memory(GiB)": 34.88, "step": 123785, "train_speed(iter/s)": 0.411041 }, { "acc": 0.95463734, "epoch": 3.3517450518506484, "grad_norm": 6.461194038391113, "learning_rate": 7.019760241150847e-07, "loss": 0.29008329, "memory(GiB)": 34.88, "step": 123790, "train_speed(iter/s)": 0.411042 }, { "acc": 0.95202789, "epoch": 3.351880432133864, "grad_norm": 5.309690952301025, "learning_rate": 7.016903118259532e-07, "loss": 0.3090327, "memory(GiB)": 34.88, "step": 123795, "train_speed(iter/s)": 0.411043 }, { "acc": 0.94843636, "epoch": 3.3520158124170796, "grad_norm": 27.290597915649414, "learning_rate": 7.014046533878596e-07, "loss": 0.31822941, "memory(GiB)": 34.88, "step": 123800, "train_speed(iter/s)": 0.411044 }, { "acc": 0.94610405, "epoch": 3.352151192700295, "grad_norm": 10.359219551086426, "learning_rate": 7.011190488043845e-07, "loss": 0.30691135, "memory(GiB)": 34.88, "step": 123805, "train_speed(iter/s)": 0.411044 }, { "acc": 0.95992937, "epoch": 3.3522865729835107, "grad_norm": 2.9276058673858643, "learning_rate": 7.008334980791015e-07, "loss": 0.23815448, "memory(GiB)": 34.88, "step": 123810, "train_speed(iter/s)": 0.411045 }, { "acc": 0.94504337, "epoch": 3.352421953266726, "grad_norm": 11.116578102111816, "learning_rate": 7.005480012155932e-07, "loss": 0.30800762, "memory(GiB)": 34.88, "step": 123815, "train_speed(iter/s)": 0.411046 }, { "acc": 0.934128, "epoch": 3.352557333549942, "grad_norm": 11.152759552001953, "learning_rate": 7.002625582174311e-07, "loss": 0.46822309, "memory(GiB)": 34.88, "step": 123820, "train_speed(iter/s)": 0.411047 }, { "acc": 0.95071678, "epoch": 3.3526927138331573, "grad_norm": 3.9191677570343018, "learning_rate": 6.999771690881944e-07, "loss": 0.30709095, "memory(GiB)": 34.88, "step": 123825, "train_speed(iter/s)": 0.411048 }, { "acc": 0.94953194, "epoch": 3.352828094116373, "grad_norm": 7.672627925872803, "learning_rate": 6.996918338314544e-07, "loss": 0.30318964, "memory(GiB)": 34.88, "step": 123830, "train_speed(iter/s)": 0.411049 }, { "acc": 0.94163532, "epoch": 3.3529634743995884, "grad_norm": 9.036495208740234, "learning_rate": 6.994065524507905e-07, "loss": 0.37714777, "memory(GiB)": 34.88, "step": 123835, "train_speed(iter/s)": 0.41105 }, { "acc": 0.93832989, "epoch": 3.3530988546828038, "grad_norm": 6.214881420135498, "learning_rate": 6.991213249497729e-07, "loss": 0.32541604, "memory(GiB)": 34.88, "step": 123840, "train_speed(iter/s)": 0.411051 }, { "acc": 0.94487858, "epoch": 3.3532342349660196, "grad_norm": 7.311717510223389, "learning_rate": 6.988361513319737e-07, "loss": 0.30693607, "memory(GiB)": 34.88, "step": 123845, "train_speed(iter/s)": 0.411052 }, { "acc": 0.94934196, "epoch": 3.353369615249235, "grad_norm": 7.960300445556641, "learning_rate": 6.98551031600969e-07, "loss": 0.33014255, "memory(GiB)": 34.88, "step": 123850, "train_speed(iter/s)": 0.411053 }, { "acc": 0.94781399, "epoch": 3.3535049955324507, "grad_norm": 16.27939796447754, "learning_rate": 6.982659657603274e-07, "loss": 0.30133789, "memory(GiB)": 34.88, "step": 123855, "train_speed(iter/s)": 0.411054 }, { "acc": 0.94888706, "epoch": 3.353640375815666, "grad_norm": 6.741082191467285, "learning_rate": 6.979809538136216e-07, "loss": 0.30322196, "memory(GiB)": 34.88, "step": 123860, "train_speed(iter/s)": 0.411054 }, { "acc": 0.94217653, "epoch": 3.353775756098882, "grad_norm": 6.7957353591918945, "learning_rate": 6.976959957644198e-07, "loss": 0.37950363, "memory(GiB)": 34.88, "step": 123865, "train_speed(iter/s)": 0.411055 }, { "acc": 0.93617668, "epoch": 3.3539111363820973, "grad_norm": 11.43806266784668, "learning_rate": 6.97411091616295e-07, "loss": 0.38397982, "memory(GiB)": 34.88, "step": 123870, "train_speed(iter/s)": 0.411056 }, { "acc": 0.96118202, "epoch": 3.3540465166653126, "grad_norm": 4.704473495483398, "learning_rate": 6.971262413728127e-07, "loss": 0.22263579, "memory(GiB)": 34.88, "step": 123875, "train_speed(iter/s)": 0.411057 }, { "acc": 0.94482803, "epoch": 3.3541818969485284, "grad_norm": 9.4180908203125, "learning_rate": 6.968414450375439e-07, "loss": 0.35239611, "memory(GiB)": 34.88, "step": 123880, "train_speed(iter/s)": 0.411058 }, { "acc": 0.96031666, "epoch": 3.3543172772317438, "grad_norm": 10.284920692443848, "learning_rate": 6.965567026140559e-07, "loss": 0.2156146, "memory(GiB)": 34.88, "step": 123885, "train_speed(iter/s)": 0.411059 }, { "acc": 0.95033321, "epoch": 3.3544526575149596, "grad_norm": 5.184803009033203, "learning_rate": 6.962720141059135e-07, "loss": 0.27306814, "memory(GiB)": 34.88, "step": 123890, "train_speed(iter/s)": 0.41106 }, { "acc": 0.94789, "epoch": 3.354588037798175, "grad_norm": 21.879322052001953, "learning_rate": 6.959873795166854e-07, "loss": 0.32325611, "memory(GiB)": 34.88, "step": 123895, "train_speed(iter/s)": 0.411061 }, { "acc": 0.94374981, "epoch": 3.3547234180813907, "grad_norm": 4.543474197387695, "learning_rate": 6.957027988499344e-07, "loss": 0.35088153, "memory(GiB)": 34.88, "step": 123900, "train_speed(iter/s)": 0.411061 }, { "acc": 0.95193205, "epoch": 3.354858798364606, "grad_norm": 6.311404228210449, "learning_rate": 6.954182721092297e-07, "loss": 0.31014199, "memory(GiB)": 34.88, "step": 123905, "train_speed(iter/s)": 0.411062 }, { "acc": 0.9488163, "epoch": 3.354994178647822, "grad_norm": 11.494882583618164, "learning_rate": 6.951337992981312e-07, "loss": 0.30487211, "memory(GiB)": 34.88, "step": 123910, "train_speed(iter/s)": 0.411063 }, { "acc": 0.9397789, "epoch": 3.3551295589310373, "grad_norm": 5.214299201965332, "learning_rate": 6.948493804202053e-07, "loss": 0.35541892, "memory(GiB)": 34.88, "step": 123915, "train_speed(iter/s)": 0.411064 }, { "acc": 0.9329216, "epoch": 3.355264939214253, "grad_norm": 9.119455337524414, "learning_rate": 6.945650154790148e-07, "loss": 0.41584969, "memory(GiB)": 34.88, "step": 123920, "train_speed(iter/s)": 0.411065 }, { "acc": 0.94176168, "epoch": 3.3554003194974684, "grad_norm": 3.997041940689087, "learning_rate": 6.942807044781198e-07, "loss": 0.34165182, "memory(GiB)": 34.88, "step": 123925, "train_speed(iter/s)": 0.411066 }, { "acc": 0.94752674, "epoch": 3.355535699780684, "grad_norm": 3.902146577835083, "learning_rate": 6.939964474210839e-07, "loss": 0.29744132, "memory(GiB)": 34.88, "step": 123930, "train_speed(iter/s)": 0.411067 }, { "acc": 0.92928009, "epoch": 3.3556710800638996, "grad_norm": 6.880041122436523, "learning_rate": 6.937122443114655e-07, "loss": 0.46897402, "memory(GiB)": 34.88, "step": 123935, "train_speed(iter/s)": 0.411068 }, { "acc": 0.93914557, "epoch": 3.355806460347115, "grad_norm": 4.621492862701416, "learning_rate": 6.934280951528293e-07, "loss": 0.38378282, "memory(GiB)": 34.88, "step": 123940, "train_speed(iter/s)": 0.411069 }, { "acc": 0.93716564, "epoch": 3.3559418406303307, "grad_norm": 12.919241905212402, "learning_rate": 6.931439999487306e-07, "loss": 0.39147253, "memory(GiB)": 34.88, "step": 123945, "train_speed(iter/s)": 0.41107 }, { "acc": 0.95012999, "epoch": 3.356077220913546, "grad_norm": 9.389860153198242, "learning_rate": 6.928599587027293e-07, "loss": 0.30196619, "memory(GiB)": 34.88, "step": 123950, "train_speed(iter/s)": 0.411071 }, { "acc": 0.94548683, "epoch": 3.356212601196762, "grad_norm": 20.571041107177734, "learning_rate": 6.925759714183844e-07, "loss": 0.33888159, "memory(GiB)": 34.88, "step": 123955, "train_speed(iter/s)": 0.411071 }, { "acc": 0.94081545, "epoch": 3.3563479814799773, "grad_norm": 6.277301788330078, "learning_rate": 6.922920380992543e-07, "loss": 0.30991545, "memory(GiB)": 34.88, "step": 123960, "train_speed(iter/s)": 0.411072 }, { "acc": 0.9327693, "epoch": 3.3564833617631926, "grad_norm": 4.798786163330078, "learning_rate": 6.920081587488942e-07, "loss": 0.43687859, "memory(GiB)": 34.88, "step": 123965, "train_speed(iter/s)": 0.411073 }, { "acc": 0.93209743, "epoch": 3.3566187420464084, "grad_norm": 9.846281051635742, "learning_rate": 6.917243333708584e-07, "loss": 0.40225964, "memory(GiB)": 34.88, "step": 123970, "train_speed(iter/s)": 0.411074 }, { "acc": 0.9476881, "epoch": 3.356754122329624, "grad_norm": 12.06013011932373, "learning_rate": 6.914405619687069e-07, "loss": 0.3196311, "memory(GiB)": 34.88, "step": 123975, "train_speed(iter/s)": 0.411075 }, { "acc": 0.94835768, "epoch": 3.3568895026128396, "grad_norm": 5.764657020568848, "learning_rate": 6.911568445459907e-07, "loss": 0.33629439, "memory(GiB)": 34.88, "step": 123980, "train_speed(iter/s)": 0.411076 }, { "acc": 0.96148005, "epoch": 3.357024882896055, "grad_norm": 7.803297519683838, "learning_rate": 6.90873181106266e-07, "loss": 0.21180184, "memory(GiB)": 34.88, "step": 123985, "train_speed(iter/s)": 0.411077 }, { "acc": 0.93541203, "epoch": 3.3571602631792707, "grad_norm": 10.416265487670898, "learning_rate": 6.905895716530852e-07, "loss": 0.40165319, "memory(GiB)": 34.88, "step": 123990, "train_speed(iter/s)": 0.411078 }, { "acc": 0.95503998, "epoch": 3.357295643462486, "grad_norm": 6.512247562408447, "learning_rate": 6.903060161900026e-07, "loss": 0.26083469, "memory(GiB)": 34.88, "step": 123995, "train_speed(iter/s)": 0.411078 }, { "acc": 0.92324104, "epoch": 3.3574310237457015, "grad_norm": 15.305465698242188, "learning_rate": 6.900225147205681e-07, "loss": 0.5074821, "memory(GiB)": 34.88, "step": 124000, "train_speed(iter/s)": 0.411079 }, { "acc": 0.95678425, "epoch": 3.3575664040289173, "grad_norm": 8.699420928955078, "learning_rate": 6.897390672483338e-07, "loss": 0.20198159, "memory(GiB)": 34.88, "step": 124005, "train_speed(iter/s)": 0.41108 }, { "acc": 0.94886465, "epoch": 3.3577017843121326, "grad_norm": 4.372943878173828, "learning_rate": 6.894556737768525e-07, "loss": 0.24919622, "memory(GiB)": 34.88, "step": 124010, "train_speed(iter/s)": 0.411081 }, { "acc": 0.94990501, "epoch": 3.3578371645953484, "grad_norm": 5.427201271057129, "learning_rate": 6.891723343096713e-07, "loss": 0.28646178, "memory(GiB)": 34.88, "step": 124015, "train_speed(iter/s)": 0.411082 }, { "acc": 0.93429489, "epoch": 3.357972544878564, "grad_norm": 13.807720184326172, "learning_rate": 6.888890488503411e-07, "loss": 0.41327724, "memory(GiB)": 34.88, "step": 124020, "train_speed(iter/s)": 0.411083 }, { "acc": 0.92726173, "epoch": 3.3581079251617796, "grad_norm": 44.39564514160156, "learning_rate": 6.886058174024096e-07, "loss": 0.47252445, "memory(GiB)": 34.88, "step": 124025, "train_speed(iter/s)": 0.411084 }, { "acc": 0.94407597, "epoch": 3.358243305444995, "grad_norm": 10.000547409057617, "learning_rate": 6.883226399694276e-07, "loss": 0.29912858, "memory(GiB)": 34.88, "step": 124030, "train_speed(iter/s)": 0.411084 }, { "acc": 0.95102549, "epoch": 3.3583786857282103, "grad_norm": 9.012170791625977, "learning_rate": 6.880395165549386e-07, "loss": 0.27268929, "memory(GiB)": 34.88, "step": 124035, "train_speed(iter/s)": 0.411085 }, { "acc": 0.94795923, "epoch": 3.358514066011426, "grad_norm": 6.418504238128662, "learning_rate": 6.877564471624916e-07, "loss": 0.25559566, "memory(GiB)": 34.88, "step": 124040, "train_speed(iter/s)": 0.411086 }, { "acc": 0.92339849, "epoch": 3.3586494462946415, "grad_norm": 13.456089973449707, "learning_rate": 6.874734317956318e-07, "loss": 0.48707047, "memory(GiB)": 34.88, "step": 124045, "train_speed(iter/s)": 0.411087 }, { "acc": 0.94973698, "epoch": 3.3587848265778573, "grad_norm": 4.607161521911621, "learning_rate": 6.871904704579063e-07, "loss": 0.28013852, "memory(GiB)": 34.88, "step": 124050, "train_speed(iter/s)": 0.411088 }, { "acc": 0.93785982, "epoch": 3.3589202068610726, "grad_norm": 13.000967025756836, "learning_rate": 6.869075631528567e-07, "loss": 0.42578487, "memory(GiB)": 34.88, "step": 124055, "train_speed(iter/s)": 0.411089 }, { "acc": 0.93977871, "epoch": 3.3590555871442884, "grad_norm": 4.577693462371826, "learning_rate": 6.866247098840291e-07, "loss": 0.31751049, "memory(GiB)": 34.88, "step": 124060, "train_speed(iter/s)": 0.41109 }, { "acc": 0.95471153, "epoch": 3.359190967427504, "grad_norm": 5.722218036651611, "learning_rate": 6.863419106549669e-07, "loss": 0.2870024, "memory(GiB)": 34.88, "step": 124065, "train_speed(iter/s)": 0.411091 }, { "acc": 0.95350151, "epoch": 3.3593263477107196, "grad_norm": 6.785172462463379, "learning_rate": 6.860591654692117e-07, "loss": 0.31546152, "memory(GiB)": 34.88, "step": 124070, "train_speed(iter/s)": 0.411091 }, { "acc": 0.95794268, "epoch": 3.359461727993935, "grad_norm": 9.556035995483398, "learning_rate": 6.857764743303048e-07, "loss": 0.23764732, "memory(GiB)": 34.88, "step": 124075, "train_speed(iter/s)": 0.411092 }, { "acc": 0.94879999, "epoch": 3.3595971082771507, "grad_norm": 5.846816062927246, "learning_rate": 6.85493837241789e-07, "loss": 0.31254959, "memory(GiB)": 34.88, "step": 124080, "train_speed(iter/s)": 0.411093 }, { "acc": 0.94417381, "epoch": 3.359732488560366, "grad_norm": 5.847107410430908, "learning_rate": 6.852112542072057e-07, "loss": 0.3023777, "memory(GiB)": 34.88, "step": 124085, "train_speed(iter/s)": 0.411094 }, { "acc": 0.94540415, "epoch": 3.3598678688435815, "grad_norm": 7.377016067504883, "learning_rate": 6.849287252300922e-07, "loss": 0.33315225, "memory(GiB)": 34.88, "step": 124090, "train_speed(iter/s)": 0.411095 }, { "acc": 0.93754272, "epoch": 3.3600032491267973, "grad_norm": 6.862253665924072, "learning_rate": 6.846462503139889e-07, "loss": 0.39592423, "memory(GiB)": 34.88, "step": 124095, "train_speed(iter/s)": 0.411096 }, { "acc": 0.95383205, "epoch": 3.3601386294100126, "grad_norm": 6.511326313018799, "learning_rate": 6.843638294624353e-07, "loss": 0.25872085, "memory(GiB)": 34.88, "step": 124100, "train_speed(iter/s)": 0.411097 }, { "acc": 0.94433813, "epoch": 3.3602740096932284, "grad_norm": 4.299831867218018, "learning_rate": 6.84081462678968e-07, "loss": 0.32257845, "memory(GiB)": 34.88, "step": 124105, "train_speed(iter/s)": 0.411098 }, { "acc": 0.93572598, "epoch": 3.360409389976444, "grad_norm": 10.83735466003418, "learning_rate": 6.837991499671241e-07, "loss": 0.39696243, "memory(GiB)": 34.88, "step": 124110, "train_speed(iter/s)": 0.411099 }, { "acc": 0.94087296, "epoch": 3.3605447702596596, "grad_norm": 5.38753604888916, "learning_rate": 6.835168913304404e-07, "loss": 0.3109776, "memory(GiB)": 34.88, "step": 124115, "train_speed(iter/s)": 0.411099 }, { "acc": 0.93382816, "epoch": 3.360680150542875, "grad_norm": 7.197757720947266, "learning_rate": 6.832346867724546e-07, "loss": 0.33894143, "memory(GiB)": 34.88, "step": 124120, "train_speed(iter/s)": 0.4111 }, { "acc": 0.96683035, "epoch": 3.3608155308260903, "grad_norm": 5.470619201660156, "learning_rate": 6.829525362966993e-07, "loss": 0.23064101, "memory(GiB)": 34.88, "step": 124125, "train_speed(iter/s)": 0.411101 }, { "acc": 0.93808937, "epoch": 3.360950911109306, "grad_norm": 4.737085819244385, "learning_rate": 6.826704399067097e-07, "loss": 0.35348845, "memory(GiB)": 34.88, "step": 124130, "train_speed(iter/s)": 0.411102 }, { "acc": 0.94346962, "epoch": 3.3610862913925215, "grad_norm": 14.524106979370117, "learning_rate": 6.823883976060206e-07, "loss": 0.32271755, "memory(GiB)": 34.88, "step": 124135, "train_speed(iter/s)": 0.411103 }, { "acc": 0.94259424, "epoch": 3.3612216716757373, "grad_norm": 5.455353260040283, "learning_rate": 6.82106409398164e-07, "loss": 0.36497488, "memory(GiB)": 34.88, "step": 124140, "train_speed(iter/s)": 0.411104 }, { "acc": 0.93506823, "epoch": 3.3613570519589526, "grad_norm": 6.716712474822998, "learning_rate": 6.818244752866721e-07, "loss": 0.40725746, "memory(GiB)": 34.88, "step": 124145, "train_speed(iter/s)": 0.411105 }, { "acc": 0.9501626, "epoch": 3.3614924322421684, "grad_norm": 45.94768142700195, "learning_rate": 6.815425952750776e-07, "loss": 0.29378786, "memory(GiB)": 34.88, "step": 124150, "train_speed(iter/s)": 0.411106 }, { "acc": 0.94000654, "epoch": 3.361627812525384, "grad_norm": 6.259086608886719, "learning_rate": 6.812607693669125e-07, "loss": 0.39462461, "memory(GiB)": 34.88, "step": 124155, "train_speed(iter/s)": 0.411107 }, { "acc": 0.94076557, "epoch": 3.361763192808599, "grad_norm": 10.256953239440918, "learning_rate": 6.809789975657051e-07, "loss": 0.33682165, "memory(GiB)": 34.88, "step": 124160, "train_speed(iter/s)": 0.411108 }, { "acc": 0.95089741, "epoch": 3.361898573091815, "grad_norm": 12.608246803283691, "learning_rate": 6.806972798749858e-07, "loss": 0.33835526, "memory(GiB)": 34.88, "step": 124165, "train_speed(iter/s)": 0.411109 }, { "acc": 0.95424299, "epoch": 3.3620339533750303, "grad_norm": 4.553371906280518, "learning_rate": 6.804156162982839e-07, "loss": 0.28948975, "memory(GiB)": 34.88, "step": 124170, "train_speed(iter/s)": 0.411109 }, { "acc": 0.94551926, "epoch": 3.362169333658246, "grad_norm": 5.921257495880127, "learning_rate": 6.801340068391292e-07, "loss": 0.34790974, "memory(GiB)": 34.88, "step": 124175, "train_speed(iter/s)": 0.41111 }, { "acc": 0.9509613, "epoch": 3.3623047139414615, "grad_norm": 4.657601833343506, "learning_rate": 6.798524515010467e-07, "loss": 0.27274001, "memory(GiB)": 34.88, "step": 124180, "train_speed(iter/s)": 0.411111 }, { "acc": 0.95673332, "epoch": 3.3624400942246773, "grad_norm": 4.97307014465332, "learning_rate": 6.795709502875651e-07, "loss": 0.24225712, "memory(GiB)": 34.88, "step": 124185, "train_speed(iter/s)": 0.411112 }, { "acc": 0.94491768, "epoch": 3.3625754745078926, "grad_norm": 8.45806884765625, "learning_rate": 6.792895032022112e-07, "loss": 0.314713, "memory(GiB)": 34.88, "step": 124190, "train_speed(iter/s)": 0.411113 }, { "acc": 0.96076231, "epoch": 3.362710854791108, "grad_norm": 4.485509872436523, "learning_rate": 6.790081102485095e-07, "loss": 0.26231756, "memory(GiB)": 34.88, "step": 124195, "train_speed(iter/s)": 0.411114 }, { "acc": 0.94673119, "epoch": 3.362846235074324, "grad_norm": 7.635932922363281, "learning_rate": 6.787267714299846e-07, "loss": 0.40772281, "memory(GiB)": 34.88, "step": 124200, "train_speed(iter/s)": 0.411115 }, { "acc": 0.96574697, "epoch": 3.362981615357539, "grad_norm": 7.452362537384033, "learning_rate": 6.784454867501622e-07, "loss": 0.18512254, "memory(GiB)": 34.88, "step": 124205, "train_speed(iter/s)": 0.411116 }, { "acc": 0.94825897, "epoch": 3.363116995640755, "grad_norm": 5.951691150665283, "learning_rate": 6.781642562125665e-07, "loss": 0.31340339, "memory(GiB)": 34.88, "step": 124210, "train_speed(iter/s)": 0.411117 }, { "acc": 0.9510232, "epoch": 3.3632523759239703, "grad_norm": 9.905004501342773, "learning_rate": 6.778830798207181e-07, "loss": 0.33565338, "memory(GiB)": 34.88, "step": 124215, "train_speed(iter/s)": 0.411118 }, { "acc": 0.96015692, "epoch": 3.363387756207186, "grad_norm": 4.724673748016357, "learning_rate": 6.776019575781406e-07, "loss": 0.21209145, "memory(GiB)": 34.88, "step": 124220, "train_speed(iter/s)": 0.411119 }, { "acc": 0.9499155, "epoch": 3.3635231364904015, "grad_norm": 14.794118881225586, "learning_rate": 6.773208894883568e-07, "loss": 0.3065469, "memory(GiB)": 34.88, "step": 124225, "train_speed(iter/s)": 0.411119 }, { "acc": 0.94035273, "epoch": 3.3636585167736173, "grad_norm": 20.288562774658203, "learning_rate": 6.770398755548846e-07, "loss": 0.39183216, "memory(GiB)": 34.88, "step": 124230, "train_speed(iter/s)": 0.41112 }, { "acc": 0.93032684, "epoch": 3.3637938970568326, "grad_norm": 6.088500022888184, "learning_rate": 6.767589157812467e-07, "loss": 0.39935288, "memory(GiB)": 34.88, "step": 124235, "train_speed(iter/s)": 0.411121 }, { "acc": 0.95036716, "epoch": 3.3639292773400484, "grad_norm": 7.960209846496582, "learning_rate": 6.764780101709618e-07, "loss": 0.26131835, "memory(GiB)": 34.88, "step": 124240, "train_speed(iter/s)": 0.411122 }, { "acc": 0.93656349, "epoch": 3.364064657623264, "grad_norm": 10.789458274841309, "learning_rate": 6.761971587275502e-07, "loss": 0.43654461, "memory(GiB)": 34.88, "step": 124245, "train_speed(iter/s)": 0.411123 }, { "acc": 0.95877752, "epoch": 3.364200037906479, "grad_norm": 5.163431644439697, "learning_rate": 6.759163614545272e-07, "loss": 0.25838146, "memory(GiB)": 34.88, "step": 124250, "train_speed(iter/s)": 0.411124 }, { "acc": 0.94627676, "epoch": 3.364335418189695, "grad_norm": 15.210800170898438, "learning_rate": 6.756356183554133e-07, "loss": 0.31275802, "memory(GiB)": 34.88, "step": 124255, "train_speed(iter/s)": 0.411125 }, { "acc": 0.94862919, "epoch": 3.3644707984729103, "grad_norm": 3.82576322555542, "learning_rate": 6.753549294337248e-07, "loss": 0.34127343, "memory(GiB)": 34.88, "step": 124260, "train_speed(iter/s)": 0.411126 }, { "acc": 0.94017534, "epoch": 3.364606178756126, "grad_norm": 4.239132881164551, "learning_rate": 6.750742946929761e-07, "loss": 0.44303651, "memory(GiB)": 34.88, "step": 124265, "train_speed(iter/s)": 0.411126 }, { "acc": 0.94592342, "epoch": 3.3647415590393415, "grad_norm": 8.034940719604492, "learning_rate": 6.747937141366836e-07, "loss": 0.28378358, "memory(GiB)": 34.88, "step": 124270, "train_speed(iter/s)": 0.411127 }, { "acc": 0.9282443, "epoch": 3.3648769393225573, "grad_norm": 6.585381031036377, "learning_rate": 6.74513187768362e-07, "loss": 0.38869276, "memory(GiB)": 34.88, "step": 124275, "train_speed(iter/s)": 0.411128 }, { "acc": 0.94899569, "epoch": 3.3650123196057726, "grad_norm": 4.297672271728516, "learning_rate": 6.742327155915274e-07, "loss": 0.28275015, "memory(GiB)": 34.88, "step": 124280, "train_speed(iter/s)": 0.411129 }, { "acc": 0.96168766, "epoch": 3.365147699888988, "grad_norm": 4.620907306671143, "learning_rate": 6.739522976096889e-07, "loss": 0.17212604, "memory(GiB)": 34.88, "step": 124285, "train_speed(iter/s)": 0.41113 }, { "acc": 0.94516993, "epoch": 3.365283080172204, "grad_norm": 4.74794864654541, "learning_rate": 6.736719338263649e-07, "loss": 0.30173674, "memory(GiB)": 34.88, "step": 124290, "train_speed(iter/s)": 0.411131 }, { "acc": 0.95038614, "epoch": 3.365418460455419, "grad_norm": 3.469449043273926, "learning_rate": 6.733916242450635e-07, "loss": 0.27572989, "memory(GiB)": 34.88, "step": 124295, "train_speed(iter/s)": 0.411132 }, { "acc": 0.95152607, "epoch": 3.365553840738635, "grad_norm": 9.734124183654785, "learning_rate": 6.73111368869298e-07, "loss": 0.30273333, "memory(GiB)": 34.88, "step": 124300, "train_speed(iter/s)": 0.411133 }, { "acc": 0.96000757, "epoch": 3.3656892210218503, "grad_norm": 4.8758673667907715, "learning_rate": 6.728311677025771e-07, "loss": 0.23437142, "memory(GiB)": 34.88, "step": 124305, "train_speed(iter/s)": 0.411134 }, { "acc": 0.94228163, "epoch": 3.365824601305066, "grad_norm": 9.105813980102539, "learning_rate": 6.725510207484123e-07, "loss": 0.44332886, "memory(GiB)": 34.88, "step": 124310, "train_speed(iter/s)": 0.411135 }, { "acc": 0.94510651, "epoch": 3.3659599815882815, "grad_norm": 4.189849376678467, "learning_rate": 6.722709280103144e-07, "loss": 0.38185601, "memory(GiB)": 34.88, "step": 124315, "train_speed(iter/s)": 0.411135 }, { "acc": 0.94779711, "epoch": 3.366095361871497, "grad_norm": 7.9873528480529785, "learning_rate": 6.719908894917891e-07, "loss": 0.31977234, "memory(GiB)": 34.88, "step": 124320, "train_speed(iter/s)": 0.411136 }, { "acc": 0.94186792, "epoch": 3.3662307421547126, "grad_norm": 5.799651145935059, "learning_rate": 6.71710905196346e-07, "loss": 0.38267586, "memory(GiB)": 34.88, "step": 124325, "train_speed(iter/s)": 0.411137 }, { "acc": 0.94669399, "epoch": 3.366366122437928, "grad_norm": 10.493854522705078, "learning_rate": 6.714309751274924e-07, "loss": 0.31824493, "memory(GiB)": 34.88, "step": 124330, "train_speed(iter/s)": 0.411138 }, { "acc": 0.96246338, "epoch": 3.366501502721144, "grad_norm": 2.4798450469970703, "learning_rate": 6.711510992887363e-07, "loss": 0.16148255, "memory(GiB)": 34.88, "step": 124335, "train_speed(iter/s)": 0.411139 }, { "acc": 0.94333134, "epoch": 3.366636883004359, "grad_norm": 21.091426849365234, "learning_rate": 6.708712776835806e-07, "loss": 0.30938501, "memory(GiB)": 34.88, "step": 124340, "train_speed(iter/s)": 0.41114 }, { "acc": 0.96138763, "epoch": 3.366772263287575, "grad_norm": 5.9340314865112305, "learning_rate": 6.705915103155329e-07, "loss": 0.21558127, "memory(GiB)": 34.88, "step": 124345, "train_speed(iter/s)": 0.41114 }, { "acc": 0.96134357, "epoch": 3.3669076435707903, "grad_norm": 6.189312934875488, "learning_rate": 6.703117971880982e-07, "loss": 0.23120747, "memory(GiB)": 34.88, "step": 124350, "train_speed(iter/s)": 0.411141 }, { "acc": 0.94137325, "epoch": 3.3670430238540057, "grad_norm": 2.6593222618103027, "learning_rate": 6.700321383047785e-07, "loss": 0.27767568, "memory(GiB)": 34.88, "step": 124355, "train_speed(iter/s)": 0.411142 }, { "acc": 0.95460548, "epoch": 3.3671784041372215, "grad_norm": 6.822939395904541, "learning_rate": 6.697525336690781e-07, "loss": 0.27377157, "memory(GiB)": 34.88, "step": 124360, "train_speed(iter/s)": 0.411143 }, { "acc": 0.95981522, "epoch": 3.367313784420437, "grad_norm": 5.160148620605469, "learning_rate": 6.694729832844992e-07, "loss": 0.20381322, "memory(GiB)": 34.88, "step": 124365, "train_speed(iter/s)": 0.411144 }, { "acc": 0.92878942, "epoch": 3.3674491647036526, "grad_norm": 7.230598449707031, "learning_rate": 6.691934871545459e-07, "loss": 0.43875856, "memory(GiB)": 34.88, "step": 124370, "train_speed(iter/s)": 0.411145 }, { "acc": 0.9598237, "epoch": 3.367584544986868, "grad_norm": 6.7012763023376465, "learning_rate": 6.689140452827153e-07, "loss": 0.22407832, "memory(GiB)": 34.88, "step": 124375, "train_speed(iter/s)": 0.411146 }, { "acc": 0.94957066, "epoch": 3.367719925270084, "grad_norm": 12.026123046875, "learning_rate": 6.686346576725123e-07, "loss": 0.32371407, "memory(GiB)": 34.88, "step": 124380, "train_speed(iter/s)": 0.411147 }, { "acc": 0.94446735, "epoch": 3.367855305553299, "grad_norm": 13.46697998046875, "learning_rate": 6.683553243274352e-07, "loss": 0.39625897, "memory(GiB)": 34.88, "step": 124385, "train_speed(iter/s)": 0.411148 }, { "acc": 0.94744301, "epoch": 3.367990685836515, "grad_norm": 15.950834274291992, "learning_rate": 6.68076045250981e-07, "loss": 0.30325277, "memory(GiB)": 34.88, "step": 124390, "train_speed(iter/s)": 0.411148 }, { "acc": 0.94754505, "epoch": 3.3681260661197303, "grad_norm": 4.635239601135254, "learning_rate": 6.677968204466509e-07, "loss": 0.31355762, "memory(GiB)": 34.88, "step": 124395, "train_speed(iter/s)": 0.411149 }, { "acc": 0.94898872, "epoch": 3.368261446402946, "grad_norm": 22.28741455078125, "learning_rate": 6.675176499179413e-07, "loss": 0.32134886, "memory(GiB)": 34.88, "step": 124400, "train_speed(iter/s)": 0.41115 }, { "acc": 0.94947739, "epoch": 3.3683968266861615, "grad_norm": 11.15587329864502, "learning_rate": 6.672385336683511e-07, "loss": 0.35201387, "memory(GiB)": 34.88, "step": 124405, "train_speed(iter/s)": 0.411151 }, { "acc": 0.95721302, "epoch": 3.368532206969377, "grad_norm": 10.385017395019531, "learning_rate": 6.669594717013734e-07, "loss": 0.2200567, "memory(GiB)": 34.88, "step": 124410, "train_speed(iter/s)": 0.411152 }, { "acc": 0.9354579, "epoch": 3.3686675872525926, "grad_norm": 14.067865371704102, "learning_rate": 6.666804640205085e-07, "loss": 0.43298545, "memory(GiB)": 34.88, "step": 124415, "train_speed(iter/s)": 0.411153 }, { "acc": 0.95658512, "epoch": 3.368802967535808, "grad_norm": 11.922811508178711, "learning_rate": 6.664015106292479e-07, "loss": 0.28552818, "memory(GiB)": 34.88, "step": 124420, "train_speed(iter/s)": 0.411154 }, { "acc": 0.94861717, "epoch": 3.368938347819024, "grad_norm": 6.035629749298096, "learning_rate": 6.661226115310888e-07, "loss": 0.302844, "memory(GiB)": 34.88, "step": 124425, "train_speed(iter/s)": 0.411155 }, { "acc": 0.9510046, "epoch": 3.369073728102239, "grad_norm": 8.785965919494629, "learning_rate": 6.658437667295225e-07, "loss": 0.27740924, "memory(GiB)": 34.88, "step": 124430, "train_speed(iter/s)": 0.411156 }, { "acc": 0.94410887, "epoch": 3.369209108385455, "grad_norm": 6.929659366607666, "learning_rate": 6.655649762280428e-07, "loss": 0.33563638, "memory(GiB)": 34.88, "step": 124435, "train_speed(iter/s)": 0.411157 }, { "acc": 0.93109341, "epoch": 3.3693444886686703, "grad_norm": 6.36728048324585, "learning_rate": 6.65286240030144e-07, "loss": 0.46190262, "memory(GiB)": 34.88, "step": 124440, "train_speed(iter/s)": 0.411157 }, { "acc": 0.93161964, "epoch": 3.3694798689518857, "grad_norm": 8.057647705078125, "learning_rate": 6.650075581393134e-07, "loss": 0.45894165, "memory(GiB)": 34.88, "step": 124445, "train_speed(iter/s)": 0.411158 }, { "acc": 0.94914351, "epoch": 3.3696152492351015, "grad_norm": 4.749279975891113, "learning_rate": 6.647289305590473e-07, "loss": 0.27901802, "memory(GiB)": 34.88, "step": 124450, "train_speed(iter/s)": 0.411159 }, { "acc": 0.94739285, "epoch": 3.369750629518317, "grad_norm": 6.045831680297852, "learning_rate": 6.644503572928326e-07, "loss": 0.37040315, "memory(GiB)": 34.88, "step": 124455, "train_speed(iter/s)": 0.41116 }, { "acc": 0.93059139, "epoch": 3.3698860098015326, "grad_norm": 13.493562698364258, "learning_rate": 6.641718383441613e-07, "loss": 0.37592342, "memory(GiB)": 34.88, "step": 124460, "train_speed(iter/s)": 0.411161 }, { "acc": 0.93087921, "epoch": 3.370021390084748, "grad_norm": 20.659591674804688, "learning_rate": 6.638933737165195e-07, "loss": 0.43199539, "memory(GiB)": 34.88, "step": 124465, "train_speed(iter/s)": 0.411162 }, { "acc": 0.9411521, "epoch": 3.370156770367964, "grad_norm": 6.117345809936523, "learning_rate": 6.636149634133974e-07, "loss": 0.35681219, "memory(GiB)": 34.88, "step": 124470, "train_speed(iter/s)": 0.411163 }, { "acc": 0.94181309, "epoch": 3.370292150651179, "grad_norm": 10.471745491027832, "learning_rate": 6.633366074382831e-07, "loss": 0.41157398, "memory(GiB)": 34.88, "step": 124475, "train_speed(iter/s)": 0.411164 }, { "acc": 0.95676613, "epoch": 3.3704275309343945, "grad_norm": 4.559726238250732, "learning_rate": 6.630583057946611e-07, "loss": 0.28394744, "memory(GiB)": 34.88, "step": 124480, "train_speed(iter/s)": 0.411165 }, { "acc": 0.94281359, "epoch": 3.3705629112176103, "grad_norm": 28.208637237548828, "learning_rate": 6.627800584860213e-07, "loss": 0.30778468, "memory(GiB)": 34.88, "step": 124485, "train_speed(iter/s)": 0.411166 }, { "acc": 0.94641819, "epoch": 3.3706982915008257, "grad_norm": 4.776183605194092, "learning_rate": 6.625018655158463e-07, "loss": 0.2774888, "memory(GiB)": 34.88, "step": 124490, "train_speed(iter/s)": 0.411167 }, { "acc": 0.93326015, "epoch": 3.3708336717840415, "grad_norm": 8.754422187805176, "learning_rate": 6.622237268876233e-07, "loss": 0.41004906, "memory(GiB)": 34.88, "step": 124495, "train_speed(iter/s)": 0.411167 }, { "acc": 0.93595524, "epoch": 3.370969052067257, "grad_norm": 3.8353865146636963, "learning_rate": 6.619456426048329e-07, "loss": 0.39921837, "memory(GiB)": 34.88, "step": 124500, "train_speed(iter/s)": 0.411168 }, { "acc": 0.94946079, "epoch": 3.3711044323504726, "grad_norm": 6.847667217254639, "learning_rate": 6.616676126709636e-07, "loss": 0.36863446, "memory(GiB)": 34.88, "step": 124505, "train_speed(iter/s)": 0.411169 }, { "acc": 0.94384384, "epoch": 3.371239812633688, "grad_norm": 5.430158615112305, "learning_rate": 6.613896370894955e-07, "loss": 0.31270072, "memory(GiB)": 34.88, "step": 124510, "train_speed(iter/s)": 0.41117 }, { "acc": 0.93540602, "epoch": 3.3713751929169034, "grad_norm": 12.731659889221191, "learning_rate": 6.611117158639091e-07, "loss": 0.45229702, "memory(GiB)": 34.88, "step": 124515, "train_speed(iter/s)": 0.411171 }, { "acc": 0.95142937, "epoch": 3.371510573200119, "grad_norm": 6.895289897918701, "learning_rate": 6.608338489976894e-07, "loss": 0.29674892, "memory(GiB)": 34.88, "step": 124520, "train_speed(iter/s)": 0.411171 }, { "acc": 0.95169802, "epoch": 3.3716459534833345, "grad_norm": 8.602998733520508, "learning_rate": 6.605560364943149e-07, "loss": 0.30718505, "memory(GiB)": 34.88, "step": 124525, "train_speed(iter/s)": 0.411172 }, { "acc": 0.94396286, "epoch": 3.3717813337665503, "grad_norm": 3.1190569400787354, "learning_rate": 6.602782783572683e-07, "loss": 0.32231851, "memory(GiB)": 34.88, "step": 124530, "train_speed(iter/s)": 0.411173 }, { "acc": 0.95830173, "epoch": 3.3719167140497657, "grad_norm": 7.029592990875244, "learning_rate": 6.600005745900243e-07, "loss": 0.26697016, "memory(GiB)": 34.88, "step": 124535, "train_speed(iter/s)": 0.411174 }, { "acc": 0.95584946, "epoch": 3.3720520943329815, "grad_norm": 9.449211120605469, "learning_rate": 6.597229251960668e-07, "loss": 0.31476731, "memory(GiB)": 34.88, "step": 124540, "train_speed(iter/s)": 0.411175 }, { "acc": 0.94027853, "epoch": 3.372187474616197, "grad_norm": 10.367870330810547, "learning_rate": 6.594453301788711e-07, "loss": 0.35517268, "memory(GiB)": 34.88, "step": 124545, "train_speed(iter/s)": 0.411176 }, { "acc": 0.93452387, "epoch": 3.3723228548994126, "grad_norm": 11.78575611114502, "learning_rate": 6.591677895419149e-07, "loss": 0.37142508, "memory(GiB)": 34.88, "step": 124550, "train_speed(iter/s)": 0.411177 }, { "acc": 0.9514123, "epoch": 3.372458235182628, "grad_norm": 3.508164167404175, "learning_rate": 6.588903032886769e-07, "loss": 0.27294996, "memory(GiB)": 34.88, "step": 124555, "train_speed(iter/s)": 0.411178 }, { "acc": 0.94136076, "epoch": 3.372593615465844, "grad_norm": 8.123913764953613, "learning_rate": 6.5861287142263e-07, "loss": 0.32422407, "memory(GiB)": 34.88, "step": 124560, "train_speed(iter/s)": 0.411178 }, { "acc": 0.95034199, "epoch": 3.372728995749059, "grad_norm": 7.60075569152832, "learning_rate": 6.583354939472527e-07, "loss": 0.29055099, "memory(GiB)": 34.88, "step": 124565, "train_speed(iter/s)": 0.411179 }, { "acc": 0.94945793, "epoch": 3.3728643760322745, "grad_norm": 6.4624433517456055, "learning_rate": 6.58058170866016e-07, "loss": 0.31209519, "memory(GiB)": 34.88, "step": 124570, "train_speed(iter/s)": 0.41118 }, { "acc": 0.93644123, "epoch": 3.3729997563154903, "grad_norm": 5.277482509613037, "learning_rate": 6.577809021823982e-07, "loss": 0.41536512, "memory(GiB)": 34.88, "step": 124575, "train_speed(iter/s)": 0.411181 }, { "acc": 0.95046101, "epoch": 3.3731351365987057, "grad_norm": 3.3340840339660645, "learning_rate": 6.57503687899869e-07, "loss": 0.27039154, "memory(GiB)": 34.88, "step": 124580, "train_speed(iter/s)": 0.411182 }, { "acc": 0.93527117, "epoch": 3.3732705168819215, "grad_norm": 8.041629791259766, "learning_rate": 6.572265280219031e-07, "loss": 0.38197436, "memory(GiB)": 34.88, "step": 124585, "train_speed(iter/s)": 0.411183 }, { "acc": 0.95889416, "epoch": 3.373405897165137, "grad_norm": 2.446478843688965, "learning_rate": 6.569494225519718e-07, "loss": 0.22018275, "memory(GiB)": 34.88, "step": 124590, "train_speed(iter/s)": 0.411184 }, { "acc": 0.95391617, "epoch": 3.3735412774483526, "grad_norm": 5.587855815887451, "learning_rate": 6.566723714935476e-07, "loss": 0.24823699, "memory(GiB)": 34.88, "step": 124595, "train_speed(iter/s)": 0.411185 }, { "acc": 0.9517601, "epoch": 3.373676657731568, "grad_norm": 11.526351928710938, "learning_rate": 6.563953748501e-07, "loss": 0.27832355, "memory(GiB)": 34.88, "step": 124600, "train_speed(iter/s)": 0.411186 }, { "acc": 0.93834009, "epoch": 3.3738120380147834, "grad_norm": 11.076995849609375, "learning_rate": 6.56118432625097e-07, "loss": 0.38487489, "memory(GiB)": 34.88, "step": 124605, "train_speed(iter/s)": 0.411187 }, { "acc": 0.95499363, "epoch": 3.373947418297999, "grad_norm": 11.646978378295898, "learning_rate": 6.558415448220118e-07, "loss": 0.30374086, "memory(GiB)": 34.88, "step": 124610, "train_speed(iter/s)": 0.411188 }, { "acc": 0.94815559, "epoch": 3.3740827985812145, "grad_norm": 2.1200478076934814, "learning_rate": 6.555647114443101e-07, "loss": 0.33275824, "memory(GiB)": 34.88, "step": 124615, "train_speed(iter/s)": 0.411188 }, { "acc": 0.94542694, "epoch": 3.3742181788644303, "grad_norm": 3.2047431468963623, "learning_rate": 6.552879324954601e-07, "loss": 0.3556361, "memory(GiB)": 34.88, "step": 124620, "train_speed(iter/s)": 0.411189 }, { "acc": 0.94959202, "epoch": 3.3743535591476457, "grad_norm": 5.045907497406006, "learning_rate": 6.550112079789303e-07, "loss": 0.29172316, "memory(GiB)": 34.88, "step": 124625, "train_speed(iter/s)": 0.41119 }, { "acc": 0.94448738, "epoch": 3.3744889394308615, "grad_norm": 12.04788589477539, "learning_rate": 6.547345378981873e-07, "loss": 0.34223287, "memory(GiB)": 34.88, "step": 124630, "train_speed(iter/s)": 0.411191 }, { "acc": 0.93792725, "epoch": 3.374624319714077, "grad_norm": 12.423055648803711, "learning_rate": 6.544579222566963e-07, "loss": 0.35317826, "memory(GiB)": 34.88, "step": 124635, "train_speed(iter/s)": 0.411192 }, { "acc": 0.94917488, "epoch": 3.374759699997292, "grad_norm": 9.820764541625977, "learning_rate": 6.541813610579198e-07, "loss": 0.30827091, "memory(GiB)": 34.88, "step": 124640, "train_speed(iter/s)": 0.411193 }, { "acc": 0.94637547, "epoch": 3.374895080280508, "grad_norm": 9.001895904541016, "learning_rate": 6.539048543053269e-07, "loss": 0.35534945, "memory(GiB)": 34.88, "step": 124645, "train_speed(iter/s)": 0.411194 }, { "acc": 0.93876495, "epoch": 3.3750304605637234, "grad_norm": 10.903669357299805, "learning_rate": 6.536284020023783e-07, "loss": 0.39033439, "memory(GiB)": 34.88, "step": 124650, "train_speed(iter/s)": 0.411195 }, { "acc": 0.95161467, "epoch": 3.375165840846939, "grad_norm": 5.639840126037598, "learning_rate": 6.533520041525377e-07, "loss": 0.2974822, "memory(GiB)": 34.88, "step": 124655, "train_speed(iter/s)": 0.411195 }, { "acc": 0.9367527, "epoch": 3.3753012211301545, "grad_norm": 5.883828163146973, "learning_rate": 6.530756607592687e-07, "loss": 0.34784679, "memory(GiB)": 34.88, "step": 124660, "train_speed(iter/s)": 0.411196 }, { "acc": 0.93489265, "epoch": 3.3754366014133703, "grad_norm": 14.527215957641602, "learning_rate": 6.52799371826033e-07, "loss": 0.46202092, "memory(GiB)": 34.88, "step": 124665, "train_speed(iter/s)": 0.411197 }, { "acc": 0.94678669, "epoch": 3.3755719816965857, "grad_norm": 10.078378677368164, "learning_rate": 6.525231373562898e-07, "loss": 0.29132614, "memory(GiB)": 34.88, "step": 124670, "train_speed(iter/s)": 0.411198 }, { "acc": 0.94434395, "epoch": 3.375707361979801, "grad_norm": 8.5090913772583, "learning_rate": 6.522469573535003e-07, "loss": 0.28162057, "memory(GiB)": 34.88, "step": 124675, "train_speed(iter/s)": 0.411199 }, { "acc": 0.94510469, "epoch": 3.375842742263017, "grad_norm": 7.071433067321777, "learning_rate": 6.519708318211253e-07, "loss": 0.32798045, "memory(GiB)": 34.88, "step": 124680, "train_speed(iter/s)": 0.4112 }, { "acc": 0.95578518, "epoch": 3.375978122546232, "grad_norm": 9.36388111114502, "learning_rate": 6.516947607626224e-07, "loss": 0.25388064, "memory(GiB)": 34.88, "step": 124685, "train_speed(iter/s)": 0.411201 }, { "acc": 0.93530827, "epoch": 3.376113502829448, "grad_norm": 13.832807540893555, "learning_rate": 6.514187441814511e-07, "loss": 0.33777463, "memory(GiB)": 34.88, "step": 124690, "train_speed(iter/s)": 0.411201 }, { "acc": 0.94443493, "epoch": 3.3762488831126634, "grad_norm": 6.032920837402344, "learning_rate": 6.511427820810667e-07, "loss": 0.32600317, "memory(GiB)": 34.88, "step": 124695, "train_speed(iter/s)": 0.411202 }, { "acc": 0.93875837, "epoch": 3.376384263395879, "grad_norm": 4.717850685119629, "learning_rate": 6.5086687446493e-07, "loss": 0.41813383, "memory(GiB)": 34.88, "step": 124700, "train_speed(iter/s)": 0.411203 }, { "acc": 0.94271221, "epoch": 3.3765196436790945, "grad_norm": 5.756227016448975, "learning_rate": 6.505910213364941e-07, "loss": 0.31023765, "memory(GiB)": 34.88, "step": 124705, "train_speed(iter/s)": 0.411204 }, { "acc": 0.95432568, "epoch": 3.3766550239623103, "grad_norm": 6.862554550170898, "learning_rate": 6.503152226992163e-07, "loss": 0.27776527, "memory(GiB)": 34.88, "step": 124710, "train_speed(iter/s)": 0.411205 }, { "acc": 0.9403183, "epoch": 3.3767904042455257, "grad_norm": 7.631837368011475, "learning_rate": 6.500394785565501e-07, "loss": 0.33876476, "memory(GiB)": 34.88, "step": 124715, "train_speed(iter/s)": 0.411206 }, { "acc": 0.95271606, "epoch": 3.3769257845287415, "grad_norm": 6.133078098297119, "learning_rate": 6.497637889119519e-07, "loss": 0.27046175, "memory(GiB)": 34.88, "step": 124720, "train_speed(iter/s)": 0.411207 }, { "acc": 0.9349802, "epoch": 3.377061164811957, "grad_norm": 22.020122528076172, "learning_rate": 6.494881537688743e-07, "loss": 0.42416706, "memory(GiB)": 34.88, "step": 124725, "train_speed(iter/s)": 0.411208 }, { "acc": 0.9514904, "epoch": 3.377196545095172, "grad_norm": 8.698872566223145, "learning_rate": 6.492125731307672e-07, "loss": 0.27114062, "memory(GiB)": 34.88, "step": 124730, "train_speed(iter/s)": 0.411209 }, { "acc": 0.95191422, "epoch": 3.377331925378388, "grad_norm": 5.76095724105835, "learning_rate": 6.489370470010872e-07, "loss": 0.26052625, "memory(GiB)": 34.88, "step": 124735, "train_speed(iter/s)": 0.411209 }, { "acc": 0.94169159, "epoch": 3.3774673056616034, "grad_norm": 8.655089378356934, "learning_rate": 6.486615753832838e-07, "loss": 0.40918064, "memory(GiB)": 34.88, "step": 124740, "train_speed(iter/s)": 0.41121 }, { "acc": 0.92939053, "epoch": 3.377602685944819, "grad_norm": 8.976351737976074, "learning_rate": 6.483861582808075e-07, "loss": 0.42323642, "memory(GiB)": 34.88, "step": 124745, "train_speed(iter/s)": 0.411211 }, { "acc": 0.96093102, "epoch": 3.3777380662280345, "grad_norm": 5.090633392333984, "learning_rate": 6.481107956971089e-07, "loss": 0.22418108, "memory(GiB)": 34.88, "step": 124750, "train_speed(iter/s)": 0.411212 }, { "acc": 0.95351486, "epoch": 3.3778734465112503, "grad_norm": 4.454720497131348, "learning_rate": 6.478354876356387e-07, "loss": 0.27896948, "memory(GiB)": 34.88, "step": 124755, "train_speed(iter/s)": 0.411213 }, { "acc": 0.94704113, "epoch": 3.3780088267944657, "grad_norm": 6.184124946594238, "learning_rate": 6.475602340998443e-07, "loss": 0.32821906, "memory(GiB)": 34.88, "step": 124760, "train_speed(iter/s)": 0.411213 }, { "acc": 0.94640093, "epoch": 3.378144207077681, "grad_norm": 7.691773891448975, "learning_rate": 6.472850350931725e-07, "loss": 0.30198781, "memory(GiB)": 34.88, "step": 124765, "train_speed(iter/s)": 0.411214 }, { "acc": 0.95736179, "epoch": 3.378279587360897, "grad_norm": 2.6999168395996094, "learning_rate": 6.470098906190736e-07, "loss": 0.1997071, "memory(GiB)": 34.88, "step": 124770, "train_speed(iter/s)": 0.411215 }, { "acc": 0.94190168, "epoch": 3.378414967644112, "grad_norm": 7.204959392547607, "learning_rate": 6.467348006809918e-07, "loss": 0.38273196, "memory(GiB)": 34.88, "step": 124775, "train_speed(iter/s)": 0.411216 }, { "acc": 0.95196228, "epoch": 3.378550347927328, "grad_norm": 4.937397003173828, "learning_rate": 6.464597652823748e-07, "loss": 0.28667049, "memory(GiB)": 34.88, "step": 124780, "train_speed(iter/s)": 0.411217 }, { "acc": 0.93719854, "epoch": 3.3786857282105434, "grad_norm": 5.761597633361816, "learning_rate": 6.461847844266672e-07, "loss": 0.4015964, "memory(GiB)": 34.88, "step": 124785, "train_speed(iter/s)": 0.411218 }, { "acc": 0.94932117, "epoch": 3.378821108493759, "grad_norm": 8.704249382019043, "learning_rate": 6.459098581173151e-07, "loss": 0.26443181, "memory(GiB)": 34.88, "step": 124790, "train_speed(iter/s)": 0.411219 }, { "acc": 0.94186306, "epoch": 3.3789564887769745, "grad_norm": 9.8024263381958, "learning_rate": 6.4563498635776e-07, "loss": 0.39133444, "memory(GiB)": 34.88, "step": 124795, "train_speed(iter/s)": 0.41122 }, { "acc": 0.94557552, "epoch": 3.37909186906019, "grad_norm": 2.956333875656128, "learning_rate": 6.453601691514468e-07, "loss": 0.30357089, "memory(GiB)": 34.88, "step": 124800, "train_speed(iter/s)": 0.411221 }, { "acc": 0.95363922, "epoch": 3.3792272493434057, "grad_norm": 5.863541603088379, "learning_rate": 6.450854065018186e-07, "loss": 0.2862186, "memory(GiB)": 34.88, "step": 124805, "train_speed(iter/s)": 0.411221 }, { "acc": 0.93718739, "epoch": 3.379362629626621, "grad_norm": 5.6021928787231445, "learning_rate": 6.448106984123155e-07, "loss": 0.43568926, "memory(GiB)": 34.88, "step": 124810, "train_speed(iter/s)": 0.411222 }, { "acc": 0.95856266, "epoch": 3.379498009909837, "grad_norm": 8.042332649230957, "learning_rate": 6.445360448863797e-07, "loss": 0.28815722, "memory(GiB)": 34.88, "step": 124815, "train_speed(iter/s)": 0.411223 }, { "acc": 0.94899044, "epoch": 3.379633390193052, "grad_norm": 7.181100368499756, "learning_rate": 6.442614459274521e-07, "loss": 0.34299648, "memory(GiB)": 34.88, "step": 124820, "train_speed(iter/s)": 0.411224 }, { "acc": 0.94353056, "epoch": 3.379768770476268, "grad_norm": 6.940134525299072, "learning_rate": 6.43986901538973e-07, "loss": 0.3314621, "memory(GiB)": 34.88, "step": 124825, "train_speed(iter/s)": 0.411225 }, { "acc": 0.95358706, "epoch": 3.3799041507594834, "grad_norm": 3.0778427124023438, "learning_rate": 6.437124117243803e-07, "loss": 0.29943795, "memory(GiB)": 34.88, "step": 124830, "train_speed(iter/s)": 0.411226 }, { "acc": 0.9527998, "epoch": 3.3800395310426987, "grad_norm": 5.601136684417725, "learning_rate": 6.434379764871133e-07, "loss": 0.29392052, "memory(GiB)": 34.88, "step": 124835, "train_speed(iter/s)": 0.411226 }, { "acc": 0.93378325, "epoch": 3.3801749113259145, "grad_norm": 8.01257610321045, "learning_rate": 6.431635958306097e-07, "loss": 0.49722934, "memory(GiB)": 34.88, "step": 124840, "train_speed(iter/s)": 0.411227 }, { "acc": 0.94890528, "epoch": 3.38031029160913, "grad_norm": 7.233522415161133, "learning_rate": 6.428892697583077e-07, "loss": 0.32917252, "memory(GiB)": 34.88, "step": 124845, "train_speed(iter/s)": 0.411228 }, { "acc": 0.93422956, "epoch": 3.3804456718923457, "grad_norm": 11.733835220336914, "learning_rate": 6.426149982736421e-07, "loss": 0.45502911, "memory(GiB)": 34.88, "step": 124850, "train_speed(iter/s)": 0.411229 }, { "acc": 0.94101706, "epoch": 3.380581052175561, "grad_norm": 6.984171390533447, "learning_rate": 6.423407813800485e-07, "loss": 0.33366587, "memory(GiB)": 34.88, "step": 124855, "train_speed(iter/s)": 0.41123 }, { "acc": 0.94203835, "epoch": 3.380716432458777, "grad_norm": 7.816674709320068, "learning_rate": 6.420666190809645e-07, "loss": 0.39021194, "memory(GiB)": 34.88, "step": 124860, "train_speed(iter/s)": 0.411231 }, { "acc": 0.92252483, "epoch": 3.380851812741992, "grad_norm": 3.5169739723205566, "learning_rate": 6.417925113798219e-07, "loss": 0.48871088, "memory(GiB)": 34.88, "step": 124865, "train_speed(iter/s)": 0.411232 }, { "acc": 0.9408802, "epoch": 3.3809871930252076, "grad_norm": 10.05351734161377, "learning_rate": 6.415184582800556e-07, "loss": 0.38962972, "memory(GiB)": 34.88, "step": 124870, "train_speed(iter/s)": 0.411233 }, { "acc": 0.93331738, "epoch": 3.3811225733084234, "grad_norm": 6.249307632446289, "learning_rate": 6.412444597850983e-07, "loss": 0.41727209, "memory(GiB)": 34.88, "step": 124875, "train_speed(iter/s)": 0.411233 }, { "acc": 0.94174538, "epoch": 3.3812579535916387, "grad_norm": 6.48972749710083, "learning_rate": 6.409705158983832e-07, "loss": 0.40795712, "memory(GiB)": 34.88, "step": 124880, "train_speed(iter/s)": 0.411234 }, { "acc": 0.93600025, "epoch": 3.3813933338748545, "grad_norm": 4.795945644378662, "learning_rate": 6.406966266233412e-07, "loss": 0.3725872, "memory(GiB)": 34.88, "step": 124885, "train_speed(iter/s)": 0.411235 }, { "acc": 0.94081287, "epoch": 3.38152871415807, "grad_norm": 12.812475204467773, "learning_rate": 6.404227919634024e-07, "loss": 0.38658488, "memory(GiB)": 34.88, "step": 124890, "train_speed(iter/s)": 0.411236 }, { "acc": 0.95813522, "epoch": 3.3816640944412857, "grad_norm": 6.508782863616943, "learning_rate": 6.401490119219998e-07, "loss": 0.27356148, "memory(GiB)": 34.88, "step": 124895, "train_speed(iter/s)": 0.411237 }, { "acc": 0.95313406, "epoch": 3.381799474724501, "grad_norm": 4.952751636505127, "learning_rate": 6.398752865025604e-07, "loss": 0.33011026, "memory(GiB)": 34.88, "step": 124900, "train_speed(iter/s)": 0.411238 }, { "acc": 0.94868593, "epoch": 3.381934855007717, "grad_norm": 6.819289684295654, "learning_rate": 6.396016157085139e-07, "loss": 0.31203351, "memory(GiB)": 34.88, "step": 124905, "train_speed(iter/s)": 0.411238 }, { "acc": 0.95286121, "epoch": 3.382070235290932, "grad_norm": 7.518986225128174, "learning_rate": 6.393279995432894e-07, "loss": 0.28120956, "memory(GiB)": 34.88, "step": 124910, "train_speed(iter/s)": 0.411239 }, { "acc": 0.94372978, "epoch": 3.382205615574148, "grad_norm": 8.161857604980469, "learning_rate": 6.390544380103144e-07, "loss": 0.33166513, "memory(GiB)": 34.88, "step": 124915, "train_speed(iter/s)": 0.41124 }, { "acc": 0.94978447, "epoch": 3.3823409958573634, "grad_norm": 10.497941970825195, "learning_rate": 6.38780931113014e-07, "loss": 0.31432807, "memory(GiB)": 34.88, "step": 124920, "train_speed(iter/s)": 0.411241 }, { "acc": 0.95437164, "epoch": 3.3824763761405787, "grad_norm": 10.15594482421875, "learning_rate": 6.385074788548164e-07, "loss": 0.22798624, "memory(GiB)": 34.88, "step": 124925, "train_speed(iter/s)": 0.411242 }, { "acc": 0.94806547, "epoch": 3.3826117564237945, "grad_norm": 9.364723205566406, "learning_rate": 6.382340812391468e-07, "loss": 0.26720018, "memory(GiB)": 34.88, "step": 124930, "train_speed(iter/s)": 0.411243 }, { "acc": 0.95213852, "epoch": 3.38274713670701, "grad_norm": 9.839925765991211, "learning_rate": 6.379607382694287e-07, "loss": 0.31126728, "memory(GiB)": 34.88, "step": 124935, "train_speed(iter/s)": 0.411244 }, { "acc": 0.95201626, "epoch": 3.3828825169902257, "grad_norm": 3.528519630432129, "learning_rate": 6.376874499490876e-07, "loss": 0.29041467, "memory(GiB)": 34.88, "step": 124940, "train_speed(iter/s)": 0.411244 }, { "acc": 0.93457384, "epoch": 3.383017897273441, "grad_norm": 15.57636547088623, "learning_rate": 6.374142162815464e-07, "loss": 0.38320906, "memory(GiB)": 34.88, "step": 124945, "train_speed(iter/s)": 0.411245 }, { "acc": 0.94812889, "epoch": 3.383153277556657, "grad_norm": 5.557345390319824, "learning_rate": 6.371410372702296e-07, "loss": 0.37293456, "memory(GiB)": 34.88, "step": 124950, "train_speed(iter/s)": 0.411246 }, { "acc": 0.94504242, "epoch": 3.383288657839872, "grad_norm": 6.20056676864624, "learning_rate": 6.368679129185559e-07, "loss": 0.33854134, "memory(GiB)": 34.88, "step": 124955, "train_speed(iter/s)": 0.411247 }, { "acc": 0.95972137, "epoch": 3.3834240381230876, "grad_norm": 7.4834418296813965, "learning_rate": 6.365948432299491e-07, "loss": 0.25535409, "memory(GiB)": 34.88, "step": 124960, "train_speed(iter/s)": 0.411248 }, { "acc": 0.93268929, "epoch": 3.3835594184063034, "grad_norm": 11.599303245544434, "learning_rate": 6.363218282078293e-07, "loss": 0.4425467, "memory(GiB)": 34.88, "step": 124965, "train_speed(iter/s)": 0.411249 }, { "acc": 0.94728584, "epoch": 3.3836947986895187, "grad_norm": 6.650190830230713, "learning_rate": 6.360488678556183e-07, "loss": 0.28814721, "memory(GiB)": 34.88, "step": 124970, "train_speed(iter/s)": 0.41125 }, { "acc": 0.95021877, "epoch": 3.3838301789727345, "grad_norm": 11.282599449157715, "learning_rate": 6.35775962176732e-07, "loss": 0.23364301, "memory(GiB)": 34.88, "step": 124975, "train_speed(iter/s)": 0.41125 }, { "acc": 0.93309746, "epoch": 3.38396555925595, "grad_norm": 12.340194702148438, "learning_rate": 6.355031111745914e-07, "loss": 0.40205016, "memory(GiB)": 34.88, "step": 124980, "train_speed(iter/s)": 0.411251 }, { "acc": 0.94660997, "epoch": 3.3841009395391657, "grad_norm": 4.313121318817139, "learning_rate": 6.352303148526148e-07, "loss": 0.3788161, "memory(GiB)": 34.88, "step": 124985, "train_speed(iter/s)": 0.411252 }, { "acc": 0.94689674, "epoch": 3.384236319822381, "grad_norm": 4.573391914367676, "learning_rate": 6.349575732142182e-07, "loss": 0.32714472, "memory(GiB)": 34.88, "step": 124990, "train_speed(iter/s)": 0.411253 }, { "acc": 0.93651953, "epoch": 3.3843717001055964, "grad_norm": 5.551401138305664, "learning_rate": 6.346848862628184e-07, "loss": 0.34101114, "memory(GiB)": 34.88, "step": 124995, "train_speed(iter/s)": 0.411254 }, { "acc": 0.94557343, "epoch": 3.384507080388812, "grad_norm": 5.642373085021973, "learning_rate": 6.344122540018313e-07, "loss": 0.3247412, "memory(GiB)": 34.88, "step": 125000, "train_speed(iter/s)": 0.411255 }, { "acc": 0.93466501, "epoch": 3.3846424606720276, "grad_norm": 4.439285755157471, "learning_rate": 6.341396764346744e-07, "loss": 0.36913037, "memory(GiB)": 34.88, "step": 125005, "train_speed(iter/s)": 0.411256 }, { "acc": 0.9290554, "epoch": 3.3847778409552434, "grad_norm": 7.129976272583008, "learning_rate": 6.33867153564759e-07, "loss": 0.4399559, "memory(GiB)": 34.88, "step": 125010, "train_speed(iter/s)": 0.411256 }, { "acc": 0.93741989, "epoch": 3.3849132212384587, "grad_norm": 4.218405246734619, "learning_rate": 6.335946853955004e-07, "loss": 0.35239682, "memory(GiB)": 34.88, "step": 125015, "train_speed(iter/s)": 0.411257 }, { "acc": 0.93111935, "epoch": 3.3850486015216745, "grad_norm": 10.95753002166748, "learning_rate": 6.333222719303128e-07, "loss": 0.44785223, "memory(GiB)": 34.88, "step": 125020, "train_speed(iter/s)": 0.411258 }, { "acc": 0.95010748, "epoch": 3.38518398180489, "grad_norm": 5.017407417297363, "learning_rate": 6.330499131726068e-07, "loss": 0.36879334, "memory(GiB)": 34.88, "step": 125025, "train_speed(iter/s)": 0.411259 }, { "acc": 0.95072899, "epoch": 3.3853193620881052, "grad_norm": 7.713922023773193, "learning_rate": 6.327776091257952e-07, "loss": 0.27044685, "memory(GiB)": 34.88, "step": 125030, "train_speed(iter/s)": 0.41126 }, { "acc": 0.94879169, "epoch": 3.385454742371321, "grad_norm": 6.819278240203857, "learning_rate": 6.325053597932893e-07, "loss": 0.25164714, "memory(GiB)": 34.88, "step": 125035, "train_speed(iter/s)": 0.411261 }, { "acc": 0.94798336, "epoch": 3.3855901226545364, "grad_norm": 4.905946731567383, "learning_rate": 6.322331651785002e-07, "loss": 0.29395058, "memory(GiB)": 34.88, "step": 125040, "train_speed(iter/s)": 0.411262 }, { "acc": 0.94525366, "epoch": 3.385725502937752, "grad_norm": 9.170605659484863, "learning_rate": 6.319610252848344e-07, "loss": 0.34347692, "memory(GiB)": 34.88, "step": 125045, "train_speed(iter/s)": 0.411263 }, { "acc": 0.94978638, "epoch": 3.3858608832209676, "grad_norm": 3.7807726860046387, "learning_rate": 6.316889401157064e-07, "loss": 0.25992708, "memory(GiB)": 34.88, "step": 125050, "train_speed(iter/s)": 0.411264 }, { "acc": 0.93701534, "epoch": 3.3859962635041834, "grad_norm": 8.599111557006836, "learning_rate": 6.31416909674521e-07, "loss": 0.32615042, "memory(GiB)": 34.88, "step": 125055, "train_speed(iter/s)": 0.411265 }, { "acc": 0.94254189, "epoch": 3.3861316437873987, "grad_norm": 6.799006938934326, "learning_rate": 6.311449339646854e-07, "loss": 0.31569173, "memory(GiB)": 34.88, "step": 125060, "train_speed(iter/s)": 0.411266 }, { "acc": 0.93609352, "epoch": 3.3862670240706145, "grad_norm": 8.920272827148438, "learning_rate": 6.308730129896082e-07, "loss": 0.42265458, "memory(GiB)": 34.88, "step": 125065, "train_speed(iter/s)": 0.411267 }, { "acc": 0.95228691, "epoch": 3.38640240435383, "grad_norm": 9.11217212677002, "learning_rate": 6.306011467526949e-07, "loss": 0.23130031, "memory(GiB)": 34.88, "step": 125070, "train_speed(iter/s)": 0.411267 }, { "acc": 0.94092178, "epoch": 3.3865377846370457, "grad_norm": 9.94756031036377, "learning_rate": 6.303293352573529e-07, "loss": 0.34494452, "memory(GiB)": 34.88, "step": 125075, "train_speed(iter/s)": 0.411268 }, { "acc": 0.95351896, "epoch": 3.386673164920261, "grad_norm": 7.599008083343506, "learning_rate": 6.300575785069833e-07, "loss": 0.29495726, "memory(GiB)": 34.88, "step": 125080, "train_speed(iter/s)": 0.411269 }, { "acc": 0.94908123, "epoch": 3.3868085452034764, "grad_norm": 12.71805191040039, "learning_rate": 6.297858765049952e-07, "loss": 0.27708187, "memory(GiB)": 34.88, "step": 125085, "train_speed(iter/s)": 0.41127 }, { "acc": 0.93705692, "epoch": 3.386943925486692, "grad_norm": 12.350244522094727, "learning_rate": 6.295142292547891e-07, "loss": 0.36339927, "memory(GiB)": 34.88, "step": 125090, "train_speed(iter/s)": 0.411271 }, { "acc": 0.9322155, "epoch": 3.3870793057699076, "grad_norm": 13.975432395935059, "learning_rate": 6.292426367597697e-07, "loss": 0.47126908, "memory(GiB)": 34.88, "step": 125095, "train_speed(iter/s)": 0.411272 }, { "acc": 0.94919014, "epoch": 3.3872146860531234, "grad_norm": 14.57386302947998, "learning_rate": 6.289710990233376e-07, "loss": 0.3235276, "memory(GiB)": 34.88, "step": 125100, "train_speed(iter/s)": 0.411273 }, { "acc": 0.93534527, "epoch": 3.3873500663363387, "grad_norm": 8.125648498535156, "learning_rate": 6.286996160488943e-07, "loss": 0.42121348, "memory(GiB)": 34.88, "step": 125105, "train_speed(iter/s)": 0.411274 }, { "acc": 0.94360056, "epoch": 3.3874854466195545, "grad_norm": 7.988174915313721, "learning_rate": 6.284281878398431e-07, "loss": 0.35182509, "memory(GiB)": 34.88, "step": 125110, "train_speed(iter/s)": 0.411275 }, { "acc": 0.93755207, "epoch": 3.38762082690277, "grad_norm": 8.775655746459961, "learning_rate": 6.281568143995799e-07, "loss": 0.35547905, "memory(GiB)": 34.88, "step": 125115, "train_speed(iter/s)": 0.411275 }, { "acc": 0.94652719, "epoch": 3.3877562071859852, "grad_norm": 2.29874324798584, "learning_rate": 6.278854957315092e-07, "loss": 0.35630124, "memory(GiB)": 34.88, "step": 125120, "train_speed(iter/s)": 0.411276 }, { "acc": 0.94159031, "epoch": 3.387891587469201, "grad_norm": 11.059167861938477, "learning_rate": 6.276142318390261e-07, "loss": 0.34525766, "memory(GiB)": 34.88, "step": 125125, "train_speed(iter/s)": 0.411277 }, { "acc": 0.95166388, "epoch": 3.3880269677524164, "grad_norm": 5.963088512420654, "learning_rate": 6.273430227255312e-07, "loss": 0.30795898, "memory(GiB)": 34.88, "step": 125130, "train_speed(iter/s)": 0.411278 }, { "acc": 0.95124788, "epoch": 3.388162348035632, "grad_norm": 5.187810897827148, "learning_rate": 6.270718683944182e-07, "loss": 0.27036753, "memory(GiB)": 34.88, "step": 125135, "train_speed(iter/s)": 0.411279 }, { "acc": 0.95349216, "epoch": 3.3882977283188476, "grad_norm": 3.781140089035034, "learning_rate": 6.268007688490882e-07, "loss": 0.31766651, "memory(GiB)": 34.88, "step": 125140, "train_speed(iter/s)": 0.41128 }, { "acc": 0.94709558, "epoch": 3.3884331086020634, "grad_norm": 4.530124664306641, "learning_rate": 6.265297240929359e-07, "loss": 0.31360207, "memory(GiB)": 34.88, "step": 125145, "train_speed(iter/s)": 0.411281 }, { "acc": 0.95312338, "epoch": 3.3885684888852787, "grad_norm": 8.918173789978027, "learning_rate": 6.262587341293537e-07, "loss": 0.2415278, "memory(GiB)": 34.88, "step": 125150, "train_speed(iter/s)": 0.411282 }, { "acc": 0.93990707, "epoch": 3.388703869168494, "grad_norm": 6.010678768157959, "learning_rate": 6.259877989617414e-07, "loss": 0.40141764, "memory(GiB)": 34.88, "step": 125155, "train_speed(iter/s)": 0.411283 }, { "acc": 0.95392084, "epoch": 3.38883924945171, "grad_norm": 9.584951400756836, "learning_rate": 6.257169185934888e-07, "loss": 0.2862844, "memory(GiB)": 34.88, "step": 125160, "train_speed(iter/s)": 0.411283 }, { "acc": 0.93982639, "epoch": 3.3889746297349252, "grad_norm": 6.067535877227783, "learning_rate": 6.254460930279918e-07, "loss": 0.39490707, "memory(GiB)": 34.88, "step": 125165, "train_speed(iter/s)": 0.411284 }, { "acc": 0.94166069, "epoch": 3.389110010018141, "grad_norm": 7.225834369659424, "learning_rate": 6.251753222686398e-07, "loss": 0.37752934, "memory(GiB)": 34.88, "step": 125170, "train_speed(iter/s)": 0.411285 }, { "acc": 0.93680182, "epoch": 3.3892453903013564, "grad_norm": 12.041324615478516, "learning_rate": 6.249046063188295e-07, "loss": 0.41471963, "memory(GiB)": 34.88, "step": 125175, "train_speed(iter/s)": 0.411286 }, { "acc": 0.92986584, "epoch": 3.389380770584572, "grad_norm": 13.236589431762695, "learning_rate": 6.246339451819488e-07, "loss": 0.45593052, "memory(GiB)": 34.88, "step": 125180, "train_speed(iter/s)": 0.411287 }, { "acc": 0.9546814, "epoch": 3.3895161508677876, "grad_norm": 8.274256706237793, "learning_rate": 6.243633388613874e-07, "loss": 0.26385279, "memory(GiB)": 34.88, "step": 125185, "train_speed(iter/s)": 0.411287 }, { "acc": 0.95341682, "epoch": 3.389651531151003, "grad_norm": 7.835565090179443, "learning_rate": 6.240927873605385e-07, "loss": 0.25439775, "memory(GiB)": 34.88, "step": 125190, "train_speed(iter/s)": 0.411288 }, { "acc": 0.94498177, "epoch": 3.3897869114342187, "grad_norm": 5.540281772613525, "learning_rate": 6.238222906827888e-07, "loss": 0.30453112, "memory(GiB)": 34.88, "step": 125195, "train_speed(iter/s)": 0.411289 }, { "acc": 0.93690739, "epoch": 3.389922291717434, "grad_norm": 18.61016082763672, "learning_rate": 6.235518488315289e-07, "loss": 0.40856647, "memory(GiB)": 34.88, "step": 125200, "train_speed(iter/s)": 0.41129 }, { "acc": 0.95279016, "epoch": 3.39005767200065, "grad_norm": 5.539529323577881, "learning_rate": 6.232814618101428e-07, "loss": 0.23569078, "memory(GiB)": 34.88, "step": 125205, "train_speed(iter/s)": 0.411291 }, { "acc": 0.94756432, "epoch": 3.3901930522838652, "grad_norm": 34.19127655029297, "learning_rate": 6.23011129622022e-07, "loss": 0.34348631, "memory(GiB)": 34.88, "step": 125210, "train_speed(iter/s)": 0.411292 }, { "acc": 0.95201063, "epoch": 3.390328432567081, "grad_norm": 8.679317474365234, "learning_rate": 6.227408522705504e-07, "loss": 0.3040375, "memory(GiB)": 34.88, "step": 125215, "train_speed(iter/s)": 0.411293 }, { "acc": 0.93801079, "epoch": 3.3904638128502964, "grad_norm": 7.7473955154418945, "learning_rate": 6.224706297591143e-07, "loss": 0.36985402, "memory(GiB)": 34.88, "step": 125220, "train_speed(iter/s)": 0.411293 }, { "acc": 0.94732571, "epoch": 3.390599193133512, "grad_norm": 3.119886875152588, "learning_rate": 6.222004620910997e-07, "loss": 0.33296375, "memory(GiB)": 34.88, "step": 125225, "train_speed(iter/s)": 0.411294 }, { "acc": 0.94225569, "epoch": 3.3907345734167276, "grad_norm": 7.146740913391113, "learning_rate": 6.219303492698892e-07, "loss": 0.33005219, "memory(GiB)": 34.88, "step": 125230, "train_speed(iter/s)": 0.411295 }, { "acc": 0.95274448, "epoch": 3.3908699536999434, "grad_norm": 2.9598541259765625, "learning_rate": 6.216602912988687e-07, "loss": 0.29531684, "memory(GiB)": 34.88, "step": 125235, "train_speed(iter/s)": 0.411296 }, { "acc": 0.95740643, "epoch": 3.3910053339831587, "grad_norm": 3.8390629291534424, "learning_rate": 6.213902881814174e-07, "loss": 0.21959386, "memory(GiB)": 34.88, "step": 125240, "train_speed(iter/s)": 0.411296 }, { "acc": 0.95552082, "epoch": 3.391140714266374, "grad_norm": 6.980128765106201, "learning_rate": 6.211203399209228e-07, "loss": 0.2953552, "memory(GiB)": 34.88, "step": 125245, "train_speed(iter/s)": 0.411297 }, { "acc": 0.94383116, "epoch": 3.39127609454959, "grad_norm": 16.94019889831543, "learning_rate": 6.208504465207629e-07, "loss": 0.33811831, "memory(GiB)": 34.88, "step": 125250, "train_speed(iter/s)": 0.411298 }, { "acc": 0.94701939, "epoch": 3.3914114748328053, "grad_norm": 4.985980987548828, "learning_rate": 6.205806079843204e-07, "loss": 0.34646404, "memory(GiB)": 34.88, "step": 125255, "train_speed(iter/s)": 0.411299 }, { "acc": 0.93843307, "epoch": 3.391546855116021, "grad_norm": 8.669085502624512, "learning_rate": 6.203108243149725e-07, "loss": 0.40081854, "memory(GiB)": 34.88, "step": 125260, "train_speed(iter/s)": 0.4113 }, { "acc": 0.937677, "epoch": 3.3916822353992364, "grad_norm": 8.493415832519531, "learning_rate": 6.200410955161036e-07, "loss": 0.3986665, "memory(GiB)": 34.88, "step": 125265, "train_speed(iter/s)": 0.411301 }, { "acc": 0.94368935, "epoch": 3.391817615682452, "grad_norm": 5.386218547821045, "learning_rate": 6.1977142159109e-07, "loss": 0.36571598, "memory(GiB)": 34.88, "step": 125270, "train_speed(iter/s)": 0.411302 }, { "acc": 0.93929186, "epoch": 3.3919529959656676, "grad_norm": 10.943567276000977, "learning_rate": 6.195018025433078e-07, "loss": 0.36236041, "memory(GiB)": 34.88, "step": 125275, "train_speed(iter/s)": 0.411302 }, { "acc": 0.94568605, "epoch": 3.392088376248883, "grad_norm": 3.6580605506896973, "learning_rate": 6.192322383761386e-07, "loss": 0.30647628, "memory(GiB)": 34.88, "step": 125280, "train_speed(iter/s)": 0.411303 }, { "acc": 0.94600124, "epoch": 3.3922237565320987, "grad_norm": 10.07713508605957, "learning_rate": 6.189627290929563e-07, "loss": 0.36222548, "memory(GiB)": 34.88, "step": 125285, "train_speed(iter/s)": 0.411304 }, { "acc": 0.94562206, "epoch": 3.392359136815314, "grad_norm": 4.925215721130371, "learning_rate": 6.186932746971393e-07, "loss": 0.3280951, "memory(GiB)": 34.88, "step": 125290, "train_speed(iter/s)": 0.411305 }, { "acc": 0.93198071, "epoch": 3.39249451709853, "grad_norm": 9.036397933959961, "learning_rate": 6.184238751920593e-07, "loss": 0.43966494, "memory(GiB)": 34.88, "step": 125295, "train_speed(iter/s)": 0.411306 }, { "acc": 0.94223499, "epoch": 3.3926298973817453, "grad_norm": 4.31500244140625, "learning_rate": 6.181545305810962e-07, "loss": 0.37426291, "memory(GiB)": 34.88, "step": 125300, "train_speed(iter/s)": 0.411307 }, { "acc": 0.95226097, "epoch": 3.392765277664961, "grad_norm": 10.940863609313965, "learning_rate": 6.178852408676207e-07, "loss": 0.27425919, "memory(GiB)": 34.88, "step": 125305, "train_speed(iter/s)": 0.411307 }, { "acc": 0.93861523, "epoch": 3.3929006579481764, "grad_norm": 16.829788208007812, "learning_rate": 6.176160060550051e-07, "loss": 0.40055971, "memory(GiB)": 34.88, "step": 125310, "train_speed(iter/s)": 0.411308 }, { "acc": 0.95167341, "epoch": 3.3930360382313918, "grad_norm": 5.769399642944336, "learning_rate": 6.173468261466264e-07, "loss": 0.34823856, "memory(GiB)": 34.88, "step": 125315, "train_speed(iter/s)": 0.411309 }, { "acc": 0.95125866, "epoch": 3.3931714185146076, "grad_norm": 3.665001392364502, "learning_rate": 6.170777011458526e-07, "loss": 0.2858603, "memory(GiB)": 34.88, "step": 125320, "train_speed(iter/s)": 0.41131 }, { "acc": 0.95251226, "epoch": 3.393306798797823, "grad_norm": 3.6694467067718506, "learning_rate": 6.168086310560575e-07, "loss": 0.32657762, "memory(GiB)": 34.88, "step": 125325, "train_speed(iter/s)": 0.41131 }, { "acc": 0.94459076, "epoch": 3.3934421790810387, "grad_norm": 8.920963287353516, "learning_rate": 6.165396158806086e-07, "loss": 0.33805115, "memory(GiB)": 34.88, "step": 125330, "train_speed(iter/s)": 0.411311 }, { "acc": 0.9422348, "epoch": 3.393577559364254, "grad_norm": 19.079931259155273, "learning_rate": 6.162706556228805e-07, "loss": 0.31435781, "memory(GiB)": 34.88, "step": 125335, "train_speed(iter/s)": 0.411312 }, { "acc": 0.95624456, "epoch": 3.39371293964747, "grad_norm": 7.847458362579346, "learning_rate": 6.160017502862385e-07, "loss": 0.28097432, "memory(GiB)": 34.88, "step": 125340, "train_speed(iter/s)": 0.411313 }, { "acc": 0.9465992, "epoch": 3.3938483199306853, "grad_norm": 3.5320754051208496, "learning_rate": 6.157328998740526e-07, "loss": 0.31531696, "memory(GiB)": 34.88, "step": 125345, "train_speed(iter/s)": 0.411314 }, { "acc": 0.94784927, "epoch": 3.3939837002139006, "grad_norm": 7.9002299308776855, "learning_rate": 6.154641043896917e-07, "loss": 0.38806357, "memory(GiB)": 34.88, "step": 125350, "train_speed(iter/s)": 0.411315 }, { "acc": 0.93301325, "epoch": 3.3941190804971164, "grad_norm": 6.7839202880859375, "learning_rate": 6.151953638365212e-07, "loss": 0.40386114, "memory(GiB)": 34.88, "step": 125355, "train_speed(iter/s)": 0.411315 }, { "acc": 0.94867048, "epoch": 3.3942544607803318, "grad_norm": 1.6298789978027344, "learning_rate": 6.149266782179092e-07, "loss": 0.25673196, "memory(GiB)": 34.88, "step": 125360, "train_speed(iter/s)": 0.411316 }, { "acc": 0.94585857, "epoch": 3.3943898410635476, "grad_norm": 8.730979919433594, "learning_rate": 6.146580475372183e-07, "loss": 0.31666951, "memory(GiB)": 34.88, "step": 125365, "train_speed(iter/s)": 0.411317 }, { "acc": 0.94049721, "epoch": 3.394525221346763, "grad_norm": 4.879944324493408, "learning_rate": 6.143894717978188e-07, "loss": 0.25933971, "memory(GiB)": 34.88, "step": 125370, "train_speed(iter/s)": 0.411318 }, { "acc": 0.95339336, "epoch": 3.3946606016299787, "grad_norm": 5.827914714813232, "learning_rate": 6.141209510030708e-07, "loss": 0.28210118, "memory(GiB)": 34.88, "step": 125375, "train_speed(iter/s)": 0.411319 }, { "acc": 0.94581318, "epoch": 3.394795981913194, "grad_norm": 4.277023792266846, "learning_rate": 6.138524851563398e-07, "loss": 0.29034743, "memory(GiB)": 34.88, "step": 125380, "train_speed(iter/s)": 0.41132 }, { "acc": 0.94883575, "epoch": 3.39493136219641, "grad_norm": 7.231595516204834, "learning_rate": 6.135840742609888e-07, "loss": 0.31235204, "memory(GiB)": 34.88, "step": 125385, "train_speed(iter/s)": 0.41132 }, { "acc": 0.95527201, "epoch": 3.3950667424796253, "grad_norm": 3.0050885677337646, "learning_rate": 6.13315718320381e-07, "loss": 0.26604166, "memory(GiB)": 34.88, "step": 125390, "train_speed(iter/s)": 0.411321 }, { "acc": 0.94584579, "epoch": 3.395202122762841, "grad_norm": 11.964879989624023, "learning_rate": 6.130474173378772e-07, "loss": 0.31997433, "memory(GiB)": 34.88, "step": 125395, "train_speed(iter/s)": 0.411322 }, { "acc": 0.94577131, "epoch": 3.3953375030460564, "grad_norm": 6.528619766235352, "learning_rate": 6.127791713168359e-07, "loss": 0.25732942, "memory(GiB)": 34.88, "step": 125400, "train_speed(iter/s)": 0.411323 }, { "acc": 0.94092102, "epoch": 3.3954728833292718, "grad_norm": 4.706399917602539, "learning_rate": 6.12510980260622e-07, "loss": 0.36790111, "memory(GiB)": 34.88, "step": 125405, "train_speed(iter/s)": 0.411324 }, { "acc": 0.94163837, "epoch": 3.3956082636124876, "grad_norm": 3.336116075515747, "learning_rate": 6.122428441725924e-07, "loss": 0.32681773, "memory(GiB)": 34.88, "step": 125410, "train_speed(iter/s)": 0.411325 }, { "acc": 0.94705696, "epoch": 3.395743643895703, "grad_norm": 8.863679885864258, "learning_rate": 6.119747630561059e-07, "loss": 0.30141256, "memory(GiB)": 34.88, "step": 125415, "train_speed(iter/s)": 0.411326 }, { "acc": 0.94674225, "epoch": 3.3958790241789187, "grad_norm": 7.136768341064453, "learning_rate": 6.117067369145221e-07, "loss": 0.35321753, "memory(GiB)": 34.88, "step": 125420, "train_speed(iter/s)": 0.411327 }, { "acc": 0.93264332, "epoch": 3.396014404462134, "grad_norm": 4.843228816986084, "learning_rate": 6.114387657511986e-07, "loss": 0.43113203, "memory(GiB)": 34.88, "step": 125425, "train_speed(iter/s)": 0.411328 }, { "acc": 0.94307089, "epoch": 3.39614978474535, "grad_norm": 6.15245246887207, "learning_rate": 6.111708495694913e-07, "loss": 0.34747825, "memory(GiB)": 34.88, "step": 125430, "train_speed(iter/s)": 0.411328 }, { "acc": 0.94818077, "epoch": 3.3962851650285653, "grad_norm": 4.371662139892578, "learning_rate": 6.109029883727547e-07, "loss": 0.31059997, "memory(GiB)": 34.88, "step": 125435, "train_speed(iter/s)": 0.411329 }, { "acc": 0.94151545, "epoch": 3.3964205453117806, "grad_norm": 5.968609809875488, "learning_rate": 6.106351821643485e-07, "loss": 0.34042497, "memory(GiB)": 34.88, "step": 125440, "train_speed(iter/s)": 0.41133 }, { "acc": 0.95056591, "epoch": 3.3965559255949964, "grad_norm": 11.047333717346191, "learning_rate": 6.103674309476242e-07, "loss": 0.3309628, "memory(GiB)": 34.88, "step": 125445, "train_speed(iter/s)": 0.411331 }, { "acc": 0.94358692, "epoch": 3.3966913058782118, "grad_norm": 10.980634689331055, "learning_rate": 6.100997347259368e-07, "loss": 0.33115258, "memory(GiB)": 34.88, "step": 125450, "train_speed(iter/s)": 0.411332 }, { "acc": 0.94000416, "epoch": 3.3968266861614276, "grad_norm": 10.031854629516602, "learning_rate": 6.098320935026399e-07, "loss": 0.40307617, "memory(GiB)": 34.88, "step": 125455, "train_speed(iter/s)": 0.411333 }, { "acc": 0.96089954, "epoch": 3.396962066444643, "grad_norm": 11.216229438781738, "learning_rate": 6.095645072810871e-07, "loss": 0.2708776, "memory(GiB)": 34.88, "step": 125460, "train_speed(iter/s)": 0.411333 }, { "acc": 0.93634882, "epoch": 3.3970974467278587, "grad_norm": 5.730642795562744, "learning_rate": 6.092969760646282e-07, "loss": 0.32244599, "memory(GiB)": 34.88, "step": 125465, "train_speed(iter/s)": 0.411334 }, { "acc": 0.94472218, "epoch": 3.397232827011074, "grad_norm": 8.021699905395508, "learning_rate": 6.090294998566164e-07, "loss": 0.35943205, "memory(GiB)": 34.88, "step": 125470, "train_speed(iter/s)": 0.411335 }, { "acc": 0.94573984, "epoch": 3.3973682072942895, "grad_norm": 7.401546001434326, "learning_rate": 6.087620786604023e-07, "loss": 0.35169897, "memory(GiB)": 34.88, "step": 125475, "train_speed(iter/s)": 0.411336 }, { "acc": 0.94251356, "epoch": 3.3975035875775053, "grad_norm": 3.7611372470855713, "learning_rate": 6.08494712479335e-07, "loss": 0.2508275, "memory(GiB)": 34.88, "step": 125480, "train_speed(iter/s)": 0.411336 }, { "acc": 0.9495759, "epoch": 3.3976389678607206, "grad_norm": 6.327879428863525, "learning_rate": 6.082274013167638e-07, "loss": 0.3176918, "memory(GiB)": 34.88, "step": 125485, "train_speed(iter/s)": 0.411337 }, { "acc": 0.94644127, "epoch": 3.3977743481439364, "grad_norm": 15.489324569702148, "learning_rate": 6.079601451760381e-07, "loss": 0.25606318, "memory(GiB)": 34.88, "step": 125490, "train_speed(iter/s)": 0.411338 }, { "acc": 0.94744091, "epoch": 3.3979097284271518, "grad_norm": 7.31079626083374, "learning_rate": 6.07692944060506e-07, "loss": 0.2824234, "memory(GiB)": 34.88, "step": 125495, "train_speed(iter/s)": 0.411339 }, { "acc": 0.95220413, "epoch": 3.3980451087103676, "grad_norm": 4.469719886779785, "learning_rate": 6.074257979735135e-07, "loss": 0.30854316, "memory(GiB)": 34.88, "step": 125500, "train_speed(iter/s)": 0.41134 }, { "acc": 0.92811832, "epoch": 3.398180488993583, "grad_norm": 5.174228668212891, "learning_rate": 6.071587069184073e-07, "loss": 0.44790206, "memory(GiB)": 34.88, "step": 125505, "train_speed(iter/s)": 0.411341 }, { "acc": 0.93554611, "epoch": 3.3983158692767983, "grad_norm": 11.223992347717285, "learning_rate": 6.068916708985341e-07, "loss": 0.45831356, "memory(GiB)": 34.88, "step": 125510, "train_speed(iter/s)": 0.411342 }, { "acc": 0.94058723, "epoch": 3.398451249560014, "grad_norm": 12.994193077087402, "learning_rate": 6.066246899172391e-07, "loss": 0.31684318, "memory(GiB)": 34.88, "step": 125515, "train_speed(iter/s)": 0.411342 }, { "acc": 0.93780584, "epoch": 3.3985866298432295, "grad_norm": 8.458916664123535, "learning_rate": 6.06357763977866e-07, "loss": 0.39660728, "memory(GiB)": 34.88, "step": 125520, "train_speed(iter/s)": 0.411343 }, { "acc": 0.94062862, "epoch": 3.3987220101264453, "grad_norm": 5.475286483764648, "learning_rate": 6.060908930837583e-07, "loss": 0.34177966, "memory(GiB)": 34.88, "step": 125525, "train_speed(iter/s)": 0.411344 }, { "acc": 0.95169106, "epoch": 3.3988573904096606, "grad_norm": 11.774144172668457, "learning_rate": 6.05824077238261e-07, "loss": 0.32361755, "memory(GiB)": 34.88, "step": 125530, "train_speed(iter/s)": 0.411345 }, { "acc": 0.95953255, "epoch": 3.3989927706928764, "grad_norm": 4.396045684814453, "learning_rate": 6.055573164447144e-07, "loss": 0.22133982, "memory(GiB)": 34.88, "step": 125535, "train_speed(iter/s)": 0.411346 }, { "acc": 0.93870392, "epoch": 3.3991281509760918, "grad_norm": 13.376419067382812, "learning_rate": 6.052906107064603e-07, "loss": 0.38711386, "memory(GiB)": 34.88, "step": 125540, "train_speed(iter/s)": 0.411347 }, { "acc": 0.94147396, "epoch": 3.3992635312593076, "grad_norm": 17.18776512145996, "learning_rate": 6.050239600268408e-07, "loss": 0.32202551, "memory(GiB)": 34.88, "step": 125545, "train_speed(iter/s)": 0.411348 }, { "acc": 0.93669233, "epoch": 3.399398911542523, "grad_norm": 19.501708984375, "learning_rate": 6.047573644091968e-07, "loss": 0.38037488, "memory(GiB)": 34.88, "step": 125550, "train_speed(iter/s)": 0.411348 }, { "acc": 0.9644454, "epoch": 3.3995342918257387, "grad_norm": 4.613324165344238, "learning_rate": 6.044908238568659e-07, "loss": 0.20886979, "memory(GiB)": 34.88, "step": 125555, "train_speed(iter/s)": 0.411349 }, { "acc": 0.95801144, "epoch": 3.399669672108954, "grad_norm": 7.947220802307129, "learning_rate": 6.042243383731877e-07, "loss": 0.20274713, "memory(GiB)": 34.88, "step": 125560, "train_speed(iter/s)": 0.41135 }, { "acc": 0.94056931, "epoch": 3.3998050523921695, "grad_norm": 6.792550563812256, "learning_rate": 6.039579079615023e-07, "loss": 0.35836577, "memory(GiB)": 34.88, "step": 125565, "train_speed(iter/s)": 0.411351 }, { "acc": 0.94645786, "epoch": 3.3999404326753853, "grad_norm": 9.243193626403809, "learning_rate": 6.036915326251446e-07, "loss": 0.31150837, "memory(GiB)": 34.88, "step": 125570, "train_speed(iter/s)": 0.411352 }, { "acc": 0.94952946, "epoch": 3.4000758129586006, "grad_norm": 3.58907151222229, "learning_rate": 6.034252123674525e-07, "loss": 0.31804347, "memory(GiB)": 34.88, "step": 125575, "train_speed(iter/s)": 0.411353 }, { "acc": 0.95102682, "epoch": 3.4002111932418164, "grad_norm": 12.487162590026855, "learning_rate": 6.031589471917623e-07, "loss": 0.28651304, "memory(GiB)": 34.88, "step": 125580, "train_speed(iter/s)": 0.411354 }, { "acc": 0.95960684, "epoch": 3.400346573525032, "grad_norm": 8.703229904174805, "learning_rate": 6.02892737101411e-07, "loss": 0.20712533, "memory(GiB)": 34.88, "step": 125585, "train_speed(iter/s)": 0.411354 }, { "acc": 0.94556866, "epoch": 3.4004819538082476, "grad_norm": 5.15164852142334, "learning_rate": 6.026265820997306e-07, "loss": 0.32843761, "memory(GiB)": 34.88, "step": 125590, "train_speed(iter/s)": 0.411355 }, { "acc": 0.94483261, "epoch": 3.400617334091463, "grad_norm": 9.67605972290039, "learning_rate": 6.023604821900571e-07, "loss": 0.35486307, "memory(GiB)": 34.88, "step": 125595, "train_speed(iter/s)": 0.411356 }, { "acc": 0.9540082, "epoch": 3.4007527143746783, "grad_norm": 7.343963146209717, "learning_rate": 6.020944373757239e-07, "loss": 0.23558087, "memory(GiB)": 34.88, "step": 125600, "train_speed(iter/s)": 0.411357 }, { "acc": 0.95190964, "epoch": 3.400888094657894, "grad_norm": 8.197973251342773, "learning_rate": 6.018284476600621e-07, "loss": 0.27002485, "memory(GiB)": 34.88, "step": 125605, "train_speed(iter/s)": 0.411358 }, { "acc": 0.94145947, "epoch": 3.4010234749411095, "grad_norm": 5.387745380401611, "learning_rate": 6.015625130464054e-07, "loss": 0.32321, "memory(GiB)": 34.88, "step": 125610, "train_speed(iter/s)": 0.411359 }, { "acc": 0.93873653, "epoch": 3.4011588552243253, "grad_norm": 7.413732528686523, "learning_rate": 6.01296633538084e-07, "loss": 0.36618795, "memory(GiB)": 34.88, "step": 125615, "train_speed(iter/s)": 0.411359 }, { "acc": 0.94971647, "epoch": 3.4012942355075406, "grad_norm": 9.775094032287598, "learning_rate": 6.010308091384305e-07, "loss": 0.32293162, "memory(GiB)": 34.88, "step": 125620, "train_speed(iter/s)": 0.41136 }, { "acc": 0.95487003, "epoch": 3.4014296157907564, "grad_norm": 15.581526756286621, "learning_rate": 6.007650398507725e-07, "loss": 0.32293487, "memory(GiB)": 34.88, "step": 125625, "train_speed(iter/s)": 0.411361 }, { "acc": 0.94157448, "epoch": 3.401564996073972, "grad_norm": 3.9891061782836914, "learning_rate": 6.004993256784402e-07, "loss": 0.31243944, "memory(GiB)": 34.88, "step": 125630, "train_speed(iter/s)": 0.411362 }, { "acc": 0.95825367, "epoch": 3.401700376357187, "grad_norm": 12.90986156463623, "learning_rate": 6.002336666247626e-07, "loss": 0.23452709, "memory(GiB)": 34.88, "step": 125635, "train_speed(iter/s)": 0.411363 }, { "acc": 0.95653934, "epoch": 3.401835756640403, "grad_norm": 4.33311128616333, "learning_rate": 5.999680626930686e-07, "loss": 0.30792947, "memory(GiB)": 34.88, "step": 125640, "train_speed(iter/s)": 0.411363 }, { "acc": 0.94388895, "epoch": 3.4019711369236183, "grad_norm": 4.192547798156738, "learning_rate": 5.997025138866825e-07, "loss": 0.38247585, "memory(GiB)": 34.88, "step": 125645, "train_speed(iter/s)": 0.411364 }, { "acc": 0.94090233, "epoch": 3.402106517206834, "grad_norm": 3.9886207580566406, "learning_rate": 5.99437020208933e-07, "loss": 0.34294324, "memory(GiB)": 34.88, "step": 125650, "train_speed(iter/s)": 0.411365 }, { "acc": 0.95549126, "epoch": 3.4022418974900495, "grad_norm": 5.859241962432861, "learning_rate": 5.991715816631461e-07, "loss": 0.2662302, "memory(GiB)": 34.88, "step": 125655, "train_speed(iter/s)": 0.411366 }, { "acc": 0.93864946, "epoch": 3.4023772777732653, "grad_norm": 3.359665632247925, "learning_rate": 5.989061982526453e-07, "loss": 0.41701174, "memory(GiB)": 34.88, "step": 125660, "train_speed(iter/s)": 0.411367 }, { "acc": 0.94654131, "epoch": 3.4025126580564806, "grad_norm": 3.213130474090576, "learning_rate": 5.98640869980756e-07, "loss": 0.35724905, "memory(GiB)": 34.88, "step": 125665, "train_speed(iter/s)": 0.411367 }, { "acc": 0.94902782, "epoch": 3.402648038339696, "grad_norm": 6.578690052032471, "learning_rate": 5.98375596850802e-07, "loss": 0.29348059, "memory(GiB)": 34.88, "step": 125670, "train_speed(iter/s)": 0.411368 }, { "acc": 0.95936184, "epoch": 3.402783418622912, "grad_norm": 4.920695781707764, "learning_rate": 5.981103788661067e-07, "loss": 0.26370759, "memory(GiB)": 34.88, "step": 125675, "train_speed(iter/s)": 0.411369 }, { "acc": 0.93297329, "epoch": 3.402918798906127, "grad_norm": 7.816827774047852, "learning_rate": 5.978452160299908e-07, "loss": 0.38138385, "memory(GiB)": 34.88, "step": 125680, "train_speed(iter/s)": 0.41137 }, { "acc": 0.94688845, "epoch": 3.403054179189343, "grad_norm": 9.02550983428955, "learning_rate": 5.975801083457793e-07, "loss": 0.31813741, "memory(GiB)": 34.88, "step": 125685, "train_speed(iter/s)": 0.411371 }, { "acc": 0.95580807, "epoch": 3.4031895594725583, "grad_norm": 2.4853177070617676, "learning_rate": 5.973150558167907e-07, "loss": 0.19887476, "memory(GiB)": 34.88, "step": 125690, "train_speed(iter/s)": 0.411372 }, { "acc": 0.95467644, "epoch": 3.403324939755774, "grad_norm": 4.901271820068359, "learning_rate": 5.970500584463449e-07, "loss": 0.29713831, "memory(GiB)": 34.88, "step": 125695, "train_speed(iter/s)": 0.411373 }, { "acc": 0.95713291, "epoch": 3.4034603200389895, "grad_norm": 5.534625053405762, "learning_rate": 5.967851162377625e-07, "loss": 0.24573607, "memory(GiB)": 34.88, "step": 125700, "train_speed(iter/s)": 0.411374 }, { "acc": 0.93409252, "epoch": 3.4035957003222053, "grad_norm": 9.162484169006348, "learning_rate": 5.965202291943622e-07, "loss": 0.43305759, "memory(GiB)": 34.88, "step": 125705, "train_speed(iter/s)": 0.411374 }, { "acc": 0.93132181, "epoch": 3.4037310806054206, "grad_norm": 12.730818748474121, "learning_rate": 5.962553973194638e-07, "loss": 0.38089683, "memory(GiB)": 34.88, "step": 125710, "train_speed(iter/s)": 0.411375 }, { "acc": 0.94288034, "epoch": 3.4038664608886364, "grad_norm": 3.8655130863189697, "learning_rate": 5.959906206163809e-07, "loss": 0.32599468, "memory(GiB)": 34.88, "step": 125715, "train_speed(iter/s)": 0.411376 }, { "acc": 0.93979511, "epoch": 3.404001841171852, "grad_norm": 7.786840438842773, "learning_rate": 5.957258990884349e-07, "loss": 0.36981068, "memory(GiB)": 34.88, "step": 125720, "train_speed(iter/s)": 0.411377 }, { "acc": 0.93315954, "epoch": 3.404137221455067, "grad_norm": 4.627615928649902, "learning_rate": 5.954612327389396e-07, "loss": 0.35422721, "memory(GiB)": 34.88, "step": 125725, "train_speed(iter/s)": 0.411378 }, { "acc": 0.95863752, "epoch": 3.404272601738283, "grad_norm": 6.057347774505615, "learning_rate": 5.951966215712103e-07, "loss": 0.23858876, "memory(GiB)": 34.88, "step": 125730, "train_speed(iter/s)": 0.411379 }, { "acc": 0.95773411, "epoch": 3.4044079820214983, "grad_norm": 4.740596294403076, "learning_rate": 5.949320655885619e-07, "loss": 0.22812366, "memory(GiB)": 34.88, "step": 125735, "train_speed(iter/s)": 0.41138 }, { "acc": 0.9398016, "epoch": 3.404543362304714, "grad_norm": 9.814035415649414, "learning_rate": 5.94667564794309e-07, "loss": 0.3482338, "memory(GiB)": 34.88, "step": 125740, "train_speed(iter/s)": 0.411381 }, { "acc": 0.94331284, "epoch": 3.4046787425879295, "grad_norm": 5.815083980560303, "learning_rate": 5.944031191917654e-07, "loss": 0.31535897, "memory(GiB)": 34.88, "step": 125745, "train_speed(iter/s)": 0.411381 }, { "acc": 0.94657421, "epoch": 3.4048141228711453, "grad_norm": 9.382438659667969, "learning_rate": 5.941387287842415e-07, "loss": 0.34587984, "memory(GiB)": 34.88, "step": 125750, "train_speed(iter/s)": 0.411382 }, { "acc": 0.94530926, "epoch": 3.4049495031543606, "grad_norm": 4.379150390625, "learning_rate": 5.938743935750531e-07, "loss": 0.27859817, "memory(GiB)": 34.88, "step": 125755, "train_speed(iter/s)": 0.411383 }, { "acc": 0.93699932, "epoch": 3.405084883437576, "grad_norm": 7.600858688354492, "learning_rate": 5.936101135675083e-07, "loss": 0.39522817, "memory(GiB)": 34.88, "step": 125760, "train_speed(iter/s)": 0.411384 }, { "acc": 0.95490818, "epoch": 3.405220263720792, "grad_norm": 2.5056073665618896, "learning_rate": 5.933458887649199e-07, "loss": 0.2858788, "memory(GiB)": 34.88, "step": 125765, "train_speed(iter/s)": 0.411385 }, { "acc": 0.94039717, "epoch": 3.405355644004007, "grad_norm": 6.096109390258789, "learning_rate": 5.930817191705959e-07, "loss": 0.35953679, "memory(GiB)": 34.88, "step": 125770, "train_speed(iter/s)": 0.411386 }, { "acc": 0.96651478, "epoch": 3.405491024287223, "grad_norm": 27.483484268188477, "learning_rate": 5.92817604787846e-07, "loss": 0.17571754, "memory(GiB)": 34.88, "step": 125775, "train_speed(iter/s)": 0.411386 }, { "acc": 0.94512243, "epoch": 3.4056264045704383, "grad_norm": 9.753096580505371, "learning_rate": 5.925535456199806e-07, "loss": 0.28423877, "memory(GiB)": 34.88, "step": 125780, "train_speed(iter/s)": 0.411387 }, { "acc": 0.95414467, "epoch": 3.405761784853654, "grad_norm": 4.065869331359863, "learning_rate": 5.922895416703034e-07, "loss": 0.32089362, "memory(GiB)": 34.88, "step": 125785, "train_speed(iter/s)": 0.411388 }, { "acc": 0.95926418, "epoch": 3.4058971651368695, "grad_norm": 1.5511575937271118, "learning_rate": 5.920255929421272e-07, "loss": 0.25748677, "memory(GiB)": 34.88, "step": 125790, "train_speed(iter/s)": 0.411389 }, { "acc": 0.9444294, "epoch": 3.406032545420085, "grad_norm": 1.9156522750854492, "learning_rate": 5.917616994387542e-07, "loss": 0.38520398, "memory(GiB)": 34.88, "step": 125795, "train_speed(iter/s)": 0.41139 }, { "acc": 0.94519272, "epoch": 3.4061679257033006, "grad_norm": 5.508333683013916, "learning_rate": 5.914978611634929e-07, "loss": 0.34301388, "memory(GiB)": 34.88, "step": 125800, "train_speed(iter/s)": 0.411391 }, { "acc": 0.94125175, "epoch": 3.406303305986516, "grad_norm": 6.977838516235352, "learning_rate": 5.912340781196445e-07, "loss": 0.33890963, "memory(GiB)": 34.88, "step": 125805, "train_speed(iter/s)": 0.411392 }, { "acc": 0.94179697, "epoch": 3.406438686269732, "grad_norm": 8.627429962158203, "learning_rate": 5.909703503105183e-07, "loss": 0.33995376, "memory(GiB)": 34.88, "step": 125810, "train_speed(iter/s)": 0.411393 }, { "acc": 0.95553751, "epoch": 3.406574066552947, "grad_norm": 6.672231197357178, "learning_rate": 5.907066777394159e-07, "loss": 0.30530407, "memory(GiB)": 34.88, "step": 125815, "train_speed(iter/s)": 0.411393 }, { "acc": 0.94673891, "epoch": 3.406709446836163, "grad_norm": 3.4934310913085938, "learning_rate": 5.904430604096387e-07, "loss": 0.26559434, "memory(GiB)": 34.88, "step": 125820, "train_speed(iter/s)": 0.411394 }, { "acc": 0.9421751, "epoch": 3.4068448271193783, "grad_norm": 5.318042755126953, "learning_rate": 5.901794983244902e-07, "loss": 0.32668734, "memory(GiB)": 34.88, "step": 125825, "train_speed(iter/s)": 0.411395 }, { "acc": 0.94695301, "epoch": 3.4069802074025937, "grad_norm": 5.594189167022705, "learning_rate": 5.899159914872728e-07, "loss": 0.33606834, "memory(GiB)": 34.88, "step": 125830, "train_speed(iter/s)": 0.411396 }, { "acc": 0.94344807, "epoch": 3.4071155876858095, "grad_norm": 5.456722259521484, "learning_rate": 5.896525399012877e-07, "loss": 0.36194901, "memory(GiB)": 34.88, "step": 125835, "train_speed(iter/s)": 0.411397 }, { "acc": 0.93899279, "epoch": 3.407250967969025, "grad_norm": 5.561132907867432, "learning_rate": 5.89389143569832e-07, "loss": 0.34249339, "memory(GiB)": 34.88, "step": 125840, "train_speed(iter/s)": 0.411398 }, { "acc": 0.93881359, "epoch": 3.4073863482522406, "grad_norm": 18.983863830566406, "learning_rate": 5.891258024962101e-07, "loss": 0.31246557, "memory(GiB)": 34.88, "step": 125845, "train_speed(iter/s)": 0.411398 }, { "acc": 0.95512094, "epoch": 3.407521728535456, "grad_norm": 2.827601909637451, "learning_rate": 5.888625166837182e-07, "loss": 0.24602671, "memory(GiB)": 34.88, "step": 125850, "train_speed(iter/s)": 0.411399 }, { "acc": 0.93384857, "epoch": 3.407657108818672, "grad_norm": 5.380360126495361, "learning_rate": 5.885992861356535e-07, "loss": 0.32851689, "memory(GiB)": 34.88, "step": 125855, "train_speed(iter/s)": 0.4114 }, { "acc": 0.93999386, "epoch": 3.407792489101887, "grad_norm": 4.862939357757568, "learning_rate": 5.883361108553147e-07, "loss": 0.39343395, "memory(GiB)": 34.88, "step": 125860, "train_speed(iter/s)": 0.411401 }, { "acc": 0.94204788, "epoch": 3.4079278693851025, "grad_norm": 5.37915563583374, "learning_rate": 5.880729908459983e-07, "loss": 0.38804755, "memory(GiB)": 34.88, "step": 125865, "train_speed(iter/s)": 0.411402 }, { "acc": 0.95504475, "epoch": 3.4080632496683183, "grad_norm": 5.89499568939209, "learning_rate": 5.878099261110023e-07, "loss": 0.27057624, "memory(GiB)": 34.88, "step": 125870, "train_speed(iter/s)": 0.411403 }, { "acc": 0.94252186, "epoch": 3.4081986299515337, "grad_norm": 5.631181716918945, "learning_rate": 5.875469166536178e-07, "loss": 0.34284759, "memory(GiB)": 34.88, "step": 125875, "train_speed(iter/s)": 0.411404 }, { "acc": 0.9460722, "epoch": 3.4083340102347495, "grad_norm": 3.3397696018218994, "learning_rate": 5.872839624771439e-07, "loss": 0.28647132, "memory(GiB)": 34.88, "step": 125880, "train_speed(iter/s)": 0.411405 }, { "acc": 0.94661312, "epoch": 3.408469390517965, "grad_norm": 4.462350368499756, "learning_rate": 5.870210635848721e-07, "loss": 0.36986523, "memory(GiB)": 34.88, "step": 125885, "train_speed(iter/s)": 0.411406 }, { "acc": 0.95066919, "epoch": 3.4086047708011806, "grad_norm": 5.652529716491699, "learning_rate": 5.867582199800973e-07, "loss": 0.31312423, "memory(GiB)": 34.88, "step": 125890, "train_speed(iter/s)": 0.411406 }, { "acc": 0.95529556, "epoch": 3.408740151084396, "grad_norm": 2.6836044788360596, "learning_rate": 5.864954316661105e-07, "loss": 0.29001861, "memory(GiB)": 34.88, "step": 125895, "train_speed(iter/s)": 0.411407 }, { "acc": 0.94730206, "epoch": 3.408875531367612, "grad_norm": 7.340151786804199, "learning_rate": 5.862326986462046e-07, "loss": 0.36701236, "memory(GiB)": 34.88, "step": 125900, "train_speed(iter/s)": 0.411408 }, { "acc": 0.94444551, "epoch": 3.409010911650827, "grad_norm": 7.807221412658691, "learning_rate": 5.859700209236714e-07, "loss": 0.32696967, "memory(GiB)": 34.88, "step": 125905, "train_speed(iter/s)": 0.411409 }, { "acc": 0.94148102, "epoch": 3.409146291934043, "grad_norm": 9.510231971740723, "learning_rate": 5.857073985017987e-07, "loss": 0.37412958, "memory(GiB)": 34.88, "step": 125910, "train_speed(iter/s)": 0.41141 }, { "acc": 0.96302624, "epoch": 3.4092816722172583, "grad_norm": 11.546976089477539, "learning_rate": 5.854448313838804e-07, "loss": 0.18828644, "memory(GiB)": 34.88, "step": 125915, "train_speed(iter/s)": 0.411411 }, { "acc": 0.95688963, "epoch": 3.4094170525004737, "grad_norm": 3.7285959720611572, "learning_rate": 5.851823195732025e-07, "loss": 0.24453254, "memory(GiB)": 34.88, "step": 125920, "train_speed(iter/s)": 0.411412 }, { "acc": 0.94113674, "epoch": 3.4095524327836895, "grad_norm": 3.3391611576080322, "learning_rate": 5.849198630730558e-07, "loss": 0.37257941, "memory(GiB)": 34.88, "step": 125925, "train_speed(iter/s)": 0.411413 }, { "acc": 0.95302029, "epoch": 3.409687813066905, "grad_norm": 5.010032653808594, "learning_rate": 5.846574618867251e-07, "loss": 0.29291468, "memory(GiB)": 34.88, "step": 125930, "train_speed(iter/s)": 0.411414 }, { "acc": 0.94999466, "epoch": 3.4098231933501206, "grad_norm": 9.064391136169434, "learning_rate": 5.843951160175009e-07, "loss": 0.26477902, "memory(GiB)": 34.88, "step": 125935, "train_speed(iter/s)": 0.411414 }, { "acc": 0.93104744, "epoch": 3.409958573633336, "grad_norm": 6.560494422912598, "learning_rate": 5.841328254686684e-07, "loss": 0.45011482, "memory(GiB)": 34.88, "step": 125940, "train_speed(iter/s)": 0.411415 }, { "acc": 0.95358372, "epoch": 3.410093953916552, "grad_norm": 3.2943379878997803, "learning_rate": 5.83870590243511e-07, "loss": 0.27308035, "memory(GiB)": 34.88, "step": 125945, "train_speed(iter/s)": 0.411416 }, { "acc": 0.93806057, "epoch": 3.410229334199767, "grad_norm": 8.576369285583496, "learning_rate": 5.836084103453171e-07, "loss": 0.3525372, "memory(GiB)": 34.88, "step": 125950, "train_speed(iter/s)": 0.411417 }, { "acc": 0.94478283, "epoch": 3.4103647144829825, "grad_norm": 5.339353084564209, "learning_rate": 5.833462857773691e-07, "loss": 0.34558945, "memory(GiB)": 34.88, "step": 125955, "train_speed(iter/s)": 0.411418 }, { "acc": 0.94625645, "epoch": 3.4105000947661983, "grad_norm": 6.0478034019470215, "learning_rate": 5.830842165429516e-07, "loss": 0.32052112, "memory(GiB)": 34.88, "step": 125960, "train_speed(iter/s)": 0.411419 }, { "acc": 0.94289646, "epoch": 3.4106354750494137, "grad_norm": 4.793587684631348, "learning_rate": 5.82822202645345e-07, "loss": 0.33793774, "memory(GiB)": 34.88, "step": 125965, "train_speed(iter/s)": 0.41142 }, { "acc": 0.96090183, "epoch": 3.4107708553326295, "grad_norm": 10.826010704040527, "learning_rate": 5.825602440878358e-07, "loss": 0.25394738, "memory(GiB)": 34.88, "step": 125970, "train_speed(iter/s)": 0.411421 }, { "acc": 0.94649029, "epoch": 3.410906235615845, "grad_norm": 6.114217281341553, "learning_rate": 5.822983408737032e-07, "loss": 0.35086031, "memory(GiB)": 34.88, "step": 125975, "train_speed(iter/s)": 0.411421 }, { "acc": 0.95455656, "epoch": 3.4110416158990606, "grad_norm": 19.77501106262207, "learning_rate": 5.82036493006226e-07, "loss": 0.31069965, "memory(GiB)": 34.88, "step": 125980, "train_speed(iter/s)": 0.411422 }, { "acc": 0.93714943, "epoch": 3.411176996182276, "grad_norm": 11.992966651916504, "learning_rate": 5.817747004886886e-07, "loss": 0.35269399, "memory(GiB)": 34.88, "step": 125985, "train_speed(iter/s)": 0.411423 }, { "acc": 0.95078659, "epoch": 3.4113123764654913, "grad_norm": 10.994400978088379, "learning_rate": 5.815129633243672e-07, "loss": 0.27638035, "memory(GiB)": 34.88, "step": 125990, "train_speed(iter/s)": 0.411424 }, { "acc": 0.9449934, "epoch": 3.411447756748707, "grad_norm": 4.614470481872559, "learning_rate": 5.812512815165429e-07, "loss": 0.36701136, "memory(GiB)": 34.88, "step": 125995, "train_speed(iter/s)": 0.411425 }, { "acc": 0.94586105, "epoch": 3.4115831370319225, "grad_norm": 14.576956748962402, "learning_rate": 5.809896550684905e-07, "loss": 0.3856519, "memory(GiB)": 34.88, "step": 126000, "train_speed(iter/s)": 0.411426 }, { "acc": 0.93249531, "epoch": 3.4117185173151383, "grad_norm": 8.686869621276855, "learning_rate": 5.807280839834916e-07, "loss": 0.43938751, "memory(GiB)": 34.88, "step": 126005, "train_speed(iter/s)": 0.411427 }, { "acc": 0.94529448, "epoch": 3.4118538975983537, "grad_norm": 7.425166130065918, "learning_rate": 5.804665682648198e-07, "loss": 0.36449466, "memory(GiB)": 34.88, "step": 126010, "train_speed(iter/s)": 0.411428 }, { "acc": 0.94985771, "epoch": 3.4119892778815695, "grad_norm": 6.123103141784668, "learning_rate": 5.802051079157522e-07, "loss": 0.278543, "memory(GiB)": 34.88, "step": 126015, "train_speed(iter/s)": 0.411428 }, { "acc": 0.94286098, "epoch": 3.412124658164785, "grad_norm": 9.85672664642334, "learning_rate": 5.799437029395646e-07, "loss": 0.3099371, "memory(GiB)": 34.88, "step": 126020, "train_speed(iter/s)": 0.411429 }, { "acc": 0.94983253, "epoch": 3.412260038448, "grad_norm": 5.542625427246094, "learning_rate": 5.796823533395305e-07, "loss": 0.27484062, "memory(GiB)": 34.88, "step": 126025, "train_speed(iter/s)": 0.41143 }, { "acc": 0.93086405, "epoch": 3.412395418731216, "grad_norm": 12.333107948303223, "learning_rate": 5.794210591189249e-07, "loss": 0.41590452, "memory(GiB)": 34.88, "step": 126030, "train_speed(iter/s)": 0.411431 }, { "acc": 0.95201244, "epoch": 3.4125307990144313, "grad_norm": 11.339860916137695, "learning_rate": 5.791598202810185e-07, "loss": 0.30205138, "memory(GiB)": 34.88, "step": 126035, "train_speed(iter/s)": 0.411432 }, { "acc": 0.94272842, "epoch": 3.412666179297647, "grad_norm": 5.3936309814453125, "learning_rate": 5.78898636829088e-07, "loss": 0.30398023, "memory(GiB)": 34.88, "step": 126040, "train_speed(iter/s)": 0.411433 }, { "acc": 0.95203514, "epoch": 3.4128015595808625, "grad_norm": 7.521160125732422, "learning_rate": 5.786375087664019e-07, "loss": 0.27094843, "memory(GiB)": 34.88, "step": 126045, "train_speed(iter/s)": 0.411434 }, { "acc": 0.94090347, "epoch": 3.4129369398640783, "grad_norm": 21.47862434387207, "learning_rate": 5.783764360962322e-07, "loss": 0.41704588, "memory(GiB)": 34.88, "step": 126050, "train_speed(iter/s)": 0.411434 }, { "acc": 0.9654808, "epoch": 3.4130723201472937, "grad_norm": 4.310125827789307, "learning_rate": 5.781154188218496e-07, "loss": 0.23647785, "memory(GiB)": 34.88, "step": 126055, "train_speed(iter/s)": 0.411435 }, { "acc": 0.9388649, "epoch": 3.4132077004305095, "grad_norm": 11.872610092163086, "learning_rate": 5.778544569465255e-07, "loss": 0.28504629, "memory(GiB)": 34.88, "step": 126060, "train_speed(iter/s)": 0.411436 }, { "acc": 0.94689789, "epoch": 3.413343080713725, "grad_norm": 7.029573440551758, "learning_rate": 5.775935504735271e-07, "loss": 0.35578752, "memory(GiB)": 34.88, "step": 126065, "train_speed(iter/s)": 0.411437 }, { "acc": 0.94286938, "epoch": 3.4134784609969406, "grad_norm": 17.418521881103516, "learning_rate": 5.773326994061204e-07, "loss": 0.35654678, "memory(GiB)": 34.88, "step": 126070, "train_speed(iter/s)": 0.411438 }, { "acc": 0.94978085, "epoch": 3.413613841280156, "grad_norm": 12.10477352142334, "learning_rate": 5.770719037475781e-07, "loss": 0.26970072, "memory(GiB)": 34.88, "step": 126075, "train_speed(iter/s)": 0.411439 }, { "acc": 0.94885998, "epoch": 3.4137492215633713, "grad_norm": 43.77532958984375, "learning_rate": 5.768111635011635e-07, "loss": 0.32183979, "memory(GiB)": 34.88, "step": 126080, "train_speed(iter/s)": 0.41144 }, { "acc": 0.96309357, "epoch": 3.413884601846587, "grad_norm": 2.3725051879882812, "learning_rate": 5.765504786701441e-07, "loss": 0.22396712, "memory(GiB)": 34.88, "step": 126085, "train_speed(iter/s)": 0.411441 }, { "acc": 0.93501263, "epoch": 3.4140199821298025, "grad_norm": 7.423463821411133, "learning_rate": 5.762898492577858e-07, "loss": 0.46897264, "memory(GiB)": 34.88, "step": 126090, "train_speed(iter/s)": 0.411442 }, { "acc": 0.93737116, "epoch": 3.4141553624130183, "grad_norm": 8.450559616088867, "learning_rate": 5.760292752673541e-07, "loss": 0.38607054, "memory(GiB)": 34.88, "step": 126095, "train_speed(iter/s)": 0.411442 }, { "acc": 0.93742599, "epoch": 3.4142907426962337, "grad_norm": 4.844834327697754, "learning_rate": 5.75768756702112e-07, "loss": 0.44506493, "memory(GiB)": 34.88, "step": 126100, "train_speed(iter/s)": 0.411443 }, { "acc": 0.935709, "epoch": 3.4144261229794495, "grad_norm": 19.328065872192383, "learning_rate": 5.755082935653217e-07, "loss": 0.38049026, "memory(GiB)": 34.88, "step": 126105, "train_speed(iter/s)": 0.411444 }, { "acc": 0.94125595, "epoch": 3.414561503262665, "grad_norm": 13.084517478942871, "learning_rate": 5.752478858602491e-07, "loss": 0.4026432, "memory(GiB)": 34.88, "step": 126110, "train_speed(iter/s)": 0.411445 }, { "acc": 0.94384022, "epoch": 3.41469688354588, "grad_norm": 4.088870525360107, "learning_rate": 5.749875335901541e-07, "loss": 0.30761032, "memory(GiB)": 34.88, "step": 126115, "train_speed(iter/s)": 0.411446 }, { "acc": 0.9502203, "epoch": 3.414832263829096, "grad_norm": 4.546698570251465, "learning_rate": 5.747272367582992e-07, "loss": 0.33587768, "memory(GiB)": 34.88, "step": 126120, "train_speed(iter/s)": 0.411446 }, { "acc": 0.93796577, "epoch": 3.4149676441123114, "grad_norm": 19.168424606323242, "learning_rate": 5.744669953679444e-07, "loss": 0.36914468, "memory(GiB)": 34.88, "step": 126125, "train_speed(iter/s)": 0.411447 }, { "acc": 0.94830589, "epoch": 3.415103024395527, "grad_norm": 6.292394161224365, "learning_rate": 5.742068094223512e-07, "loss": 0.30285125, "memory(GiB)": 34.88, "step": 126130, "train_speed(iter/s)": 0.411448 }, { "acc": 0.94023829, "epoch": 3.4152384046787425, "grad_norm": 14.14728832244873, "learning_rate": 5.739466789247765e-07, "loss": 0.33168619, "memory(GiB)": 34.88, "step": 126135, "train_speed(iter/s)": 0.411449 }, { "acc": 0.94809761, "epoch": 3.4153737849619583, "grad_norm": 5.822872638702393, "learning_rate": 5.73686603878481e-07, "loss": 0.31634665, "memory(GiB)": 34.88, "step": 126140, "train_speed(iter/s)": 0.41145 }, { "acc": 0.95521603, "epoch": 3.4155091652451737, "grad_norm": 5.773400783538818, "learning_rate": 5.734265842867224e-07, "loss": 0.28850946, "memory(GiB)": 34.88, "step": 126145, "train_speed(iter/s)": 0.411451 }, { "acc": 0.95730247, "epoch": 3.415644545528389, "grad_norm": 2.961529493331909, "learning_rate": 5.731666201527571e-07, "loss": 0.23581753, "memory(GiB)": 34.88, "step": 126150, "train_speed(iter/s)": 0.411452 }, { "acc": 0.94583216, "epoch": 3.415779925811605, "grad_norm": 6.739372730255127, "learning_rate": 5.729067114798415e-07, "loss": 0.28088322, "memory(GiB)": 34.88, "step": 126155, "train_speed(iter/s)": 0.411453 }, { "acc": 0.9274394, "epoch": 3.41591530609482, "grad_norm": 8.303592681884766, "learning_rate": 5.726468582712325e-07, "loss": 0.48907056, "memory(GiB)": 34.88, "step": 126160, "train_speed(iter/s)": 0.411453 }, { "acc": 0.95797405, "epoch": 3.416050686378036, "grad_norm": 12.262657165527344, "learning_rate": 5.723870605301857e-07, "loss": 0.26617846, "memory(GiB)": 34.88, "step": 126165, "train_speed(iter/s)": 0.411454 }, { "acc": 0.94343052, "epoch": 3.4161860666612514, "grad_norm": 14.472851753234863, "learning_rate": 5.721273182599531e-07, "loss": 0.39167504, "memory(GiB)": 34.88, "step": 126170, "train_speed(iter/s)": 0.411455 }, { "acc": 0.94553585, "epoch": 3.416321446944467, "grad_norm": 7.854417324066162, "learning_rate": 5.718676314637906e-07, "loss": 0.29552124, "memory(GiB)": 34.88, "step": 126175, "train_speed(iter/s)": 0.411456 }, { "acc": 0.93265486, "epoch": 3.4164568272276825, "grad_norm": 14.757672309875488, "learning_rate": 5.71608000144951e-07, "loss": 0.41408582, "memory(GiB)": 34.88, "step": 126180, "train_speed(iter/s)": 0.411457 }, { "acc": 0.93536301, "epoch": 3.416592207510898, "grad_norm": 13.212796211242676, "learning_rate": 5.713484243066873e-07, "loss": 0.3860148, "memory(GiB)": 34.88, "step": 126185, "train_speed(iter/s)": 0.411458 }, { "acc": 0.94244633, "epoch": 3.4167275877941137, "grad_norm": 18.921459197998047, "learning_rate": 5.7108890395225e-07, "loss": 0.35257802, "memory(GiB)": 34.88, "step": 126190, "train_speed(iter/s)": 0.411459 }, { "acc": 0.95841455, "epoch": 3.416862968077329, "grad_norm": 5.40781831741333, "learning_rate": 5.70829439084889e-07, "loss": 0.24887652, "memory(GiB)": 34.88, "step": 126195, "train_speed(iter/s)": 0.411459 }, { "acc": 0.95360126, "epoch": 3.416998348360545, "grad_norm": 4.821484088897705, "learning_rate": 5.705700297078573e-07, "loss": 0.28393836, "memory(GiB)": 34.88, "step": 126200, "train_speed(iter/s)": 0.41146 }, { "acc": 0.9431736, "epoch": 3.41713372864376, "grad_norm": 23.062700271606445, "learning_rate": 5.703106758244032e-07, "loss": 0.38382905, "memory(GiB)": 34.88, "step": 126205, "train_speed(iter/s)": 0.411461 }, { "acc": 0.94069843, "epoch": 3.417269108926976, "grad_norm": 7.535580158233643, "learning_rate": 5.70051377437775e-07, "loss": 0.42169724, "memory(GiB)": 34.88, "step": 126210, "train_speed(iter/s)": 0.411462 }, { "acc": 0.95738564, "epoch": 3.4174044892101914, "grad_norm": 6.458401679992676, "learning_rate": 5.697921345512217e-07, "loss": 0.27792394, "memory(GiB)": 34.88, "step": 126215, "train_speed(iter/s)": 0.411463 }, { "acc": 0.96615267, "epoch": 3.417539869493407, "grad_norm": 8.834972381591797, "learning_rate": 5.695329471679919e-07, "loss": 0.2366694, "memory(GiB)": 34.88, "step": 126220, "train_speed(iter/s)": 0.411464 }, { "acc": 0.95144377, "epoch": 3.4176752497766225, "grad_norm": 4.923813819885254, "learning_rate": 5.692738152913301e-07, "loss": 0.26864381, "memory(GiB)": 34.88, "step": 126225, "train_speed(iter/s)": 0.411465 }, { "acc": 0.94007874, "epoch": 3.4178106300598383, "grad_norm": 6.5028486251831055, "learning_rate": 5.690147389244841e-07, "loss": 0.40723114, "memory(GiB)": 34.88, "step": 126230, "train_speed(iter/s)": 0.411466 }, { "acc": 0.93981247, "epoch": 3.4179460103430537, "grad_norm": 3.911478281021118, "learning_rate": 5.687557180706992e-07, "loss": 0.31324062, "memory(GiB)": 34.88, "step": 126235, "train_speed(iter/s)": 0.411466 }, { "acc": 0.94115133, "epoch": 3.418081390626269, "grad_norm": 12.575335502624512, "learning_rate": 5.684967527332194e-07, "loss": 0.34155293, "memory(GiB)": 34.88, "step": 126240, "train_speed(iter/s)": 0.411467 }, { "acc": 0.94934845, "epoch": 3.418216770909485, "grad_norm": 4.749108791351318, "learning_rate": 5.682378429152887e-07, "loss": 0.29796054, "memory(GiB)": 34.88, "step": 126245, "train_speed(iter/s)": 0.411468 }, { "acc": 0.93623066, "epoch": 3.4183521511927, "grad_norm": 8.751092910766602, "learning_rate": 5.679789886201511e-07, "loss": 0.39749274, "memory(GiB)": 34.88, "step": 126250, "train_speed(iter/s)": 0.411469 }, { "acc": 0.95369482, "epoch": 3.418487531475916, "grad_norm": 5.344905376434326, "learning_rate": 5.677201898510501e-07, "loss": 0.25505834, "memory(GiB)": 34.88, "step": 126255, "train_speed(iter/s)": 0.41147 }, { "acc": 0.94196758, "epoch": 3.4186229117591314, "grad_norm": 6.22338342666626, "learning_rate": 5.674614466112251e-07, "loss": 0.39515991, "memory(GiB)": 34.88, "step": 126260, "train_speed(iter/s)": 0.411471 }, { "acc": 0.93425951, "epoch": 3.418758292042347, "grad_norm": 7.733310222625732, "learning_rate": 5.672027589039194e-07, "loss": 0.43299351, "memory(GiB)": 34.88, "step": 126265, "train_speed(iter/s)": 0.411472 }, { "acc": 0.94036255, "epoch": 3.4188936723255625, "grad_norm": 6.5123090744018555, "learning_rate": 5.669441267323739e-07, "loss": 0.35714636, "memory(GiB)": 34.88, "step": 126270, "train_speed(iter/s)": 0.411472 }, { "acc": 0.93499031, "epoch": 3.419029052608778, "grad_norm": 4.08626127243042, "learning_rate": 5.666855500998268e-07, "loss": 0.34326563, "memory(GiB)": 34.88, "step": 126275, "train_speed(iter/s)": 0.411473 }, { "acc": 0.94660444, "epoch": 3.4191644328919937, "grad_norm": 11.295526504516602, "learning_rate": 5.664270290095179e-07, "loss": 0.33113251, "memory(GiB)": 34.88, "step": 126280, "train_speed(iter/s)": 0.411474 }, { "acc": 0.94460754, "epoch": 3.419299813175209, "grad_norm": 4.232966899871826, "learning_rate": 5.661685634646861e-07, "loss": 0.31835105, "memory(GiB)": 34.88, "step": 126285, "train_speed(iter/s)": 0.411475 }, { "acc": 0.94928055, "epoch": 3.419435193458425, "grad_norm": 6.821011543273926, "learning_rate": 5.659101534685699e-07, "loss": 0.26187396, "memory(GiB)": 34.88, "step": 126290, "train_speed(iter/s)": 0.411476 }, { "acc": 0.95588284, "epoch": 3.41957057374164, "grad_norm": 10.802906036376953, "learning_rate": 5.656517990244044e-07, "loss": 0.21055567, "memory(GiB)": 34.88, "step": 126295, "train_speed(iter/s)": 0.411477 }, { "acc": 0.95120754, "epoch": 3.419705954024856, "grad_norm": 6.18250036239624, "learning_rate": 5.653935001354272e-07, "loss": 0.26717668, "memory(GiB)": 34.88, "step": 126300, "train_speed(iter/s)": 0.411477 }, { "acc": 0.95347338, "epoch": 3.4198413343080714, "grad_norm": 4.18038272857666, "learning_rate": 5.651352568048736e-07, "loss": 0.23793688, "memory(GiB)": 34.88, "step": 126305, "train_speed(iter/s)": 0.411478 }, { "acc": 0.94825783, "epoch": 3.4199767145912867, "grad_norm": 2.980375289916992, "learning_rate": 5.648770690359803e-07, "loss": 0.3003849, "memory(GiB)": 34.88, "step": 126310, "train_speed(iter/s)": 0.411479 }, { "acc": 0.94424677, "epoch": 3.4201120948745025, "grad_norm": 8.709312438964844, "learning_rate": 5.646189368319787e-07, "loss": 0.35642412, "memory(GiB)": 34.88, "step": 126315, "train_speed(iter/s)": 0.41148 }, { "acc": 0.93123112, "epoch": 3.420247475157718, "grad_norm": 6.091118812561035, "learning_rate": 5.643608601961043e-07, "loss": 0.52946234, "memory(GiB)": 34.88, "step": 126320, "train_speed(iter/s)": 0.411481 }, { "acc": 0.95426626, "epoch": 3.4203828554409337, "grad_norm": 2.5238876342773438, "learning_rate": 5.641028391315909e-07, "loss": 0.27502155, "memory(GiB)": 34.88, "step": 126325, "train_speed(iter/s)": 0.411482 }, { "acc": 0.93784113, "epoch": 3.420518235724149, "grad_norm": 8.101837158203125, "learning_rate": 5.638448736416676e-07, "loss": 0.36817143, "memory(GiB)": 34.88, "step": 126330, "train_speed(iter/s)": 0.411483 }, { "acc": 0.94028282, "epoch": 3.420653616007365, "grad_norm": 6.205036640167236, "learning_rate": 5.635869637295684e-07, "loss": 0.35287523, "memory(GiB)": 34.88, "step": 126335, "train_speed(iter/s)": 0.411483 }, { "acc": 0.94495964, "epoch": 3.42078899629058, "grad_norm": 12.594306945800781, "learning_rate": 5.633291093985236e-07, "loss": 0.3699507, "memory(GiB)": 34.88, "step": 126340, "train_speed(iter/s)": 0.411484 }, { "acc": 0.93329334, "epoch": 3.4209243765737956, "grad_norm": 8.955734252929688, "learning_rate": 5.630713106517636e-07, "loss": 0.42673731, "memory(GiB)": 34.88, "step": 126345, "train_speed(iter/s)": 0.411485 }, { "acc": 0.94617796, "epoch": 3.4210597568570114, "grad_norm": 2.81986141204834, "learning_rate": 5.628135674925157e-07, "loss": 0.27515366, "memory(GiB)": 34.88, "step": 126350, "train_speed(iter/s)": 0.411486 }, { "acc": 0.96221313, "epoch": 3.4211951371402267, "grad_norm": 5.205441474914551, "learning_rate": 5.625558799240128e-07, "loss": 0.20633063, "memory(GiB)": 34.88, "step": 126355, "train_speed(iter/s)": 0.411487 }, { "acc": 0.94393845, "epoch": 3.4213305174234425, "grad_norm": 7.487298011779785, "learning_rate": 5.622982479494801e-07, "loss": 0.31870861, "memory(GiB)": 34.88, "step": 126360, "train_speed(iter/s)": 0.411488 }, { "acc": 0.95339479, "epoch": 3.421465897706658, "grad_norm": 8.638694763183594, "learning_rate": 5.620406715721449e-07, "loss": 0.29193692, "memory(GiB)": 34.88, "step": 126365, "train_speed(iter/s)": 0.411488 }, { "acc": 0.94231262, "epoch": 3.4216012779898737, "grad_norm": 6.072754859924316, "learning_rate": 5.61783150795234e-07, "loss": 0.32476423, "memory(GiB)": 34.88, "step": 126370, "train_speed(iter/s)": 0.411489 }, { "acc": 0.94720764, "epoch": 3.421736658273089, "grad_norm": 8.974385261535645, "learning_rate": 5.615256856219738e-07, "loss": 0.32252932, "memory(GiB)": 34.88, "step": 126375, "train_speed(iter/s)": 0.41149 }, { "acc": 0.95481024, "epoch": 3.421872038556305, "grad_norm": 2.4487268924713135, "learning_rate": 5.612682760555904e-07, "loss": 0.32604828, "memory(GiB)": 34.88, "step": 126380, "train_speed(iter/s)": 0.411491 }, { "acc": 0.95254402, "epoch": 3.42200741883952, "grad_norm": 5.85475492477417, "learning_rate": 5.610109220993065e-07, "loss": 0.26667528, "memory(GiB)": 34.88, "step": 126385, "train_speed(iter/s)": 0.411492 }, { "acc": 0.95789127, "epoch": 3.422142799122736, "grad_norm": 16.335269927978516, "learning_rate": 5.607536237563467e-07, "loss": 0.27364063, "memory(GiB)": 34.88, "step": 126390, "train_speed(iter/s)": 0.411493 }, { "acc": 0.95462227, "epoch": 3.4222781794059514, "grad_norm": 4.97956657409668, "learning_rate": 5.60496381029935e-07, "loss": 0.28808429, "memory(GiB)": 34.88, "step": 126395, "train_speed(iter/s)": 0.411494 }, { "acc": 0.95356245, "epoch": 3.4224135596891667, "grad_norm": 4.284508228302002, "learning_rate": 5.602391939232929e-07, "loss": 0.30913281, "memory(GiB)": 34.88, "step": 126400, "train_speed(iter/s)": 0.411494 }, { "acc": 0.9471179, "epoch": 3.4225489399723825, "grad_norm": 4.178257465362549, "learning_rate": 5.599820624396418e-07, "loss": 0.30533681, "memory(GiB)": 34.88, "step": 126405, "train_speed(iter/s)": 0.411495 }, { "acc": 0.95687542, "epoch": 3.422684320255598, "grad_norm": 6.710827827453613, "learning_rate": 5.597249865822042e-07, "loss": 0.27922285, "memory(GiB)": 34.88, "step": 126410, "train_speed(iter/s)": 0.411496 }, { "acc": 0.95507393, "epoch": 3.4228197005388137, "grad_norm": 10.609636306762695, "learning_rate": 5.594679663542006e-07, "loss": 0.24974089, "memory(GiB)": 34.88, "step": 126415, "train_speed(iter/s)": 0.411497 }, { "acc": 0.94105396, "epoch": 3.422955080822029, "grad_norm": 6.912991523742676, "learning_rate": 5.592110017588484e-07, "loss": 0.33373055, "memory(GiB)": 34.88, "step": 126420, "train_speed(iter/s)": 0.411498 }, { "acc": 0.94897251, "epoch": 3.423090461105245, "grad_norm": 8.138866424560547, "learning_rate": 5.589540927993686e-07, "loss": 0.3481704, "memory(GiB)": 34.88, "step": 126425, "train_speed(iter/s)": 0.411499 }, { "acc": 0.94329081, "epoch": 3.42322584138846, "grad_norm": 7.66195011138916, "learning_rate": 5.586972394789786e-07, "loss": 0.31116381, "memory(GiB)": 34.88, "step": 126430, "train_speed(iter/s)": 0.4115 }, { "acc": 0.9308609, "epoch": 3.4233612216716756, "grad_norm": 22.927642822265625, "learning_rate": 5.584404418008974e-07, "loss": 0.42748394, "memory(GiB)": 34.88, "step": 126435, "train_speed(iter/s)": 0.411501 }, { "acc": 0.93944044, "epoch": 3.4234966019548914, "grad_norm": 4.783324718475342, "learning_rate": 5.581836997683394e-07, "loss": 0.33926208, "memory(GiB)": 34.88, "step": 126440, "train_speed(iter/s)": 0.411501 }, { "acc": 0.93999224, "epoch": 3.4236319822381067, "grad_norm": 9.937662124633789, "learning_rate": 5.579270133845225e-07, "loss": 0.40170307, "memory(GiB)": 34.88, "step": 126445, "train_speed(iter/s)": 0.411502 }, { "acc": 0.9620698, "epoch": 3.4237673625213225, "grad_norm": 4.110314846038818, "learning_rate": 5.57670382652663e-07, "loss": 0.25451922, "memory(GiB)": 34.88, "step": 126450, "train_speed(iter/s)": 0.411503 }, { "acc": 0.95201969, "epoch": 3.423902742804538, "grad_norm": 5.8572258949279785, "learning_rate": 5.574138075759735e-07, "loss": 0.26512129, "memory(GiB)": 34.88, "step": 126455, "train_speed(iter/s)": 0.411504 }, { "acc": 0.95418224, "epoch": 3.4240381230877537, "grad_norm": 5.917751312255859, "learning_rate": 5.571572881576693e-07, "loss": 0.29928517, "memory(GiB)": 34.88, "step": 126460, "train_speed(iter/s)": 0.411505 }, { "acc": 0.9344038, "epoch": 3.424173503370969, "grad_norm": 5.850340366363525, "learning_rate": 5.569008244009636e-07, "loss": 0.41824718, "memory(GiB)": 34.88, "step": 126465, "train_speed(iter/s)": 0.411506 }, { "acc": 0.9540947, "epoch": 3.4243088836541844, "grad_norm": 7.738283157348633, "learning_rate": 5.566444163090704e-07, "loss": 0.23073409, "memory(GiB)": 34.88, "step": 126470, "train_speed(iter/s)": 0.411507 }, { "acc": 0.96268492, "epoch": 3.4244442639374, "grad_norm": 7.171285152435303, "learning_rate": 5.563880638851983e-07, "loss": 0.2197679, "memory(GiB)": 34.88, "step": 126475, "train_speed(iter/s)": 0.411508 }, { "acc": 0.9448597, "epoch": 3.4245796442206156, "grad_norm": 11.65737533569336, "learning_rate": 5.561317671325629e-07, "loss": 0.2789916, "memory(GiB)": 34.88, "step": 126480, "train_speed(iter/s)": 0.411508 }, { "acc": 0.9575695, "epoch": 3.4247150245038314, "grad_norm": 4.690899848937988, "learning_rate": 5.558755260543728e-07, "loss": 0.28439395, "memory(GiB)": 34.88, "step": 126485, "train_speed(iter/s)": 0.411509 }, { "acc": 0.94625187, "epoch": 3.4248504047870467, "grad_norm": 5.5930562019348145, "learning_rate": 5.556193406538369e-07, "loss": 0.35926337, "memory(GiB)": 34.88, "step": 126490, "train_speed(iter/s)": 0.41151 }, { "acc": 0.95289869, "epoch": 3.4249857850702625, "grad_norm": 8.474893569946289, "learning_rate": 5.553632109341656e-07, "loss": 0.28760624, "memory(GiB)": 34.88, "step": 126495, "train_speed(iter/s)": 0.411511 }, { "acc": 0.93894558, "epoch": 3.425121165353478, "grad_norm": 14.69735050201416, "learning_rate": 5.551071368985671e-07, "loss": 0.36669128, "memory(GiB)": 34.88, "step": 126500, "train_speed(iter/s)": 0.411512 }, { "acc": 0.94601955, "epoch": 3.4252565456366932, "grad_norm": 3.133075714111328, "learning_rate": 5.548511185502507e-07, "loss": 0.31872382, "memory(GiB)": 34.88, "step": 126505, "train_speed(iter/s)": 0.411513 }, { "acc": 0.93998308, "epoch": 3.425391925919909, "grad_norm": 7.709131240844727, "learning_rate": 5.545951558924198e-07, "loss": 0.37364929, "memory(GiB)": 34.88, "step": 126510, "train_speed(iter/s)": 0.411513 }, { "acc": 0.95746918, "epoch": 3.4255273062031244, "grad_norm": 3.5197927951812744, "learning_rate": 5.543392489282852e-07, "loss": 0.26072817, "memory(GiB)": 34.88, "step": 126515, "train_speed(iter/s)": 0.411514 }, { "acc": 0.94388504, "epoch": 3.42566268648634, "grad_norm": 5.159889221191406, "learning_rate": 5.540833976610513e-07, "loss": 0.33308682, "memory(GiB)": 34.88, "step": 126520, "train_speed(iter/s)": 0.411515 }, { "acc": 0.94240685, "epoch": 3.4257980667695556, "grad_norm": 7.235400676727295, "learning_rate": 5.538276020939209e-07, "loss": 0.41534929, "memory(GiB)": 34.88, "step": 126525, "train_speed(iter/s)": 0.411516 }, { "acc": 0.94666157, "epoch": 3.4259334470527714, "grad_norm": 6.284125804901123, "learning_rate": 5.535718622301004e-07, "loss": 0.32254047, "memory(GiB)": 34.88, "step": 126530, "train_speed(iter/s)": 0.411517 }, { "acc": 0.94412928, "epoch": 3.4260688273359867, "grad_norm": 11.527922630310059, "learning_rate": 5.533161780727932e-07, "loss": 0.33251126, "memory(GiB)": 34.88, "step": 126535, "train_speed(iter/s)": 0.411518 }, { "acc": 0.93763494, "epoch": 3.4262042076192025, "grad_norm": 21.284791946411133, "learning_rate": 5.530605496252021e-07, "loss": 0.36132708, "memory(GiB)": 34.88, "step": 126540, "train_speed(iter/s)": 0.411518 }, { "acc": 0.95051231, "epoch": 3.426339587902418, "grad_norm": 6.772863388061523, "learning_rate": 5.528049768905281e-07, "loss": 0.27048154, "memory(GiB)": 34.88, "step": 126545, "train_speed(iter/s)": 0.411519 }, { "acc": 0.95273819, "epoch": 3.4264749681856337, "grad_norm": 4.319585800170898, "learning_rate": 5.525494598719759e-07, "loss": 0.26979322, "memory(GiB)": 34.88, "step": 126550, "train_speed(iter/s)": 0.41152 }, { "acc": 0.95412207, "epoch": 3.426610348468849, "grad_norm": 7.669862747192383, "learning_rate": 5.522939985727435e-07, "loss": 0.29714718, "memory(GiB)": 34.88, "step": 126555, "train_speed(iter/s)": 0.411521 }, { "acc": 0.93534565, "epoch": 3.4267457287520644, "grad_norm": 7.645501613616943, "learning_rate": 5.520385929960328e-07, "loss": 0.39627924, "memory(GiB)": 34.88, "step": 126560, "train_speed(iter/s)": 0.411522 }, { "acc": 0.94500675, "epoch": 3.42688110903528, "grad_norm": 6.537388801574707, "learning_rate": 5.517832431450412e-07, "loss": 0.37521453, "memory(GiB)": 34.88, "step": 126565, "train_speed(iter/s)": 0.411523 }, { "acc": 0.95409203, "epoch": 3.4270164893184956, "grad_norm": 6.235002040863037, "learning_rate": 5.515279490229694e-07, "loss": 0.20631213, "memory(GiB)": 34.88, "step": 126570, "train_speed(iter/s)": 0.411523 }, { "acc": 0.94443417, "epoch": 3.4271518696017114, "grad_norm": 14.03601360321045, "learning_rate": 5.51272710633015e-07, "loss": 0.35863364, "memory(GiB)": 34.88, "step": 126575, "train_speed(iter/s)": 0.411524 }, { "acc": 0.95127449, "epoch": 3.4272872498849267, "grad_norm": 10.43689250946045, "learning_rate": 5.510175279783733e-07, "loss": 0.3258112, "memory(GiB)": 34.88, "step": 126580, "train_speed(iter/s)": 0.411525 }, { "acc": 0.94228106, "epoch": 3.4274226301681425, "grad_norm": 9.027295112609863, "learning_rate": 5.507624010622448e-07, "loss": 0.37187691, "memory(GiB)": 34.88, "step": 126585, "train_speed(iter/s)": 0.411526 }, { "acc": 0.94116364, "epoch": 3.427558010451358, "grad_norm": 6.017206192016602, "learning_rate": 5.50507329887823e-07, "loss": 0.33101764, "memory(GiB)": 34.88, "step": 126590, "train_speed(iter/s)": 0.411527 }, { "acc": 0.94218664, "epoch": 3.4276933907345732, "grad_norm": 6.926431179046631, "learning_rate": 5.502523144583042e-07, "loss": 0.37089448, "memory(GiB)": 34.88, "step": 126595, "train_speed(iter/s)": 0.411528 }, { "acc": 0.95535889, "epoch": 3.427828771017789, "grad_norm": 9.53289794921875, "learning_rate": 5.499973547768799e-07, "loss": 0.27734127, "memory(GiB)": 34.88, "step": 126600, "train_speed(iter/s)": 0.411529 }, { "acc": 0.9505415, "epoch": 3.4279641513010044, "grad_norm": 9.447212219238281, "learning_rate": 5.497424508467493e-07, "loss": 0.30936589, "memory(GiB)": 34.88, "step": 126605, "train_speed(iter/s)": 0.411529 }, { "acc": 0.95683556, "epoch": 3.42809953158422, "grad_norm": 6.7031402587890625, "learning_rate": 5.494876026711019e-07, "loss": 0.24200242, "memory(GiB)": 34.88, "step": 126610, "train_speed(iter/s)": 0.41153 }, { "acc": 0.94451876, "epoch": 3.4282349118674356, "grad_norm": 9.157806396484375, "learning_rate": 5.492328102531294e-07, "loss": 0.32207756, "memory(GiB)": 34.88, "step": 126615, "train_speed(iter/s)": 0.411531 }, { "acc": 0.94967003, "epoch": 3.4283702921506514, "grad_norm": 3.8702268600463867, "learning_rate": 5.489780735960274e-07, "loss": 0.2779355, "memory(GiB)": 34.88, "step": 126620, "train_speed(iter/s)": 0.411532 }, { "acc": 0.94780083, "epoch": 3.4285056724338667, "grad_norm": 9.240180015563965, "learning_rate": 5.487233927029833e-07, "loss": 0.29679146, "memory(GiB)": 34.88, "step": 126625, "train_speed(iter/s)": 0.411533 }, { "acc": 0.95131273, "epoch": 3.428641052717082, "grad_norm": 3.8084654808044434, "learning_rate": 5.484687675771905e-07, "loss": 0.25538828, "memory(GiB)": 34.88, "step": 126630, "train_speed(iter/s)": 0.411534 }, { "acc": 0.9339282, "epoch": 3.428776433000298, "grad_norm": 10.066710472106934, "learning_rate": 5.482141982218346e-07, "loss": 0.33766341, "memory(GiB)": 34.88, "step": 126635, "train_speed(iter/s)": 0.411535 }, { "acc": 0.94908867, "epoch": 3.4289118132835132, "grad_norm": 4.741390705108643, "learning_rate": 5.479596846401091e-07, "loss": 0.26779869, "memory(GiB)": 34.88, "step": 126640, "train_speed(iter/s)": 0.411536 }, { "acc": 0.94623928, "epoch": 3.429047193566729, "grad_norm": 5.726020336151123, "learning_rate": 5.477052268352008e-07, "loss": 0.34653435, "memory(GiB)": 34.88, "step": 126645, "train_speed(iter/s)": 0.411536 }, { "acc": 0.92726831, "epoch": 3.4291825738499444, "grad_norm": 11.94116497039795, "learning_rate": 5.474508248102946e-07, "loss": 0.44359865, "memory(GiB)": 34.88, "step": 126650, "train_speed(iter/s)": 0.411537 }, { "acc": 0.95457029, "epoch": 3.42931795413316, "grad_norm": 2.5135576725006104, "learning_rate": 5.471964785685813e-07, "loss": 0.20882659, "memory(GiB)": 34.88, "step": 126655, "train_speed(iter/s)": 0.411538 }, { "acc": 0.93675537, "epoch": 3.4294533344163756, "grad_norm": 6.4364447593688965, "learning_rate": 5.469421881132444e-07, "loss": 0.38830774, "memory(GiB)": 34.88, "step": 126660, "train_speed(iter/s)": 0.411539 }, { "acc": 0.94791679, "epoch": 3.429588714699591, "grad_norm": 3.5943756103515625, "learning_rate": 5.46687953447471e-07, "loss": 0.33203974, "memory(GiB)": 34.88, "step": 126665, "train_speed(iter/s)": 0.41154 }, { "acc": 0.95328655, "epoch": 3.4297240949828067, "grad_norm": 8.733165740966797, "learning_rate": 5.464337745744436e-07, "loss": 0.27962103, "memory(GiB)": 34.88, "step": 126670, "train_speed(iter/s)": 0.411541 }, { "acc": 0.94562111, "epoch": 3.429859475266022, "grad_norm": 6.8487653732299805, "learning_rate": 5.461796514973495e-07, "loss": 0.30976875, "memory(GiB)": 34.88, "step": 126675, "train_speed(iter/s)": 0.411542 }, { "acc": 0.94878025, "epoch": 3.429994855549238, "grad_norm": 4.484758377075195, "learning_rate": 5.459255842193703e-07, "loss": 0.26158543, "memory(GiB)": 34.88, "step": 126680, "train_speed(iter/s)": 0.411542 }, { "acc": 0.95263557, "epoch": 3.4301302358324532, "grad_norm": 6.9324445724487305, "learning_rate": 5.45671572743688e-07, "loss": 0.26349204, "memory(GiB)": 34.88, "step": 126685, "train_speed(iter/s)": 0.411543 }, { "acc": 0.96468334, "epoch": 3.430265616115669, "grad_norm": 12.034587860107422, "learning_rate": 5.454176170734872e-07, "loss": 0.21224577, "memory(GiB)": 34.88, "step": 126690, "train_speed(iter/s)": 0.411544 }, { "acc": 0.94224367, "epoch": 3.4304009963988844, "grad_norm": 9.560619354248047, "learning_rate": 5.451637172119457e-07, "loss": 0.35086102, "memory(GiB)": 34.88, "step": 126695, "train_speed(iter/s)": 0.411545 }, { "acc": 0.94298601, "epoch": 3.4305363766821, "grad_norm": 7.961188316345215, "learning_rate": 5.449098731622478e-07, "loss": 0.40663004, "memory(GiB)": 34.88, "step": 126700, "train_speed(iter/s)": 0.411546 }, { "acc": 0.94247599, "epoch": 3.4306717569653156, "grad_norm": 4.518840312957764, "learning_rate": 5.446560849275686e-07, "loss": 0.40173182, "memory(GiB)": 34.88, "step": 126705, "train_speed(iter/s)": 0.411547 }, { "acc": 0.94812346, "epoch": 3.4308071372485314, "grad_norm": 9.641278266906738, "learning_rate": 5.444023525110928e-07, "loss": 0.33269882, "memory(GiB)": 34.88, "step": 126710, "train_speed(iter/s)": 0.411548 }, { "acc": 0.94622555, "epoch": 3.4309425175317467, "grad_norm": 13.002284049987793, "learning_rate": 5.441486759159944e-07, "loss": 0.35561585, "memory(GiB)": 34.88, "step": 126715, "train_speed(iter/s)": 0.411548 }, { "acc": 0.94533482, "epoch": 3.431077897814962, "grad_norm": 6.65300989151001, "learning_rate": 5.438950551454537e-07, "loss": 0.312745, "memory(GiB)": 34.88, "step": 126720, "train_speed(iter/s)": 0.411549 }, { "acc": 0.95047102, "epoch": 3.431213278098178, "grad_norm": 6.845245838165283, "learning_rate": 5.436414902026475e-07, "loss": 0.29543614, "memory(GiB)": 34.88, "step": 126725, "train_speed(iter/s)": 0.41155 }, { "acc": 0.95229864, "epoch": 3.4313486583813932, "grad_norm": 4.688490867614746, "learning_rate": 5.433879810907519e-07, "loss": 0.32708459, "memory(GiB)": 34.88, "step": 126730, "train_speed(iter/s)": 0.411551 }, { "acc": 0.95809174, "epoch": 3.431484038664609, "grad_norm": 2.504150867462158, "learning_rate": 5.431345278129432e-07, "loss": 0.20630331, "memory(GiB)": 34.88, "step": 126735, "train_speed(iter/s)": 0.411552 }, { "acc": 0.94564476, "epoch": 3.4316194189478244, "grad_norm": 9.032085418701172, "learning_rate": 5.428811303723938e-07, "loss": 0.31478717, "memory(GiB)": 34.88, "step": 126740, "train_speed(iter/s)": 0.411553 }, { "acc": 0.95719051, "epoch": 3.43175479923104, "grad_norm": 2.3529696464538574, "learning_rate": 5.426277887722814e-07, "loss": 0.2344842, "memory(GiB)": 34.88, "step": 126745, "train_speed(iter/s)": 0.411553 }, { "acc": 0.94658775, "epoch": 3.4318901795142556, "grad_norm": 4.003065586090088, "learning_rate": 5.423745030157778e-07, "loss": 0.27861235, "memory(GiB)": 34.88, "step": 126750, "train_speed(iter/s)": 0.411554 }, { "acc": 0.9636013, "epoch": 3.432025559797471, "grad_norm": 6.231256484985352, "learning_rate": 5.421212731060567e-07, "loss": 0.24232805, "memory(GiB)": 34.88, "step": 126755, "train_speed(iter/s)": 0.411555 }, { "acc": 0.96438332, "epoch": 3.4321609400806867, "grad_norm": 2.7372372150421143, "learning_rate": 5.41868099046288e-07, "loss": 0.192497, "memory(GiB)": 34.88, "step": 126760, "train_speed(iter/s)": 0.411556 }, { "acc": 0.94701576, "epoch": 3.432296320363902, "grad_norm": 10.022665977478027, "learning_rate": 5.416149808396471e-07, "loss": 0.27001431, "memory(GiB)": 34.88, "step": 126765, "train_speed(iter/s)": 0.411557 }, { "acc": 0.95020151, "epoch": 3.432431700647118, "grad_norm": 5.110182762145996, "learning_rate": 5.413619184893019e-07, "loss": 0.28524477, "memory(GiB)": 34.88, "step": 126770, "train_speed(iter/s)": 0.411558 }, { "acc": 0.96949368, "epoch": 3.4325670809303332, "grad_norm": 1.7390916347503662, "learning_rate": 5.411089119984233e-07, "loss": 0.1772786, "memory(GiB)": 34.88, "step": 126775, "train_speed(iter/s)": 0.411559 }, { "acc": 0.9395731, "epoch": 3.432702461213549, "grad_norm": 5.773961067199707, "learning_rate": 5.408559613701818e-07, "loss": 0.34432683, "memory(GiB)": 34.88, "step": 126780, "train_speed(iter/s)": 0.41156 }, { "acc": 0.95201273, "epoch": 3.4328378414967644, "grad_norm": 21.61480140686035, "learning_rate": 5.406030666077441e-07, "loss": 0.28777578, "memory(GiB)": 34.88, "step": 126785, "train_speed(iter/s)": 0.41156 }, { "acc": 0.9388339, "epoch": 3.4329732217799798, "grad_norm": 7.419942855834961, "learning_rate": 5.403502277142799e-07, "loss": 0.383271, "memory(GiB)": 34.88, "step": 126790, "train_speed(iter/s)": 0.411561 }, { "acc": 0.9477663, "epoch": 3.4331086020631956, "grad_norm": 7.9141364097595215, "learning_rate": 5.400974446929538e-07, "loss": 0.350441, "memory(GiB)": 34.88, "step": 126795, "train_speed(iter/s)": 0.411562 }, { "acc": 0.94363852, "epoch": 3.433243982346411, "grad_norm": 6.166920185089111, "learning_rate": 5.398447175469367e-07, "loss": 0.37220378, "memory(GiB)": 34.88, "step": 126800, "train_speed(iter/s)": 0.411563 }, { "acc": 0.93758755, "epoch": 3.4333793626296267, "grad_norm": 8.55160903930664, "learning_rate": 5.395920462793908e-07, "loss": 0.45449171, "memory(GiB)": 34.88, "step": 126805, "train_speed(iter/s)": 0.411564 }, { "acc": 0.94156694, "epoch": 3.433514742912842, "grad_norm": 4.273087978363037, "learning_rate": 5.393394308934832e-07, "loss": 0.3469321, "memory(GiB)": 34.88, "step": 126810, "train_speed(iter/s)": 0.411565 }, { "acc": 0.94327097, "epoch": 3.433650123196058, "grad_norm": 17.028289794921875, "learning_rate": 5.390868713923783e-07, "loss": 0.32598047, "memory(GiB)": 34.88, "step": 126815, "train_speed(iter/s)": 0.411566 }, { "acc": 0.93536167, "epoch": 3.4337855034792732, "grad_norm": 11.642033576965332, "learning_rate": 5.38834367779239e-07, "loss": 0.34253697, "memory(GiB)": 34.88, "step": 126820, "train_speed(iter/s)": 0.411567 }, { "acc": 0.94540672, "epoch": 3.4339208837624886, "grad_norm": 6.249073505401611, "learning_rate": 5.385819200572294e-07, "loss": 0.31921973, "memory(GiB)": 34.88, "step": 126825, "train_speed(iter/s)": 0.411567 }, { "acc": 0.94619913, "epoch": 3.4340562640457044, "grad_norm": 8.335671424865723, "learning_rate": 5.383295282295095e-07, "loss": 0.30844269, "memory(GiB)": 34.88, "step": 126830, "train_speed(iter/s)": 0.411568 }, { "acc": 0.95375767, "epoch": 3.4341916443289198, "grad_norm": 7.836047172546387, "learning_rate": 5.380771922992455e-07, "loss": 0.27488222, "memory(GiB)": 34.88, "step": 126835, "train_speed(iter/s)": 0.411569 }, { "acc": 0.96016769, "epoch": 3.4343270246121356, "grad_norm": 9.844898223876953, "learning_rate": 5.378249122695943e-07, "loss": 0.22212434, "memory(GiB)": 34.88, "step": 126840, "train_speed(iter/s)": 0.41157 }, { "acc": 0.94946957, "epoch": 3.434462404895351, "grad_norm": 18.123699188232422, "learning_rate": 5.375726881437177e-07, "loss": 0.32240953, "memory(GiB)": 34.88, "step": 126845, "train_speed(iter/s)": 0.411571 }, { "acc": 0.95628624, "epoch": 3.4345977851785667, "grad_norm": 8.73663330078125, "learning_rate": 5.373205199247756e-07, "loss": 0.31542156, "memory(GiB)": 34.88, "step": 126850, "train_speed(iter/s)": 0.411572 }, { "acc": 0.94949265, "epoch": 3.434733165461782, "grad_norm": 5.255863189697266, "learning_rate": 5.370684076159272e-07, "loss": 0.32995303, "memory(GiB)": 34.88, "step": 126855, "train_speed(iter/s)": 0.411573 }, { "acc": 0.93410854, "epoch": 3.434868545744998, "grad_norm": 5.105719089508057, "learning_rate": 5.368163512203305e-07, "loss": 0.42495823, "memory(GiB)": 34.88, "step": 126860, "train_speed(iter/s)": 0.411573 }, { "acc": 0.93718176, "epoch": 3.4350039260282133, "grad_norm": 7.4704813957214355, "learning_rate": 5.365643507411403e-07, "loss": 0.37710686, "memory(GiB)": 34.88, "step": 126865, "train_speed(iter/s)": 0.411574 }, { "acc": 0.94368076, "epoch": 3.435139306311429, "grad_norm": 7.190454959869385, "learning_rate": 5.363124061815182e-07, "loss": 0.34933162, "memory(GiB)": 34.88, "step": 126870, "train_speed(iter/s)": 0.411575 }, { "acc": 0.94728012, "epoch": 3.4352746865946444, "grad_norm": 15.953691482543945, "learning_rate": 5.360605175446162e-07, "loss": 0.35098925, "memory(GiB)": 34.88, "step": 126875, "train_speed(iter/s)": 0.411576 }, { "acc": 0.92932644, "epoch": 3.4354100668778598, "grad_norm": 4.836003303527832, "learning_rate": 5.358086848335918e-07, "loss": 0.47427473, "memory(GiB)": 34.88, "step": 126880, "train_speed(iter/s)": 0.411577 }, { "acc": 0.93911343, "epoch": 3.4355454471610756, "grad_norm": 8.791668891906738, "learning_rate": 5.355569080515991e-07, "loss": 0.33379922, "memory(GiB)": 34.88, "step": 126885, "train_speed(iter/s)": 0.411578 }, { "acc": 0.94577942, "epoch": 3.435680827444291, "grad_norm": 8.149555206298828, "learning_rate": 5.353051872017933e-07, "loss": 0.30856452, "memory(GiB)": 34.88, "step": 126890, "train_speed(iter/s)": 0.411578 }, { "acc": 0.96191254, "epoch": 3.4358162077275067, "grad_norm": 3.942626714706421, "learning_rate": 5.350535222873257e-07, "loss": 0.20342517, "memory(GiB)": 34.88, "step": 126895, "train_speed(iter/s)": 0.411579 }, { "acc": 0.96210537, "epoch": 3.435951588010722, "grad_norm": 5.308671474456787, "learning_rate": 5.348019133113497e-07, "loss": 0.22087803, "memory(GiB)": 34.88, "step": 126900, "train_speed(iter/s)": 0.41158 }, { "acc": 0.94865894, "epoch": 3.436086968293938, "grad_norm": 2.5912837982177734, "learning_rate": 5.345503602770181e-07, "loss": 0.3173027, "memory(GiB)": 34.88, "step": 126905, "train_speed(iter/s)": 0.411581 }, { "acc": 0.92181044, "epoch": 3.4362223485771533, "grad_norm": 22.40016746520996, "learning_rate": 5.342988631874802e-07, "loss": 0.48130398, "memory(GiB)": 34.88, "step": 126910, "train_speed(iter/s)": 0.411582 }, { "acc": 0.95797396, "epoch": 3.4363577288603686, "grad_norm": 4.707032203674316, "learning_rate": 5.340474220458882e-07, "loss": 0.24277833, "memory(GiB)": 34.88, "step": 126915, "train_speed(iter/s)": 0.411582 }, { "acc": 0.93717823, "epoch": 3.4364931091435844, "grad_norm": 13.585000991821289, "learning_rate": 5.337960368553909e-07, "loss": 0.44009476, "memory(GiB)": 34.88, "step": 126920, "train_speed(iter/s)": 0.411583 }, { "acc": 0.95638523, "epoch": 3.4366284894267998, "grad_norm": 5.142239093780518, "learning_rate": 5.335447076191389e-07, "loss": 0.2497622, "memory(GiB)": 34.88, "step": 126925, "train_speed(iter/s)": 0.411584 }, { "acc": 0.94580297, "epoch": 3.4367638697100156, "grad_norm": 5.778210639953613, "learning_rate": 5.332934343402787e-07, "loss": 0.34327309, "memory(GiB)": 34.88, "step": 126930, "train_speed(iter/s)": 0.411585 }, { "acc": 0.94221554, "epoch": 3.436899249993231, "grad_norm": 10.450393676757812, "learning_rate": 5.330422170219585e-07, "loss": 0.34325349, "memory(GiB)": 34.88, "step": 126935, "train_speed(iter/s)": 0.411586 }, { "acc": 0.95255795, "epoch": 3.4370346302764467, "grad_norm": 4.762617111206055, "learning_rate": 5.327910556673263e-07, "loss": 0.2540982, "memory(GiB)": 34.88, "step": 126940, "train_speed(iter/s)": 0.411586 }, { "acc": 0.94696579, "epoch": 3.437170010559662, "grad_norm": 9.041315078735352, "learning_rate": 5.325399502795273e-07, "loss": 0.33505692, "memory(GiB)": 34.88, "step": 126945, "train_speed(iter/s)": 0.411587 }, { "acc": 0.94553165, "epoch": 3.4373053908428775, "grad_norm": 7.986329078674316, "learning_rate": 5.322889008617068e-07, "loss": 0.36165831, "memory(GiB)": 34.88, "step": 126950, "train_speed(iter/s)": 0.411588 }, { "acc": 0.94851923, "epoch": 3.4374407711260933, "grad_norm": 8.013298034667969, "learning_rate": 5.32037907417011e-07, "loss": 0.32434576, "memory(GiB)": 34.88, "step": 126955, "train_speed(iter/s)": 0.411589 }, { "acc": 0.95504627, "epoch": 3.4375761514093086, "grad_norm": 4.764006614685059, "learning_rate": 5.317869699485839e-07, "loss": 0.25670183, "memory(GiB)": 34.88, "step": 126960, "train_speed(iter/s)": 0.41159 }, { "acc": 0.94885569, "epoch": 3.4377115316925244, "grad_norm": 6.296426296234131, "learning_rate": 5.315360884595678e-07, "loss": 0.26375151, "memory(GiB)": 34.88, "step": 126965, "train_speed(iter/s)": 0.411591 }, { "acc": 0.95199461, "epoch": 3.4378469119757398, "grad_norm": 5.791687965393066, "learning_rate": 5.312852629531058e-07, "loss": 0.28543096, "memory(GiB)": 34.88, "step": 126970, "train_speed(iter/s)": 0.411592 }, { "acc": 0.95005474, "epoch": 3.4379822922589556, "grad_norm": 12.93409252166748, "learning_rate": 5.310344934323409e-07, "loss": 0.31018815, "memory(GiB)": 34.88, "step": 126975, "train_speed(iter/s)": 0.411592 }, { "acc": 0.93968697, "epoch": 3.438117672542171, "grad_norm": 6.337292194366455, "learning_rate": 5.307837799004143e-07, "loss": 0.35934072, "memory(GiB)": 34.88, "step": 126980, "train_speed(iter/s)": 0.411593 }, { "acc": 0.94497051, "epoch": 3.4382530528253863, "grad_norm": 7.466784954071045, "learning_rate": 5.305331223604659e-07, "loss": 0.32358651, "memory(GiB)": 34.88, "step": 126985, "train_speed(iter/s)": 0.411594 }, { "acc": 0.93574734, "epoch": 3.438388433108602, "grad_norm": 6.64824104309082, "learning_rate": 5.302825208156362e-07, "loss": 0.41134911, "memory(GiB)": 34.88, "step": 126990, "train_speed(iter/s)": 0.411595 }, { "acc": 0.94423847, "epoch": 3.4385238133918175, "grad_norm": 2.52117919921875, "learning_rate": 5.300319752690652e-07, "loss": 0.26110592, "memory(GiB)": 34.88, "step": 126995, "train_speed(iter/s)": 0.411596 }, { "acc": 0.94840546, "epoch": 3.4386591936750333, "grad_norm": 21.487903594970703, "learning_rate": 5.297814857238895e-07, "loss": 0.30732546, "memory(GiB)": 34.88, "step": 127000, "train_speed(iter/s)": 0.411597 }, { "acc": 0.94982109, "epoch": 3.4387945739582486, "grad_norm": 7.237833023071289, "learning_rate": 5.295310521832483e-07, "loss": 0.33322828, "memory(GiB)": 34.88, "step": 127005, "train_speed(iter/s)": 0.411598 }, { "acc": 0.94481068, "epoch": 3.4389299542414644, "grad_norm": 9.096697807312012, "learning_rate": 5.292806746502787e-07, "loss": 0.27865071, "memory(GiB)": 34.88, "step": 127010, "train_speed(iter/s)": 0.411598 }, { "acc": 0.95177584, "epoch": 3.4390653345246798, "grad_norm": 5.713841915130615, "learning_rate": 5.290303531281178e-07, "loss": 0.24001646, "memory(GiB)": 34.88, "step": 127015, "train_speed(iter/s)": 0.411599 }, { "acc": 0.93617611, "epoch": 3.439200714807895, "grad_norm": 6.080411434173584, "learning_rate": 5.287800876198995e-07, "loss": 0.42783685, "memory(GiB)": 34.88, "step": 127020, "train_speed(iter/s)": 0.4116 }, { "acc": 0.93579426, "epoch": 3.439336095091111, "grad_norm": 8.357305526733398, "learning_rate": 5.285298781287605e-07, "loss": 0.36999195, "memory(GiB)": 34.88, "step": 127025, "train_speed(iter/s)": 0.411601 }, { "acc": 0.93976374, "epoch": 3.4394714753743263, "grad_norm": 5.602746963500977, "learning_rate": 5.282797246578357e-07, "loss": 0.42379169, "memory(GiB)": 34.88, "step": 127030, "train_speed(iter/s)": 0.411602 }, { "acc": 0.94538918, "epoch": 3.439606855657542, "grad_norm": 10.655477523803711, "learning_rate": 5.280296272102566e-07, "loss": 0.38025227, "memory(GiB)": 34.88, "step": 127035, "train_speed(iter/s)": 0.411602 }, { "acc": 0.95249977, "epoch": 3.4397422359407575, "grad_norm": 6.4344868659973145, "learning_rate": 5.277795857891572e-07, "loss": 0.30009234, "memory(GiB)": 34.88, "step": 127040, "train_speed(iter/s)": 0.411603 }, { "acc": 0.93440208, "epoch": 3.4398776162239733, "grad_norm": 6.908443450927734, "learning_rate": 5.275296003976696e-07, "loss": 0.39839373, "memory(GiB)": 34.88, "step": 127045, "train_speed(iter/s)": 0.411604 }, { "acc": 0.94831247, "epoch": 3.4400129965071886, "grad_norm": 8.361197471618652, "learning_rate": 5.272796710389272e-07, "loss": 0.32109787, "memory(GiB)": 34.88, "step": 127050, "train_speed(iter/s)": 0.4116 }, { "acc": 0.95719223, "epoch": 3.4401483767904044, "grad_norm": 2.6896660327911377, "learning_rate": 5.270297977160577e-07, "loss": 0.31629663, "memory(GiB)": 34.88, "step": 127055, "train_speed(iter/s)": 0.411601 }, { "acc": 0.93198032, "epoch": 3.4402837570736198, "grad_norm": 4.346351623535156, "learning_rate": 5.267799804321932e-07, "loss": 0.49456229, "memory(GiB)": 34.88, "step": 127060, "train_speed(iter/s)": 0.411602 }, { "acc": 0.94859304, "epoch": 3.4404191373568356, "grad_norm": 5.612792491912842, "learning_rate": 5.265302191904636e-07, "loss": 0.28295007, "memory(GiB)": 34.88, "step": 127065, "train_speed(iter/s)": 0.411603 }, { "acc": 0.96204624, "epoch": 3.440554517640051, "grad_norm": 3.177072763442993, "learning_rate": 5.262805139939963e-07, "loss": 0.17580363, "memory(GiB)": 34.88, "step": 127070, "train_speed(iter/s)": 0.411604 }, { "acc": 0.94677486, "epoch": 3.4406898979232663, "grad_norm": 9.867164611816406, "learning_rate": 5.260308648459195e-07, "loss": 0.33340163, "memory(GiB)": 34.88, "step": 127075, "train_speed(iter/s)": 0.411605 }, { "acc": 0.95170975, "epoch": 3.440825278206482, "grad_norm": 5.30338716506958, "learning_rate": 5.257812717493609e-07, "loss": 0.31706488, "memory(GiB)": 34.88, "step": 127080, "train_speed(iter/s)": 0.411605 }, { "acc": 0.941539, "epoch": 3.4409606584896975, "grad_norm": 6.713511943817139, "learning_rate": 5.255317347074483e-07, "loss": 0.33939698, "memory(GiB)": 34.88, "step": 127085, "train_speed(iter/s)": 0.411606 }, { "acc": 0.95486603, "epoch": 3.4410960387729133, "grad_norm": 8.991222381591797, "learning_rate": 5.252822537233054e-07, "loss": 0.28623824, "memory(GiB)": 34.88, "step": 127090, "train_speed(iter/s)": 0.411607 }, { "acc": 0.93613911, "epoch": 3.4412314190561286, "grad_norm": 3.3663153648376465, "learning_rate": 5.250328288000585e-07, "loss": 0.42332163, "memory(GiB)": 34.88, "step": 127095, "train_speed(iter/s)": 0.411608 }, { "acc": 0.94616718, "epoch": 3.4413667993393444, "grad_norm": 10.973207473754883, "learning_rate": 5.247834599408325e-07, "loss": 0.3054558, "memory(GiB)": 34.88, "step": 127100, "train_speed(iter/s)": 0.411609 }, { "acc": 0.94400158, "epoch": 3.4415021796225598, "grad_norm": 11.17829704284668, "learning_rate": 5.245341471487518e-07, "loss": 0.34867678, "memory(GiB)": 34.88, "step": 127105, "train_speed(iter/s)": 0.411609 }, { "acc": 0.93850746, "epoch": 3.441637559905775, "grad_norm": 13.141936302185059, "learning_rate": 5.242848904269379e-07, "loss": 0.38573594, "memory(GiB)": 34.88, "step": 127110, "train_speed(iter/s)": 0.41161 }, { "acc": 0.93617077, "epoch": 3.441772940188991, "grad_norm": 7.090369701385498, "learning_rate": 5.240356897785138e-07, "loss": 0.40005398, "memory(GiB)": 34.88, "step": 127115, "train_speed(iter/s)": 0.411611 }, { "acc": 0.93402805, "epoch": 3.4419083204722063, "grad_norm": 9.78833293914795, "learning_rate": 5.23786545206602e-07, "loss": 0.42096539, "memory(GiB)": 34.88, "step": 127120, "train_speed(iter/s)": 0.411612 }, { "acc": 0.94617968, "epoch": 3.442043700755422, "grad_norm": 5.55902099609375, "learning_rate": 5.235374567143225e-07, "loss": 0.30410037, "memory(GiB)": 34.88, "step": 127125, "train_speed(iter/s)": 0.411613 }, { "acc": 0.96289473, "epoch": 3.4421790810386375, "grad_norm": 3.0956954956054688, "learning_rate": 5.232884243047956e-07, "loss": 0.22710443, "memory(GiB)": 34.88, "step": 127130, "train_speed(iter/s)": 0.411614 }, { "acc": 0.94398499, "epoch": 3.4423144613218533, "grad_norm": 5.832562446594238, "learning_rate": 5.230394479811418e-07, "loss": 0.35649459, "memory(GiB)": 34.88, "step": 127135, "train_speed(iter/s)": 0.411614 }, { "acc": 0.96400948, "epoch": 3.4424498416050686, "grad_norm": 4.0817437171936035, "learning_rate": 5.227905277464805e-07, "loss": 0.18474895, "memory(GiB)": 34.88, "step": 127140, "train_speed(iter/s)": 0.411615 }, { "acc": 0.93284178, "epoch": 3.442585221888284, "grad_norm": 12.59100341796875, "learning_rate": 5.225416636039269e-07, "loss": 0.35110369, "memory(GiB)": 34.88, "step": 127145, "train_speed(iter/s)": 0.411616 }, { "acc": 0.96707125, "epoch": 3.4427206021714998, "grad_norm": 3.7786495685577393, "learning_rate": 5.222928555566028e-07, "loss": 0.22786756, "memory(GiB)": 34.88, "step": 127150, "train_speed(iter/s)": 0.411617 }, { "acc": 0.95092316, "epoch": 3.442855982454715, "grad_norm": 11.06800365447998, "learning_rate": 5.220441036076226e-07, "loss": 0.28066878, "memory(GiB)": 34.88, "step": 127155, "train_speed(iter/s)": 0.411618 }, { "acc": 0.93528404, "epoch": 3.442991362737931, "grad_norm": 13.05358600616455, "learning_rate": 5.217954077601017e-07, "loss": 0.36058552, "memory(GiB)": 34.88, "step": 127160, "train_speed(iter/s)": 0.411619 }, { "acc": 0.94261389, "epoch": 3.4431267430211463, "grad_norm": 4.898220539093018, "learning_rate": 5.215467680171569e-07, "loss": 0.38147945, "memory(GiB)": 34.88, "step": 127165, "train_speed(iter/s)": 0.41162 }, { "acc": 0.96755638, "epoch": 3.443262123304362, "grad_norm": 6.766378879547119, "learning_rate": 5.21298184381902e-07, "loss": 0.17845103, "memory(GiB)": 34.88, "step": 127170, "train_speed(iter/s)": 0.41162 }, { "acc": 0.95588398, "epoch": 3.4433975035875775, "grad_norm": 5.363193035125732, "learning_rate": 5.210496568574525e-07, "loss": 0.34278691, "memory(GiB)": 34.88, "step": 127175, "train_speed(iter/s)": 0.411621 }, { "acc": 0.94682064, "epoch": 3.443532883870793, "grad_norm": 6.026303291320801, "learning_rate": 5.20801185446919e-07, "loss": 0.32940035, "memory(GiB)": 34.88, "step": 127180, "train_speed(iter/s)": 0.411622 }, { "acc": 0.95112772, "epoch": 3.4436682641540086, "grad_norm": 4.878619194030762, "learning_rate": 5.205527701534175e-07, "loss": 0.25598333, "memory(GiB)": 34.88, "step": 127185, "train_speed(iter/s)": 0.411623 }, { "acc": 0.95300112, "epoch": 3.443803644437224, "grad_norm": 4.790510177612305, "learning_rate": 5.20304410980058e-07, "loss": 0.27656231, "memory(GiB)": 34.88, "step": 127190, "train_speed(iter/s)": 0.411624 }, { "acc": 0.9475338, "epoch": 3.44393902472044, "grad_norm": 8.866640090942383, "learning_rate": 5.200561079299511e-07, "loss": 0.30307999, "memory(GiB)": 34.88, "step": 127195, "train_speed(iter/s)": 0.411625 }, { "acc": 0.94539108, "epoch": 3.444074405003655, "grad_norm": 4.499276638031006, "learning_rate": 5.198078610062079e-07, "loss": 0.30490141, "memory(GiB)": 34.88, "step": 127200, "train_speed(iter/s)": 0.411625 }, { "acc": 0.93354921, "epoch": 3.444209785286871, "grad_norm": 9.215847969055176, "learning_rate": 5.195596702119382e-07, "loss": 0.37046738, "memory(GiB)": 34.88, "step": 127205, "train_speed(iter/s)": 0.411626 }, { "acc": 0.94229555, "epoch": 3.4443451655700863, "grad_norm": 7.63227653503418, "learning_rate": 5.193115355502522e-07, "loss": 0.37191491, "memory(GiB)": 34.88, "step": 127210, "train_speed(iter/s)": 0.411627 }, { "acc": 0.93085575, "epoch": 3.444480545853302, "grad_norm": 3.6866023540496826, "learning_rate": 5.190634570242554e-07, "loss": 0.45456123, "memory(GiB)": 34.88, "step": 127215, "train_speed(iter/s)": 0.411628 }, { "acc": 0.94673738, "epoch": 3.4446159261365175, "grad_norm": 4.087001323699951, "learning_rate": 5.188154346370587e-07, "loss": 0.30958724, "memory(GiB)": 34.88, "step": 127220, "train_speed(iter/s)": 0.411629 }, { "acc": 0.93643684, "epoch": 3.4447513064197333, "grad_norm": 3.5139126777648926, "learning_rate": 5.185674683917672e-07, "loss": 0.42545471, "memory(GiB)": 34.88, "step": 127225, "train_speed(iter/s)": 0.41163 }, { "acc": 0.94570522, "epoch": 3.4448866867029486, "grad_norm": 7.023093223571777, "learning_rate": 5.183195582914888e-07, "loss": 0.31626792, "memory(GiB)": 34.88, "step": 127230, "train_speed(iter/s)": 0.411631 }, { "acc": 0.95613251, "epoch": 3.445022066986164, "grad_norm": 8.443163871765137, "learning_rate": 5.180717043393266e-07, "loss": 0.23640661, "memory(GiB)": 34.88, "step": 127235, "train_speed(iter/s)": 0.411631 }, { "acc": 0.94037533, "epoch": 3.44515744726938, "grad_norm": 22.25025177001953, "learning_rate": 5.178239065383868e-07, "loss": 0.34568889, "memory(GiB)": 34.88, "step": 127240, "train_speed(iter/s)": 0.411632 }, { "acc": 0.95655861, "epoch": 3.445292827552595, "grad_norm": 2.811009645462036, "learning_rate": 5.175761648917744e-07, "loss": 0.22683747, "memory(GiB)": 34.88, "step": 127245, "train_speed(iter/s)": 0.411633 }, { "acc": 0.94763432, "epoch": 3.445428207835811, "grad_norm": 9.04782772064209, "learning_rate": 5.173284794025899e-07, "loss": 0.31793556, "memory(GiB)": 34.88, "step": 127250, "train_speed(iter/s)": 0.411634 }, { "acc": 0.93758526, "epoch": 3.4455635881190263, "grad_norm": 4.575366973876953, "learning_rate": 5.170808500739405e-07, "loss": 0.43759708, "memory(GiB)": 34.88, "step": 127255, "train_speed(iter/s)": 0.411635 }, { "acc": 0.94767323, "epoch": 3.445698968402242, "grad_norm": 4.980323791503906, "learning_rate": 5.168332769089246e-07, "loss": 0.27633126, "memory(GiB)": 34.88, "step": 127260, "train_speed(iter/s)": 0.411636 }, { "acc": 0.96239176, "epoch": 3.4458343486854575, "grad_norm": 8.467245101928711, "learning_rate": 5.165857599106458e-07, "loss": 0.22876158, "memory(GiB)": 34.88, "step": 127265, "train_speed(iter/s)": 0.411637 }, { "acc": 0.94991627, "epoch": 3.445969728968673, "grad_norm": 7.189723014831543, "learning_rate": 5.163382990822017e-07, "loss": 0.29876592, "memory(GiB)": 34.88, "step": 127270, "train_speed(iter/s)": 0.411637 }, { "acc": 0.94376717, "epoch": 3.4461051092518886, "grad_norm": 7.020227432250977, "learning_rate": 5.160908944266965e-07, "loss": 0.34036953, "memory(GiB)": 34.88, "step": 127275, "train_speed(iter/s)": 0.411638 }, { "acc": 0.95447655, "epoch": 3.446240489535104, "grad_norm": 9.923885345458984, "learning_rate": 5.15843545947227e-07, "loss": 0.25118287, "memory(GiB)": 34.88, "step": 127280, "train_speed(iter/s)": 0.411639 }, { "acc": 0.93477077, "epoch": 3.44637586981832, "grad_norm": 2.652203321456909, "learning_rate": 5.155962536468902e-07, "loss": 0.35433083, "memory(GiB)": 34.88, "step": 127285, "train_speed(iter/s)": 0.41164 }, { "acc": 0.94942093, "epoch": 3.446511250101535, "grad_norm": 6.016451358795166, "learning_rate": 5.153490175287873e-07, "loss": 0.27153621, "memory(GiB)": 34.88, "step": 127290, "train_speed(iter/s)": 0.411641 }, { "acc": 0.94123268, "epoch": 3.446646630384751, "grad_norm": 7.183972358703613, "learning_rate": 5.151018375960129e-07, "loss": 0.34022183, "memory(GiB)": 34.88, "step": 127295, "train_speed(iter/s)": 0.411642 }, { "acc": 0.953304, "epoch": 3.4467820106679663, "grad_norm": 7.762423038482666, "learning_rate": 5.148547138516651e-07, "loss": 0.25241017, "memory(GiB)": 34.88, "step": 127300, "train_speed(iter/s)": 0.411643 }, { "acc": 0.95247574, "epoch": 3.4469173909511817, "grad_norm": 7.4895219802856445, "learning_rate": 5.146076462988367e-07, "loss": 0.28349016, "memory(GiB)": 34.88, "step": 127305, "train_speed(iter/s)": 0.411644 }, { "acc": 0.94532061, "epoch": 3.4470527712343975, "grad_norm": 6.51467752456665, "learning_rate": 5.143606349406273e-07, "loss": 0.28763723, "memory(GiB)": 34.88, "step": 127310, "train_speed(iter/s)": 0.411644 }, { "acc": 0.95337257, "epoch": 3.447188151517613, "grad_norm": 5.7499680519104, "learning_rate": 5.141136797801273e-07, "loss": 0.2877177, "memory(GiB)": 34.88, "step": 127315, "train_speed(iter/s)": 0.411645 }, { "acc": 0.94682693, "epoch": 3.4473235318008286, "grad_norm": 3.3191256523132324, "learning_rate": 5.138667808204329e-07, "loss": 0.32695, "memory(GiB)": 34.88, "step": 127320, "train_speed(iter/s)": 0.411646 }, { "acc": 0.93549871, "epoch": 3.447458912084044, "grad_norm": 7.435863494873047, "learning_rate": 5.136199380646344e-07, "loss": 0.39566031, "memory(GiB)": 34.88, "step": 127325, "train_speed(iter/s)": 0.411647 }, { "acc": 0.93711433, "epoch": 3.44759429236726, "grad_norm": 7.911196708679199, "learning_rate": 5.13373151515826e-07, "loss": 0.3750613, "memory(GiB)": 34.88, "step": 127330, "train_speed(iter/s)": 0.411648 }, { "acc": 0.95157423, "epoch": 3.447729672650475, "grad_norm": 5.044956207275391, "learning_rate": 5.131264211770992e-07, "loss": 0.3127542, "memory(GiB)": 34.88, "step": 127335, "train_speed(iter/s)": 0.411649 }, { "acc": 0.92818413, "epoch": 3.4478650529336905, "grad_norm": 5.4141669273376465, "learning_rate": 5.128797470515424e-07, "loss": 0.40326519, "memory(GiB)": 34.88, "step": 127340, "train_speed(iter/s)": 0.41165 }, { "acc": 0.93319845, "epoch": 3.4480004332169063, "grad_norm": 4.90650749206543, "learning_rate": 5.126331291422489e-07, "loss": 0.4494154, "memory(GiB)": 34.88, "step": 127345, "train_speed(iter/s)": 0.411651 }, { "acc": 0.95401859, "epoch": 3.4481358135001217, "grad_norm": 10.460546493530273, "learning_rate": 5.123865674523058e-07, "loss": 0.30304, "memory(GiB)": 34.88, "step": 127350, "train_speed(iter/s)": 0.411651 }, { "acc": 0.94889622, "epoch": 3.4482711937833375, "grad_norm": 12.713362693786621, "learning_rate": 5.121400619848038e-07, "loss": 0.27160964, "memory(GiB)": 34.88, "step": 127355, "train_speed(iter/s)": 0.411652 }, { "acc": 0.95207348, "epoch": 3.448406574066553, "grad_norm": 11.942561149597168, "learning_rate": 5.118936127428286e-07, "loss": 0.29304652, "memory(GiB)": 34.88, "step": 127360, "train_speed(iter/s)": 0.411653 }, { "acc": 0.94532948, "epoch": 3.4485419543497686, "grad_norm": 6.14637565612793, "learning_rate": 5.116472197294687e-07, "loss": 0.41083021, "memory(GiB)": 34.88, "step": 127365, "train_speed(iter/s)": 0.411654 }, { "acc": 0.95394421, "epoch": 3.448677334632984, "grad_norm": 15.22984504699707, "learning_rate": 5.11400882947811e-07, "loss": 0.3021529, "memory(GiB)": 34.88, "step": 127370, "train_speed(iter/s)": 0.411655 }, { "acc": 0.93556976, "epoch": 3.4488127149162, "grad_norm": 11.564541816711426, "learning_rate": 5.111546024009388e-07, "loss": 0.44281664, "memory(GiB)": 34.88, "step": 127375, "train_speed(iter/s)": 0.411656 }, { "acc": 0.94567356, "epoch": 3.448948095199415, "grad_norm": 3.6412880420684814, "learning_rate": 5.109083780919417e-07, "loss": 0.29390259, "memory(GiB)": 34.88, "step": 127380, "train_speed(iter/s)": 0.411656 }, { "acc": 0.95319691, "epoch": 3.449083475482631, "grad_norm": 8.250247955322266, "learning_rate": 5.106622100239001e-07, "loss": 0.22602553, "memory(GiB)": 34.88, "step": 127385, "train_speed(iter/s)": 0.411657 }, { "acc": 0.94834452, "epoch": 3.4492188557658463, "grad_norm": 8.493151664733887, "learning_rate": 5.104160981999006e-07, "loss": 0.33610227, "memory(GiB)": 34.88, "step": 127390, "train_speed(iter/s)": 0.411658 }, { "acc": 0.940448, "epoch": 3.4493542360490617, "grad_norm": 22.090499877929688, "learning_rate": 5.101700426230232e-07, "loss": 0.37642891, "memory(GiB)": 34.88, "step": 127395, "train_speed(iter/s)": 0.411659 }, { "acc": 0.95585384, "epoch": 3.4494896163322775, "grad_norm": 3.661745071411133, "learning_rate": 5.099240432963536e-07, "loss": 0.24414496, "memory(GiB)": 34.88, "step": 127400, "train_speed(iter/s)": 0.41166 }, { "acc": 0.94420443, "epoch": 3.449624996615493, "grad_norm": 6.0429158210754395, "learning_rate": 5.096781002229724e-07, "loss": 0.3979382, "memory(GiB)": 34.88, "step": 127405, "train_speed(iter/s)": 0.411661 }, { "acc": 0.95014553, "epoch": 3.4497603768987086, "grad_norm": 3.5726892948150635, "learning_rate": 5.094322134059577e-07, "loss": 0.29828458, "memory(GiB)": 34.88, "step": 127410, "train_speed(iter/s)": 0.411662 }, { "acc": 0.95542145, "epoch": 3.449895757181924, "grad_norm": 6.812583923339844, "learning_rate": 5.091863828483937e-07, "loss": 0.25597038, "memory(GiB)": 34.88, "step": 127415, "train_speed(iter/s)": 0.411662 }, { "acc": 0.94333382, "epoch": 3.45003113746514, "grad_norm": 6.71500301361084, "learning_rate": 5.089406085533573e-07, "loss": 0.39789634, "memory(GiB)": 34.88, "step": 127420, "train_speed(iter/s)": 0.411663 }, { "acc": 0.95053129, "epoch": 3.450166517748355, "grad_norm": 8.708373069763184, "learning_rate": 5.086948905239294e-07, "loss": 0.27757411, "memory(GiB)": 34.88, "step": 127425, "train_speed(iter/s)": 0.411664 }, { "acc": 0.95281572, "epoch": 3.4503018980315705, "grad_norm": 7.265933036804199, "learning_rate": 5.084492287631847e-07, "loss": 0.28508606, "memory(GiB)": 34.88, "step": 127430, "train_speed(iter/s)": 0.411665 }, { "acc": 0.93566742, "epoch": 3.4504372783147863, "grad_norm": 8.839612007141113, "learning_rate": 5.082036232742044e-07, "loss": 0.40879011, "memory(GiB)": 34.88, "step": 127435, "train_speed(iter/s)": 0.411666 }, { "acc": 0.94552536, "epoch": 3.4505726585980017, "grad_norm": 6.307449817657471, "learning_rate": 5.079580740600631e-07, "loss": 0.36193333, "memory(GiB)": 34.88, "step": 127440, "train_speed(iter/s)": 0.411667 }, { "acc": 0.94599419, "epoch": 3.4507080388812175, "grad_norm": 4.76529598236084, "learning_rate": 5.077125811238368e-07, "loss": 0.28384974, "memory(GiB)": 34.88, "step": 127445, "train_speed(iter/s)": 0.411668 }, { "acc": 0.94797306, "epoch": 3.450843419164433, "grad_norm": 11.564350128173828, "learning_rate": 5.074671444686028e-07, "loss": 0.25798228, "memory(GiB)": 34.88, "step": 127450, "train_speed(iter/s)": 0.411669 }, { "acc": 0.95423098, "epoch": 3.4509787994476486, "grad_norm": 6.7326579093933105, "learning_rate": 5.072217640974327e-07, "loss": 0.28222823, "memory(GiB)": 34.88, "step": 127455, "train_speed(iter/s)": 0.411669 }, { "acc": 0.96377192, "epoch": 3.451114179730864, "grad_norm": 5.447137832641602, "learning_rate": 5.069764400134028e-07, "loss": 0.20266879, "memory(GiB)": 34.88, "step": 127460, "train_speed(iter/s)": 0.41167 }, { "acc": 0.94968424, "epoch": 3.4512495600140793, "grad_norm": 5.998802661895752, "learning_rate": 5.067311722195834e-07, "loss": 0.31127105, "memory(GiB)": 34.88, "step": 127465, "train_speed(iter/s)": 0.411671 }, { "acc": 0.95601101, "epoch": 3.451384940297295, "grad_norm": 4.68322229385376, "learning_rate": 5.064859607190507e-07, "loss": 0.26475687, "memory(GiB)": 34.88, "step": 127470, "train_speed(iter/s)": 0.411672 }, { "acc": 0.95630302, "epoch": 3.4515203205805105, "grad_norm": 5.138876438140869, "learning_rate": 5.062408055148737e-07, "loss": 0.26988559, "memory(GiB)": 34.88, "step": 127475, "train_speed(iter/s)": 0.411673 }, { "acc": 0.9514842, "epoch": 3.4516557008637263, "grad_norm": 5.952988624572754, "learning_rate": 5.05995706610124e-07, "loss": 0.33345139, "memory(GiB)": 34.88, "step": 127480, "train_speed(iter/s)": 0.411673 }, { "acc": 0.94011459, "epoch": 3.4517910811469417, "grad_norm": 3.6270551681518555, "learning_rate": 5.057506640078733e-07, "loss": 0.3540525, "memory(GiB)": 34.88, "step": 127485, "train_speed(iter/s)": 0.411674 }, { "acc": 0.94658346, "epoch": 3.4519264614301575, "grad_norm": 5.465817451477051, "learning_rate": 5.055056777111895e-07, "loss": 0.33705044, "memory(GiB)": 34.88, "step": 127490, "train_speed(iter/s)": 0.411675 }, { "acc": 0.94620247, "epoch": 3.452061841713373, "grad_norm": 4.397129535675049, "learning_rate": 5.05260747723143e-07, "loss": 0.30846086, "memory(GiB)": 34.88, "step": 127495, "train_speed(iter/s)": 0.411676 }, { "acc": 0.95588284, "epoch": 3.452197221996588, "grad_norm": 5.710560321807861, "learning_rate": 5.050158740467995e-07, "loss": 0.30972581, "memory(GiB)": 34.88, "step": 127500, "train_speed(iter/s)": 0.411677 }, { "acc": 0.93771162, "epoch": 3.452332602279804, "grad_norm": 4.9867939949035645, "learning_rate": 5.0477105668523e-07, "loss": 0.39504492, "memory(GiB)": 34.88, "step": 127505, "train_speed(iter/s)": 0.411677 }, { "acc": 0.95247831, "epoch": 3.4524679825630193, "grad_norm": 6.133393287658691, "learning_rate": 5.045262956414992e-07, "loss": 0.23898447, "memory(GiB)": 34.88, "step": 127510, "train_speed(iter/s)": 0.411678 }, { "acc": 0.96053238, "epoch": 3.452603362846235, "grad_norm": 8.036282539367676, "learning_rate": 5.04281590918673e-07, "loss": 0.29487762, "memory(GiB)": 34.88, "step": 127515, "train_speed(iter/s)": 0.411679 }, { "acc": 0.94393177, "epoch": 3.4527387431294505, "grad_norm": 8.562834739685059, "learning_rate": 5.040369425198175e-07, "loss": 0.33947573, "memory(GiB)": 34.88, "step": 127520, "train_speed(iter/s)": 0.41168 }, { "acc": 0.95012703, "epoch": 3.4528741234126663, "grad_norm": 12.896944046020508, "learning_rate": 5.037923504479984e-07, "loss": 0.30421963, "memory(GiB)": 34.88, "step": 127525, "train_speed(iter/s)": 0.411681 }, { "acc": 0.94219503, "epoch": 3.4530095036958817, "grad_norm": 5.243200302124023, "learning_rate": 5.035478147062786e-07, "loss": 0.33223515, "memory(GiB)": 34.88, "step": 127530, "train_speed(iter/s)": 0.411682 }, { "acc": 0.94076319, "epoch": 3.4531448839790975, "grad_norm": 2.633211135864258, "learning_rate": 5.033033352977195e-07, "loss": 0.37348104, "memory(GiB)": 34.88, "step": 127535, "train_speed(iter/s)": 0.411683 }, { "acc": 0.9463932, "epoch": 3.453280264262313, "grad_norm": 14.414902687072754, "learning_rate": 5.030589122253875e-07, "loss": 0.37780108, "memory(GiB)": 34.88, "step": 127540, "train_speed(iter/s)": 0.411684 }, { "acc": 0.92749691, "epoch": 3.4534156445455286, "grad_norm": 10.766414642333984, "learning_rate": 5.028145454923419e-07, "loss": 0.42655301, "memory(GiB)": 34.88, "step": 127545, "train_speed(iter/s)": 0.411684 }, { "acc": 0.94168968, "epoch": 3.453551024828744, "grad_norm": 3.996638536453247, "learning_rate": 5.025702351016445e-07, "loss": 0.33570051, "memory(GiB)": 34.88, "step": 127550, "train_speed(iter/s)": 0.411685 }, { "acc": 0.94996662, "epoch": 3.4536864051119593, "grad_norm": 3.0763988494873047, "learning_rate": 5.023259810563552e-07, "loss": 0.33748827, "memory(GiB)": 34.88, "step": 127555, "train_speed(iter/s)": 0.411686 }, { "acc": 0.94324245, "epoch": 3.453821785395175, "grad_norm": 3.2981529235839844, "learning_rate": 5.020817833595359e-07, "loss": 0.3820086, "memory(GiB)": 34.88, "step": 127560, "train_speed(iter/s)": 0.411687 }, { "acc": 0.95437584, "epoch": 3.4539571656783905, "grad_norm": 7.144330024719238, "learning_rate": 5.018376420142429e-07, "loss": 0.33582685, "memory(GiB)": 34.88, "step": 127565, "train_speed(iter/s)": 0.411688 }, { "acc": 0.9497571, "epoch": 3.4540925459616063, "grad_norm": 9.744281768798828, "learning_rate": 5.015935570235361e-07, "loss": 0.27605927, "memory(GiB)": 34.88, "step": 127570, "train_speed(iter/s)": 0.411689 }, { "acc": 0.93805428, "epoch": 3.4542279262448217, "grad_norm": 15.735369682312012, "learning_rate": 5.013495283904738e-07, "loss": 0.42343502, "memory(GiB)": 34.88, "step": 127575, "train_speed(iter/s)": 0.41169 }, { "acc": 0.94204884, "epoch": 3.4543633065280375, "grad_norm": 8.106561660766602, "learning_rate": 5.011055561181107e-07, "loss": 0.35730085, "memory(GiB)": 34.88, "step": 127580, "train_speed(iter/s)": 0.41169 }, { "acc": 0.95973749, "epoch": 3.454498686811253, "grad_norm": 2.178377866744995, "learning_rate": 5.008616402095043e-07, "loss": 0.22105348, "memory(GiB)": 34.88, "step": 127585, "train_speed(iter/s)": 0.411691 }, { "acc": 0.95733986, "epoch": 3.454634067094468, "grad_norm": 6.599267959594727, "learning_rate": 5.006177806677107e-07, "loss": 0.26158891, "memory(GiB)": 34.88, "step": 127590, "train_speed(iter/s)": 0.411692 }, { "acc": 0.9497508, "epoch": 3.454769447377684, "grad_norm": 9.558670043945312, "learning_rate": 5.003739774957844e-07, "loss": 0.26737022, "memory(GiB)": 34.88, "step": 127595, "train_speed(iter/s)": 0.411693 }, { "acc": 0.94764891, "epoch": 3.4549048276608993, "grad_norm": 4.86084508895874, "learning_rate": 5.001302306967788e-07, "loss": 0.30550125, "memory(GiB)": 34.88, "step": 127600, "train_speed(iter/s)": 0.411694 }, { "acc": 0.94462719, "epoch": 3.455040207944115, "grad_norm": 9.76590347290039, "learning_rate": 4.998865402737476e-07, "loss": 0.34805603, "memory(GiB)": 34.88, "step": 127605, "train_speed(iter/s)": 0.411695 }, { "acc": 0.95088625, "epoch": 3.4551755882273305, "grad_norm": 8.183145523071289, "learning_rate": 4.996429062297444e-07, "loss": 0.32746053, "memory(GiB)": 34.88, "step": 127610, "train_speed(iter/s)": 0.411695 }, { "acc": 0.96057081, "epoch": 3.4553109685105463, "grad_norm": 6.698356628417969, "learning_rate": 4.993993285678197e-07, "loss": 0.23309994, "memory(GiB)": 34.88, "step": 127615, "train_speed(iter/s)": 0.411696 }, { "acc": 0.94546452, "epoch": 3.4554463487937617, "grad_norm": 10.097407341003418, "learning_rate": 4.991558072910257e-07, "loss": 0.30160735, "memory(GiB)": 34.88, "step": 127620, "train_speed(iter/s)": 0.411697 }, { "acc": 0.95239201, "epoch": 3.455581729076977, "grad_norm": 12.113871574401855, "learning_rate": 4.989123424024126e-07, "loss": 0.25983424, "memory(GiB)": 34.88, "step": 127625, "train_speed(iter/s)": 0.411698 }, { "acc": 0.93749971, "epoch": 3.455717109360193, "grad_norm": 7.513339996337891, "learning_rate": 4.986689339050319e-07, "loss": 0.3918726, "memory(GiB)": 34.88, "step": 127630, "train_speed(iter/s)": 0.411698 }, { "acc": 0.95338106, "epoch": 3.455852489643408, "grad_norm": 22.69662857055664, "learning_rate": 4.984255818019298e-07, "loss": 0.29575977, "memory(GiB)": 34.88, "step": 127635, "train_speed(iter/s)": 0.411699 }, { "acc": 0.9538023, "epoch": 3.455987869926624, "grad_norm": 4.0188307762146, "learning_rate": 4.981822860961564e-07, "loss": 0.26556334, "memory(GiB)": 34.88, "step": 127640, "train_speed(iter/s)": 0.4117 }, { "acc": 0.94518471, "epoch": 3.4561232502098393, "grad_norm": 5.987452983856201, "learning_rate": 4.979390467907601e-07, "loss": 0.30044327, "memory(GiB)": 34.88, "step": 127645, "train_speed(iter/s)": 0.411701 }, { "acc": 0.94701567, "epoch": 3.456258630493055, "grad_norm": 6.604856491088867, "learning_rate": 4.976958638887875e-07, "loss": 0.29748602, "memory(GiB)": 34.88, "step": 127650, "train_speed(iter/s)": 0.411701 }, { "acc": 0.94490376, "epoch": 3.4563940107762705, "grad_norm": 3.8355789184570312, "learning_rate": 4.974527373932843e-07, "loss": 0.2679132, "memory(GiB)": 34.88, "step": 127655, "train_speed(iter/s)": 0.411702 }, { "acc": 0.94587994, "epoch": 3.456529391059486, "grad_norm": 9.285065650939941, "learning_rate": 4.972096673072964e-07, "loss": 0.37537727, "memory(GiB)": 34.88, "step": 127660, "train_speed(iter/s)": 0.411703 }, { "acc": 0.9415288, "epoch": 3.4566647713427017, "grad_norm": 9.006752967834473, "learning_rate": 4.969666536338696e-07, "loss": 0.42504296, "memory(GiB)": 34.88, "step": 127665, "train_speed(iter/s)": 0.411704 }, { "acc": 0.94227352, "epoch": 3.456800151625917, "grad_norm": 5.332180976867676, "learning_rate": 4.967236963760462e-07, "loss": 0.31287014, "memory(GiB)": 34.88, "step": 127670, "train_speed(iter/s)": 0.411704 }, { "acc": 0.94050045, "epoch": 3.456935531909133, "grad_norm": 23.142717361450195, "learning_rate": 4.964807955368712e-07, "loss": 0.38304093, "memory(GiB)": 34.88, "step": 127675, "train_speed(iter/s)": 0.411705 }, { "acc": 0.94632664, "epoch": 3.457070912192348, "grad_norm": 6.729793548583984, "learning_rate": 4.962379511193873e-07, "loss": 0.29026039, "memory(GiB)": 34.88, "step": 127680, "train_speed(iter/s)": 0.411706 }, { "acc": 0.95210838, "epoch": 3.457206292475564, "grad_norm": 9.165567398071289, "learning_rate": 4.959951631266368e-07, "loss": 0.23067174, "memory(GiB)": 34.88, "step": 127685, "train_speed(iter/s)": 0.411707 }, { "acc": 0.95076799, "epoch": 3.4573416727587793, "grad_norm": 4.1537580490112305, "learning_rate": 4.957524315616606e-07, "loss": 0.32411878, "memory(GiB)": 34.88, "step": 127690, "train_speed(iter/s)": 0.411708 }, { "acc": 0.94723454, "epoch": 3.457477053041995, "grad_norm": 9.545563697814941, "learning_rate": 4.955097564274988e-07, "loss": 0.31596618, "memory(GiB)": 34.88, "step": 127695, "train_speed(iter/s)": 0.411709 }, { "acc": 0.949928, "epoch": 3.4576124333252105, "grad_norm": 7.163098335266113, "learning_rate": 4.952671377271934e-07, "loss": 0.32717764, "memory(GiB)": 34.88, "step": 127700, "train_speed(iter/s)": 0.41171 }, { "acc": 0.96145992, "epoch": 3.4577478136084263, "grad_norm": 6.201071262359619, "learning_rate": 4.950245754637816e-07, "loss": 0.25819459, "memory(GiB)": 34.88, "step": 127705, "train_speed(iter/s)": 0.41171 }, { "acc": 0.95286884, "epoch": 3.4578831938916417, "grad_norm": 4.432968616485596, "learning_rate": 4.947820696403024e-07, "loss": 0.2998702, "memory(GiB)": 34.88, "step": 127710, "train_speed(iter/s)": 0.411711 }, { "acc": 0.95133495, "epoch": 3.458018574174857, "grad_norm": 4.228907585144043, "learning_rate": 4.94539620259794e-07, "loss": 0.27334728, "memory(GiB)": 34.88, "step": 127715, "train_speed(iter/s)": 0.411712 }, { "acc": 0.9527792, "epoch": 3.458153954458073, "grad_norm": 6.218319416046143, "learning_rate": 4.942972273252945e-07, "loss": 0.3107163, "memory(GiB)": 34.88, "step": 127720, "train_speed(iter/s)": 0.411713 }, { "acc": 0.94245567, "epoch": 3.458289334741288, "grad_norm": 7.19833517074585, "learning_rate": 4.940548908398386e-07, "loss": 0.2965739, "memory(GiB)": 34.88, "step": 127725, "train_speed(iter/s)": 0.411713 }, { "acc": 0.95573702, "epoch": 3.458424715024504, "grad_norm": 7.937325477600098, "learning_rate": 4.938126108064628e-07, "loss": 0.25807872, "memory(GiB)": 34.88, "step": 127730, "train_speed(iter/s)": 0.411714 }, { "acc": 0.95226097, "epoch": 3.4585600953077194, "grad_norm": 3.6123147010803223, "learning_rate": 4.93570387228203e-07, "loss": 0.28486786, "memory(GiB)": 34.88, "step": 127735, "train_speed(iter/s)": 0.411715 }, { "acc": 0.94855137, "epoch": 3.458695475590935, "grad_norm": 8.657831192016602, "learning_rate": 4.933282201080917e-07, "loss": 0.26772881, "memory(GiB)": 34.88, "step": 127740, "train_speed(iter/s)": 0.411716 }, { "acc": 0.95958328, "epoch": 3.4588308558741505, "grad_norm": 7.268320083618164, "learning_rate": 4.930861094491638e-07, "loss": 0.27329941, "memory(GiB)": 34.88, "step": 127745, "train_speed(iter/s)": 0.411717 }, { "acc": 0.95559998, "epoch": 3.458966236157366, "grad_norm": 16.666133880615234, "learning_rate": 4.92844055254452e-07, "loss": 0.31556144, "memory(GiB)": 34.88, "step": 127750, "train_speed(iter/s)": 0.411718 }, { "acc": 0.93159885, "epoch": 3.4591016164405817, "grad_norm": 9.763201713562012, "learning_rate": 4.926020575269889e-07, "loss": 0.43247128, "memory(GiB)": 34.88, "step": 127755, "train_speed(iter/s)": 0.411718 }, { "acc": 0.94422455, "epoch": 3.459236996723797, "grad_norm": 10.556530952453613, "learning_rate": 4.923601162698048e-07, "loss": 0.33157015, "memory(GiB)": 34.88, "step": 127760, "train_speed(iter/s)": 0.411719 }, { "acc": 0.93821535, "epoch": 3.459372377007013, "grad_norm": 8.821587562561035, "learning_rate": 4.921182314859316e-07, "loss": 0.38999085, "memory(GiB)": 34.88, "step": 127765, "train_speed(iter/s)": 0.41172 }, { "acc": 0.94231768, "epoch": 3.459507757290228, "grad_norm": 3.562727212905884, "learning_rate": 4.918764031783985e-07, "loss": 0.28416855, "memory(GiB)": 34.88, "step": 127770, "train_speed(iter/s)": 0.411721 }, { "acc": 0.94428864, "epoch": 3.459643137573444, "grad_norm": 10.047786712646484, "learning_rate": 4.916346313502366e-07, "loss": 0.33626165, "memory(GiB)": 34.88, "step": 127775, "train_speed(iter/s)": 0.411722 }, { "acc": 0.94782619, "epoch": 3.4597785178566594, "grad_norm": 8.79492473602295, "learning_rate": 4.913929160044722e-07, "loss": 0.36747279, "memory(GiB)": 34.88, "step": 127780, "train_speed(iter/s)": 0.411722 }, { "acc": 0.95560989, "epoch": 3.4599138981398747, "grad_norm": 3.990732431411743, "learning_rate": 4.911512571441345e-07, "loss": 0.31613021, "memory(GiB)": 34.88, "step": 127785, "train_speed(iter/s)": 0.411723 }, { "acc": 0.93987312, "epoch": 3.4600492784230905, "grad_norm": 8.606372833251953, "learning_rate": 4.909096547722518e-07, "loss": 0.34513636, "memory(GiB)": 34.88, "step": 127790, "train_speed(iter/s)": 0.411724 }, { "acc": 0.967593, "epoch": 3.460184658706306, "grad_norm": 6.287698745727539, "learning_rate": 4.90668108891848e-07, "loss": 0.20105999, "memory(GiB)": 34.88, "step": 127795, "train_speed(iter/s)": 0.411724 }, { "acc": 0.95398388, "epoch": 3.4603200389895217, "grad_norm": 4.812503337860107, "learning_rate": 4.90426619505951e-07, "loss": 0.23170381, "memory(GiB)": 34.88, "step": 127800, "train_speed(iter/s)": 0.411725 }, { "acc": 0.95093451, "epoch": 3.460455419272737, "grad_norm": 5.634034633636475, "learning_rate": 4.901851866175852e-07, "loss": 0.28120499, "memory(GiB)": 34.88, "step": 127805, "train_speed(iter/s)": 0.411726 }, { "acc": 0.94238863, "epoch": 3.460590799555953, "grad_norm": 17.458127975463867, "learning_rate": 4.899438102297763e-07, "loss": 0.43235989, "memory(GiB)": 34.88, "step": 127810, "train_speed(iter/s)": 0.411727 }, { "acc": 0.94308844, "epoch": 3.460726179839168, "grad_norm": 1.4766074419021606, "learning_rate": 4.897024903455448e-07, "loss": 0.32516108, "memory(GiB)": 34.88, "step": 127815, "train_speed(iter/s)": 0.411728 }, { "acc": 0.94286585, "epoch": 3.4608615601223836, "grad_norm": 4.4939069747924805, "learning_rate": 4.894612269679177e-07, "loss": 0.29234734, "memory(GiB)": 34.88, "step": 127820, "train_speed(iter/s)": 0.411729 }, { "acc": 0.94323921, "epoch": 3.4609969404055994, "grad_norm": 4.461277008056641, "learning_rate": 4.892200200999152e-07, "loss": 0.35904021, "memory(GiB)": 34.88, "step": 127825, "train_speed(iter/s)": 0.411729 }, { "acc": 0.95673037, "epoch": 3.4611323206888147, "grad_norm": 5.43209171295166, "learning_rate": 4.889788697445582e-07, "loss": 0.28304157, "memory(GiB)": 34.88, "step": 127830, "train_speed(iter/s)": 0.41173 }, { "acc": 0.94908237, "epoch": 3.4612677009720305, "grad_norm": 3.556044340133667, "learning_rate": 4.88737775904869e-07, "loss": 0.34861436, "memory(GiB)": 34.88, "step": 127835, "train_speed(iter/s)": 0.411731 }, { "acc": 0.96081429, "epoch": 3.461403081255246, "grad_norm": 10.203136444091797, "learning_rate": 4.88496738583867e-07, "loss": 0.21658163, "memory(GiB)": 34.88, "step": 127840, "train_speed(iter/s)": 0.411732 }, { "acc": 0.95144196, "epoch": 3.4615384615384617, "grad_norm": 3.2976012229919434, "learning_rate": 4.882557577845727e-07, "loss": 0.30645168, "memory(GiB)": 34.88, "step": 127845, "train_speed(iter/s)": 0.411733 }, { "acc": 0.95531158, "epoch": 3.461673841821677, "grad_norm": 14.999486923217773, "learning_rate": 4.88014833510002e-07, "loss": 0.22336102, "memory(GiB)": 34.88, "step": 127850, "train_speed(iter/s)": 0.411734 }, { "acc": 0.94200039, "epoch": 3.461809222104893, "grad_norm": 10.417811393737793, "learning_rate": 4.877739657631775e-07, "loss": 0.35091255, "memory(GiB)": 34.88, "step": 127855, "train_speed(iter/s)": 0.411734 }, { "acc": 0.94529724, "epoch": 3.461944602388108, "grad_norm": 3.981194496154785, "learning_rate": 4.875331545471127e-07, "loss": 0.31886458, "memory(GiB)": 34.88, "step": 127860, "train_speed(iter/s)": 0.411735 }, { "acc": 0.96166134, "epoch": 3.462079982671324, "grad_norm": 17.908084869384766, "learning_rate": 4.872923998648266e-07, "loss": 0.29044576, "memory(GiB)": 34.88, "step": 127865, "train_speed(iter/s)": 0.411736 }, { "acc": 0.92758179, "epoch": 3.4622153629545394, "grad_norm": 11.428390502929688, "learning_rate": 4.870517017193331e-07, "loss": 0.40998282, "memory(GiB)": 34.88, "step": 127870, "train_speed(iter/s)": 0.411737 }, { "acc": 0.94900074, "epoch": 3.4623507432377547, "grad_norm": 12.465600967407227, "learning_rate": 4.868110601136489e-07, "loss": 0.27205215, "memory(GiB)": 34.88, "step": 127875, "train_speed(iter/s)": 0.411738 }, { "acc": 0.95403929, "epoch": 3.4624861235209705, "grad_norm": 3.9488658905029297, "learning_rate": 4.865704750507885e-07, "loss": 0.28690639, "memory(GiB)": 34.88, "step": 127880, "train_speed(iter/s)": 0.411738 }, { "acc": 0.94476614, "epoch": 3.462621503804186, "grad_norm": 8.2769136428833, "learning_rate": 4.863299465337647e-07, "loss": 0.31924071, "memory(GiB)": 34.88, "step": 127885, "train_speed(iter/s)": 0.411739 }, { "acc": 0.94929199, "epoch": 3.4627568840874017, "grad_norm": 2.9648213386535645, "learning_rate": 4.86089474565591e-07, "loss": 0.29286735, "memory(GiB)": 34.88, "step": 127890, "train_speed(iter/s)": 0.41174 }, { "acc": 0.94768686, "epoch": 3.462892264370617, "grad_norm": 9.426136016845703, "learning_rate": 4.858490591492799e-07, "loss": 0.32602606, "memory(GiB)": 34.88, "step": 127895, "train_speed(iter/s)": 0.411741 }, { "acc": 0.94698238, "epoch": 3.463027644653833, "grad_norm": 6.740162372589111, "learning_rate": 4.856087002878438e-07, "loss": 0.3524554, "memory(GiB)": 34.88, "step": 127900, "train_speed(iter/s)": 0.411742 }, { "acc": 0.94542065, "epoch": 3.463163024937048, "grad_norm": 7.975147247314453, "learning_rate": 4.853683979842924e-07, "loss": 0.26562729, "memory(GiB)": 34.88, "step": 127905, "train_speed(iter/s)": 0.411742 }, { "acc": 0.94373989, "epoch": 3.4632984052202636, "grad_norm": 3.0918936729431152, "learning_rate": 4.851281522416363e-07, "loss": 0.35369878, "memory(GiB)": 34.88, "step": 127910, "train_speed(iter/s)": 0.411743 }, { "acc": 0.94007092, "epoch": 3.4634337855034794, "grad_norm": 11.09625244140625, "learning_rate": 4.848879630628863e-07, "loss": 0.32396255, "memory(GiB)": 34.88, "step": 127915, "train_speed(iter/s)": 0.411744 }, { "acc": 0.95977459, "epoch": 3.4635691657866947, "grad_norm": 5.649418830871582, "learning_rate": 4.846478304510494e-07, "loss": 0.24009814, "memory(GiB)": 34.88, "step": 127920, "train_speed(iter/s)": 0.411745 }, { "acc": 0.95022736, "epoch": 3.4637045460699105, "grad_norm": 4.8200225830078125, "learning_rate": 4.844077544091343e-07, "loss": 0.32411993, "memory(GiB)": 34.88, "step": 127925, "train_speed(iter/s)": 0.411746 }, { "acc": 0.95487671, "epoch": 3.463839926353126, "grad_norm": 2.243635892868042, "learning_rate": 4.841677349401487e-07, "loss": 0.22818785, "memory(GiB)": 34.88, "step": 127930, "train_speed(iter/s)": 0.411746 }, { "acc": 0.95204744, "epoch": 3.4639753066363417, "grad_norm": 5.789066314697266, "learning_rate": 4.839277720471005e-07, "loss": 0.28037958, "memory(GiB)": 34.88, "step": 127935, "train_speed(iter/s)": 0.411747 }, { "acc": 0.95159626, "epoch": 3.464110686919557, "grad_norm": 6.454421043395996, "learning_rate": 4.836878657329924e-07, "loss": 0.31892357, "memory(GiB)": 34.88, "step": 127940, "train_speed(iter/s)": 0.411748 }, { "acc": 0.94543056, "epoch": 3.4642460672027724, "grad_norm": 9.231433868408203, "learning_rate": 4.834480160008341e-07, "loss": 0.29294436, "memory(GiB)": 34.88, "step": 127945, "train_speed(iter/s)": 0.411749 }, { "acc": 0.9409152, "epoch": 3.464381447485988, "grad_norm": 5.999056816101074, "learning_rate": 4.832082228536273e-07, "loss": 0.33710961, "memory(GiB)": 34.88, "step": 127950, "train_speed(iter/s)": 0.41175 }, { "acc": 0.95669041, "epoch": 3.4645168277692036, "grad_norm": 4.699682235717773, "learning_rate": 4.829684862943758e-07, "loss": 0.23504415, "memory(GiB)": 34.88, "step": 127955, "train_speed(iter/s)": 0.411751 }, { "acc": 0.94554825, "epoch": 3.4646522080524194, "grad_norm": 8.950902938842773, "learning_rate": 4.827288063260833e-07, "loss": 0.34426804, "memory(GiB)": 34.88, "step": 127960, "train_speed(iter/s)": 0.411752 }, { "acc": 0.94068546, "epoch": 3.4647875883356347, "grad_norm": 11.729588508605957, "learning_rate": 4.82489182951753e-07, "loss": 0.30265846, "memory(GiB)": 34.88, "step": 127965, "train_speed(iter/s)": 0.411752 }, { "acc": 0.94478874, "epoch": 3.4649229686188505, "grad_norm": 9.545191764831543, "learning_rate": 4.822496161743861e-07, "loss": 0.37846689, "memory(GiB)": 34.88, "step": 127970, "train_speed(iter/s)": 0.411753 }, { "acc": 0.9411479, "epoch": 3.465058348902066, "grad_norm": 7.052305221557617, "learning_rate": 4.820101059969822e-07, "loss": 0.34855061, "memory(GiB)": 34.88, "step": 127975, "train_speed(iter/s)": 0.411754 }, { "acc": 0.93500519, "epoch": 3.4651937291852812, "grad_norm": 4.641775131225586, "learning_rate": 4.817706524225451e-07, "loss": 0.33277121, "memory(GiB)": 34.88, "step": 127980, "train_speed(iter/s)": 0.411754 }, { "acc": 0.96070576, "epoch": 3.465329109468497, "grad_norm": 14.409976959228516, "learning_rate": 4.815312554540712e-07, "loss": 0.21365838, "memory(GiB)": 34.88, "step": 127985, "train_speed(iter/s)": 0.411755 }, { "acc": 0.94624071, "epoch": 3.4654644897517124, "grad_norm": 10.86685562133789, "learning_rate": 4.812919150945616e-07, "loss": 0.35611472, "memory(GiB)": 34.88, "step": 127990, "train_speed(iter/s)": 0.411756 }, { "acc": 0.94683123, "epoch": 3.465599870034928, "grad_norm": 6.212250709533691, "learning_rate": 4.810526313470126e-07, "loss": 0.31186204, "memory(GiB)": 34.88, "step": 127995, "train_speed(iter/s)": 0.411757 }, { "acc": 0.95935326, "epoch": 3.4657352503181436, "grad_norm": 5.7722649574279785, "learning_rate": 4.808134042144224e-07, "loss": 0.20861759, "memory(GiB)": 34.88, "step": 128000, "train_speed(iter/s)": 0.411758 }, { "acc": 0.96071177, "epoch": 3.4658706306013594, "grad_norm": 4.390571594238281, "learning_rate": 4.805742336997889e-07, "loss": 0.23768821, "memory(GiB)": 34.88, "step": 128005, "train_speed(iter/s)": 0.411759 }, { "acc": 0.94267359, "epoch": 3.4660060108845747, "grad_norm": 3.4672937393188477, "learning_rate": 4.803351198061057e-07, "loss": 0.37357609, "memory(GiB)": 34.88, "step": 128010, "train_speed(iter/s)": 0.411759 }, { "acc": 0.94218941, "epoch": 3.46614139116779, "grad_norm": 10.209867477416992, "learning_rate": 4.800960625363711e-07, "loss": 0.40963454, "memory(GiB)": 34.88, "step": 128015, "train_speed(iter/s)": 0.41176 }, { "acc": 0.93966799, "epoch": 3.466276771451006, "grad_norm": 7.750201225280762, "learning_rate": 4.798570618935779e-07, "loss": 0.38697505, "memory(GiB)": 34.88, "step": 128020, "train_speed(iter/s)": 0.411761 }, { "acc": 0.9535635, "epoch": 3.4664121517342212, "grad_norm": 5.390375137329102, "learning_rate": 4.796181178807212e-07, "loss": 0.26507881, "memory(GiB)": 34.88, "step": 128025, "train_speed(iter/s)": 0.411762 }, { "acc": 0.9510747, "epoch": 3.466547532017437, "grad_norm": 5.030753135681152, "learning_rate": 4.793792305007928e-07, "loss": 0.28535445, "memory(GiB)": 34.88, "step": 128030, "train_speed(iter/s)": 0.411763 }, { "acc": 0.95790405, "epoch": 3.4666829123006524, "grad_norm": 4.97776985168457, "learning_rate": 4.791403997567863e-07, "loss": 0.2581588, "memory(GiB)": 34.88, "step": 128035, "train_speed(iter/s)": 0.411764 }, { "acc": 0.94479427, "epoch": 3.466818292583868, "grad_norm": 9.090542793273926, "learning_rate": 4.789016256516936e-07, "loss": 0.34892716, "memory(GiB)": 34.88, "step": 128040, "train_speed(iter/s)": 0.411764 }, { "acc": 0.94119854, "epoch": 3.4669536728670836, "grad_norm": 22.01491355895996, "learning_rate": 4.786629081885039e-07, "loss": 0.34901466, "memory(GiB)": 34.88, "step": 128045, "train_speed(iter/s)": 0.411765 }, { "acc": 0.95068665, "epoch": 3.4670890531502994, "grad_norm": 8.529462814331055, "learning_rate": 4.784242473702107e-07, "loss": 0.28733468, "memory(GiB)": 34.88, "step": 128050, "train_speed(iter/s)": 0.411766 }, { "acc": 0.93543739, "epoch": 3.4672244334335147, "grad_norm": 56.51593017578125, "learning_rate": 4.781856431998017e-07, "loss": 0.42303467, "memory(GiB)": 34.88, "step": 128055, "train_speed(iter/s)": 0.411767 }, { "acc": 0.94593697, "epoch": 3.4673598137167305, "grad_norm": 10.460261344909668, "learning_rate": 4.779470956802672e-07, "loss": 0.37691891, "memory(GiB)": 34.88, "step": 128060, "train_speed(iter/s)": 0.411768 }, { "acc": 0.9404295, "epoch": 3.467495193999946, "grad_norm": 22.790925979614258, "learning_rate": 4.777086048145926e-07, "loss": 0.36137938, "memory(GiB)": 34.88, "step": 128065, "train_speed(iter/s)": 0.411769 }, { "acc": 0.95529537, "epoch": 3.4676305742831612, "grad_norm": 4.295923233032227, "learning_rate": 4.774701706057698e-07, "loss": 0.24040356, "memory(GiB)": 34.88, "step": 128070, "train_speed(iter/s)": 0.411769 }, { "acc": 0.95742111, "epoch": 3.467765954566377, "grad_norm": 6.284554958343506, "learning_rate": 4.772317930567826e-07, "loss": 0.26379743, "memory(GiB)": 34.88, "step": 128075, "train_speed(iter/s)": 0.41177 }, { "acc": 0.9329402, "epoch": 3.4679013348495924, "grad_norm": 7.381379127502441, "learning_rate": 4.769934721706163e-07, "loss": 0.37928872, "memory(GiB)": 34.88, "step": 128080, "train_speed(iter/s)": 0.411771 }, { "acc": 0.94553261, "epoch": 3.468036715132808, "grad_norm": 7.249821662902832, "learning_rate": 4.767552079502605e-07, "loss": 0.29584205, "memory(GiB)": 34.88, "step": 128085, "train_speed(iter/s)": 0.411772 }, { "acc": 0.95049286, "epoch": 3.4681720954160236, "grad_norm": 5.167539119720459, "learning_rate": 4.7651700039869555e-07, "loss": 0.36821725, "memory(GiB)": 34.88, "step": 128090, "train_speed(iter/s)": 0.411773 }, { "acc": 0.9431263, "epoch": 3.4683074756992394, "grad_norm": 6.79771614074707, "learning_rate": 4.762788495189084e-07, "loss": 0.35051939, "memory(GiB)": 34.88, "step": 128095, "train_speed(iter/s)": 0.411774 }, { "acc": 0.94072714, "epoch": 3.4684428559824547, "grad_norm": 7.529847621917725, "learning_rate": 4.7604075531387976e-07, "loss": 0.3360208, "memory(GiB)": 34.88, "step": 128100, "train_speed(iter/s)": 0.411774 }, { "acc": 0.92575531, "epoch": 3.46857823626567, "grad_norm": 8.500894546508789, "learning_rate": 4.758027177865951e-07, "loss": 0.49401264, "memory(GiB)": 34.88, "step": 128105, "train_speed(iter/s)": 0.411775 }, { "acc": 0.94785976, "epoch": 3.468713616548886, "grad_norm": 6.121938228607178, "learning_rate": 4.7556473694003407e-07, "loss": 0.31242404, "memory(GiB)": 34.88, "step": 128110, "train_speed(iter/s)": 0.411776 }, { "acc": 0.95306416, "epoch": 3.4688489968321012, "grad_norm": 4.12894344329834, "learning_rate": 4.75326812777179e-07, "loss": 0.24264264, "memory(GiB)": 34.88, "step": 128115, "train_speed(iter/s)": 0.411777 }, { "acc": 0.93897829, "epoch": 3.468984377115317, "grad_norm": 7.708547115325928, "learning_rate": 4.7508894530101104e-07, "loss": 0.33036413, "memory(GiB)": 34.88, "step": 128120, "train_speed(iter/s)": 0.411778 }, { "acc": 0.95257721, "epoch": 3.4691197573985324, "grad_norm": 3.9840593338012695, "learning_rate": 4.7485113451450754e-07, "loss": 0.20871358, "memory(GiB)": 34.88, "step": 128125, "train_speed(iter/s)": 0.411779 }, { "acc": 0.95220833, "epoch": 3.469255137681748, "grad_norm": 4.933618068695068, "learning_rate": 4.746133804206502e-07, "loss": 0.25587354, "memory(GiB)": 34.88, "step": 128130, "train_speed(iter/s)": 0.41178 }, { "acc": 0.94462156, "epoch": 3.4693905179649636, "grad_norm": 7.2085280418396, "learning_rate": 4.7437568302241424e-07, "loss": 0.32016315, "memory(GiB)": 34.88, "step": 128135, "train_speed(iter/s)": 0.41178 }, { "acc": 0.94080334, "epoch": 3.469525898248179, "grad_norm": 10.159807205200195, "learning_rate": 4.741380423227807e-07, "loss": 0.38854647, "memory(GiB)": 34.88, "step": 128140, "train_speed(iter/s)": 0.411781 }, { "acc": 0.95399303, "epoch": 3.4696612785313947, "grad_norm": 3.80608868598938, "learning_rate": 4.7390045832472426e-07, "loss": 0.23977323, "memory(GiB)": 34.88, "step": 128145, "train_speed(iter/s)": 0.411782 }, { "acc": 0.95319891, "epoch": 3.46979665881461, "grad_norm": 3.090799570083618, "learning_rate": 4.736629310312223e-07, "loss": 0.30012715, "memory(GiB)": 34.88, "step": 128150, "train_speed(iter/s)": 0.411783 }, { "acc": 0.95057182, "epoch": 3.469932039097826, "grad_norm": 4.606196403503418, "learning_rate": 4.7342546044525025e-07, "loss": 0.28337753, "memory(GiB)": 34.88, "step": 128155, "train_speed(iter/s)": 0.411784 }, { "acc": 0.94807987, "epoch": 3.4700674193810412, "grad_norm": 8.273260116577148, "learning_rate": 4.731880465697818e-07, "loss": 0.26150634, "memory(GiB)": 34.88, "step": 128160, "train_speed(iter/s)": 0.411785 }, { "acc": 0.96130047, "epoch": 3.470202799664257, "grad_norm": 6.07614278793335, "learning_rate": 4.7295068940779233e-07, "loss": 0.25857337, "memory(GiB)": 34.88, "step": 128165, "train_speed(iter/s)": 0.411785 }, { "acc": 0.95934896, "epoch": 3.4703381799474724, "grad_norm": 7.335610866546631, "learning_rate": 4.727133889622533e-07, "loss": 0.25628796, "memory(GiB)": 34.88, "step": 128170, "train_speed(iter/s)": 0.411786 }, { "acc": 0.94767647, "epoch": 3.4704735602306878, "grad_norm": 5.0498762130737305, "learning_rate": 4.724761452361397e-07, "loss": 0.27889333, "memory(GiB)": 34.88, "step": 128175, "train_speed(iter/s)": 0.411787 }, { "acc": 0.94175501, "epoch": 3.4706089405139036, "grad_norm": 5.3774566650390625, "learning_rate": 4.7223895823242217e-07, "loss": 0.3037725, "memory(GiB)": 34.88, "step": 128180, "train_speed(iter/s)": 0.411788 }, { "acc": 0.93977728, "epoch": 3.470744320797119, "grad_norm": 11.971994400024414, "learning_rate": 4.7200182795407197e-07, "loss": 0.39725142, "memory(GiB)": 34.88, "step": 128185, "train_speed(iter/s)": 0.411789 }, { "acc": 0.95223808, "epoch": 3.4708797010803347, "grad_norm": 3.565052032470703, "learning_rate": 4.7176475440405973e-07, "loss": 0.27967885, "memory(GiB)": 34.88, "step": 128190, "train_speed(iter/s)": 0.411789 }, { "acc": 0.95673313, "epoch": 3.47101508136355, "grad_norm": 4.067650318145752, "learning_rate": 4.715277375853567e-07, "loss": 0.2132112, "memory(GiB)": 34.88, "step": 128195, "train_speed(iter/s)": 0.41179 }, { "acc": 0.94328251, "epoch": 3.471150461646766, "grad_norm": 3.3058836460113525, "learning_rate": 4.712907775009302e-07, "loss": 0.32057347, "memory(GiB)": 34.88, "step": 128200, "train_speed(iter/s)": 0.411791 }, { "acc": 0.95669289, "epoch": 3.4712858419299812, "grad_norm": 5.726132392883301, "learning_rate": 4.710538741537475e-07, "loss": 0.255265, "memory(GiB)": 34.88, "step": 128205, "train_speed(iter/s)": 0.411792 }, { "acc": 0.92759094, "epoch": 3.471421222213197, "grad_norm": 10.696075439453125, "learning_rate": 4.7081702754677993e-07, "loss": 0.47032728, "memory(GiB)": 34.88, "step": 128210, "train_speed(iter/s)": 0.411793 }, { "acc": 0.9412569, "epoch": 3.4715566024964124, "grad_norm": 5.275152683258057, "learning_rate": 4.7058023768299086e-07, "loss": 0.33918047, "memory(GiB)": 34.88, "step": 128215, "train_speed(iter/s)": 0.411794 }, { "acc": 0.95082884, "epoch": 3.471691982779628, "grad_norm": 6.594237804412842, "learning_rate": 4.703435045653487e-07, "loss": 0.30491295, "memory(GiB)": 34.88, "step": 128220, "train_speed(iter/s)": 0.411794 }, { "acc": 0.95092106, "epoch": 3.4718273630628436, "grad_norm": 7.355785846710205, "learning_rate": 4.701068281968181e-07, "loss": 0.22523265, "memory(GiB)": 34.88, "step": 128225, "train_speed(iter/s)": 0.411795 }, { "acc": 0.95251808, "epoch": 3.471962743346059, "grad_norm": 3.5848684310913086, "learning_rate": 4.698702085803652e-07, "loss": 0.26615243, "memory(GiB)": 34.88, "step": 128230, "train_speed(iter/s)": 0.411796 }, { "acc": 0.94486704, "epoch": 3.4720981236292747, "grad_norm": 10.910737991333008, "learning_rate": 4.696336457189523e-07, "loss": 0.28227458, "memory(GiB)": 34.88, "step": 128235, "train_speed(iter/s)": 0.411796 }, { "acc": 0.9541975, "epoch": 3.47223350391249, "grad_norm": 5.652239799499512, "learning_rate": 4.6939713961554345e-07, "loss": 0.22281413, "memory(GiB)": 34.88, "step": 128240, "train_speed(iter/s)": 0.411797 }, { "acc": 0.94978714, "epoch": 3.472368884195706, "grad_norm": 4.299870014190674, "learning_rate": 4.691606902731022e-07, "loss": 0.35088172, "memory(GiB)": 34.88, "step": 128245, "train_speed(iter/s)": 0.411798 }, { "acc": 0.93016777, "epoch": 3.4725042644789212, "grad_norm": 6.4554595947265625, "learning_rate": 4.689242976945891e-07, "loss": 0.42073069, "memory(GiB)": 34.88, "step": 128250, "train_speed(iter/s)": 0.411799 }, { "acc": 0.93716459, "epoch": 3.472639644762137, "grad_norm": 4.619873046875, "learning_rate": 4.6868796188296754e-07, "loss": 0.42255893, "memory(GiB)": 34.88, "step": 128255, "train_speed(iter/s)": 0.4118 }, { "acc": 0.95839195, "epoch": 3.4727750250453524, "grad_norm": 2.5946528911590576, "learning_rate": 4.684516828411944e-07, "loss": 0.29249866, "memory(GiB)": 34.88, "step": 128260, "train_speed(iter/s)": 0.411801 }, { "acc": 0.95379887, "epoch": 3.4729104053285678, "grad_norm": 6.637983322143555, "learning_rate": 4.682154605722342e-07, "loss": 0.28945003, "memory(GiB)": 34.88, "step": 128265, "train_speed(iter/s)": 0.411802 }, { "acc": 0.9452878, "epoch": 3.4730457856117836, "grad_norm": 7.588726997375488, "learning_rate": 4.6797929507904265e-07, "loss": 0.29544153, "memory(GiB)": 34.88, "step": 128270, "train_speed(iter/s)": 0.411802 }, { "acc": 0.94831133, "epoch": 3.473181165894999, "grad_norm": 5.586009979248047, "learning_rate": 4.677431863645793e-07, "loss": 0.27631316, "memory(GiB)": 34.88, "step": 128275, "train_speed(iter/s)": 0.411803 }, { "acc": 0.94314508, "epoch": 3.4733165461782147, "grad_norm": 7.7453179359436035, "learning_rate": 4.6750713443180254e-07, "loss": 0.35520663, "memory(GiB)": 34.88, "step": 128280, "train_speed(iter/s)": 0.411804 }, { "acc": 0.9524806, "epoch": 3.47345192646143, "grad_norm": 7.7509684562683105, "learning_rate": 4.6727113928366813e-07, "loss": 0.24870992, "memory(GiB)": 34.88, "step": 128285, "train_speed(iter/s)": 0.411805 }, { "acc": 0.94686604, "epoch": 3.473587306744646, "grad_norm": 11.961711883544922, "learning_rate": 4.670352009231338e-07, "loss": 0.27256894, "memory(GiB)": 34.88, "step": 128290, "train_speed(iter/s)": 0.411806 }, { "acc": 0.93336315, "epoch": 3.4737226870278612, "grad_norm": 8.781730651855469, "learning_rate": 4.667993193531521e-07, "loss": 0.39139709, "memory(GiB)": 34.88, "step": 128295, "train_speed(iter/s)": 0.411807 }, { "acc": 0.93904476, "epoch": 3.4738580673110766, "grad_norm": 16.961936950683594, "learning_rate": 4.665634945766825e-07, "loss": 0.39122784, "memory(GiB)": 34.88, "step": 128300, "train_speed(iter/s)": 0.411808 }, { "acc": 0.95675144, "epoch": 3.4739934475942924, "grad_norm": 5.790592193603516, "learning_rate": 4.663277265966757e-07, "loss": 0.19808145, "memory(GiB)": 34.88, "step": 128305, "train_speed(iter/s)": 0.411809 }, { "acc": 0.94453831, "epoch": 3.4741288278775078, "grad_norm": 26.66189956665039, "learning_rate": 4.6609201541608674e-07, "loss": 0.36373568, "memory(GiB)": 34.88, "step": 128310, "train_speed(iter/s)": 0.411809 }, { "acc": 0.96011686, "epoch": 3.4742642081607236, "grad_norm": 8.137051582336426, "learning_rate": 4.6585636103786745e-07, "loss": 0.20532436, "memory(GiB)": 34.88, "step": 128315, "train_speed(iter/s)": 0.41181 }, { "acc": 0.92941427, "epoch": 3.474399588443939, "grad_norm": 6.618053436279297, "learning_rate": 4.6562076346497077e-07, "loss": 0.51064291, "memory(GiB)": 34.88, "step": 128320, "train_speed(iter/s)": 0.411811 }, { "acc": 0.93640099, "epoch": 3.4745349687271547, "grad_norm": 9.325772285461426, "learning_rate": 4.6538522270034843e-07, "loss": 0.32796254, "memory(GiB)": 34.88, "step": 128325, "train_speed(iter/s)": 0.411812 }, { "acc": 0.94641237, "epoch": 3.47467034901037, "grad_norm": 36.32184982299805, "learning_rate": 4.6514973874694786e-07, "loss": 0.32244067, "memory(GiB)": 34.88, "step": 128330, "train_speed(iter/s)": 0.411813 }, { "acc": 0.95189476, "epoch": 3.4748057292935854, "grad_norm": 6.8122687339782715, "learning_rate": 4.6491431160772297e-07, "loss": 0.28364835, "memory(GiB)": 34.88, "step": 128335, "train_speed(iter/s)": 0.411814 }, { "acc": 0.96229877, "epoch": 3.4749411095768012, "grad_norm": 2.652284622192383, "learning_rate": 4.6467894128562054e-07, "loss": 0.2314383, "memory(GiB)": 34.88, "step": 128340, "train_speed(iter/s)": 0.411815 }, { "acc": 0.94321642, "epoch": 3.4750764898600166, "grad_norm": 22.936254501342773, "learning_rate": 4.644436277835897e-07, "loss": 0.36614387, "memory(GiB)": 34.88, "step": 128345, "train_speed(iter/s)": 0.411815 }, { "acc": 0.94460144, "epoch": 3.4752118701432324, "grad_norm": 6.848130226135254, "learning_rate": 4.6420837110457836e-07, "loss": 0.35465508, "memory(GiB)": 34.88, "step": 128350, "train_speed(iter/s)": 0.411816 }, { "acc": 0.93519516, "epoch": 3.4753472504264478, "grad_norm": 6.81300687789917, "learning_rate": 4.639731712515343e-07, "loss": 0.41524878, "memory(GiB)": 34.88, "step": 128355, "train_speed(iter/s)": 0.411817 }, { "acc": 0.94078159, "epoch": 3.4754826307096636, "grad_norm": 4.743553638458252, "learning_rate": 4.637380282274015e-07, "loss": 0.3148756, "memory(GiB)": 34.88, "step": 128360, "train_speed(iter/s)": 0.411818 }, { "acc": 0.95151081, "epoch": 3.475618010992879, "grad_norm": 7.3004302978515625, "learning_rate": 4.63502942035128e-07, "loss": 0.36845067, "memory(GiB)": 34.88, "step": 128365, "train_speed(iter/s)": 0.411819 }, { "acc": 0.93455219, "epoch": 3.4757533912760947, "grad_norm": 6.842519283294678, "learning_rate": 4.6326791267765843e-07, "loss": 0.35289912, "memory(GiB)": 34.88, "step": 128370, "train_speed(iter/s)": 0.41182 }, { "acc": 0.92712593, "epoch": 3.47588877155931, "grad_norm": 4.090951442718506, "learning_rate": 4.6303294015793544e-07, "loss": 0.42733903, "memory(GiB)": 34.88, "step": 128375, "train_speed(iter/s)": 0.41182 }, { "acc": 0.93715315, "epoch": 3.476024151842526, "grad_norm": 6.14592981338501, "learning_rate": 4.627980244789033e-07, "loss": 0.35599077, "memory(GiB)": 34.88, "step": 128380, "train_speed(iter/s)": 0.411821 }, { "acc": 0.94042282, "epoch": 3.4761595321257412, "grad_norm": 4.059840202331543, "learning_rate": 4.6256316564350525e-07, "loss": 0.3749373, "memory(GiB)": 34.88, "step": 128385, "train_speed(iter/s)": 0.411822 }, { "acc": 0.92786007, "epoch": 3.4762949124089566, "grad_norm": 11.49419116973877, "learning_rate": 4.6232836365468386e-07, "loss": 0.46253891, "memory(GiB)": 34.88, "step": 128390, "train_speed(iter/s)": 0.411823 }, { "acc": 0.94397964, "epoch": 3.4764302926921724, "grad_norm": 4.79263162612915, "learning_rate": 4.620936185153792e-07, "loss": 0.30647867, "memory(GiB)": 34.88, "step": 128395, "train_speed(iter/s)": 0.411824 }, { "acc": 0.95336237, "epoch": 3.4765656729753878, "grad_norm": 6.277724742889404, "learning_rate": 4.618589302285318e-07, "loss": 0.29561815, "memory(GiB)": 34.88, "step": 128400, "train_speed(iter/s)": 0.411824 }, { "acc": 0.94164572, "epoch": 3.4767010532586036, "grad_norm": 5.610298156738281, "learning_rate": 4.616242987970825e-07, "loss": 0.40893097, "memory(GiB)": 34.88, "step": 128405, "train_speed(iter/s)": 0.411825 }, { "acc": 0.94779816, "epoch": 3.476836433541819, "grad_norm": 4.176453590393066, "learning_rate": 4.613897242239715e-07, "loss": 0.27941165, "memory(GiB)": 34.88, "step": 128410, "train_speed(iter/s)": 0.411826 }, { "acc": 0.94559193, "epoch": 3.4769718138250347, "grad_norm": 12.360295295715332, "learning_rate": 4.611552065121348e-07, "loss": 0.37419753, "memory(GiB)": 34.88, "step": 128415, "train_speed(iter/s)": 0.411827 }, { "acc": 0.94578018, "epoch": 3.47710719410825, "grad_norm": 8.219232559204102, "learning_rate": 4.609207456645111e-07, "loss": 0.32606885, "memory(GiB)": 34.88, "step": 128420, "train_speed(iter/s)": 0.411828 }, { "acc": 0.95014105, "epoch": 3.4772425743914654, "grad_norm": 5.377023220062256, "learning_rate": 4.606863416840393e-07, "loss": 0.29338002, "memory(GiB)": 34.88, "step": 128425, "train_speed(iter/s)": 0.411829 }, { "acc": 0.94626751, "epoch": 3.4773779546746812, "grad_norm": 7.3774824142456055, "learning_rate": 4.604519945736529e-07, "loss": 0.33253787, "memory(GiB)": 34.88, "step": 128430, "train_speed(iter/s)": 0.41183 }, { "acc": 0.95099669, "epoch": 3.4775133349578966, "grad_norm": 4.385562419891357, "learning_rate": 4.602177043362893e-07, "loss": 0.32566071, "memory(GiB)": 34.88, "step": 128435, "train_speed(iter/s)": 0.41183 }, { "acc": 0.9557354, "epoch": 3.4776487152411124, "grad_norm": 4.317622661590576, "learning_rate": 4.59983470974883e-07, "loss": 0.25723228, "memory(GiB)": 34.88, "step": 128440, "train_speed(iter/s)": 0.411831 }, { "acc": 0.96255932, "epoch": 3.4777840955243278, "grad_norm": 5.12590217590332, "learning_rate": 4.597492944923692e-07, "loss": 0.1975126, "memory(GiB)": 34.88, "step": 128445, "train_speed(iter/s)": 0.411832 }, { "acc": 0.95050907, "epoch": 3.4779194758075436, "grad_norm": 11.82686996459961, "learning_rate": 4.595151748916796e-07, "loss": 0.29745793, "memory(GiB)": 34.88, "step": 128450, "train_speed(iter/s)": 0.411833 }, { "acc": 0.95508451, "epoch": 3.478054856090759, "grad_norm": 4.297714710235596, "learning_rate": 4.592811121757479e-07, "loss": 0.26726658, "memory(GiB)": 34.88, "step": 128455, "train_speed(iter/s)": 0.411834 }, { "acc": 0.94640465, "epoch": 3.4781902363739743, "grad_norm": 11.27206802368164, "learning_rate": 4.5904710634750735e-07, "loss": 0.40142469, "memory(GiB)": 34.88, "step": 128460, "train_speed(iter/s)": 0.411834 }, { "acc": 0.94342117, "epoch": 3.47832561665719, "grad_norm": 4.056872844696045, "learning_rate": 4.5881315740988706e-07, "loss": 0.35825171, "memory(GiB)": 34.88, "step": 128465, "train_speed(iter/s)": 0.411835 }, { "acc": 0.93921576, "epoch": 3.4784609969404054, "grad_norm": 3.0855050086975098, "learning_rate": 4.585792653658188e-07, "loss": 0.35683022, "memory(GiB)": 34.88, "step": 128470, "train_speed(iter/s)": 0.411836 }, { "acc": 0.9544508, "epoch": 3.4785963772236212, "grad_norm": 5.5566511154174805, "learning_rate": 4.583454302182333e-07, "loss": 0.24821458, "memory(GiB)": 34.88, "step": 128475, "train_speed(iter/s)": 0.411837 }, { "acc": 0.95416851, "epoch": 3.4787317575068366, "grad_norm": 9.413473129272461, "learning_rate": 4.581116519700591e-07, "loss": 0.2425638, "memory(GiB)": 34.88, "step": 128480, "train_speed(iter/s)": 0.411838 }, { "acc": 0.94405375, "epoch": 3.4788671377900524, "grad_norm": 3.3411622047424316, "learning_rate": 4.578779306242245e-07, "loss": 0.34461117, "memory(GiB)": 34.88, "step": 128485, "train_speed(iter/s)": 0.411839 }, { "acc": 0.95390701, "epoch": 3.4790025180732678, "grad_norm": 8.220097541809082, "learning_rate": 4.5764426618365705e-07, "loss": 0.33529267, "memory(GiB)": 34.88, "step": 128490, "train_speed(iter/s)": 0.411839 }, { "acc": 0.94479866, "epoch": 3.479137898356483, "grad_norm": 12.251359939575195, "learning_rate": 4.5741065865128567e-07, "loss": 0.34910469, "memory(GiB)": 34.88, "step": 128495, "train_speed(iter/s)": 0.41184 }, { "acc": 0.9463913, "epoch": 3.479273278639699, "grad_norm": 5.744865417480469, "learning_rate": 4.571771080300345e-07, "loss": 0.33437169, "memory(GiB)": 34.88, "step": 128500, "train_speed(iter/s)": 0.411841 }, { "acc": 0.94345226, "epoch": 3.4794086589229143, "grad_norm": 7.211215496063232, "learning_rate": 4.569436143228302e-07, "loss": 0.37163146, "memory(GiB)": 34.88, "step": 128505, "train_speed(iter/s)": 0.411842 }, { "acc": 0.94210281, "epoch": 3.47954403920613, "grad_norm": 5.654488563537598, "learning_rate": 4.5671017753259745e-07, "loss": 0.40255051, "memory(GiB)": 34.88, "step": 128510, "train_speed(iter/s)": 0.411843 }, { "acc": 0.95162468, "epoch": 3.4796794194893454, "grad_norm": 5.7315144538879395, "learning_rate": 4.564767976622619e-07, "loss": 0.28410938, "memory(GiB)": 34.88, "step": 128515, "train_speed(iter/s)": 0.411843 }, { "acc": 0.96488705, "epoch": 3.4798147997725613, "grad_norm": 5.947656154632568, "learning_rate": 4.562434747147449e-07, "loss": 0.21219282, "memory(GiB)": 34.88, "step": 128520, "train_speed(iter/s)": 0.411844 }, { "acc": 0.95282583, "epoch": 3.4799501800557766, "grad_norm": 5.0947442054748535, "learning_rate": 4.5601020869297044e-07, "loss": 0.26349669, "memory(GiB)": 34.88, "step": 128525, "train_speed(iter/s)": 0.411845 }, { "acc": 0.96034737, "epoch": 3.4800855603389924, "grad_norm": 5.99109411239624, "learning_rate": 4.557769995998603e-07, "loss": 0.26560073, "memory(GiB)": 34.88, "step": 128530, "train_speed(iter/s)": 0.411846 }, { "acc": 0.92630615, "epoch": 3.4802209406222078, "grad_norm": 7.157195568084717, "learning_rate": 4.5554384743833695e-07, "loss": 0.52844114, "memory(GiB)": 34.88, "step": 128535, "train_speed(iter/s)": 0.411847 }, { "acc": 0.9538475, "epoch": 3.4803563209054236, "grad_norm": 4.816119194030762, "learning_rate": 4.5531075221131984e-07, "loss": 0.24895411, "memory(GiB)": 34.88, "step": 128540, "train_speed(iter/s)": 0.411847 }, { "acc": 0.95208721, "epoch": 3.480491701188639, "grad_norm": 8.817102432250977, "learning_rate": 4.5507771392172864e-07, "loss": 0.22533629, "memory(GiB)": 34.88, "step": 128545, "train_speed(iter/s)": 0.411848 }, { "acc": 0.95505428, "epoch": 3.4806270814718543, "grad_norm": 34.735389709472656, "learning_rate": 4.5484473257248466e-07, "loss": 0.26238451, "memory(GiB)": 34.88, "step": 128550, "train_speed(iter/s)": 0.411849 }, { "acc": 0.93535461, "epoch": 3.48076246175507, "grad_norm": 4.7001824378967285, "learning_rate": 4.546118081665041e-07, "loss": 0.40774546, "memory(GiB)": 34.88, "step": 128555, "train_speed(iter/s)": 0.41185 }, { "acc": 0.95282202, "epoch": 3.4808978420382855, "grad_norm": 7.2299065589904785, "learning_rate": 4.543789407067059e-07, "loss": 0.26192961, "memory(GiB)": 34.88, "step": 128560, "train_speed(iter/s)": 0.411851 }, { "acc": 0.95444479, "epoch": 3.4810332223215013, "grad_norm": 5.927936553955078, "learning_rate": 4.5414613019600715e-07, "loss": 0.22821167, "memory(GiB)": 34.88, "step": 128565, "train_speed(iter/s)": 0.411852 }, { "acc": 0.9533905, "epoch": 3.4811686026047166, "grad_norm": 8.483731269836426, "learning_rate": 4.5391337663732494e-07, "loss": 0.28756065, "memory(GiB)": 34.88, "step": 128570, "train_speed(iter/s)": 0.411852 }, { "acc": 0.94944649, "epoch": 3.4813039828879324, "grad_norm": 2.608196496963501, "learning_rate": 4.5368068003357353e-07, "loss": 0.295049, "memory(GiB)": 34.88, "step": 128575, "train_speed(iter/s)": 0.411853 }, { "acc": 0.94336271, "epoch": 3.4814393631711478, "grad_norm": 3.8799784183502197, "learning_rate": 4.534480403876679e-07, "loss": 0.34843254, "memory(GiB)": 34.88, "step": 128580, "train_speed(iter/s)": 0.411854 }, { "acc": 0.95580578, "epoch": 3.481574743454363, "grad_norm": 4.486792087554932, "learning_rate": 4.532154577025244e-07, "loss": 0.26713247, "memory(GiB)": 34.88, "step": 128585, "train_speed(iter/s)": 0.411855 }, { "acc": 0.93194427, "epoch": 3.481710123737579, "grad_norm": 5.116975784301758, "learning_rate": 4.5298293198105433e-07, "loss": 0.38223429, "memory(GiB)": 34.88, "step": 128590, "train_speed(iter/s)": 0.411855 }, { "acc": 0.93555546, "epoch": 3.4818455040207943, "grad_norm": 3.743319034576416, "learning_rate": 4.5275046322617115e-07, "loss": 0.31469882, "memory(GiB)": 34.88, "step": 128595, "train_speed(iter/s)": 0.411856 }, { "acc": 0.94510946, "epoch": 3.48198088430401, "grad_norm": 10.467317581176758, "learning_rate": 4.525180514407878e-07, "loss": 0.34785414, "memory(GiB)": 34.88, "step": 128600, "train_speed(iter/s)": 0.411857 }, { "acc": 0.95662479, "epoch": 3.4821162645872255, "grad_norm": 7.487768173217773, "learning_rate": 4.522856966278155e-07, "loss": 0.23514888, "memory(GiB)": 34.88, "step": 128605, "train_speed(iter/s)": 0.411858 }, { "acc": 0.94326954, "epoch": 3.4822516448704413, "grad_norm": 5.830087184906006, "learning_rate": 4.5205339879016286e-07, "loss": 0.35745838, "memory(GiB)": 34.88, "step": 128610, "train_speed(iter/s)": 0.411859 }, { "acc": 0.95671692, "epoch": 3.4823870251536566, "grad_norm": 9.503019332885742, "learning_rate": 4.518211579307438e-07, "loss": 0.23948429, "memory(GiB)": 34.88, "step": 128615, "train_speed(iter/s)": 0.41186 }, { "acc": 0.93660564, "epoch": 3.482522405436872, "grad_norm": 7.461243152618408, "learning_rate": 4.5158897405246455e-07, "loss": 0.4126492, "memory(GiB)": 34.88, "step": 128620, "train_speed(iter/s)": 0.41186 }, { "acc": 0.94419003, "epoch": 3.4826577857200878, "grad_norm": 5.4908552169799805, "learning_rate": 4.5135684715823437e-07, "loss": 0.34182019, "memory(GiB)": 34.88, "step": 128625, "train_speed(iter/s)": 0.411861 }, { "acc": 0.94227085, "epoch": 3.482793166003303, "grad_norm": 7.541215419769287, "learning_rate": 4.51124777250961e-07, "loss": 0.35051608, "memory(GiB)": 34.88, "step": 128630, "train_speed(iter/s)": 0.411862 }, { "acc": 0.95571003, "epoch": 3.482928546286519, "grad_norm": 3.500218629837036, "learning_rate": 4.508927643335513e-07, "loss": 0.27737465, "memory(GiB)": 34.88, "step": 128635, "train_speed(iter/s)": 0.411863 }, { "acc": 0.93895769, "epoch": 3.4830639265697343, "grad_norm": 7.83236837387085, "learning_rate": 4.506608084089139e-07, "loss": 0.41359549, "memory(GiB)": 34.88, "step": 128640, "train_speed(iter/s)": 0.411864 }, { "acc": 0.94427605, "epoch": 3.48319930685295, "grad_norm": 4.836952209472656, "learning_rate": 4.5042890947995047e-07, "loss": 0.34901977, "memory(GiB)": 34.88, "step": 128645, "train_speed(iter/s)": 0.411865 }, { "acc": 0.95403967, "epoch": 3.4833346871361655, "grad_norm": 3.6030256748199463, "learning_rate": 4.501970675495702e-07, "loss": 0.30073977, "memory(GiB)": 34.88, "step": 128650, "train_speed(iter/s)": 0.411866 }, { "acc": 0.95295258, "epoch": 3.483470067419381, "grad_norm": 2.3898448944091797, "learning_rate": 4.499652826206743e-07, "loss": 0.25382872, "memory(GiB)": 34.88, "step": 128655, "train_speed(iter/s)": 0.411866 }, { "acc": 0.94019909, "epoch": 3.4836054477025966, "grad_norm": 4.275092601776123, "learning_rate": 4.49733554696169e-07, "loss": 0.35156779, "memory(GiB)": 34.88, "step": 128660, "train_speed(iter/s)": 0.411867 }, { "acc": 0.96846924, "epoch": 3.483740827985812, "grad_norm": 3.9707577228546143, "learning_rate": 4.495018837789545e-07, "loss": 0.21058736, "memory(GiB)": 34.88, "step": 128665, "train_speed(iter/s)": 0.411868 }, { "acc": 0.94337559, "epoch": 3.4838762082690278, "grad_norm": 4.298546314239502, "learning_rate": 4.492702698719343e-07, "loss": 0.35597417, "memory(GiB)": 34.88, "step": 128670, "train_speed(iter/s)": 0.411869 }, { "acc": 0.9582037, "epoch": 3.484011588552243, "grad_norm": 4.180543899536133, "learning_rate": 4.490387129780102e-07, "loss": 0.27371516, "memory(GiB)": 34.88, "step": 128675, "train_speed(iter/s)": 0.41187 }, { "acc": 0.94421558, "epoch": 3.484146968835459, "grad_norm": 4.456584453582764, "learning_rate": 4.488072131000808e-07, "loss": 0.28300474, "memory(GiB)": 34.88, "step": 128680, "train_speed(iter/s)": 0.411871 }, { "acc": 0.93912868, "epoch": 3.4842823491186743, "grad_norm": 11.255060195922852, "learning_rate": 4.4857577024104897e-07, "loss": 0.39103568, "memory(GiB)": 34.88, "step": 128685, "train_speed(iter/s)": 0.411871 }, { "acc": 0.95118761, "epoch": 3.48441772940189, "grad_norm": 4.006499767303467, "learning_rate": 4.4834438440381213e-07, "loss": 0.32887712, "memory(GiB)": 34.88, "step": 128690, "train_speed(iter/s)": 0.411872 }, { "acc": 0.95636635, "epoch": 3.4845531096851055, "grad_norm": 6.03663444519043, "learning_rate": 4.481130555912704e-07, "loss": 0.29415729, "memory(GiB)": 34.88, "step": 128695, "train_speed(iter/s)": 0.411873 }, { "acc": 0.93940945, "epoch": 3.4846884899683213, "grad_norm": 5.260683059692383, "learning_rate": 4.47881783806319e-07, "loss": 0.36629937, "memory(GiB)": 34.88, "step": 128700, "train_speed(iter/s)": 0.411874 }, { "acc": 0.94581099, "epoch": 3.4848238702515366, "grad_norm": 7.817267417907715, "learning_rate": 4.4765056905185754e-07, "loss": 0.31328638, "memory(GiB)": 34.88, "step": 128705, "train_speed(iter/s)": 0.411875 }, { "acc": 0.94564762, "epoch": 3.484959250534752, "grad_norm": 5.130600929260254, "learning_rate": 4.474194113307817e-07, "loss": 0.328513, "memory(GiB)": 34.88, "step": 128710, "train_speed(iter/s)": 0.411876 }, { "acc": 0.94016619, "epoch": 3.4850946308179678, "grad_norm": 6.157345294952393, "learning_rate": 4.471883106459856e-07, "loss": 0.41143727, "memory(GiB)": 34.88, "step": 128715, "train_speed(iter/s)": 0.411877 }, { "acc": 0.9562006, "epoch": 3.485230011101183, "grad_norm": 6.561509609222412, "learning_rate": 4.469572670003672e-07, "loss": 0.295856, "memory(GiB)": 34.88, "step": 128720, "train_speed(iter/s)": 0.411877 }, { "acc": 0.95217381, "epoch": 3.485365391384399, "grad_norm": 7.412638187408447, "learning_rate": 4.4672628039681824e-07, "loss": 0.29642355, "memory(GiB)": 34.88, "step": 128725, "train_speed(iter/s)": 0.411878 }, { "acc": 0.94639063, "epoch": 3.4855007716676143, "grad_norm": 7.828482627868652, "learning_rate": 4.464953508382345e-07, "loss": 0.33720379, "memory(GiB)": 34.88, "step": 128730, "train_speed(iter/s)": 0.411879 }, { "acc": 0.94654713, "epoch": 3.48563615195083, "grad_norm": 12.376731872558594, "learning_rate": 4.462644783275057e-07, "loss": 0.30485816, "memory(GiB)": 34.88, "step": 128735, "train_speed(iter/s)": 0.41188 }, { "acc": 0.94613533, "epoch": 3.4857715322340455, "grad_norm": 9.404585838317871, "learning_rate": 4.460336628675279e-07, "loss": 0.33245111, "memory(GiB)": 34.88, "step": 128740, "train_speed(iter/s)": 0.411881 }, { "acc": 0.95119343, "epoch": 3.485906912517261, "grad_norm": 6.224625587463379, "learning_rate": 4.458029044611899e-07, "loss": 0.30666497, "memory(GiB)": 34.88, "step": 128745, "train_speed(iter/s)": 0.411881 }, { "acc": 0.95649557, "epoch": 3.4860422928004766, "grad_norm": 2.644517421722412, "learning_rate": 4.4557220311138164e-07, "loss": 0.25945697, "memory(GiB)": 34.88, "step": 128750, "train_speed(iter/s)": 0.411882 }, { "acc": 0.94938974, "epoch": 3.486177673083692, "grad_norm": 7.022365570068359, "learning_rate": 4.4534155882099614e-07, "loss": 0.3202379, "memory(GiB)": 34.88, "step": 128755, "train_speed(iter/s)": 0.411883 }, { "acc": 0.94935093, "epoch": 3.4863130533669078, "grad_norm": 8.500235557556152, "learning_rate": 4.451109715929203e-07, "loss": 0.31605968, "memory(GiB)": 34.88, "step": 128760, "train_speed(iter/s)": 0.411884 }, { "acc": 0.9438179, "epoch": 3.486448433650123, "grad_norm": 8.26054859161377, "learning_rate": 4.4488044143004367e-07, "loss": 0.34895542, "memory(GiB)": 34.88, "step": 128765, "train_speed(iter/s)": 0.411885 }, { "acc": 0.95856228, "epoch": 3.486583813933339, "grad_norm": 7.062391757965088, "learning_rate": 4.446499683352526e-07, "loss": 0.2138504, "memory(GiB)": 34.88, "step": 128770, "train_speed(iter/s)": 0.411886 }, { "acc": 0.94631701, "epoch": 3.4867191942165543, "grad_norm": 3.604433298110962, "learning_rate": 4.4441955231143667e-07, "loss": 0.27455173, "memory(GiB)": 34.88, "step": 128775, "train_speed(iter/s)": 0.411886 }, { "acc": 0.95331783, "epoch": 3.4868545744997697, "grad_norm": 4.113994598388672, "learning_rate": 4.4418919336148057e-07, "loss": 0.2732425, "memory(GiB)": 34.88, "step": 128780, "train_speed(iter/s)": 0.411887 }, { "acc": 0.95609303, "epoch": 3.4869899547829855, "grad_norm": 5.714982509613037, "learning_rate": 4.439588914882705e-07, "loss": 0.31314888, "memory(GiB)": 34.88, "step": 128785, "train_speed(iter/s)": 0.411888 }, { "acc": 0.95098038, "epoch": 3.487125335066201, "grad_norm": 4.315598964691162, "learning_rate": 4.437286466946917e-07, "loss": 0.2920939, "memory(GiB)": 34.88, "step": 128790, "train_speed(iter/s)": 0.411889 }, { "acc": 0.96239986, "epoch": 3.4872607153494166, "grad_norm": 4.6143317222595215, "learning_rate": 4.434984589836277e-07, "loss": 0.22796474, "memory(GiB)": 34.88, "step": 128795, "train_speed(iter/s)": 0.41189 }, { "acc": 0.95066118, "epoch": 3.487396095632632, "grad_norm": 10.30016040802002, "learning_rate": 4.432683283579636e-07, "loss": 0.35273051, "memory(GiB)": 34.88, "step": 128800, "train_speed(iter/s)": 0.41189 }, { "acc": 0.95478106, "epoch": 3.487531475915848, "grad_norm": 3.6347148418426514, "learning_rate": 4.430382548205791e-07, "loss": 0.23162291, "memory(GiB)": 34.88, "step": 128805, "train_speed(iter/s)": 0.411891 }, { "acc": 0.95775747, "epoch": 3.487666856199063, "grad_norm": 6.77178955078125, "learning_rate": 4.428082383743599e-07, "loss": 0.24233682, "memory(GiB)": 34.88, "step": 128810, "train_speed(iter/s)": 0.411892 }, { "acc": 0.94207993, "epoch": 3.4878022364822785, "grad_norm": 6.8832268714904785, "learning_rate": 4.425782790221851e-07, "loss": 0.37515507, "memory(GiB)": 34.88, "step": 128815, "train_speed(iter/s)": 0.411893 }, { "acc": 0.95551901, "epoch": 3.4879376167654943, "grad_norm": 7.260143756866455, "learning_rate": 4.423483767669371e-07, "loss": 0.27773218, "memory(GiB)": 34.88, "step": 128820, "train_speed(iter/s)": 0.411894 }, { "acc": 0.94622803, "epoch": 3.4880729970487097, "grad_norm": 5.335312843322754, "learning_rate": 4.421185316114939e-07, "loss": 0.26392598, "memory(GiB)": 34.88, "step": 128825, "train_speed(iter/s)": 0.411895 }, { "acc": 0.92681274, "epoch": 3.4882083773319255, "grad_norm": 6.661509037017822, "learning_rate": 4.4188874355873617e-07, "loss": 0.44361658, "memory(GiB)": 34.88, "step": 128830, "train_speed(iter/s)": 0.411895 }, { "acc": 0.93588753, "epoch": 3.488343757615141, "grad_norm": 9.711447715759277, "learning_rate": 4.4165901261154303e-07, "loss": 0.38506081, "memory(GiB)": 34.88, "step": 128835, "train_speed(iter/s)": 0.411896 }, { "acc": 0.9482604, "epoch": 3.4884791378983566, "grad_norm": 6.552320957183838, "learning_rate": 4.4142933877278917e-07, "loss": 0.35999627, "memory(GiB)": 34.88, "step": 128840, "train_speed(iter/s)": 0.411897 }, { "acc": 0.93921165, "epoch": 3.488614518181572, "grad_norm": 12.534238815307617, "learning_rate": 4.411997220453557e-07, "loss": 0.32801015, "memory(GiB)": 34.88, "step": 128845, "train_speed(iter/s)": 0.411898 }, { "acc": 0.94849815, "epoch": 3.488749898464788, "grad_norm": 10.516764640808105, "learning_rate": 4.409701624321163e-07, "loss": 0.29266081, "memory(GiB)": 34.88, "step": 128850, "train_speed(iter/s)": 0.411899 }, { "acc": 0.94888954, "epoch": 3.488885278748003, "grad_norm": 12.911417961120605, "learning_rate": 4.4074065993594893e-07, "loss": 0.31979337, "memory(GiB)": 34.88, "step": 128855, "train_speed(iter/s)": 0.4119 }, { "acc": 0.94716234, "epoch": 3.489020659031219, "grad_norm": 5.143882751464844, "learning_rate": 4.405112145597249e-07, "loss": 0.30952854, "memory(GiB)": 34.88, "step": 128860, "train_speed(iter/s)": 0.4119 }, { "acc": 0.9503212, "epoch": 3.4891560393144343, "grad_norm": 12.100577354431152, "learning_rate": 4.4028182630632266e-07, "loss": 0.29139085, "memory(GiB)": 34.88, "step": 128865, "train_speed(iter/s)": 0.411901 }, { "acc": 0.93887615, "epoch": 3.4892914195976497, "grad_norm": 9.35930347442627, "learning_rate": 4.400524951786141e-07, "loss": 0.39659634, "memory(GiB)": 34.88, "step": 128870, "train_speed(iter/s)": 0.411902 }, { "acc": 0.94552069, "epoch": 3.4894267998808655, "grad_norm": 6.791435718536377, "learning_rate": 4.3982322117947e-07, "loss": 0.36387479, "memory(GiB)": 34.88, "step": 128875, "train_speed(iter/s)": 0.411903 }, { "acc": 0.94362612, "epoch": 3.489562180164081, "grad_norm": 8.95644474029541, "learning_rate": 4.395940043117661e-07, "loss": 0.3332068, "memory(GiB)": 34.88, "step": 128880, "train_speed(iter/s)": 0.411904 }, { "acc": 0.94100418, "epoch": 3.4896975604472966, "grad_norm": 8.265734672546387, "learning_rate": 4.393648445783708e-07, "loss": 0.30829282, "memory(GiB)": 34.88, "step": 128885, "train_speed(iter/s)": 0.411905 }, { "acc": 0.94916077, "epoch": 3.489832940730512, "grad_norm": 2.8962326049804688, "learning_rate": 4.3913574198215673e-07, "loss": 0.28020082, "memory(GiB)": 34.88, "step": 128890, "train_speed(iter/s)": 0.411906 }, { "acc": 0.94415321, "epoch": 3.489968321013728, "grad_norm": 7.713769912719727, "learning_rate": 4.389066965259917e-07, "loss": 0.36458461, "memory(GiB)": 34.88, "step": 128895, "train_speed(iter/s)": 0.411907 }, { "acc": 0.95172958, "epoch": 3.490103701296943, "grad_norm": 6.272259712219238, "learning_rate": 4.3867770821274774e-07, "loss": 0.2399394, "memory(GiB)": 34.88, "step": 128900, "train_speed(iter/s)": 0.411907 }, { "acc": 0.96318474, "epoch": 3.4902390815801585, "grad_norm": 8.92282772064209, "learning_rate": 4.38448777045291e-07, "loss": 0.25720179, "memory(GiB)": 34.88, "step": 128905, "train_speed(iter/s)": 0.411908 }, { "acc": 0.94622259, "epoch": 3.4903744618633743, "grad_norm": 4.951371192932129, "learning_rate": 4.382199030264905e-07, "loss": 0.2948781, "memory(GiB)": 34.88, "step": 128910, "train_speed(iter/s)": 0.411909 }, { "acc": 0.95684261, "epoch": 3.4905098421465897, "grad_norm": 9.984394073486328, "learning_rate": 4.379910861592139e-07, "loss": 0.27475512, "memory(GiB)": 34.88, "step": 128915, "train_speed(iter/s)": 0.41191 }, { "acc": 0.96847439, "epoch": 3.4906452224298055, "grad_norm": 4.384177207946777, "learning_rate": 4.3776232644632574e-07, "loss": 0.15594952, "memory(GiB)": 34.88, "step": 128920, "train_speed(iter/s)": 0.411911 }, { "acc": 0.93131428, "epoch": 3.490780602713021, "grad_norm": 8.481429100036621, "learning_rate": 4.375336238906939e-07, "loss": 0.41976328, "memory(GiB)": 34.88, "step": 128925, "train_speed(iter/s)": 0.411912 }, { "acc": 0.94981308, "epoch": 3.4909159829962366, "grad_norm": 4.1806206703186035, "learning_rate": 4.373049784951803e-07, "loss": 0.29471447, "memory(GiB)": 34.88, "step": 128930, "train_speed(iter/s)": 0.411913 }, { "acc": 0.93586655, "epoch": 3.491051363279452, "grad_norm": 4.695782661437988, "learning_rate": 4.3707639026265234e-07, "loss": 0.32774551, "memory(GiB)": 34.88, "step": 128935, "train_speed(iter/s)": 0.411914 }, { "acc": 0.94786444, "epoch": 3.4911867435626673, "grad_norm": 6.938746452331543, "learning_rate": 4.3684785919597204e-07, "loss": 0.29969931, "memory(GiB)": 34.88, "step": 128940, "train_speed(iter/s)": 0.411914 }, { "acc": 0.95937157, "epoch": 3.491322123845883, "grad_norm": 5.752143859863281, "learning_rate": 4.366193852980017e-07, "loss": 0.21960893, "memory(GiB)": 34.88, "step": 128945, "train_speed(iter/s)": 0.411915 }, { "acc": 0.93560944, "epoch": 3.4914575041290985, "grad_norm": 4.383432388305664, "learning_rate": 4.3639096857160427e-07, "loss": 0.35193455, "memory(GiB)": 34.88, "step": 128950, "train_speed(iter/s)": 0.411916 }, { "acc": 0.94800587, "epoch": 3.4915928844123143, "grad_norm": 3.4809017181396484, "learning_rate": 4.3616260901964163e-07, "loss": 0.34221456, "memory(GiB)": 34.88, "step": 128955, "train_speed(iter/s)": 0.411917 }, { "acc": 0.95676575, "epoch": 3.4917282646955297, "grad_norm": 6.76829719543457, "learning_rate": 4.359343066449741e-07, "loss": 0.2389637, "memory(GiB)": 34.88, "step": 128960, "train_speed(iter/s)": 0.411917 }, { "acc": 0.94415445, "epoch": 3.4918636449787455, "grad_norm": 7.727224349975586, "learning_rate": 4.357060614504595e-07, "loss": 0.29958596, "memory(GiB)": 34.88, "step": 128965, "train_speed(iter/s)": 0.411918 }, { "acc": 0.93998175, "epoch": 3.491999025261961, "grad_norm": 6.109884262084961, "learning_rate": 4.3547787343896037e-07, "loss": 0.36744018, "memory(GiB)": 34.88, "step": 128970, "train_speed(iter/s)": 0.411919 }, { "acc": 0.93804474, "epoch": 3.492134405545176, "grad_norm": 6.535851001739502, "learning_rate": 4.3524974261333243e-07, "loss": 0.40001087, "memory(GiB)": 34.88, "step": 128975, "train_speed(iter/s)": 0.41192 }, { "acc": 0.96070147, "epoch": 3.492269785828392, "grad_norm": 9.836173057556152, "learning_rate": 4.3502166897643534e-07, "loss": 0.24979835, "memory(GiB)": 34.88, "step": 128980, "train_speed(iter/s)": 0.411921 }, { "acc": 0.94951496, "epoch": 3.4924051661116073, "grad_norm": 14.927342414855957, "learning_rate": 4.3479365253112487e-07, "loss": 0.24221745, "memory(GiB)": 34.88, "step": 128985, "train_speed(iter/s)": 0.411921 }, { "acc": 0.94158764, "epoch": 3.492540546394823, "grad_norm": 6.2822489738464355, "learning_rate": 4.3456569328025943e-07, "loss": 0.36652727, "memory(GiB)": 34.88, "step": 128990, "train_speed(iter/s)": 0.411922 }, { "acc": 0.93416405, "epoch": 3.4926759266780385, "grad_norm": 3.8655765056610107, "learning_rate": 4.343377912266928e-07, "loss": 0.3609767, "memory(GiB)": 34.88, "step": 128995, "train_speed(iter/s)": 0.411923 }, { "acc": 0.95242519, "epoch": 3.4928113069612543, "grad_norm": 8.518709182739258, "learning_rate": 4.3410994637327834e-07, "loss": 0.25663908, "memory(GiB)": 34.88, "step": 129000, "train_speed(iter/s)": 0.411924 }, { "acc": 0.96977272, "epoch": 3.4929466872444697, "grad_norm": 3.9308269023895264, "learning_rate": 4.338821587228747e-07, "loss": 0.16333489, "memory(GiB)": 34.88, "step": 129005, "train_speed(iter/s)": 0.411925 }, { "acc": 0.95208807, "epoch": 3.493082067527685, "grad_norm": 5.118773460388184, "learning_rate": 4.336544282783314e-07, "loss": 0.29262664, "memory(GiB)": 34.88, "step": 129010, "train_speed(iter/s)": 0.411926 }, { "acc": 0.93524113, "epoch": 3.493217447810901, "grad_norm": 9.327237129211426, "learning_rate": 4.334267550425033e-07, "loss": 0.39490986, "memory(GiB)": 34.88, "step": 129015, "train_speed(iter/s)": 0.411927 }, { "acc": 0.94710102, "epoch": 3.493352828094116, "grad_norm": 5.720514297485352, "learning_rate": 4.331991390182415e-07, "loss": 0.28916459, "memory(GiB)": 34.88, "step": 129020, "train_speed(iter/s)": 0.411927 }, { "acc": 0.95148029, "epoch": 3.493488208377332, "grad_norm": 6.682653427124023, "learning_rate": 4.3297158020839854e-07, "loss": 0.30557134, "memory(GiB)": 34.88, "step": 129025, "train_speed(iter/s)": 0.411928 }, { "acc": 0.94209404, "epoch": 3.4936235886605473, "grad_norm": 5.346583366394043, "learning_rate": 4.3274407861582357e-07, "loss": 0.40310478, "memory(GiB)": 34.88, "step": 129030, "train_speed(iter/s)": 0.411929 }, { "acc": 0.94244957, "epoch": 3.493758968943763, "grad_norm": 7.457423686981201, "learning_rate": 4.3251663424336726e-07, "loss": 0.32932663, "memory(GiB)": 34.88, "step": 129035, "train_speed(iter/s)": 0.41193 }, { "acc": 0.92924309, "epoch": 3.4938943492269785, "grad_norm": 7.668031692504883, "learning_rate": 4.3228924709387934e-07, "loss": 0.4420393, "memory(GiB)": 34.88, "step": 129040, "train_speed(iter/s)": 0.411931 }, { "acc": 0.94448795, "epoch": 3.4940297295101943, "grad_norm": 8.276556015014648, "learning_rate": 4.3206191717020665e-07, "loss": 0.31650872, "memory(GiB)": 34.88, "step": 129045, "train_speed(iter/s)": 0.411932 }, { "acc": 0.9335885, "epoch": 3.4941651097934097, "grad_norm": 5.210735321044922, "learning_rate": 4.3183464447519775e-07, "loss": 0.40098329, "memory(GiB)": 34.88, "step": 129050, "train_speed(iter/s)": 0.411933 }, { "acc": 0.94161091, "epoch": 3.4943004900766255, "grad_norm": 44.254188537597656, "learning_rate": 4.3160742901170004e-07, "loss": 0.33722053, "memory(GiB)": 34.88, "step": 129055, "train_speed(iter/s)": 0.411933 }, { "acc": 0.9353055, "epoch": 3.494435870359841, "grad_norm": 5.891622543334961, "learning_rate": 4.313802707825605e-07, "loss": 0.40072708, "memory(GiB)": 34.88, "step": 129060, "train_speed(iter/s)": 0.411934 }, { "acc": 0.92906666, "epoch": 3.494571250643056, "grad_norm": 8.985082626342773, "learning_rate": 4.3115316979062315e-07, "loss": 0.45557704, "memory(GiB)": 34.88, "step": 129065, "train_speed(iter/s)": 0.411935 }, { "acc": 0.94507389, "epoch": 3.494706630926272, "grad_norm": 14.279417037963867, "learning_rate": 4.309261260387338e-07, "loss": 0.34485898, "memory(GiB)": 34.88, "step": 129070, "train_speed(iter/s)": 0.411936 }, { "acc": 0.94240551, "epoch": 3.4948420112094873, "grad_norm": 8.363396644592285, "learning_rate": 4.30699139529736e-07, "loss": 0.33301804, "memory(GiB)": 34.88, "step": 129075, "train_speed(iter/s)": 0.411937 }, { "acc": 0.9537014, "epoch": 3.494977391492703, "grad_norm": 12.906291007995605, "learning_rate": 4.3047221026647443e-07, "loss": 0.24797678, "memory(GiB)": 34.88, "step": 129080, "train_speed(iter/s)": 0.411938 }, { "acc": 0.9493803, "epoch": 3.4951127717759185, "grad_norm": 4.681553363800049, "learning_rate": 4.302453382517903e-07, "loss": 0.32041178, "memory(GiB)": 34.88, "step": 129085, "train_speed(iter/s)": 0.411938 }, { "acc": 0.9517951, "epoch": 3.4952481520591343, "grad_norm": 6.675717830657959, "learning_rate": 4.300185234885268e-07, "loss": 0.24817574, "memory(GiB)": 34.88, "step": 129090, "train_speed(iter/s)": 0.411939 }, { "acc": 0.95882702, "epoch": 3.4953835323423497, "grad_norm": 8.988865852355957, "learning_rate": 4.2979176597952514e-07, "loss": 0.28136072, "memory(GiB)": 34.88, "step": 129095, "train_speed(iter/s)": 0.41194 }, { "acc": 0.94454765, "epoch": 3.495518912625565, "grad_norm": 7.978848934173584, "learning_rate": 4.295650657276244e-07, "loss": 0.34074454, "memory(GiB)": 34.88, "step": 129100, "train_speed(iter/s)": 0.411941 }, { "acc": 0.94670649, "epoch": 3.495654292908781, "grad_norm": 6.437500476837158, "learning_rate": 4.2933842273566595e-07, "loss": 0.33532653, "memory(GiB)": 34.88, "step": 129105, "train_speed(iter/s)": 0.411942 }, { "acc": 0.95767174, "epoch": 3.495789673191996, "grad_norm": 6.599537372589111, "learning_rate": 4.291118370064885e-07, "loss": 0.29358928, "memory(GiB)": 34.88, "step": 129110, "train_speed(iter/s)": 0.411943 }, { "acc": 0.94412985, "epoch": 3.495925053475212, "grad_norm": 5.287051677703857, "learning_rate": 4.288853085429315e-07, "loss": 0.36688991, "memory(GiB)": 34.88, "step": 129115, "train_speed(iter/s)": 0.411944 }, { "acc": 0.94302673, "epoch": 3.4960604337584273, "grad_norm": 21.90912437438965, "learning_rate": 4.286588373478308e-07, "loss": 0.38286495, "memory(GiB)": 34.88, "step": 129120, "train_speed(iter/s)": 0.411944 }, { "acc": 0.94759674, "epoch": 3.496195814041643, "grad_norm": 8.775235176086426, "learning_rate": 4.284324234240244e-07, "loss": 0.27530572, "memory(GiB)": 34.88, "step": 129125, "train_speed(iter/s)": 0.411945 }, { "acc": 0.94153595, "epoch": 3.4963311943248585, "grad_norm": 8.519217491149902, "learning_rate": 4.282060667743492e-07, "loss": 0.38289294, "memory(GiB)": 34.88, "step": 129130, "train_speed(iter/s)": 0.411946 }, { "acc": 0.94561014, "epoch": 3.496466574608074, "grad_norm": 5.08916711807251, "learning_rate": 4.2797976740163936e-07, "loss": 0.30697498, "memory(GiB)": 34.88, "step": 129135, "train_speed(iter/s)": 0.411947 }, { "acc": 0.944347, "epoch": 3.4966019548912897, "grad_norm": 6.571771144866943, "learning_rate": 4.277535253087306e-07, "loss": 0.38431487, "memory(GiB)": 34.88, "step": 129140, "train_speed(iter/s)": 0.411948 }, { "acc": 0.95846977, "epoch": 3.496737335174505, "grad_norm": 2.960615634918213, "learning_rate": 4.2752734049845645e-07, "loss": 0.23745689, "memory(GiB)": 34.88, "step": 129145, "train_speed(iter/s)": 0.411949 }, { "acc": 0.95580425, "epoch": 3.496872715457721, "grad_norm": 6.813963890075684, "learning_rate": 4.2730121297365174e-07, "loss": 0.25673778, "memory(GiB)": 34.88, "step": 129150, "train_speed(iter/s)": 0.411949 }, { "acc": 0.95439053, "epoch": 3.497008095740936, "grad_norm": 9.560344696044922, "learning_rate": 4.270751427371471e-07, "loss": 0.22638192, "memory(GiB)": 34.88, "step": 129155, "train_speed(iter/s)": 0.41195 }, { "acc": 0.94034977, "epoch": 3.497143476024152, "grad_norm": 5.836981773376465, "learning_rate": 4.2684912979177607e-07, "loss": 0.32140694, "memory(GiB)": 34.88, "step": 129160, "train_speed(iter/s)": 0.411951 }, { "acc": 0.94769878, "epoch": 3.4972788563073673, "grad_norm": 3.253826856613159, "learning_rate": 4.266231741403696e-07, "loss": 0.34389741, "memory(GiB)": 34.88, "step": 129165, "train_speed(iter/s)": 0.411952 }, { "acc": 0.95191641, "epoch": 3.4974142365905827, "grad_norm": 27.795928955078125, "learning_rate": 4.2639727578575776e-07, "loss": 0.29824667, "memory(GiB)": 34.88, "step": 129170, "train_speed(iter/s)": 0.411953 }, { "acc": 0.93931561, "epoch": 3.4975496168737985, "grad_norm": 10.188121795654297, "learning_rate": 4.2617143473077034e-07, "loss": 0.44412346, "memory(GiB)": 34.88, "step": 129175, "train_speed(iter/s)": 0.411954 }, { "acc": 0.95326424, "epoch": 3.497684997157014, "grad_norm": 8.814347267150879, "learning_rate": 4.259456509782369e-07, "loss": 0.30925736, "memory(GiB)": 34.88, "step": 129180, "train_speed(iter/s)": 0.411954 }, { "acc": 0.93944111, "epoch": 3.4978203774402297, "grad_norm": 11.100769996643066, "learning_rate": 4.2571992453098613e-07, "loss": 0.38137214, "memory(GiB)": 34.88, "step": 129185, "train_speed(iter/s)": 0.411955 }, { "acc": 0.94166336, "epoch": 3.497955757723445, "grad_norm": 9.752934455871582, "learning_rate": 4.2549425539184486e-07, "loss": 0.35922606, "memory(GiB)": 34.88, "step": 129190, "train_speed(iter/s)": 0.411956 }, { "acc": 0.95940371, "epoch": 3.498091138006661, "grad_norm": 6.130762100219727, "learning_rate": 4.2526864356364e-07, "loss": 0.28492413, "memory(GiB)": 34.88, "step": 129195, "train_speed(iter/s)": 0.411957 }, { "acc": 0.96106958, "epoch": 3.498226518289876, "grad_norm": 7.117741107940674, "learning_rate": 4.2504308904919846e-07, "loss": 0.25693486, "memory(GiB)": 34.88, "step": 129200, "train_speed(iter/s)": 0.411958 }, { "acc": 0.94773149, "epoch": 3.498361898573092, "grad_norm": 5.383523464202881, "learning_rate": 4.2481759185134663e-07, "loss": 0.36323009, "memory(GiB)": 34.88, "step": 129205, "train_speed(iter/s)": 0.411958 }, { "acc": 0.95516224, "epoch": 3.4984972788563073, "grad_norm": 6.82131814956665, "learning_rate": 4.245921519729068e-07, "loss": 0.26999726, "memory(GiB)": 34.88, "step": 129210, "train_speed(iter/s)": 0.411959 }, { "acc": 0.95185032, "epoch": 3.498632659139523, "grad_norm": 9.617753028869629, "learning_rate": 4.243667694167044e-07, "loss": 0.30237703, "memory(GiB)": 34.88, "step": 129215, "train_speed(iter/s)": 0.41196 }, { "acc": 0.94168854, "epoch": 3.4987680394227385, "grad_norm": 4.1646833419799805, "learning_rate": 4.2414144418556404e-07, "loss": 0.4220305, "memory(GiB)": 34.88, "step": 129220, "train_speed(iter/s)": 0.411961 }, { "acc": 0.95039082, "epoch": 3.498903419705954, "grad_norm": 7.690433979034424, "learning_rate": 4.2391617628230586e-07, "loss": 0.26186831, "memory(GiB)": 34.88, "step": 129225, "train_speed(iter/s)": 0.411961 }, { "acc": 0.95647602, "epoch": 3.4990387999891697, "grad_norm": 7.6633405685424805, "learning_rate": 4.2369096570975306e-07, "loss": 0.22558699, "memory(GiB)": 34.88, "step": 129230, "train_speed(iter/s)": 0.411962 }, { "acc": 0.96156349, "epoch": 3.499174180272385, "grad_norm": 6.663765907287598, "learning_rate": 4.234658124707274e-07, "loss": 0.27914095, "memory(GiB)": 34.88, "step": 129235, "train_speed(iter/s)": 0.411963 }, { "acc": 0.934268, "epoch": 3.499309560555601, "grad_norm": 14.031838417053223, "learning_rate": 4.2324071656804917e-07, "loss": 0.37115035, "memory(GiB)": 34.88, "step": 129240, "train_speed(iter/s)": 0.411964 }, { "acc": 0.94518089, "epoch": 3.499444940838816, "grad_norm": 6.248420715332031, "learning_rate": 4.23015678004537e-07, "loss": 0.28341014, "memory(GiB)": 34.88, "step": 129245, "train_speed(iter/s)": 0.411965 }, { "acc": 0.95290556, "epoch": 3.499580321122032, "grad_norm": 7.701140403747559, "learning_rate": 4.2279069678301046e-07, "loss": 0.31302447, "memory(GiB)": 34.88, "step": 129250, "train_speed(iter/s)": 0.411966 }, { "acc": 0.95933743, "epoch": 3.4997157014052473, "grad_norm": 7.406226634979248, "learning_rate": 4.2256577290628933e-07, "loss": 0.2628742, "memory(GiB)": 34.88, "step": 129255, "train_speed(iter/s)": 0.411966 }, { "acc": 0.94992771, "epoch": 3.4998510816884627, "grad_norm": 6.0289459228515625, "learning_rate": 4.2234090637718876e-07, "loss": 0.3397943, "memory(GiB)": 34.88, "step": 129260, "train_speed(iter/s)": 0.411967 }, { "acc": 0.95077343, "epoch": 3.4999864619716785, "grad_norm": 6.589334964752197, "learning_rate": 4.221160971985275e-07, "loss": 0.30829782, "memory(GiB)": 34.88, "step": 129265, "train_speed(iter/s)": 0.411968 }, { "acc": 0.94120522, "epoch": 3.500121842254894, "grad_norm": 4.287672519683838, "learning_rate": 4.2189134537312054e-07, "loss": 0.34524956, "memory(GiB)": 34.88, "step": 129270, "train_speed(iter/s)": 0.411969 }, { "acc": 0.94405355, "epoch": 3.5002572225381097, "grad_norm": 4.268531322479248, "learning_rate": 4.2166665090378496e-07, "loss": 0.40415988, "memory(GiB)": 34.88, "step": 129275, "train_speed(iter/s)": 0.411969 }, { "acc": 0.93852062, "epoch": 3.500392602821325, "grad_norm": 17.4614200592041, "learning_rate": 4.214420137933332e-07, "loss": 0.33884666, "memory(GiB)": 34.88, "step": 129280, "train_speed(iter/s)": 0.41197 }, { "acc": 0.95103521, "epoch": 3.500527983104541, "grad_norm": 8.111084938049316, "learning_rate": 4.212174340445817e-07, "loss": 0.28817716, "memory(GiB)": 34.88, "step": 129285, "train_speed(iter/s)": 0.411971 }, { "acc": 0.94949293, "epoch": 3.500663363387756, "grad_norm": 4.9387593269348145, "learning_rate": 4.209929116603428e-07, "loss": 0.2904593, "memory(GiB)": 34.88, "step": 129290, "train_speed(iter/s)": 0.411972 }, { "acc": 0.95349627, "epoch": 3.5007987436709715, "grad_norm": 7.48336124420166, "learning_rate": 4.2076844664342836e-07, "loss": 0.28920913, "memory(GiB)": 34.88, "step": 129295, "train_speed(iter/s)": 0.411973 }, { "acc": 0.95842066, "epoch": 3.5009341239541873, "grad_norm": 5.7133965492248535, "learning_rate": 4.2054403899665043e-07, "loss": 0.25997541, "memory(GiB)": 34.88, "step": 129300, "train_speed(iter/s)": 0.411974 }, { "acc": 0.93675213, "epoch": 3.5010695042374027, "grad_norm": 4.47928524017334, "learning_rate": 4.2031968872282036e-07, "loss": 0.38647432, "memory(GiB)": 34.88, "step": 129305, "train_speed(iter/s)": 0.411975 }, { "acc": 0.94970732, "epoch": 3.5012048845206185, "grad_norm": 8.597167015075684, "learning_rate": 4.2009539582475003e-07, "loss": 0.33230262, "memory(GiB)": 34.88, "step": 129310, "train_speed(iter/s)": 0.411975 }, { "acc": 0.95961609, "epoch": 3.501340264803834, "grad_norm": 5.184840202331543, "learning_rate": 4.198711603052457e-07, "loss": 0.20864739, "memory(GiB)": 34.88, "step": 129315, "train_speed(iter/s)": 0.411976 }, { "acc": 0.94309483, "epoch": 3.5014756450870497, "grad_norm": 13.475972175598145, "learning_rate": 4.1964698216712065e-07, "loss": 0.34973059, "memory(GiB)": 34.88, "step": 129320, "train_speed(iter/s)": 0.411977 }, { "acc": 0.93972054, "epoch": 3.501611025370265, "grad_norm": 21.144861221313477, "learning_rate": 4.1942286141317985e-07, "loss": 0.41889906, "memory(GiB)": 34.88, "step": 129325, "train_speed(iter/s)": 0.411978 }, { "acc": 0.95100231, "epoch": 3.5017464056534804, "grad_norm": 6.751049995422363, "learning_rate": 4.191987980462326e-07, "loss": 0.30594616, "memory(GiB)": 34.88, "step": 129330, "train_speed(iter/s)": 0.411979 }, { "acc": 0.95734673, "epoch": 3.501881785936696, "grad_norm": 5.836562156677246, "learning_rate": 4.1897479206908466e-07, "loss": 0.26590123, "memory(GiB)": 34.88, "step": 129335, "train_speed(iter/s)": 0.41198 }, { "acc": 0.94307117, "epoch": 3.502017166219912, "grad_norm": 6.803231239318848, "learning_rate": 4.187508434845424e-07, "loss": 0.3335592, "memory(GiB)": 34.88, "step": 129340, "train_speed(iter/s)": 0.411981 }, { "acc": 0.93957539, "epoch": 3.5021525465031274, "grad_norm": 3.3587727546691895, "learning_rate": 4.185269522954128e-07, "loss": 0.3087296, "memory(GiB)": 34.88, "step": 129345, "train_speed(iter/s)": 0.411981 }, { "acc": 0.94545498, "epoch": 3.5022879267863427, "grad_norm": 4.9509501457214355, "learning_rate": 4.183031185044972e-07, "loss": 0.29760976, "memory(GiB)": 34.88, "step": 129350, "train_speed(iter/s)": 0.411982 }, { "acc": 0.94864883, "epoch": 3.5024233070695585, "grad_norm": 7.427882194519043, "learning_rate": 4.1807934211460297e-07, "loss": 0.31080849, "memory(GiB)": 34.88, "step": 129355, "train_speed(iter/s)": 0.411983 }, { "acc": 0.95685863, "epoch": 3.502558687352774, "grad_norm": 4.842072010040283, "learning_rate": 4.1785562312853155e-07, "loss": 0.21440406, "memory(GiB)": 34.88, "step": 129360, "train_speed(iter/s)": 0.411984 }, { "acc": 0.94081049, "epoch": 3.5026940676359892, "grad_norm": 4.724213123321533, "learning_rate": 4.1763196154908655e-07, "loss": 0.37582951, "memory(GiB)": 34.88, "step": 129365, "train_speed(iter/s)": 0.411985 }, { "acc": 0.95474234, "epoch": 3.502829447919205, "grad_norm": 3.717029094696045, "learning_rate": 4.174083573790676e-07, "loss": 0.26753273, "memory(GiB)": 34.88, "step": 129370, "train_speed(iter/s)": 0.411986 }, { "acc": 0.94883547, "epoch": 3.502964828202421, "grad_norm": 8.689438819885254, "learning_rate": 4.171848106212779e-07, "loss": 0.33454523, "memory(GiB)": 34.88, "step": 129375, "train_speed(iter/s)": 0.411987 }, { "acc": 0.95661221, "epoch": 3.503100208485636, "grad_norm": 5.789180755615234, "learning_rate": 4.1696132127851806e-07, "loss": 0.28823831, "memory(GiB)": 34.88, "step": 129380, "train_speed(iter/s)": 0.411987 }, { "acc": 0.95307465, "epoch": 3.5032355887688515, "grad_norm": 4.640926837921143, "learning_rate": 4.167378893535857e-07, "loss": 0.27779155, "memory(GiB)": 34.88, "step": 129385, "train_speed(iter/s)": 0.411988 }, { "acc": 0.94593105, "epoch": 3.5033709690520674, "grad_norm": 4.128128528594971, "learning_rate": 4.16514514849281e-07, "loss": 0.3338398, "memory(GiB)": 34.88, "step": 129390, "train_speed(iter/s)": 0.411989 }, { "acc": 0.95429869, "epoch": 3.5035063493352827, "grad_norm": 8.033737182617188, "learning_rate": 4.162911977684015e-07, "loss": 0.28903661, "memory(GiB)": 34.88, "step": 129395, "train_speed(iter/s)": 0.41199 }, { "acc": 0.94774914, "epoch": 3.5036417296184985, "grad_norm": 4.826298236846924, "learning_rate": 4.1606793811374687e-07, "loss": 0.27187557, "memory(GiB)": 34.88, "step": 129400, "train_speed(iter/s)": 0.411991 }, { "acc": 0.92940331, "epoch": 3.503777109901714, "grad_norm": 6.208868026733398, "learning_rate": 4.1584473588810957e-07, "loss": 0.37037148, "memory(GiB)": 34.88, "step": 129405, "train_speed(iter/s)": 0.411991 }, { "acc": 0.94251947, "epoch": 3.5039124901849297, "grad_norm": 7.041635036468506, "learning_rate": 4.1562159109429045e-07, "loss": 0.33949409, "memory(GiB)": 34.88, "step": 129410, "train_speed(iter/s)": 0.411992 }, { "acc": 0.95020628, "epoch": 3.504047870468145, "grad_norm": 6.622671604156494, "learning_rate": 4.1539850373508314e-07, "loss": 0.28466825, "memory(GiB)": 34.88, "step": 129415, "train_speed(iter/s)": 0.411993 }, { "acc": 0.94192381, "epoch": 3.5041832507513604, "grad_norm": 6.3004560470581055, "learning_rate": 4.1517547381328057e-07, "loss": 0.30212359, "memory(GiB)": 34.88, "step": 129420, "train_speed(iter/s)": 0.411994 }, { "acc": 0.95306587, "epoch": 3.504318631034576, "grad_norm": 9.131568908691406, "learning_rate": 4.149525013316776e-07, "loss": 0.25920429, "memory(GiB)": 34.88, "step": 129425, "train_speed(iter/s)": 0.411995 }, { "acc": 0.94548874, "epoch": 3.5044540113177916, "grad_norm": 3.4051196575164795, "learning_rate": 4.1472958629306767e-07, "loss": 0.3050101, "memory(GiB)": 34.88, "step": 129430, "train_speed(iter/s)": 0.411995 }, { "acc": 0.95426731, "epoch": 3.5045893916010074, "grad_norm": 10.223492622375488, "learning_rate": 4.1450672870024453e-07, "loss": 0.24969368, "memory(GiB)": 34.88, "step": 129435, "train_speed(iter/s)": 0.411996 }, { "acc": 0.94192591, "epoch": 3.5047247718842227, "grad_norm": 4.361302852630615, "learning_rate": 4.142839285559961e-07, "loss": 0.3521266, "memory(GiB)": 34.88, "step": 129440, "train_speed(iter/s)": 0.411997 }, { "acc": 0.95169363, "epoch": 3.5048601521674385, "grad_norm": 3.9847583770751953, "learning_rate": 4.140611858631182e-07, "loss": 0.33087964, "memory(GiB)": 34.88, "step": 129445, "train_speed(iter/s)": 0.411998 }, { "acc": 0.95144253, "epoch": 3.504995532450654, "grad_norm": 6.960378646850586, "learning_rate": 4.138385006243973e-07, "loss": 0.28068581, "memory(GiB)": 34.88, "step": 129450, "train_speed(iter/s)": 0.411999 }, { "acc": 0.93361015, "epoch": 3.5051309127338692, "grad_norm": 16.078548431396484, "learning_rate": 4.1361587284262586e-07, "loss": 0.36681361, "memory(GiB)": 34.88, "step": 129455, "train_speed(iter/s)": 0.412 }, { "acc": 0.95689802, "epoch": 3.505266293017085, "grad_norm": 6.969109058380127, "learning_rate": 4.1339330252059025e-07, "loss": 0.23581738, "memory(GiB)": 34.88, "step": 129460, "train_speed(iter/s)": 0.412 }, { "acc": 0.95319576, "epoch": 3.5054016733003004, "grad_norm": 29.012845993041992, "learning_rate": 4.1317078966108014e-07, "loss": 0.26718547, "memory(GiB)": 34.88, "step": 129465, "train_speed(iter/s)": 0.412001 }, { "acc": 0.94913616, "epoch": 3.505537053583516, "grad_norm": 6.249359607696533, "learning_rate": 4.1294833426688255e-07, "loss": 0.28226674, "memory(GiB)": 34.88, "step": 129470, "train_speed(iter/s)": 0.412002 }, { "acc": 0.96465807, "epoch": 3.5056724338667316, "grad_norm": 7.654246807098389, "learning_rate": 4.1272593634078326e-07, "loss": 0.20548415, "memory(GiB)": 34.88, "step": 129475, "train_speed(iter/s)": 0.412003 }, { "acc": 0.95446453, "epoch": 3.5058078141499474, "grad_norm": 2.4002649784088135, "learning_rate": 4.125035958855703e-07, "loss": 0.23718266, "memory(GiB)": 34.88, "step": 129480, "train_speed(iter/s)": 0.412004 }, { "acc": 0.94077587, "epoch": 3.5059431944331627, "grad_norm": 9.056761741638184, "learning_rate": 4.1228131290402665e-07, "loss": 0.31234007, "memory(GiB)": 34.88, "step": 129485, "train_speed(iter/s)": 0.412005 }, { "acc": 0.94409695, "epoch": 3.506078574716378, "grad_norm": 6.077579498291016, "learning_rate": 4.1205908739893883e-07, "loss": 0.31754503, "memory(GiB)": 34.88, "step": 129490, "train_speed(iter/s)": 0.412005 }, { "acc": 0.93474073, "epoch": 3.506213954999594, "grad_norm": 7.170821189880371, "learning_rate": 4.1183691937308816e-07, "loss": 0.32036614, "memory(GiB)": 34.88, "step": 129495, "train_speed(iter/s)": 0.412006 }, { "acc": 0.96009989, "epoch": 3.5063493352828097, "grad_norm": 4.5020575523376465, "learning_rate": 4.116148088292611e-07, "loss": 0.26123841, "memory(GiB)": 34.88, "step": 129500, "train_speed(iter/s)": 0.412007 }, { "acc": 0.95530205, "epoch": 3.506484715566025, "grad_norm": 5.0133819580078125, "learning_rate": 4.1139275577023784e-07, "loss": 0.23990407, "memory(GiB)": 34.88, "step": 129505, "train_speed(iter/s)": 0.412008 }, { "acc": 0.9414155, "epoch": 3.5066200958492404, "grad_norm": 7.532464027404785, "learning_rate": 4.111707601987986e-07, "loss": 0.39411359, "memory(GiB)": 34.88, "step": 129510, "train_speed(iter/s)": 0.412009 }, { "acc": 0.94038582, "epoch": 3.506755476132456, "grad_norm": 8.156448364257812, "learning_rate": 4.1094882211772823e-07, "loss": 0.36089451, "memory(GiB)": 34.88, "step": 129515, "train_speed(iter/s)": 0.41201 }, { "acc": 0.95292931, "epoch": 3.5068908564156716, "grad_norm": 9.453285217285156, "learning_rate": 4.1072694152980417e-07, "loss": 0.29572554, "memory(GiB)": 34.88, "step": 129520, "train_speed(iter/s)": 0.41201 }, { "acc": 0.94231586, "epoch": 3.507026236698887, "grad_norm": 6.314815521240234, "learning_rate": 4.1050511843780673e-07, "loss": 0.35260091, "memory(GiB)": 34.88, "step": 129525, "train_speed(iter/s)": 0.412011 }, { "acc": 0.95294485, "epoch": 3.5071616169821027, "grad_norm": 2.541039228439331, "learning_rate": 4.102833528445133e-07, "loss": 0.31614425, "memory(GiB)": 34.88, "step": 129530, "train_speed(iter/s)": 0.412012 }, { "acc": 0.95297728, "epoch": 3.5072969972653185, "grad_norm": 5.615997314453125, "learning_rate": 4.1006164475270486e-07, "loss": 0.28924687, "memory(GiB)": 34.88, "step": 129535, "train_speed(iter/s)": 0.412013 }, { "acc": 0.95179653, "epoch": 3.507432377548534, "grad_norm": 6.730852127075195, "learning_rate": 4.098399941651565e-07, "loss": 0.27255709, "memory(GiB)": 34.88, "step": 129540, "train_speed(iter/s)": 0.412014 }, { "acc": 0.95006657, "epoch": 3.5075677578317492, "grad_norm": 7.063937664031982, "learning_rate": 4.096184010846443e-07, "loss": 0.26304121, "memory(GiB)": 34.88, "step": 129545, "train_speed(iter/s)": 0.412015 }, { "acc": 0.93672447, "epoch": 3.507703138114965, "grad_norm": 6.9901580810546875, "learning_rate": 4.093968655139461e-07, "loss": 0.39783309, "memory(GiB)": 34.88, "step": 129550, "train_speed(iter/s)": 0.412016 }, { "acc": 0.95393, "epoch": 3.5078385183981804, "grad_norm": 8.196185111999512, "learning_rate": 4.0917538745583624e-07, "loss": 0.29773927, "memory(GiB)": 34.88, "step": 129555, "train_speed(iter/s)": 0.412017 }, { "acc": 0.94260082, "epoch": 3.507973898681396, "grad_norm": 8.601572036743164, "learning_rate": 4.089539669130893e-07, "loss": 0.3852416, "memory(GiB)": 34.88, "step": 129560, "train_speed(iter/s)": 0.412017 }, { "acc": 0.95537767, "epoch": 3.5081092789646116, "grad_norm": 8.752455711364746, "learning_rate": 4.087326038884773e-07, "loss": 0.2604023, "memory(GiB)": 34.88, "step": 129565, "train_speed(iter/s)": 0.412018 }, { "acc": 0.95668049, "epoch": 3.5082446592478274, "grad_norm": 8.486645698547363, "learning_rate": 4.085112983847761e-07, "loss": 0.23253624, "memory(GiB)": 34.88, "step": 129570, "train_speed(iter/s)": 0.412019 }, { "acc": 0.9520957, "epoch": 3.5083800395310427, "grad_norm": 11.610233306884766, "learning_rate": 4.082900504047565e-07, "loss": 0.23350906, "memory(GiB)": 34.88, "step": 129575, "train_speed(iter/s)": 0.41202 }, { "acc": 0.94634151, "epoch": 3.508515419814258, "grad_norm": 6.293120861053467, "learning_rate": 4.0806885995118985e-07, "loss": 0.29120717, "memory(GiB)": 34.88, "step": 129580, "train_speed(iter/s)": 0.412021 }, { "acc": 0.95463877, "epoch": 3.508650800097474, "grad_norm": 6.766623020172119, "learning_rate": 4.0784772702684756e-07, "loss": 0.30629597, "memory(GiB)": 34.88, "step": 129585, "train_speed(iter/s)": 0.412022 }, { "acc": 0.93843184, "epoch": 3.5087861803806892, "grad_norm": 7.003367900848389, "learning_rate": 4.076266516344995e-07, "loss": 0.34939752, "memory(GiB)": 34.88, "step": 129590, "train_speed(iter/s)": 0.412022 }, { "acc": 0.95525322, "epoch": 3.508921560663905, "grad_norm": 8.581801414489746, "learning_rate": 4.0740563377691583e-07, "loss": 0.31737278, "memory(GiB)": 34.88, "step": 129595, "train_speed(iter/s)": 0.412023 }, { "acc": 0.94683294, "epoch": 3.5090569409471204, "grad_norm": 7.276711940765381, "learning_rate": 4.0718467345686234e-07, "loss": 0.32920494, "memory(GiB)": 34.88, "step": 129600, "train_speed(iter/s)": 0.412024 }, { "acc": 0.95221825, "epoch": 3.509192321230336, "grad_norm": 4.71214485168457, "learning_rate": 4.0696377067711174e-07, "loss": 0.31674867, "memory(GiB)": 34.88, "step": 129605, "train_speed(iter/s)": 0.412025 }, { "acc": 0.94713669, "epoch": 3.5093277015135516, "grad_norm": 5.638111114501953, "learning_rate": 4.067429254404268e-07, "loss": 0.30007858, "memory(GiB)": 34.88, "step": 129610, "train_speed(iter/s)": 0.412026 }, { "acc": 0.94074392, "epoch": 3.509463081796767, "grad_norm": 6.846628189086914, "learning_rate": 4.0652213774957645e-07, "loss": 0.35034685, "memory(GiB)": 34.88, "step": 129615, "train_speed(iter/s)": 0.412026 }, { "acc": 0.95320148, "epoch": 3.5095984620799827, "grad_norm": 4.082098007202148, "learning_rate": 4.063014076073263e-07, "loss": 0.30179315, "memory(GiB)": 34.88, "step": 129620, "train_speed(iter/s)": 0.412027 }, { "acc": 0.94660969, "epoch": 3.509733842363198, "grad_norm": 3.3394296169281006, "learning_rate": 4.0608073501644177e-07, "loss": 0.29387383, "memory(GiB)": 34.88, "step": 129625, "train_speed(iter/s)": 0.412028 }, { "acc": 0.95091982, "epoch": 3.509869222646414, "grad_norm": 4.149792671203613, "learning_rate": 4.058601199796865e-07, "loss": 0.30188916, "memory(GiB)": 34.88, "step": 129630, "train_speed(iter/s)": 0.412029 }, { "acc": 0.9547595, "epoch": 3.5100046029296292, "grad_norm": 7.266178131103516, "learning_rate": 4.0563956249982225e-07, "loss": 0.25006156, "memory(GiB)": 34.88, "step": 129635, "train_speed(iter/s)": 0.41203 }, { "acc": 0.95089569, "epoch": 3.510139983212845, "grad_norm": 6.747341156005859, "learning_rate": 4.0541906257961567e-07, "loss": 0.2577034, "memory(GiB)": 34.88, "step": 129640, "train_speed(iter/s)": 0.412031 }, { "acc": 0.95923777, "epoch": 3.5102753634960604, "grad_norm": 6.563017845153809, "learning_rate": 4.051986202218263e-07, "loss": 0.22556069, "memory(GiB)": 34.88, "step": 129645, "train_speed(iter/s)": 0.412032 }, { "acc": 0.95789633, "epoch": 3.5104107437792758, "grad_norm": 4.239887237548828, "learning_rate": 4.049782354292163e-07, "loss": 0.24052379, "memory(GiB)": 34.88, "step": 129650, "train_speed(iter/s)": 0.412032 }, { "acc": 0.92466755, "epoch": 3.5105461240624916, "grad_norm": 6.026042461395264, "learning_rate": 4.0475790820454693e-07, "loss": 0.4995306, "memory(GiB)": 34.88, "step": 129655, "train_speed(iter/s)": 0.412033 }, { "acc": 0.96445084, "epoch": 3.5106815043457074, "grad_norm": 2.288134813308716, "learning_rate": 4.0453763855057846e-07, "loss": 0.24671955, "memory(GiB)": 34.88, "step": 129660, "train_speed(iter/s)": 0.412034 }, { "acc": 0.93886833, "epoch": 3.5108168846289227, "grad_norm": 16.946847915649414, "learning_rate": 4.0431742647006955e-07, "loss": 0.36609576, "memory(GiB)": 34.88, "step": 129665, "train_speed(iter/s)": 0.412035 }, { "acc": 0.94350195, "epoch": 3.510952264912138, "grad_norm": 9.704565048217773, "learning_rate": 4.040972719657773e-07, "loss": 0.30677009, "memory(GiB)": 34.88, "step": 129670, "train_speed(iter/s)": 0.412036 }, { "acc": 0.93683052, "epoch": 3.511087645195354, "grad_norm": 7.953901290893555, "learning_rate": 4.0387717504046293e-07, "loss": 0.38921971, "memory(GiB)": 34.88, "step": 129675, "train_speed(iter/s)": 0.412036 }, { "acc": 0.94237127, "epoch": 3.5112230254785692, "grad_norm": 11.717520713806152, "learning_rate": 4.0365713569688126e-07, "loss": 0.27906048, "memory(GiB)": 34.88, "step": 129680, "train_speed(iter/s)": 0.412037 }, { "acc": 0.94034815, "epoch": 3.5113584057617846, "grad_norm": 7.184311866760254, "learning_rate": 4.034371539377887e-07, "loss": 0.32949638, "memory(GiB)": 34.88, "step": 129685, "train_speed(iter/s)": 0.412038 }, { "acc": 0.95018339, "epoch": 3.5114937860450004, "grad_norm": 2.1097664833068848, "learning_rate": 4.0321722976594165e-07, "loss": 0.27691162, "memory(GiB)": 34.88, "step": 129690, "train_speed(iter/s)": 0.412039 }, { "acc": 0.94876766, "epoch": 3.511629166328216, "grad_norm": 3.6112046241760254, "learning_rate": 4.0299736318409595e-07, "loss": 0.27693863, "memory(GiB)": 34.88, "step": 129695, "train_speed(iter/s)": 0.41204 }, { "acc": 0.95392666, "epoch": 3.5117645466114316, "grad_norm": 5.808831691741943, "learning_rate": 4.027775541950036e-07, "loss": 0.26272817, "memory(GiB)": 34.88, "step": 129700, "train_speed(iter/s)": 0.41204 }, { "acc": 0.94143276, "epoch": 3.511899926894647, "grad_norm": 11.943166732788086, "learning_rate": 4.0255780280141985e-07, "loss": 0.36244802, "memory(GiB)": 34.88, "step": 129705, "train_speed(iter/s)": 0.412041 }, { "acc": 0.94366465, "epoch": 3.5120353071778627, "grad_norm": 5.225495338439941, "learning_rate": 4.023381090060978e-07, "loss": 0.34093957, "memory(GiB)": 34.88, "step": 129710, "train_speed(iter/s)": 0.412042 }, { "acc": 0.9413023, "epoch": 3.512170687461078, "grad_norm": 3.538579225540161, "learning_rate": 4.021184728117878e-07, "loss": 0.37853384, "memory(GiB)": 34.88, "step": 129715, "train_speed(iter/s)": 0.412043 }, { "acc": 0.9334157, "epoch": 3.512306067744294, "grad_norm": 8.674652099609375, "learning_rate": 4.018988942212423e-07, "loss": 0.42004209, "memory(GiB)": 34.88, "step": 129720, "train_speed(iter/s)": 0.412044 }, { "acc": 0.95107803, "epoch": 3.5124414480275092, "grad_norm": 6.263416767120361, "learning_rate": 4.0167937323721167e-07, "loss": 0.29149344, "memory(GiB)": 34.88, "step": 129725, "train_speed(iter/s)": 0.412045 }, { "acc": 0.94213467, "epoch": 3.512576828310725, "grad_norm": 5.180275917053223, "learning_rate": 4.014599098624472e-07, "loss": 0.27639408, "memory(GiB)": 34.88, "step": 129730, "train_speed(iter/s)": 0.412045 }, { "acc": 0.95882082, "epoch": 3.5127122085939404, "grad_norm": 2.7833025455474854, "learning_rate": 4.0124050409969603e-07, "loss": 0.23493705, "memory(GiB)": 34.88, "step": 129735, "train_speed(iter/s)": 0.412046 }, { "acc": 0.94631004, "epoch": 3.5128475888771558, "grad_norm": 7.7083587646484375, "learning_rate": 4.0102115595170725e-07, "loss": 0.33565977, "memory(GiB)": 34.88, "step": 129740, "train_speed(iter/s)": 0.412047 }, { "acc": 0.94141388, "epoch": 3.5129829691603716, "grad_norm": 12.781824111938477, "learning_rate": 4.008018654212295e-07, "loss": 0.38417306, "memory(GiB)": 34.88, "step": 129745, "train_speed(iter/s)": 0.412048 }, { "acc": 0.94931211, "epoch": 3.513118349443587, "grad_norm": 4.438755512237549, "learning_rate": 4.005826325110098e-07, "loss": 0.34442532, "memory(GiB)": 34.88, "step": 129750, "train_speed(iter/s)": 0.412049 }, { "acc": 0.94354935, "epoch": 3.5132537297268027, "grad_norm": 9.691855430603027, "learning_rate": 4.0036345722379395e-07, "loss": 0.36544933, "memory(GiB)": 34.88, "step": 129755, "train_speed(iter/s)": 0.41205 }, { "acc": 0.93912334, "epoch": 3.513389110010018, "grad_norm": 4.330132961273193, "learning_rate": 4.0014433956232617e-07, "loss": 0.30865693, "memory(GiB)": 34.88, "step": 129760, "train_speed(iter/s)": 0.41205 }, { "acc": 0.94500313, "epoch": 3.513524490293234, "grad_norm": 4.04420804977417, "learning_rate": 3.9992527952935407e-07, "loss": 0.32846255, "memory(GiB)": 34.88, "step": 129765, "train_speed(iter/s)": 0.412051 }, { "acc": 0.93890238, "epoch": 3.5136598705764492, "grad_norm": 4.328308582305908, "learning_rate": 3.997062771276195e-07, "loss": 0.33947802, "memory(GiB)": 34.88, "step": 129770, "train_speed(iter/s)": 0.412052 }, { "acc": 0.94932632, "epoch": 3.5137952508596646, "grad_norm": 4.066488265991211, "learning_rate": 3.994873323598673e-07, "loss": 0.37431991, "memory(GiB)": 34.88, "step": 129775, "train_speed(iter/s)": 0.412053 }, { "acc": 0.95636234, "epoch": 3.5139306311428804, "grad_norm": 4.080175876617432, "learning_rate": 3.9926844522883937e-07, "loss": 0.24619081, "memory(GiB)": 34.88, "step": 129780, "train_speed(iter/s)": 0.412054 }, { "acc": 0.9509716, "epoch": 3.5140660114260958, "grad_norm": 6.448709011077881, "learning_rate": 3.990496157372794e-07, "loss": 0.27738037, "memory(GiB)": 34.88, "step": 129785, "train_speed(iter/s)": 0.412055 }, { "acc": 0.96020355, "epoch": 3.5142013917093116, "grad_norm": 6.493008136749268, "learning_rate": 3.9883084388792655e-07, "loss": 0.26515574, "memory(GiB)": 34.88, "step": 129790, "train_speed(iter/s)": 0.412055 }, { "acc": 0.94445934, "epoch": 3.514336771992527, "grad_norm": 6.94398307800293, "learning_rate": 3.986121296835212e-07, "loss": 0.39242465, "memory(GiB)": 34.88, "step": 129795, "train_speed(iter/s)": 0.412056 }, { "acc": 0.93993292, "epoch": 3.5144721522757427, "grad_norm": 5.789068222045898, "learning_rate": 3.983934731268053e-07, "loss": 0.39022388, "memory(GiB)": 34.88, "step": 129800, "train_speed(iter/s)": 0.412057 }, { "acc": 0.94877405, "epoch": 3.514607532558958, "grad_norm": 5.548862457275391, "learning_rate": 3.9817487422051637e-07, "loss": 0.26861556, "memory(GiB)": 34.88, "step": 129805, "train_speed(iter/s)": 0.412058 }, { "acc": 0.94561739, "epoch": 3.5147429128421734, "grad_norm": 5.206844806671143, "learning_rate": 3.979563329673931e-07, "loss": 0.32262549, "memory(GiB)": 34.88, "step": 129810, "train_speed(iter/s)": 0.412059 }, { "acc": 0.96661434, "epoch": 3.5148782931253892, "grad_norm": 2.623180866241455, "learning_rate": 3.977378493701729e-07, "loss": 0.21482921, "memory(GiB)": 34.88, "step": 129815, "train_speed(iter/s)": 0.41206 }, { "acc": 0.95493507, "epoch": 3.515013673408605, "grad_norm": 5.910910606384277, "learning_rate": 3.9751942343159404e-07, "loss": 0.24020185, "memory(GiB)": 34.88, "step": 129820, "train_speed(iter/s)": 0.41206 }, { "acc": 0.94656973, "epoch": 3.5151490536918204, "grad_norm": 8.280448913574219, "learning_rate": 3.9730105515439066e-07, "loss": 0.2681601, "memory(GiB)": 34.88, "step": 129825, "train_speed(iter/s)": 0.412061 }, { "acc": 0.94783573, "epoch": 3.5152844339750358, "grad_norm": 3.620453119277954, "learning_rate": 3.970827445412996e-07, "loss": 0.24939971, "memory(GiB)": 34.88, "step": 129830, "train_speed(iter/s)": 0.412062 }, { "acc": 0.95808287, "epoch": 3.5154198142582516, "grad_norm": 3.246781826019287, "learning_rate": 3.968644915950564e-07, "loss": 0.24681287, "memory(GiB)": 34.88, "step": 129835, "train_speed(iter/s)": 0.412063 }, { "acc": 0.94927425, "epoch": 3.515555194541467, "grad_norm": 4.928256511688232, "learning_rate": 3.966462963183929e-07, "loss": 0.3117022, "memory(GiB)": 34.88, "step": 129840, "train_speed(iter/s)": 0.412063 }, { "acc": 0.95125675, "epoch": 3.5156905748246823, "grad_norm": 10.838848114013672, "learning_rate": 3.9642815871404396e-07, "loss": 0.30940332, "memory(GiB)": 34.88, "step": 129845, "train_speed(iter/s)": 0.412064 }, { "acc": 0.95062752, "epoch": 3.515825955107898, "grad_norm": 10.526445388793945, "learning_rate": 3.9621007878474145e-07, "loss": 0.33508754, "memory(GiB)": 34.88, "step": 129850, "train_speed(iter/s)": 0.412065 }, { "acc": 0.95162706, "epoch": 3.515961335391114, "grad_norm": 5.011441707611084, "learning_rate": 3.959920565332185e-07, "loss": 0.28347712, "memory(GiB)": 34.88, "step": 129855, "train_speed(iter/s)": 0.412066 }, { "acc": 0.94779644, "epoch": 3.5160967156743292, "grad_norm": 6.927859783172607, "learning_rate": 3.9577409196220494e-07, "loss": 0.33665645, "memory(GiB)": 34.88, "step": 129860, "train_speed(iter/s)": 0.412067 }, { "acc": 0.94794636, "epoch": 3.5162320959575446, "grad_norm": 4.634133815765381, "learning_rate": 3.9555618507443107e-07, "loss": 0.31624837, "memory(GiB)": 34.88, "step": 129865, "train_speed(iter/s)": 0.412067 }, { "acc": 0.95548029, "epoch": 3.5163674762407604, "grad_norm": 9.710881233215332, "learning_rate": 3.953383358726277e-07, "loss": 0.30788934, "memory(GiB)": 34.88, "step": 129870, "train_speed(iter/s)": 0.412068 }, { "acc": 0.95208473, "epoch": 3.5165028565239758, "grad_norm": 3.427057981491089, "learning_rate": 3.9512054435952406e-07, "loss": 0.26586385, "memory(GiB)": 34.88, "step": 129875, "train_speed(iter/s)": 0.412069 }, { "acc": 0.94764624, "epoch": 3.5166382368071916, "grad_norm": 2.8498075008392334, "learning_rate": 3.9490281053784665e-07, "loss": 0.33952928, "memory(GiB)": 34.88, "step": 129880, "train_speed(iter/s)": 0.412069 }, { "acc": 0.93853683, "epoch": 3.516773617090407, "grad_norm": 5.4939727783203125, "learning_rate": 3.9468513441032354e-07, "loss": 0.43218794, "memory(GiB)": 34.88, "step": 129885, "train_speed(iter/s)": 0.41207 }, { "acc": 0.94152451, "epoch": 3.5169089973736227, "grad_norm": 23.86077880859375, "learning_rate": 3.9446751597968333e-07, "loss": 0.36996496, "memory(GiB)": 34.88, "step": 129890, "train_speed(iter/s)": 0.412071 }, { "acc": 0.94610138, "epoch": 3.517044377656838, "grad_norm": 7.099582672119141, "learning_rate": 3.9424995524865036e-07, "loss": 0.33176618, "memory(GiB)": 34.88, "step": 129895, "train_speed(iter/s)": 0.412072 }, { "acc": 0.9503273, "epoch": 3.5171797579400534, "grad_norm": 4.492325782775879, "learning_rate": 3.9403245221994983e-07, "loss": 0.27790666, "memory(GiB)": 34.88, "step": 129900, "train_speed(iter/s)": 0.412073 }, { "acc": 0.94957256, "epoch": 3.5173151382232692, "grad_norm": 9.701054573059082, "learning_rate": 3.9381500689630766e-07, "loss": 0.32498324, "memory(GiB)": 34.88, "step": 129905, "train_speed(iter/s)": 0.412073 }, { "acc": 0.94935541, "epoch": 3.5174505185064846, "grad_norm": 6.134478569030762, "learning_rate": 3.935976192804475e-07, "loss": 0.30092254, "memory(GiB)": 34.88, "step": 129910, "train_speed(iter/s)": 0.412074 }, { "acc": 0.95411072, "epoch": 3.5175858987897004, "grad_norm": 9.090738296508789, "learning_rate": 3.933802893750914e-07, "loss": 0.23684676, "memory(GiB)": 34.88, "step": 129915, "train_speed(iter/s)": 0.412075 }, { "acc": 0.95907917, "epoch": 3.5177212790729158, "grad_norm": 3.2911486625671387, "learning_rate": 3.93163017182963e-07, "loss": 0.25789087, "memory(GiB)": 34.88, "step": 129920, "train_speed(iter/s)": 0.412076 }, { "acc": 0.94887638, "epoch": 3.5178566593561316, "grad_norm": 10.693668365478516, "learning_rate": 3.9294580270678425e-07, "loss": 0.29198294, "memory(GiB)": 34.88, "step": 129925, "train_speed(iter/s)": 0.412077 }, { "acc": 0.94753351, "epoch": 3.517992039639347, "grad_norm": 5.435530185699463, "learning_rate": 3.927286459492749e-07, "loss": 0.31666675, "memory(GiB)": 34.88, "step": 129930, "train_speed(iter/s)": 0.412077 }, { "acc": 0.95626755, "epoch": 3.5181274199225623, "grad_norm": 5.3484625816345215, "learning_rate": 3.925115469131559e-07, "loss": 0.28449314, "memory(GiB)": 34.88, "step": 129935, "train_speed(iter/s)": 0.412078 }, { "acc": 0.94564219, "epoch": 3.518262800205778, "grad_norm": 9.534809112548828, "learning_rate": 3.92294505601147e-07, "loss": 0.34884167, "memory(GiB)": 34.88, "step": 129940, "train_speed(iter/s)": 0.412079 }, { "acc": 0.93885727, "epoch": 3.5183981804889934, "grad_norm": 16.009172439575195, "learning_rate": 3.92077522015968e-07, "loss": 0.41068087, "memory(GiB)": 34.88, "step": 129945, "train_speed(iter/s)": 0.41208 }, { "acc": 0.94838314, "epoch": 3.5185335607722092, "grad_norm": 17.022443771362305, "learning_rate": 3.918605961603348e-07, "loss": 0.31768341, "memory(GiB)": 34.88, "step": 129950, "train_speed(iter/s)": 0.41208 }, { "acc": 0.92387915, "epoch": 3.5186689410554246, "grad_norm": 5.008965492248535, "learning_rate": 3.916437280369666e-07, "loss": 0.44213314, "memory(GiB)": 34.88, "step": 129955, "train_speed(iter/s)": 0.412081 }, { "acc": 0.94987478, "epoch": 3.5188043213386404, "grad_norm": 2.2146804332733154, "learning_rate": 3.914269176485803e-07, "loss": 0.28422236, "memory(GiB)": 34.88, "step": 129960, "train_speed(iter/s)": 0.412082 }, { "acc": 0.94181042, "epoch": 3.5189397016218558, "grad_norm": 7.16120719909668, "learning_rate": 3.9121016499789034e-07, "loss": 0.34978118, "memory(GiB)": 34.88, "step": 129965, "train_speed(iter/s)": 0.412083 }, { "acc": 0.95817938, "epoch": 3.519075081905071, "grad_norm": 4.336300373077393, "learning_rate": 3.909934700876124e-07, "loss": 0.27972202, "memory(GiB)": 34.88, "step": 129970, "train_speed(iter/s)": 0.412084 }, { "acc": 0.94696627, "epoch": 3.519210462188287, "grad_norm": 4.779825210571289, "learning_rate": 3.907768329204619e-07, "loss": 0.31980414, "memory(GiB)": 34.88, "step": 129975, "train_speed(iter/s)": 0.412084 }, { "acc": 0.95429049, "epoch": 3.5193458424715027, "grad_norm": 12.31839656829834, "learning_rate": 3.905602534991531e-07, "loss": 0.28236585, "memory(GiB)": 34.88, "step": 129980, "train_speed(iter/s)": 0.412085 }, { "acc": 0.94486179, "epoch": 3.519481222754718, "grad_norm": 6.105069637298584, "learning_rate": 3.903437318263968e-07, "loss": 0.38162663, "memory(GiB)": 34.88, "step": 129985, "train_speed(iter/s)": 0.412086 }, { "acc": 0.95468597, "epoch": 3.5196166030379334, "grad_norm": 8.732015609741211, "learning_rate": 3.901272679049068e-07, "loss": 0.28880424, "memory(GiB)": 34.88, "step": 129990, "train_speed(iter/s)": 0.412087 }, { "acc": 0.92993088, "epoch": 3.5197519833211492, "grad_norm": 19.24172019958496, "learning_rate": 3.8991086173739494e-07, "loss": 0.46682925, "memory(GiB)": 34.88, "step": 129995, "train_speed(iter/s)": 0.412088 }, { "acc": 0.96409702, "epoch": 3.5198873636043646, "grad_norm": 6.688470363616943, "learning_rate": 3.896945133265723e-07, "loss": 0.22196174, "memory(GiB)": 34.88, "step": 130000, "train_speed(iter/s)": 0.412089 }, { "epoch": 3.5198873636043646, "eval_acc": 0.6262653310024197, "eval_loss": 1.249808430671692, "eval_runtime": 1301.9593, "eval_samples_per_second": 66.289, "eval_steps_per_second": 2.072, "step": 130000 }, { "acc": 0.95628757, "epoch": 3.52002274388758, "grad_norm": 5.637302875518799, "learning_rate": 3.894782226751479e-07, "loss": 0.29323249, "memory(GiB)": 34.88, "step": 130005, "train_speed(iter/s)": 0.410366 }, { "acc": 0.93512726, "epoch": 3.5201581241707958, "grad_norm": 11.890844345092773, "learning_rate": 3.892619897858322e-07, "loss": 0.33167448, "memory(GiB)": 34.88, "step": 130010, "train_speed(iter/s)": 0.410367 }, { "acc": 0.94415684, "epoch": 3.5202935044540116, "grad_norm": 2.9284257888793945, "learning_rate": 3.890458146613345e-07, "loss": 0.35563955, "memory(GiB)": 34.88, "step": 130015, "train_speed(iter/s)": 0.410368 }, { "acc": 0.94647961, "epoch": 3.520428884737227, "grad_norm": 24.181127548217773, "learning_rate": 3.888296973043605e-07, "loss": 0.37641406, "memory(GiB)": 34.88, "step": 130020, "train_speed(iter/s)": 0.410368 }, { "acc": 0.9548193, "epoch": 3.5205642650204423, "grad_norm": 7.563591480255127, "learning_rate": 3.8861363771761965e-07, "loss": 0.27068505, "memory(GiB)": 34.88, "step": 130025, "train_speed(iter/s)": 0.410369 }, { "acc": 0.95030699, "epoch": 3.520699645303658, "grad_norm": 11.190031051635742, "learning_rate": 3.883976359038171e-07, "loss": 0.3109616, "memory(GiB)": 34.88, "step": 130030, "train_speed(iter/s)": 0.41037 }, { "acc": 0.95275879, "epoch": 3.5208350255868734, "grad_norm": 5.241884231567383, "learning_rate": 3.881816918656611e-07, "loss": 0.28005137, "memory(GiB)": 34.88, "step": 130035, "train_speed(iter/s)": 0.410371 }, { "acc": 0.93742447, "epoch": 3.520970405870089, "grad_norm": 10.227766990661621, "learning_rate": 3.879658056058525e-07, "loss": 0.42974358, "memory(GiB)": 34.88, "step": 130040, "train_speed(iter/s)": 0.410372 }, { "acc": 0.9504549, "epoch": 3.5211057861533046, "grad_norm": 5.1053314208984375, "learning_rate": 3.877499771271005e-07, "loss": 0.2140713, "memory(GiB)": 34.88, "step": 130045, "train_speed(iter/s)": 0.410373 }, { "acc": 0.93253555, "epoch": 3.5212411664365204, "grad_norm": 4.23044490814209, "learning_rate": 3.8753420643210665e-07, "loss": 0.44259667, "memory(GiB)": 34.88, "step": 130050, "train_speed(iter/s)": 0.410374 }, { "acc": 0.95215912, "epoch": 3.5213765467197358, "grad_norm": 10.692912101745605, "learning_rate": 3.8731849352357287e-07, "loss": 0.23075962, "memory(GiB)": 34.88, "step": 130055, "train_speed(iter/s)": 0.410375 }, { "acc": 0.94718151, "epoch": 3.521511927002951, "grad_norm": 11.105345726013184, "learning_rate": 3.8710283840420237e-07, "loss": 0.34599314, "memory(GiB)": 34.88, "step": 130060, "train_speed(iter/s)": 0.410376 }, { "acc": 0.9413269, "epoch": 3.521647307286167, "grad_norm": 6.421913146972656, "learning_rate": 3.868872410766965e-07, "loss": 0.34730926, "memory(GiB)": 34.88, "step": 130065, "train_speed(iter/s)": 0.410377 }, { "acc": 0.94518709, "epoch": 3.5217826875693823, "grad_norm": 8.682241439819336, "learning_rate": 3.8667170154375723e-07, "loss": 0.33347917, "memory(GiB)": 34.88, "step": 130070, "train_speed(iter/s)": 0.410377 }, { "acc": 0.94845171, "epoch": 3.521918067852598, "grad_norm": 7.675830364227295, "learning_rate": 3.864562198080818e-07, "loss": 0.25671875, "memory(GiB)": 34.88, "step": 130075, "train_speed(iter/s)": 0.410378 }, { "acc": 0.93821068, "epoch": 3.5220534481358134, "grad_norm": 6.469554424285889, "learning_rate": 3.862407958723732e-07, "loss": 0.42132945, "memory(GiB)": 34.88, "step": 130080, "train_speed(iter/s)": 0.410379 }, { "acc": 0.96510353, "epoch": 3.5221888284190292, "grad_norm": 5.743704319000244, "learning_rate": 3.860254297393279e-07, "loss": 0.19201932, "memory(GiB)": 34.88, "step": 130085, "train_speed(iter/s)": 0.41038 }, { "acc": 0.9462471, "epoch": 3.5223242087022446, "grad_norm": 29.24121856689453, "learning_rate": 3.858101214116428e-07, "loss": 0.32325995, "memory(GiB)": 34.88, "step": 130090, "train_speed(iter/s)": 0.410381 }, { "acc": 0.93350782, "epoch": 3.52245958898546, "grad_norm": 11.585549354553223, "learning_rate": 3.855948708920169e-07, "loss": 0.41809931, "memory(GiB)": 34.88, "step": 130095, "train_speed(iter/s)": 0.410382 }, { "acc": 0.95153732, "epoch": 3.5225949692686758, "grad_norm": 14.852224349975586, "learning_rate": 3.853796781831457e-07, "loss": 0.29852664, "memory(GiB)": 34.88, "step": 130100, "train_speed(iter/s)": 0.410383 }, { "acc": 0.96444664, "epoch": 3.522730349551891, "grad_norm": 4.378923416137695, "learning_rate": 3.8516454328772603e-07, "loss": 0.19716096, "memory(GiB)": 34.88, "step": 130105, "train_speed(iter/s)": 0.410383 }, { "acc": 0.94505625, "epoch": 3.522865729835107, "grad_norm": 8.065563201904297, "learning_rate": 3.849494662084502e-07, "loss": 0.32166038, "memory(GiB)": 34.88, "step": 130110, "train_speed(iter/s)": 0.410384 }, { "acc": 0.93444052, "epoch": 3.5230011101183223, "grad_norm": 10.31828784942627, "learning_rate": 3.8473444694801644e-07, "loss": 0.37758217, "memory(GiB)": 34.88, "step": 130115, "train_speed(iter/s)": 0.410385 }, { "acc": 0.94814339, "epoch": 3.523136490401538, "grad_norm": 8.874617576599121, "learning_rate": 3.8451948550911523e-07, "loss": 0.33174675, "memory(GiB)": 34.88, "step": 130120, "train_speed(iter/s)": 0.410386 }, { "acc": 0.9415554, "epoch": 3.5232718706847534, "grad_norm": 7.224411487579346, "learning_rate": 3.843045818944411e-07, "loss": 0.32011166, "memory(GiB)": 34.88, "step": 130125, "train_speed(iter/s)": 0.410387 }, { "acc": 0.95104036, "epoch": 3.523407250967969, "grad_norm": 2.9624006748199463, "learning_rate": 3.8408973610668464e-07, "loss": 0.28466511, "memory(GiB)": 34.88, "step": 130130, "train_speed(iter/s)": 0.410388 }, { "acc": 0.93266888, "epoch": 3.5235426312511846, "grad_norm": 8.665911674499512, "learning_rate": 3.838749481485377e-07, "loss": 0.44267473, "memory(GiB)": 34.88, "step": 130135, "train_speed(iter/s)": 0.410389 }, { "acc": 0.95163364, "epoch": 3.5236780115344, "grad_norm": 4.442257881164551, "learning_rate": 3.8366021802269247e-07, "loss": 0.33742104, "memory(GiB)": 34.88, "step": 130140, "train_speed(iter/s)": 0.41039 }, { "acc": 0.93468933, "epoch": 3.5238133918176158, "grad_norm": 14.474640846252441, "learning_rate": 3.834455457318358e-07, "loss": 0.31511011, "memory(GiB)": 34.88, "step": 130145, "train_speed(iter/s)": 0.41039 }, { "acc": 0.94780073, "epoch": 3.523948772100831, "grad_norm": 7.063944339752197, "learning_rate": 3.832309312786604e-07, "loss": 0.33035483, "memory(GiB)": 34.88, "step": 130150, "train_speed(iter/s)": 0.410391 }, { "acc": 0.94811172, "epoch": 3.524084152384047, "grad_norm": 5.27464485168457, "learning_rate": 3.830163746658516e-07, "loss": 0.33241754, "memory(GiB)": 34.88, "step": 130155, "train_speed(iter/s)": 0.410392 }, { "acc": 0.94593887, "epoch": 3.5242195326672623, "grad_norm": 6.707154273986816, "learning_rate": 3.8280187589609975e-07, "loss": 0.31724234, "memory(GiB)": 34.88, "step": 130160, "train_speed(iter/s)": 0.410393 }, { "acc": 0.95374584, "epoch": 3.5243549129504776, "grad_norm": 11.120139122009277, "learning_rate": 3.8258743497208905e-07, "loss": 0.26948848, "memory(GiB)": 34.88, "step": 130165, "train_speed(iter/s)": 0.410394 }, { "acc": 0.94956055, "epoch": 3.5244902932336934, "grad_norm": 4.024688720703125, "learning_rate": 3.823730518965094e-07, "loss": 0.27004011, "memory(GiB)": 34.88, "step": 130170, "train_speed(iter/s)": 0.410395 }, { "acc": 0.94044409, "epoch": 3.5246256735169093, "grad_norm": 11.235701560974121, "learning_rate": 3.821587266720439e-07, "loss": 0.39672232, "memory(GiB)": 34.88, "step": 130175, "train_speed(iter/s)": 0.410396 }, { "acc": 0.95220718, "epoch": 3.5247610538001246, "grad_norm": 26.598241806030273, "learning_rate": 3.8194445930137614e-07, "loss": 0.28533258, "memory(GiB)": 34.88, "step": 130180, "train_speed(iter/s)": 0.410397 }, { "acc": 0.95888195, "epoch": 3.52489643408334, "grad_norm": 6.134530067443848, "learning_rate": 3.8173024978719395e-07, "loss": 0.25682127, "memory(GiB)": 34.88, "step": 130185, "train_speed(iter/s)": 0.410397 }, { "acc": 0.94138889, "epoch": 3.5250318143665558, "grad_norm": 15.98253059387207, "learning_rate": 3.81516098132178e-07, "loss": 0.42294221, "memory(GiB)": 34.88, "step": 130190, "train_speed(iter/s)": 0.410398 }, { "acc": 0.94684887, "epoch": 3.525167194649771, "grad_norm": 5.450379371643066, "learning_rate": 3.81302004339012e-07, "loss": 0.28944645, "memory(GiB)": 34.88, "step": 130195, "train_speed(iter/s)": 0.410399 }, { "acc": 0.94949017, "epoch": 3.5253025749329865, "grad_norm": 6.258266925811768, "learning_rate": 3.810879684103765e-07, "loss": 0.26528015, "memory(GiB)": 34.88, "step": 130200, "train_speed(iter/s)": 0.4104 }, { "acc": 0.94155083, "epoch": 3.5254379552162023, "grad_norm": 12.704645156860352, "learning_rate": 3.808739903489556e-07, "loss": 0.42017174, "memory(GiB)": 34.88, "step": 130205, "train_speed(iter/s)": 0.410401 }, { "acc": 0.94741936, "epoch": 3.525573335499418, "grad_norm": 7.175812244415283, "learning_rate": 3.806600701574275e-07, "loss": 0.32944393, "memory(GiB)": 34.88, "step": 130210, "train_speed(iter/s)": 0.410402 }, { "acc": 0.94204788, "epoch": 3.5257087157826335, "grad_norm": 6.366898536682129, "learning_rate": 3.804462078384709e-07, "loss": 0.34114532, "memory(GiB)": 34.88, "step": 130215, "train_speed(iter/s)": 0.410403 }, { "acc": 0.94819927, "epoch": 3.525844096065849, "grad_norm": 6.231875419616699, "learning_rate": 3.802324033947684e-07, "loss": 0.31027913, "memory(GiB)": 34.88, "step": 130220, "train_speed(iter/s)": 0.410404 }, { "acc": 0.93891735, "epoch": 3.5259794763490646, "grad_norm": 6.016944408416748, "learning_rate": 3.8001865682899536e-07, "loss": 0.38655477, "memory(GiB)": 34.88, "step": 130225, "train_speed(iter/s)": 0.410405 }, { "acc": 0.95372324, "epoch": 3.52611485663228, "grad_norm": 3.3325603008270264, "learning_rate": 3.7980496814383093e-07, "loss": 0.25305166, "memory(GiB)": 34.88, "step": 130230, "train_speed(iter/s)": 0.410406 }, { "acc": 0.95095634, "epoch": 3.5262502369154958, "grad_norm": 4.294942855834961, "learning_rate": 3.7959133734195007e-07, "loss": 0.30531869, "memory(GiB)": 34.88, "step": 130235, "train_speed(iter/s)": 0.410406 }, { "acc": 0.94101505, "epoch": 3.526385617198711, "grad_norm": 10.981186866760254, "learning_rate": 3.79377764426032e-07, "loss": 0.36655278, "memory(GiB)": 34.88, "step": 130240, "train_speed(iter/s)": 0.410407 }, { "acc": 0.93596487, "epoch": 3.526520997481927, "grad_norm": 6.356503486633301, "learning_rate": 3.791642493987493e-07, "loss": 0.3574842, "memory(GiB)": 34.88, "step": 130245, "train_speed(iter/s)": 0.410408 }, { "acc": 0.94846478, "epoch": 3.5266563777651423, "grad_norm": 5.833209037780762, "learning_rate": 3.7895079226277784e-07, "loss": 0.33459034, "memory(GiB)": 34.88, "step": 130250, "train_speed(iter/s)": 0.410409 }, { "acc": 0.95484295, "epoch": 3.5267917580483577, "grad_norm": 13.027678489685059, "learning_rate": 3.787373930207926e-07, "loss": 0.27576399, "memory(GiB)": 34.88, "step": 130255, "train_speed(iter/s)": 0.41041 }, { "acc": 0.95207424, "epoch": 3.5269271383315735, "grad_norm": 3.550894260406494, "learning_rate": 3.785240516754648e-07, "loss": 0.29694591, "memory(GiB)": 34.88, "step": 130260, "train_speed(iter/s)": 0.410411 }, { "acc": 0.94933205, "epoch": 3.527062518614789, "grad_norm": 4.872615337371826, "learning_rate": 3.7831076822946894e-07, "loss": 0.28454041, "memory(GiB)": 34.88, "step": 130265, "train_speed(iter/s)": 0.410412 }, { "acc": 0.95949554, "epoch": 3.5271978988980046, "grad_norm": 17.0034122467041, "learning_rate": 3.780975426854736e-07, "loss": 0.26442213, "memory(GiB)": 34.88, "step": 130270, "train_speed(iter/s)": 0.410413 }, { "acc": 0.95174847, "epoch": 3.52733327918122, "grad_norm": 7.820160865783691, "learning_rate": 3.778843750461542e-07, "loss": 0.34617739, "memory(GiB)": 34.88, "step": 130275, "train_speed(iter/s)": 0.410413 }, { "acc": 0.94089298, "epoch": 3.5274686594644358, "grad_norm": 13.967778205871582, "learning_rate": 3.776712653141784e-07, "loss": 0.34601092, "memory(GiB)": 34.88, "step": 130280, "train_speed(iter/s)": 0.410414 }, { "acc": 0.94832687, "epoch": 3.527604039747651, "grad_norm": 8.3260498046875, "learning_rate": 3.774582134922159e-07, "loss": 0.34727821, "memory(GiB)": 34.88, "step": 130285, "train_speed(iter/s)": 0.410415 }, { "acc": 0.9470932, "epoch": 3.5277394200308665, "grad_norm": 4.361698150634766, "learning_rate": 3.7724521958293595e-07, "loss": 0.30216172, "memory(GiB)": 34.88, "step": 130290, "train_speed(iter/s)": 0.410416 }, { "acc": 0.94419155, "epoch": 3.5278748003140823, "grad_norm": 8.749021530151367, "learning_rate": 3.7703228358900745e-07, "loss": 0.34764667, "memory(GiB)": 34.88, "step": 130295, "train_speed(iter/s)": 0.410417 }, { "acc": 0.95477123, "epoch": 3.5280101805972977, "grad_norm": 4.026037216186523, "learning_rate": 3.7681940551309775e-07, "loss": 0.28795393, "memory(GiB)": 34.88, "step": 130300, "train_speed(iter/s)": 0.410418 }, { "acc": 0.95780201, "epoch": 3.5281455608805135, "grad_norm": 12.565178871154785, "learning_rate": 3.7660658535787077e-07, "loss": 0.26217294, "memory(GiB)": 34.88, "step": 130305, "train_speed(iter/s)": 0.410418 }, { "acc": 0.95524712, "epoch": 3.528280941163729, "grad_norm": 3.203641891479492, "learning_rate": 3.763938231259968e-07, "loss": 0.23554957, "memory(GiB)": 34.88, "step": 130310, "train_speed(iter/s)": 0.410419 }, { "acc": 0.95188179, "epoch": 3.5284163214469446, "grad_norm": 4.388367176055908, "learning_rate": 3.761811188201378e-07, "loss": 0.30467196, "memory(GiB)": 34.88, "step": 130315, "train_speed(iter/s)": 0.41042 }, { "acc": 0.95672626, "epoch": 3.52855170173016, "grad_norm": 7.654115200042725, "learning_rate": 3.75968472442961e-07, "loss": 0.32183824, "memory(GiB)": 34.88, "step": 130320, "train_speed(iter/s)": 0.410421 }, { "acc": 0.94173021, "epoch": 3.5286870820133753, "grad_norm": 8.010971069335938, "learning_rate": 3.757558839971266e-07, "loss": 0.36464863, "memory(GiB)": 34.88, "step": 130325, "train_speed(iter/s)": 0.410422 }, { "acc": 0.94378357, "epoch": 3.528822462296591, "grad_norm": 12.883136749267578, "learning_rate": 3.7554335348530184e-07, "loss": 0.33235559, "memory(GiB)": 34.88, "step": 130330, "train_speed(iter/s)": 0.410423 }, { "acc": 0.94939451, "epoch": 3.528957842579807, "grad_norm": 5.914412021636963, "learning_rate": 3.7533088091014694e-07, "loss": 0.28637006, "memory(GiB)": 34.88, "step": 130335, "train_speed(iter/s)": 0.410424 }, { "acc": 0.95735836, "epoch": 3.5290932228630223, "grad_norm": 4.296498775482178, "learning_rate": 3.751184662743223e-07, "loss": 0.23595743, "memory(GiB)": 34.88, "step": 130340, "train_speed(iter/s)": 0.410425 }, { "acc": 0.96143856, "epoch": 3.5292286031462377, "grad_norm": 16.92483901977539, "learning_rate": 3.749061095804917e-07, "loss": 0.25643387, "memory(GiB)": 34.88, "step": 130345, "train_speed(iter/s)": 0.410425 }, { "acc": 0.95803728, "epoch": 3.5293639834294535, "grad_norm": 10.765920639038086, "learning_rate": 3.7469381083131274e-07, "loss": 0.31826825, "memory(GiB)": 34.88, "step": 130350, "train_speed(iter/s)": 0.410426 }, { "acc": 0.95417862, "epoch": 3.529499363712669, "grad_norm": 6.061670303344727, "learning_rate": 3.744815700294474e-07, "loss": 0.3144309, "memory(GiB)": 34.88, "step": 130355, "train_speed(iter/s)": 0.410427 }, { "acc": 0.95115128, "epoch": 3.529634743995884, "grad_norm": 3.853227376937866, "learning_rate": 3.742693871775506e-07, "loss": 0.26352293, "memory(GiB)": 34.88, "step": 130360, "train_speed(iter/s)": 0.410428 }, { "acc": 0.94768972, "epoch": 3.5297701242791, "grad_norm": 6.795712947845459, "learning_rate": 3.7405726227828486e-07, "loss": 0.34830828, "memory(GiB)": 34.88, "step": 130365, "train_speed(iter/s)": 0.410429 }, { "acc": 0.94844141, "epoch": 3.5299055045623158, "grad_norm": 13.142792701721191, "learning_rate": 3.7384519533430454e-07, "loss": 0.34632616, "memory(GiB)": 34.88, "step": 130370, "train_speed(iter/s)": 0.41043 }, { "acc": 0.94629383, "epoch": 3.530040884845531, "grad_norm": 3.6499106884002686, "learning_rate": 3.7363318634826724e-07, "loss": 0.37627921, "memory(GiB)": 34.88, "step": 130375, "train_speed(iter/s)": 0.410431 }, { "acc": 0.93650055, "epoch": 3.5301762651287465, "grad_norm": 9.060380935668945, "learning_rate": 3.7342123532282877e-07, "loss": 0.42975769, "memory(GiB)": 34.88, "step": 130380, "train_speed(iter/s)": 0.410432 }, { "acc": 0.95858126, "epoch": 3.5303116454119623, "grad_norm": 4.901467323303223, "learning_rate": 3.7320934226064357e-07, "loss": 0.2464036, "memory(GiB)": 34.88, "step": 130385, "train_speed(iter/s)": 0.410432 }, { "acc": 0.93553295, "epoch": 3.5304470256951777, "grad_norm": 18.99683952331543, "learning_rate": 3.729975071643675e-07, "loss": 0.47172918, "memory(GiB)": 34.88, "step": 130390, "train_speed(iter/s)": 0.410433 }, { "acc": 0.94596901, "epoch": 3.5305824059783935, "grad_norm": 10.771734237670898, "learning_rate": 3.727857300366511e-07, "loss": 0.34031038, "memory(GiB)": 34.88, "step": 130395, "train_speed(iter/s)": 0.410434 }, { "acc": 0.95165768, "epoch": 3.530717786261609, "grad_norm": 6.2441887855529785, "learning_rate": 3.7257401088015114e-07, "loss": 0.33367224, "memory(GiB)": 34.88, "step": 130400, "train_speed(iter/s)": 0.410435 }, { "acc": 0.94791679, "epoch": 3.5308531665448246, "grad_norm": 4.130560398101807, "learning_rate": 3.723623496975177e-07, "loss": 0.3275717, "memory(GiB)": 34.88, "step": 130405, "train_speed(iter/s)": 0.410436 }, { "acc": 0.9516077, "epoch": 3.53098854682804, "grad_norm": 18.427013397216797, "learning_rate": 3.7215074649140215e-07, "loss": 0.33145387, "memory(GiB)": 34.88, "step": 130410, "train_speed(iter/s)": 0.410437 }, { "acc": 0.9400423, "epoch": 3.5311239271112553, "grad_norm": 21.851829528808594, "learning_rate": 3.719392012644556e-07, "loss": 0.37397623, "memory(GiB)": 34.88, "step": 130415, "train_speed(iter/s)": 0.410438 }, { "acc": 0.9361578, "epoch": 3.531259307394471, "grad_norm": 11.171866416931152, "learning_rate": 3.7172771401932927e-07, "loss": 0.33864455, "memory(GiB)": 34.88, "step": 130420, "train_speed(iter/s)": 0.410439 }, { "acc": 0.93861389, "epoch": 3.5313946876776865, "grad_norm": 11.506780624389648, "learning_rate": 3.7151628475867105e-07, "loss": 0.3921196, "memory(GiB)": 34.88, "step": 130425, "train_speed(iter/s)": 0.41044 }, { "acc": 0.95081711, "epoch": 3.5315300679609023, "grad_norm": 74.20008850097656, "learning_rate": 3.713049134851284e-07, "loss": 0.34903796, "memory(GiB)": 34.88, "step": 130430, "train_speed(iter/s)": 0.41044 }, { "acc": 0.95143433, "epoch": 3.5316654482441177, "grad_norm": 5.038169860839844, "learning_rate": 3.710936002013518e-07, "loss": 0.30600824, "memory(GiB)": 34.88, "step": 130435, "train_speed(iter/s)": 0.410441 }, { "acc": 0.9463213, "epoch": 3.5318008285273335, "grad_norm": 7.076160430908203, "learning_rate": 3.7088234490998664e-07, "loss": 0.34252672, "memory(GiB)": 34.88, "step": 130440, "train_speed(iter/s)": 0.410442 }, { "acc": 0.95512905, "epoch": 3.531936208810549, "grad_norm": 3.2907731533050537, "learning_rate": 3.706711476136799e-07, "loss": 0.26018095, "memory(GiB)": 34.88, "step": 130445, "train_speed(iter/s)": 0.410443 }, { "acc": 0.9541378, "epoch": 3.532071589093764, "grad_norm": 9.593804359436035, "learning_rate": 3.7046000831507706e-07, "loss": 0.25687351, "memory(GiB)": 34.88, "step": 130450, "train_speed(iter/s)": 0.410444 }, { "acc": 0.94262304, "epoch": 3.53220696937698, "grad_norm": 8.062409400939941, "learning_rate": 3.702489270168241e-07, "loss": 0.39768608, "memory(GiB)": 34.88, "step": 130455, "train_speed(iter/s)": 0.410445 }, { "acc": 0.94139681, "epoch": 3.5323423496601953, "grad_norm": 6.013377666473389, "learning_rate": 3.700379037215641e-07, "loss": 0.37314668, "memory(GiB)": 34.88, "step": 130460, "train_speed(iter/s)": 0.410445 }, { "acc": 0.95382423, "epoch": 3.532477729943411, "grad_norm": 11.955140113830566, "learning_rate": 3.6982693843193856e-07, "loss": 0.25620856, "memory(GiB)": 34.88, "step": 130465, "train_speed(iter/s)": 0.410446 }, { "acc": 0.94404621, "epoch": 3.5326131102266265, "grad_norm": 3.788391590118408, "learning_rate": 3.696160311505946e-07, "loss": 0.31400244, "memory(GiB)": 34.88, "step": 130470, "train_speed(iter/s)": 0.410447 }, { "acc": 0.95859728, "epoch": 3.5327484905098423, "grad_norm": 5.665482521057129, "learning_rate": 3.6940518188017095e-07, "loss": 0.20971994, "memory(GiB)": 34.88, "step": 130475, "train_speed(iter/s)": 0.410448 }, { "acc": 0.95917521, "epoch": 3.5328838707930577, "grad_norm": 9.911659240722656, "learning_rate": 3.691943906233103e-07, "loss": 0.22590961, "memory(GiB)": 34.88, "step": 130480, "train_speed(iter/s)": 0.410449 }, { "acc": 0.95036411, "epoch": 3.533019251076273, "grad_norm": 5.857234477996826, "learning_rate": 3.6898365738265237e-07, "loss": 0.27583475, "memory(GiB)": 34.88, "step": 130485, "train_speed(iter/s)": 0.41045 }, { "acc": 0.95233545, "epoch": 3.533154631359489, "grad_norm": 4.728286266326904, "learning_rate": 3.687729821608388e-07, "loss": 0.28756037, "memory(GiB)": 34.88, "step": 130490, "train_speed(iter/s)": 0.410451 }, { "acc": 0.9428277, "epoch": 3.5332900116427046, "grad_norm": 5.731846332550049, "learning_rate": 3.6856236496050647e-07, "loss": 0.3751513, "memory(GiB)": 34.88, "step": 130495, "train_speed(iter/s)": 0.410452 }, { "acc": 0.93928318, "epoch": 3.53342539192592, "grad_norm": 9.476624488830566, "learning_rate": 3.683518057842949e-07, "loss": 0.37352269, "memory(GiB)": 34.88, "step": 130500, "train_speed(iter/s)": 0.410452 }, { "acc": 0.96375437, "epoch": 3.5335607722091353, "grad_norm": 3.299970865249634, "learning_rate": 3.681413046348421e-07, "loss": 0.17243501, "memory(GiB)": 34.88, "step": 130505, "train_speed(iter/s)": 0.410453 }, { "acc": 0.95807209, "epoch": 3.533696152492351, "grad_norm": 4.123268127441406, "learning_rate": 3.679308615147841e-07, "loss": 0.21925457, "memory(GiB)": 34.88, "step": 130510, "train_speed(iter/s)": 0.410454 }, { "acc": 0.94276514, "epoch": 3.5338315327755665, "grad_norm": 4.316213607788086, "learning_rate": 3.677204764267574e-07, "loss": 0.36705692, "memory(GiB)": 34.88, "step": 130515, "train_speed(iter/s)": 0.410455 }, { "acc": 0.95386362, "epoch": 3.533966913058782, "grad_norm": 8.496125221252441, "learning_rate": 3.675101493733975e-07, "loss": 0.25548189, "memory(GiB)": 34.88, "step": 130520, "train_speed(iter/s)": 0.410456 }, { "acc": 0.9537653, "epoch": 3.5341022933419977, "grad_norm": 3.998871088027954, "learning_rate": 3.672998803573402e-07, "loss": 0.2589422, "memory(GiB)": 34.88, "step": 130525, "train_speed(iter/s)": 0.410457 }, { "acc": 0.95748787, "epoch": 3.5342376736252135, "grad_norm": 4.057142734527588, "learning_rate": 3.670896693812182e-07, "loss": 0.23895535, "memory(GiB)": 34.88, "step": 130530, "train_speed(iter/s)": 0.410457 }, { "acc": 0.95217094, "epoch": 3.534373053908429, "grad_norm": 10.99052619934082, "learning_rate": 3.6687951644766475e-07, "loss": 0.28312333, "memory(GiB)": 34.88, "step": 130535, "train_speed(iter/s)": 0.410458 }, { "acc": 0.94267569, "epoch": 3.534508434191644, "grad_norm": 5.639097213745117, "learning_rate": 3.666694215593129e-07, "loss": 0.37120967, "memory(GiB)": 34.88, "step": 130540, "train_speed(iter/s)": 0.410459 }, { "acc": 0.95197029, "epoch": 3.53464381447486, "grad_norm": 11.419349670410156, "learning_rate": 3.664593847187954e-07, "loss": 0.27463784, "memory(GiB)": 34.88, "step": 130545, "train_speed(iter/s)": 0.41046 }, { "acc": 0.94410782, "epoch": 3.5347791947580753, "grad_norm": 6.372711658477783, "learning_rate": 3.6624940592874156e-07, "loss": 0.35631328, "memory(GiB)": 34.88, "step": 130550, "train_speed(iter/s)": 0.410461 }, { "acc": 0.94707699, "epoch": 3.534914575041291, "grad_norm": 7.079999923706055, "learning_rate": 3.6603948519178276e-07, "loss": 0.31689749, "memory(GiB)": 34.88, "step": 130555, "train_speed(iter/s)": 0.410462 }, { "acc": 0.93359308, "epoch": 3.5350499553245065, "grad_norm": 18.846742630004883, "learning_rate": 3.6582962251054963e-07, "loss": 0.43663392, "memory(GiB)": 34.88, "step": 130560, "train_speed(iter/s)": 0.410462 }, { "acc": 0.9663682, "epoch": 3.5351853356077223, "grad_norm": 14.557875633239746, "learning_rate": 3.65619817887669e-07, "loss": 0.21994214, "memory(GiB)": 34.88, "step": 130565, "train_speed(iter/s)": 0.410463 }, { "acc": 0.95771255, "epoch": 3.5353207158909377, "grad_norm": 2.8987393379211426, "learning_rate": 3.654100713257703e-07, "loss": 0.30444884, "memory(GiB)": 34.88, "step": 130570, "train_speed(iter/s)": 0.410464 }, { "acc": 0.9466198, "epoch": 3.535456096174153, "grad_norm": 5.65756893157959, "learning_rate": 3.652003828274812e-07, "loss": 0.31370873, "memory(GiB)": 34.88, "step": 130575, "train_speed(iter/s)": 0.410465 }, { "acc": 0.94915371, "epoch": 3.535591476457369, "grad_norm": 9.346692085266113, "learning_rate": 3.649907523954287e-07, "loss": 0.31469202, "memory(GiB)": 34.88, "step": 130580, "train_speed(iter/s)": 0.410466 }, { "acc": 0.95929947, "epoch": 3.535726856740584, "grad_norm": 5.1586809158325195, "learning_rate": 3.647811800322372e-07, "loss": 0.2542788, "memory(GiB)": 34.88, "step": 130585, "train_speed(iter/s)": 0.410467 }, { "acc": 0.948769, "epoch": 3.5358622370238, "grad_norm": 6.527214050292969, "learning_rate": 3.6457166574053363e-07, "loss": 0.35868869, "memory(GiB)": 34.88, "step": 130590, "train_speed(iter/s)": 0.410467 }, { "acc": 0.94698038, "epoch": 3.5359976173070153, "grad_norm": 5.580605506896973, "learning_rate": 3.6436220952294245e-07, "loss": 0.22628996, "memory(GiB)": 34.88, "step": 130595, "train_speed(iter/s)": 0.410468 }, { "acc": 0.9576066, "epoch": 3.536132997590231, "grad_norm": 4.185853004455566, "learning_rate": 3.641528113820862e-07, "loss": 0.23033304, "memory(GiB)": 34.88, "step": 130600, "train_speed(iter/s)": 0.410469 }, { "acc": 0.94462729, "epoch": 3.5362683778734465, "grad_norm": 6.315727710723877, "learning_rate": 3.6394347132058927e-07, "loss": 0.36177745, "memory(GiB)": 34.88, "step": 130605, "train_speed(iter/s)": 0.41047 }, { "acc": 0.94603291, "epoch": 3.536403758156662, "grad_norm": 12.795543670654297, "learning_rate": 3.637341893410731e-07, "loss": 0.38600438, "memory(GiB)": 34.88, "step": 130610, "train_speed(iter/s)": 0.410471 }, { "acc": 0.95966816, "epoch": 3.5365391384398777, "grad_norm": 6.575623512268066, "learning_rate": 3.635249654461615e-07, "loss": 0.24061651, "memory(GiB)": 34.88, "step": 130615, "train_speed(iter/s)": 0.410472 }, { "acc": 0.93335381, "epoch": 3.536674518723093, "grad_norm": 10.131248474121094, "learning_rate": 3.6331579963847264e-07, "loss": 0.40835786, "memory(GiB)": 34.88, "step": 130620, "train_speed(iter/s)": 0.410473 }, { "acc": 0.95941944, "epoch": 3.536809899006309, "grad_norm": 6.282785892486572, "learning_rate": 3.631066919206275e-07, "loss": 0.23396223, "memory(GiB)": 34.88, "step": 130625, "train_speed(iter/s)": 0.410473 }, { "acc": 0.92773123, "epoch": 3.536945279289524, "grad_norm": 42.215877532958984, "learning_rate": 3.628976422952476e-07, "loss": 0.50869274, "memory(GiB)": 34.88, "step": 130630, "train_speed(iter/s)": 0.410474 }, { "acc": 0.96568718, "epoch": 3.53708065957274, "grad_norm": 2.8278253078460693, "learning_rate": 3.6268865076494894e-07, "loss": 0.22431674, "memory(GiB)": 34.88, "step": 130635, "train_speed(iter/s)": 0.410475 }, { "acc": 0.95371599, "epoch": 3.5372160398559553, "grad_norm": 10.823539733886719, "learning_rate": 3.6247971733235083e-07, "loss": 0.25200312, "memory(GiB)": 34.88, "step": 130640, "train_speed(iter/s)": 0.410476 }, { "acc": 0.94825077, "epoch": 3.5373514201391707, "grad_norm": 5.494686603546143, "learning_rate": 3.6227084200007037e-07, "loss": 0.34664111, "memory(GiB)": 34.88, "step": 130645, "train_speed(iter/s)": 0.410477 }, { "acc": 0.9527091, "epoch": 3.5374868004223865, "grad_norm": 4.896653175354004, "learning_rate": 3.6206202477072463e-07, "loss": 0.31680486, "memory(GiB)": 34.88, "step": 130650, "train_speed(iter/s)": 0.410478 }, { "acc": 0.95688744, "epoch": 3.5376221807056023, "grad_norm": 16.878520965576172, "learning_rate": 3.6185326564692894e-07, "loss": 0.2334775, "memory(GiB)": 34.88, "step": 130655, "train_speed(iter/s)": 0.410479 }, { "acc": 0.94275837, "epoch": 3.5377575609888177, "grad_norm": 6.444790840148926, "learning_rate": 3.6164456463129777e-07, "loss": 0.31843309, "memory(GiB)": 34.88, "step": 130660, "train_speed(iter/s)": 0.41048 }, { "acc": 0.94596081, "epoch": 3.537892941272033, "grad_norm": 5.087294578552246, "learning_rate": 3.61435921726447e-07, "loss": 0.37527282, "memory(GiB)": 34.88, "step": 130665, "train_speed(iter/s)": 0.410481 }, { "acc": 0.95132027, "epoch": 3.538028321555249, "grad_norm": 2.891371965408325, "learning_rate": 3.6122733693498994e-07, "loss": 0.30590959, "memory(GiB)": 34.88, "step": 130670, "train_speed(iter/s)": 0.410481 }, { "acc": 0.95012903, "epoch": 3.538163701838464, "grad_norm": 8.493340492248535, "learning_rate": 3.6101881025953863e-07, "loss": 0.32264946, "memory(GiB)": 34.88, "step": 130675, "train_speed(iter/s)": 0.410482 }, { "acc": 0.9378315, "epoch": 3.5382990821216795, "grad_norm": 5.217098712921143, "learning_rate": 3.608103417027056e-07, "loss": 0.41632214, "memory(GiB)": 34.88, "step": 130680, "train_speed(iter/s)": 0.410483 }, { "acc": 0.94707117, "epoch": 3.5384344624048953, "grad_norm": 10.588610649108887, "learning_rate": 3.606019312671031e-07, "loss": 0.29801459, "memory(GiB)": 34.88, "step": 130685, "train_speed(iter/s)": 0.410484 }, { "acc": 0.9394455, "epoch": 3.538569842688111, "grad_norm": 4.048828125, "learning_rate": 3.6039357895534095e-07, "loss": 0.34320455, "memory(GiB)": 34.88, "step": 130690, "train_speed(iter/s)": 0.410485 }, { "acc": 0.93956633, "epoch": 3.5387052229713265, "grad_norm": 5.1606903076171875, "learning_rate": 3.60185284770029e-07, "loss": 0.33828545, "memory(GiB)": 34.88, "step": 130695, "train_speed(iter/s)": 0.410486 }, { "acc": 0.95415497, "epoch": 3.538840603254542, "grad_norm": 12.683572769165039, "learning_rate": 3.5997704871377715e-07, "loss": 0.31075635, "memory(GiB)": 34.88, "step": 130700, "train_speed(iter/s)": 0.410486 }, { "acc": 0.96027832, "epoch": 3.5389759835377577, "grad_norm": 8.416936874389648, "learning_rate": 3.5976887078919524e-07, "loss": 0.22074585, "memory(GiB)": 34.88, "step": 130705, "train_speed(iter/s)": 0.410487 }, { "acc": 0.93101597, "epoch": 3.539111363820973, "grad_norm": 6.435591220855713, "learning_rate": 3.595607509988877e-07, "loss": 0.39575746, "memory(GiB)": 34.88, "step": 130710, "train_speed(iter/s)": 0.410488 }, { "acc": 0.95251093, "epoch": 3.539246744104189, "grad_norm": 6.006304740905762, "learning_rate": 3.593526893454654e-07, "loss": 0.24522109, "memory(GiB)": 34.88, "step": 130715, "train_speed(iter/s)": 0.410489 }, { "acc": 0.95423365, "epoch": 3.539382124387404, "grad_norm": 4.066662311553955, "learning_rate": 3.591446858315327e-07, "loss": 0.25300024, "memory(GiB)": 34.88, "step": 130720, "train_speed(iter/s)": 0.41049 }, { "acc": 0.95022125, "epoch": 3.53951750467062, "grad_norm": 6.795004844665527, "learning_rate": 3.5893674045969493e-07, "loss": 0.26172962, "memory(GiB)": 34.88, "step": 130725, "train_speed(iter/s)": 0.410491 }, { "acc": 0.94811039, "epoch": 3.5396528849538353, "grad_norm": 7.569909572601318, "learning_rate": 3.587288532325577e-07, "loss": 0.33008919, "memory(GiB)": 34.88, "step": 130730, "train_speed(iter/s)": 0.410491 }, { "acc": 0.94179211, "epoch": 3.5397882652370507, "grad_norm": 7.509585857391357, "learning_rate": 3.585210241527253e-07, "loss": 0.36183584, "memory(GiB)": 34.88, "step": 130735, "train_speed(iter/s)": 0.410492 }, { "acc": 0.94944572, "epoch": 3.5399236455202665, "grad_norm": 9.843375205993652, "learning_rate": 3.5831325322280194e-07, "loss": 0.24828146, "memory(GiB)": 34.88, "step": 130740, "train_speed(iter/s)": 0.410493 }, { "acc": 0.93604507, "epoch": 3.540059025803482, "grad_norm": 3.2664926052093506, "learning_rate": 3.5810554044538765e-07, "loss": 0.41917982, "memory(GiB)": 34.88, "step": 130745, "train_speed(iter/s)": 0.410494 }, { "acc": 0.94687443, "epoch": 3.5401944060866977, "grad_norm": 7.1184587478637695, "learning_rate": 3.578978858230877e-07, "loss": 0.33398273, "memory(GiB)": 34.88, "step": 130750, "train_speed(iter/s)": 0.410495 }, { "acc": 0.94932575, "epoch": 3.540329786369913, "grad_norm": 9.86335277557373, "learning_rate": 3.5769028935850215e-07, "loss": 0.3211858, "memory(GiB)": 34.88, "step": 130755, "train_speed(iter/s)": 0.410496 }, { "acc": 0.95642109, "epoch": 3.540465166653129, "grad_norm": 5.049886703491211, "learning_rate": 3.5748275105423014e-07, "loss": 0.25855966, "memory(GiB)": 34.88, "step": 130760, "train_speed(iter/s)": 0.410497 }, { "acc": 0.94135933, "epoch": 3.540600546936344, "grad_norm": 20.392236709594727, "learning_rate": 3.572752709128734e-07, "loss": 0.42368631, "memory(GiB)": 34.88, "step": 130765, "train_speed(iter/s)": 0.410498 }, { "acc": 0.94794741, "epoch": 3.5407359272195595, "grad_norm": 11.0379638671875, "learning_rate": 3.570678489370294e-07, "loss": 0.27531672, "memory(GiB)": 34.88, "step": 130770, "train_speed(iter/s)": 0.410498 }, { "acc": 0.9596838, "epoch": 3.5408713075027753, "grad_norm": 9.011613845825195, "learning_rate": 3.568604851292987e-07, "loss": 0.24883766, "memory(GiB)": 34.88, "step": 130775, "train_speed(iter/s)": 0.410499 }, { "acc": 0.94223881, "epoch": 3.5410066877859907, "grad_norm": 7.91654634475708, "learning_rate": 3.5665317949227617e-07, "loss": 0.32552266, "memory(GiB)": 34.88, "step": 130780, "train_speed(iter/s)": 0.4105 }, { "acc": 0.96194582, "epoch": 3.5411420680692065, "grad_norm": 5.113462448120117, "learning_rate": 3.5644593202856175e-07, "loss": 0.24025152, "memory(GiB)": 34.88, "step": 130785, "train_speed(iter/s)": 0.410501 }, { "acc": 0.94549198, "epoch": 3.541277448352422, "grad_norm": 18.19109535217285, "learning_rate": 3.562387427407491e-07, "loss": 0.3497241, "memory(GiB)": 34.88, "step": 130790, "train_speed(iter/s)": 0.410502 }, { "acc": 0.94936562, "epoch": 3.5414128286356377, "grad_norm": 5.166660785675049, "learning_rate": 3.560316116314359e-07, "loss": 0.34034843, "memory(GiB)": 34.88, "step": 130795, "train_speed(iter/s)": 0.410503 }, { "acc": 0.93793392, "epoch": 3.541548208918853, "grad_norm": 8.263921737670898, "learning_rate": 3.5582453870321434e-07, "loss": 0.34700718, "memory(GiB)": 34.88, "step": 130800, "train_speed(iter/s)": 0.410504 }, { "acc": 0.95103168, "epoch": 3.5416835892020684, "grad_norm": 3.8359789848327637, "learning_rate": 3.556175239586799e-07, "loss": 0.2608139, "memory(GiB)": 34.88, "step": 130805, "train_speed(iter/s)": 0.410504 }, { "acc": 0.95801287, "epoch": 3.541818969485284, "grad_norm": 5.516379356384277, "learning_rate": 3.5541056740042675e-07, "loss": 0.28309629, "memory(GiB)": 34.88, "step": 130810, "train_speed(iter/s)": 0.410505 }, { "acc": 0.94597511, "epoch": 3.5419543497685, "grad_norm": 6.076868534088135, "learning_rate": 3.552036690310443e-07, "loss": 0.31925976, "memory(GiB)": 34.88, "step": 130815, "train_speed(iter/s)": 0.410506 }, { "acc": 0.95018721, "epoch": 3.5420897300517153, "grad_norm": 5.206750869750977, "learning_rate": 3.54996828853128e-07, "loss": 0.26786036, "memory(GiB)": 34.88, "step": 130820, "train_speed(iter/s)": 0.410507 }, { "acc": 0.94789028, "epoch": 3.5422251103349307, "grad_norm": 8.672006607055664, "learning_rate": 3.5479004686926663e-07, "loss": 0.29292979, "memory(GiB)": 34.88, "step": 130825, "train_speed(iter/s)": 0.410508 }, { "acc": 0.94497175, "epoch": 3.5423604906181465, "grad_norm": 5.269503593444824, "learning_rate": 3.5458332308205184e-07, "loss": 0.32163625, "memory(GiB)": 34.88, "step": 130830, "train_speed(iter/s)": 0.410509 }, { "acc": 0.95599995, "epoch": 3.542495870901362, "grad_norm": 5.094122409820557, "learning_rate": 3.5437665749407117e-07, "loss": 0.23875551, "memory(GiB)": 34.88, "step": 130835, "train_speed(iter/s)": 0.41051 }, { "acc": 0.96019402, "epoch": 3.5426312511845772, "grad_norm": 2.926074266433716, "learning_rate": 3.541700501079167e-07, "loss": 0.18380547, "memory(GiB)": 34.88, "step": 130840, "train_speed(iter/s)": 0.410511 }, { "acc": 0.95603933, "epoch": 3.542766631467793, "grad_norm": 4.959385395050049, "learning_rate": 3.539635009261751e-07, "loss": 0.251019, "memory(GiB)": 34.88, "step": 130845, "train_speed(iter/s)": 0.410512 }, { "acc": 0.96342888, "epoch": 3.542902011751009, "grad_norm": 4.059799671173096, "learning_rate": 3.5375700995143126e-07, "loss": 0.19849685, "memory(GiB)": 34.88, "step": 130850, "train_speed(iter/s)": 0.410512 }, { "acc": 0.94997568, "epoch": 3.543037392034224, "grad_norm": 10.986922264099121, "learning_rate": 3.5355057718627617e-07, "loss": 0.2671917, "memory(GiB)": 34.88, "step": 130855, "train_speed(iter/s)": 0.410513 }, { "acc": 0.94691687, "epoch": 3.5431727723174395, "grad_norm": 5.670248985290527, "learning_rate": 3.5334420263329346e-07, "loss": 0.26191707, "memory(GiB)": 34.88, "step": 130860, "train_speed(iter/s)": 0.410514 }, { "acc": 0.93565865, "epoch": 3.5433081526006553, "grad_norm": 10.854593276977539, "learning_rate": 3.5313788629506886e-07, "loss": 0.37610676, "memory(GiB)": 34.88, "step": 130865, "train_speed(iter/s)": 0.410515 }, { "acc": 0.9442421, "epoch": 3.5434435328838707, "grad_norm": 21.991912841796875, "learning_rate": 3.5293162817418534e-07, "loss": 0.30945134, "memory(GiB)": 34.88, "step": 130870, "train_speed(iter/s)": 0.410516 }, { "acc": 0.94951267, "epoch": 3.5435789131670865, "grad_norm": 6.995468616485596, "learning_rate": 3.527254282732302e-07, "loss": 0.3246541, "memory(GiB)": 34.88, "step": 130875, "train_speed(iter/s)": 0.410517 }, { "acc": 0.94743719, "epoch": 3.543714293450302, "grad_norm": 7.367313861846924, "learning_rate": 3.525192865947844e-07, "loss": 0.34639218, "memory(GiB)": 34.88, "step": 130880, "train_speed(iter/s)": 0.410518 }, { "acc": 0.93779507, "epoch": 3.5438496737335177, "grad_norm": 7.030035018920898, "learning_rate": 3.523132031414289e-07, "loss": 0.32788796, "memory(GiB)": 34.88, "step": 130885, "train_speed(iter/s)": 0.410519 }, { "acc": 0.95340405, "epoch": 3.543985054016733, "grad_norm": 4.605922698974609, "learning_rate": 3.5210717791574693e-07, "loss": 0.30405698, "memory(GiB)": 34.88, "step": 130890, "train_speed(iter/s)": 0.41052 }, { "acc": 0.95924139, "epoch": 3.5441204342999484, "grad_norm": 5.896276473999023, "learning_rate": 3.519012109203196e-07, "loss": 0.26917124, "memory(GiB)": 34.88, "step": 130895, "train_speed(iter/s)": 0.410521 }, { "acc": 0.94444237, "epoch": 3.544255814583164, "grad_norm": 10.32396125793457, "learning_rate": 3.516953021577267e-07, "loss": 0.42042098, "memory(GiB)": 34.88, "step": 130900, "train_speed(iter/s)": 0.410521 }, { "acc": 0.94944963, "epoch": 3.5443911948663795, "grad_norm": 2.6963934898376465, "learning_rate": 3.5148945163054657e-07, "loss": 0.38096523, "memory(GiB)": 34.88, "step": 130905, "train_speed(iter/s)": 0.410522 }, { "acc": 0.94883766, "epoch": 3.5445265751495953, "grad_norm": 9.023978233337402, "learning_rate": 3.512836593413601e-07, "loss": 0.33162446, "memory(GiB)": 34.88, "step": 130910, "train_speed(iter/s)": 0.410523 }, { "acc": 0.9535305, "epoch": 3.5446619554328107, "grad_norm": 19.109031677246094, "learning_rate": 3.510779252927429e-07, "loss": 0.31942391, "memory(GiB)": 34.88, "step": 130915, "train_speed(iter/s)": 0.410524 }, { "acc": 0.95091972, "epoch": 3.5447973357160265, "grad_norm": 2.7715003490448, "learning_rate": 3.508722494872747e-07, "loss": 0.32510033, "memory(GiB)": 34.88, "step": 130920, "train_speed(iter/s)": 0.410525 }, { "acc": 0.94780655, "epoch": 3.544932715999242, "grad_norm": 8.921701431274414, "learning_rate": 3.506666319275293e-07, "loss": 0.35739045, "memory(GiB)": 34.88, "step": 130925, "train_speed(iter/s)": 0.410526 }, { "acc": 0.95601196, "epoch": 3.5450680962824572, "grad_norm": 4.291359901428223, "learning_rate": 3.5046107261608407e-07, "loss": 0.28448315, "memory(GiB)": 34.88, "step": 130930, "train_speed(iter/s)": 0.410527 }, { "acc": 0.96753263, "epoch": 3.545203476565673, "grad_norm": 12.186824798583984, "learning_rate": 3.5025557155551416e-07, "loss": 0.18650786, "memory(GiB)": 34.88, "step": 130935, "train_speed(iter/s)": 0.410528 }, { "acc": 0.94059076, "epoch": 3.5453388568488884, "grad_norm": 4.459172248840332, "learning_rate": 3.5005012874839186e-07, "loss": 0.35538342, "memory(GiB)": 34.88, "step": 130940, "train_speed(iter/s)": 0.410528 }, { "acc": 0.96323738, "epoch": 3.545474237132104, "grad_norm": 7.770122528076172, "learning_rate": 3.4984474419729437e-07, "loss": 0.2198211, "memory(GiB)": 34.88, "step": 130945, "train_speed(iter/s)": 0.410529 }, { "acc": 0.94653854, "epoch": 3.5456096174153195, "grad_norm": 9.213571548461914, "learning_rate": 3.4963941790479144e-07, "loss": 0.37065451, "memory(GiB)": 34.88, "step": 130950, "train_speed(iter/s)": 0.41053 }, { "acc": 0.92170238, "epoch": 3.5457449976985354, "grad_norm": 15.574728965759277, "learning_rate": 3.4943414987345687e-07, "loss": 0.51140518, "memory(GiB)": 34.88, "step": 130955, "train_speed(iter/s)": 0.410531 }, { "acc": 0.9517643, "epoch": 3.5458803779817507, "grad_norm": 10.027798652648926, "learning_rate": 3.492289401058596e-07, "loss": 0.3387563, "memory(GiB)": 34.88, "step": 130960, "train_speed(iter/s)": 0.410532 }, { "acc": 0.95130291, "epoch": 3.546015758264966, "grad_norm": 7.624870300292969, "learning_rate": 3.4902378860457386e-07, "loss": 0.24910388, "memory(GiB)": 34.88, "step": 130965, "train_speed(iter/s)": 0.410533 }, { "acc": 0.9448451, "epoch": 3.546151138548182, "grad_norm": 10.066505432128906, "learning_rate": 3.4881869537216796e-07, "loss": 0.34318786, "memory(GiB)": 34.88, "step": 130970, "train_speed(iter/s)": 0.410534 }, { "acc": 0.96073265, "epoch": 3.5462865188313977, "grad_norm": 6.1287617683410645, "learning_rate": 3.4861366041120955e-07, "loss": 0.21796176, "memory(GiB)": 34.88, "step": 130975, "train_speed(iter/s)": 0.410535 }, { "acc": 0.94358702, "epoch": 3.546421899114613, "grad_norm": 11.99412727355957, "learning_rate": 3.484086837242696e-07, "loss": 0.38301535, "memory(GiB)": 34.88, "step": 130980, "train_speed(iter/s)": 0.410535 }, { "acc": 0.95105429, "epoch": 3.5465572793978284, "grad_norm": 4.286064624786377, "learning_rate": 3.482037653139143e-07, "loss": 0.26051526, "memory(GiB)": 34.88, "step": 130985, "train_speed(iter/s)": 0.410536 }, { "acc": 0.94391661, "epoch": 3.546692659681044, "grad_norm": 6.972462177276611, "learning_rate": 3.479989051827117e-07, "loss": 0.35440917, "memory(GiB)": 34.88, "step": 130990, "train_speed(iter/s)": 0.410537 }, { "acc": 0.94323788, "epoch": 3.5468280399642595, "grad_norm": 7.70892333984375, "learning_rate": 3.4779410333322634e-07, "loss": 0.31656742, "memory(GiB)": 34.88, "step": 130995, "train_speed(iter/s)": 0.410538 }, { "acc": 0.94925032, "epoch": 3.546963420247475, "grad_norm": 6.4095306396484375, "learning_rate": 3.475893597680257e-07, "loss": 0.31711442, "memory(GiB)": 34.88, "step": 131000, "train_speed(iter/s)": 0.410539 }, { "acc": 0.9522665, "epoch": 3.5470988005306907, "grad_norm": 18.719892501831055, "learning_rate": 3.473846744896743e-07, "loss": 0.31825054, "memory(GiB)": 34.88, "step": 131005, "train_speed(iter/s)": 0.41054 }, { "acc": 0.94668884, "epoch": 3.5472341808139065, "grad_norm": 7.310591220855713, "learning_rate": 3.471800475007337e-07, "loss": 0.31840138, "memory(GiB)": 34.88, "step": 131010, "train_speed(iter/s)": 0.410541 }, { "acc": 0.94965086, "epoch": 3.547369561097122, "grad_norm": 7.305722236633301, "learning_rate": 3.46975478803771e-07, "loss": 0.32309232, "memory(GiB)": 34.88, "step": 131015, "train_speed(iter/s)": 0.410541 }, { "acc": 0.9530488, "epoch": 3.5475049413803372, "grad_norm": 4.991296291351318, "learning_rate": 3.467709684013467e-07, "loss": 0.26659307, "memory(GiB)": 34.88, "step": 131020, "train_speed(iter/s)": 0.410542 }, { "acc": 0.93869057, "epoch": 3.547640321663553, "grad_norm": 22.88834571838379, "learning_rate": 3.465665162960235e-07, "loss": 0.35988808, "memory(GiB)": 34.88, "step": 131025, "train_speed(iter/s)": 0.410543 }, { "acc": 0.93768873, "epoch": 3.5477757019467684, "grad_norm": 7.4735517501831055, "learning_rate": 3.4636212249036067e-07, "loss": 0.34482703, "memory(GiB)": 34.88, "step": 131030, "train_speed(iter/s)": 0.410544 }, { "acc": 0.94899254, "epoch": 3.5479110822299837, "grad_norm": 3.7153213024139404, "learning_rate": 3.4615778698692154e-07, "loss": 0.26052494, "memory(GiB)": 34.88, "step": 131035, "train_speed(iter/s)": 0.410545 }, { "acc": 0.95799818, "epoch": 3.5480464625131996, "grad_norm": 5.074403285980225, "learning_rate": 3.4595350978826386e-07, "loss": 0.24414608, "memory(GiB)": 34.88, "step": 131040, "train_speed(iter/s)": 0.410546 }, { "acc": 0.93973961, "epoch": 3.5481818427964154, "grad_norm": 6.528693675994873, "learning_rate": 3.457492908969469e-07, "loss": 0.33651798, "memory(GiB)": 34.88, "step": 131045, "train_speed(iter/s)": 0.410546 }, { "acc": 0.9491066, "epoch": 3.5483172230796307, "grad_norm": 12.177119255065918, "learning_rate": 3.4554513031553003e-07, "loss": 0.29951267, "memory(GiB)": 34.88, "step": 131050, "train_speed(iter/s)": 0.410547 }, { "acc": 0.96364212, "epoch": 3.548452603362846, "grad_norm": 3.4097959995269775, "learning_rate": 3.453410280465693e-07, "loss": 0.22297194, "memory(GiB)": 34.88, "step": 131055, "train_speed(iter/s)": 0.410548 }, { "acc": 0.94685392, "epoch": 3.548587983646062, "grad_norm": 5.917270183563232, "learning_rate": 3.4513698409262247e-07, "loss": 0.30766411, "memory(GiB)": 34.88, "step": 131060, "train_speed(iter/s)": 0.410549 }, { "acc": 0.94676838, "epoch": 3.5487233639292772, "grad_norm": 8.550480842590332, "learning_rate": 3.4493299845624384e-07, "loss": 0.35280011, "memory(GiB)": 34.88, "step": 131065, "train_speed(iter/s)": 0.41055 }, { "acc": 0.95359411, "epoch": 3.548858744212493, "grad_norm": 5.125038146972656, "learning_rate": 3.4472907113999163e-07, "loss": 0.27011211, "memory(GiB)": 34.88, "step": 131070, "train_speed(iter/s)": 0.410551 }, { "acc": 0.95104609, "epoch": 3.5489941244957084, "grad_norm": 4.790931224822998, "learning_rate": 3.4452520214641805e-07, "loss": 0.27272735, "memory(GiB)": 34.88, "step": 131075, "train_speed(iter/s)": 0.410552 }, { "acc": 0.92918682, "epoch": 3.549129504778924, "grad_norm": 4.513701915740967, "learning_rate": 3.4432139147807754e-07, "loss": 0.42419457, "memory(GiB)": 34.88, "step": 131080, "train_speed(iter/s)": 0.410552 }, { "acc": 0.94809093, "epoch": 3.5492648850621396, "grad_norm": 2.7803633213043213, "learning_rate": 3.4411763913752374e-07, "loss": 0.34836988, "memory(GiB)": 34.88, "step": 131085, "train_speed(iter/s)": 0.410553 }, { "acc": 0.95702419, "epoch": 3.549400265345355, "grad_norm": 2.602778911590576, "learning_rate": 3.4391394512731e-07, "loss": 0.2711246, "memory(GiB)": 34.88, "step": 131090, "train_speed(iter/s)": 0.410554 }, { "acc": 0.95008717, "epoch": 3.5495356456285707, "grad_norm": 3.486807107925415, "learning_rate": 3.4371030944998626e-07, "loss": 0.25230951, "memory(GiB)": 34.88, "step": 131095, "train_speed(iter/s)": 0.410555 }, { "acc": 0.93657722, "epoch": 3.549671025911786, "grad_norm": 5.295299053192139, "learning_rate": 3.435067321081024e-07, "loss": 0.37091398, "memory(GiB)": 34.88, "step": 131100, "train_speed(iter/s)": 0.410556 }, { "acc": 0.9442174, "epoch": 3.549806406195002, "grad_norm": 135.84471130371094, "learning_rate": 3.4330321310421175e-07, "loss": 0.32817388, "memory(GiB)": 34.88, "step": 131105, "train_speed(iter/s)": 0.410557 }, { "acc": 0.9444479, "epoch": 3.5499417864782172, "grad_norm": 6.748301982879639, "learning_rate": 3.430997524408614e-07, "loss": 0.35599401, "memory(GiB)": 34.88, "step": 131110, "train_speed(iter/s)": 0.410558 }, { "acc": 0.9358675, "epoch": 3.550077166761433, "grad_norm": 9.49732494354248, "learning_rate": 3.4289635012060127e-07, "loss": 0.4053565, "memory(GiB)": 34.88, "step": 131115, "train_speed(iter/s)": 0.410558 }, { "acc": 0.94360571, "epoch": 3.5502125470446484, "grad_norm": 4.319196701049805, "learning_rate": 3.426930061459785e-07, "loss": 0.35291657, "memory(GiB)": 34.88, "step": 131120, "train_speed(iter/s)": 0.410559 }, { "acc": 0.93288307, "epoch": 3.5503479273278638, "grad_norm": 8.718596458435059, "learning_rate": 3.42489720519542e-07, "loss": 0.39645059, "memory(GiB)": 34.88, "step": 131125, "train_speed(iter/s)": 0.41056 }, { "acc": 0.9432457, "epoch": 3.5504833076110796, "grad_norm": 5.972226142883301, "learning_rate": 3.422864932438366e-07, "loss": 0.32697921, "memory(GiB)": 34.88, "step": 131130, "train_speed(iter/s)": 0.410561 }, { "acc": 0.95449467, "epoch": 3.550618687894295, "grad_norm": 12.520946502685547, "learning_rate": 3.420833243214084e-07, "loss": 0.32403307, "memory(GiB)": 34.88, "step": 131135, "train_speed(iter/s)": 0.410562 }, { "acc": 0.97451315, "epoch": 3.5507540681775107, "grad_norm": 2.281264305114746, "learning_rate": 3.4188021375480405e-07, "loss": 0.16760362, "memory(GiB)": 34.88, "step": 131140, "train_speed(iter/s)": 0.410563 }, { "acc": 0.94732666, "epoch": 3.550889448460726, "grad_norm": 7.687705039978027, "learning_rate": 3.4167716154656553e-07, "loss": 0.32842171, "memory(GiB)": 34.88, "step": 131145, "train_speed(iter/s)": 0.410564 }, { "acc": 0.94698544, "epoch": 3.551024828743942, "grad_norm": 4.884593486785889, "learning_rate": 3.41474167699238e-07, "loss": 0.37032182, "memory(GiB)": 34.88, "step": 131150, "train_speed(iter/s)": 0.410564 }, { "acc": 0.95582104, "epoch": 3.5511602090271572, "grad_norm": 9.253877639770508, "learning_rate": 3.4127123221536343e-07, "loss": 0.35002658, "memory(GiB)": 34.88, "step": 131155, "train_speed(iter/s)": 0.410565 }, { "acc": 0.94847403, "epoch": 3.5512955893103726, "grad_norm": 12.41708755493164, "learning_rate": 3.410683550974857e-07, "loss": 0.33086874, "memory(GiB)": 34.88, "step": 131160, "train_speed(iter/s)": 0.410566 }, { "acc": 0.9506978, "epoch": 3.5514309695935884, "grad_norm": 10.574835777282715, "learning_rate": 3.4086553634814427e-07, "loss": 0.29555008, "memory(GiB)": 34.88, "step": 131165, "train_speed(iter/s)": 0.410567 }, { "acc": 0.9533865, "epoch": 3.551566349876804, "grad_norm": 2.5600602626800537, "learning_rate": 3.4066277596988056e-07, "loss": 0.28397689, "memory(GiB)": 34.88, "step": 131170, "train_speed(iter/s)": 0.410568 }, { "acc": 0.93041153, "epoch": 3.5517017301600196, "grad_norm": 5.197010517120361, "learning_rate": 3.4046007396523583e-07, "loss": 0.4846808, "memory(GiB)": 34.88, "step": 131175, "train_speed(iter/s)": 0.410569 }, { "acc": 0.93798332, "epoch": 3.551837110443235, "grad_norm": 4.074528217315674, "learning_rate": 3.402574303367471e-07, "loss": 0.45339365, "memory(GiB)": 34.88, "step": 131180, "train_speed(iter/s)": 0.41057 }, { "acc": 0.95027304, "epoch": 3.5519724907264507, "grad_norm": 8.211236953735352, "learning_rate": 3.4005484508695373e-07, "loss": 0.32353988, "memory(GiB)": 34.88, "step": 131185, "train_speed(iter/s)": 0.41057 }, { "acc": 0.95248871, "epoch": 3.552107871009666, "grad_norm": 7.13794469833374, "learning_rate": 3.3985231821839364e-07, "loss": 0.32751322, "memory(GiB)": 34.88, "step": 131190, "train_speed(iter/s)": 0.410571 }, { "acc": 0.94837608, "epoch": 3.5522432512928814, "grad_norm": 6.101107120513916, "learning_rate": 3.3964984973360484e-07, "loss": 0.37171805, "memory(GiB)": 34.88, "step": 131195, "train_speed(iter/s)": 0.410572 }, { "acc": 0.95951462, "epoch": 3.5523786315760972, "grad_norm": 4.170712947845459, "learning_rate": 3.394474396351219e-07, "loss": 0.2353528, "memory(GiB)": 34.88, "step": 131200, "train_speed(iter/s)": 0.410573 }, { "acc": 0.95372086, "epoch": 3.552514011859313, "grad_norm": 8.686596870422363, "learning_rate": 3.3924508792548126e-07, "loss": 0.31515584, "memory(GiB)": 34.88, "step": 131205, "train_speed(iter/s)": 0.410574 }, { "acc": 0.94046288, "epoch": 3.5526493921425284, "grad_norm": 11.444009780883789, "learning_rate": 3.3904279460721804e-07, "loss": 0.38887551, "memory(GiB)": 34.88, "step": 131210, "train_speed(iter/s)": 0.410575 }, { "acc": 0.93702135, "epoch": 3.5527847724257438, "grad_norm": 5.721582412719727, "learning_rate": 3.388405596828665e-07, "loss": 0.37271817, "memory(GiB)": 34.88, "step": 131215, "train_speed(iter/s)": 0.410576 }, { "acc": 0.93995705, "epoch": 3.5529201527089596, "grad_norm": 8.521846771240234, "learning_rate": 3.3863838315495894e-07, "loss": 0.38826947, "memory(GiB)": 34.88, "step": 131220, "train_speed(iter/s)": 0.410577 }, { "acc": 0.94773941, "epoch": 3.553055532992175, "grad_norm": 3.0858044624328613, "learning_rate": 3.3843626502602843e-07, "loss": 0.33832536, "memory(GiB)": 34.88, "step": 131225, "train_speed(iter/s)": 0.410577 }, { "acc": 0.94359608, "epoch": 3.5531909132753907, "grad_norm": 7.042252540588379, "learning_rate": 3.3823420529860843e-07, "loss": 0.37623906, "memory(GiB)": 34.88, "step": 131230, "train_speed(iter/s)": 0.410578 }, { "acc": 0.95607319, "epoch": 3.553326293558606, "grad_norm": 2.412917375564575, "learning_rate": 3.3803220397522775e-07, "loss": 0.23826456, "memory(GiB)": 34.88, "step": 131235, "train_speed(iter/s)": 0.410579 }, { "acc": 0.93719158, "epoch": 3.553461673841822, "grad_norm": 4.3643059730529785, "learning_rate": 3.378302610584179e-07, "loss": 0.38286042, "memory(GiB)": 34.88, "step": 131240, "train_speed(iter/s)": 0.41058 }, { "acc": 0.95034533, "epoch": 3.5535970541250372, "grad_norm": 6.004385948181152, "learning_rate": 3.376283765507089e-07, "loss": 0.28098545, "memory(GiB)": 34.88, "step": 131245, "train_speed(iter/s)": 0.410581 }, { "acc": 0.94921703, "epoch": 3.5537324344082526, "grad_norm": 8.649819374084473, "learning_rate": 3.3742655045463017e-07, "loss": 0.2682364, "memory(GiB)": 34.88, "step": 131250, "train_speed(iter/s)": 0.410582 }, { "acc": 0.95356646, "epoch": 3.5538678146914684, "grad_norm": 5.6805806159973145, "learning_rate": 3.372247827727082e-07, "loss": 0.31255698, "memory(GiB)": 34.88, "step": 131255, "train_speed(iter/s)": 0.410583 }, { "acc": 0.96111135, "epoch": 3.5540031949746838, "grad_norm": 3.313524007797241, "learning_rate": 3.370230735074719e-07, "loss": 0.2089941, "memory(GiB)": 34.88, "step": 131260, "train_speed(iter/s)": 0.410584 }, { "acc": 0.94930935, "epoch": 3.5541385752578996, "grad_norm": 7.441456317901611, "learning_rate": 3.368214226614485e-07, "loss": 0.34929919, "memory(GiB)": 34.88, "step": 131265, "train_speed(iter/s)": 0.410584 }, { "acc": 0.93850384, "epoch": 3.554273955541115, "grad_norm": 3.73913836479187, "learning_rate": 3.366198302371629e-07, "loss": 0.41767945, "memory(GiB)": 34.88, "step": 131270, "train_speed(iter/s)": 0.410585 }, { "acc": 0.94692945, "epoch": 3.5544093358243307, "grad_norm": 9.678412437438965, "learning_rate": 3.3641829623714054e-07, "loss": 0.25952249, "memory(GiB)": 34.88, "step": 131275, "train_speed(iter/s)": 0.410586 }, { "acc": 0.95289364, "epoch": 3.554544716107546, "grad_norm": 8.942980766296387, "learning_rate": 3.36216820663907e-07, "loss": 0.28320413, "memory(GiB)": 34.88, "step": 131280, "train_speed(iter/s)": 0.410587 }, { "acc": 0.94285564, "epoch": 3.5546800963907614, "grad_norm": 8.901941299438477, "learning_rate": 3.3601540351998555e-07, "loss": 0.38237429, "memory(GiB)": 34.88, "step": 131285, "train_speed(iter/s)": 0.410588 }, { "acc": 0.94420118, "epoch": 3.5548154766739772, "grad_norm": 6.104146957397461, "learning_rate": 3.358140448078995e-07, "loss": 0.35577371, "memory(GiB)": 34.88, "step": 131290, "train_speed(iter/s)": 0.410589 }, { "acc": 0.94812145, "epoch": 3.5549508569571926, "grad_norm": 4.365509986877441, "learning_rate": 3.356127445301704e-07, "loss": 0.32422323, "memory(GiB)": 34.88, "step": 131295, "train_speed(iter/s)": 0.41059 }, { "acc": 0.95191088, "epoch": 3.5550862372404084, "grad_norm": 4.131504058837891, "learning_rate": 3.354115026893216e-07, "loss": 0.2867018, "memory(GiB)": 34.88, "step": 131300, "train_speed(iter/s)": 0.41059 }, { "acc": 0.97087965, "epoch": 3.5552216175236238, "grad_norm": 3.864962339401245, "learning_rate": 3.3521031928787245e-07, "loss": 0.17700834, "memory(GiB)": 34.88, "step": 131305, "train_speed(iter/s)": 0.410591 }, { "acc": 0.93879938, "epoch": 3.5553569978068396, "grad_norm": 12.548967361450195, "learning_rate": 3.350091943283441e-07, "loss": 0.33998389, "memory(GiB)": 34.88, "step": 131310, "train_speed(iter/s)": 0.410592 }, { "acc": 0.9530324, "epoch": 3.555492378090055, "grad_norm": 7.054081439971924, "learning_rate": 3.348081278132553e-07, "loss": 0.24476435, "memory(GiB)": 34.88, "step": 131315, "train_speed(iter/s)": 0.410593 }, { "acc": 0.9575695, "epoch": 3.5556277583732703, "grad_norm": 4.17474889755249, "learning_rate": 3.34607119745126e-07, "loss": 0.20017068, "memory(GiB)": 34.88, "step": 131320, "train_speed(iter/s)": 0.410594 }, { "acc": 0.95326843, "epoch": 3.555763138656486, "grad_norm": 2.8281593322753906, "learning_rate": 3.344061701264734e-07, "loss": 0.30931969, "memory(GiB)": 34.88, "step": 131325, "train_speed(iter/s)": 0.410595 }, { "acc": 0.94847498, "epoch": 3.555898518939702, "grad_norm": 4.847166061401367, "learning_rate": 3.3420527895981427e-07, "loss": 0.31871014, "memory(GiB)": 34.88, "step": 131330, "train_speed(iter/s)": 0.410595 }, { "acc": 0.95097704, "epoch": 3.5560338992229172, "grad_norm": 7.199862003326416, "learning_rate": 3.340044462476661e-07, "loss": 0.33016615, "memory(GiB)": 34.88, "step": 131335, "train_speed(iter/s)": 0.410596 }, { "acc": 0.93447685, "epoch": 3.5561692795061326, "grad_norm": 8.588799476623535, "learning_rate": 3.338036719925446e-07, "loss": 0.4076108, "memory(GiB)": 34.88, "step": 131340, "train_speed(iter/s)": 0.410597 }, { "acc": 0.95161057, "epoch": 3.5563046597893484, "grad_norm": 14.702903747558594, "learning_rate": 3.336029561969646e-07, "loss": 0.29221201, "memory(GiB)": 34.88, "step": 131345, "train_speed(iter/s)": 0.410598 }, { "acc": 0.95564289, "epoch": 3.5564400400725638, "grad_norm": 6.326292991638184, "learning_rate": 3.3340229886344e-07, "loss": 0.26188321, "memory(GiB)": 34.88, "step": 131350, "train_speed(iter/s)": 0.410599 }, { "acc": 0.93755207, "epoch": 3.556575420355779, "grad_norm": 6.419280529022217, "learning_rate": 3.3320169999448573e-07, "loss": 0.43172302, "memory(GiB)": 34.88, "step": 131355, "train_speed(iter/s)": 0.410599 }, { "acc": 0.94245024, "epoch": 3.556710800638995, "grad_norm": 12.747227668762207, "learning_rate": 3.3300115959261347e-07, "loss": 0.3388804, "memory(GiB)": 34.88, "step": 131360, "train_speed(iter/s)": 0.4106 }, { "acc": 0.94797163, "epoch": 3.5568461809222107, "grad_norm": 6.351909637451172, "learning_rate": 3.3280067766033543e-07, "loss": 0.34671268, "memory(GiB)": 34.88, "step": 131365, "train_speed(iter/s)": 0.410601 }, { "acc": 0.95155602, "epoch": 3.556981561205426, "grad_norm": 8.934950828552246, "learning_rate": 3.3260025420016365e-07, "loss": 0.31009891, "memory(GiB)": 34.88, "step": 131370, "train_speed(iter/s)": 0.410602 }, { "acc": 0.93465157, "epoch": 3.5571169414886414, "grad_norm": 14.019370079040527, "learning_rate": 3.323998892146093e-07, "loss": 0.37606509, "memory(GiB)": 34.88, "step": 131375, "train_speed(iter/s)": 0.410603 }, { "acc": 0.94560966, "epoch": 3.5572523217718572, "grad_norm": 5.871165752410889, "learning_rate": 3.321995827061802e-07, "loss": 0.30023167, "memory(GiB)": 34.88, "step": 131380, "train_speed(iter/s)": 0.410604 }, { "acc": 0.95017014, "epoch": 3.5573877020550726, "grad_norm": 7.012833595275879, "learning_rate": 3.3199933467738837e-07, "loss": 0.26415534, "memory(GiB)": 34.88, "step": 131385, "train_speed(iter/s)": 0.410605 }, { "acc": 0.95662937, "epoch": 3.5575230823382884, "grad_norm": 5.010685443878174, "learning_rate": 3.3179914513074114e-07, "loss": 0.27165446, "memory(GiB)": 34.88, "step": 131390, "train_speed(iter/s)": 0.410605 }, { "acc": 0.94183865, "epoch": 3.5576584626215038, "grad_norm": 6.957095623016357, "learning_rate": 3.31599014068745e-07, "loss": 0.33671088, "memory(GiB)": 34.88, "step": 131395, "train_speed(iter/s)": 0.410606 }, { "acc": 0.94480715, "epoch": 3.5577938429047196, "grad_norm": 23.469470977783203, "learning_rate": 3.313989414939084e-07, "loss": 0.36793702, "memory(GiB)": 34.88, "step": 131400, "train_speed(iter/s)": 0.410607 }, { "acc": 0.9474802, "epoch": 3.557929223187935, "grad_norm": 12.446500778198242, "learning_rate": 3.311989274087373e-07, "loss": 0.36422613, "memory(GiB)": 34.88, "step": 131405, "train_speed(iter/s)": 0.410608 }, { "acc": 0.95636454, "epoch": 3.5580646034711503, "grad_norm": 5.97228479385376, "learning_rate": 3.3099897181573784e-07, "loss": 0.2527663, "memory(GiB)": 34.88, "step": 131410, "train_speed(iter/s)": 0.410609 }, { "acc": 0.95578213, "epoch": 3.558199983754366, "grad_norm": 12.238264083862305, "learning_rate": 3.3079907471741335e-07, "loss": 0.28810577, "memory(GiB)": 34.88, "step": 131415, "train_speed(iter/s)": 0.41061 }, { "acc": 0.95098534, "epoch": 3.5583353640375814, "grad_norm": 10.94467830657959, "learning_rate": 3.3059923611626977e-07, "loss": 0.27260945, "memory(GiB)": 34.88, "step": 131420, "train_speed(iter/s)": 0.410611 }, { "acc": 0.95138416, "epoch": 3.5584707443207972, "grad_norm": 11.540725708007812, "learning_rate": 3.3039945601481003e-07, "loss": 0.34030986, "memory(GiB)": 34.88, "step": 131425, "train_speed(iter/s)": 0.410612 }, { "acc": 0.95390854, "epoch": 3.5586061246040126, "grad_norm": 3.6080586910247803, "learning_rate": 3.301997344155351e-07, "loss": 0.25103133, "memory(GiB)": 34.88, "step": 131430, "train_speed(iter/s)": 0.410612 }, { "acc": 0.95456543, "epoch": 3.5587415048872284, "grad_norm": 4.125352382659912, "learning_rate": 3.300000713209483e-07, "loss": 0.27278421, "memory(GiB)": 34.88, "step": 131435, "train_speed(iter/s)": 0.410613 }, { "acc": 0.94220772, "epoch": 3.5588768851704438, "grad_norm": 8.494292259216309, "learning_rate": 3.2980046673355075e-07, "loss": 0.41048393, "memory(GiB)": 34.88, "step": 131440, "train_speed(iter/s)": 0.410614 }, { "acc": 0.9417244, "epoch": 3.559012265453659, "grad_norm": 12.02499008178711, "learning_rate": 3.296009206558434e-07, "loss": 0.32289391, "memory(GiB)": 34.88, "step": 131445, "train_speed(iter/s)": 0.410615 }, { "acc": 0.94984064, "epoch": 3.559147645736875, "grad_norm": 7.264571189880371, "learning_rate": 3.294014330903248e-07, "loss": 0.25785546, "memory(GiB)": 34.88, "step": 131450, "train_speed(iter/s)": 0.410616 }, { "acc": 0.94390326, "epoch": 3.5592830260200903, "grad_norm": 10.459630012512207, "learning_rate": 3.2920200403949416e-07, "loss": 0.35848196, "memory(GiB)": 34.88, "step": 131455, "train_speed(iter/s)": 0.410617 }, { "acc": 0.94156361, "epoch": 3.559418406303306, "grad_norm": 12.39282512664795, "learning_rate": 3.290026335058504e-07, "loss": 0.31168761, "memory(GiB)": 34.88, "step": 131460, "train_speed(iter/s)": 0.410617 }, { "acc": 0.9493597, "epoch": 3.5595537865865214, "grad_norm": 9.78139877319336, "learning_rate": 3.2880332149189126e-07, "loss": 0.3527873, "memory(GiB)": 34.88, "step": 131465, "train_speed(iter/s)": 0.410618 }, { "acc": 0.93748074, "epoch": 3.5596891668697372, "grad_norm": 8.37926959991455, "learning_rate": 3.286040680001118e-07, "loss": 0.43510194, "memory(GiB)": 34.88, "step": 131470, "train_speed(iter/s)": 0.410619 }, { "acc": 0.96130924, "epoch": 3.5598245471529526, "grad_norm": 5.9917311668396, "learning_rate": 3.284048730330091e-07, "loss": 0.20193086, "memory(GiB)": 34.88, "step": 131475, "train_speed(iter/s)": 0.41062 }, { "acc": 0.95572348, "epoch": 3.559959927436168, "grad_norm": 10.14875602722168, "learning_rate": 3.282057365930799e-07, "loss": 0.24640427, "memory(GiB)": 34.88, "step": 131480, "train_speed(iter/s)": 0.410621 }, { "acc": 0.94319429, "epoch": 3.5600953077193838, "grad_norm": 7.305603981018066, "learning_rate": 3.2800665868281627e-07, "loss": 0.37653942, "memory(GiB)": 34.88, "step": 131485, "train_speed(iter/s)": 0.410622 }, { "acc": 0.96373396, "epoch": 3.5602306880025996, "grad_norm": 2.883829116821289, "learning_rate": 3.2780763930471283e-07, "loss": 0.20249436, "memory(GiB)": 34.88, "step": 131490, "train_speed(iter/s)": 0.410623 }, { "acc": 0.93536263, "epoch": 3.560366068285815, "grad_norm": 7.0327348709106445, "learning_rate": 3.276086784612633e-07, "loss": 0.32164168, "memory(GiB)": 34.88, "step": 131495, "train_speed(iter/s)": 0.410624 }, { "acc": 0.94639969, "epoch": 3.5605014485690303, "grad_norm": 17.717792510986328, "learning_rate": 3.2740977615496105e-07, "loss": 0.33524921, "memory(GiB)": 34.88, "step": 131500, "train_speed(iter/s)": 0.410625 }, { "acc": 0.94252577, "epoch": 3.560636828852246, "grad_norm": 8.577569007873535, "learning_rate": 3.272109323882943e-07, "loss": 0.31548316, "memory(GiB)": 34.88, "step": 131505, "train_speed(iter/s)": 0.410625 }, { "acc": 0.94367533, "epoch": 3.5607722091354614, "grad_norm": 8.489592552185059, "learning_rate": 3.270121471637576e-07, "loss": 0.26763272, "memory(GiB)": 34.88, "step": 131510, "train_speed(iter/s)": 0.410626 }, { "acc": 0.9419693, "epoch": 3.560907589418677, "grad_norm": 4.4126667976379395, "learning_rate": 3.268134204838403e-07, "loss": 0.31912985, "memory(GiB)": 34.88, "step": 131515, "train_speed(iter/s)": 0.410627 }, { "acc": 0.96213436, "epoch": 3.5610429697018926, "grad_norm": 3.2710647583007812, "learning_rate": 3.2661475235102963e-07, "loss": 0.155572, "memory(GiB)": 34.88, "step": 131520, "train_speed(iter/s)": 0.410628 }, { "acc": 0.94481936, "epoch": 3.5611783499851084, "grad_norm": 4.390691757202148, "learning_rate": 3.264161427678161e-07, "loss": 0.32407005, "memory(GiB)": 34.88, "step": 131525, "train_speed(iter/s)": 0.410629 }, { "acc": 0.95326805, "epoch": 3.5613137302683238, "grad_norm": 3.5633456707000732, "learning_rate": 3.2621759173668755e-07, "loss": 0.25662937, "memory(GiB)": 34.88, "step": 131530, "train_speed(iter/s)": 0.41063 }, { "acc": 0.94734888, "epoch": 3.561449110551539, "grad_norm": 6.271109104156494, "learning_rate": 3.2601909926013225e-07, "loss": 0.36974423, "memory(GiB)": 34.88, "step": 131535, "train_speed(iter/s)": 0.41063 }, { "acc": 0.9487648, "epoch": 3.561584490834755, "grad_norm": 5.0811662673950195, "learning_rate": 3.2582066534063347e-07, "loss": 0.29459691, "memory(GiB)": 34.88, "step": 131540, "train_speed(iter/s)": 0.410631 }, { "acc": 0.9442873, "epoch": 3.5617198711179703, "grad_norm": 6.866786479949951, "learning_rate": 3.256222899806807e-07, "loss": 0.32403903, "memory(GiB)": 34.88, "step": 131545, "train_speed(iter/s)": 0.410632 }, { "acc": 0.96267166, "epoch": 3.561855251401186, "grad_norm": 5.982870578765869, "learning_rate": 3.2542397318275776e-07, "loss": 0.21496766, "memory(GiB)": 34.88, "step": 131550, "train_speed(iter/s)": 0.410633 }, { "acc": 0.93004227, "epoch": 3.5619906316844014, "grad_norm": 15.199033737182617, "learning_rate": 3.252257149493475e-07, "loss": 0.49933829, "memory(GiB)": 34.88, "step": 131555, "train_speed(iter/s)": 0.410634 }, { "acc": 0.95876484, "epoch": 3.5621260119676172, "grad_norm": 18.47342300415039, "learning_rate": 3.2502751528293476e-07, "loss": 0.18914237, "memory(GiB)": 34.88, "step": 131560, "train_speed(iter/s)": 0.410634 }, { "acc": 0.93856182, "epoch": 3.5622613922508326, "grad_norm": 16.735017776489258, "learning_rate": 3.2482937418600196e-07, "loss": 0.38022432, "memory(GiB)": 34.88, "step": 131565, "train_speed(iter/s)": 0.410635 }, { "acc": 0.9386343, "epoch": 3.562396772534048, "grad_norm": 8.409886360168457, "learning_rate": 3.246312916610322e-07, "loss": 0.29372067, "memory(GiB)": 34.88, "step": 131570, "train_speed(iter/s)": 0.410636 }, { "acc": 0.95788155, "epoch": 3.5625321528172638, "grad_norm": 4.0166449546813965, "learning_rate": 3.244332677105045e-07, "loss": 0.16097522, "memory(GiB)": 34.88, "step": 131575, "train_speed(iter/s)": 0.410637 }, { "acc": 0.93490124, "epoch": 3.562667533100479, "grad_norm": 4.3499016761779785, "learning_rate": 3.242353023369027e-07, "loss": 0.42127895, "memory(GiB)": 34.88, "step": 131580, "train_speed(iter/s)": 0.410638 }, { "acc": 0.93249264, "epoch": 3.562802913383695, "grad_norm": 6.929067611694336, "learning_rate": 3.24037395542704e-07, "loss": 0.3895839, "memory(GiB)": 34.88, "step": 131585, "train_speed(iter/s)": 0.410639 }, { "acc": 0.94537163, "epoch": 3.5629382936669103, "grad_norm": 12.376148223876953, "learning_rate": 3.2383954733039014e-07, "loss": 0.3043853, "memory(GiB)": 34.88, "step": 131590, "train_speed(iter/s)": 0.41064 }, { "acc": 0.96272135, "epoch": 3.563073673950126, "grad_norm": 8.336349487304688, "learning_rate": 3.2364175770243656e-07, "loss": 0.20651557, "memory(GiB)": 34.88, "step": 131595, "train_speed(iter/s)": 0.410641 }, { "acc": 0.92914076, "epoch": 3.5632090542333414, "grad_norm": 15.280047416687012, "learning_rate": 3.2344402666132276e-07, "loss": 0.42887201, "memory(GiB)": 34.88, "step": 131600, "train_speed(iter/s)": 0.410641 }, { "acc": 0.94397278, "epoch": 3.563344434516557, "grad_norm": 4.4605793952941895, "learning_rate": 3.2324635420952646e-07, "loss": 0.36931758, "memory(GiB)": 34.88, "step": 131605, "train_speed(iter/s)": 0.410642 }, { "acc": 0.9499321, "epoch": 3.5634798147997726, "grad_norm": 4.284963130950928, "learning_rate": 3.2304874034952106e-07, "loss": 0.2848175, "memory(GiB)": 34.88, "step": 131610, "train_speed(iter/s)": 0.410643 }, { "acc": 0.94609318, "epoch": 3.563615195082988, "grad_norm": 4.471761703491211, "learning_rate": 3.228511850837853e-07, "loss": 0.30057254, "memory(GiB)": 34.88, "step": 131615, "train_speed(iter/s)": 0.410644 }, { "acc": 0.94329157, "epoch": 3.5637505753662038, "grad_norm": 3.706169605255127, "learning_rate": 3.226536884147921e-07, "loss": 0.36739917, "memory(GiB)": 34.88, "step": 131620, "train_speed(iter/s)": 0.410645 }, { "acc": 0.94079399, "epoch": 3.563885955649419, "grad_norm": 11.40760612487793, "learning_rate": 3.2245625034501647e-07, "loss": 0.34497533, "memory(GiB)": 34.88, "step": 131625, "train_speed(iter/s)": 0.410645 }, { "acc": 0.94536743, "epoch": 3.564021335932635, "grad_norm": 7.078752517700195, "learning_rate": 3.222588708769294e-07, "loss": 0.37601428, "memory(GiB)": 34.88, "step": 131630, "train_speed(iter/s)": 0.410646 }, { "acc": 0.95365143, "epoch": 3.5641567162158503, "grad_norm": 5.236911296844482, "learning_rate": 3.22061550013007e-07, "loss": 0.24702692, "memory(GiB)": 34.88, "step": 131635, "train_speed(iter/s)": 0.410647 }, { "acc": 0.94450378, "epoch": 3.5642920964990656, "grad_norm": 7.763823509216309, "learning_rate": 3.218642877557188e-07, "loss": 0.33560748, "memory(GiB)": 34.88, "step": 131640, "train_speed(iter/s)": 0.410648 }, { "acc": 0.9516139, "epoch": 3.5644274767822814, "grad_norm": 3.912470579147339, "learning_rate": 3.2166708410753534e-07, "loss": 0.29869199, "memory(GiB)": 34.88, "step": 131645, "train_speed(iter/s)": 0.410649 }, { "acc": 0.93245163, "epoch": 3.5645628570654972, "grad_norm": 11.848241806030273, "learning_rate": 3.2146993907092993e-07, "loss": 0.35461049, "memory(GiB)": 34.88, "step": 131650, "train_speed(iter/s)": 0.41065 }, { "acc": 0.95073109, "epoch": 3.5646982373487126, "grad_norm": 17.755050659179688, "learning_rate": 3.212728526483686e-07, "loss": 0.25289958, "memory(GiB)": 34.88, "step": 131655, "train_speed(iter/s)": 0.41065 }, { "acc": 0.95609503, "epoch": 3.564833617631928, "grad_norm": 5.645260810852051, "learning_rate": 3.2107582484232377e-07, "loss": 0.23087866, "memory(GiB)": 34.88, "step": 131660, "train_speed(iter/s)": 0.410651 }, { "acc": 0.94575539, "epoch": 3.5649689979151438, "grad_norm": 6.448831081390381, "learning_rate": 3.2087885565525974e-07, "loss": 0.33703802, "memory(GiB)": 34.88, "step": 131665, "train_speed(iter/s)": 0.410652 }, { "acc": 0.94005928, "epoch": 3.565104378198359, "grad_norm": 9.592219352722168, "learning_rate": 3.20681945089647e-07, "loss": 0.36208282, "memory(GiB)": 34.88, "step": 131670, "train_speed(iter/s)": 0.410653 }, { "acc": 0.94392557, "epoch": 3.5652397584815745, "grad_norm": 7.007965087890625, "learning_rate": 3.204850931479513e-07, "loss": 0.35455794, "memory(GiB)": 34.88, "step": 131675, "train_speed(iter/s)": 0.410654 }, { "acc": 0.95337505, "epoch": 3.5653751387647903, "grad_norm": 3.412299871444702, "learning_rate": 3.202882998326381e-07, "loss": 0.23950624, "memory(GiB)": 34.88, "step": 131680, "train_speed(iter/s)": 0.410655 }, { "acc": 0.95739594, "epoch": 3.565510519048006, "grad_norm": 6.499778747558594, "learning_rate": 3.2009156514617397e-07, "loss": 0.29174073, "memory(GiB)": 34.88, "step": 131685, "train_speed(iter/s)": 0.410656 }, { "acc": 0.94325743, "epoch": 3.5656458993312214, "grad_norm": 4.779894828796387, "learning_rate": 3.1989488909102135e-07, "loss": 0.3638937, "memory(GiB)": 34.88, "step": 131690, "train_speed(iter/s)": 0.410657 }, { "acc": 0.94410248, "epoch": 3.565781279614437, "grad_norm": 17.588428497314453, "learning_rate": 3.1969827166964615e-07, "loss": 0.32811167, "memory(GiB)": 34.88, "step": 131695, "train_speed(iter/s)": 0.410657 }, { "acc": 0.93778114, "epoch": 3.5659166598976526, "grad_norm": 9.449739456176758, "learning_rate": 3.195017128845091e-07, "loss": 0.36173599, "memory(GiB)": 34.88, "step": 131700, "train_speed(iter/s)": 0.410658 }, { "acc": 0.94951382, "epoch": 3.566052040180868, "grad_norm": 5.567782402038574, "learning_rate": 3.193052127380751e-07, "loss": 0.26082845, "memory(GiB)": 34.88, "step": 131705, "train_speed(iter/s)": 0.410659 }, { "acc": 0.93005657, "epoch": 3.5661874204640838, "grad_norm": 7.609288692474365, "learning_rate": 3.191087712328031e-07, "loss": 0.4323328, "memory(GiB)": 34.88, "step": 131710, "train_speed(iter/s)": 0.41066 }, { "acc": 0.95492382, "epoch": 3.566322800747299, "grad_norm": 8.281942367553711, "learning_rate": 3.1891238837115583e-07, "loss": 0.29642437, "memory(GiB)": 34.88, "step": 131715, "train_speed(iter/s)": 0.410661 }, { "acc": 0.95641518, "epoch": 3.566458181030515, "grad_norm": 8.771108627319336, "learning_rate": 3.187160641555933e-07, "loss": 0.26770775, "memory(GiB)": 34.88, "step": 131720, "train_speed(iter/s)": 0.410662 }, { "acc": 0.94275112, "epoch": 3.5665935613137303, "grad_norm": 5.66959810256958, "learning_rate": 3.1851979858857337e-07, "loss": 0.34819458, "memory(GiB)": 34.88, "step": 131725, "train_speed(iter/s)": 0.410663 }, { "acc": 0.94559422, "epoch": 3.5667289415969456, "grad_norm": 5.507885456085205, "learning_rate": 3.18323591672556e-07, "loss": 0.30273666, "memory(GiB)": 34.88, "step": 131730, "train_speed(iter/s)": 0.410663 }, { "acc": 0.96081219, "epoch": 3.5668643218801614, "grad_norm": 3.554640293121338, "learning_rate": 3.1812744340999734e-07, "loss": 0.21238027, "memory(GiB)": 34.88, "step": 131735, "train_speed(iter/s)": 0.410664 }, { "acc": 0.96874084, "epoch": 3.566999702163377, "grad_norm": 7.888340473175049, "learning_rate": 3.179313538033573e-07, "loss": 0.2110363, "memory(GiB)": 34.88, "step": 131740, "train_speed(iter/s)": 0.410665 }, { "acc": 0.94599743, "epoch": 3.5671350824465926, "grad_norm": 10.922435760498047, "learning_rate": 3.177353228550905e-07, "loss": 0.33322577, "memory(GiB)": 34.88, "step": 131745, "train_speed(iter/s)": 0.410666 }, { "acc": 0.95514317, "epoch": 3.567270462729808, "grad_norm": 6.1856465339660645, "learning_rate": 3.1753935056765236e-07, "loss": 0.27587514, "memory(GiB)": 34.88, "step": 131750, "train_speed(iter/s)": 0.410667 }, { "acc": 0.93895464, "epoch": 3.5674058430130238, "grad_norm": 5.9510345458984375, "learning_rate": 3.1734343694349854e-07, "loss": 0.32710292, "memory(GiB)": 34.88, "step": 131755, "train_speed(iter/s)": 0.410667 }, { "acc": 0.94731541, "epoch": 3.567541223296239, "grad_norm": 9.279738426208496, "learning_rate": 3.1714758198508345e-07, "loss": 0.309076, "memory(GiB)": 34.88, "step": 131760, "train_speed(iter/s)": 0.410668 }, { "acc": 0.95666523, "epoch": 3.5676766035794545, "grad_norm": 5.408243656158447, "learning_rate": 3.1695178569486047e-07, "loss": 0.2675086, "memory(GiB)": 34.88, "step": 131765, "train_speed(iter/s)": 0.410669 }, { "acc": 0.94385281, "epoch": 3.5678119838626703, "grad_norm": 9.678175926208496, "learning_rate": 3.167560480752807e-07, "loss": 0.32653236, "memory(GiB)": 34.88, "step": 131770, "train_speed(iter/s)": 0.41067 }, { "acc": 0.93596325, "epoch": 3.5679473641458856, "grad_norm": 7.435988903045654, "learning_rate": 3.1656036912879865e-07, "loss": 0.42169628, "memory(GiB)": 34.88, "step": 131775, "train_speed(iter/s)": 0.410671 }, { "acc": 0.94826717, "epoch": 3.5680827444291014, "grad_norm": 10.647756576538086, "learning_rate": 3.1636474885786367e-07, "loss": 0.32146811, "memory(GiB)": 34.88, "step": 131780, "train_speed(iter/s)": 0.410672 }, { "acc": 0.94069443, "epoch": 3.568218124712317, "grad_norm": 7.391668796539307, "learning_rate": 3.1616918726492704e-07, "loss": 0.34508138, "memory(GiB)": 34.88, "step": 131785, "train_speed(iter/s)": 0.410672 }, { "acc": 0.95046453, "epoch": 3.5683535049955326, "grad_norm": 7.788474082946777, "learning_rate": 3.1597368435243867e-07, "loss": 0.30530238, "memory(GiB)": 34.88, "step": 131790, "train_speed(iter/s)": 0.410673 }, { "acc": 0.93572922, "epoch": 3.568488885278748, "grad_norm": 10.6214017868042, "learning_rate": 3.1577824012284853e-07, "loss": 0.38246231, "memory(GiB)": 34.88, "step": 131795, "train_speed(iter/s)": 0.410674 }, { "acc": 0.93514404, "epoch": 3.5686242655619633, "grad_norm": 7.895659923553467, "learning_rate": 3.1558285457860286e-07, "loss": 0.37500811, "memory(GiB)": 34.88, "step": 131800, "train_speed(iter/s)": 0.410675 }, { "acc": 0.95017319, "epoch": 3.568759645845179, "grad_norm": 5.354821681976318, "learning_rate": 3.1538752772215e-07, "loss": 0.29453797, "memory(GiB)": 34.88, "step": 131805, "train_speed(iter/s)": 0.410676 }, { "acc": 0.9587759, "epoch": 3.568895026128395, "grad_norm": 6.993043899536133, "learning_rate": 3.1519225955593826e-07, "loss": 0.26505361, "memory(GiB)": 34.88, "step": 131810, "train_speed(iter/s)": 0.410677 }, { "acc": 0.95238495, "epoch": 3.5690304064116103, "grad_norm": 19.67719841003418, "learning_rate": 3.1499705008241164e-07, "loss": 0.31292934, "memory(GiB)": 34.88, "step": 131815, "train_speed(iter/s)": 0.410677 }, { "acc": 0.95322762, "epoch": 3.5691657866948256, "grad_norm": 5.772791385650635, "learning_rate": 3.148018993040172e-07, "loss": 0.28032715, "memory(GiB)": 34.88, "step": 131820, "train_speed(iter/s)": 0.410678 }, { "acc": 0.94564438, "epoch": 3.5693011669780415, "grad_norm": 7.216663360595703, "learning_rate": 3.146068072231974e-07, "loss": 0.24984155, "memory(GiB)": 34.88, "step": 131825, "train_speed(iter/s)": 0.410679 }, { "acc": 0.93168421, "epoch": 3.569436547261257, "grad_norm": 7.937644004821777, "learning_rate": 3.144117738423987e-07, "loss": 0.41639328, "memory(GiB)": 34.88, "step": 131830, "train_speed(iter/s)": 0.41068 }, { "acc": 0.9642416, "epoch": 3.569571927544472, "grad_norm": 4.171566963195801, "learning_rate": 3.1421679916406303e-07, "loss": 0.24180937, "memory(GiB)": 34.88, "step": 131835, "train_speed(iter/s)": 0.410681 }, { "acc": 0.93884001, "epoch": 3.569707307827688, "grad_norm": 9.6970853805542, "learning_rate": 3.140218831906325e-07, "loss": 0.42126026, "memory(GiB)": 34.88, "step": 131840, "train_speed(iter/s)": 0.410682 }, { "acc": 0.95787907, "epoch": 3.5698426881109038, "grad_norm": 7.158426761627197, "learning_rate": 3.1382702592455044e-07, "loss": 0.29653633, "memory(GiB)": 34.88, "step": 131845, "train_speed(iter/s)": 0.410682 }, { "acc": 0.9649807, "epoch": 3.569978068394119, "grad_norm": 4.030855178833008, "learning_rate": 3.1363222736825585e-07, "loss": 0.24364195, "memory(GiB)": 34.88, "step": 131850, "train_speed(iter/s)": 0.410683 }, { "acc": 0.96113262, "epoch": 3.5701134486773345, "grad_norm": 11.67517375946045, "learning_rate": 3.134374875241904e-07, "loss": 0.26338642, "memory(GiB)": 34.88, "step": 131855, "train_speed(iter/s)": 0.410684 }, { "acc": 0.96442156, "epoch": 3.5702488289605503, "grad_norm": 9.153908729553223, "learning_rate": 3.1324280639479134e-07, "loss": 0.21232338, "memory(GiB)": 34.88, "step": 131860, "train_speed(iter/s)": 0.410685 }, { "acc": 0.94269886, "epoch": 3.5703842092437656, "grad_norm": 10.173347473144531, "learning_rate": 3.1304818398250087e-07, "loss": 0.37710874, "memory(GiB)": 34.88, "step": 131865, "train_speed(iter/s)": 0.410686 }, { "acc": 0.94072552, "epoch": 3.5705195895269815, "grad_norm": 6.416683197021484, "learning_rate": 3.1285362028975404e-07, "loss": 0.36979117, "memory(GiB)": 34.88, "step": 131870, "train_speed(iter/s)": 0.410687 }, { "acc": 0.94179344, "epoch": 3.570654969810197, "grad_norm": 5.078640460968018, "learning_rate": 3.126591153189898e-07, "loss": 0.30644083, "memory(GiB)": 34.88, "step": 131875, "train_speed(iter/s)": 0.410688 }, { "acc": 0.94661427, "epoch": 3.5707903500934126, "grad_norm": 5.253516674041748, "learning_rate": 3.124646690726443e-07, "loss": 0.35036702, "memory(GiB)": 34.88, "step": 131880, "train_speed(iter/s)": 0.410688 }, { "acc": 0.9430109, "epoch": 3.570925730376628, "grad_norm": 2.507239580154419, "learning_rate": 3.122702815531536e-07, "loss": 0.3468298, "memory(GiB)": 34.88, "step": 131885, "train_speed(iter/s)": 0.410689 }, { "acc": 0.94608507, "epoch": 3.5710611106598433, "grad_norm": 7.23099946975708, "learning_rate": 3.120759527629528e-07, "loss": 0.31936991, "memory(GiB)": 34.88, "step": 131890, "train_speed(iter/s)": 0.41069 }, { "acc": 0.95450363, "epoch": 3.571196490943059, "grad_norm": 3.586697578430176, "learning_rate": 3.1188168270447415e-07, "loss": 0.21105456, "memory(GiB)": 34.88, "step": 131895, "train_speed(iter/s)": 0.410691 }, { "acc": 0.9451726, "epoch": 3.5713318712262745, "grad_norm": 8.217278480529785, "learning_rate": 3.116874713801549e-07, "loss": 0.34678464, "memory(GiB)": 34.88, "step": 131900, "train_speed(iter/s)": 0.410692 }, { "acc": 0.94208269, "epoch": 3.5714672515094903, "grad_norm": 7.602567672729492, "learning_rate": 3.1149331879242506e-07, "loss": 0.38375862, "memory(GiB)": 34.88, "step": 131905, "train_speed(iter/s)": 0.410692 }, { "acc": 0.96058826, "epoch": 3.5716026317927057, "grad_norm": 9.611868858337402, "learning_rate": 3.1129922494371803e-07, "loss": 0.21751337, "memory(GiB)": 34.88, "step": 131910, "train_speed(iter/s)": 0.410693 }, { "acc": 0.95711117, "epoch": 3.5717380120759215, "grad_norm": 5.294923305511475, "learning_rate": 3.1110518983646494e-07, "loss": 0.28579099, "memory(GiB)": 34.88, "step": 131915, "train_speed(iter/s)": 0.410694 }, { "acc": 0.96579189, "epoch": 3.571873392359137, "grad_norm": 3.452096700668335, "learning_rate": 3.109112134730969e-07, "loss": 0.20295701, "memory(GiB)": 34.88, "step": 131920, "train_speed(iter/s)": 0.410695 }, { "acc": 0.94119263, "epoch": 3.572008772642352, "grad_norm": 5.5905890464782715, "learning_rate": 3.107172958560429e-07, "loss": 0.33806732, "memory(GiB)": 34.88, "step": 131925, "train_speed(iter/s)": 0.410696 }, { "acc": 0.9574481, "epoch": 3.572144152925568, "grad_norm": 4.991487979888916, "learning_rate": 3.1052343698773293e-07, "loss": 0.29420733, "memory(GiB)": 34.88, "step": 131930, "train_speed(iter/s)": 0.410696 }, { "acc": 0.94724712, "epoch": 3.5722795332087833, "grad_norm": 4.488200664520264, "learning_rate": 3.1032963687059534e-07, "loss": 0.35412679, "memory(GiB)": 34.88, "step": 131935, "train_speed(iter/s)": 0.410697 }, { "acc": 0.95799627, "epoch": 3.572414913491999, "grad_norm": 5.832020282745361, "learning_rate": 3.1013589550705746e-07, "loss": 0.249453, "memory(GiB)": 34.88, "step": 131940, "train_speed(iter/s)": 0.410698 }, { "acc": 0.93620338, "epoch": 3.5725502937752145, "grad_norm": 6.82173490524292, "learning_rate": 3.0994221289954646e-07, "loss": 0.4088006, "memory(GiB)": 34.88, "step": 131945, "train_speed(iter/s)": 0.410699 }, { "acc": 0.95448551, "epoch": 3.5726856740584303, "grad_norm": 5.1323981285095215, "learning_rate": 3.097485890504886e-07, "loss": 0.23504324, "memory(GiB)": 34.88, "step": 131950, "train_speed(iter/s)": 0.4107 }, { "acc": 0.95984612, "epoch": 3.5728210543416457, "grad_norm": 15.598394393920898, "learning_rate": 3.095550239623099e-07, "loss": 0.21699855, "memory(GiB)": 34.88, "step": 131955, "train_speed(iter/s)": 0.4107 }, { "acc": 0.94826899, "epoch": 3.572956434624861, "grad_norm": 5.3963823318481445, "learning_rate": 3.0936151763743444e-07, "loss": 0.32637517, "memory(GiB)": 34.88, "step": 131960, "train_speed(iter/s)": 0.410701 }, { "acc": 0.96148949, "epoch": 3.573091814908077, "grad_norm": 2.432697057723999, "learning_rate": 3.091680700782866e-07, "loss": 0.23060141, "memory(GiB)": 34.88, "step": 131965, "train_speed(iter/s)": 0.410702 }, { "acc": 0.93815174, "epoch": 3.5732271951912926, "grad_norm": 6.337406158447266, "learning_rate": 3.0897468128728973e-07, "loss": 0.37668438, "memory(GiB)": 34.88, "step": 131970, "train_speed(iter/s)": 0.410703 }, { "acc": 0.94843864, "epoch": 3.573362575474508, "grad_norm": 11.33381462097168, "learning_rate": 3.087813512668662e-07, "loss": 0.34812093, "memory(GiB)": 34.88, "step": 131975, "train_speed(iter/s)": 0.410704 }, { "acc": 0.94348288, "epoch": 3.5734979557577233, "grad_norm": 5.187891006469727, "learning_rate": 3.0858808001943767e-07, "loss": 0.33808517, "memory(GiB)": 34.88, "step": 131980, "train_speed(iter/s)": 0.410704 }, { "acc": 0.92556171, "epoch": 3.573633336040939, "grad_norm": 7.187459468841553, "learning_rate": 3.083948675474258e-07, "loss": 0.51412716, "memory(GiB)": 34.88, "step": 131985, "train_speed(iter/s)": 0.410705 }, { "acc": 0.95224838, "epoch": 3.5737687163241545, "grad_norm": 6.639020919799805, "learning_rate": 3.082017138532512e-07, "loss": 0.27797322, "memory(GiB)": 34.88, "step": 131990, "train_speed(iter/s)": 0.410706 }, { "acc": 0.95920563, "epoch": 3.57390409660737, "grad_norm": 9.15121078491211, "learning_rate": 3.0800861893933236e-07, "loss": 0.24792345, "memory(GiB)": 34.88, "step": 131995, "train_speed(iter/s)": 0.410707 }, { "acc": 0.93835573, "epoch": 3.5740394768905857, "grad_norm": 8.118609428405762, "learning_rate": 3.078155828080887e-07, "loss": 0.36760263, "memory(GiB)": 34.88, "step": 132000, "train_speed(iter/s)": 0.410708 }, { "acc": 0.94018326, "epoch": 3.5741748571738015, "grad_norm": 6.170809268951416, "learning_rate": 3.076226054619385e-07, "loss": 0.4160388, "memory(GiB)": 34.88, "step": 132005, "train_speed(iter/s)": 0.410709 }, { "acc": 0.94710579, "epoch": 3.574310237457017, "grad_norm": 4.9689836502075195, "learning_rate": 3.074296869032997e-07, "loss": 0.26380477, "memory(GiB)": 34.88, "step": 132010, "train_speed(iter/s)": 0.41071 }, { "acc": 0.94503746, "epoch": 3.574445617740232, "grad_norm": 10.096278190612793, "learning_rate": 3.0723682713458794e-07, "loss": 0.32204909, "memory(GiB)": 34.88, "step": 132015, "train_speed(iter/s)": 0.41071 }, { "acc": 0.94561138, "epoch": 3.574580998023448, "grad_norm": 4.3823699951171875, "learning_rate": 3.070440261582198e-07, "loss": 0.36517167, "memory(GiB)": 34.88, "step": 132020, "train_speed(iter/s)": 0.410711 }, { "acc": 0.9433424, "epoch": 3.5747163783066633, "grad_norm": 16.64359474182129, "learning_rate": 3.0685128397661096e-07, "loss": 0.38053503, "memory(GiB)": 34.88, "step": 132025, "train_speed(iter/s)": 0.410712 }, { "acc": 0.94660301, "epoch": 3.574851758589879, "grad_norm": 4.146414279937744, "learning_rate": 3.0665860059217426e-07, "loss": 0.31773701, "memory(GiB)": 34.88, "step": 132030, "train_speed(iter/s)": 0.410713 }, { "acc": 0.94560814, "epoch": 3.5749871388730945, "grad_norm": 4.187147617340088, "learning_rate": 3.0646597600732475e-07, "loss": 0.34688771, "memory(GiB)": 34.88, "step": 132035, "train_speed(iter/s)": 0.410713 }, { "acc": 0.94696646, "epoch": 3.5751225191563103, "grad_norm": 3.3636744022369385, "learning_rate": 3.062734102244752e-07, "loss": 0.3052948, "memory(GiB)": 34.88, "step": 132040, "train_speed(iter/s)": 0.410714 }, { "acc": 0.9501915, "epoch": 3.5752578994395257, "grad_norm": 4.569845676422119, "learning_rate": 3.060809032460385e-07, "loss": 0.32780185, "memory(GiB)": 34.88, "step": 132045, "train_speed(iter/s)": 0.410715 }, { "acc": 0.94360018, "epoch": 3.575393279722741, "grad_norm": 7.630142688751221, "learning_rate": 3.058884550744247e-07, "loss": 0.39252043, "memory(GiB)": 34.88, "step": 132050, "train_speed(iter/s)": 0.410716 }, { "acc": 0.93652239, "epoch": 3.575528660005957, "grad_norm": 9.850695610046387, "learning_rate": 3.0569606571204545e-07, "loss": 0.43158445, "memory(GiB)": 34.88, "step": 132055, "train_speed(iter/s)": 0.410717 }, { "acc": 0.9465518, "epoch": 3.575664040289172, "grad_norm": 8.098967552185059, "learning_rate": 3.0550373516131147e-07, "loss": 0.277615, "memory(GiB)": 34.88, "step": 132060, "train_speed(iter/s)": 0.410717 }, { "acc": 0.94134045, "epoch": 3.575799420572388, "grad_norm": 11.81939697265625, "learning_rate": 3.0531146342463047e-07, "loss": 0.3104146, "memory(GiB)": 34.88, "step": 132065, "train_speed(iter/s)": 0.410718 }, { "acc": 0.9319128, "epoch": 3.5759348008556033, "grad_norm": 8.785333633422852, "learning_rate": 3.0511925050441204e-07, "loss": 0.39624066, "memory(GiB)": 34.88, "step": 132070, "train_speed(iter/s)": 0.410719 }, { "acc": 0.94096489, "epoch": 3.576070181138819, "grad_norm": 6.257530212402344, "learning_rate": 3.04927096403064e-07, "loss": 0.40618095, "memory(GiB)": 34.88, "step": 132075, "train_speed(iter/s)": 0.41072 }, { "acc": 0.96103325, "epoch": 3.5762055614220345, "grad_norm": 3.306295871734619, "learning_rate": 3.0473500112299415e-07, "loss": 0.22281466, "memory(GiB)": 34.88, "step": 132080, "train_speed(iter/s)": 0.410721 }, { "acc": 0.93347597, "epoch": 3.57634094170525, "grad_norm": 6.309688091278076, "learning_rate": 3.04542964666607e-07, "loss": 0.43164511, "memory(GiB)": 34.88, "step": 132085, "train_speed(iter/s)": 0.410722 }, { "acc": 0.94721508, "epoch": 3.5764763219884657, "grad_norm": 10.082653999328613, "learning_rate": 3.043509870363098e-07, "loss": 0.33803828, "memory(GiB)": 34.88, "step": 132090, "train_speed(iter/s)": 0.410722 }, { "acc": 0.94892874, "epoch": 3.576611702271681, "grad_norm": 5.382013320922852, "learning_rate": 3.041590682345071e-07, "loss": 0.34470339, "memory(GiB)": 34.88, "step": 132095, "train_speed(iter/s)": 0.410723 }, { "acc": 0.94451675, "epoch": 3.576747082554897, "grad_norm": 6.1197662353515625, "learning_rate": 3.039672082636023e-07, "loss": 0.39641962, "memory(GiB)": 34.88, "step": 132100, "train_speed(iter/s)": 0.410724 }, { "acc": 0.95068302, "epoch": 3.576882462838112, "grad_norm": 8.066569328308105, "learning_rate": 3.0377540712599933e-07, "loss": 0.27631552, "memory(GiB)": 34.88, "step": 132105, "train_speed(iter/s)": 0.410725 }, { "acc": 0.93545971, "epoch": 3.577017843121328, "grad_norm": 4.392423152923584, "learning_rate": 3.0358366482410104e-07, "loss": 0.33021865, "memory(GiB)": 34.88, "step": 132110, "train_speed(iter/s)": 0.410726 }, { "acc": 0.95944042, "epoch": 3.5771532234045433, "grad_norm": 4.4417405128479, "learning_rate": 3.033919813603102e-07, "loss": 0.26234956, "memory(GiB)": 34.88, "step": 132115, "train_speed(iter/s)": 0.410726 }, { "acc": 0.93501835, "epoch": 3.5772886036877587, "grad_norm": 9.52663803100586, "learning_rate": 3.0320035673702585e-07, "loss": 0.44581566, "memory(GiB)": 34.88, "step": 132120, "train_speed(iter/s)": 0.410727 }, { "acc": 0.95251808, "epoch": 3.5774239839709745, "grad_norm": 15.58467960357666, "learning_rate": 3.0300879095665025e-07, "loss": 0.29633174, "memory(GiB)": 34.88, "step": 132125, "train_speed(iter/s)": 0.410728 }, { "acc": 0.95850735, "epoch": 3.57755936425419, "grad_norm": 2.537428140640259, "learning_rate": 3.028172840215823e-07, "loss": 0.23172007, "memory(GiB)": 34.88, "step": 132130, "train_speed(iter/s)": 0.410728 }, { "acc": 0.95413551, "epoch": 3.5776947445374057, "grad_norm": 5.320504665374756, "learning_rate": 3.026258359342221e-07, "loss": 0.23858213, "memory(GiB)": 34.88, "step": 132135, "train_speed(iter/s)": 0.410729 }, { "acc": 0.94348354, "epoch": 3.577830124820621, "grad_norm": 6.255153656005859, "learning_rate": 3.0243444669696647e-07, "loss": 0.36718063, "memory(GiB)": 34.88, "step": 132140, "train_speed(iter/s)": 0.41073 }, { "acc": 0.95531349, "epoch": 3.577965505103837, "grad_norm": 43.384056091308594, "learning_rate": 3.022431163122136e-07, "loss": 0.28865657, "memory(GiB)": 34.88, "step": 132145, "train_speed(iter/s)": 0.410731 }, { "acc": 0.94034061, "epoch": 3.578100885387052, "grad_norm": 14.18187141418457, "learning_rate": 3.02051844782361e-07, "loss": 0.34331021, "memory(GiB)": 34.88, "step": 132150, "train_speed(iter/s)": 0.410732 }, { "acc": 0.93895435, "epoch": 3.5782362656702675, "grad_norm": 9.565655708312988, "learning_rate": 3.01860632109803e-07, "loss": 0.40769453, "memory(GiB)": 34.88, "step": 132155, "train_speed(iter/s)": 0.410732 }, { "acc": 0.94975433, "epoch": 3.5783716459534833, "grad_norm": 4.921936511993408, "learning_rate": 3.016694782969359e-07, "loss": 0.29666367, "memory(GiB)": 34.88, "step": 132160, "train_speed(iter/s)": 0.410733 }, { "acc": 0.93821535, "epoch": 3.578507026236699, "grad_norm": 5.969814777374268, "learning_rate": 3.0147838334615413e-07, "loss": 0.36366861, "memory(GiB)": 34.88, "step": 132165, "train_speed(iter/s)": 0.410734 }, { "acc": 0.94442234, "epoch": 3.5786424065199145, "grad_norm": 7.063801288604736, "learning_rate": 3.012873472598528e-07, "loss": 0.32701459, "memory(GiB)": 34.88, "step": 132170, "train_speed(iter/s)": 0.410735 }, { "acc": 0.94492407, "epoch": 3.57877778680313, "grad_norm": 4.765924453735352, "learning_rate": 3.0109637004042203e-07, "loss": 0.31840127, "memory(GiB)": 34.88, "step": 132175, "train_speed(iter/s)": 0.410736 }, { "acc": 0.95173531, "epoch": 3.5789131670863457, "grad_norm": 6.581114768981934, "learning_rate": 3.0090545169025787e-07, "loss": 0.29718585, "memory(GiB)": 34.88, "step": 132180, "train_speed(iter/s)": 0.410737 }, { "acc": 0.94341507, "epoch": 3.579048547369561, "grad_norm": 4.312366962432861, "learning_rate": 3.007145922117499e-07, "loss": 0.3184891, "memory(GiB)": 34.88, "step": 132185, "train_speed(iter/s)": 0.410737 }, { "acc": 0.93982639, "epoch": 3.5791839276527764, "grad_norm": 11.234397888183594, "learning_rate": 3.0052379160728883e-07, "loss": 0.44328475, "memory(GiB)": 34.88, "step": 132190, "train_speed(iter/s)": 0.410738 }, { "acc": 0.94994297, "epoch": 3.579319307935992, "grad_norm": 3.4675614833831787, "learning_rate": 3.003330498792646e-07, "loss": 0.26367369, "memory(GiB)": 34.88, "step": 132195, "train_speed(iter/s)": 0.410739 }, { "acc": 0.95169392, "epoch": 3.579454688219208, "grad_norm": 5.830592155456543, "learning_rate": 3.001423670300679e-07, "loss": 0.27885504, "memory(GiB)": 34.88, "step": 132200, "train_speed(iter/s)": 0.41074 }, { "acc": 0.93721771, "epoch": 3.5795900685024233, "grad_norm": 21.833919525146484, "learning_rate": 2.9995174306208715e-07, "loss": 0.34833353, "memory(GiB)": 34.88, "step": 132205, "train_speed(iter/s)": 0.410741 }, { "acc": 0.95031385, "epoch": 3.5797254487856387, "grad_norm": 4.426731586456299, "learning_rate": 2.997611779777085e-07, "loss": 0.34217136, "memory(GiB)": 34.88, "step": 132210, "train_speed(iter/s)": 0.410741 }, { "acc": 0.95139914, "epoch": 3.5798608290688545, "grad_norm": 11.037793159484863, "learning_rate": 2.995706717793214e-07, "loss": 0.2918376, "memory(GiB)": 34.88, "step": 132215, "train_speed(iter/s)": 0.410742 }, { "acc": 0.9488905, "epoch": 3.57999620935207, "grad_norm": 10.529280662536621, "learning_rate": 2.9938022446931167e-07, "loss": 0.3512228, "memory(GiB)": 34.88, "step": 132220, "train_speed(iter/s)": 0.410743 }, { "acc": 0.95322762, "epoch": 3.5801315896352857, "grad_norm": 13.091195106506348, "learning_rate": 2.991898360500648e-07, "loss": 0.33037333, "memory(GiB)": 34.88, "step": 132225, "train_speed(iter/s)": 0.410744 }, { "acc": 0.94757519, "epoch": 3.580266969918501, "grad_norm": 4.346817493438721, "learning_rate": 2.9899950652396534e-07, "loss": 0.31662784, "memory(GiB)": 34.88, "step": 132230, "train_speed(iter/s)": 0.410744 }, { "acc": 0.94024296, "epoch": 3.580402350201717, "grad_norm": 6.638128757476807, "learning_rate": 2.988092358933983e-07, "loss": 0.42567663, "memory(GiB)": 34.88, "step": 132235, "train_speed(iter/s)": 0.410745 }, { "acc": 0.95300131, "epoch": 3.580537730484932, "grad_norm": 10.852829933166504, "learning_rate": 2.9861902416074723e-07, "loss": 0.28177681, "memory(GiB)": 34.88, "step": 132240, "train_speed(iter/s)": 0.410746 }, { "acc": 0.94181709, "epoch": 3.5806731107681475, "grad_norm": 9.457557678222656, "learning_rate": 2.9842887132839323e-07, "loss": 0.34522471, "memory(GiB)": 34.88, "step": 132245, "train_speed(iter/s)": 0.410747 }, { "acc": 0.95000448, "epoch": 3.5808084910513633, "grad_norm": 6.578012943267822, "learning_rate": 2.9823877739872087e-07, "loss": 0.33850844, "memory(GiB)": 34.88, "step": 132250, "train_speed(iter/s)": 0.410748 }, { "acc": 0.95058193, "epoch": 3.5809438713345787, "grad_norm": 13.914273262023926, "learning_rate": 2.980487423741096e-07, "loss": 0.31449726, "memory(GiB)": 34.88, "step": 132255, "train_speed(iter/s)": 0.410749 }, { "acc": 0.94672899, "epoch": 3.5810792516177945, "grad_norm": 9.433591842651367, "learning_rate": 2.9785876625694175e-07, "loss": 0.26001897, "memory(GiB)": 34.88, "step": 132260, "train_speed(iter/s)": 0.41075 }, { "acc": 0.94965019, "epoch": 3.58121463190101, "grad_norm": 8.695359230041504, "learning_rate": 2.9766884904959526e-07, "loss": 0.33069019, "memory(GiB)": 34.88, "step": 132265, "train_speed(iter/s)": 0.410751 }, { "acc": 0.95382671, "epoch": 3.5813500121842257, "grad_norm": 2.8027288913726807, "learning_rate": 2.9747899075444947e-07, "loss": 0.31422448, "memory(GiB)": 34.88, "step": 132270, "train_speed(iter/s)": 0.410751 }, { "acc": 0.94925356, "epoch": 3.581485392467441, "grad_norm": 11.454726219177246, "learning_rate": 2.972891913738841e-07, "loss": 0.2929297, "memory(GiB)": 34.88, "step": 132275, "train_speed(iter/s)": 0.410752 }, { "acc": 0.94638939, "epoch": 3.5816207727506564, "grad_norm": 7.63939094543457, "learning_rate": 2.9709945091027475e-07, "loss": 0.26841166, "memory(GiB)": 34.88, "step": 132280, "train_speed(iter/s)": 0.410753 }, { "acc": 0.93785524, "epoch": 3.581756153033872, "grad_norm": 6.446976661682129, "learning_rate": 2.9690976936600086e-07, "loss": 0.40249958, "memory(GiB)": 34.88, "step": 132285, "train_speed(iter/s)": 0.410754 }, { "acc": 0.95974293, "epoch": 3.5818915333170875, "grad_norm": 3.7998836040496826, "learning_rate": 2.9672014674343645e-07, "loss": 0.20314753, "memory(GiB)": 34.88, "step": 132290, "train_speed(iter/s)": 0.410755 }, { "acc": 0.95716267, "epoch": 3.5820269136003033, "grad_norm": 7.069152355194092, "learning_rate": 2.9653058304495825e-07, "loss": 0.28953447, "memory(GiB)": 34.88, "step": 132295, "train_speed(iter/s)": 0.410755 }, { "acc": 0.9389678, "epoch": 3.5821622938835187, "grad_norm": 6.1567535400390625, "learning_rate": 2.9634107827293864e-07, "loss": 0.4055099, "memory(GiB)": 34.88, "step": 132300, "train_speed(iter/s)": 0.410756 }, { "acc": 0.94029846, "epoch": 3.5822976741667345, "grad_norm": 6.835840225219727, "learning_rate": 2.961516324297549e-07, "loss": 0.3304256, "memory(GiB)": 34.88, "step": 132305, "train_speed(iter/s)": 0.410757 }, { "acc": 0.94032154, "epoch": 3.58243305444995, "grad_norm": 4.478571891784668, "learning_rate": 2.959622455177781e-07, "loss": 0.33771214, "memory(GiB)": 34.88, "step": 132310, "train_speed(iter/s)": 0.410758 }, { "acc": 0.94565344, "epoch": 3.582568434733165, "grad_norm": 7.323140621185303, "learning_rate": 2.9577291753938015e-07, "loss": 0.33681493, "memory(GiB)": 34.88, "step": 132315, "train_speed(iter/s)": 0.410758 }, { "acc": 0.94759865, "epoch": 3.582703815016381, "grad_norm": 10.790074348449707, "learning_rate": 2.9558364849693443e-07, "loss": 0.34200926, "memory(GiB)": 34.88, "step": 132320, "train_speed(iter/s)": 0.410759 }, { "acc": 0.95088243, "epoch": 3.582839195299597, "grad_norm": 5.193244457244873, "learning_rate": 2.9539443839281106e-07, "loss": 0.34513555, "memory(GiB)": 34.88, "step": 132325, "train_speed(iter/s)": 0.41076 }, { "acc": 0.95924368, "epoch": 3.582974575582812, "grad_norm": 14.332396507263184, "learning_rate": 2.9520528722938114e-07, "loss": 0.23962162, "memory(GiB)": 34.88, "step": 132330, "train_speed(iter/s)": 0.410761 }, { "acc": 0.94076052, "epoch": 3.5831099558660275, "grad_norm": 3.8399760723114014, "learning_rate": 2.950161950090115e-07, "loss": 0.33049319, "memory(GiB)": 34.88, "step": 132335, "train_speed(iter/s)": 0.410762 }, { "acc": 0.95454426, "epoch": 3.5832453361492433, "grad_norm": 5.500960826873779, "learning_rate": 2.948271617340744e-07, "loss": 0.26196921, "memory(GiB)": 34.88, "step": 132340, "train_speed(iter/s)": 0.410762 }, { "acc": 0.94873238, "epoch": 3.5833807164324587, "grad_norm": 7.620869159698486, "learning_rate": 2.946381874069356e-07, "loss": 0.27824106, "memory(GiB)": 34.88, "step": 132345, "train_speed(iter/s)": 0.410763 }, { "acc": 0.93752565, "epoch": 3.583516096715674, "grad_norm": 8.26372241973877, "learning_rate": 2.9444927202996284e-07, "loss": 0.43317952, "memory(GiB)": 34.88, "step": 132350, "train_speed(iter/s)": 0.410764 }, { "acc": 0.95880423, "epoch": 3.58365147699889, "grad_norm": 5.283538818359375, "learning_rate": 2.942604156055235e-07, "loss": 0.23144221, "memory(GiB)": 34.88, "step": 132355, "train_speed(iter/s)": 0.410765 }, { "acc": 0.93880367, "epoch": 3.5837868572821057, "grad_norm": 10.187180519104004, "learning_rate": 2.940716181359821e-07, "loss": 0.36728737, "memory(GiB)": 34.88, "step": 132360, "train_speed(iter/s)": 0.410766 }, { "acc": 0.95535269, "epoch": 3.583922237565321, "grad_norm": 5.276028156280518, "learning_rate": 2.938828796237054e-07, "loss": 0.24699669, "memory(GiB)": 34.88, "step": 132365, "train_speed(iter/s)": 0.410767 }, { "acc": 0.94954786, "epoch": 3.5840576178485364, "grad_norm": 13.221109390258789, "learning_rate": 2.936942000710546e-07, "loss": 0.34071226, "memory(GiB)": 34.88, "step": 132370, "train_speed(iter/s)": 0.410767 }, { "acc": 0.9467144, "epoch": 3.584192998131752, "grad_norm": 5.807317733764648, "learning_rate": 2.9350557948039705e-07, "loss": 0.31660066, "memory(GiB)": 34.88, "step": 132375, "train_speed(iter/s)": 0.410768 }, { "acc": 0.95066509, "epoch": 3.5843283784149675, "grad_norm": 3.733065605163574, "learning_rate": 2.933170178540928e-07, "loss": 0.33311911, "memory(GiB)": 34.88, "step": 132380, "train_speed(iter/s)": 0.410769 }, { "acc": 0.94256516, "epoch": 3.5844637586981833, "grad_norm": 8.546012878417969, "learning_rate": 2.931285151945059e-07, "loss": 0.34327674, "memory(GiB)": 34.88, "step": 132385, "train_speed(iter/s)": 0.41077 }, { "acc": 0.93616848, "epoch": 3.5845991389813987, "grad_norm": 7.434121608734131, "learning_rate": 2.929400715039964e-07, "loss": 0.40648022, "memory(GiB)": 34.88, "step": 132390, "train_speed(iter/s)": 0.410771 }, { "acc": 0.94282312, "epoch": 3.5847345192646145, "grad_norm": 5.684959888458252, "learning_rate": 2.92751686784925e-07, "loss": 0.37151499, "memory(GiB)": 34.88, "step": 132395, "train_speed(iter/s)": 0.410771 }, { "acc": 0.95398331, "epoch": 3.58486989954783, "grad_norm": 3.3923861980438232, "learning_rate": 2.9256336103965277e-07, "loss": 0.29102943, "memory(GiB)": 34.88, "step": 132400, "train_speed(iter/s)": 0.410772 }, { "acc": 0.95246029, "epoch": 3.585005279831045, "grad_norm": 4.547721862792969, "learning_rate": 2.923750942705366e-07, "loss": 0.27523847, "memory(GiB)": 34.88, "step": 132405, "train_speed(iter/s)": 0.410773 }, { "acc": 0.95178471, "epoch": 3.585140660114261, "grad_norm": 10.257026672363281, "learning_rate": 2.921868864799377e-07, "loss": 0.30901337, "memory(GiB)": 34.88, "step": 132410, "train_speed(iter/s)": 0.410774 }, { "acc": 0.94617929, "epoch": 3.5852760403974764, "grad_norm": 3.9431324005126953, "learning_rate": 2.9199873767021174e-07, "loss": 0.35492818, "memory(GiB)": 34.88, "step": 132415, "train_speed(iter/s)": 0.410775 }, { "acc": 0.96262474, "epoch": 3.585411420680692, "grad_norm": 3.9764747619628906, "learning_rate": 2.918106478437165e-07, "loss": 0.18689631, "memory(GiB)": 34.88, "step": 132420, "train_speed(iter/s)": 0.410776 }, { "acc": 0.96376419, "epoch": 3.5855468009639075, "grad_norm": 13.403462409973145, "learning_rate": 2.9162261700280715e-07, "loss": 0.28167658, "memory(GiB)": 34.88, "step": 132425, "train_speed(iter/s)": 0.410777 }, { "acc": 0.94993219, "epoch": 3.5856821812471233, "grad_norm": 7.484464168548584, "learning_rate": 2.91434645149841e-07, "loss": 0.32028341, "memory(GiB)": 34.88, "step": 132430, "train_speed(iter/s)": 0.410778 }, { "acc": 0.95021954, "epoch": 3.5858175615303387, "grad_norm": 11.953947067260742, "learning_rate": 2.9124673228717204e-07, "loss": 0.35191875, "memory(GiB)": 34.88, "step": 132435, "train_speed(iter/s)": 0.410778 }, { "acc": 0.94354897, "epoch": 3.585952941813554, "grad_norm": 6.725879192352295, "learning_rate": 2.9105887841715205e-07, "loss": 0.34174962, "memory(GiB)": 34.88, "step": 132440, "train_speed(iter/s)": 0.410779 }, { "acc": 0.96391163, "epoch": 3.58608832209677, "grad_norm": 6.275700569152832, "learning_rate": 2.9087108354213785e-07, "loss": 0.21666238, "memory(GiB)": 34.88, "step": 132445, "train_speed(iter/s)": 0.41078 }, { "acc": 0.95491714, "epoch": 3.5862237023799852, "grad_norm": 4.748389720916748, "learning_rate": 2.906833476644794e-07, "loss": 0.27745047, "memory(GiB)": 34.88, "step": 132450, "train_speed(iter/s)": 0.410781 }, { "acc": 0.95188503, "epoch": 3.586359082663201, "grad_norm": 4.965550422668457, "learning_rate": 2.904956707865303e-07, "loss": 0.29982772, "memory(GiB)": 34.88, "step": 132455, "train_speed(iter/s)": 0.410782 }, { "acc": 0.94918146, "epoch": 3.5864944629464164, "grad_norm": 7.378287315368652, "learning_rate": 2.9030805291063897e-07, "loss": 0.22479415, "memory(GiB)": 34.88, "step": 132460, "train_speed(iter/s)": 0.410783 }, { "acc": 0.96025181, "epoch": 3.586629843229632, "grad_norm": 5.304469585418701, "learning_rate": 2.901204940391587e-07, "loss": 0.23993921, "memory(GiB)": 34.88, "step": 132465, "train_speed(iter/s)": 0.410783 }, { "acc": 0.95247135, "epoch": 3.5867652235128475, "grad_norm": 6.127531051635742, "learning_rate": 2.8993299417443697e-07, "loss": 0.24915514, "memory(GiB)": 34.88, "step": 132470, "train_speed(iter/s)": 0.410784 }, { "acc": 0.94782524, "epoch": 3.586900603796063, "grad_norm": 8.827369689941406, "learning_rate": 2.897455533188233e-07, "loss": 0.33704958, "memory(GiB)": 34.88, "step": 132475, "train_speed(iter/s)": 0.410785 }, { "acc": 0.9372818, "epoch": 3.5870359840792787, "grad_norm": 20.01630401611328, "learning_rate": 2.895581714746661e-07, "loss": 0.37392769, "memory(GiB)": 34.88, "step": 132480, "train_speed(iter/s)": 0.410786 }, { "acc": 0.95728788, "epoch": 3.5871713643624945, "grad_norm": 2.666912794113159, "learning_rate": 2.893708486443116e-07, "loss": 0.28822823, "memory(GiB)": 34.88, "step": 132485, "train_speed(iter/s)": 0.410787 }, { "acc": 0.94350433, "epoch": 3.58730674464571, "grad_norm": 12.05982494354248, "learning_rate": 2.8918358483010825e-07, "loss": 0.35987403, "memory(GiB)": 34.88, "step": 132490, "train_speed(iter/s)": 0.410788 }, { "acc": 0.94804516, "epoch": 3.5874421249289252, "grad_norm": 7.0524983406066895, "learning_rate": 2.8899638003439897e-07, "loss": 0.31631327, "memory(GiB)": 34.88, "step": 132495, "train_speed(iter/s)": 0.410788 }, { "acc": 0.95049191, "epoch": 3.587577505212141, "grad_norm": 8.081525802612305, "learning_rate": 2.8880923425953214e-07, "loss": 0.32648933, "memory(GiB)": 34.88, "step": 132500, "train_speed(iter/s)": 0.410789 }, { "acc": 0.95637627, "epoch": 3.5877128854953564, "grad_norm": 5.228618144989014, "learning_rate": 2.8862214750784957e-07, "loss": 0.30318875, "memory(GiB)": 34.88, "step": 132505, "train_speed(iter/s)": 0.41079 }, { "acc": 0.95821428, "epoch": 3.5878482657785717, "grad_norm": 9.766704559326172, "learning_rate": 2.884351197816964e-07, "loss": 0.25979743, "memory(GiB)": 34.88, "step": 132510, "train_speed(iter/s)": 0.410791 }, { "acc": 0.9357523, "epoch": 3.5879836460617875, "grad_norm": 4.268631935119629, "learning_rate": 2.882481510834155e-07, "loss": 0.3711489, "memory(GiB)": 34.88, "step": 132515, "train_speed(iter/s)": 0.410791 }, { "acc": 0.94527216, "epoch": 3.5881190263450033, "grad_norm": 3.508620500564575, "learning_rate": 2.8806124141534753e-07, "loss": 0.36781836, "memory(GiB)": 34.88, "step": 132520, "train_speed(iter/s)": 0.410792 }, { "acc": 0.94713421, "epoch": 3.5882544066282187, "grad_norm": 5.175509452819824, "learning_rate": 2.878743907798359e-07, "loss": 0.27893541, "memory(GiB)": 34.88, "step": 132525, "train_speed(iter/s)": 0.410793 }, { "acc": 0.94686661, "epoch": 3.588389786911434, "grad_norm": 7.606995105743408, "learning_rate": 2.876875991792192e-07, "loss": 0.41395373, "memory(GiB)": 34.88, "step": 132530, "train_speed(iter/s)": 0.410794 }, { "acc": 0.96391773, "epoch": 3.58852516719465, "grad_norm": 2.543585777282715, "learning_rate": 2.875008666158396e-07, "loss": 0.20975308, "memory(GiB)": 34.88, "step": 132535, "train_speed(iter/s)": 0.410795 }, { "acc": 0.94842167, "epoch": 3.5886605474778652, "grad_norm": 5.441436767578125, "learning_rate": 2.8731419309203454e-07, "loss": 0.27978351, "memory(GiB)": 34.88, "step": 132540, "train_speed(iter/s)": 0.410796 }, { "acc": 0.94681749, "epoch": 3.588795927761081, "grad_norm": 6.537909030914307, "learning_rate": 2.8712757861014304e-07, "loss": 0.36871805, "memory(GiB)": 34.88, "step": 132545, "train_speed(iter/s)": 0.410797 }, { "acc": 0.9389946, "epoch": 3.5889313080442964, "grad_norm": 7.690547943115234, "learning_rate": 2.869410231725029e-07, "loss": 0.4054615, "memory(GiB)": 34.88, "step": 132550, "train_speed(iter/s)": 0.410797 }, { "acc": 0.94239578, "epoch": 3.589066688327512, "grad_norm": 3.6804847717285156, "learning_rate": 2.8675452678145206e-07, "loss": 0.37587481, "memory(GiB)": 34.88, "step": 132555, "train_speed(iter/s)": 0.410798 }, { "acc": 0.94574785, "epoch": 3.5892020686107275, "grad_norm": 5.567915916442871, "learning_rate": 2.8656808943932566e-07, "loss": 0.2749629, "memory(GiB)": 34.88, "step": 132560, "train_speed(iter/s)": 0.410799 }, { "acc": 0.94866781, "epoch": 3.589337448893943, "grad_norm": 6.184828281402588, "learning_rate": 2.8638171114845773e-07, "loss": 0.28048611, "memory(GiB)": 34.88, "step": 132565, "train_speed(iter/s)": 0.4108 }, { "acc": 0.95356207, "epoch": 3.5894728291771587, "grad_norm": 5.993279933929443, "learning_rate": 2.861953919111862e-07, "loss": 0.26838479, "memory(GiB)": 34.88, "step": 132570, "train_speed(iter/s)": 0.410801 }, { "acc": 0.95198002, "epoch": 3.589608209460374, "grad_norm": 8.305380821228027, "learning_rate": 2.8600913172984216e-07, "loss": 0.33814406, "memory(GiB)": 34.88, "step": 132575, "train_speed(iter/s)": 0.410802 }, { "acc": 0.92864447, "epoch": 3.58974358974359, "grad_norm": 5.360953330993652, "learning_rate": 2.858229306067608e-07, "loss": 0.46529288, "memory(GiB)": 34.88, "step": 132580, "train_speed(iter/s)": 0.410803 }, { "acc": 0.95780659, "epoch": 3.5898789700268052, "grad_norm": 2.7357943058013916, "learning_rate": 2.8563678854427394e-07, "loss": 0.2610554, "memory(GiB)": 34.88, "step": 132585, "train_speed(iter/s)": 0.410803 }, { "acc": 0.94597406, "epoch": 3.590014350310021, "grad_norm": 7.23321008682251, "learning_rate": 2.8545070554471394e-07, "loss": 0.37079868, "memory(GiB)": 34.88, "step": 132590, "train_speed(iter/s)": 0.410804 }, { "acc": 0.93954887, "epoch": 3.5901497305932364, "grad_norm": 5.9735517501831055, "learning_rate": 2.852646816104109e-07, "loss": 0.34408624, "memory(GiB)": 34.88, "step": 132595, "train_speed(iter/s)": 0.410805 }, { "acc": 0.94116869, "epoch": 3.5902851108764517, "grad_norm": 6.027656555175781, "learning_rate": 2.85078716743696e-07, "loss": 0.39041071, "memory(GiB)": 34.88, "step": 132600, "train_speed(iter/s)": 0.410806 }, { "acc": 0.95128431, "epoch": 3.5904204911596675, "grad_norm": 6.641737461090088, "learning_rate": 2.848928109468989e-07, "loss": 0.30242105, "memory(GiB)": 34.88, "step": 132605, "train_speed(iter/s)": 0.410806 }, { "acc": 0.93250847, "epoch": 3.590555871442883, "grad_norm": 5.443702220916748, "learning_rate": 2.8470696422234685e-07, "loss": 0.37447047, "memory(GiB)": 34.88, "step": 132610, "train_speed(iter/s)": 0.410807 }, { "acc": 0.95689259, "epoch": 3.5906912517260987, "grad_norm": 4.379971027374268, "learning_rate": 2.845211765723696e-07, "loss": 0.32239654, "memory(GiB)": 34.88, "step": 132615, "train_speed(iter/s)": 0.410808 }, { "acc": 0.93974028, "epoch": 3.590826632009314, "grad_norm": 7.468295574188232, "learning_rate": 2.843354479992937e-07, "loss": 0.34105649, "memory(GiB)": 34.88, "step": 132620, "train_speed(iter/s)": 0.410809 }, { "acc": 0.95337334, "epoch": 3.59096201229253, "grad_norm": 3.396646499633789, "learning_rate": 2.8414977850544727e-07, "loss": 0.3616837, "memory(GiB)": 34.88, "step": 132625, "train_speed(iter/s)": 0.41081 }, { "acc": 0.95006924, "epoch": 3.5910973925757452, "grad_norm": 3.5318222045898438, "learning_rate": 2.839641680931536e-07, "loss": 0.24905043, "memory(GiB)": 34.88, "step": 132630, "train_speed(iter/s)": 0.410811 }, { "acc": 0.94632683, "epoch": 3.5912327728589606, "grad_norm": 2.701392889022827, "learning_rate": 2.8377861676473965e-07, "loss": 0.27578223, "memory(GiB)": 34.88, "step": 132635, "train_speed(iter/s)": 0.410812 }, { "acc": 0.95632849, "epoch": 3.5913681531421764, "grad_norm": 8.231908798217773, "learning_rate": 2.835931245225298e-07, "loss": 0.24305537, "memory(GiB)": 34.88, "step": 132640, "train_speed(iter/s)": 0.410813 }, { "acc": 0.94989805, "epoch": 3.591503533425392, "grad_norm": 10.978379249572754, "learning_rate": 2.8340769136884723e-07, "loss": 0.32302747, "memory(GiB)": 34.88, "step": 132645, "train_speed(iter/s)": 0.410814 }, { "acc": 0.94718828, "epoch": 3.5916389137086076, "grad_norm": 5.2550458908081055, "learning_rate": 2.8322231730601406e-07, "loss": 0.31175427, "memory(GiB)": 34.88, "step": 132650, "train_speed(iter/s)": 0.410814 }, { "acc": 0.95245047, "epoch": 3.591774293991823, "grad_norm": 7.183697700500488, "learning_rate": 2.830370023363538e-07, "loss": 0.33065948, "memory(GiB)": 34.88, "step": 132655, "train_speed(iter/s)": 0.410815 }, { "acc": 0.94831133, "epoch": 3.5919096742750387, "grad_norm": 4.29453182220459, "learning_rate": 2.8285174646218777e-07, "loss": 0.30805931, "memory(GiB)": 34.88, "step": 132660, "train_speed(iter/s)": 0.410816 }, { "acc": 0.94885454, "epoch": 3.592045054558254, "grad_norm": 5.665436744689941, "learning_rate": 2.826665496858355e-07, "loss": 0.28852215, "memory(GiB)": 34.88, "step": 132665, "train_speed(iter/s)": 0.410817 }, { "acc": 0.95475721, "epoch": 3.5921804348414694, "grad_norm": 3.659780502319336, "learning_rate": 2.824814120096182e-07, "loss": 0.24667344, "memory(GiB)": 34.88, "step": 132670, "train_speed(iter/s)": 0.410818 }, { "acc": 0.95695114, "epoch": 3.5923158151246852, "grad_norm": 4.458962440490723, "learning_rate": 2.8229633343585443e-07, "loss": 0.26138737, "memory(GiB)": 34.88, "step": 132675, "train_speed(iter/s)": 0.410819 }, { "acc": 0.95737753, "epoch": 3.592451195407901, "grad_norm": 8.100298881530762, "learning_rate": 2.821113139668631e-07, "loss": 0.21213064, "memory(GiB)": 34.88, "step": 132680, "train_speed(iter/s)": 0.410819 }, { "acc": 0.94388599, "epoch": 3.5925865756911164, "grad_norm": 10.99699878692627, "learning_rate": 2.819263536049617e-07, "loss": 0.27090278, "memory(GiB)": 34.88, "step": 132685, "train_speed(iter/s)": 0.41082 }, { "acc": 0.96265793, "epoch": 3.5927219559743317, "grad_norm": 4.054483413696289, "learning_rate": 2.817414523524664e-07, "loss": 0.1994783, "memory(GiB)": 34.88, "step": 132690, "train_speed(iter/s)": 0.410821 }, { "acc": 0.94951973, "epoch": 3.5928573362575476, "grad_norm": 4.7746405601501465, "learning_rate": 2.8155661021169564e-07, "loss": 0.32779715, "memory(GiB)": 34.88, "step": 132695, "train_speed(iter/s)": 0.410822 }, { "acc": 0.94883223, "epoch": 3.592992716540763, "grad_norm": 9.354401588439941, "learning_rate": 2.813718271849624e-07, "loss": 0.3358299, "memory(GiB)": 34.88, "step": 132700, "train_speed(iter/s)": 0.410823 }, { "acc": 0.93549871, "epoch": 3.5931280968239787, "grad_norm": 7.083795547485352, "learning_rate": 2.8118710327458285e-07, "loss": 0.43519683, "memory(GiB)": 34.88, "step": 132705, "train_speed(iter/s)": 0.410824 }, { "acc": 0.94821129, "epoch": 3.593263477107194, "grad_norm": 2.9499239921569824, "learning_rate": 2.810024384828706e-07, "loss": 0.2868825, "memory(GiB)": 34.88, "step": 132710, "train_speed(iter/s)": 0.410825 }, { "acc": 0.94461517, "epoch": 3.59339885739041, "grad_norm": 13.173238754272461, "learning_rate": 2.8081783281214013e-07, "loss": 0.32825093, "memory(GiB)": 34.88, "step": 132715, "train_speed(iter/s)": 0.410826 }, { "acc": 0.94427528, "epoch": 3.5935342376736252, "grad_norm": 3.2801637649536133, "learning_rate": 2.8063328626470156e-07, "loss": 0.33018796, "memory(GiB)": 34.88, "step": 132720, "train_speed(iter/s)": 0.410826 }, { "acc": 0.95536852, "epoch": 3.5936696179568406, "grad_norm": 7.071707248687744, "learning_rate": 2.80448798842869e-07, "loss": 0.27387705, "memory(GiB)": 34.88, "step": 132725, "train_speed(iter/s)": 0.410827 }, { "acc": 0.96111059, "epoch": 3.5938049982400564, "grad_norm": 4.1653289794921875, "learning_rate": 2.802643705489526e-07, "loss": 0.28155565, "memory(GiB)": 34.88, "step": 132730, "train_speed(iter/s)": 0.410828 }, { "acc": 0.95100613, "epoch": 3.5939403785232718, "grad_norm": 8.579670906066895, "learning_rate": 2.800800013852624e-07, "loss": 0.32374556, "memory(GiB)": 34.88, "step": 132735, "train_speed(iter/s)": 0.410829 }, { "acc": 0.95530586, "epoch": 3.5940757588064876, "grad_norm": 5.889162063598633, "learning_rate": 2.7989569135410805e-07, "loss": 0.27216561, "memory(GiB)": 34.88, "step": 132740, "train_speed(iter/s)": 0.41083 }, { "acc": 0.96212807, "epoch": 3.594211139089703, "grad_norm": 1.926864504814148, "learning_rate": 2.797114404577991e-07, "loss": 0.22006221, "memory(GiB)": 34.88, "step": 132745, "train_speed(iter/s)": 0.410831 }, { "acc": 0.95067043, "epoch": 3.5943465193729187, "grad_norm": 13.365263938903809, "learning_rate": 2.795272486986436e-07, "loss": 0.26895862, "memory(GiB)": 34.88, "step": 132750, "train_speed(iter/s)": 0.410831 }, { "acc": 0.93632793, "epoch": 3.594481899656134, "grad_norm": 14.811210632324219, "learning_rate": 2.793431160789477e-07, "loss": 0.41439929, "memory(GiB)": 34.88, "step": 132755, "train_speed(iter/s)": 0.410832 }, { "acc": 0.94792156, "epoch": 3.5946172799393494, "grad_norm": 9.49028205871582, "learning_rate": 2.791590426010187e-07, "loss": 0.32686827, "memory(GiB)": 34.88, "step": 132760, "train_speed(iter/s)": 0.410833 }, { "acc": 0.95027075, "epoch": 3.5947526602225652, "grad_norm": 6.578581809997559, "learning_rate": 2.789750282671625e-07, "loss": 0.25669646, "memory(GiB)": 34.88, "step": 132765, "train_speed(iter/s)": 0.410834 }, { "acc": 0.93266335, "epoch": 3.5948880405057806, "grad_norm": 14.222452163696289, "learning_rate": 2.7879107307968573e-07, "loss": 0.44431839, "memory(GiB)": 34.88, "step": 132770, "train_speed(iter/s)": 0.410835 }, { "acc": 0.95403786, "epoch": 3.5950234207889964, "grad_norm": 7.780614376068115, "learning_rate": 2.786071770408903e-07, "loss": 0.30458417, "memory(GiB)": 34.88, "step": 132775, "train_speed(iter/s)": 0.410836 }, { "acc": 0.9539916, "epoch": 3.5951588010722118, "grad_norm": 10.469526290893555, "learning_rate": 2.7842334015308074e-07, "loss": 0.30353251, "memory(GiB)": 34.88, "step": 132780, "train_speed(iter/s)": 0.410837 }, { "acc": 0.958881, "epoch": 3.5952941813554276, "grad_norm": 3.6910572052001953, "learning_rate": 2.7823956241856115e-07, "loss": 0.24180589, "memory(GiB)": 34.88, "step": 132785, "train_speed(iter/s)": 0.410837 }, { "acc": 0.94453926, "epoch": 3.595429561638643, "grad_norm": 9.248684883117676, "learning_rate": 2.780558438396322e-07, "loss": 0.34246898, "memory(GiB)": 34.88, "step": 132790, "train_speed(iter/s)": 0.410838 }, { "acc": 0.9558918, "epoch": 3.5955649419218583, "grad_norm": 2.898637533187866, "learning_rate": 2.7787218441859576e-07, "loss": 0.22379446, "memory(GiB)": 34.88, "step": 132795, "train_speed(iter/s)": 0.410839 }, { "acc": 0.9529253, "epoch": 3.595700322205074, "grad_norm": 4.628509521484375, "learning_rate": 2.776885841577525e-07, "loss": 0.25114644, "memory(GiB)": 34.88, "step": 132800, "train_speed(iter/s)": 0.41084 }, { "acc": 0.93994713, "epoch": 3.59583570248829, "grad_norm": 24.048139572143555, "learning_rate": 2.7750504305940366e-07, "loss": 0.36447487, "memory(GiB)": 34.88, "step": 132805, "train_speed(iter/s)": 0.410841 }, { "acc": 0.93932724, "epoch": 3.5959710827715052, "grad_norm": 8.737028121948242, "learning_rate": 2.7732156112584606e-07, "loss": 0.36037803, "memory(GiB)": 34.88, "step": 132810, "train_speed(iter/s)": 0.410842 }, { "acc": 0.95424862, "epoch": 3.5961064630547206, "grad_norm": 7.5337934494018555, "learning_rate": 2.771381383593799e-07, "loss": 0.29033451, "memory(GiB)": 34.88, "step": 132815, "train_speed(iter/s)": 0.410842 }, { "acc": 0.94549141, "epoch": 3.5962418433379364, "grad_norm": 7.317394733428955, "learning_rate": 2.769547747623031e-07, "loss": 0.33349528, "memory(GiB)": 34.88, "step": 132820, "train_speed(iter/s)": 0.410843 }, { "acc": 0.94365225, "epoch": 3.5963772236211518, "grad_norm": 7.833034992218018, "learning_rate": 2.767714703369113e-07, "loss": 0.33992591, "memory(GiB)": 34.88, "step": 132825, "train_speed(iter/s)": 0.410844 }, { "acc": 0.93768921, "epoch": 3.596512603904367, "grad_norm": 8.22149658203125, "learning_rate": 2.7658822508550145e-07, "loss": 0.37373757, "memory(GiB)": 34.88, "step": 132830, "train_speed(iter/s)": 0.410845 }, { "acc": 0.96016378, "epoch": 3.596647984187583, "grad_norm": 6.2306599617004395, "learning_rate": 2.764050390103686e-07, "loss": 0.21974301, "memory(GiB)": 34.88, "step": 132835, "train_speed(iter/s)": 0.410846 }, { "acc": 0.9550374, "epoch": 3.5967833644707987, "grad_norm": 3.5693368911743164, "learning_rate": 2.7622191211380906e-07, "loss": 0.2337604, "memory(GiB)": 34.88, "step": 132840, "train_speed(iter/s)": 0.410846 }, { "acc": 0.95715551, "epoch": 3.596918744754014, "grad_norm": 6.621156692504883, "learning_rate": 2.760388443981141e-07, "loss": 0.27209072, "memory(GiB)": 34.88, "step": 132845, "train_speed(iter/s)": 0.410847 }, { "acc": 0.96181126, "epoch": 3.5970541250372294, "grad_norm": 5.5238823890686035, "learning_rate": 2.7585583586558055e-07, "loss": 0.20685868, "memory(GiB)": 34.88, "step": 132850, "train_speed(iter/s)": 0.410848 }, { "acc": 0.95115442, "epoch": 3.5971895053204452, "grad_norm": 3.7212073802948, "learning_rate": 2.7567288651849853e-07, "loss": 0.31960449, "memory(GiB)": 34.88, "step": 132855, "train_speed(iter/s)": 0.410849 }, { "acc": 0.95945454, "epoch": 3.5973248856036606, "grad_norm": 8.399080276489258, "learning_rate": 2.754899963591599e-07, "loss": 0.26068962, "memory(GiB)": 34.88, "step": 132860, "train_speed(iter/s)": 0.41085 }, { "acc": 0.94582777, "epoch": 3.5974602658868764, "grad_norm": 5.975775241851807, "learning_rate": 2.753071653898565e-07, "loss": 0.2914705, "memory(GiB)": 34.88, "step": 132865, "train_speed(iter/s)": 0.410851 }, { "acc": 0.95719299, "epoch": 3.5975956461700918, "grad_norm": 4.056733131408691, "learning_rate": 2.751243936128779e-07, "loss": 0.25588455, "memory(GiB)": 34.88, "step": 132870, "train_speed(iter/s)": 0.410851 }, { "acc": 0.95237675, "epoch": 3.5977310264533076, "grad_norm": 11.336767196655273, "learning_rate": 2.749416810305148e-07, "loss": 0.23841746, "memory(GiB)": 34.88, "step": 132875, "train_speed(iter/s)": 0.410852 }, { "acc": 0.94431, "epoch": 3.597866406736523, "grad_norm": 5.534774303436279, "learning_rate": 2.747590276450541e-07, "loss": 0.32309899, "memory(GiB)": 34.88, "step": 132880, "train_speed(iter/s)": 0.410853 }, { "acc": 0.95244837, "epoch": 3.5980017870197383, "grad_norm": 6.660512924194336, "learning_rate": 2.74576433458787e-07, "loss": 0.31665344, "memory(GiB)": 34.88, "step": 132885, "train_speed(iter/s)": 0.410854 }, { "acc": 0.94381237, "epoch": 3.598137167302954, "grad_norm": 6.906344890594482, "learning_rate": 2.7439389847399764e-07, "loss": 0.33146257, "memory(GiB)": 34.88, "step": 132890, "train_speed(iter/s)": 0.410855 }, { "acc": 0.9259079, "epoch": 3.5982725475861694, "grad_norm": 7.38585901260376, "learning_rate": 2.7421142269297495e-07, "loss": 0.48866954, "memory(GiB)": 34.88, "step": 132895, "train_speed(iter/s)": 0.410856 }, { "acc": 0.9614069, "epoch": 3.5984079278693852, "grad_norm": 7.409584999084473, "learning_rate": 2.740290061180031e-07, "loss": 0.20404477, "memory(GiB)": 34.88, "step": 132900, "train_speed(iter/s)": 0.410856 }, { "acc": 0.95176201, "epoch": 3.5985433081526006, "grad_norm": 11.905306816101074, "learning_rate": 2.738466487513677e-07, "loss": 0.24320078, "memory(GiB)": 34.88, "step": 132905, "train_speed(iter/s)": 0.410857 }, { "acc": 0.9510191, "epoch": 3.5986786884358164, "grad_norm": 3.878363609313965, "learning_rate": 2.736643505953546e-07, "loss": 0.31878819, "memory(GiB)": 34.88, "step": 132910, "train_speed(iter/s)": 0.410858 }, { "acc": 0.95804462, "epoch": 3.5988140687190318, "grad_norm": 19.009559631347656, "learning_rate": 2.734821116522445e-07, "loss": 0.3087369, "memory(GiB)": 34.88, "step": 132915, "train_speed(iter/s)": 0.410859 }, { "acc": 0.92634697, "epoch": 3.598949449002247, "grad_norm": 24.992387771606445, "learning_rate": 2.732999319243236e-07, "loss": 0.4660356, "memory(GiB)": 34.88, "step": 132920, "train_speed(iter/s)": 0.41086 }, { "acc": 0.95723419, "epoch": 3.599084829285463, "grad_norm": 6.749459743499756, "learning_rate": 2.731178114138715e-07, "loss": 0.28121982, "memory(GiB)": 34.88, "step": 132925, "train_speed(iter/s)": 0.41086 }, { "acc": 0.94877949, "epoch": 3.5992202095686783, "grad_norm": 7.654374599456787, "learning_rate": 2.729357501231713e-07, "loss": 0.31841168, "memory(GiB)": 34.88, "step": 132930, "train_speed(iter/s)": 0.410861 }, { "acc": 0.94425297, "epoch": 3.599355589851894, "grad_norm": 5.583583354949951, "learning_rate": 2.7275374805450244e-07, "loss": 0.33110726, "memory(GiB)": 34.88, "step": 132935, "train_speed(iter/s)": 0.410862 }, { "acc": 0.94492378, "epoch": 3.5994909701351094, "grad_norm": 11.523846626281738, "learning_rate": 2.7257180521014516e-07, "loss": 0.35603409, "memory(GiB)": 34.88, "step": 132940, "train_speed(iter/s)": 0.410863 }, { "acc": 0.94193783, "epoch": 3.5996263504183252, "grad_norm": 2.74108624458313, "learning_rate": 2.723899215923802e-07, "loss": 0.32778144, "memory(GiB)": 34.88, "step": 132945, "train_speed(iter/s)": 0.410864 }, { "acc": 0.9397398, "epoch": 3.5997617307015406, "grad_norm": 24.73617935180664, "learning_rate": 2.722080972034833e-07, "loss": 0.32306025, "memory(GiB)": 34.88, "step": 132950, "train_speed(iter/s)": 0.410865 }, { "acc": 0.94728594, "epoch": 3.599897110984756, "grad_norm": 4.266007900238037, "learning_rate": 2.72026332045734e-07, "loss": 0.31228623, "memory(GiB)": 34.88, "step": 132955, "train_speed(iter/s)": 0.410865 }, { "acc": 0.94990482, "epoch": 3.6000324912679718, "grad_norm": 26.8706111907959, "learning_rate": 2.718446261214087e-07, "loss": 0.30983946, "memory(GiB)": 34.88, "step": 132960, "train_speed(iter/s)": 0.410866 }, { "acc": 0.94931946, "epoch": 3.6001678715511876, "grad_norm": 7.9027204513549805, "learning_rate": 2.7166297943278417e-07, "loss": 0.34901934, "memory(GiB)": 34.88, "step": 132965, "train_speed(iter/s)": 0.410867 }, { "acc": 0.95928097, "epoch": 3.600303251834403, "grad_norm": 10.07135009765625, "learning_rate": 2.7148139198213445e-07, "loss": 0.26807096, "memory(GiB)": 34.88, "step": 132970, "train_speed(iter/s)": 0.410868 }, { "acc": 0.93975248, "epoch": 3.6004386321176183, "grad_norm": 11.407565116882324, "learning_rate": 2.712998637717364e-07, "loss": 0.418121, "memory(GiB)": 34.88, "step": 132975, "train_speed(iter/s)": 0.410869 }, { "acc": 0.94774055, "epoch": 3.600574012400834, "grad_norm": 6.731228351593018, "learning_rate": 2.71118394803863e-07, "loss": 0.22996078, "memory(GiB)": 34.88, "step": 132980, "train_speed(iter/s)": 0.41087 }, { "acc": 0.95377913, "epoch": 3.6007093926840494, "grad_norm": 6.382848262786865, "learning_rate": 2.709369850807872e-07, "loss": 0.3408524, "memory(GiB)": 34.88, "step": 132985, "train_speed(iter/s)": 0.410871 }, { "acc": 0.95131159, "epoch": 3.600844772967265, "grad_norm": 6.142759799957275, "learning_rate": 2.707556346047808e-07, "loss": 0.25101514, "memory(GiB)": 34.88, "step": 132990, "train_speed(iter/s)": 0.410871 }, { "acc": 0.93957558, "epoch": 3.6009801532504806, "grad_norm": 4.2975850105285645, "learning_rate": 2.705743433781174e-07, "loss": 0.39019518, "memory(GiB)": 34.88, "step": 132995, "train_speed(iter/s)": 0.410872 }, { "acc": 0.95727568, "epoch": 3.6011155335336964, "grad_norm": 4.165458679199219, "learning_rate": 2.7039311140306765e-07, "loss": 0.24030335, "memory(GiB)": 34.88, "step": 133000, "train_speed(iter/s)": 0.410873 }, { "acc": 0.95260181, "epoch": 3.6012509138169118, "grad_norm": 6.686659336090088, "learning_rate": 2.702119386818995e-07, "loss": 0.31014881, "memory(GiB)": 34.88, "step": 133005, "train_speed(iter/s)": 0.410874 }, { "acc": 0.93155937, "epoch": 3.601386294100127, "grad_norm": 5.385747909545898, "learning_rate": 2.70030825216886e-07, "loss": 0.49642591, "memory(GiB)": 34.88, "step": 133010, "train_speed(iter/s)": 0.410875 }, { "acc": 0.94506063, "epoch": 3.601521674383343, "grad_norm": 4.385272979736328, "learning_rate": 2.6984977101029337e-07, "loss": 0.32616024, "memory(GiB)": 34.88, "step": 133015, "train_speed(iter/s)": 0.410876 }, { "acc": 0.94394951, "epoch": 3.6016570546665583, "grad_norm": 8.163504600524902, "learning_rate": 2.696687760643918e-07, "loss": 0.34780602, "memory(GiB)": 34.88, "step": 133020, "train_speed(iter/s)": 0.410876 }, { "acc": 0.95722961, "epoch": 3.601792434949774, "grad_norm": 8.518964767456055, "learning_rate": 2.6948784038144584e-07, "loss": 0.28424101, "memory(GiB)": 34.88, "step": 133025, "train_speed(iter/s)": 0.410877 }, { "acc": 0.94943161, "epoch": 3.6019278152329894, "grad_norm": 4.409363269805908, "learning_rate": 2.693069639637242e-07, "loss": 0.30639486, "memory(GiB)": 34.88, "step": 133030, "train_speed(iter/s)": 0.410878 }, { "acc": 0.94827414, "epoch": 3.6020631955162052, "grad_norm": 3.836520195007324, "learning_rate": 2.69126146813493e-07, "loss": 0.30331595, "memory(GiB)": 34.88, "step": 133035, "train_speed(iter/s)": 0.410879 }, { "acc": 0.95118694, "epoch": 3.6021985757994206, "grad_norm": 7.907037734985352, "learning_rate": 2.689453889330147e-07, "loss": 0.30640388, "memory(GiB)": 34.88, "step": 133040, "train_speed(iter/s)": 0.41088 }, { "acc": 0.94277811, "epoch": 3.602333956082636, "grad_norm": 4.901843070983887, "learning_rate": 2.6876469032455677e-07, "loss": 0.34310484, "memory(GiB)": 34.88, "step": 133045, "train_speed(iter/s)": 0.410881 }, { "acc": 0.9358017, "epoch": 3.6024693363658518, "grad_norm": 7.1299309730529785, "learning_rate": 2.6858405099038037e-07, "loss": 0.37972934, "memory(GiB)": 34.88, "step": 133050, "train_speed(iter/s)": 0.410881 }, { "acc": 0.94042263, "epoch": 3.602604716649067, "grad_norm": 5.504925727844238, "learning_rate": 2.6840347093275086e-07, "loss": 0.40395269, "memory(GiB)": 34.88, "step": 133055, "train_speed(iter/s)": 0.410882 }, { "acc": 0.93515282, "epoch": 3.602740096932283, "grad_norm": 3.4335765838623047, "learning_rate": 2.682229501539277e-07, "loss": 0.43533783, "memory(GiB)": 34.88, "step": 133060, "train_speed(iter/s)": 0.410883 }, { "acc": 0.93487253, "epoch": 3.6028754772154983, "grad_norm": 9.017019271850586, "learning_rate": 2.6804248865617396e-07, "loss": 0.43010197, "memory(GiB)": 34.88, "step": 133065, "train_speed(iter/s)": 0.410884 }, { "acc": 0.93621845, "epoch": 3.603010857498714, "grad_norm": 8.8479642868042, "learning_rate": 2.6786208644175035e-07, "loss": 0.36964543, "memory(GiB)": 34.88, "step": 133070, "train_speed(iter/s)": 0.410885 }, { "acc": 0.94725361, "epoch": 3.6031462377819294, "grad_norm": 8.4210205078125, "learning_rate": 2.676817435129148e-07, "loss": 0.32801285, "memory(GiB)": 34.88, "step": 133075, "train_speed(iter/s)": 0.410885 }, { "acc": 0.93642387, "epoch": 3.603281618065145, "grad_norm": 13.497629165649414, "learning_rate": 2.6750145987192924e-07, "loss": 0.43969793, "memory(GiB)": 34.88, "step": 133080, "train_speed(iter/s)": 0.410886 }, { "acc": 0.95525198, "epoch": 3.6034169983483606, "grad_norm": 9.204130172729492, "learning_rate": 2.673212355210499e-07, "loss": 0.29278853, "memory(GiB)": 34.88, "step": 133085, "train_speed(iter/s)": 0.410887 }, { "acc": 0.93877964, "epoch": 3.603552378631576, "grad_norm": 6.953423500061035, "learning_rate": 2.6714107046253596e-07, "loss": 0.38429003, "memory(GiB)": 34.88, "step": 133090, "train_speed(iter/s)": 0.410888 }, { "acc": 0.94751911, "epoch": 3.6036877589147918, "grad_norm": 4.122952938079834, "learning_rate": 2.6696096469864247e-07, "loss": 0.33723881, "memory(GiB)": 34.88, "step": 133095, "train_speed(iter/s)": 0.410889 }, { "acc": 0.95290956, "epoch": 3.603823139198007, "grad_norm": 4.749343395233154, "learning_rate": 2.667809182316281e-07, "loss": 0.26814759, "memory(GiB)": 34.88, "step": 133100, "train_speed(iter/s)": 0.410889 }, { "acc": 0.94892368, "epoch": 3.603958519481223, "grad_norm": 11.452239036560059, "learning_rate": 2.666009310637468e-07, "loss": 0.32682245, "memory(GiB)": 34.88, "step": 133105, "train_speed(iter/s)": 0.41089 }, { "acc": 0.93910837, "epoch": 3.6040938997644383, "grad_norm": 4.666625499725342, "learning_rate": 2.664210031972522e-07, "loss": 0.42865191, "memory(GiB)": 34.88, "step": 133110, "train_speed(iter/s)": 0.410891 }, { "acc": 0.94473391, "epoch": 3.6042292800476536, "grad_norm": 6.869513988494873, "learning_rate": 2.6624113463440054e-07, "loss": 0.37709761, "memory(GiB)": 34.88, "step": 133115, "train_speed(iter/s)": 0.410892 }, { "acc": 0.9631073, "epoch": 3.6043646603308694, "grad_norm": 6.9919023513793945, "learning_rate": 2.6606132537744373e-07, "loss": 0.18538827, "memory(GiB)": 34.88, "step": 133120, "train_speed(iter/s)": 0.410893 }, { "acc": 0.94650888, "epoch": 3.604500040614085, "grad_norm": 12.893994331359863, "learning_rate": 2.6588157542863524e-07, "loss": 0.35031457, "memory(GiB)": 34.88, "step": 133125, "train_speed(iter/s)": 0.410894 }, { "acc": 0.94873171, "epoch": 3.6046354208973006, "grad_norm": 5.241409778594971, "learning_rate": 2.657018847902242e-07, "loss": 0.31781015, "memory(GiB)": 34.88, "step": 133130, "train_speed(iter/s)": 0.410894 }, { "acc": 0.94844236, "epoch": 3.604770801180516, "grad_norm": 8.46733570098877, "learning_rate": 2.6552225346446525e-07, "loss": 0.33839412, "memory(GiB)": 34.88, "step": 133135, "train_speed(iter/s)": 0.410895 }, { "acc": 0.94834023, "epoch": 3.6049061814637318, "grad_norm": 9.974186897277832, "learning_rate": 2.653426814536058e-07, "loss": 0.25024805, "memory(GiB)": 34.88, "step": 133140, "train_speed(iter/s)": 0.410896 }, { "acc": 0.96663513, "epoch": 3.605041561746947, "grad_norm": 4.137170791625977, "learning_rate": 2.651631687598966e-07, "loss": 0.18011124, "memory(GiB)": 34.88, "step": 133145, "train_speed(iter/s)": 0.410897 }, { "acc": 0.94467306, "epoch": 3.6051769420301625, "grad_norm": 5.6877360343933105, "learning_rate": 2.6498371538558614e-07, "loss": 0.40925803, "memory(GiB)": 34.88, "step": 133150, "train_speed(iter/s)": 0.410898 }, { "acc": 0.95318995, "epoch": 3.6053123223133783, "grad_norm": 5.753926753997803, "learning_rate": 2.648043213329225e-07, "loss": 0.33266034, "memory(GiB)": 34.88, "step": 133155, "train_speed(iter/s)": 0.410898 }, { "acc": 0.94441929, "epoch": 3.605447702596594, "grad_norm": 16.18844985961914, "learning_rate": 2.6462498660415295e-07, "loss": 0.31695096, "memory(GiB)": 34.88, "step": 133160, "train_speed(iter/s)": 0.410899 }, { "acc": 0.95192833, "epoch": 3.6055830828798094, "grad_norm": 7.612163543701172, "learning_rate": 2.644457112015228e-07, "loss": 0.31008475, "memory(GiB)": 34.88, "step": 133165, "train_speed(iter/s)": 0.4109 }, { "acc": 0.95101185, "epoch": 3.605718463163025, "grad_norm": 3.245701789855957, "learning_rate": 2.642664951272806e-07, "loss": 0.28805356, "memory(GiB)": 34.88, "step": 133170, "train_speed(iter/s)": 0.410901 }, { "acc": 0.94868984, "epoch": 3.6058538434462406, "grad_norm": 4.526351451873779, "learning_rate": 2.6408733838366877e-07, "loss": 0.31056318, "memory(GiB)": 34.88, "step": 133175, "train_speed(iter/s)": 0.410902 }, { "acc": 0.95163345, "epoch": 3.605989223729456, "grad_norm": 4.378484725952148, "learning_rate": 2.6390824097293244e-07, "loss": 0.30069416, "memory(GiB)": 34.88, "step": 133180, "train_speed(iter/s)": 0.410902 }, { "acc": 0.93987808, "epoch": 3.6061246040126713, "grad_norm": 6.426170825958252, "learning_rate": 2.637292028973163e-07, "loss": 0.31737788, "memory(GiB)": 34.88, "step": 133185, "train_speed(iter/s)": 0.410903 }, { "acc": 0.95207672, "epoch": 3.606259984295887, "grad_norm": 5.426371097564697, "learning_rate": 2.635502241590611e-07, "loss": 0.28686738, "memory(GiB)": 34.88, "step": 133190, "train_speed(iter/s)": 0.410904 }, { "acc": 0.94753571, "epoch": 3.606395364579103, "grad_norm": 5.191125869750977, "learning_rate": 2.6337130476041094e-07, "loss": 0.33891964, "memory(GiB)": 34.88, "step": 133195, "train_speed(iter/s)": 0.410905 }, { "acc": 0.96151018, "epoch": 3.6065307448623183, "grad_norm": 16.305835723876953, "learning_rate": 2.6319244470360495e-07, "loss": 0.21958263, "memory(GiB)": 34.88, "step": 133200, "train_speed(iter/s)": 0.410906 }, { "acc": 0.95932217, "epoch": 3.6066661251455336, "grad_norm": 8.672510147094727, "learning_rate": 2.6301364399088546e-07, "loss": 0.25726907, "memory(GiB)": 34.88, "step": 133205, "train_speed(iter/s)": 0.410907 }, { "acc": 0.9639185, "epoch": 3.6068015054287494, "grad_norm": 11.411568641662598, "learning_rate": 2.628349026244917e-07, "loss": 0.21120143, "memory(GiB)": 34.88, "step": 133210, "train_speed(iter/s)": 0.410907 }, { "acc": 0.95516701, "epoch": 3.606936885711965, "grad_norm": 5.593727111816406, "learning_rate": 2.626562206066626e-07, "loss": 0.29884601, "memory(GiB)": 34.88, "step": 133215, "train_speed(iter/s)": 0.410908 }, { "acc": 0.95482063, "epoch": 3.6070722659951806, "grad_norm": 7.130204200744629, "learning_rate": 2.6247759793963633e-07, "loss": 0.27995062, "memory(GiB)": 34.88, "step": 133220, "train_speed(iter/s)": 0.410909 }, { "acc": 0.96852531, "epoch": 3.607207646278396, "grad_norm": 6.670464038848877, "learning_rate": 2.622990346256519e-07, "loss": 0.2129951, "memory(GiB)": 34.88, "step": 133225, "train_speed(iter/s)": 0.41091 }, { "acc": 0.94104137, "epoch": 3.6073430265616118, "grad_norm": 5.106521129608154, "learning_rate": 2.62120530666945e-07, "loss": 0.33663011, "memory(GiB)": 34.88, "step": 133230, "train_speed(iter/s)": 0.410911 }, { "acc": 0.96154737, "epoch": 3.607478406844827, "grad_norm": 2.2678639888763428, "learning_rate": 2.6194208606575096e-07, "loss": 0.16707964, "memory(GiB)": 34.88, "step": 133235, "train_speed(iter/s)": 0.410912 }, { "acc": 0.95868921, "epoch": 3.6076137871280425, "grad_norm": 10.52089786529541, "learning_rate": 2.617637008243066e-07, "loss": 0.23224194, "memory(GiB)": 34.88, "step": 133240, "train_speed(iter/s)": 0.410912 }, { "acc": 0.94530678, "epoch": 3.6077491674112583, "grad_norm": 3.6459763050079346, "learning_rate": 2.61585374944846e-07, "loss": 0.33536375, "memory(GiB)": 34.88, "step": 133245, "train_speed(iter/s)": 0.410913 }, { "acc": 0.94246559, "epoch": 3.6078845476944736, "grad_norm": 9.281983375549316, "learning_rate": 2.614071084296027e-07, "loss": 0.32066867, "memory(GiB)": 34.88, "step": 133250, "train_speed(iter/s)": 0.410914 }, { "acc": 0.95055542, "epoch": 3.6080199279776894, "grad_norm": 4.387713432312012, "learning_rate": 2.612289012808098e-07, "loss": 0.32325015, "memory(GiB)": 34.88, "step": 133255, "train_speed(iter/s)": 0.410915 }, { "acc": 0.93979321, "epoch": 3.608155308260905, "grad_norm": 3.4206838607788086, "learning_rate": 2.6105075350070135e-07, "loss": 0.40144234, "memory(GiB)": 34.88, "step": 133260, "train_speed(iter/s)": 0.410916 }, { "acc": 0.94890079, "epoch": 3.6082906885441206, "grad_norm": 7.59712553024292, "learning_rate": 2.608726650915069e-07, "loss": 0.30553408, "memory(GiB)": 34.88, "step": 133265, "train_speed(iter/s)": 0.410917 }, { "acc": 0.94549999, "epoch": 3.608426068827336, "grad_norm": 5.852583408355713, "learning_rate": 2.606946360554579e-07, "loss": 0.34481044, "memory(GiB)": 34.88, "step": 133270, "train_speed(iter/s)": 0.410918 }, { "acc": 0.95343361, "epoch": 3.6085614491105513, "grad_norm": 13.217802047729492, "learning_rate": 2.6051666639478563e-07, "loss": 0.30305114, "memory(GiB)": 34.88, "step": 133275, "train_speed(iter/s)": 0.410918 }, { "acc": 0.942451, "epoch": 3.608696829393767, "grad_norm": 3.263184070587158, "learning_rate": 2.603387561117181e-07, "loss": 0.35804763, "memory(GiB)": 34.88, "step": 133280, "train_speed(iter/s)": 0.410919 }, { "acc": 0.96334305, "epoch": 3.6088322096769825, "grad_norm": 3.3149659633636475, "learning_rate": 2.6016090520848437e-07, "loss": 0.21396263, "memory(GiB)": 34.88, "step": 133285, "train_speed(iter/s)": 0.41092 }, { "acc": 0.93175745, "epoch": 3.6089675899601983, "grad_norm": 7.899264335632324, "learning_rate": 2.59983113687313e-07, "loss": 0.4494833, "memory(GiB)": 34.88, "step": 133290, "train_speed(iter/s)": 0.410921 }, { "acc": 0.9585, "epoch": 3.6091029702434136, "grad_norm": 5.2804975509643555, "learning_rate": 2.59805381550431e-07, "loss": 0.25230758, "memory(GiB)": 34.88, "step": 133295, "train_speed(iter/s)": 0.410922 }, { "acc": 0.95168343, "epoch": 3.6092383505266294, "grad_norm": 3.552686929702759, "learning_rate": 2.5962770880006396e-07, "loss": 0.32557878, "memory(GiB)": 34.88, "step": 133300, "train_speed(iter/s)": 0.410923 }, { "acc": 0.94505157, "epoch": 3.609373730809845, "grad_norm": 6.394278049468994, "learning_rate": 2.5945009543843774e-07, "loss": 0.33718333, "memory(GiB)": 34.88, "step": 133305, "train_speed(iter/s)": 0.410924 }, { "acc": 0.94302149, "epoch": 3.60950911109306, "grad_norm": 12.81017017364502, "learning_rate": 2.5927254146777815e-07, "loss": 0.32997632, "memory(GiB)": 34.88, "step": 133310, "train_speed(iter/s)": 0.410924 }, { "acc": 0.9552783, "epoch": 3.609644491376276, "grad_norm": 6.754253387451172, "learning_rate": 2.590950468903098e-07, "loss": 0.27950301, "memory(GiB)": 34.88, "step": 133315, "train_speed(iter/s)": 0.410925 }, { "acc": 0.93572006, "epoch": 3.6097798716594918, "grad_norm": 11.31521987915039, "learning_rate": 2.5891761170825513e-07, "loss": 0.42075624, "memory(GiB)": 34.88, "step": 133320, "train_speed(iter/s)": 0.410926 }, { "acc": 0.94910278, "epoch": 3.609915251942707, "grad_norm": 3.6793177127838135, "learning_rate": 2.5874023592383555e-07, "loss": 0.26421149, "memory(GiB)": 34.88, "step": 133325, "train_speed(iter/s)": 0.410927 }, { "acc": 0.9453577, "epoch": 3.6100506322259225, "grad_norm": 10.137313842773438, "learning_rate": 2.585629195392762e-07, "loss": 0.3413568, "memory(GiB)": 34.88, "step": 133330, "train_speed(iter/s)": 0.410928 }, { "acc": 0.95289364, "epoch": 3.6101860125091383, "grad_norm": 3.425596237182617, "learning_rate": 2.583856625567956e-07, "loss": 0.28398914, "memory(GiB)": 34.88, "step": 133335, "train_speed(iter/s)": 0.410928 }, { "acc": 0.95476933, "epoch": 3.6103213927923536, "grad_norm": 7.7489705085754395, "learning_rate": 2.582084649786157e-07, "loss": 0.24091446, "memory(GiB)": 34.88, "step": 133340, "train_speed(iter/s)": 0.410929 }, { "acc": 0.94391012, "epoch": 3.610456773075569, "grad_norm": 12.539666175842285, "learning_rate": 2.580313268069557e-07, "loss": 0.3266201, "memory(GiB)": 34.88, "step": 133345, "train_speed(iter/s)": 0.41093 }, { "acc": 0.94696188, "epoch": 3.610592153358785, "grad_norm": 10.905073165893555, "learning_rate": 2.5785424804403576e-07, "loss": 0.36302037, "memory(GiB)": 34.88, "step": 133350, "train_speed(iter/s)": 0.410931 }, { "acc": 0.94345694, "epoch": 3.6107275336420006, "grad_norm": 5.903340816497803, "learning_rate": 2.576772286920727e-07, "loss": 0.36863871, "memory(GiB)": 34.88, "step": 133355, "train_speed(iter/s)": 0.410932 }, { "acc": 0.94855537, "epoch": 3.610862913925216, "grad_norm": 8.9169340133667, "learning_rate": 2.5750026875328344e-07, "loss": 0.28576963, "memory(GiB)": 34.88, "step": 133360, "train_speed(iter/s)": 0.410933 }, { "acc": 0.95499763, "epoch": 3.6109982942084313, "grad_norm": 9.933268547058105, "learning_rate": 2.573233682298866e-07, "loss": 0.30275974, "memory(GiB)": 34.88, "step": 133365, "train_speed(iter/s)": 0.410933 }, { "acc": 0.94993935, "epoch": 3.611133674491647, "grad_norm": 25.725521087646484, "learning_rate": 2.5714652712409737e-07, "loss": 0.29698391, "memory(GiB)": 34.88, "step": 133370, "train_speed(iter/s)": 0.410934 }, { "acc": 0.93804598, "epoch": 3.6112690547748625, "grad_norm": 11.685479164123535, "learning_rate": 2.56969745438131e-07, "loss": 0.37116218, "memory(GiB)": 34.88, "step": 133375, "train_speed(iter/s)": 0.410935 }, { "acc": 0.94360123, "epoch": 3.6114044350580783, "grad_norm": 3.9028289318084717, "learning_rate": 2.567930231742015e-07, "loss": 0.32507617, "memory(GiB)": 34.88, "step": 133380, "train_speed(iter/s)": 0.410936 }, { "acc": 0.95285721, "epoch": 3.6115398153412936, "grad_norm": 9.284369468688965, "learning_rate": 2.5661636033452434e-07, "loss": 0.27417538, "memory(GiB)": 34.88, "step": 133385, "train_speed(iter/s)": 0.410937 }, { "acc": 0.94692879, "epoch": 3.6116751956245094, "grad_norm": 8.60568904876709, "learning_rate": 2.564397569213111e-07, "loss": 0.3453135, "memory(GiB)": 34.88, "step": 133390, "train_speed(iter/s)": 0.410938 }, { "acc": 0.94415455, "epoch": 3.611810575907725, "grad_norm": 7.131584644317627, "learning_rate": 2.5626321293677397e-07, "loss": 0.3241055, "memory(GiB)": 34.88, "step": 133395, "train_speed(iter/s)": 0.410938 }, { "acc": 0.95370636, "epoch": 3.61194595619094, "grad_norm": 10.222129821777344, "learning_rate": 2.560867283831258e-07, "loss": 0.27996325, "memory(GiB)": 34.88, "step": 133400, "train_speed(iter/s)": 0.410939 }, { "acc": 0.95303164, "epoch": 3.612081336474156, "grad_norm": 13.5027494430542, "learning_rate": 2.559103032625763e-07, "loss": 0.27743101, "memory(GiB)": 34.88, "step": 133405, "train_speed(iter/s)": 0.41094 }, { "acc": 0.94493074, "epoch": 3.6122167167573713, "grad_norm": 5.4390082359313965, "learning_rate": 2.557339375773352e-07, "loss": 0.34110222, "memory(GiB)": 34.88, "step": 133410, "train_speed(iter/s)": 0.410941 }, { "acc": 0.96157513, "epoch": 3.612352097040587, "grad_norm": 6.137064456939697, "learning_rate": 2.5555763132961316e-07, "loss": 0.22396474, "memory(GiB)": 34.88, "step": 133415, "train_speed(iter/s)": 0.410942 }, { "acc": 0.94752293, "epoch": 3.6124874773238025, "grad_norm": 8.257439613342285, "learning_rate": 2.5538138452161834e-07, "loss": 0.32175782, "memory(GiB)": 34.88, "step": 133420, "train_speed(iter/s)": 0.410943 }, { "acc": 0.95199242, "epoch": 3.6126228576070183, "grad_norm": 6.313614845275879, "learning_rate": 2.552051971555575e-07, "loss": 0.25227289, "memory(GiB)": 34.88, "step": 133425, "train_speed(iter/s)": 0.410943 }, { "acc": 0.95540752, "epoch": 3.6127582378902336, "grad_norm": 3.737423896789551, "learning_rate": 2.550290692336388e-07, "loss": 0.27269604, "memory(GiB)": 34.88, "step": 133430, "train_speed(iter/s)": 0.410944 }, { "acc": 0.9513586, "epoch": 3.612893618173449, "grad_norm": 7.351300239562988, "learning_rate": 2.5485300075806845e-07, "loss": 0.2704057, "memory(GiB)": 34.88, "step": 133435, "train_speed(iter/s)": 0.410945 }, { "acc": 0.9364893, "epoch": 3.613028998456665, "grad_norm": 10.506299018859863, "learning_rate": 2.546769917310523e-07, "loss": 0.37822473, "memory(GiB)": 34.88, "step": 133440, "train_speed(iter/s)": 0.410946 }, { "acc": 0.94568672, "epoch": 3.61316437873988, "grad_norm": 10.270578384399414, "learning_rate": 2.5450104215479456e-07, "loss": 0.33884687, "memory(GiB)": 34.88, "step": 133445, "train_speed(iter/s)": 0.410947 }, { "acc": 0.94699068, "epoch": 3.613299759023096, "grad_norm": 11.245105743408203, "learning_rate": 2.5432515203149927e-07, "loss": 0.38668351, "memory(GiB)": 34.88, "step": 133450, "train_speed(iter/s)": 0.410947 }, { "acc": 0.94850197, "epoch": 3.6134351393063113, "grad_norm": 4.817263126373291, "learning_rate": 2.5414932136337105e-07, "loss": 0.33418698, "memory(GiB)": 34.88, "step": 133455, "train_speed(iter/s)": 0.410948 }, { "acc": 0.94843636, "epoch": 3.613570519589527, "grad_norm": 6.584051132202148, "learning_rate": 2.5397355015261136e-07, "loss": 0.29316773, "memory(GiB)": 34.88, "step": 133460, "train_speed(iter/s)": 0.410949 }, { "acc": 0.94117537, "epoch": 3.6137058998727425, "grad_norm": 6.17588996887207, "learning_rate": 2.5379783840142157e-07, "loss": 0.35295005, "memory(GiB)": 34.88, "step": 133465, "train_speed(iter/s)": 0.41095 }, { "acc": 0.95721703, "epoch": 3.613841280155958, "grad_norm": 14.200312614440918, "learning_rate": 2.53622186112004e-07, "loss": 0.24583194, "memory(GiB)": 34.88, "step": 133470, "train_speed(iter/s)": 0.410951 }, { "acc": 0.94379091, "epoch": 3.6139766604391736, "grad_norm": 7.149310111999512, "learning_rate": 2.534465932865596e-07, "loss": 0.33131714, "memory(GiB)": 34.88, "step": 133475, "train_speed(iter/s)": 0.410952 }, { "acc": 0.9611577, "epoch": 3.6141120407223895, "grad_norm": 5.343657970428467, "learning_rate": 2.532710599272858e-07, "loss": 0.19609144, "memory(GiB)": 34.88, "step": 133480, "train_speed(iter/s)": 0.410952 }, { "acc": 0.94521027, "epoch": 3.614247421005605, "grad_norm": 9.822343826293945, "learning_rate": 2.5309558603638333e-07, "loss": 0.37350485, "memory(GiB)": 34.88, "step": 133485, "train_speed(iter/s)": 0.410953 }, { "acc": 0.94506388, "epoch": 3.61438280128882, "grad_norm": 9.436114311218262, "learning_rate": 2.5292017161605024e-07, "loss": 0.35077999, "memory(GiB)": 34.88, "step": 133490, "train_speed(iter/s)": 0.410954 }, { "acc": 0.93827734, "epoch": 3.614518181572036, "grad_norm": 8.37936782836914, "learning_rate": 2.5274481666848234e-07, "loss": 0.40304551, "memory(GiB)": 34.88, "step": 133495, "train_speed(iter/s)": 0.410955 }, { "acc": 0.95541077, "epoch": 3.6146535618552513, "grad_norm": 5.062331199645996, "learning_rate": 2.525695211958783e-07, "loss": 0.25878577, "memory(GiB)": 34.88, "step": 133500, "train_speed(iter/s)": 0.410956 }, { "acc": 0.94782667, "epoch": 3.6147889421384667, "grad_norm": 7.129881381988525, "learning_rate": 2.5239428520043264e-07, "loss": 0.33202212, "memory(GiB)": 34.88, "step": 133505, "train_speed(iter/s)": 0.410956 }, { "acc": 0.94826336, "epoch": 3.6149243224216825, "grad_norm": 16.41653823852539, "learning_rate": 2.5221910868434175e-07, "loss": 0.29342866, "memory(GiB)": 34.88, "step": 133510, "train_speed(iter/s)": 0.410957 }, { "acc": 0.94696989, "epoch": 3.6150597027048983, "grad_norm": 4.844653606414795, "learning_rate": 2.5204399164979883e-07, "loss": 0.32024121, "memory(GiB)": 34.88, "step": 133515, "train_speed(iter/s)": 0.410958 }, { "acc": 0.95168571, "epoch": 3.6151950829881137, "grad_norm": 9.561872482299805, "learning_rate": 2.518689340989978e-07, "loss": 0.32163358, "memory(GiB)": 34.88, "step": 133520, "train_speed(iter/s)": 0.410959 }, { "acc": 0.94711504, "epoch": 3.615330463271329, "grad_norm": 9.219422340393066, "learning_rate": 2.516939360341329e-07, "loss": 0.32989116, "memory(GiB)": 34.88, "step": 133525, "train_speed(iter/s)": 0.41096 }, { "acc": 0.93376389, "epoch": 3.615465843554545, "grad_norm": 8.146677017211914, "learning_rate": 2.5151899745739444e-07, "loss": 0.4723598, "memory(GiB)": 34.88, "step": 133530, "train_speed(iter/s)": 0.41096 }, { "acc": 0.94389629, "epoch": 3.61560122383776, "grad_norm": 7.45352840423584, "learning_rate": 2.5134411837097474e-07, "loss": 0.36518545, "memory(GiB)": 34.88, "step": 133535, "train_speed(iter/s)": 0.410961 }, { "acc": 0.93738003, "epoch": 3.615736604120976, "grad_norm": 16.8729305267334, "learning_rate": 2.5116929877706473e-07, "loss": 0.36373944, "memory(GiB)": 34.88, "step": 133540, "train_speed(iter/s)": 0.410962 }, { "acc": 0.93710928, "epoch": 3.6158719844041913, "grad_norm": 6.769400119781494, "learning_rate": 2.5099453867785516e-07, "loss": 0.38487129, "memory(GiB)": 34.88, "step": 133545, "train_speed(iter/s)": 0.410963 }, { "acc": 0.94074211, "epoch": 3.616007364687407, "grad_norm": 6.90553092956543, "learning_rate": 2.5081983807553296e-07, "loss": 0.35196993, "memory(GiB)": 34.88, "step": 133550, "train_speed(iter/s)": 0.410964 }, { "acc": 0.95058193, "epoch": 3.6161427449706225, "grad_norm": 5.7827959060668945, "learning_rate": 2.506451969722884e-07, "loss": 0.32315216, "memory(GiB)": 34.88, "step": 133555, "train_speed(iter/s)": 0.410965 }, { "acc": 0.94129066, "epoch": 3.616278125253838, "grad_norm": 9.772099494934082, "learning_rate": 2.5047061537030835e-07, "loss": 0.38414655, "memory(GiB)": 34.88, "step": 133560, "train_speed(iter/s)": 0.410965 }, { "acc": 0.95446692, "epoch": 3.6164135055370537, "grad_norm": 3.390871524810791, "learning_rate": 2.502960932717809e-07, "loss": 0.26046972, "memory(GiB)": 34.88, "step": 133565, "train_speed(iter/s)": 0.410966 }, { "acc": 0.93298073, "epoch": 3.616548885820269, "grad_norm": 5.22063684463501, "learning_rate": 2.501216306788913e-07, "loss": 0.38944869, "memory(GiB)": 34.88, "step": 133570, "train_speed(iter/s)": 0.410967 }, { "acc": 0.94671516, "epoch": 3.616684266103485, "grad_norm": 18.725622177124023, "learning_rate": 2.4994722759382477e-07, "loss": 0.34308507, "memory(GiB)": 34.88, "step": 133575, "train_speed(iter/s)": 0.410968 }, { "acc": 0.95649567, "epoch": 3.6168196463867, "grad_norm": 7.513972282409668, "learning_rate": 2.497728840187671e-07, "loss": 0.2513973, "memory(GiB)": 34.88, "step": 133580, "train_speed(iter/s)": 0.410969 }, { "acc": 0.95001364, "epoch": 3.616955026669916, "grad_norm": 7.148796558380127, "learning_rate": 2.495985999559014e-07, "loss": 0.32616878, "memory(GiB)": 34.88, "step": 133585, "train_speed(iter/s)": 0.41097 }, { "acc": 0.9479188, "epoch": 3.6170904069531313, "grad_norm": 7.771404266357422, "learning_rate": 2.494243754074107e-07, "loss": 0.28150661, "memory(GiB)": 34.88, "step": 133590, "train_speed(iter/s)": 0.410971 }, { "acc": 0.96439781, "epoch": 3.6172257872363467, "grad_norm": 3.172488212585449, "learning_rate": 2.492502103754785e-07, "loss": 0.22633786, "memory(GiB)": 34.88, "step": 133595, "train_speed(iter/s)": 0.410971 }, { "acc": 0.95529289, "epoch": 3.6173611675195625, "grad_norm": 7.212233066558838, "learning_rate": 2.490761048622868e-07, "loss": 0.23687592, "memory(GiB)": 34.88, "step": 133600, "train_speed(iter/s)": 0.410972 }, { "acc": 0.93714771, "epoch": 3.617496547802778, "grad_norm": 4.724623680114746, "learning_rate": 2.4890205887001476e-07, "loss": 0.38602161, "memory(GiB)": 34.88, "step": 133605, "train_speed(iter/s)": 0.410973 }, { "acc": 0.94739866, "epoch": 3.6176319280859937, "grad_norm": 7.2025837898254395, "learning_rate": 2.4872807240084423e-07, "loss": 0.32063613, "memory(GiB)": 34.88, "step": 133610, "train_speed(iter/s)": 0.410974 }, { "acc": 0.95609579, "epoch": 3.617767308369209, "grad_norm": 5.548704147338867, "learning_rate": 2.48554145456955e-07, "loss": 0.22982419, "memory(GiB)": 34.88, "step": 133615, "train_speed(iter/s)": 0.410975 }, { "acc": 0.94605446, "epoch": 3.617902688652425, "grad_norm": 4.8640875816345215, "learning_rate": 2.483802780405245e-07, "loss": 0.35111084, "memory(GiB)": 34.88, "step": 133620, "train_speed(iter/s)": 0.410976 }, { "acc": 0.94999561, "epoch": 3.61803806893564, "grad_norm": 6.0838236808776855, "learning_rate": 2.482064701537314e-07, "loss": 0.2997205, "memory(GiB)": 34.88, "step": 133625, "train_speed(iter/s)": 0.410977 }, { "acc": 0.93651752, "epoch": 3.6181734492188555, "grad_norm": 9.013007164001465, "learning_rate": 2.48032721798753e-07, "loss": 0.37084355, "memory(GiB)": 34.88, "step": 133630, "train_speed(iter/s)": 0.410977 }, { "acc": 0.95369005, "epoch": 3.6183088295020713, "grad_norm": 2.8832597732543945, "learning_rate": 2.478590329777665e-07, "loss": 0.25924759, "memory(GiB)": 34.88, "step": 133635, "train_speed(iter/s)": 0.410978 }, { "acc": 0.9454689, "epoch": 3.618444209785287, "grad_norm": 4.633401870727539, "learning_rate": 2.476854036929453e-07, "loss": 0.35056205, "memory(GiB)": 34.88, "step": 133640, "train_speed(iter/s)": 0.410979 }, { "acc": 0.95494022, "epoch": 3.6185795900685025, "grad_norm": 10.543537139892578, "learning_rate": 2.47511833946468e-07, "loss": 0.30520005, "memory(GiB)": 34.88, "step": 133645, "train_speed(iter/s)": 0.41098 }, { "acc": 0.95052357, "epoch": 3.618714970351718, "grad_norm": 6.346903324127197, "learning_rate": 2.4733832374050664e-07, "loss": 0.29026484, "memory(GiB)": 34.88, "step": 133650, "train_speed(iter/s)": 0.410981 }, { "acc": 0.92778778, "epoch": 3.6188503506349337, "grad_norm": 4.689083576202393, "learning_rate": 2.4716487307723473e-07, "loss": 0.46664677, "memory(GiB)": 34.88, "step": 133655, "train_speed(iter/s)": 0.410982 }, { "acc": 0.94638119, "epoch": 3.618985730918149, "grad_norm": 3.938668966293335, "learning_rate": 2.4699148195882593e-07, "loss": 0.35850005, "memory(GiB)": 34.88, "step": 133660, "train_speed(iter/s)": 0.410983 }, { "acc": 0.95178919, "epoch": 3.6191211112013644, "grad_norm": 4.949465274810791, "learning_rate": 2.4681815038745155e-07, "loss": 0.28586087, "memory(GiB)": 34.88, "step": 133665, "train_speed(iter/s)": 0.410983 }, { "acc": 0.95304432, "epoch": 3.61925649148458, "grad_norm": 4.676052093505859, "learning_rate": 2.466448783652842e-07, "loss": 0.27591233, "memory(GiB)": 34.88, "step": 133670, "train_speed(iter/s)": 0.410984 }, { "acc": 0.96130657, "epoch": 3.619391871767796, "grad_norm": 6.463160514831543, "learning_rate": 2.464716658944924e-07, "loss": 0.22847888, "memory(GiB)": 34.88, "step": 133675, "train_speed(iter/s)": 0.410985 }, { "acc": 0.94755974, "epoch": 3.6195272520510113, "grad_norm": 8.978522300720215, "learning_rate": 2.4629851297724804e-07, "loss": 0.31553304, "memory(GiB)": 34.88, "step": 133680, "train_speed(iter/s)": 0.410986 }, { "acc": 0.95756149, "epoch": 3.6196626323342267, "grad_norm": 10.471362113952637, "learning_rate": 2.461254196157186e-07, "loss": 0.23682129, "memory(GiB)": 34.88, "step": 133685, "train_speed(iter/s)": 0.410987 }, { "acc": 0.95926743, "epoch": 3.6197980126174425, "grad_norm": 12.627629280090332, "learning_rate": 2.4595238581207396e-07, "loss": 0.28605137, "memory(GiB)": 34.88, "step": 133690, "train_speed(iter/s)": 0.410988 }, { "acc": 0.9524024, "epoch": 3.619933392900658, "grad_norm": 10.471691131591797, "learning_rate": 2.457794115684804e-07, "loss": 0.29558573, "memory(GiB)": 34.88, "step": 133695, "train_speed(iter/s)": 0.410988 }, { "acc": 0.94095354, "epoch": 3.6200687731838737, "grad_norm": 4.433468341827393, "learning_rate": 2.4560649688710484e-07, "loss": 0.32275929, "memory(GiB)": 34.88, "step": 133700, "train_speed(iter/s)": 0.410989 }, { "acc": 0.9470252, "epoch": 3.620204153467089, "grad_norm": 7.480060577392578, "learning_rate": 2.454336417701148e-07, "loss": 0.28256359, "memory(GiB)": 34.88, "step": 133705, "train_speed(iter/s)": 0.41099 }, { "acc": 0.9472044, "epoch": 3.620339533750305, "grad_norm": 5.796772480010986, "learning_rate": 2.4526084621967277e-07, "loss": 0.3038758, "memory(GiB)": 34.88, "step": 133710, "train_speed(iter/s)": 0.410991 }, { "acc": 0.95466213, "epoch": 3.62047491403352, "grad_norm": 6.115302562713623, "learning_rate": 2.450881102379468e-07, "loss": 0.25409987, "memory(GiB)": 34.88, "step": 133715, "train_speed(iter/s)": 0.410992 }, { "acc": 0.95078506, "epoch": 3.6206102943167355, "grad_norm": 5.09486722946167, "learning_rate": 2.449154338270988e-07, "loss": 0.26405733, "memory(GiB)": 34.88, "step": 133720, "train_speed(iter/s)": 0.410993 }, { "acc": 0.96086416, "epoch": 3.6207456745999513, "grad_norm": 11.85975170135498, "learning_rate": 2.4474281698929245e-07, "loss": 0.23512187, "memory(GiB)": 34.88, "step": 133725, "train_speed(iter/s)": 0.410993 }, { "acc": 0.92883968, "epoch": 3.6208810548831667, "grad_norm": 10.01476001739502, "learning_rate": 2.4457025972668906e-07, "loss": 0.43777795, "memory(GiB)": 34.88, "step": 133730, "train_speed(iter/s)": 0.410994 }, { "acc": 0.94677334, "epoch": 3.6210164351663825, "grad_norm": 5.691100120544434, "learning_rate": 2.4439776204145117e-07, "loss": 0.33669765, "memory(GiB)": 34.88, "step": 133735, "train_speed(iter/s)": 0.410995 }, { "acc": 0.95976639, "epoch": 3.621151815449598, "grad_norm": 8.51840877532959, "learning_rate": 2.442253239357402e-07, "loss": 0.22752733, "memory(GiB)": 34.88, "step": 133740, "train_speed(iter/s)": 0.410996 }, { "acc": 0.94880219, "epoch": 3.6212871957328137, "grad_norm": 4.533727169036865, "learning_rate": 2.440529454117142e-07, "loss": 0.32539258, "memory(GiB)": 34.88, "step": 133745, "train_speed(iter/s)": 0.410997 }, { "acc": 0.94417658, "epoch": 3.621422576016029, "grad_norm": 2.8739631175994873, "learning_rate": 2.438806264715356e-07, "loss": 0.37865152, "memory(GiB)": 34.88, "step": 133750, "train_speed(iter/s)": 0.410998 }, { "acc": 0.94528008, "epoch": 3.6215579562992444, "grad_norm": 17.76634979248047, "learning_rate": 2.4370836711736087e-07, "loss": 0.31167827, "memory(GiB)": 34.88, "step": 133755, "train_speed(iter/s)": 0.410998 }, { "acc": 0.96332111, "epoch": 3.62169333658246, "grad_norm": 7.605318546295166, "learning_rate": 2.435361673513485e-07, "loss": 0.27950037, "memory(GiB)": 34.88, "step": 133760, "train_speed(iter/s)": 0.410999 }, { "acc": 0.94109535, "epoch": 3.6218287168656755, "grad_norm": 4.746870517730713, "learning_rate": 2.433640271756545e-07, "loss": 0.36543791, "memory(GiB)": 34.88, "step": 133765, "train_speed(iter/s)": 0.411 }, { "acc": 0.94905558, "epoch": 3.6219640971488913, "grad_norm": 5.885926246643066, "learning_rate": 2.4319194659243736e-07, "loss": 0.36093016, "memory(GiB)": 34.88, "step": 133770, "train_speed(iter/s)": 0.411001 }, { "acc": 0.95623789, "epoch": 3.6220994774321067, "grad_norm": 5.335554122924805, "learning_rate": 2.4301992560385244e-07, "loss": 0.29134307, "memory(GiB)": 34.88, "step": 133775, "train_speed(iter/s)": 0.411002 }, { "acc": 0.9670414, "epoch": 3.6222348577153225, "grad_norm": 5.3094916343688965, "learning_rate": 2.428479642120516e-07, "loss": 0.17916658, "memory(GiB)": 34.88, "step": 133780, "train_speed(iter/s)": 0.411002 }, { "acc": 0.95740967, "epoch": 3.622370237998538, "grad_norm": 5.167446613311768, "learning_rate": 2.4267606241919354e-07, "loss": 0.27123086, "memory(GiB)": 34.88, "step": 133785, "train_speed(iter/s)": 0.411003 }, { "acc": 0.9347805, "epoch": 3.622505618281753, "grad_norm": 17.5823974609375, "learning_rate": 2.4250422022742794e-07, "loss": 0.40703535, "memory(GiB)": 34.88, "step": 133790, "train_speed(iter/s)": 0.411004 }, { "acc": 0.96588802, "epoch": 3.622640998564969, "grad_norm": 3.1931822299957275, "learning_rate": 2.423324376389101e-07, "loss": 0.19033886, "memory(GiB)": 34.88, "step": 133795, "train_speed(iter/s)": 0.411005 }, { "acc": 0.94606085, "epoch": 3.622776378848185, "grad_norm": 3.5775978565216064, "learning_rate": 2.421607146557892e-07, "loss": 0.314118, "memory(GiB)": 34.88, "step": 133800, "train_speed(iter/s)": 0.411006 }, { "acc": 0.94490967, "epoch": 3.6229117591314, "grad_norm": 10.36256217956543, "learning_rate": 2.419890512802194e-07, "loss": 0.34482636, "memory(GiB)": 34.88, "step": 133805, "train_speed(iter/s)": 0.411007 }, { "acc": 0.93976603, "epoch": 3.6230471394146155, "grad_norm": 22.78767204284668, "learning_rate": 2.418174475143488e-07, "loss": 0.43909082, "memory(GiB)": 34.88, "step": 133810, "train_speed(iter/s)": 0.411007 }, { "acc": 0.95667992, "epoch": 3.6231825196978313, "grad_norm": 5.96996545791626, "learning_rate": 2.416459033603282e-07, "loss": 0.29342594, "memory(GiB)": 34.88, "step": 133815, "train_speed(iter/s)": 0.411008 }, { "acc": 0.94191055, "epoch": 3.6233178999810467, "grad_norm": 2.4486773014068604, "learning_rate": 2.414744188203068e-07, "loss": 0.35774505, "memory(GiB)": 34.88, "step": 133820, "train_speed(iter/s)": 0.411009 }, { "acc": 0.95543728, "epoch": 3.623453280264262, "grad_norm": 3.8223960399627686, "learning_rate": 2.413029938964321e-07, "loss": 0.26453588, "memory(GiB)": 34.88, "step": 133825, "train_speed(iter/s)": 0.41101 }, { "acc": 0.95468712, "epoch": 3.623588660547478, "grad_norm": 6.06019926071167, "learning_rate": 2.411316285908516e-07, "loss": 0.28022385, "memory(GiB)": 34.88, "step": 133830, "train_speed(iter/s)": 0.411011 }, { "acc": 0.94412088, "epoch": 3.6237240408306937, "grad_norm": 6.071316719055176, "learning_rate": 2.409603229057112e-07, "loss": 0.37153165, "memory(GiB)": 34.88, "step": 133835, "train_speed(iter/s)": 0.411012 }, { "acc": 0.96529217, "epoch": 3.623859421113909, "grad_norm": 3.665525197982788, "learning_rate": 2.407890768431589e-07, "loss": 0.19010433, "memory(GiB)": 34.88, "step": 133840, "train_speed(iter/s)": 0.411012 }, { "acc": 0.95123596, "epoch": 3.6239948013971244, "grad_norm": 4.781291484832764, "learning_rate": 2.4061789040533837e-07, "loss": 0.33078485, "memory(GiB)": 34.88, "step": 133845, "train_speed(iter/s)": 0.411013 }, { "acc": 0.94803133, "epoch": 3.62413018168034, "grad_norm": 8.95981216430664, "learning_rate": 2.404467635943944e-07, "loss": 0.31089275, "memory(GiB)": 34.88, "step": 133850, "train_speed(iter/s)": 0.411014 }, { "acc": 0.94473352, "epoch": 3.6242655619635555, "grad_norm": 14.037910461425781, "learning_rate": 2.402756964124704e-07, "loss": 0.33464251, "memory(GiB)": 34.88, "step": 133855, "train_speed(iter/s)": 0.411015 }, { "acc": 0.95796309, "epoch": 3.6244009422467713, "grad_norm": 7.2897539138793945, "learning_rate": 2.401046888617103e-07, "loss": 0.26643529, "memory(GiB)": 34.88, "step": 133860, "train_speed(iter/s)": 0.411016 }, { "acc": 0.9637475, "epoch": 3.6245363225299867, "grad_norm": 9.163763046264648, "learning_rate": 2.3993374094425583e-07, "loss": 0.23920386, "memory(GiB)": 34.88, "step": 133865, "train_speed(iter/s)": 0.411016 }, { "acc": 0.9451889, "epoch": 3.6246717028132025, "grad_norm": 13.742549896240234, "learning_rate": 2.397628526622468e-07, "loss": 0.31832838, "memory(GiB)": 34.88, "step": 133870, "train_speed(iter/s)": 0.411017 }, { "acc": 0.95782719, "epoch": 3.624807083096418, "grad_norm": 6.382181167602539, "learning_rate": 2.3959202401782684e-07, "loss": 0.30732346, "memory(GiB)": 34.88, "step": 133875, "train_speed(iter/s)": 0.411018 }, { "acc": 0.94530935, "epoch": 3.624942463379633, "grad_norm": 20.78713607788086, "learning_rate": 2.3942125501313347e-07, "loss": 0.30510747, "memory(GiB)": 34.88, "step": 133880, "train_speed(iter/s)": 0.411019 }, { "acc": 0.9519146, "epoch": 3.625077843662849, "grad_norm": 3.9515926837921143, "learning_rate": 2.3925054565030816e-07, "loss": 0.31147909, "memory(GiB)": 34.88, "step": 133885, "train_speed(iter/s)": 0.41102 }, { "acc": 0.94231701, "epoch": 3.6252132239460644, "grad_norm": 28.92129135131836, "learning_rate": 2.390798959314861e-07, "loss": 0.36687465, "memory(GiB)": 34.88, "step": 133890, "train_speed(iter/s)": 0.411021 }, { "acc": 0.96389027, "epoch": 3.62534860422928, "grad_norm": 3.4821739196777344, "learning_rate": 2.389093058588088e-07, "loss": 0.18762696, "memory(GiB)": 34.88, "step": 133895, "train_speed(iter/s)": 0.411021 }, { "acc": 0.94375887, "epoch": 3.6254839845124955, "grad_norm": 8.88915729522705, "learning_rate": 2.387387754344109e-07, "loss": 0.35471125, "memory(GiB)": 34.88, "step": 133900, "train_speed(iter/s)": 0.411022 }, { "acc": 0.95223465, "epoch": 3.6256193647957113, "grad_norm": 8.979615211486816, "learning_rate": 2.385683046604283e-07, "loss": 0.38005691, "memory(GiB)": 34.88, "step": 133905, "train_speed(iter/s)": 0.411023 }, { "acc": 0.95856705, "epoch": 3.6257547450789267, "grad_norm": 4.024560451507568, "learning_rate": 2.3839789353899856e-07, "loss": 0.24096656, "memory(GiB)": 34.88, "step": 133910, "train_speed(iter/s)": 0.411024 }, { "acc": 0.94080267, "epoch": 3.625890125362142, "grad_norm": 17.050010681152344, "learning_rate": 2.3822754207225473e-07, "loss": 0.36753922, "memory(GiB)": 34.88, "step": 133915, "train_speed(iter/s)": 0.411025 }, { "acc": 0.95181808, "epoch": 3.626025505645358, "grad_norm": 4.1846137046813965, "learning_rate": 2.380572502623321e-07, "loss": 0.23756354, "memory(GiB)": 34.88, "step": 133920, "train_speed(iter/s)": 0.411026 }, { "acc": 0.9456706, "epoch": 3.626160885928573, "grad_norm": 4.85673189163208, "learning_rate": 2.3788701811136096e-07, "loss": 0.32219174, "memory(GiB)": 34.88, "step": 133925, "train_speed(iter/s)": 0.411026 }, { "acc": 0.9511097, "epoch": 3.626296266211789, "grad_norm": 5.503001689910889, "learning_rate": 2.377168456214778e-07, "loss": 0.2586895, "memory(GiB)": 34.88, "step": 133930, "train_speed(iter/s)": 0.411027 }, { "acc": 0.94539261, "epoch": 3.6264316464950044, "grad_norm": 5.987502098083496, "learning_rate": 2.375467327948123e-07, "loss": 0.35061076, "memory(GiB)": 34.88, "step": 133935, "train_speed(iter/s)": 0.411028 }, { "acc": 0.95451527, "epoch": 3.62656702677822, "grad_norm": 7.324203014373779, "learning_rate": 2.3737667963349472e-07, "loss": 0.25634861, "memory(GiB)": 34.88, "step": 133940, "train_speed(iter/s)": 0.411029 }, { "acc": 0.96092186, "epoch": 3.6267024070614355, "grad_norm": 8.23866081237793, "learning_rate": 2.372066861396577e-07, "loss": 0.23625324, "memory(GiB)": 34.88, "step": 133945, "train_speed(iter/s)": 0.41103 }, { "acc": 0.93848019, "epoch": 3.626837787344651, "grad_norm": 9.389764785766602, "learning_rate": 2.3703675231542814e-07, "loss": 0.47953968, "memory(GiB)": 34.88, "step": 133950, "train_speed(iter/s)": 0.41103 }, { "acc": 0.94448481, "epoch": 3.6269731676278667, "grad_norm": 3.458059072494507, "learning_rate": 2.3686687816293696e-07, "loss": 0.3726779, "memory(GiB)": 34.88, "step": 133955, "train_speed(iter/s)": 0.411031 }, { "acc": 0.95619106, "epoch": 3.6271085479110825, "grad_norm": 4.292264461517334, "learning_rate": 2.366970636843094e-07, "loss": 0.25183113, "memory(GiB)": 34.88, "step": 133960, "train_speed(iter/s)": 0.411032 }, { "acc": 0.94936161, "epoch": 3.627243928194298, "grad_norm": 3.121854305267334, "learning_rate": 2.3652730888167632e-07, "loss": 0.3211503, "memory(GiB)": 34.88, "step": 133965, "train_speed(iter/s)": 0.411033 }, { "acc": 0.96626225, "epoch": 3.627379308477513, "grad_norm": 5.575181484222412, "learning_rate": 2.3635761375716143e-07, "loss": 0.23871315, "memory(GiB)": 34.88, "step": 133970, "train_speed(iter/s)": 0.411034 }, { "acc": 0.94946404, "epoch": 3.627514688760729, "grad_norm": 6.330967426300049, "learning_rate": 2.3618797831289108e-07, "loss": 0.24974313, "memory(GiB)": 34.88, "step": 133975, "train_speed(iter/s)": 0.411035 }, { "acc": 0.94839411, "epoch": 3.6276500690439444, "grad_norm": 5.048786163330078, "learning_rate": 2.360184025509912e-07, "loss": 0.35070283, "memory(GiB)": 34.88, "step": 133980, "train_speed(iter/s)": 0.411035 }, { "acc": 0.93471737, "epoch": 3.6277854493271597, "grad_norm": 5.019388675689697, "learning_rate": 2.35848886473586e-07, "loss": 0.42477217, "memory(GiB)": 34.88, "step": 133985, "train_speed(iter/s)": 0.411036 }, { "acc": 0.94242573, "epoch": 3.6279208296103755, "grad_norm": 4.934986114501953, "learning_rate": 2.3567943008279842e-07, "loss": 0.31543546, "memory(GiB)": 34.88, "step": 133990, "train_speed(iter/s)": 0.411037 }, { "acc": 0.93130188, "epoch": 3.6280562098935913, "grad_norm": 8.795304298400879, "learning_rate": 2.3551003338075005e-07, "loss": 0.45832024, "memory(GiB)": 34.88, "step": 133995, "train_speed(iter/s)": 0.411038 }, { "acc": 0.95327806, "epoch": 3.6281915901768067, "grad_norm": 4.499824523925781, "learning_rate": 2.3534069636956556e-07, "loss": 0.28490591, "memory(GiB)": 34.88, "step": 134000, "train_speed(iter/s)": 0.411039 }, { "acc": 0.94913445, "epoch": 3.628326970460022, "grad_norm": 9.01961612701416, "learning_rate": 2.3517141905136476e-07, "loss": 0.35754552, "memory(GiB)": 34.88, "step": 134005, "train_speed(iter/s)": 0.411039 }, { "acc": 0.94079208, "epoch": 3.628462350743238, "grad_norm": 10.043577194213867, "learning_rate": 2.3500220142826737e-07, "loss": 0.39641488, "memory(GiB)": 34.88, "step": 134010, "train_speed(iter/s)": 0.41104 }, { "acc": 0.94344978, "epoch": 3.628597731026453, "grad_norm": 6.155596733093262, "learning_rate": 2.3483304350239485e-07, "loss": 0.37682755, "memory(GiB)": 34.88, "step": 134015, "train_speed(iter/s)": 0.411041 }, { "acc": 0.94980221, "epoch": 3.628733111309669, "grad_norm": 2.113896608352661, "learning_rate": 2.346639452758658e-07, "loss": 0.30669284, "memory(GiB)": 34.88, "step": 134020, "train_speed(iter/s)": 0.411042 }, { "acc": 0.92557354, "epoch": 3.6288684915928844, "grad_norm": 13.395303726196289, "learning_rate": 2.344949067507978e-07, "loss": 0.42374754, "memory(GiB)": 34.88, "step": 134025, "train_speed(iter/s)": 0.411043 }, { "acc": 0.95520353, "epoch": 3.6290038718761, "grad_norm": 5.433446884155273, "learning_rate": 2.3432592792930778e-07, "loss": 0.30750079, "memory(GiB)": 34.88, "step": 134030, "train_speed(iter/s)": 0.411043 }, { "acc": 0.95681553, "epoch": 3.6291392521593155, "grad_norm": 5.2769269943237305, "learning_rate": 2.3415700881351498e-07, "loss": 0.19900342, "memory(GiB)": 34.88, "step": 134035, "train_speed(iter/s)": 0.411044 }, { "acc": 0.93520813, "epoch": 3.629274632442531, "grad_norm": 4.843045234680176, "learning_rate": 2.3398814940553312e-07, "loss": 0.41443987, "memory(GiB)": 34.88, "step": 134040, "train_speed(iter/s)": 0.411045 }, { "acc": 0.94579906, "epoch": 3.6294100127257467, "grad_norm": 9.769408226013184, "learning_rate": 2.3381934970747848e-07, "loss": 0.32196956, "memory(GiB)": 34.88, "step": 134045, "train_speed(iter/s)": 0.411046 }, { "acc": 0.96180458, "epoch": 3.629545393008962, "grad_norm": 9.00573444366455, "learning_rate": 2.3365060972146597e-07, "loss": 0.19914585, "memory(GiB)": 34.88, "step": 134050, "train_speed(iter/s)": 0.411047 }, { "acc": 0.96130714, "epoch": 3.629680773292178, "grad_norm": 6.3525285720825195, "learning_rate": 2.3348192944960914e-07, "loss": 0.25159531, "memory(GiB)": 34.88, "step": 134055, "train_speed(iter/s)": 0.411048 }, { "acc": 0.94829617, "epoch": 3.6298161535753932, "grad_norm": 6.279634952545166, "learning_rate": 2.3331330889402003e-07, "loss": 0.26159813, "memory(GiB)": 34.88, "step": 134060, "train_speed(iter/s)": 0.411048 }, { "acc": 0.94900999, "epoch": 3.629951533858609, "grad_norm": 7.860906600952148, "learning_rate": 2.3314474805681174e-07, "loss": 0.31673329, "memory(GiB)": 34.88, "step": 134065, "train_speed(iter/s)": 0.411049 }, { "acc": 0.94358892, "epoch": 3.6300869141418244, "grad_norm": 8.178716659545898, "learning_rate": 2.3297624694009622e-07, "loss": 0.27316952, "memory(GiB)": 34.88, "step": 134070, "train_speed(iter/s)": 0.41105 }, { "acc": 0.94838371, "epoch": 3.6302222944250397, "grad_norm": 8.187341690063477, "learning_rate": 2.3280780554598384e-07, "loss": 0.28372161, "memory(GiB)": 34.88, "step": 134075, "train_speed(iter/s)": 0.411051 }, { "acc": 0.94809856, "epoch": 3.6303576747082555, "grad_norm": 3.116417646408081, "learning_rate": 2.326394238765843e-07, "loss": 0.33637545, "memory(GiB)": 34.88, "step": 134080, "train_speed(iter/s)": 0.411052 }, { "acc": 0.94381247, "epoch": 3.630493054991471, "grad_norm": 12.400606155395508, "learning_rate": 2.3247110193400747e-07, "loss": 0.33899488, "memory(GiB)": 34.88, "step": 134085, "train_speed(iter/s)": 0.411052 }, { "acc": 0.93729057, "epoch": 3.6306284352746867, "grad_norm": 3.9799747467041016, "learning_rate": 2.3230283972036246e-07, "loss": 0.37778535, "memory(GiB)": 34.88, "step": 134090, "train_speed(iter/s)": 0.411053 }, { "acc": 0.94987793, "epoch": 3.630763815557902, "grad_norm": 5.725000381469727, "learning_rate": 2.321346372377558e-07, "loss": 0.26552372, "memory(GiB)": 34.88, "step": 134095, "train_speed(iter/s)": 0.411054 }, { "acc": 0.95278149, "epoch": 3.630899195841118, "grad_norm": 9.641035079956055, "learning_rate": 2.3196649448829495e-07, "loss": 0.37053523, "memory(GiB)": 34.88, "step": 134100, "train_speed(iter/s)": 0.411055 }, { "acc": 0.94788342, "epoch": 3.6310345761243332, "grad_norm": 9.463623046875, "learning_rate": 2.3179841147408638e-07, "loss": 0.36717222, "memory(GiB)": 34.88, "step": 134105, "train_speed(iter/s)": 0.411056 }, { "acc": 0.94869423, "epoch": 3.6311699564075486, "grad_norm": 6.939770221710205, "learning_rate": 2.31630388197236e-07, "loss": 0.27302337, "memory(GiB)": 34.88, "step": 134110, "train_speed(iter/s)": 0.411056 }, { "acc": 0.94744759, "epoch": 3.6313053366907644, "grad_norm": 17.232851028442383, "learning_rate": 2.3146242465984802e-07, "loss": 0.31279502, "memory(GiB)": 34.88, "step": 134115, "train_speed(iter/s)": 0.411057 }, { "acc": 0.94408083, "epoch": 3.6314407169739797, "grad_norm": 11.739163398742676, "learning_rate": 2.312945208640272e-07, "loss": 0.29197412, "memory(GiB)": 34.88, "step": 134120, "train_speed(iter/s)": 0.411058 }, { "acc": 0.92736158, "epoch": 3.6315760972571955, "grad_norm": 6.939943790435791, "learning_rate": 2.3112667681187662e-07, "loss": 0.50737295, "memory(GiB)": 34.88, "step": 134125, "train_speed(iter/s)": 0.411058 }, { "acc": 0.95113792, "epoch": 3.631711477540411, "grad_norm": 7.431450366973877, "learning_rate": 2.309588925054983e-07, "loss": 0.28087459, "memory(GiB)": 34.88, "step": 134130, "train_speed(iter/s)": 0.411059 }, { "acc": 0.94923229, "epoch": 3.6318468578236267, "grad_norm": 4.942068576812744, "learning_rate": 2.3079116794699426e-07, "loss": 0.27388399, "memory(GiB)": 34.88, "step": 134135, "train_speed(iter/s)": 0.41106 }, { "acc": 0.95161314, "epoch": 3.631982238106842, "grad_norm": 7.388482570648193, "learning_rate": 2.3062350313846594e-07, "loss": 0.25797453, "memory(GiB)": 34.88, "step": 134140, "train_speed(iter/s)": 0.411061 }, { "acc": 0.95155659, "epoch": 3.6321176183900574, "grad_norm": 5.409483432769775, "learning_rate": 2.3045589808201419e-07, "loss": 0.27417717, "memory(GiB)": 34.88, "step": 134145, "train_speed(iter/s)": 0.411062 }, { "acc": 0.94879169, "epoch": 3.6322529986732732, "grad_norm": 7.790487766265869, "learning_rate": 2.3028835277973712e-07, "loss": 0.32622371, "memory(GiB)": 34.88, "step": 134150, "train_speed(iter/s)": 0.411063 }, { "acc": 0.95799484, "epoch": 3.632388378956489, "grad_norm": 5.433635711669922, "learning_rate": 2.3012086723373458e-07, "loss": 0.18195844, "memory(GiB)": 34.88, "step": 134155, "train_speed(iter/s)": 0.411063 }, { "acc": 0.95081749, "epoch": 3.6325237592397044, "grad_norm": 6.640120983123779, "learning_rate": 2.2995344144610513e-07, "loss": 0.32471011, "memory(GiB)": 34.88, "step": 134160, "train_speed(iter/s)": 0.411064 }, { "acc": 0.94786663, "epoch": 3.6326591395229197, "grad_norm": 7.791708946228027, "learning_rate": 2.2978607541894421e-07, "loss": 0.33740959, "memory(GiB)": 34.88, "step": 134165, "train_speed(iter/s)": 0.411065 }, { "acc": 0.95365591, "epoch": 3.6327945198061355, "grad_norm": 7.836570739746094, "learning_rate": 2.2961876915435042e-07, "loss": 0.25718935, "memory(GiB)": 34.88, "step": 134170, "train_speed(iter/s)": 0.411066 }, { "acc": 0.94457588, "epoch": 3.632929900089351, "grad_norm": 14.930085182189941, "learning_rate": 2.2945152265441855e-07, "loss": 0.31076593, "memory(GiB)": 34.88, "step": 134175, "train_speed(iter/s)": 0.411066 }, { "acc": 0.95903378, "epoch": 3.6330652803725663, "grad_norm": 3.399745464324951, "learning_rate": 2.292843359212445e-07, "loss": 0.2370074, "memory(GiB)": 34.88, "step": 134180, "train_speed(iter/s)": 0.411067 }, { "acc": 0.95511894, "epoch": 3.633200660655782, "grad_norm": 10.831560134887695, "learning_rate": 2.291172089569214e-07, "loss": 0.25702996, "memory(GiB)": 34.88, "step": 134185, "train_speed(iter/s)": 0.411068 }, { "acc": 0.95331211, "epoch": 3.633336040938998, "grad_norm": 41.18901062011719, "learning_rate": 2.2895014176354348e-07, "loss": 0.25923364, "memory(GiB)": 34.88, "step": 134190, "train_speed(iter/s)": 0.411069 }, { "acc": 0.94503498, "epoch": 3.6334714212222132, "grad_norm": 7.749638557434082, "learning_rate": 2.2878313434320436e-07, "loss": 0.34376094, "memory(GiB)": 34.88, "step": 134195, "train_speed(iter/s)": 0.41107 }, { "acc": 0.93835812, "epoch": 3.6336068015054286, "grad_norm": 5.040625095367432, "learning_rate": 2.2861618669799499e-07, "loss": 0.36076407, "memory(GiB)": 34.88, "step": 134200, "train_speed(iter/s)": 0.41107 }, { "acc": 0.95247612, "epoch": 3.6337421817886444, "grad_norm": 4.548986434936523, "learning_rate": 2.2844929883000682e-07, "loss": 0.29060237, "memory(GiB)": 34.88, "step": 134205, "train_speed(iter/s)": 0.411071 }, { "acc": 0.95789509, "epoch": 3.6338775620718597, "grad_norm": 8.970623016357422, "learning_rate": 2.282824707413307e-07, "loss": 0.23617778, "memory(GiB)": 34.88, "step": 134210, "train_speed(iter/s)": 0.411072 }, { "acc": 0.95812035, "epoch": 3.6340129423550755, "grad_norm": 4.453896522521973, "learning_rate": 2.2811570243405757e-07, "loss": 0.26569977, "memory(GiB)": 34.88, "step": 134215, "train_speed(iter/s)": 0.411073 }, { "acc": 0.93936605, "epoch": 3.634148322638291, "grad_norm": 3.9145407676696777, "learning_rate": 2.2794899391027439e-07, "loss": 0.34997954, "memory(GiB)": 34.88, "step": 134220, "train_speed(iter/s)": 0.411073 }, { "acc": 0.94499474, "epoch": 3.6342837029215067, "grad_norm": 6.4488420486450195, "learning_rate": 2.2778234517207043e-07, "loss": 0.36136713, "memory(GiB)": 34.88, "step": 134225, "train_speed(iter/s)": 0.411074 }, { "acc": 0.9434763, "epoch": 3.634419083204722, "grad_norm": 3.614981174468994, "learning_rate": 2.2761575622153385e-07, "loss": 0.36244903, "memory(GiB)": 34.88, "step": 134230, "train_speed(iter/s)": 0.411075 }, { "acc": 0.9453474, "epoch": 3.6345544634879374, "grad_norm": 5.328793525695801, "learning_rate": 2.274492270607516e-07, "loss": 0.28990464, "memory(GiB)": 34.88, "step": 134235, "train_speed(iter/s)": 0.411076 }, { "acc": 0.94667301, "epoch": 3.6346898437711532, "grad_norm": 13.219367980957031, "learning_rate": 2.27282757691809e-07, "loss": 0.36169133, "memory(GiB)": 34.88, "step": 134240, "train_speed(iter/s)": 0.411077 }, { "acc": 0.95990162, "epoch": 3.6348252240543686, "grad_norm": 7.8415021896362305, "learning_rate": 2.2711634811679147e-07, "loss": 0.25231452, "memory(GiB)": 34.88, "step": 134245, "train_speed(iter/s)": 0.411078 }, { "acc": 0.94198685, "epoch": 3.6349606043375844, "grad_norm": 9.577455520629883, "learning_rate": 2.2694999833778435e-07, "loss": 0.30478644, "memory(GiB)": 34.88, "step": 134250, "train_speed(iter/s)": 0.411078 }, { "acc": 0.94941387, "epoch": 3.6350959846207997, "grad_norm": 11.920639991760254, "learning_rate": 2.267837083568707e-07, "loss": 0.28444288, "memory(GiB)": 34.88, "step": 134255, "train_speed(iter/s)": 0.411079 }, { "acc": 0.95079479, "epoch": 3.6352313649040155, "grad_norm": 4.97432804107666, "learning_rate": 2.266174781761337e-07, "loss": 0.32595935, "memory(GiB)": 34.88, "step": 134260, "train_speed(iter/s)": 0.41108 }, { "acc": 0.93959122, "epoch": 3.635366745187231, "grad_norm": 16.95022201538086, "learning_rate": 2.264513077976559e-07, "loss": 0.40416231, "memory(GiB)": 34.88, "step": 134265, "train_speed(iter/s)": 0.411081 }, { "acc": 0.94828911, "epoch": 3.6355021254704463, "grad_norm": 7.0658278465271, "learning_rate": 2.2628519722351986e-07, "loss": 0.31502919, "memory(GiB)": 34.88, "step": 134270, "train_speed(iter/s)": 0.411082 }, { "acc": 0.93891296, "epoch": 3.635637505753662, "grad_norm": 8.043804168701172, "learning_rate": 2.2611914645580483e-07, "loss": 0.43260736, "memory(GiB)": 34.88, "step": 134275, "train_speed(iter/s)": 0.411083 }, { "acc": 0.94297953, "epoch": 3.6357728860368774, "grad_norm": 5.616962909698486, "learning_rate": 2.2595315549659174e-07, "loss": 0.31120005, "memory(GiB)": 34.88, "step": 134280, "train_speed(iter/s)": 0.411083 }, { "acc": 0.96777763, "epoch": 3.6359082663200932, "grad_norm": 6.256099224090576, "learning_rate": 2.257872243479598e-07, "loss": 0.21861274, "memory(GiB)": 34.88, "step": 134285, "train_speed(iter/s)": 0.411084 }, { "acc": 0.95611362, "epoch": 3.6360436466033086, "grad_norm": 8.704700469970703, "learning_rate": 2.2562135301198773e-07, "loss": 0.27527733, "memory(GiB)": 34.88, "step": 134290, "train_speed(iter/s)": 0.411085 }, { "acc": 0.95279694, "epoch": 3.6361790268865244, "grad_norm": 4.7661919593811035, "learning_rate": 2.2545554149075306e-07, "loss": 0.34996386, "memory(GiB)": 34.88, "step": 134295, "train_speed(iter/s)": 0.411086 }, { "acc": 0.96063309, "epoch": 3.6363144071697397, "grad_norm": 6.222580432891846, "learning_rate": 2.2528978978633335e-07, "loss": 0.22231641, "memory(GiB)": 34.88, "step": 134300, "train_speed(iter/s)": 0.411087 }, { "acc": 0.95114193, "epoch": 3.636449787452955, "grad_norm": 5.939594268798828, "learning_rate": 2.2512409790080565e-07, "loss": 0.29603348, "memory(GiB)": 34.88, "step": 134305, "train_speed(iter/s)": 0.411087 }, { "acc": 0.95039349, "epoch": 3.636585167736171, "grad_norm": 7.386599540710449, "learning_rate": 2.2495846583624258e-07, "loss": 0.37691379, "memory(GiB)": 34.88, "step": 134310, "train_speed(iter/s)": 0.411088 }, { "acc": 0.96706924, "epoch": 3.6367205480193867, "grad_norm": 7.534816741943359, "learning_rate": 2.2479289359472333e-07, "loss": 0.24446187, "memory(GiB)": 34.88, "step": 134315, "train_speed(iter/s)": 0.411089 }, { "acc": 0.94824286, "epoch": 3.636855928302602, "grad_norm": 9.239114761352539, "learning_rate": 2.2462738117831936e-07, "loss": 0.34491689, "memory(GiB)": 34.88, "step": 134320, "train_speed(iter/s)": 0.41109 }, { "acc": 0.94340935, "epoch": 3.6369913085858174, "grad_norm": 3.117349624633789, "learning_rate": 2.2446192858910386e-07, "loss": 0.26690972, "memory(GiB)": 34.88, "step": 134325, "train_speed(iter/s)": 0.411091 }, { "acc": 0.93776569, "epoch": 3.6371266888690332, "grad_norm": 11.036346435546875, "learning_rate": 2.2429653582915046e-07, "loss": 0.42161789, "memory(GiB)": 34.88, "step": 134330, "train_speed(iter/s)": 0.411092 }, { "acc": 0.9549387, "epoch": 3.6372620691522486, "grad_norm": 10.253079414367676, "learning_rate": 2.2413120290053065e-07, "loss": 0.2438808, "memory(GiB)": 34.88, "step": 134335, "train_speed(iter/s)": 0.411093 }, { "acc": 0.94538803, "epoch": 3.637397449435464, "grad_norm": 11.348060607910156, "learning_rate": 2.2396592980531588e-07, "loss": 0.36126919, "memory(GiB)": 34.88, "step": 134340, "train_speed(iter/s)": 0.411093 }, { "acc": 0.93608904, "epoch": 3.6375328297186797, "grad_norm": 6.421931266784668, "learning_rate": 2.2380071654557488e-07, "loss": 0.4226809, "memory(GiB)": 34.88, "step": 134345, "train_speed(iter/s)": 0.411094 }, { "acc": 0.93589926, "epoch": 3.6376682100018956, "grad_norm": 3.939448356628418, "learning_rate": 2.236355631233802e-07, "loss": 0.47708802, "memory(GiB)": 34.88, "step": 134350, "train_speed(iter/s)": 0.411094 }, { "acc": 0.94520283, "epoch": 3.637803590285111, "grad_norm": 8.786706924438477, "learning_rate": 2.2347046954079836e-07, "loss": 0.34132271, "memory(GiB)": 34.88, "step": 134355, "train_speed(iter/s)": 0.411095 }, { "acc": 0.95266066, "epoch": 3.6379389705683263, "grad_norm": 6.240973472595215, "learning_rate": 2.2330543579989857e-07, "loss": 0.32526584, "memory(GiB)": 34.88, "step": 134360, "train_speed(iter/s)": 0.411096 }, { "acc": 0.94942694, "epoch": 3.638074350851542, "grad_norm": 21.680557250976562, "learning_rate": 2.2314046190274784e-07, "loss": 0.32215295, "memory(GiB)": 34.88, "step": 134365, "train_speed(iter/s)": 0.411097 }, { "acc": 0.95630512, "epoch": 3.6382097311347574, "grad_norm": 14.621479988098145, "learning_rate": 2.2297554785141216e-07, "loss": 0.2745518, "memory(GiB)": 34.88, "step": 134370, "train_speed(iter/s)": 0.411097 }, { "acc": 0.95014305, "epoch": 3.6383451114179732, "grad_norm": 6.13558292388916, "learning_rate": 2.22810693647959e-07, "loss": 0.26537209, "memory(GiB)": 34.88, "step": 134375, "train_speed(iter/s)": 0.411098 }, { "acc": 0.95461063, "epoch": 3.6384804917011886, "grad_norm": 12.685999870300293, "learning_rate": 2.2264589929445163e-07, "loss": 0.2914691, "memory(GiB)": 34.88, "step": 134380, "train_speed(iter/s)": 0.411099 }, { "acc": 0.95058823, "epoch": 3.6386158719844044, "grad_norm": 3.728884696960449, "learning_rate": 2.2248116479295585e-07, "loss": 0.34702826, "memory(GiB)": 34.88, "step": 134385, "train_speed(iter/s)": 0.4111 }, { "acc": 0.95205679, "epoch": 3.6387512522676198, "grad_norm": 2.5340535640716553, "learning_rate": 2.2231649014553492e-07, "loss": 0.28360648, "memory(GiB)": 34.88, "step": 134390, "train_speed(iter/s)": 0.411101 }, { "acc": 0.95038738, "epoch": 3.638886632550835, "grad_norm": 5.642195701599121, "learning_rate": 2.221518753542519e-07, "loss": 0.22964032, "memory(GiB)": 34.88, "step": 134395, "train_speed(iter/s)": 0.411101 }, { "acc": 0.9379694, "epoch": 3.639022012834051, "grad_norm": 28.658817291259766, "learning_rate": 2.2198732042116716e-07, "loss": 0.3818331, "memory(GiB)": 34.88, "step": 134400, "train_speed(iter/s)": 0.411102 }, { "acc": 0.94294834, "epoch": 3.6391573931172663, "grad_norm": 8.570453643798828, "learning_rate": 2.21822825348345e-07, "loss": 0.33420429, "memory(GiB)": 34.88, "step": 134405, "train_speed(iter/s)": 0.411103 }, { "acc": 0.93457508, "epoch": 3.639292773400482, "grad_norm": 7.938357830047607, "learning_rate": 2.216583901378441e-07, "loss": 0.4114151, "memory(GiB)": 34.88, "step": 134410, "train_speed(iter/s)": 0.411104 }, { "acc": 0.94702139, "epoch": 3.6394281536836974, "grad_norm": 7.342975616455078, "learning_rate": 2.214940147917237e-07, "loss": 0.32142887, "memory(GiB)": 34.88, "step": 134415, "train_speed(iter/s)": 0.411104 }, { "acc": 0.94826994, "epoch": 3.6395635339669132, "grad_norm": 9.820860862731934, "learning_rate": 2.213296993120453e-07, "loss": 0.34128613, "memory(GiB)": 34.88, "step": 134420, "train_speed(iter/s)": 0.411105 }, { "acc": 0.94803925, "epoch": 3.6396989142501286, "grad_norm": 5.414104461669922, "learning_rate": 2.2116544370086537e-07, "loss": 0.33548055, "memory(GiB)": 34.88, "step": 134425, "train_speed(iter/s)": 0.411106 }, { "acc": 0.95466175, "epoch": 3.639834294533344, "grad_norm": 10.875332832336426, "learning_rate": 2.210012479602432e-07, "loss": 0.29586415, "memory(GiB)": 34.88, "step": 134430, "train_speed(iter/s)": 0.411106 }, { "acc": 0.95223675, "epoch": 3.6399696748165598, "grad_norm": 8.254332542419434, "learning_rate": 2.2083711209223243e-07, "loss": 0.28323097, "memory(GiB)": 34.88, "step": 134435, "train_speed(iter/s)": 0.411107 }, { "acc": 0.95778027, "epoch": 3.640105055099775, "grad_norm": 2.7482144832611084, "learning_rate": 2.2067303609889348e-07, "loss": 0.26193361, "memory(GiB)": 34.88, "step": 134440, "train_speed(iter/s)": 0.411108 }, { "acc": 0.95572958, "epoch": 3.640240435382991, "grad_norm": 4.7225117683410645, "learning_rate": 2.2050901998227896e-07, "loss": 0.2507453, "memory(GiB)": 34.88, "step": 134445, "train_speed(iter/s)": 0.411109 }, { "acc": 0.93829041, "epoch": 3.6403758156662063, "grad_norm": 8.724675178527832, "learning_rate": 2.2034506374444365e-07, "loss": 0.36844842, "memory(GiB)": 34.88, "step": 134450, "train_speed(iter/s)": 0.41111 }, { "acc": 0.95583019, "epoch": 3.640511195949422, "grad_norm": 3.061110258102417, "learning_rate": 2.2018116738744237e-07, "loss": 0.27552743, "memory(GiB)": 34.88, "step": 134455, "train_speed(iter/s)": 0.411111 }, { "acc": 0.94634476, "epoch": 3.6406465762326374, "grad_norm": 7.521351337432861, "learning_rate": 2.200173309133272e-07, "loss": 0.34011736, "memory(GiB)": 34.88, "step": 134460, "train_speed(iter/s)": 0.411111 }, { "acc": 0.93238354, "epoch": 3.640781956515853, "grad_norm": 11.736156463623047, "learning_rate": 2.198535543241518e-07, "loss": 0.41927156, "memory(GiB)": 34.88, "step": 134465, "train_speed(iter/s)": 0.411112 }, { "acc": 0.94911156, "epoch": 3.6409173367990686, "grad_norm": 6.3507609367370605, "learning_rate": 2.1968983762196604e-07, "loss": 0.30260684, "memory(GiB)": 34.88, "step": 134470, "train_speed(iter/s)": 0.411113 }, { "acc": 0.9540206, "epoch": 3.6410527170822844, "grad_norm": 3.645146608352661, "learning_rate": 2.19526180808823e-07, "loss": 0.32047281, "memory(GiB)": 34.88, "step": 134475, "train_speed(iter/s)": 0.411114 }, { "acc": 0.94403067, "epoch": 3.6411880973654998, "grad_norm": 5.035479545593262, "learning_rate": 2.193625838867709e-07, "loss": 0.344206, "memory(GiB)": 34.88, "step": 134480, "train_speed(iter/s)": 0.411114 }, { "acc": 0.95140133, "epoch": 3.641323477648715, "grad_norm": 5.887202262878418, "learning_rate": 2.1919904685786066e-07, "loss": 0.3062887, "memory(GiB)": 34.88, "step": 134485, "train_speed(iter/s)": 0.411115 }, { "acc": 0.94687977, "epoch": 3.641458857931931, "grad_norm": 4.688311576843262, "learning_rate": 2.1903556972413985e-07, "loss": 0.31088085, "memory(GiB)": 34.88, "step": 134490, "train_speed(iter/s)": 0.411116 }, { "acc": 0.95558777, "epoch": 3.6415942382151463, "grad_norm": 7.693946361541748, "learning_rate": 2.188721524876561e-07, "loss": 0.22950878, "memory(GiB)": 34.88, "step": 134495, "train_speed(iter/s)": 0.411117 }, { "acc": 0.95059776, "epoch": 3.6417296184983616, "grad_norm": 6.021928310394287, "learning_rate": 2.1870879515045762e-07, "loss": 0.32250631, "memory(GiB)": 34.88, "step": 134500, "train_speed(iter/s)": 0.411118 }, { "acc": 0.93630791, "epoch": 3.6418649987815774, "grad_norm": 8.862262725830078, "learning_rate": 2.1854549771458913e-07, "loss": 0.44069452, "memory(GiB)": 34.88, "step": 134505, "train_speed(iter/s)": 0.411119 }, { "acc": 0.95286198, "epoch": 3.6420003790647932, "grad_norm": 5.378817081451416, "learning_rate": 2.1838226018209826e-07, "loss": 0.29239006, "memory(GiB)": 34.88, "step": 134510, "train_speed(iter/s)": 0.411119 }, { "acc": 0.95020599, "epoch": 3.6421357593480086, "grad_norm": 8.716434478759766, "learning_rate": 2.1821908255502822e-07, "loss": 0.26234016, "memory(GiB)": 34.88, "step": 134515, "train_speed(iter/s)": 0.41112 }, { "acc": 0.95054073, "epoch": 3.642271139631224, "grad_norm": 11.123736381530762, "learning_rate": 2.1805596483542492e-07, "loss": 0.33806963, "memory(GiB)": 34.88, "step": 134520, "train_speed(iter/s)": 0.411121 }, { "acc": 0.96222944, "epoch": 3.6424065199144398, "grad_norm": 5.1817827224731445, "learning_rate": 2.178929070253293e-07, "loss": 0.24169917, "memory(GiB)": 34.88, "step": 134525, "train_speed(iter/s)": 0.411121 }, { "acc": 0.94431448, "epoch": 3.642541900197655, "grad_norm": 12.725791931152344, "learning_rate": 2.1772990912678619e-07, "loss": 0.40611849, "memory(GiB)": 34.88, "step": 134530, "train_speed(iter/s)": 0.411122 }, { "acc": 0.94797144, "epoch": 3.642677280480871, "grad_norm": 10.90439510345459, "learning_rate": 2.1756697114183705e-07, "loss": 0.3533551, "memory(GiB)": 34.88, "step": 134535, "train_speed(iter/s)": 0.411123 }, { "acc": 0.95098248, "epoch": 3.6428126607640863, "grad_norm": 6.046647071838379, "learning_rate": 2.174040930725212e-07, "loss": 0.3440268, "memory(GiB)": 34.88, "step": 134540, "train_speed(iter/s)": 0.411124 }, { "acc": 0.95180759, "epoch": 3.642948041047302, "grad_norm": 3.9355967044830322, "learning_rate": 2.1724127492088124e-07, "loss": 0.26196609, "memory(GiB)": 34.88, "step": 134545, "train_speed(iter/s)": 0.411124 }, { "acc": 0.92846394, "epoch": 3.6430834213305174, "grad_norm": 8.544679641723633, "learning_rate": 2.170785166889559e-07, "loss": 0.43963776, "memory(GiB)": 34.88, "step": 134550, "train_speed(iter/s)": 0.411125 }, { "acc": 0.94226055, "epoch": 3.643218801613733, "grad_norm": 7.956403732299805, "learning_rate": 2.169158183787844e-07, "loss": 0.37838283, "memory(GiB)": 34.88, "step": 134555, "train_speed(iter/s)": 0.411126 }, { "acc": 0.9482585, "epoch": 3.6433541818969486, "grad_norm": 7.049310684204102, "learning_rate": 2.1675317999240275e-07, "loss": 0.30189261, "memory(GiB)": 34.88, "step": 134560, "train_speed(iter/s)": 0.411127 }, { "acc": 0.95862198, "epoch": 3.643489562180164, "grad_norm": 3.896540880203247, "learning_rate": 2.1659060153185185e-07, "loss": 0.29513025, "memory(GiB)": 34.88, "step": 134565, "train_speed(iter/s)": 0.411128 }, { "acc": 0.95469398, "epoch": 3.6436249424633798, "grad_norm": 13.528070449829102, "learning_rate": 2.164280829991666e-07, "loss": 0.24910426, "memory(GiB)": 34.88, "step": 134570, "train_speed(iter/s)": 0.411128 }, { "acc": 0.94291, "epoch": 3.643760322746595, "grad_norm": 16.828937530517578, "learning_rate": 2.162656243963812e-07, "loss": 0.39643872, "memory(GiB)": 34.88, "step": 134575, "train_speed(iter/s)": 0.411129 }, { "acc": 0.94916096, "epoch": 3.643895703029811, "grad_norm": 9.655667304992676, "learning_rate": 2.1610322572553384e-07, "loss": 0.30471802, "memory(GiB)": 34.88, "step": 134580, "train_speed(iter/s)": 0.41113 }, { "acc": 0.94971552, "epoch": 3.6440310833130263, "grad_norm": 4.799095630645752, "learning_rate": 2.1594088698865656e-07, "loss": 0.33720336, "memory(GiB)": 34.88, "step": 134585, "train_speed(iter/s)": 0.411131 }, { "acc": 0.95662718, "epoch": 3.6441664635962416, "grad_norm": 12.345544815063477, "learning_rate": 2.1577860818778482e-07, "loss": 0.25442872, "memory(GiB)": 34.88, "step": 134590, "train_speed(iter/s)": 0.411132 }, { "acc": 0.96056471, "epoch": 3.6443018438794574, "grad_norm": 5.394589424133301, "learning_rate": 2.1561638932494894e-07, "loss": 0.24259207, "memory(GiB)": 34.88, "step": 134595, "train_speed(iter/s)": 0.411132 }, { "acc": 0.95598116, "epoch": 3.644437224162673, "grad_norm": 12.820172309875488, "learning_rate": 2.154542304021838e-07, "loss": 0.28210557, "memory(GiB)": 34.88, "step": 134600, "train_speed(iter/s)": 0.411133 }, { "acc": 0.94727459, "epoch": 3.6445726044458886, "grad_norm": 5.306518077850342, "learning_rate": 2.1529213142151921e-07, "loss": 0.30761225, "memory(GiB)": 34.88, "step": 134605, "train_speed(iter/s)": 0.411134 }, { "acc": 0.95148754, "epoch": 3.644707984729104, "grad_norm": 4.699095249176025, "learning_rate": 2.1513009238498562e-07, "loss": 0.26395521, "memory(GiB)": 34.88, "step": 134610, "train_speed(iter/s)": 0.411135 }, { "acc": 0.95921001, "epoch": 3.6448433650123198, "grad_norm": 9.224287986755371, "learning_rate": 2.149681132946145e-07, "loss": 0.23303328, "memory(GiB)": 34.88, "step": 134615, "train_speed(iter/s)": 0.411135 }, { "acc": 0.93894949, "epoch": 3.644978745295535, "grad_norm": 9.92462158203125, "learning_rate": 2.1480619415243233e-07, "loss": 0.34894862, "memory(GiB)": 34.88, "step": 134620, "train_speed(iter/s)": 0.411136 }, { "acc": 0.95619659, "epoch": 3.6451141255787505, "grad_norm": 3.214552402496338, "learning_rate": 2.1464433496047064e-07, "loss": 0.26169581, "memory(GiB)": 34.88, "step": 134625, "train_speed(iter/s)": 0.411137 }, { "acc": 0.95150728, "epoch": 3.6452495058619663, "grad_norm": 3.7765331268310547, "learning_rate": 2.144825357207532e-07, "loss": 0.31049819, "memory(GiB)": 34.88, "step": 134630, "train_speed(iter/s)": 0.411138 }, { "acc": 0.951964, "epoch": 3.645384886145182, "grad_norm": 10.66308879852295, "learning_rate": 2.1432079643531088e-07, "loss": 0.33151507, "memory(GiB)": 34.88, "step": 134635, "train_speed(iter/s)": 0.411138 }, { "acc": 0.96638317, "epoch": 3.6455202664283974, "grad_norm": 4.034638404846191, "learning_rate": 2.141591171061669e-07, "loss": 0.17392178, "memory(GiB)": 34.88, "step": 134640, "train_speed(iter/s)": 0.411139 }, { "acc": 0.9619545, "epoch": 3.645655646711613, "grad_norm": 3.3722476959228516, "learning_rate": 2.1399749773534782e-07, "loss": 0.24423018, "memory(GiB)": 34.88, "step": 134645, "train_speed(iter/s)": 0.41114 }, { "acc": 0.96335745, "epoch": 3.6457910269948286, "grad_norm": 5.652368068695068, "learning_rate": 2.1383593832487784e-07, "loss": 0.21945779, "memory(GiB)": 34.88, "step": 134650, "train_speed(iter/s)": 0.411141 }, { "acc": 0.93607597, "epoch": 3.645926407278044, "grad_norm": 4.186197757720947, "learning_rate": 2.136744388767819e-07, "loss": 0.39540572, "memory(GiB)": 34.88, "step": 134655, "train_speed(iter/s)": 0.411141 }, { "acc": 0.95532017, "epoch": 3.6460617875612593, "grad_norm": 4.123274803161621, "learning_rate": 2.1351299939308196e-07, "loss": 0.29694858, "memory(GiB)": 34.88, "step": 134660, "train_speed(iter/s)": 0.411142 }, { "acc": 0.95803165, "epoch": 3.646197167844475, "grad_norm": 4.315192699432373, "learning_rate": 2.1335161987579963e-07, "loss": 0.28381305, "memory(GiB)": 34.88, "step": 134665, "train_speed(iter/s)": 0.411143 }, { "acc": 0.94016533, "epoch": 3.646332548127691, "grad_norm": 5.097430229187012, "learning_rate": 2.131903003269586e-07, "loss": 0.33478844, "memory(GiB)": 34.88, "step": 134670, "train_speed(iter/s)": 0.411144 }, { "acc": 0.95255594, "epoch": 3.6464679284109063, "grad_norm": 3.5200634002685547, "learning_rate": 2.1302904074857813e-07, "loss": 0.2605433, "memory(GiB)": 34.88, "step": 134675, "train_speed(iter/s)": 0.411145 }, { "acc": 0.94657946, "epoch": 3.6466033086941216, "grad_norm": 1.9944285154342651, "learning_rate": 2.128678411426787e-07, "loss": 0.35167065, "memory(GiB)": 34.88, "step": 134680, "train_speed(iter/s)": 0.411145 }, { "acc": 0.95486393, "epoch": 3.6467386889773374, "grad_norm": 3.739377498626709, "learning_rate": 2.1270670151127957e-07, "loss": 0.26458359, "memory(GiB)": 34.88, "step": 134685, "train_speed(iter/s)": 0.411146 }, { "acc": 0.94771032, "epoch": 3.646874069260553, "grad_norm": 5.541521072387695, "learning_rate": 2.1254562185640002e-07, "loss": 0.35360723, "memory(GiB)": 34.88, "step": 134690, "train_speed(iter/s)": 0.411147 }, { "acc": 0.94742174, "epoch": 3.6470094495437686, "grad_norm": 9.02035903930664, "learning_rate": 2.1238460218005768e-07, "loss": 0.2953841, "memory(GiB)": 34.88, "step": 134695, "train_speed(iter/s)": 0.411147 }, { "acc": 0.94723148, "epoch": 3.647144829826984, "grad_norm": 11.170797348022461, "learning_rate": 2.1222364248426799e-07, "loss": 0.31288221, "memory(GiB)": 34.88, "step": 134700, "train_speed(iter/s)": 0.411148 }, { "acc": 0.95520639, "epoch": 3.6472802101101998, "grad_norm": 5.273415565490723, "learning_rate": 2.1206274277105018e-07, "loss": 0.28824825, "memory(GiB)": 34.88, "step": 134705, "train_speed(iter/s)": 0.411149 }, { "acc": 0.95327883, "epoch": 3.647415590393415, "grad_norm": 12.383702278137207, "learning_rate": 2.1190190304241698e-07, "loss": 0.27705827, "memory(GiB)": 34.88, "step": 134710, "train_speed(iter/s)": 0.41115 }, { "acc": 0.95709972, "epoch": 3.6475509706766305, "grad_norm": 4.286202430725098, "learning_rate": 2.117411233003848e-07, "loss": 0.28469005, "memory(GiB)": 34.88, "step": 134715, "train_speed(iter/s)": 0.41115 }, { "acc": 0.92750721, "epoch": 3.6476863509598463, "grad_norm": 16.76424789428711, "learning_rate": 2.11580403546968e-07, "loss": 0.39345856, "memory(GiB)": 34.88, "step": 134720, "train_speed(iter/s)": 0.411151 }, { "acc": 0.95494404, "epoch": 3.6478217312430616, "grad_norm": 4.933173179626465, "learning_rate": 2.114197437841792e-07, "loss": 0.26543727, "memory(GiB)": 34.88, "step": 134725, "train_speed(iter/s)": 0.411152 }, { "acc": 0.95752201, "epoch": 3.6479571115262774, "grad_norm": 10.052264213562012, "learning_rate": 2.1125914401403107e-07, "loss": 0.22117524, "memory(GiB)": 34.88, "step": 134730, "train_speed(iter/s)": 0.411153 }, { "acc": 0.95843267, "epoch": 3.648092491809493, "grad_norm": 4.219227313995361, "learning_rate": 2.1109860423853506e-07, "loss": 0.25375886, "memory(GiB)": 34.88, "step": 134735, "train_speed(iter/s)": 0.411154 }, { "acc": 0.93127327, "epoch": 3.6482278720927086, "grad_norm": 6.42020845413208, "learning_rate": 2.1093812445970332e-07, "loss": 0.47288723, "memory(GiB)": 34.88, "step": 134740, "train_speed(iter/s)": 0.411154 }, { "acc": 0.9588316, "epoch": 3.648363252375924, "grad_norm": 2.3924784660339355, "learning_rate": 2.107777046795451e-07, "loss": 0.28709402, "memory(GiB)": 34.88, "step": 134745, "train_speed(iter/s)": 0.411155 }, { "acc": 0.93326979, "epoch": 3.6484986326591393, "grad_norm": 13.435731887817383, "learning_rate": 2.1061734490007083e-07, "loss": 0.41464739, "memory(GiB)": 34.88, "step": 134750, "train_speed(iter/s)": 0.411156 }, { "acc": 0.93501244, "epoch": 3.648634012942355, "grad_norm": 10.134330749511719, "learning_rate": 2.1045704512328815e-07, "loss": 0.36178887, "memory(GiB)": 34.88, "step": 134755, "train_speed(iter/s)": 0.411157 }, { "acc": 0.95605946, "epoch": 3.6487693932255705, "grad_norm": 7.891111850738525, "learning_rate": 2.1029680535120694e-07, "loss": 0.31458747, "memory(GiB)": 34.88, "step": 134760, "train_speed(iter/s)": 0.411158 }, { "acc": 0.93457794, "epoch": 3.6489047735087863, "grad_norm": 11.20362377166748, "learning_rate": 2.1013662558583256e-07, "loss": 0.4497551, "memory(GiB)": 34.88, "step": 134765, "train_speed(iter/s)": 0.411159 }, { "acc": 0.9517168, "epoch": 3.6490401537920016, "grad_norm": 2.970027208328247, "learning_rate": 2.099765058291727e-07, "loss": 0.30484765, "memory(GiB)": 34.88, "step": 134770, "train_speed(iter/s)": 0.41116 }, { "acc": 0.95296144, "epoch": 3.6491755340752174, "grad_norm": 2.930586576461792, "learning_rate": 2.0981644608323336e-07, "loss": 0.35095689, "memory(GiB)": 34.88, "step": 134775, "train_speed(iter/s)": 0.41116 }, { "acc": 0.9418047, "epoch": 3.649310914358433, "grad_norm": 6.259834289550781, "learning_rate": 2.096564463500193e-07, "loss": 0.34478896, "memory(GiB)": 34.88, "step": 134780, "train_speed(iter/s)": 0.411161 }, { "acc": 0.94051533, "epoch": 3.649446294641648, "grad_norm": 7.867066383361816, "learning_rate": 2.0949650663153434e-07, "loss": 0.34435463, "memory(GiB)": 34.88, "step": 134785, "train_speed(iter/s)": 0.411162 }, { "acc": 0.94645519, "epoch": 3.649581674924864, "grad_norm": 6.4828643798828125, "learning_rate": 2.0933662692978223e-07, "loss": 0.3034862, "memory(GiB)": 34.88, "step": 134790, "train_speed(iter/s)": 0.411163 }, { "acc": 0.95489788, "epoch": 3.6497170552080798, "grad_norm": 3.9748055934906006, "learning_rate": 2.0917680724676673e-07, "loss": 0.26124861, "memory(GiB)": 34.88, "step": 134795, "train_speed(iter/s)": 0.411164 }, { "acc": 0.94856377, "epoch": 3.649852435491295, "grad_norm": 5.701038837432861, "learning_rate": 2.0901704758448874e-07, "loss": 0.29750516, "memory(GiB)": 34.88, "step": 134800, "train_speed(iter/s)": 0.411164 }, { "acc": 0.95920277, "epoch": 3.6499878157745105, "grad_norm": 5.659427165985107, "learning_rate": 2.0885734794494992e-07, "loss": 0.24671376, "memory(GiB)": 34.88, "step": 134805, "train_speed(iter/s)": 0.411165 }, { "acc": 0.93967142, "epoch": 3.6501231960577263, "grad_norm": 7.189188480377197, "learning_rate": 2.086977083301506e-07, "loss": 0.39956226, "memory(GiB)": 34.88, "step": 134810, "train_speed(iter/s)": 0.411166 }, { "acc": 0.93728456, "epoch": 3.6502585763409416, "grad_norm": 12.8103609085083, "learning_rate": 2.0853812874209178e-07, "loss": 0.39483299, "memory(GiB)": 34.88, "step": 134815, "train_speed(iter/s)": 0.411167 }, { "acc": 0.94567947, "epoch": 3.650393956624157, "grad_norm": 4.638731479644775, "learning_rate": 2.0837860918277113e-07, "loss": 0.3395277, "memory(GiB)": 34.88, "step": 134820, "train_speed(iter/s)": 0.411168 }, { "acc": 0.95086775, "epoch": 3.650529336907373, "grad_norm": 5.297140121459961, "learning_rate": 2.0821914965418682e-07, "loss": 0.27704303, "memory(GiB)": 34.88, "step": 134825, "train_speed(iter/s)": 0.411169 }, { "acc": 0.96091442, "epoch": 3.6506647171905886, "grad_norm": 3.6197633743286133, "learning_rate": 2.080597501583376e-07, "loss": 0.2334209, "memory(GiB)": 34.88, "step": 134830, "train_speed(iter/s)": 0.411169 }, { "acc": 0.95365963, "epoch": 3.650800097473804, "grad_norm": 9.77353286743164, "learning_rate": 2.0790041069721948e-07, "loss": 0.27232361, "memory(GiB)": 34.88, "step": 134835, "train_speed(iter/s)": 0.41117 }, { "acc": 0.95794888, "epoch": 3.6509354777570193, "grad_norm": 7.323358058929443, "learning_rate": 2.0774113127282842e-07, "loss": 0.31465735, "memory(GiB)": 34.88, "step": 134840, "train_speed(iter/s)": 0.411171 }, { "acc": 0.95470142, "epoch": 3.651070858040235, "grad_norm": 6.184672832489014, "learning_rate": 2.0758191188715984e-07, "loss": 0.29473817, "memory(GiB)": 34.88, "step": 134845, "train_speed(iter/s)": 0.411172 }, { "acc": 0.94187994, "epoch": 3.6512062383234505, "grad_norm": 20.348690032958984, "learning_rate": 2.074227525422092e-07, "loss": 0.3732924, "memory(GiB)": 34.88, "step": 134850, "train_speed(iter/s)": 0.411172 }, { "acc": 0.96623974, "epoch": 3.6513416186066663, "grad_norm": 5.81911039352417, "learning_rate": 2.072636532399686e-07, "loss": 0.15282679, "memory(GiB)": 34.88, "step": 134855, "train_speed(iter/s)": 0.411173 }, { "acc": 0.95521746, "epoch": 3.6514769988898816, "grad_norm": 4.823047161102295, "learning_rate": 2.071046139824323e-07, "loss": 0.24067154, "memory(GiB)": 34.88, "step": 134860, "train_speed(iter/s)": 0.411174 }, { "acc": 0.96459236, "epoch": 3.6516123791730974, "grad_norm": 4.403629779815674, "learning_rate": 2.0694563477159246e-07, "loss": 0.16141353, "memory(GiB)": 34.88, "step": 134865, "train_speed(iter/s)": 0.411175 }, { "acc": 0.95249872, "epoch": 3.651747759456313, "grad_norm": 6.018732070922852, "learning_rate": 2.0678671560944e-07, "loss": 0.28512814, "memory(GiB)": 34.88, "step": 134870, "train_speed(iter/s)": 0.411176 }, { "acc": 0.94319954, "epoch": 3.651883139739528, "grad_norm": 5.31376314163208, "learning_rate": 2.0662785649796595e-07, "loss": 0.30808568, "memory(GiB)": 34.88, "step": 134875, "train_speed(iter/s)": 0.411177 }, { "acc": 0.9411417, "epoch": 3.652018520022744, "grad_norm": 14.936750411987305, "learning_rate": 2.064690574391602e-07, "loss": 0.39043329, "memory(GiB)": 34.88, "step": 134880, "train_speed(iter/s)": 0.411177 }, { "acc": 0.94339485, "epoch": 3.6521539003059593, "grad_norm": 4.313669681549072, "learning_rate": 2.0631031843501316e-07, "loss": 0.38061035, "memory(GiB)": 34.88, "step": 134885, "train_speed(iter/s)": 0.411178 }, { "acc": 0.93917456, "epoch": 3.652289280589175, "grad_norm": 7.033809661865234, "learning_rate": 2.0615163948751192e-07, "loss": 0.3608871, "memory(GiB)": 34.88, "step": 134890, "train_speed(iter/s)": 0.411179 }, { "acc": 0.95575695, "epoch": 3.6524246608723905, "grad_norm": 8.677000045776367, "learning_rate": 2.059930205986447e-07, "loss": 0.28765368, "memory(GiB)": 34.88, "step": 134895, "train_speed(iter/s)": 0.41118 }, { "acc": 0.95012541, "epoch": 3.6525600411556063, "grad_norm": 6.81598424911499, "learning_rate": 2.058344617703986e-07, "loss": 0.26984057, "memory(GiB)": 34.88, "step": 134900, "train_speed(iter/s)": 0.411181 }, { "acc": 0.94714327, "epoch": 3.6526954214388216, "grad_norm": 7.713114261627197, "learning_rate": 2.0567596300476074e-07, "loss": 0.3403461, "memory(GiB)": 34.88, "step": 134905, "train_speed(iter/s)": 0.411181 }, { "acc": 0.94428101, "epoch": 3.652830801722037, "grad_norm": 3.4260942935943604, "learning_rate": 2.0551752430371483e-07, "loss": 0.34032845, "memory(GiB)": 34.88, "step": 134910, "train_speed(iter/s)": 0.411182 }, { "acc": 0.95133953, "epoch": 3.652966182005253, "grad_norm": 31.835485458374023, "learning_rate": 2.0535914566924637e-07, "loss": 0.26483202, "memory(GiB)": 34.88, "step": 134915, "train_speed(iter/s)": 0.411183 }, { "acc": 0.95007992, "epoch": 3.653101562288468, "grad_norm": 5.649110794067383, "learning_rate": 2.052008271033402e-07, "loss": 0.26503823, "memory(GiB)": 34.88, "step": 134920, "train_speed(iter/s)": 0.411184 }, { "acc": 0.94518776, "epoch": 3.653236942571684, "grad_norm": 3.768259048461914, "learning_rate": 2.0504256860797845e-07, "loss": 0.27518444, "memory(GiB)": 34.88, "step": 134925, "train_speed(iter/s)": 0.411185 }, { "acc": 0.9601223, "epoch": 3.6533723228548993, "grad_norm": 6.5376081466674805, "learning_rate": 2.0488437018514432e-07, "loss": 0.23785651, "memory(GiB)": 34.88, "step": 134930, "train_speed(iter/s)": 0.411186 }, { "acc": 0.93066711, "epoch": 3.653507703138115, "grad_norm": 7.031281471252441, "learning_rate": 2.0472623183681884e-07, "loss": 0.4112536, "memory(GiB)": 34.88, "step": 134935, "train_speed(iter/s)": 0.411187 }, { "acc": 0.94875832, "epoch": 3.6536430834213305, "grad_norm": 5.289365768432617, "learning_rate": 2.0456815356498462e-07, "loss": 0.30889392, "memory(GiB)": 34.88, "step": 134940, "train_speed(iter/s)": 0.411187 }, { "acc": 0.95342026, "epoch": 3.653778463704546, "grad_norm": 6.2813720703125, "learning_rate": 2.0441013537161932e-07, "loss": 0.30582716, "memory(GiB)": 34.88, "step": 134945, "train_speed(iter/s)": 0.411188 }, { "acc": 0.94715042, "epoch": 3.6539138439877616, "grad_norm": 5.096679210662842, "learning_rate": 2.042521772587051e-07, "loss": 0.28934717, "memory(GiB)": 34.88, "step": 134950, "train_speed(iter/s)": 0.411189 }, { "acc": 0.95618229, "epoch": 3.6540492242709774, "grad_norm": 4.787929058074951, "learning_rate": 2.0409427922821906e-07, "loss": 0.27029829, "memory(GiB)": 34.88, "step": 134955, "train_speed(iter/s)": 0.41119 }, { "acc": 0.94682922, "epoch": 3.654184604554193, "grad_norm": 4.0096235275268555, "learning_rate": 2.0393644128213936e-07, "loss": 0.33326857, "memory(GiB)": 34.88, "step": 134960, "train_speed(iter/s)": 0.41119 }, { "acc": 0.94779921, "epoch": 3.654319984837408, "grad_norm": 5.586310863494873, "learning_rate": 2.0377866342244372e-07, "loss": 0.32816162, "memory(GiB)": 34.88, "step": 134965, "train_speed(iter/s)": 0.411191 }, { "acc": 0.94699306, "epoch": 3.654455365120624, "grad_norm": 6.73029088973999, "learning_rate": 2.0362094565110814e-07, "loss": 0.32071686, "memory(GiB)": 34.88, "step": 134970, "train_speed(iter/s)": 0.411192 }, { "acc": 0.94239311, "epoch": 3.6545907454038393, "grad_norm": 7.011577606201172, "learning_rate": 2.0346328797010915e-07, "loss": 0.34597311, "memory(GiB)": 34.88, "step": 134975, "train_speed(iter/s)": 0.411193 }, { "acc": 0.94673891, "epoch": 3.6547261256870547, "grad_norm": 6.752470970153809, "learning_rate": 2.0330569038141938e-07, "loss": 0.33001313, "memory(GiB)": 34.88, "step": 134980, "train_speed(iter/s)": 0.411194 }, { "acc": 0.94059381, "epoch": 3.6548615059702705, "grad_norm": 7.122133731842041, "learning_rate": 2.0314815288701662e-07, "loss": 0.286584, "memory(GiB)": 34.88, "step": 134985, "train_speed(iter/s)": 0.411195 }, { "acc": 0.95867214, "epoch": 3.6549968862534863, "grad_norm": 7.154881477355957, "learning_rate": 2.029906754888723e-07, "loss": 0.22163928, "memory(GiB)": 34.88, "step": 134990, "train_speed(iter/s)": 0.411196 }, { "acc": 0.94993877, "epoch": 3.6551322665367016, "grad_norm": 12.643630027770996, "learning_rate": 2.0283325818895916e-07, "loss": 0.33642094, "memory(GiB)": 34.88, "step": 134995, "train_speed(iter/s)": 0.411197 }, { "acc": 0.95591049, "epoch": 3.655267646819917, "grad_norm": 2.4977636337280273, "learning_rate": 2.0267590098924927e-07, "loss": 0.24438105, "memory(GiB)": 34.88, "step": 135000, "train_speed(iter/s)": 0.411197 }, { "acc": 0.96535187, "epoch": 3.655403027103133, "grad_norm": 8.327327728271484, "learning_rate": 2.0251860389171367e-07, "loss": 0.18329347, "memory(GiB)": 34.88, "step": 135005, "train_speed(iter/s)": 0.411198 }, { "acc": 0.93522224, "epoch": 3.655538407386348, "grad_norm": 18.05319595336914, "learning_rate": 2.0236136689832336e-07, "loss": 0.45512614, "memory(GiB)": 34.88, "step": 135010, "train_speed(iter/s)": 0.411199 }, { "acc": 0.94810295, "epoch": 3.655673787669564, "grad_norm": 14.30274486541748, "learning_rate": 2.0220419001104767e-07, "loss": 0.33201685, "memory(GiB)": 34.88, "step": 135015, "train_speed(iter/s)": 0.4112 }, { "acc": 0.94632874, "epoch": 3.6558091679527793, "grad_norm": 3.2555418014526367, "learning_rate": 2.0204707323185537e-07, "loss": 0.33846903, "memory(GiB)": 34.88, "step": 135020, "train_speed(iter/s)": 0.411201 }, { "acc": 0.94559269, "epoch": 3.655944548235995, "grad_norm": 4.440600395202637, "learning_rate": 2.0189001656271527e-07, "loss": 0.36788681, "memory(GiB)": 34.88, "step": 135025, "train_speed(iter/s)": 0.411201 }, { "acc": 0.95226917, "epoch": 3.6560799285192105, "grad_norm": 8.23298454284668, "learning_rate": 2.0173302000559501e-07, "loss": 0.30129476, "memory(GiB)": 34.88, "step": 135030, "train_speed(iter/s)": 0.411202 }, { "acc": 0.94652042, "epoch": 3.656215308802426, "grad_norm": 4.6083197593688965, "learning_rate": 2.0157608356245954e-07, "loss": 0.29801571, "memory(GiB)": 34.88, "step": 135035, "train_speed(iter/s)": 0.411203 }, { "acc": 0.94597731, "epoch": 3.6563506890856416, "grad_norm": 43.493614196777344, "learning_rate": 2.014192072352765e-07, "loss": 0.34613299, "memory(GiB)": 34.88, "step": 135040, "train_speed(iter/s)": 0.411204 }, { "acc": 0.96606197, "epoch": 3.656486069368857, "grad_norm": 4.264202117919922, "learning_rate": 2.012623910260113e-07, "loss": 0.18414552, "memory(GiB)": 34.88, "step": 135045, "train_speed(iter/s)": 0.411205 }, { "acc": 0.95552111, "epoch": 3.656621449652073, "grad_norm": 5.315098762512207, "learning_rate": 2.0110563493662672e-07, "loss": 0.25950768, "memory(GiB)": 34.88, "step": 135050, "train_speed(iter/s)": 0.411206 }, { "acc": 0.94774933, "epoch": 3.656756829935288, "grad_norm": 4.995849132537842, "learning_rate": 2.009489389690876e-07, "loss": 0.2881952, "memory(GiB)": 34.88, "step": 135055, "train_speed(iter/s)": 0.411206 }, { "acc": 0.9496912, "epoch": 3.656892210218504, "grad_norm": 7.996245861053467, "learning_rate": 2.007923031253566e-07, "loss": 0.26625018, "memory(GiB)": 34.88, "step": 135060, "train_speed(iter/s)": 0.411207 }, { "acc": 0.96185389, "epoch": 3.6570275905017193, "grad_norm": 4.113626003265381, "learning_rate": 2.006357274073964e-07, "loss": 0.25106316, "memory(GiB)": 34.88, "step": 135065, "train_speed(iter/s)": 0.411208 }, { "acc": 0.95963745, "epoch": 3.6571629707849347, "grad_norm": 2.835254669189453, "learning_rate": 2.004792118171664e-07, "loss": 0.26753359, "memory(GiB)": 34.88, "step": 135070, "train_speed(iter/s)": 0.411209 }, { "acc": 0.94141684, "epoch": 3.6572983510681505, "grad_norm": 6.234903812408447, "learning_rate": 2.0032275635663033e-07, "loss": 0.36703668, "memory(GiB)": 34.88, "step": 135075, "train_speed(iter/s)": 0.41121 }, { "acc": 0.94062233, "epoch": 3.657433731351366, "grad_norm": 7.036360263824463, "learning_rate": 2.0016636102774645e-07, "loss": 0.38109689, "memory(GiB)": 34.88, "step": 135080, "train_speed(iter/s)": 0.41121 }, { "acc": 0.95331783, "epoch": 3.6575691116345816, "grad_norm": 5.617995738983154, "learning_rate": 2.0001002583247355e-07, "loss": 0.25193334, "memory(GiB)": 34.88, "step": 135085, "train_speed(iter/s)": 0.411211 }, { "acc": 0.94434681, "epoch": 3.657704491917797, "grad_norm": 5.496021747589111, "learning_rate": 1.9985375077277045e-07, "loss": 0.3587033, "memory(GiB)": 34.88, "step": 135090, "train_speed(iter/s)": 0.411212 }, { "acc": 0.95055285, "epoch": 3.657839872201013, "grad_norm": 13.878334045410156, "learning_rate": 1.9969753585059538e-07, "loss": 0.25520227, "memory(GiB)": 34.88, "step": 135095, "train_speed(iter/s)": 0.411213 }, { "acc": 0.9549181, "epoch": 3.657975252484228, "grad_norm": 9.581221580505371, "learning_rate": 1.9954138106790484e-07, "loss": 0.27498279, "memory(GiB)": 34.88, "step": 135100, "train_speed(iter/s)": 0.411214 }, { "acc": 0.9491497, "epoch": 3.6581106327674435, "grad_norm": 4.239800930023193, "learning_rate": 1.993852864266533e-07, "loss": 0.35372829, "memory(GiB)": 34.88, "step": 135105, "train_speed(iter/s)": 0.411215 }, { "acc": 0.94721079, "epoch": 3.6582460130506593, "grad_norm": 14.02755069732666, "learning_rate": 1.9922925192879945e-07, "loss": 0.28325565, "memory(GiB)": 34.88, "step": 135110, "train_speed(iter/s)": 0.411215 }, { "acc": 0.9474966, "epoch": 3.658381393333875, "grad_norm": 5.968554973602295, "learning_rate": 1.9907327757629602e-07, "loss": 0.35082915, "memory(GiB)": 34.88, "step": 135115, "train_speed(iter/s)": 0.411216 }, { "acc": 0.95007181, "epoch": 3.6585167736170905, "grad_norm": 3.7981624603271484, "learning_rate": 1.9891736337109683e-07, "loss": 0.29477928, "memory(GiB)": 34.88, "step": 135120, "train_speed(iter/s)": 0.411217 }, { "acc": 0.95910616, "epoch": 3.658652153900306, "grad_norm": 3.2998478412628174, "learning_rate": 1.9876150931515508e-07, "loss": 0.23372793, "memory(GiB)": 34.88, "step": 135125, "train_speed(iter/s)": 0.411218 }, { "acc": 0.94525614, "epoch": 3.6587875341835217, "grad_norm": 8.198424339294434, "learning_rate": 1.986057154104229e-07, "loss": 0.29345829, "memory(GiB)": 34.88, "step": 135130, "train_speed(iter/s)": 0.411219 }, { "acc": 0.94877529, "epoch": 3.658922914466737, "grad_norm": 7.0275044441223145, "learning_rate": 1.9844998165885358e-07, "loss": 0.29845934, "memory(GiB)": 34.88, "step": 135135, "train_speed(iter/s)": 0.411219 }, { "acc": 0.94823132, "epoch": 3.6590582947499524, "grad_norm": 7.2151618003845215, "learning_rate": 1.9829430806239477e-07, "loss": 0.34275723, "memory(GiB)": 34.88, "step": 135140, "train_speed(iter/s)": 0.41122 }, { "acc": 0.96302948, "epoch": 3.659193675033168, "grad_norm": 4.960474014282227, "learning_rate": 1.981386946230003e-07, "loss": 0.22732921, "memory(GiB)": 34.88, "step": 135145, "train_speed(iter/s)": 0.411221 }, { "acc": 0.95459538, "epoch": 3.659329055316384, "grad_norm": 7.6149373054504395, "learning_rate": 1.979831413426173e-07, "loss": 0.26415725, "memory(GiB)": 34.88, "step": 135150, "train_speed(iter/s)": 0.411222 }, { "acc": 0.94528599, "epoch": 3.6594644355995993, "grad_norm": 5.033334732055664, "learning_rate": 1.9782764822319507e-07, "loss": 0.25338087, "memory(GiB)": 34.88, "step": 135155, "train_speed(iter/s)": 0.411223 }, { "acc": 0.93462257, "epoch": 3.6595998158828147, "grad_norm": 10.29355239868164, "learning_rate": 1.9767221526668135e-07, "loss": 0.46384068, "memory(GiB)": 34.88, "step": 135160, "train_speed(iter/s)": 0.411223 }, { "acc": 0.96478014, "epoch": 3.6597351961660305, "grad_norm": 6.308042049407959, "learning_rate": 1.9751684247502276e-07, "loss": 0.2135149, "memory(GiB)": 34.88, "step": 135165, "train_speed(iter/s)": 0.411224 }, { "acc": 0.94215126, "epoch": 3.659870576449246, "grad_norm": 9.855655670166016, "learning_rate": 1.9736152985016636e-07, "loss": 0.36222239, "memory(GiB)": 34.88, "step": 135170, "train_speed(iter/s)": 0.411225 }, { "acc": 0.94183388, "epoch": 3.660005956732461, "grad_norm": 6.951233863830566, "learning_rate": 1.9720627739405653e-07, "loss": 0.35773215, "memory(GiB)": 34.88, "step": 135175, "train_speed(iter/s)": 0.411226 }, { "acc": 0.95506611, "epoch": 3.660141337015677, "grad_norm": 4.794818878173828, "learning_rate": 1.9705108510864047e-07, "loss": 0.28294425, "memory(GiB)": 34.88, "step": 135180, "train_speed(iter/s)": 0.411227 }, { "acc": 0.9458168, "epoch": 3.660276717298893, "grad_norm": 3.3579490184783936, "learning_rate": 1.9689595299585966e-07, "loss": 0.36279118, "memory(GiB)": 34.88, "step": 135185, "train_speed(iter/s)": 0.411228 }, { "acc": 0.95099907, "epoch": 3.660412097582108, "grad_norm": 6.185117721557617, "learning_rate": 1.9674088105765967e-07, "loss": 0.33613629, "memory(GiB)": 34.88, "step": 135190, "train_speed(iter/s)": 0.411228 }, { "acc": 0.93651161, "epoch": 3.6605474778653235, "grad_norm": 6.814987659454346, "learning_rate": 1.965858692959809e-07, "loss": 0.44443383, "memory(GiB)": 34.88, "step": 135195, "train_speed(iter/s)": 0.411229 }, { "acc": 0.93908434, "epoch": 3.6606828581485393, "grad_norm": 8.320470809936523, "learning_rate": 1.964309177127672e-07, "loss": 0.35603569, "memory(GiB)": 34.88, "step": 135200, "train_speed(iter/s)": 0.41123 }, { "acc": 0.94176445, "epoch": 3.6608182384317547, "grad_norm": 6.762019634246826, "learning_rate": 1.962760263099585e-07, "loss": 0.37710631, "memory(GiB)": 34.88, "step": 135205, "train_speed(iter/s)": 0.411231 }, { "acc": 0.96384392, "epoch": 3.6609536187149705, "grad_norm": 8.460823059082031, "learning_rate": 1.9612119508949467e-07, "loss": 0.23988926, "memory(GiB)": 34.88, "step": 135210, "train_speed(iter/s)": 0.411232 }, { "acc": 0.95021162, "epoch": 3.661088998998186, "grad_norm": 8.570066452026367, "learning_rate": 1.959664240533168e-07, "loss": 0.3246716, "memory(GiB)": 34.88, "step": 135215, "train_speed(iter/s)": 0.411233 }, { "acc": 0.93350916, "epoch": 3.6612243792814017, "grad_norm": 7.478565692901611, "learning_rate": 1.9581171320336252e-07, "loss": 0.40321636, "memory(GiB)": 34.88, "step": 135220, "train_speed(iter/s)": 0.411233 }, { "acc": 0.95429344, "epoch": 3.661359759564617, "grad_norm": 5.9965596199035645, "learning_rate": 1.9565706254157074e-07, "loss": 0.24273157, "memory(GiB)": 34.88, "step": 135225, "train_speed(iter/s)": 0.411234 }, { "acc": 0.93799896, "epoch": 3.6614951398478324, "grad_norm": 11.206669807434082, "learning_rate": 1.9550247206987685e-07, "loss": 0.37228515, "memory(GiB)": 34.88, "step": 135230, "train_speed(iter/s)": 0.411235 }, { "acc": 0.9337738, "epoch": 3.661630520131048, "grad_norm": 17.129783630371094, "learning_rate": 1.9534794179022026e-07, "loss": 0.40113153, "memory(GiB)": 34.88, "step": 135235, "train_speed(iter/s)": 0.411236 }, { "acc": 0.94383001, "epoch": 3.6617659004142635, "grad_norm": 3.84024715423584, "learning_rate": 1.9519347170453533e-07, "loss": 0.28007083, "memory(GiB)": 34.88, "step": 135240, "train_speed(iter/s)": 0.411237 }, { "acc": 0.94609022, "epoch": 3.6619012806974793, "grad_norm": 33.87697219848633, "learning_rate": 1.9503906181475587e-07, "loss": 0.34397469, "memory(GiB)": 34.88, "step": 135245, "train_speed(iter/s)": 0.411237 }, { "acc": 0.94367695, "epoch": 3.6620366609806947, "grad_norm": 7.028691291809082, "learning_rate": 1.948847121228179e-07, "loss": 0.35101364, "memory(GiB)": 34.88, "step": 135250, "train_speed(iter/s)": 0.411238 }, { "acc": 0.94159174, "epoch": 3.6621720412639105, "grad_norm": 15.089822769165039, "learning_rate": 1.9473042263065472e-07, "loss": 0.33689761, "memory(GiB)": 34.88, "step": 135255, "train_speed(iter/s)": 0.411239 }, { "acc": 0.95292797, "epoch": 3.662307421547126, "grad_norm": 4.881621837615967, "learning_rate": 1.9457619334019902e-07, "loss": 0.27666993, "memory(GiB)": 34.88, "step": 135260, "train_speed(iter/s)": 0.41124 }, { "acc": 0.9454258, "epoch": 3.662442801830341, "grad_norm": 20.89368438720703, "learning_rate": 1.944220242533813e-07, "loss": 0.36649652, "memory(GiB)": 34.88, "step": 135265, "train_speed(iter/s)": 0.411241 }, { "acc": 0.95891829, "epoch": 3.662578182113557, "grad_norm": 28.684106826782227, "learning_rate": 1.9426791537213586e-07, "loss": 0.25785861, "memory(GiB)": 34.88, "step": 135270, "train_speed(iter/s)": 0.411241 }, { "acc": 0.94583473, "epoch": 3.6627135623967724, "grad_norm": 7.166182041168213, "learning_rate": 1.941138666983905e-07, "loss": 0.31269531, "memory(GiB)": 34.88, "step": 135275, "train_speed(iter/s)": 0.411242 }, { "acc": 0.95055065, "epoch": 3.662848942679988, "grad_norm": 5.530120372772217, "learning_rate": 1.939598782340762e-07, "loss": 0.31199799, "memory(GiB)": 34.88, "step": 135280, "train_speed(iter/s)": 0.411243 }, { "acc": 0.96121702, "epoch": 3.6629843229632035, "grad_norm": 2.5668749809265137, "learning_rate": 1.9380594998112235e-07, "loss": 0.18545934, "memory(GiB)": 34.88, "step": 135285, "train_speed(iter/s)": 0.411244 }, { "acc": 0.94752007, "epoch": 3.6631197032464193, "grad_norm": 7.324647426605225, "learning_rate": 1.9365208194145557e-07, "loss": 0.41146946, "memory(GiB)": 34.88, "step": 135290, "train_speed(iter/s)": 0.411245 }, { "acc": 0.94895115, "epoch": 3.6632550835296347, "grad_norm": 6.607586860656738, "learning_rate": 1.9349827411700518e-07, "loss": 0.33123436, "memory(GiB)": 34.88, "step": 135295, "train_speed(iter/s)": 0.411246 }, { "acc": 0.94850349, "epoch": 3.66339046381285, "grad_norm": 5.6491007804870605, "learning_rate": 1.933445265096962e-07, "loss": 0.30292447, "memory(GiB)": 34.88, "step": 135300, "train_speed(iter/s)": 0.411246 }, { "acc": 0.95815849, "epoch": 3.663525844096066, "grad_norm": 6.094893455505371, "learning_rate": 1.9319083912145682e-07, "loss": 0.21668317, "memory(GiB)": 34.88, "step": 135305, "train_speed(iter/s)": 0.411247 }, { "acc": 0.95195532, "epoch": 3.6636612243792817, "grad_norm": 12.627517700195312, "learning_rate": 1.9303721195421034e-07, "loss": 0.34486988, "memory(GiB)": 34.88, "step": 135310, "train_speed(iter/s)": 0.411248 }, { "acc": 0.94938698, "epoch": 3.663796604662497, "grad_norm": 12.95943832397461, "learning_rate": 1.9288364500988168e-07, "loss": 0.26314538, "memory(GiB)": 34.88, "step": 135315, "train_speed(iter/s)": 0.411249 }, { "acc": 0.95482836, "epoch": 3.6639319849457124, "grad_norm": 5.750178337097168, "learning_rate": 1.9273013829039524e-07, "loss": 0.25266142, "memory(GiB)": 34.88, "step": 135320, "train_speed(iter/s)": 0.41125 }, { "acc": 0.95165501, "epoch": 3.664067365228928, "grad_norm": 6.848632335662842, "learning_rate": 1.925766917976737e-07, "loss": 0.28543534, "memory(GiB)": 34.88, "step": 135325, "train_speed(iter/s)": 0.41125 }, { "acc": 0.93861341, "epoch": 3.6642027455121435, "grad_norm": 12.456326484680176, "learning_rate": 1.9242330553363928e-07, "loss": 0.40199971, "memory(GiB)": 34.88, "step": 135330, "train_speed(iter/s)": 0.411251 }, { "acc": 0.94236526, "epoch": 3.664338125795359, "grad_norm": 9.527832984924316, "learning_rate": 1.9226997950021244e-07, "loss": 0.36884904, "memory(GiB)": 34.88, "step": 135335, "train_speed(iter/s)": 0.411252 }, { "acc": 0.95341473, "epoch": 3.6644735060785747, "grad_norm": 2.336801052093506, "learning_rate": 1.9211671369931582e-07, "loss": 0.22556257, "memory(GiB)": 34.88, "step": 135340, "train_speed(iter/s)": 0.411253 }, { "acc": 0.94324026, "epoch": 3.6646088863617905, "grad_norm": 39.258487701416016, "learning_rate": 1.9196350813286728e-07, "loss": 0.42336721, "memory(GiB)": 34.88, "step": 135345, "train_speed(iter/s)": 0.411254 }, { "acc": 0.94223013, "epoch": 3.664744266645006, "grad_norm": 12.459277153015137, "learning_rate": 1.9181036280278777e-07, "loss": 0.3262557, "memory(GiB)": 34.88, "step": 135350, "train_speed(iter/s)": 0.411254 }, { "acc": 0.9553669, "epoch": 3.664879646928221, "grad_norm": 4.3268022537231445, "learning_rate": 1.9165727771099446e-07, "loss": 0.21792645, "memory(GiB)": 34.88, "step": 135355, "train_speed(iter/s)": 0.411255 }, { "acc": 0.93713284, "epoch": 3.665015027211437, "grad_norm": 7.284664630889893, "learning_rate": 1.9150425285940623e-07, "loss": 0.37153809, "memory(GiB)": 34.88, "step": 135360, "train_speed(iter/s)": 0.411256 }, { "acc": 0.93893719, "epoch": 3.6651504074946524, "grad_norm": 9.071738243103027, "learning_rate": 1.9135128824993964e-07, "loss": 0.42061667, "memory(GiB)": 34.88, "step": 135365, "train_speed(iter/s)": 0.411257 }, { "acc": 0.94667978, "epoch": 3.665285787777868, "grad_norm": 10.43149471282959, "learning_rate": 1.9119838388450968e-07, "loss": 0.28058975, "memory(GiB)": 34.88, "step": 135370, "train_speed(iter/s)": 0.411258 }, { "acc": 0.94806786, "epoch": 3.6654211680610835, "grad_norm": 9.8099365234375, "learning_rate": 1.9104553976503346e-07, "loss": 0.35887418, "memory(GiB)": 34.88, "step": 135375, "train_speed(iter/s)": 0.411259 }, { "acc": 0.95725269, "epoch": 3.6655565483442993, "grad_norm": 7.435429096221924, "learning_rate": 1.9089275589342425e-07, "loss": 0.25095747, "memory(GiB)": 34.88, "step": 135380, "train_speed(iter/s)": 0.411259 }, { "acc": 0.95138226, "epoch": 3.6656919286275147, "grad_norm": 5.769678592681885, "learning_rate": 1.9074003227159763e-07, "loss": 0.31644826, "memory(GiB)": 34.88, "step": 135385, "train_speed(iter/s)": 0.41126 }, { "acc": 0.94758081, "epoch": 3.66582730891073, "grad_norm": 14.382177352905273, "learning_rate": 1.9058736890146405e-07, "loss": 0.33676143, "memory(GiB)": 34.88, "step": 135390, "train_speed(iter/s)": 0.411261 }, { "acc": 0.93809547, "epoch": 3.665962689193946, "grad_norm": 10.600903511047363, "learning_rate": 1.9043476578493955e-07, "loss": 0.39181952, "memory(GiB)": 34.88, "step": 135395, "train_speed(iter/s)": 0.411262 }, { "acc": 0.95858784, "epoch": 3.666098069477161, "grad_norm": 5.540271282196045, "learning_rate": 1.90282222923933e-07, "loss": 0.24813781, "memory(GiB)": 34.88, "step": 135400, "train_speed(iter/s)": 0.411263 }, { "acc": 0.95906906, "epoch": 3.666233449760377, "grad_norm": 7.983791351318359, "learning_rate": 1.9012974032035597e-07, "loss": 0.21235113, "memory(GiB)": 34.88, "step": 135405, "train_speed(iter/s)": 0.411263 }, { "acc": 0.9504343, "epoch": 3.6663688300435924, "grad_norm": 4.715768337249756, "learning_rate": 1.89977317976119e-07, "loss": 0.3372613, "memory(GiB)": 34.88, "step": 135410, "train_speed(iter/s)": 0.411264 }, { "acc": 0.95678158, "epoch": 3.666504210326808, "grad_norm": 3.528103828430176, "learning_rate": 1.8982495589313146e-07, "loss": 0.2259758, "memory(GiB)": 34.88, "step": 135415, "train_speed(iter/s)": 0.411265 }, { "acc": 0.94326496, "epoch": 3.6666395906100235, "grad_norm": 4.564966201782227, "learning_rate": 1.896726540733022e-07, "loss": 0.35352976, "memory(GiB)": 34.88, "step": 135420, "train_speed(iter/s)": 0.411266 }, { "acc": 0.93921547, "epoch": 3.666774970893239, "grad_norm": 4.405358791351318, "learning_rate": 1.895204125185373e-07, "loss": 0.38314579, "memory(GiB)": 34.88, "step": 135425, "train_speed(iter/s)": 0.411267 }, { "acc": 0.93513298, "epoch": 3.6669103511764547, "grad_norm": 11.339856147766113, "learning_rate": 1.8936823123074667e-07, "loss": 0.37818577, "memory(GiB)": 34.88, "step": 135430, "train_speed(iter/s)": 0.411267 }, { "acc": 0.94681768, "epoch": 3.66704573145967, "grad_norm": 7.7345709800720215, "learning_rate": 1.8921611021183478e-07, "loss": 0.28731952, "memory(GiB)": 34.88, "step": 135435, "train_speed(iter/s)": 0.411268 }, { "acc": 0.95224371, "epoch": 3.667181111742886, "grad_norm": 4.035196304321289, "learning_rate": 1.890640494637076e-07, "loss": 0.31220865, "memory(GiB)": 34.88, "step": 135440, "train_speed(iter/s)": 0.411269 }, { "acc": 0.94662552, "epoch": 3.667316492026101, "grad_norm": 6.548027515411377, "learning_rate": 1.8891204898827009e-07, "loss": 0.29918194, "memory(GiB)": 34.88, "step": 135445, "train_speed(iter/s)": 0.41127 }, { "acc": 0.95462093, "epoch": 3.667451872309317, "grad_norm": 4.689194679260254, "learning_rate": 1.8876010878742673e-07, "loss": 0.26889737, "memory(GiB)": 34.88, "step": 135450, "train_speed(iter/s)": 0.411271 }, { "acc": 0.94342108, "epoch": 3.6675872525925324, "grad_norm": 10.428214073181152, "learning_rate": 1.8860822886308128e-07, "loss": 0.39098933, "memory(GiB)": 34.88, "step": 135455, "train_speed(iter/s)": 0.411271 }, { "acc": 0.93946533, "epoch": 3.6677226328757477, "grad_norm": 5.223613262176514, "learning_rate": 1.8845640921713375e-07, "loss": 0.38553908, "memory(GiB)": 34.88, "step": 135460, "train_speed(iter/s)": 0.411272 }, { "acc": 0.94721737, "epoch": 3.6678580131589635, "grad_norm": 8.04998779296875, "learning_rate": 1.8830464985148963e-07, "loss": 0.37441747, "memory(GiB)": 34.88, "step": 135465, "train_speed(iter/s)": 0.411273 }, { "acc": 0.95200558, "epoch": 3.6679933934421793, "grad_norm": 6.49941873550415, "learning_rate": 1.8815295076804716e-07, "loss": 0.26122553, "memory(GiB)": 34.88, "step": 135470, "train_speed(iter/s)": 0.411274 }, { "acc": 0.95770969, "epoch": 3.6681287737253947, "grad_norm": 10.613141059875488, "learning_rate": 1.8800131196870805e-07, "loss": 0.28121934, "memory(GiB)": 34.88, "step": 135475, "train_speed(iter/s)": 0.411275 }, { "acc": 0.94780159, "epoch": 3.66826415400861, "grad_norm": 10.561816215515137, "learning_rate": 1.8784973345537107e-07, "loss": 0.32311361, "memory(GiB)": 34.88, "step": 135480, "train_speed(iter/s)": 0.411275 }, { "acc": 0.96539326, "epoch": 3.668399534291826, "grad_norm": 5.912605285644531, "learning_rate": 1.876982152299368e-07, "loss": 0.20226307, "memory(GiB)": 34.88, "step": 135485, "train_speed(iter/s)": 0.411276 }, { "acc": 0.95533981, "epoch": 3.668534914575041, "grad_norm": 6.682702541351318, "learning_rate": 1.875467572943007e-07, "loss": 0.29289122, "memory(GiB)": 34.88, "step": 135490, "train_speed(iter/s)": 0.411277 }, { "acc": 0.94295444, "epoch": 3.6686702948582566, "grad_norm": 5.244601249694824, "learning_rate": 1.8739535965036222e-07, "loss": 0.41688108, "memory(GiB)": 34.88, "step": 135495, "train_speed(iter/s)": 0.411278 }, { "acc": 0.94590187, "epoch": 3.6688056751414724, "grad_norm": 6.365204811096191, "learning_rate": 1.872440223000168e-07, "loss": 0.29307678, "memory(GiB)": 34.88, "step": 135500, "train_speed(iter/s)": 0.411279 }, { "acc": 0.94738445, "epoch": 3.668941055424688, "grad_norm": 6.092159271240234, "learning_rate": 1.8709274524516063e-07, "loss": 0.32786665, "memory(GiB)": 34.88, "step": 135505, "train_speed(iter/s)": 0.41128 }, { "acc": 0.94543943, "epoch": 3.6690764357079035, "grad_norm": 9.459278106689453, "learning_rate": 1.8694152848768855e-07, "loss": 0.27823584, "memory(GiB)": 34.88, "step": 135510, "train_speed(iter/s)": 0.41128 }, { "acc": 0.95063725, "epoch": 3.669211815991119, "grad_norm": 8.489108085632324, "learning_rate": 1.8679037202949445e-07, "loss": 0.31665678, "memory(GiB)": 34.88, "step": 135515, "train_speed(iter/s)": 0.411281 }, { "acc": 0.94106331, "epoch": 3.6693471962743347, "grad_norm": 8.632222175598145, "learning_rate": 1.8663927587247337e-07, "loss": 0.34860537, "memory(GiB)": 34.88, "step": 135520, "train_speed(iter/s)": 0.411282 }, { "acc": 0.95258484, "epoch": 3.66948257655755, "grad_norm": 11.082093238830566, "learning_rate": 1.8648824001851627e-07, "loss": 0.27045183, "memory(GiB)": 34.88, "step": 135525, "train_speed(iter/s)": 0.411283 }, { "acc": 0.93771582, "epoch": 3.669617956840766, "grad_norm": 13.502580642700195, "learning_rate": 1.8633726446951652e-07, "loss": 0.3828073, "memory(GiB)": 34.88, "step": 135530, "train_speed(iter/s)": 0.411284 }, { "acc": 0.93723717, "epoch": 3.669753337123981, "grad_norm": 5.123079776763916, "learning_rate": 1.8618634922736516e-07, "loss": 0.42075934, "memory(GiB)": 34.88, "step": 135535, "train_speed(iter/s)": 0.411284 }, { "acc": 0.95030422, "epoch": 3.669888717407197, "grad_norm": 2.97851300239563, "learning_rate": 1.860354942939516e-07, "loss": 0.30815268, "memory(GiB)": 34.88, "step": 135540, "train_speed(iter/s)": 0.411285 }, { "acc": 0.94679756, "epoch": 3.6700240976904124, "grad_norm": 6.24876594543457, "learning_rate": 1.8588469967116643e-07, "loss": 0.28073258, "memory(GiB)": 34.88, "step": 135545, "train_speed(iter/s)": 0.411286 }, { "acc": 0.94490824, "epoch": 3.6701594779736277, "grad_norm": 3.734873056411743, "learning_rate": 1.857339653608984e-07, "loss": 0.33745012, "memory(GiB)": 34.88, "step": 135550, "train_speed(iter/s)": 0.411287 }, { "acc": 0.96121731, "epoch": 3.6702948582568435, "grad_norm": 2.722984790802002, "learning_rate": 1.8558329136503697e-07, "loss": 0.19664164, "memory(GiB)": 34.88, "step": 135555, "train_speed(iter/s)": 0.411288 }, { "acc": 0.94441223, "epoch": 3.670430238540059, "grad_norm": 4.835098743438721, "learning_rate": 1.8543267768546767e-07, "loss": 0.37171764, "memory(GiB)": 34.88, "step": 135560, "train_speed(iter/s)": 0.411289 }, { "acc": 0.94650726, "epoch": 3.6705656188232747, "grad_norm": 4.212339401245117, "learning_rate": 1.8528212432407825e-07, "loss": 0.27796493, "memory(GiB)": 34.88, "step": 135565, "train_speed(iter/s)": 0.411289 }, { "acc": 0.94004688, "epoch": 3.67070099910649, "grad_norm": 8.092297554016113, "learning_rate": 1.8513163128275477e-07, "loss": 0.39521708, "memory(GiB)": 34.88, "step": 135570, "train_speed(iter/s)": 0.41129 }, { "acc": 0.95220242, "epoch": 3.670836379389706, "grad_norm": 12.356731414794922, "learning_rate": 1.849811985633828e-07, "loss": 0.29172661, "memory(GiB)": 34.88, "step": 135575, "train_speed(iter/s)": 0.411291 }, { "acc": 0.94400187, "epoch": 3.670971759672921, "grad_norm": 4.799790382385254, "learning_rate": 1.8483082616784556e-07, "loss": 0.37942901, "memory(GiB)": 34.88, "step": 135580, "train_speed(iter/s)": 0.411292 }, { "acc": 0.93491344, "epoch": 3.6711071399561366, "grad_norm": 6.394409656524658, "learning_rate": 1.846805140980281e-07, "loss": 0.42726197, "memory(GiB)": 34.88, "step": 135585, "train_speed(iter/s)": 0.411292 }, { "acc": 0.94231081, "epoch": 3.6712425202393524, "grad_norm": 5.101676940917969, "learning_rate": 1.8453026235581262e-07, "loss": 0.33054848, "memory(GiB)": 34.88, "step": 135590, "train_speed(iter/s)": 0.411293 }, { "acc": 0.94824553, "epoch": 3.6713779005225677, "grad_norm": 3.55255389213562, "learning_rate": 1.8438007094308125e-07, "loss": 0.32231424, "memory(GiB)": 34.88, "step": 135595, "train_speed(iter/s)": 0.411294 }, { "acc": 0.95207825, "epoch": 3.6715132808057835, "grad_norm": 5.727212429046631, "learning_rate": 1.8422993986171565e-07, "loss": 0.24686408, "memory(GiB)": 34.88, "step": 135600, "train_speed(iter/s)": 0.411295 }, { "acc": 0.93975649, "epoch": 3.671648661088999, "grad_norm": 2.8253653049468994, "learning_rate": 1.8407986911359638e-07, "loss": 0.30494492, "memory(GiB)": 34.88, "step": 135605, "train_speed(iter/s)": 0.411296 }, { "acc": 0.95919809, "epoch": 3.6717840413722147, "grad_norm": 5.632717132568359, "learning_rate": 1.839298587006039e-07, "loss": 0.27424202, "memory(GiB)": 34.88, "step": 135610, "train_speed(iter/s)": 0.411296 }, { "acc": 0.94625797, "epoch": 3.67191942165543, "grad_norm": 27.85529899597168, "learning_rate": 1.8377990862461663e-07, "loss": 0.38969064, "memory(GiB)": 34.88, "step": 135615, "train_speed(iter/s)": 0.411297 }, { "acc": 0.95017262, "epoch": 3.6720548019386454, "grad_norm": 7.081707954406738, "learning_rate": 1.8363001888751334e-07, "loss": 0.26057343, "memory(GiB)": 34.88, "step": 135620, "train_speed(iter/s)": 0.411298 }, { "acc": 0.95024137, "epoch": 3.672190182221861, "grad_norm": 5.815896034240723, "learning_rate": 1.8348018949117238e-07, "loss": 0.23943095, "memory(GiB)": 34.88, "step": 135625, "train_speed(iter/s)": 0.411299 }, { "acc": 0.94916201, "epoch": 3.672325562505077, "grad_norm": 4.076877593994141, "learning_rate": 1.8333042043746933e-07, "loss": 0.3283612, "memory(GiB)": 34.88, "step": 135630, "train_speed(iter/s)": 0.4113 }, { "acc": 0.95922155, "epoch": 3.6724609427882924, "grad_norm": 4.53882360458374, "learning_rate": 1.8318071172828072e-07, "loss": 0.21820302, "memory(GiB)": 34.88, "step": 135635, "train_speed(iter/s)": 0.411301 }, { "acc": 0.94294882, "epoch": 3.6725963230715077, "grad_norm": 9.279045104980469, "learning_rate": 1.8303106336548274e-07, "loss": 0.3198005, "memory(GiB)": 34.88, "step": 135640, "train_speed(iter/s)": 0.411301 }, { "acc": 0.93770905, "epoch": 3.6727317033547235, "grad_norm": 9.252004623413086, "learning_rate": 1.8288147535094974e-07, "loss": 0.42366023, "memory(GiB)": 34.88, "step": 135645, "train_speed(iter/s)": 0.411302 }, { "acc": 0.95510597, "epoch": 3.672867083637939, "grad_norm": 3.764845609664917, "learning_rate": 1.8273194768655453e-07, "loss": 0.26750367, "memory(GiB)": 34.88, "step": 135650, "train_speed(iter/s)": 0.411303 }, { "acc": 0.94778862, "epoch": 3.6730024639211543, "grad_norm": 7.752237796783447, "learning_rate": 1.8258248037417152e-07, "loss": 0.31537004, "memory(GiB)": 34.88, "step": 135655, "train_speed(iter/s)": 0.411304 }, { "acc": 0.95946846, "epoch": 3.67313784420437, "grad_norm": 3.3963258266448975, "learning_rate": 1.824330734156729e-07, "loss": 0.21941643, "memory(GiB)": 34.88, "step": 135660, "train_speed(iter/s)": 0.411304 }, { "acc": 0.95865936, "epoch": 3.673273224487586, "grad_norm": 14.534379005432129, "learning_rate": 1.8228372681292982e-07, "loss": 0.26743269, "memory(GiB)": 34.88, "step": 135665, "train_speed(iter/s)": 0.411305 }, { "acc": 0.95169449, "epoch": 3.6734086047708012, "grad_norm": 10.610438346862793, "learning_rate": 1.8213444056781273e-07, "loss": 0.29028201, "memory(GiB)": 34.88, "step": 135670, "train_speed(iter/s)": 0.411306 }, { "acc": 0.93932934, "epoch": 3.6735439850540166, "grad_norm": 6.689663410186768, "learning_rate": 1.819852146821928e-07, "loss": 0.36344447, "memory(GiB)": 34.88, "step": 135675, "train_speed(iter/s)": 0.411307 }, { "acc": 0.95642891, "epoch": 3.6736793653372324, "grad_norm": 17.315837860107422, "learning_rate": 1.818360491579394e-07, "loss": 0.25241449, "memory(GiB)": 34.88, "step": 135680, "train_speed(iter/s)": 0.411308 }, { "acc": 0.958321, "epoch": 3.6738147456204477, "grad_norm": 13.097326278686523, "learning_rate": 1.8168694399691978e-07, "loss": 0.25084321, "memory(GiB)": 34.88, "step": 135685, "train_speed(iter/s)": 0.411308 }, { "acc": 0.94284763, "epoch": 3.6739501259036635, "grad_norm": 3.81130051612854, "learning_rate": 1.8153789920100338e-07, "loss": 0.37120371, "memory(GiB)": 34.88, "step": 135690, "train_speed(iter/s)": 0.411309 }, { "acc": 0.95166597, "epoch": 3.674085506186879, "grad_norm": 10.113612174987793, "learning_rate": 1.813889147720557e-07, "loss": 0.28655105, "memory(GiB)": 34.88, "step": 135695, "train_speed(iter/s)": 0.41131 }, { "acc": 0.95488071, "epoch": 3.6742208864700947, "grad_norm": 4.853255271911621, "learning_rate": 1.8123999071194505e-07, "loss": 0.3167552, "memory(GiB)": 34.88, "step": 135700, "train_speed(iter/s)": 0.411311 }, { "acc": 0.9424984, "epoch": 3.67435626675331, "grad_norm": 7.355733394622803, "learning_rate": 1.810911270225354e-07, "loss": 0.3466028, "memory(GiB)": 34.88, "step": 135705, "train_speed(iter/s)": 0.411312 }, { "acc": 0.95797405, "epoch": 3.6744916470365254, "grad_norm": 8.09768009185791, "learning_rate": 1.809423237056916e-07, "loss": 0.2895462, "memory(GiB)": 34.88, "step": 135710, "train_speed(iter/s)": 0.411313 }, { "acc": 0.96017227, "epoch": 3.6746270273197412, "grad_norm": 5.068784713745117, "learning_rate": 1.8079358076327874e-07, "loss": 0.26231387, "memory(GiB)": 34.88, "step": 135715, "train_speed(iter/s)": 0.411313 }, { "acc": 0.94629755, "epoch": 3.6747624076029566, "grad_norm": 8.055251121520996, "learning_rate": 1.80644898197159e-07, "loss": 0.36726191, "memory(GiB)": 34.88, "step": 135720, "train_speed(iter/s)": 0.411314 }, { "acc": 0.94668827, "epoch": 3.6748977878861724, "grad_norm": 8.439709663391113, "learning_rate": 1.8049627600919515e-07, "loss": 0.32503405, "memory(GiB)": 34.88, "step": 135725, "train_speed(iter/s)": 0.411315 }, { "acc": 0.94769497, "epoch": 3.6750331681693877, "grad_norm": 7.133350849151611, "learning_rate": 1.803477142012494e-07, "loss": 0.3072608, "memory(GiB)": 34.88, "step": 135730, "train_speed(iter/s)": 0.411316 }, { "acc": 0.94976749, "epoch": 3.6751685484526035, "grad_norm": 11.084694862365723, "learning_rate": 1.8019921277518344e-07, "loss": 0.3204052, "memory(GiB)": 34.88, "step": 135735, "train_speed(iter/s)": 0.411317 }, { "acc": 0.94326, "epoch": 3.675303928735819, "grad_norm": 13.758131980895996, "learning_rate": 1.80050771732855e-07, "loss": 0.33912439, "memory(GiB)": 34.88, "step": 135740, "train_speed(iter/s)": 0.411318 }, { "acc": 0.95733604, "epoch": 3.6754393090190343, "grad_norm": 5.626855850219727, "learning_rate": 1.7990239107612686e-07, "loss": 0.24747462, "memory(GiB)": 34.88, "step": 135745, "train_speed(iter/s)": 0.411318 }, { "acc": 0.95217667, "epoch": 3.67557468930225, "grad_norm": 4.846916675567627, "learning_rate": 1.7975407080685624e-07, "loss": 0.24794226, "memory(GiB)": 34.88, "step": 135750, "train_speed(iter/s)": 0.411319 }, { "acc": 0.93839922, "epoch": 3.6757100695854654, "grad_norm": 9.187894821166992, "learning_rate": 1.7960581092690037e-07, "loss": 0.34813113, "memory(GiB)": 34.88, "step": 135755, "train_speed(iter/s)": 0.41132 }, { "acc": 0.94871464, "epoch": 3.6758454498686812, "grad_norm": 1.9494789838790894, "learning_rate": 1.7945761143811756e-07, "loss": 0.25214899, "memory(GiB)": 34.88, "step": 135760, "train_speed(iter/s)": 0.411321 }, { "acc": 0.9607501, "epoch": 3.6759808301518966, "grad_norm": 6.259693622589111, "learning_rate": 1.7930947234236391e-07, "loss": 0.2806078, "memory(GiB)": 34.88, "step": 135765, "train_speed(iter/s)": 0.411321 }, { "acc": 0.94442797, "epoch": 3.6761162104351124, "grad_norm": 6.10253381729126, "learning_rate": 1.7916139364149558e-07, "loss": 0.37476556, "memory(GiB)": 34.88, "step": 135770, "train_speed(iter/s)": 0.411322 }, { "acc": 0.95422325, "epoch": 3.6762515907183277, "grad_norm": 7.705674648284912, "learning_rate": 1.790133753373658e-07, "loss": 0.26529169, "memory(GiB)": 34.88, "step": 135775, "train_speed(iter/s)": 0.411323 }, { "acc": 0.95461149, "epoch": 3.676386971001543, "grad_norm": 6.832856178283691, "learning_rate": 1.7886541743183189e-07, "loss": 0.25765028, "memory(GiB)": 34.88, "step": 135780, "train_speed(iter/s)": 0.411324 }, { "acc": 0.95197821, "epoch": 3.676522351284759, "grad_norm": 10.6151762008667, "learning_rate": 1.7871751992674547e-07, "loss": 0.31548643, "memory(GiB)": 34.88, "step": 135785, "train_speed(iter/s)": 0.411325 }, { "acc": 0.94976921, "epoch": 3.6766577315679747, "grad_norm": 12.678296089172363, "learning_rate": 1.7856968282395877e-07, "loss": 0.28891578, "memory(GiB)": 34.88, "step": 135790, "train_speed(iter/s)": 0.411326 }, { "acc": 0.95802288, "epoch": 3.67679311185119, "grad_norm": 4.599554538726807, "learning_rate": 1.7842190612532454e-07, "loss": 0.33735695, "memory(GiB)": 34.88, "step": 135795, "train_speed(iter/s)": 0.411326 }, { "acc": 0.94726429, "epoch": 3.6769284921344054, "grad_norm": 4.767186641693115, "learning_rate": 1.7827418983269395e-07, "loss": 0.25497632, "memory(GiB)": 34.88, "step": 135800, "train_speed(iter/s)": 0.411327 }, { "acc": 0.94830036, "epoch": 3.6770638724176212, "grad_norm": 13.707977294921875, "learning_rate": 1.781265339479175e-07, "loss": 0.25909274, "memory(GiB)": 34.88, "step": 135805, "train_speed(iter/s)": 0.411328 }, { "acc": 0.95438051, "epoch": 3.6771992527008366, "grad_norm": 11.128273963928223, "learning_rate": 1.7797893847284356e-07, "loss": 0.31133211, "memory(GiB)": 34.88, "step": 135810, "train_speed(iter/s)": 0.411329 }, { "acc": 0.95318279, "epoch": 3.677334632984052, "grad_norm": 12.716119766235352, "learning_rate": 1.7783140340932325e-07, "loss": 0.24591506, "memory(GiB)": 34.88, "step": 135815, "train_speed(iter/s)": 0.41133 }, { "acc": 0.94738884, "epoch": 3.6774700132672677, "grad_norm": 5.357522487640381, "learning_rate": 1.7768392875920317e-07, "loss": 0.36063886, "memory(GiB)": 34.88, "step": 135820, "train_speed(iter/s)": 0.411331 }, { "acc": 0.95084457, "epoch": 3.6776053935504835, "grad_norm": 6.698957920074463, "learning_rate": 1.7753651452433178e-07, "loss": 0.29702845, "memory(GiB)": 34.88, "step": 135825, "train_speed(iter/s)": 0.411331 }, { "acc": 0.94719343, "epoch": 3.677740773833699, "grad_norm": 14.987533569335938, "learning_rate": 1.7738916070655452e-07, "loss": 0.29551246, "memory(GiB)": 34.88, "step": 135830, "train_speed(iter/s)": 0.411332 }, { "acc": 0.95611057, "epoch": 3.6778761541169143, "grad_norm": 8.115826606750488, "learning_rate": 1.772418673077181e-07, "loss": 0.30724936, "memory(GiB)": 34.88, "step": 135835, "train_speed(iter/s)": 0.411333 }, { "acc": 0.94685478, "epoch": 3.67801153440013, "grad_norm": 8.785880088806152, "learning_rate": 1.7709463432966759e-07, "loss": 0.27304451, "memory(GiB)": 34.88, "step": 135840, "train_speed(iter/s)": 0.411334 }, { "acc": 0.95920401, "epoch": 3.6781469146833454, "grad_norm": 14.748290061950684, "learning_rate": 1.769474617742468e-07, "loss": 0.2593817, "memory(GiB)": 34.88, "step": 135845, "train_speed(iter/s)": 0.411335 }, { "acc": 0.94104137, "epoch": 3.6782822949665612, "grad_norm": 6.545533180236816, "learning_rate": 1.7680034964330023e-07, "loss": 0.38660288, "memory(GiB)": 34.88, "step": 135850, "train_speed(iter/s)": 0.411335 }, { "acc": 0.95968084, "epoch": 3.6784176752497766, "grad_norm": 7.831799507141113, "learning_rate": 1.7665329793867009e-07, "loss": 0.2358109, "memory(GiB)": 34.88, "step": 135855, "train_speed(iter/s)": 0.411336 }, { "acc": 0.95174866, "epoch": 3.6785530555329924, "grad_norm": 7.795604228973389, "learning_rate": 1.7650630666219975e-07, "loss": 0.27755029, "memory(GiB)": 34.88, "step": 135860, "train_speed(iter/s)": 0.411337 }, { "acc": 0.95800209, "epoch": 3.6786884358162077, "grad_norm": 9.448657989501953, "learning_rate": 1.7635937581572802e-07, "loss": 0.26208575, "memory(GiB)": 34.88, "step": 135865, "train_speed(iter/s)": 0.411338 }, { "acc": 0.93643379, "epoch": 3.678823816099423, "grad_norm": 6.374683380126953, "learning_rate": 1.7621250540109784e-07, "loss": 0.38534441, "memory(GiB)": 34.88, "step": 135870, "train_speed(iter/s)": 0.411339 }, { "acc": 0.95573788, "epoch": 3.678959196382639, "grad_norm": 8.890989303588867, "learning_rate": 1.760656954201485e-07, "loss": 0.24457355, "memory(GiB)": 34.88, "step": 135875, "train_speed(iter/s)": 0.41134 }, { "acc": 0.95330467, "epoch": 3.6790945766658543, "grad_norm": 6.261244773864746, "learning_rate": 1.7591894587471792e-07, "loss": 0.30341375, "memory(GiB)": 34.88, "step": 135880, "train_speed(iter/s)": 0.411341 }, { "acc": 0.95832348, "epoch": 3.67922995694907, "grad_norm": 6.1494622230529785, "learning_rate": 1.7577225676664602e-07, "loss": 0.27869382, "memory(GiB)": 34.88, "step": 135885, "train_speed(iter/s)": 0.411341 }, { "acc": 0.95319529, "epoch": 3.6793653372322854, "grad_norm": 7.601001739501953, "learning_rate": 1.7562562809776956e-07, "loss": 0.28806491, "memory(GiB)": 34.88, "step": 135890, "train_speed(iter/s)": 0.411342 }, { "acc": 0.95686264, "epoch": 3.6795007175155012, "grad_norm": 18.758281707763672, "learning_rate": 1.7547905986992571e-07, "loss": 0.2221462, "memory(GiB)": 34.88, "step": 135895, "train_speed(iter/s)": 0.411343 }, { "acc": 0.96742287, "epoch": 3.6796360977987166, "grad_norm": 10.997279167175293, "learning_rate": 1.7533255208494953e-07, "loss": 0.22336528, "memory(GiB)": 34.88, "step": 135900, "train_speed(iter/s)": 0.411344 }, { "acc": 0.94416847, "epoch": 3.679771478081932, "grad_norm": 4.8186821937561035, "learning_rate": 1.751861047446777e-07, "loss": 0.31456909, "memory(GiB)": 34.88, "step": 135905, "train_speed(iter/s)": 0.411345 }, { "acc": 0.95392227, "epoch": 3.6799068583651477, "grad_norm": 5.085558891296387, "learning_rate": 1.7503971785094409e-07, "loss": 0.27305984, "memory(GiB)": 34.88, "step": 135910, "train_speed(iter/s)": 0.411345 }, { "acc": 0.95232992, "epoch": 3.680042238648363, "grad_norm": 10.896063804626465, "learning_rate": 1.7489339140558205e-07, "loss": 0.256936, "memory(GiB)": 34.88, "step": 135915, "train_speed(iter/s)": 0.411346 }, { "acc": 0.94668789, "epoch": 3.680177618931579, "grad_norm": 3.468607187271118, "learning_rate": 1.7474712541042606e-07, "loss": 0.27122741, "memory(GiB)": 34.88, "step": 135920, "train_speed(iter/s)": 0.411347 }, { "acc": 0.94434814, "epoch": 3.6803129992147943, "grad_norm": 8.619845390319824, "learning_rate": 1.746009198673061e-07, "loss": 0.34864738, "memory(GiB)": 34.88, "step": 135925, "train_speed(iter/s)": 0.411348 }, { "acc": 0.95354576, "epoch": 3.68044837949801, "grad_norm": 4.2680768966674805, "learning_rate": 1.7445477477805618e-07, "loss": 0.31398678, "memory(GiB)": 34.88, "step": 135930, "train_speed(iter/s)": 0.411349 }, { "acc": 0.94029198, "epoch": 3.6805837597812254, "grad_norm": 23.913497924804688, "learning_rate": 1.743086901445051e-07, "loss": 0.43555303, "memory(GiB)": 34.88, "step": 135935, "train_speed(iter/s)": 0.411349 }, { "acc": 0.94436207, "epoch": 3.680719140064441, "grad_norm": 9.516735076904297, "learning_rate": 1.7416266596848405e-07, "loss": 0.30494676, "memory(GiB)": 34.88, "step": 135940, "train_speed(iter/s)": 0.41135 }, { "acc": 0.95052652, "epoch": 3.6808545203476566, "grad_norm": 6.6532721519470215, "learning_rate": 1.7401670225182135e-07, "loss": 0.27146058, "memory(GiB)": 34.88, "step": 135945, "train_speed(iter/s)": 0.411351 }, { "acc": 0.94268303, "epoch": 3.6809899006308724, "grad_norm": 7.93904972076416, "learning_rate": 1.7387079899634706e-07, "loss": 0.41732564, "memory(GiB)": 34.88, "step": 135950, "train_speed(iter/s)": 0.411352 }, { "acc": 0.94035568, "epoch": 3.6811252809140877, "grad_norm": 3.3723154067993164, "learning_rate": 1.737249562038867e-07, "loss": 0.30432119, "memory(GiB)": 34.88, "step": 135955, "train_speed(iter/s)": 0.411353 }, { "acc": 0.94717627, "epoch": 3.681260661197303, "grad_norm": 13.07553482055664, "learning_rate": 1.7357917387626867e-07, "loss": 0.33339407, "memory(GiB)": 34.88, "step": 135960, "train_speed(iter/s)": 0.411354 }, { "acc": 0.95040159, "epoch": 3.681396041480519, "grad_norm": 4.6157917976379395, "learning_rate": 1.734334520153191e-07, "loss": 0.24884892, "memory(GiB)": 34.88, "step": 135965, "train_speed(iter/s)": 0.411354 }, { "acc": 0.95615759, "epoch": 3.6815314217637343, "grad_norm": 9.411666870117188, "learning_rate": 1.7328779062286244e-07, "loss": 0.25315869, "memory(GiB)": 34.88, "step": 135970, "train_speed(iter/s)": 0.411355 }, { "acc": 0.94602127, "epoch": 3.6816668020469496, "grad_norm": 13.817704200744629, "learning_rate": 1.731421897007254e-07, "loss": 0.29919498, "memory(GiB)": 34.88, "step": 135975, "train_speed(iter/s)": 0.411356 }, { "acc": 0.93702898, "epoch": 3.6818021823301654, "grad_norm": 8.796615600585938, "learning_rate": 1.7299664925072963e-07, "loss": 0.40583234, "memory(GiB)": 34.88, "step": 135980, "train_speed(iter/s)": 0.411357 }, { "acc": 0.94979191, "epoch": 3.6819375626133812, "grad_norm": 8.050577163696289, "learning_rate": 1.7285116927470024e-07, "loss": 0.27515962, "memory(GiB)": 34.88, "step": 135985, "train_speed(iter/s)": 0.411357 }, { "acc": 0.94759312, "epoch": 3.6820729428965966, "grad_norm": 8.22064208984375, "learning_rate": 1.7270574977445776e-07, "loss": 0.30852277, "memory(GiB)": 34.88, "step": 135990, "train_speed(iter/s)": 0.411358 }, { "acc": 0.9439682, "epoch": 3.682208323179812, "grad_norm": 6.904624938964844, "learning_rate": 1.725603907518255e-07, "loss": 0.33018155, "memory(GiB)": 34.88, "step": 135995, "train_speed(iter/s)": 0.411359 }, { "acc": 0.95550232, "epoch": 3.6823437034630278, "grad_norm": 8.694245338439941, "learning_rate": 1.7241509220862415e-07, "loss": 0.23886395, "memory(GiB)": 34.88, "step": 136000, "train_speed(iter/s)": 0.41136 }, { "acc": 0.94470253, "epoch": 3.682479083746243, "grad_norm": 4.655981063842773, "learning_rate": 1.7226985414667198e-07, "loss": 0.33939869, "memory(GiB)": 34.88, "step": 136005, "train_speed(iter/s)": 0.411361 }, { "acc": 0.94140873, "epoch": 3.682614464029459, "grad_norm": 7.502676486968994, "learning_rate": 1.721246765677913e-07, "loss": 0.32361534, "memory(GiB)": 34.88, "step": 136010, "train_speed(iter/s)": 0.411361 }, { "acc": 0.95928335, "epoch": 3.6827498443126743, "grad_norm": 7.725257396697998, "learning_rate": 1.7197955947379878e-07, "loss": 0.25702968, "memory(GiB)": 34.88, "step": 136015, "train_speed(iter/s)": 0.411362 }, { "acc": 0.9468214, "epoch": 3.68288522459589, "grad_norm": 3.7776381969451904, "learning_rate": 1.7183450286651335e-07, "loss": 0.38753915, "memory(GiB)": 34.88, "step": 136020, "train_speed(iter/s)": 0.411363 }, { "acc": 0.95342312, "epoch": 3.6830206048791054, "grad_norm": 10.166250228881836, "learning_rate": 1.7168950674775e-07, "loss": 0.29824438, "memory(GiB)": 34.88, "step": 136025, "train_speed(iter/s)": 0.411364 }, { "acc": 0.942173, "epoch": 3.683155985162321, "grad_norm": 7.594409465789795, "learning_rate": 1.715445711193283e-07, "loss": 0.34693835, "memory(GiB)": 34.88, "step": 136030, "train_speed(iter/s)": 0.411365 }, { "acc": 0.95604477, "epoch": 3.6832913654455366, "grad_norm": 6.977844715118408, "learning_rate": 1.713996959830615e-07, "loss": 0.30042698, "memory(GiB)": 34.88, "step": 136035, "train_speed(iter/s)": 0.411365 }, { "acc": 0.94298496, "epoch": 3.683426745728752, "grad_norm": 4.945486545562744, "learning_rate": 1.7125488134076474e-07, "loss": 0.32521718, "memory(GiB)": 34.88, "step": 136040, "train_speed(iter/s)": 0.411366 }, { "acc": 0.95063457, "epoch": 3.6835621260119678, "grad_norm": 5.718084812164307, "learning_rate": 1.7111012719425352e-07, "loss": 0.3024848, "memory(GiB)": 34.88, "step": 136045, "train_speed(iter/s)": 0.411367 }, { "acc": 0.95762272, "epoch": 3.683697506295183, "grad_norm": 8.018607139587402, "learning_rate": 1.7096543354533962e-07, "loss": 0.27274151, "memory(GiB)": 34.88, "step": 136050, "train_speed(iter/s)": 0.411368 }, { "acc": 0.95105944, "epoch": 3.683832886578399, "grad_norm": 4.338930130004883, "learning_rate": 1.7082080039583693e-07, "loss": 0.21076808, "memory(GiB)": 34.88, "step": 136055, "train_speed(iter/s)": 0.411369 }, { "acc": 0.95737553, "epoch": 3.6839682668616143, "grad_norm": 9.259133338928223, "learning_rate": 1.7067622774755487e-07, "loss": 0.25221696, "memory(GiB)": 34.88, "step": 136060, "train_speed(iter/s)": 0.411369 }, { "acc": 0.95335493, "epoch": 3.6841036471448296, "grad_norm": 3.4061715602874756, "learning_rate": 1.7053171560230804e-07, "loss": 0.29962914, "memory(GiB)": 34.88, "step": 136065, "train_speed(iter/s)": 0.41137 }, { "acc": 0.95096588, "epoch": 3.6842390274280454, "grad_norm": 6.641607284545898, "learning_rate": 1.7038726396190362e-07, "loss": 0.28814602, "memory(GiB)": 34.88, "step": 136070, "train_speed(iter/s)": 0.411371 }, { "acc": 0.95641756, "epoch": 3.684374407711261, "grad_norm": 7.148655891418457, "learning_rate": 1.7024287282815275e-07, "loss": 0.26005526, "memory(GiB)": 34.88, "step": 136075, "train_speed(iter/s)": 0.411372 }, { "acc": 0.95540495, "epoch": 3.6845097879944766, "grad_norm": 13.852474212646484, "learning_rate": 1.7009854220286386e-07, "loss": 0.21244326, "memory(GiB)": 34.88, "step": 136080, "train_speed(iter/s)": 0.411372 }, { "acc": 0.92974377, "epoch": 3.684645168277692, "grad_norm": 5.114830493927002, "learning_rate": 1.699542720878447e-07, "loss": 0.43595414, "memory(GiB)": 34.88, "step": 136085, "train_speed(iter/s)": 0.411373 }, { "acc": 0.94690037, "epoch": 3.6847805485609078, "grad_norm": 8.802824020385742, "learning_rate": 1.6981006248490373e-07, "loss": 0.33608208, "memory(GiB)": 34.88, "step": 136090, "train_speed(iter/s)": 0.411374 }, { "acc": 0.96178703, "epoch": 3.684915928844123, "grad_norm": 2.8441214561462402, "learning_rate": 1.696659133958448e-07, "loss": 0.2291127, "memory(GiB)": 34.88, "step": 136095, "train_speed(iter/s)": 0.411375 }, { "acc": 0.94922943, "epoch": 3.6850513091273385, "grad_norm": 4.304272174835205, "learning_rate": 1.6952182482247688e-07, "loss": 0.28893528, "memory(GiB)": 34.88, "step": 136100, "train_speed(iter/s)": 0.411376 }, { "acc": 0.95341282, "epoch": 3.6851866894105543, "grad_norm": 4.729158878326416, "learning_rate": 1.6937779676660275e-07, "loss": 0.30556495, "memory(GiB)": 34.88, "step": 136105, "train_speed(iter/s)": 0.411377 }, { "acc": 0.95178576, "epoch": 3.68532206969377, "grad_norm": 13.057220458984375, "learning_rate": 1.6923382923002753e-07, "loss": 0.32688384, "memory(GiB)": 34.88, "step": 136110, "train_speed(iter/s)": 0.411377 }, { "acc": 0.9532093, "epoch": 3.6854574499769854, "grad_norm": 5.656617164611816, "learning_rate": 1.690899222145545e-07, "loss": 0.26203477, "memory(GiB)": 34.88, "step": 136115, "train_speed(iter/s)": 0.411378 }, { "acc": 0.94906464, "epoch": 3.685592830260201, "grad_norm": 9.559895515441895, "learning_rate": 1.6894607572198658e-07, "loss": 0.32643781, "memory(GiB)": 34.88, "step": 136120, "train_speed(iter/s)": 0.411379 }, { "acc": 0.94053745, "epoch": 3.6857282105434166, "grad_norm": 8.667701721191406, "learning_rate": 1.6880228975412592e-07, "loss": 0.38846114, "memory(GiB)": 34.88, "step": 136125, "train_speed(iter/s)": 0.41138 }, { "acc": 0.95570221, "epoch": 3.685863590826632, "grad_norm": 2.8545961380004883, "learning_rate": 1.6865856431277217e-07, "loss": 0.22618179, "memory(GiB)": 34.88, "step": 136130, "train_speed(iter/s)": 0.411381 }, { "acc": 0.9448844, "epoch": 3.6859989711098473, "grad_norm": 11.144547462463379, "learning_rate": 1.68514899399728e-07, "loss": 0.36169758, "memory(GiB)": 34.88, "step": 136135, "train_speed(iter/s)": 0.411382 }, { "acc": 0.96449165, "epoch": 3.686134351393063, "grad_norm": 5.548391342163086, "learning_rate": 1.683712950167913e-07, "loss": 0.20233631, "memory(GiB)": 34.88, "step": 136140, "train_speed(iter/s)": 0.411382 }, { "acc": 0.95103397, "epoch": 3.686269731676279, "grad_norm": 10.488819122314453, "learning_rate": 1.6822775116576214e-07, "loss": 0.2989975, "memory(GiB)": 34.88, "step": 136145, "train_speed(iter/s)": 0.411383 }, { "acc": 0.95999165, "epoch": 3.6864051119594943, "grad_norm": 14.504335403442383, "learning_rate": 1.6808426784843773e-07, "loss": 0.27496214, "memory(GiB)": 34.88, "step": 136150, "train_speed(iter/s)": 0.411384 }, { "acc": 0.95554447, "epoch": 3.6865404922427096, "grad_norm": 10.424945831298828, "learning_rate": 1.6794084506661708e-07, "loss": 0.2622571, "memory(GiB)": 34.88, "step": 136155, "train_speed(iter/s)": 0.411385 }, { "acc": 0.94659729, "epoch": 3.6866758725259254, "grad_norm": 5.091132164001465, "learning_rate": 1.677974828220952e-07, "loss": 0.33596103, "memory(GiB)": 34.88, "step": 136160, "train_speed(iter/s)": 0.411386 }, { "acc": 0.93010836, "epoch": 3.686811252809141, "grad_norm": 9.64179801940918, "learning_rate": 1.676541811166688e-07, "loss": 0.42919698, "memory(GiB)": 34.88, "step": 136165, "train_speed(iter/s)": 0.411386 }, { "acc": 0.95438347, "epoch": 3.6869466330923566, "grad_norm": 8.136438369750977, "learning_rate": 1.6751093995213294e-07, "loss": 0.28130817, "memory(GiB)": 34.88, "step": 136170, "train_speed(iter/s)": 0.411387 }, { "acc": 0.9515707, "epoch": 3.687082013375572, "grad_norm": 4.920263290405273, "learning_rate": 1.6736775933028154e-07, "loss": 0.31034029, "memory(GiB)": 34.88, "step": 136175, "train_speed(iter/s)": 0.411388 }, { "acc": 0.95546608, "epoch": 3.6872173936587878, "grad_norm": 5.56831693649292, "learning_rate": 1.6722463925290858e-07, "loss": 0.275265, "memory(GiB)": 34.88, "step": 136180, "train_speed(iter/s)": 0.411389 }, { "acc": 0.9462595, "epoch": 3.687352773942003, "grad_norm": 6.626310348510742, "learning_rate": 1.6708157972180635e-07, "loss": 0.30269272, "memory(GiB)": 34.88, "step": 136185, "train_speed(iter/s)": 0.41139 }, { "acc": 0.94139061, "epoch": 3.6874881542252185, "grad_norm": 11.546762466430664, "learning_rate": 1.669385807387687e-07, "loss": 0.42277212, "memory(GiB)": 34.88, "step": 136190, "train_speed(iter/s)": 0.41139 }, { "acc": 0.95158701, "epoch": 3.6876235345084343, "grad_norm": 6.629218578338623, "learning_rate": 1.6679564230558466e-07, "loss": 0.31797631, "memory(GiB)": 34.88, "step": 136195, "train_speed(iter/s)": 0.411391 }, { "acc": 0.95415592, "epoch": 3.6877589147916496, "grad_norm": 27.02461051940918, "learning_rate": 1.666527644240465e-07, "loss": 0.26824245, "memory(GiB)": 34.88, "step": 136200, "train_speed(iter/s)": 0.411392 }, { "acc": 0.95604458, "epoch": 3.6878942950748654, "grad_norm": 6.096250534057617, "learning_rate": 1.665099470959437e-07, "loss": 0.2508564, "memory(GiB)": 34.88, "step": 136205, "train_speed(iter/s)": 0.411393 }, { "acc": 0.93939342, "epoch": 3.688029675358081, "grad_norm": 9.445942878723145, "learning_rate": 1.663671903230646e-07, "loss": 0.40331521, "memory(GiB)": 34.88, "step": 136210, "train_speed(iter/s)": 0.411394 }, { "acc": 0.95758572, "epoch": 3.6881650556412966, "grad_norm": 4.115417003631592, "learning_rate": 1.662244941071977e-07, "loss": 0.26964884, "memory(GiB)": 34.88, "step": 136215, "train_speed(iter/s)": 0.411394 }, { "acc": 0.94963789, "epoch": 3.688300435924512, "grad_norm": 13.820643424987793, "learning_rate": 1.660818584501313e-07, "loss": 0.32437048, "memory(GiB)": 34.88, "step": 136220, "train_speed(iter/s)": 0.411395 }, { "acc": 0.94897556, "epoch": 3.6884358162077273, "grad_norm": 5.623185157775879, "learning_rate": 1.659392833536522e-07, "loss": 0.32110505, "memory(GiB)": 34.88, "step": 136225, "train_speed(iter/s)": 0.411396 }, { "acc": 0.95171003, "epoch": 3.688571196490943, "grad_norm": 4.9444122314453125, "learning_rate": 1.657967688195454e-07, "loss": 0.30984964, "memory(GiB)": 34.88, "step": 136230, "train_speed(iter/s)": 0.411397 }, { "acc": 0.93655996, "epoch": 3.6887065767741585, "grad_norm": 6.462246417999268, "learning_rate": 1.6565431484959653e-07, "loss": 0.41298513, "memory(GiB)": 34.88, "step": 136235, "train_speed(iter/s)": 0.411398 }, { "acc": 0.96819515, "epoch": 3.6888419570573743, "grad_norm": 5.966027736663818, "learning_rate": 1.6551192144559008e-07, "loss": 0.21243312, "memory(GiB)": 34.88, "step": 136240, "train_speed(iter/s)": 0.411399 }, { "acc": 0.94570122, "epoch": 3.6889773373405896, "grad_norm": 9.003772735595703, "learning_rate": 1.6536958860931113e-07, "loss": 0.34135518, "memory(GiB)": 34.88, "step": 136245, "train_speed(iter/s)": 0.411399 }, { "acc": 0.94583969, "epoch": 3.6891127176238054, "grad_norm": 7.755220890045166, "learning_rate": 1.6522731634254087e-07, "loss": 0.34784527, "memory(GiB)": 34.88, "step": 136250, "train_speed(iter/s)": 0.4114 }, { "acc": 0.95519619, "epoch": 3.689248097907021, "grad_norm": 7.497766971588135, "learning_rate": 1.6508510464706213e-07, "loss": 0.27680752, "memory(GiB)": 34.88, "step": 136255, "train_speed(iter/s)": 0.411401 }, { "acc": 0.95174446, "epoch": 3.689383478190236, "grad_norm": 15.119799613952637, "learning_rate": 1.6494295352465717e-07, "loss": 0.28073087, "memory(GiB)": 34.88, "step": 136260, "train_speed(iter/s)": 0.411402 }, { "acc": 0.95900459, "epoch": 3.689518858473452, "grad_norm": 6.9299821853637695, "learning_rate": 1.6480086297710605e-07, "loss": 0.29748466, "memory(GiB)": 34.88, "step": 136265, "train_speed(iter/s)": 0.411403 }, { "acc": 0.93758392, "epoch": 3.6896542387566673, "grad_norm": 8.345280647277832, "learning_rate": 1.6465883300618778e-07, "loss": 0.36952982, "memory(GiB)": 34.88, "step": 136270, "train_speed(iter/s)": 0.411403 }, { "acc": 0.9628952, "epoch": 3.689789619039883, "grad_norm": 3.703369140625, "learning_rate": 1.645168636136835e-07, "loss": 0.21063507, "memory(GiB)": 34.88, "step": 136275, "train_speed(iter/s)": 0.411404 }, { "acc": 0.94644203, "epoch": 3.6899249993230985, "grad_norm": 7.685619354248047, "learning_rate": 1.643749548013705e-07, "loss": 0.32954144, "memory(GiB)": 34.88, "step": 136280, "train_speed(iter/s)": 0.411405 }, { "acc": 0.95045586, "epoch": 3.6900603796063143, "grad_norm": 8.136161804199219, "learning_rate": 1.6423310657102663e-07, "loss": 0.33768215, "memory(GiB)": 34.88, "step": 136285, "train_speed(iter/s)": 0.411406 }, { "acc": 0.9518177, "epoch": 3.6901957598895296, "grad_norm": 4.215242862701416, "learning_rate": 1.640913189244286e-07, "loss": 0.3208369, "memory(GiB)": 34.88, "step": 136290, "train_speed(iter/s)": 0.411407 }, { "acc": 0.95589294, "epoch": 3.690331140172745, "grad_norm": 4.851299285888672, "learning_rate": 1.6394959186335316e-07, "loss": 0.35496609, "memory(GiB)": 34.88, "step": 136295, "train_speed(iter/s)": 0.411407 }, { "acc": 0.94416409, "epoch": 3.690466520455961, "grad_norm": 5.167199611663818, "learning_rate": 1.6380792538957486e-07, "loss": 0.34874706, "memory(GiB)": 34.88, "step": 136300, "train_speed(iter/s)": 0.411408 }, { "acc": 0.95689068, "epoch": 3.6906019007391766, "grad_norm": 13.818856239318848, "learning_rate": 1.636663195048693e-07, "loss": 0.28467503, "memory(GiB)": 34.88, "step": 136305, "train_speed(iter/s)": 0.411409 }, { "acc": 0.94164, "epoch": 3.690737281022392, "grad_norm": 9.312734603881836, "learning_rate": 1.6352477421100983e-07, "loss": 0.4169776, "memory(GiB)": 34.88, "step": 136310, "train_speed(iter/s)": 0.41141 }, { "acc": 0.93898287, "epoch": 3.6908726613056073, "grad_norm": 8.952493667602539, "learning_rate": 1.633832895097699e-07, "loss": 0.30610113, "memory(GiB)": 34.88, "step": 136315, "train_speed(iter/s)": 0.411411 }, { "acc": 0.94946833, "epoch": 3.691008041588823, "grad_norm": 7.227123260498047, "learning_rate": 1.632418654029213e-07, "loss": 0.31567636, "memory(GiB)": 34.88, "step": 136320, "train_speed(iter/s)": 0.411411 }, { "acc": 0.9572588, "epoch": 3.6911434218720385, "grad_norm": 5.0534539222717285, "learning_rate": 1.6310050189223623e-07, "loss": 0.2238606, "memory(GiB)": 34.88, "step": 136325, "train_speed(iter/s)": 0.411412 }, { "acc": 0.95380192, "epoch": 3.691278802155254, "grad_norm": 3.494540214538574, "learning_rate": 1.6295919897948595e-07, "loss": 0.24364588, "memory(GiB)": 34.88, "step": 136330, "train_speed(iter/s)": 0.411413 }, { "acc": 0.94646835, "epoch": 3.6914141824384696, "grad_norm": 12.928001403808594, "learning_rate": 1.628179566664388e-07, "loss": 0.36169767, "memory(GiB)": 34.88, "step": 136335, "train_speed(iter/s)": 0.411414 }, { "acc": 0.9503396, "epoch": 3.6915495627216854, "grad_norm": 10.318373680114746, "learning_rate": 1.6267677495486603e-07, "loss": 0.3357234, "memory(GiB)": 34.88, "step": 136340, "train_speed(iter/s)": 0.411415 }, { "acc": 0.94640083, "epoch": 3.691684943004901, "grad_norm": 6.984446048736572, "learning_rate": 1.6253565384653484e-07, "loss": 0.34242685, "memory(GiB)": 34.88, "step": 136345, "train_speed(iter/s)": 0.411415 }, { "acc": 0.96703053, "epoch": 3.691820323288116, "grad_norm": 3.8198091983795166, "learning_rate": 1.6239459334321433e-07, "loss": 0.20127227, "memory(GiB)": 34.88, "step": 136350, "train_speed(iter/s)": 0.411416 }, { "acc": 0.95133743, "epoch": 3.691955703571332, "grad_norm": 5.000898838043213, "learning_rate": 1.622535934466706e-07, "loss": 0.32434034, "memory(GiB)": 34.88, "step": 136355, "train_speed(iter/s)": 0.411417 }, { "acc": 0.94824467, "epoch": 3.6920910838545473, "grad_norm": 17.552194595336914, "learning_rate": 1.621126541586704e-07, "loss": 0.34506335, "memory(GiB)": 34.88, "step": 136360, "train_speed(iter/s)": 0.411418 }, { "acc": 0.95033493, "epoch": 3.692226464137763, "grad_norm": 6.338546276092529, "learning_rate": 1.6197177548097884e-07, "loss": 0.34234805, "memory(GiB)": 34.88, "step": 136365, "train_speed(iter/s)": 0.411419 }, { "acc": 0.93765888, "epoch": 3.6923618444209785, "grad_norm": 9.85481071472168, "learning_rate": 1.6183095741536152e-07, "loss": 0.33326182, "memory(GiB)": 34.88, "step": 136370, "train_speed(iter/s)": 0.411419 }, { "acc": 0.95095139, "epoch": 3.6924972247041943, "grad_norm": 7.034651756286621, "learning_rate": 1.616901999635813e-07, "loss": 0.3865767, "memory(GiB)": 34.88, "step": 136375, "train_speed(iter/s)": 0.41142 }, { "acc": 0.96388626, "epoch": 3.6926326049874096, "grad_norm": 10.336677551269531, "learning_rate": 1.6154950312740218e-07, "loss": 0.2100039, "memory(GiB)": 34.88, "step": 136380, "train_speed(iter/s)": 0.411421 }, { "acc": 0.93995895, "epoch": 3.692767985270625, "grad_norm": 3.7976574897766113, "learning_rate": 1.6140886690858752e-07, "loss": 0.29775796, "memory(GiB)": 34.88, "step": 136385, "train_speed(iter/s)": 0.411422 }, { "acc": 0.94423704, "epoch": 3.692903365553841, "grad_norm": 8.907724380493164, "learning_rate": 1.612682913088969e-07, "loss": 0.30859356, "memory(GiB)": 34.88, "step": 136390, "train_speed(iter/s)": 0.411422 }, { "acc": 0.95412865, "epoch": 3.693038745837056, "grad_norm": 4.600538730621338, "learning_rate": 1.611277763300926e-07, "loss": 0.26781731, "memory(GiB)": 34.88, "step": 136395, "train_speed(iter/s)": 0.411423 }, { "acc": 0.95905228, "epoch": 3.693174126120272, "grad_norm": 6.5351643562316895, "learning_rate": 1.6098732197393528e-07, "loss": 0.2262228, "memory(GiB)": 34.88, "step": 136400, "train_speed(iter/s)": 0.411424 }, { "acc": 0.95963926, "epoch": 3.6933095064034873, "grad_norm": 5.990698337554932, "learning_rate": 1.6084692824218386e-07, "loss": 0.20026493, "memory(GiB)": 34.88, "step": 136405, "train_speed(iter/s)": 0.411425 }, { "acc": 0.95027828, "epoch": 3.693444886686703, "grad_norm": 6.666987419128418, "learning_rate": 1.6070659513659624e-07, "loss": 0.29673231, "memory(GiB)": 34.88, "step": 136410, "train_speed(iter/s)": 0.411426 }, { "acc": 0.95016766, "epoch": 3.6935802669699185, "grad_norm": 5.57261323928833, "learning_rate": 1.6056632265893196e-07, "loss": 0.30888724, "memory(GiB)": 34.88, "step": 136415, "train_speed(iter/s)": 0.411426 }, { "acc": 0.94026146, "epoch": 3.693715647253134, "grad_norm": 16.53876495361328, "learning_rate": 1.6042611081094774e-07, "loss": 0.36823683, "memory(GiB)": 34.88, "step": 136420, "train_speed(iter/s)": 0.411427 }, { "acc": 0.95048914, "epoch": 3.6938510275363496, "grad_norm": 4.847097873687744, "learning_rate": 1.6028595959439925e-07, "loss": 0.28424883, "memory(GiB)": 34.88, "step": 136425, "train_speed(iter/s)": 0.411428 }, { "acc": 0.95947332, "epoch": 3.693986407819565, "grad_norm": 5.213325023651123, "learning_rate": 1.6014586901104262e-07, "loss": 0.29796729, "memory(GiB)": 34.88, "step": 136430, "train_speed(iter/s)": 0.411429 }, { "acc": 0.94500446, "epoch": 3.694121788102781, "grad_norm": 8.173325538635254, "learning_rate": 1.6000583906263248e-07, "loss": 0.33028271, "memory(GiB)": 34.88, "step": 136435, "train_speed(iter/s)": 0.411429 }, { "acc": 0.95725651, "epoch": 3.694257168385996, "grad_norm": 3.6625983715057373, "learning_rate": 1.5986586975092386e-07, "loss": 0.22454896, "memory(GiB)": 34.88, "step": 136440, "train_speed(iter/s)": 0.41143 }, { "acc": 0.95022373, "epoch": 3.694392548669212, "grad_norm": 5.720295429229736, "learning_rate": 1.5972596107766858e-07, "loss": 0.29151936, "memory(GiB)": 34.88, "step": 136445, "train_speed(iter/s)": 0.411431 }, { "acc": 0.96303139, "epoch": 3.6945279289524273, "grad_norm": 9.498452186584473, "learning_rate": 1.5958611304462108e-07, "loss": 0.21655023, "memory(GiB)": 34.88, "step": 136450, "train_speed(iter/s)": 0.411432 }, { "acc": 0.93517141, "epoch": 3.6946633092356427, "grad_norm": 9.390132904052734, "learning_rate": 1.5944632565353203e-07, "loss": 0.40173101, "memory(GiB)": 34.88, "step": 136455, "train_speed(iter/s)": 0.411433 }, { "acc": 0.94700203, "epoch": 3.6947986895188585, "grad_norm": 5.265803813934326, "learning_rate": 1.5930659890615377e-07, "loss": 0.29993544, "memory(GiB)": 34.88, "step": 136460, "train_speed(iter/s)": 0.411434 }, { "acc": 0.94472675, "epoch": 3.6949340698020743, "grad_norm": 14.247566223144531, "learning_rate": 1.5916693280423473e-07, "loss": 0.34919555, "memory(GiB)": 34.88, "step": 136465, "train_speed(iter/s)": 0.411434 }, { "acc": 0.95961571, "epoch": 3.6950694500852896, "grad_norm": 15.774925231933594, "learning_rate": 1.590273273495261e-07, "loss": 0.28844681, "memory(GiB)": 34.88, "step": 136470, "train_speed(iter/s)": 0.411435 }, { "acc": 0.94235115, "epoch": 3.695204830368505, "grad_norm": 7.208845615386963, "learning_rate": 1.5888778254377627e-07, "loss": 0.39953556, "memory(GiB)": 34.88, "step": 136475, "train_speed(iter/s)": 0.411436 }, { "acc": 0.96281576, "epoch": 3.695340210651721, "grad_norm": 7.359586715698242, "learning_rate": 1.587482983887321e-07, "loss": 0.2163882, "memory(GiB)": 34.88, "step": 136480, "train_speed(iter/s)": 0.411437 }, { "acc": 0.93718071, "epoch": 3.695475590934936, "grad_norm": 7.604334831237793, "learning_rate": 1.5860887488614356e-07, "loss": 0.40432615, "memory(GiB)": 34.88, "step": 136485, "train_speed(iter/s)": 0.411438 }, { "acc": 0.95475063, "epoch": 3.6956109712181515, "grad_norm": 5.293218612670898, "learning_rate": 1.5846951203775469e-07, "loss": 0.36283855, "memory(GiB)": 34.88, "step": 136490, "train_speed(iter/s)": 0.411438 }, { "acc": 0.94700451, "epoch": 3.6957463515013673, "grad_norm": 10.143688201904297, "learning_rate": 1.5833020984531284e-07, "loss": 0.30096052, "memory(GiB)": 34.88, "step": 136495, "train_speed(iter/s)": 0.411439 }, { "acc": 0.9538559, "epoch": 3.695881731784583, "grad_norm": 2.6524338722229004, "learning_rate": 1.5819096831056196e-07, "loss": 0.28647056, "memory(GiB)": 34.88, "step": 136500, "train_speed(iter/s)": 0.41144 }, { "acc": 0.94470234, "epoch": 3.6960171120677985, "grad_norm": 12.012948036193848, "learning_rate": 1.5805178743524715e-07, "loss": 0.35404341, "memory(GiB)": 34.88, "step": 136505, "train_speed(iter/s)": 0.411441 }, { "acc": 0.9501297, "epoch": 3.696152492351014, "grad_norm": 5.338921546936035, "learning_rate": 1.5791266722111184e-07, "loss": 0.38929055, "memory(GiB)": 34.88, "step": 136510, "train_speed(iter/s)": 0.411442 }, { "acc": 0.94580526, "epoch": 3.6962878726342296, "grad_norm": 18.317081451416016, "learning_rate": 1.5777360766989776e-07, "loss": 0.32836356, "memory(GiB)": 34.88, "step": 136515, "train_speed(iter/s)": 0.411442 }, { "acc": 0.94275827, "epoch": 3.696423252917445, "grad_norm": 7.949735164642334, "learning_rate": 1.5763460878334788e-07, "loss": 0.30762105, "memory(GiB)": 34.88, "step": 136520, "train_speed(iter/s)": 0.411443 }, { "acc": 0.94657059, "epoch": 3.696558633200661, "grad_norm": 3.9866108894348145, "learning_rate": 1.5749567056320331e-07, "loss": 0.33687105, "memory(GiB)": 34.88, "step": 136525, "train_speed(iter/s)": 0.411444 }, { "acc": 0.95051804, "epoch": 3.696694013483876, "grad_norm": 17.36458396911621, "learning_rate": 1.573567930112048e-07, "loss": 0.30965824, "memory(GiB)": 34.88, "step": 136530, "train_speed(iter/s)": 0.411445 }, { "acc": 0.96816788, "epoch": 3.696829393767092, "grad_norm": 5.120163440704346, "learning_rate": 1.5721797612909072e-07, "loss": 0.15112655, "memory(GiB)": 34.88, "step": 136535, "train_speed(iter/s)": 0.411446 }, { "acc": 0.96037045, "epoch": 3.6969647740503073, "grad_norm": 9.384395599365234, "learning_rate": 1.5707921991860228e-07, "loss": 0.25583024, "memory(GiB)": 34.88, "step": 136540, "train_speed(iter/s)": 0.411446 }, { "acc": 0.94638491, "epoch": 3.6971001543335227, "grad_norm": 25.33405303955078, "learning_rate": 1.5694052438147624e-07, "loss": 0.35630898, "memory(GiB)": 34.88, "step": 136545, "train_speed(iter/s)": 0.411447 }, { "acc": 0.95622015, "epoch": 3.6972355346167385, "grad_norm": 9.721125602722168, "learning_rate": 1.5680188951945002e-07, "loss": 0.29523451, "memory(GiB)": 34.88, "step": 136550, "train_speed(iter/s)": 0.411448 }, { "acc": 0.96888027, "epoch": 3.697370914899954, "grad_norm": 8.929664611816406, "learning_rate": 1.5666331533426025e-07, "loss": 0.18633113, "memory(GiB)": 34.88, "step": 136555, "train_speed(iter/s)": 0.411449 }, { "acc": 0.95209093, "epoch": 3.6975062951831696, "grad_norm": 19.157291412353516, "learning_rate": 1.565248018276427e-07, "loss": 0.34538193, "memory(GiB)": 34.88, "step": 136560, "train_speed(iter/s)": 0.41145 }, { "acc": 0.94807606, "epoch": 3.697641675466385, "grad_norm": 11.638829231262207, "learning_rate": 1.5638634900133408e-07, "loss": 0.32516627, "memory(GiB)": 34.88, "step": 136565, "train_speed(iter/s)": 0.41145 }, { "acc": 0.94285793, "epoch": 3.697777055749601, "grad_norm": 24.35335350036621, "learning_rate": 1.5624795685706622e-07, "loss": 0.33317723, "memory(GiB)": 34.88, "step": 136570, "train_speed(iter/s)": 0.411451 }, { "acc": 0.9493391, "epoch": 3.697912436032816, "grad_norm": 3.445105791091919, "learning_rate": 1.5610962539657527e-07, "loss": 0.2649564, "memory(GiB)": 34.88, "step": 136575, "train_speed(iter/s)": 0.411452 }, { "acc": 0.94470749, "epoch": 3.6980478163160315, "grad_norm": 6.5966715812683105, "learning_rate": 1.559713546215925e-07, "loss": 0.34571452, "memory(GiB)": 34.88, "step": 136580, "train_speed(iter/s)": 0.411453 }, { "acc": 0.94555092, "epoch": 3.6981831965992473, "grad_norm": 5.813270568847656, "learning_rate": 1.5583314453385078e-07, "loss": 0.27241197, "memory(GiB)": 34.88, "step": 136585, "train_speed(iter/s)": 0.411453 }, { "acc": 0.95826235, "epoch": 3.6983185768824627, "grad_norm": 4.7176008224487305, "learning_rate": 1.5569499513508077e-07, "loss": 0.23487117, "memory(GiB)": 34.88, "step": 136590, "train_speed(iter/s)": 0.411454 }, { "acc": 0.9556983, "epoch": 3.6984539571656785, "grad_norm": 11.305641174316406, "learning_rate": 1.5555690642701313e-07, "loss": 0.25978537, "memory(GiB)": 34.88, "step": 136595, "train_speed(iter/s)": 0.411455 }, { "acc": 0.96064472, "epoch": 3.698589337448894, "grad_norm": 4.655814170837402, "learning_rate": 1.5541887841137908e-07, "loss": 0.25543883, "memory(GiB)": 34.88, "step": 136600, "train_speed(iter/s)": 0.411456 }, { "acc": 0.9570509, "epoch": 3.6987247177321096, "grad_norm": 3.564711332321167, "learning_rate": 1.5528091108990542e-07, "loss": 0.21904492, "memory(GiB)": 34.88, "step": 136605, "train_speed(iter/s)": 0.411457 }, { "acc": 0.9421751, "epoch": 3.698860098015325, "grad_norm": 9.776618957519531, "learning_rate": 1.5514300446432278e-07, "loss": 0.31278245, "memory(GiB)": 34.88, "step": 136610, "train_speed(iter/s)": 0.411457 }, { "acc": 0.93402195, "epoch": 3.6989954782985404, "grad_norm": 9.2642240524292, "learning_rate": 1.5500515853635685e-07, "loss": 0.43760662, "memory(GiB)": 34.88, "step": 136615, "train_speed(iter/s)": 0.411458 }, { "acc": 0.93995399, "epoch": 3.699130858581756, "grad_norm": 11.049325942993164, "learning_rate": 1.5486737330773608e-07, "loss": 0.38637919, "memory(GiB)": 34.88, "step": 136620, "train_speed(iter/s)": 0.411459 }, { "acc": 0.94499683, "epoch": 3.699266238864972, "grad_norm": 89.06158447265625, "learning_rate": 1.5472964878018503e-07, "loss": 0.35973341, "memory(GiB)": 34.88, "step": 136625, "train_speed(iter/s)": 0.41146 }, { "acc": 0.9450779, "epoch": 3.6994016191481873, "grad_norm": 7.323238372802734, "learning_rate": 1.5459198495542935e-07, "loss": 0.33468666, "memory(GiB)": 34.88, "step": 136630, "train_speed(iter/s)": 0.411461 }, { "acc": 0.94120617, "epoch": 3.6995369994314027, "grad_norm": 10.78244686126709, "learning_rate": 1.5445438183519417e-07, "loss": 0.3755331, "memory(GiB)": 34.88, "step": 136635, "train_speed(iter/s)": 0.411461 }, { "acc": 0.958076, "epoch": 3.6996723797146185, "grad_norm": 3.3395493030548096, "learning_rate": 1.5431683942120237e-07, "loss": 0.26322145, "memory(GiB)": 34.88, "step": 136640, "train_speed(iter/s)": 0.411462 }, { "acc": 0.93899069, "epoch": 3.699807759997834, "grad_norm": 14.639230728149414, "learning_rate": 1.5417935771517797e-07, "loss": 0.43332028, "memory(GiB)": 34.88, "step": 136645, "train_speed(iter/s)": 0.411463 }, { "acc": 0.95451679, "epoch": 3.699943140281049, "grad_norm": 13.225878715515137, "learning_rate": 1.540419367188422e-07, "loss": 0.2228579, "memory(GiB)": 34.88, "step": 136650, "train_speed(iter/s)": 0.411464 }, { "acc": 0.95757961, "epoch": 3.700078520564265, "grad_norm": 15.279699325561523, "learning_rate": 1.5390457643391737e-07, "loss": 0.22468457, "memory(GiB)": 34.88, "step": 136655, "train_speed(iter/s)": 0.411465 }, { "acc": 0.94418373, "epoch": 3.700213900847481, "grad_norm": 19.49903678894043, "learning_rate": 1.5376727686212252e-07, "loss": 0.34105053, "memory(GiB)": 34.88, "step": 136660, "train_speed(iter/s)": 0.411466 }, { "acc": 0.93402786, "epoch": 3.700349281130696, "grad_norm": 23.700000762939453, "learning_rate": 1.5363003800518052e-07, "loss": 0.42655959, "memory(GiB)": 34.88, "step": 136665, "train_speed(iter/s)": 0.411466 }, { "acc": 0.9369504, "epoch": 3.7004846614139115, "grad_norm": 9.610443115234375, "learning_rate": 1.534928598648082e-07, "loss": 0.40955534, "memory(GiB)": 34.88, "step": 136670, "train_speed(iter/s)": 0.411467 }, { "acc": 0.95134983, "epoch": 3.7006200416971273, "grad_norm": 4.3537702560424805, "learning_rate": 1.5335574244272396e-07, "loss": 0.31236925, "memory(GiB)": 34.88, "step": 136675, "train_speed(iter/s)": 0.411468 }, { "acc": 0.94833851, "epoch": 3.7007554219803427, "grad_norm": 6.688912868499756, "learning_rate": 1.532186857406474e-07, "loss": 0.34446857, "memory(GiB)": 34.88, "step": 136680, "train_speed(iter/s)": 0.411469 }, { "acc": 0.94840946, "epoch": 3.7008908022635585, "grad_norm": 19.89058494567871, "learning_rate": 1.5308168976029307e-07, "loss": 0.3346879, "memory(GiB)": 34.88, "step": 136685, "train_speed(iter/s)": 0.411469 }, { "acc": 0.95207834, "epoch": 3.701026182546774, "grad_norm": 6.272240161895752, "learning_rate": 1.5294475450337943e-07, "loss": 0.26763852, "memory(GiB)": 34.88, "step": 136690, "train_speed(iter/s)": 0.41147 }, { "acc": 0.9391737, "epoch": 3.7011615628299896, "grad_norm": 7.531012058258057, "learning_rate": 1.5280787997161936e-07, "loss": 0.33605356, "memory(GiB)": 34.88, "step": 136695, "train_speed(iter/s)": 0.41147 }, { "acc": 0.96019573, "epoch": 3.701296943113205, "grad_norm": 2.794647216796875, "learning_rate": 1.526710661667297e-07, "loss": 0.25638871, "memory(GiB)": 34.88, "step": 136700, "train_speed(iter/s)": 0.411471 }, { "acc": 0.95458889, "epoch": 3.7014323233964204, "grad_norm": 6.992992401123047, "learning_rate": 1.5253431309042276e-07, "loss": 0.27495875, "memory(GiB)": 34.88, "step": 136705, "train_speed(iter/s)": 0.411472 }, { "acc": 0.94252491, "epoch": 3.701567703679636, "grad_norm": 3.0284903049468994, "learning_rate": 1.5239762074441256e-07, "loss": 0.35122867, "memory(GiB)": 34.88, "step": 136710, "train_speed(iter/s)": 0.411473 }, { "acc": 0.94572105, "epoch": 3.7017030839628515, "grad_norm": 7.013009548187256, "learning_rate": 1.5226098913041142e-07, "loss": 0.31999776, "memory(GiB)": 34.88, "step": 136715, "train_speed(iter/s)": 0.411473 }, { "acc": 0.95216618, "epoch": 3.7018384642460673, "grad_norm": 6.506009578704834, "learning_rate": 1.5212441825013066e-07, "loss": 0.26594346, "memory(GiB)": 34.88, "step": 136720, "train_speed(iter/s)": 0.411474 }, { "acc": 0.94693213, "epoch": 3.7019738445292827, "grad_norm": 10.775784492492676, "learning_rate": 1.519879081052814e-07, "loss": 0.31056364, "memory(GiB)": 34.88, "step": 136725, "train_speed(iter/s)": 0.411475 }, { "acc": 0.96221647, "epoch": 3.7021092248124985, "grad_norm": 5.773281097412109, "learning_rate": 1.518514586975722e-07, "loss": 0.20430486, "memory(GiB)": 34.88, "step": 136730, "train_speed(iter/s)": 0.411476 }, { "acc": 0.94207315, "epoch": 3.702244605095714, "grad_norm": 4.453517913818359, "learning_rate": 1.5171507002871428e-07, "loss": 0.34523196, "memory(GiB)": 34.88, "step": 136735, "train_speed(iter/s)": 0.411477 }, { "acc": 0.94690313, "epoch": 3.702379985378929, "grad_norm": 19.176475524902344, "learning_rate": 1.5157874210041552e-07, "loss": 0.34997711, "memory(GiB)": 34.88, "step": 136740, "train_speed(iter/s)": 0.411477 }, { "acc": 0.96511059, "epoch": 3.702515365662145, "grad_norm": 5.721424102783203, "learning_rate": 1.5144247491438328e-07, "loss": 0.23481209, "memory(GiB)": 34.88, "step": 136745, "train_speed(iter/s)": 0.411478 }, { "acc": 0.95915794, "epoch": 3.7026507459453604, "grad_norm": 3.4910049438476562, "learning_rate": 1.5130626847232546e-07, "loss": 0.23000803, "memory(GiB)": 34.88, "step": 136750, "train_speed(iter/s)": 0.411479 }, { "acc": 0.94437408, "epoch": 3.702786126228576, "grad_norm": 7.285866737365723, "learning_rate": 1.5117012277594776e-07, "loss": 0.35322738, "memory(GiB)": 34.88, "step": 136755, "train_speed(iter/s)": 0.41148 }, { "acc": 0.94868546, "epoch": 3.7029215065117915, "grad_norm": 3.521564245223999, "learning_rate": 1.510340378269559e-07, "loss": 0.32284985, "memory(GiB)": 34.88, "step": 136760, "train_speed(iter/s)": 0.411481 }, { "acc": 0.94731846, "epoch": 3.7030568867950073, "grad_norm": 4.099507808685303, "learning_rate": 1.5089801362705325e-07, "loss": 0.29343238, "memory(GiB)": 34.88, "step": 136765, "train_speed(iter/s)": 0.411482 }, { "acc": 0.93782463, "epoch": 3.7031922670782227, "grad_norm": 6.315155506134033, "learning_rate": 1.507620501779456e-07, "loss": 0.36203449, "memory(GiB)": 34.88, "step": 136770, "train_speed(iter/s)": 0.411482 }, { "acc": 0.94752665, "epoch": 3.703327647361438, "grad_norm": 5.714718818664551, "learning_rate": 1.506261474813358e-07, "loss": 0.35473878, "memory(GiB)": 34.88, "step": 136775, "train_speed(iter/s)": 0.411483 }, { "acc": 0.9505456, "epoch": 3.703463027644654, "grad_norm": 2.625206232070923, "learning_rate": 1.5049030553892512e-07, "loss": 0.24610512, "memory(GiB)": 34.88, "step": 136780, "train_speed(iter/s)": 0.411484 }, { "acc": 0.96008377, "epoch": 3.7035984079278697, "grad_norm": 5.590799808502197, "learning_rate": 1.503545243524165e-07, "loss": 0.2673902, "memory(GiB)": 34.88, "step": 136785, "train_speed(iter/s)": 0.411485 }, { "acc": 0.9548439, "epoch": 3.703733788211085, "grad_norm": 14.240886688232422, "learning_rate": 1.5021880392351113e-07, "loss": 0.27780378, "memory(GiB)": 34.88, "step": 136790, "train_speed(iter/s)": 0.411485 }, { "acc": 0.94471064, "epoch": 3.7038691684943004, "grad_norm": 6.783811092376709, "learning_rate": 1.500831442539086e-07, "loss": 0.33115559, "memory(GiB)": 34.88, "step": 136795, "train_speed(iter/s)": 0.411486 }, { "acc": 0.95065479, "epoch": 3.704004548777516, "grad_norm": 3.2179641723632812, "learning_rate": 1.4994754534530688e-07, "loss": 0.29868045, "memory(GiB)": 34.88, "step": 136800, "train_speed(iter/s)": 0.411487 }, { "acc": 0.94940262, "epoch": 3.7041399290607315, "grad_norm": 17.761899948120117, "learning_rate": 1.4981200719940716e-07, "loss": 0.33789983, "memory(GiB)": 34.88, "step": 136805, "train_speed(iter/s)": 0.411487 }, { "acc": 0.95470314, "epoch": 3.704275309343947, "grad_norm": 8.1923246383667, "learning_rate": 1.496765298179057e-07, "loss": 0.28081787, "memory(GiB)": 34.88, "step": 136810, "train_speed(iter/s)": 0.411488 }, { "acc": 0.94943666, "epoch": 3.7044106896271627, "grad_norm": 7.0050506591796875, "learning_rate": 1.495411132024999e-07, "loss": 0.2988106, "memory(GiB)": 34.88, "step": 136815, "train_speed(iter/s)": 0.411489 }, { "acc": 0.94790001, "epoch": 3.7045460699103785, "grad_norm": 3.4721035957336426, "learning_rate": 1.494057573548865e-07, "loss": 0.29015121, "memory(GiB)": 34.88, "step": 136820, "train_speed(iter/s)": 0.411489 }, { "acc": 0.94907799, "epoch": 3.704681450193594, "grad_norm": 2.9252264499664307, "learning_rate": 1.4927046227676126e-07, "loss": 0.29891608, "memory(GiB)": 34.88, "step": 136825, "train_speed(iter/s)": 0.41149 }, { "acc": 0.94548674, "epoch": 3.704816830476809, "grad_norm": 7.991775035858154, "learning_rate": 1.491352279698182e-07, "loss": 0.31765211, "memory(GiB)": 34.88, "step": 136830, "train_speed(iter/s)": 0.411491 }, { "acc": 0.96074171, "epoch": 3.704952210760025, "grad_norm": 3.8496148586273193, "learning_rate": 1.4900005443575187e-07, "loss": 0.23716938, "memory(GiB)": 34.88, "step": 136835, "train_speed(iter/s)": 0.411491 }, { "acc": 0.93984909, "epoch": 3.7050875910432404, "grad_norm": 8.194904327392578, "learning_rate": 1.4886494167625578e-07, "loss": 0.41597233, "memory(GiB)": 34.88, "step": 136840, "train_speed(iter/s)": 0.411492 }, { "acc": 0.9528307, "epoch": 3.705222971326456, "grad_norm": 4.91032600402832, "learning_rate": 1.4872988969302171e-07, "loss": 0.25357437, "memory(GiB)": 34.88, "step": 136845, "train_speed(iter/s)": 0.411493 }, { "acc": 0.95040894, "epoch": 3.7053583516096715, "grad_norm": 7.288622856140137, "learning_rate": 1.4859489848774207e-07, "loss": 0.35522618, "memory(GiB)": 34.88, "step": 136850, "train_speed(iter/s)": 0.411494 }, { "acc": 0.95095825, "epoch": 3.7054937318928873, "grad_norm": 11.33896255493164, "learning_rate": 1.4845996806210807e-07, "loss": 0.2623086, "memory(GiB)": 34.88, "step": 136855, "train_speed(iter/s)": 0.411495 }, { "acc": 0.95838308, "epoch": 3.7056291121761027, "grad_norm": 5.490568161010742, "learning_rate": 1.4832509841780986e-07, "loss": 0.23449163, "memory(GiB)": 34.88, "step": 136860, "train_speed(iter/s)": 0.411495 }, { "acc": 0.95148392, "epoch": 3.705764492459318, "grad_norm": 21.502025604248047, "learning_rate": 1.4819028955653653e-07, "loss": 0.26751566, "memory(GiB)": 34.88, "step": 136865, "train_speed(iter/s)": 0.411496 }, { "acc": 0.94216309, "epoch": 3.705899872742534, "grad_norm": 9.72671890258789, "learning_rate": 1.4805554147997706e-07, "loss": 0.36595166, "memory(GiB)": 34.88, "step": 136870, "train_speed(iter/s)": 0.411497 }, { "acc": 0.95689297, "epoch": 3.706035253025749, "grad_norm": 4.4872918128967285, "learning_rate": 1.4792085418981992e-07, "loss": 0.28037815, "memory(GiB)": 34.88, "step": 136875, "train_speed(iter/s)": 0.411498 }, { "acc": 0.95349483, "epoch": 3.706170633308965, "grad_norm": 4.304175853729248, "learning_rate": 1.4778622768775145e-07, "loss": 0.27082241, "memory(GiB)": 34.88, "step": 136880, "train_speed(iter/s)": 0.411499 }, { "acc": 0.95182076, "epoch": 3.7063060135921804, "grad_norm": 5.25766658782959, "learning_rate": 1.476516619754589e-07, "loss": 0.3217144, "memory(GiB)": 34.88, "step": 136885, "train_speed(iter/s)": 0.411499 }, { "acc": 0.95698109, "epoch": 3.706441393875396, "grad_norm": 14.794836044311523, "learning_rate": 1.4751715705462642e-07, "loss": 0.26849685, "memory(GiB)": 34.88, "step": 136890, "train_speed(iter/s)": 0.4115 }, { "acc": 0.93991451, "epoch": 3.7065767741586115, "grad_norm": 7.6136698722839355, "learning_rate": 1.4738271292694188e-07, "loss": 0.34300926, "memory(GiB)": 34.88, "step": 136895, "train_speed(iter/s)": 0.411501 }, { "acc": 0.93835249, "epoch": 3.706712154441827, "grad_norm": 10.458294868469238, "learning_rate": 1.4724832959408657e-07, "loss": 0.30398424, "memory(GiB)": 34.88, "step": 136900, "train_speed(iter/s)": 0.411502 }, { "acc": 0.94841557, "epoch": 3.7068475347250427, "grad_norm": 11.273263931274414, "learning_rate": 1.4711400705774504e-07, "loss": 0.30880022, "memory(GiB)": 34.88, "step": 136905, "train_speed(iter/s)": 0.411502 }, { "acc": 0.94259491, "epoch": 3.706982915008258, "grad_norm": 11.184964179992676, "learning_rate": 1.4697974531960029e-07, "loss": 0.30760679, "memory(GiB)": 34.88, "step": 136910, "train_speed(iter/s)": 0.411503 }, { "acc": 0.94526024, "epoch": 3.707118295291474, "grad_norm": 6.386523723602295, "learning_rate": 1.4684554438133408e-07, "loss": 0.34349086, "memory(GiB)": 34.88, "step": 136915, "train_speed(iter/s)": 0.411504 }, { "acc": 0.9395977, "epoch": 3.707253675574689, "grad_norm": 9.011090278625488, "learning_rate": 1.467114042446277e-07, "loss": 0.31918406, "memory(GiB)": 34.88, "step": 136920, "train_speed(iter/s)": 0.411504 }, { "acc": 0.94035969, "epoch": 3.707389055857905, "grad_norm": 9.401280403137207, "learning_rate": 1.4657732491116018e-07, "loss": 0.31077683, "memory(GiB)": 34.88, "step": 136925, "train_speed(iter/s)": 0.411505 }, { "acc": 0.95275106, "epoch": 3.7075244361411204, "grad_norm": 6.428205966949463, "learning_rate": 1.464433063826128e-07, "loss": 0.26151671, "memory(GiB)": 34.88, "step": 136930, "train_speed(iter/s)": 0.411506 }, { "acc": 0.93625479, "epoch": 3.7076598164243357, "grad_norm": 4.884746074676514, "learning_rate": 1.4630934866066294e-07, "loss": 0.40508471, "memory(GiB)": 34.88, "step": 136935, "train_speed(iter/s)": 0.411507 }, { "acc": 0.95006027, "epoch": 3.7077951967075515, "grad_norm": 6.897752285003662, "learning_rate": 1.461754517469902e-07, "loss": 0.3144098, "memory(GiB)": 34.88, "step": 136940, "train_speed(iter/s)": 0.411507 }, { "acc": 0.96259556, "epoch": 3.7079305769907673, "grad_norm": 3.75846791267395, "learning_rate": 1.4604161564327026e-07, "loss": 0.26470003, "memory(GiB)": 34.88, "step": 136945, "train_speed(iter/s)": 0.411508 }, { "acc": 0.94766121, "epoch": 3.7080659572739827, "grad_norm": 5.323369026184082, "learning_rate": 1.4590784035118165e-07, "loss": 0.30554538, "memory(GiB)": 34.88, "step": 136950, "train_speed(iter/s)": 0.411509 }, { "acc": 0.94496059, "epoch": 3.708201337557198, "grad_norm": 3.8819427490234375, "learning_rate": 1.4577412587239896e-07, "loss": 0.36974139, "memory(GiB)": 34.88, "step": 136955, "train_speed(iter/s)": 0.41151 }, { "acc": 0.94709644, "epoch": 3.708336717840414, "grad_norm": 9.6748046875, "learning_rate": 1.456404722085968e-07, "loss": 0.34285583, "memory(GiB)": 34.88, "step": 136960, "train_speed(iter/s)": 0.41151 }, { "acc": 0.94027195, "epoch": 3.708472098123629, "grad_norm": 4.563438892364502, "learning_rate": 1.455068793614503e-07, "loss": 0.35146866, "memory(GiB)": 34.88, "step": 136965, "train_speed(iter/s)": 0.411511 }, { "acc": 0.95192699, "epoch": 3.7086074784068446, "grad_norm": 6.01310396194458, "learning_rate": 1.4537334733263302e-07, "loss": 0.30221786, "memory(GiB)": 34.88, "step": 136970, "train_speed(iter/s)": 0.411512 }, { "acc": 0.94916687, "epoch": 3.7087428586900604, "grad_norm": 8.60948371887207, "learning_rate": 1.4523987612381673e-07, "loss": 0.26136417, "memory(GiB)": 34.88, "step": 136975, "train_speed(iter/s)": 0.411513 }, { "acc": 0.94745331, "epoch": 3.708878238973276, "grad_norm": 3.337246894836426, "learning_rate": 1.4510646573667381e-07, "loss": 0.35541532, "memory(GiB)": 34.88, "step": 136980, "train_speed(iter/s)": 0.411513 }, { "acc": 0.93880501, "epoch": 3.7090136192564915, "grad_norm": 4.175467491149902, "learning_rate": 1.4497311617287668e-07, "loss": 0.34605515, "memory(GiB)": 34.88, "step": 136985, "train_speed(iter/s)": 0.411514 }, { "acc": 0.94784012, "epoch": 3.709148999539707, "grad_norm": 11.332988739013672, "learning_rate": 1.4483982743409438e-07, "loss": 0.29043529, "memory(GiB)": 34.88, "step": 136990, "train_speed(iter/s)": 0.411515 }, { "acc": 0.94752731, "epoch": 3.7092843798229227, "grad_norm": 6.814693927764893, "learning_rate": 1.4470659952199705e-07, "loss": 0.3596014, "memory(GiB)": 34.88, "step": 136995, "train_speed(iter/s)": 0.411516 }, { "acc": 0.9285099, "epoch": 3.709419760106138, "grad_norm": 6.386787414550781, "learning_rate": 1.445734324382538e-07, "loss": 0.48107929, "memory(GiB)": 34.88, "step": 137000, "train_speed(iter/s)": 0.411516 }, { "acc": 0.94164867, "epoch": 3.709555140389354, "grad_norm": 5.559558391571045, "learning_rate": 1.4444032618453307e-07, "loss": 0.36936181, "memory(GiB)": 34.88, "step": 137005, "train_speed(iter/s)": 0.411517 }, { "acc": 0.9407567, "epoch": 3.709690520672569, "grad_norm": 10.417909622192383, "learning_rate": 1.4430728076250116e-07, "loss": 0.35326657, "memory(GiB)": 34.88, "step": 137010, "train_speed(iter/s)": 0.411518 }, { "acc": 0.9644515, "epoch": 3.709825900955785, "grad_norm": 3.925649881362915, "learning_rate": 1.4417429617382603e-07, "loss": 0.23851385, "memory(GiB)": 34.88, "step": 137015, "train_speed(iter/s)": 0.411519 }, { "acc": 0.95857487, "epoch": 3.7099612812390004, "grad_norm": 2.0376923084259033, "learning_rate": 1.440413724201728e-07, "loss": 0.25614109, "memory(GiB)": 34.88, "step": 137020, "train_speed(iter/s)": 0.411519 }, { "acc": 0.94657402, "epoch": 3.7100966615222157, "grad_norm": 4.007283687591553, "learning_rate": 1.4390850950320667e-07, "loss": 0.32928085, "memory(GiB)": 34.88, "step": 137025, "train_speed(iter/s)": 0.41152 }, { "acc": 0.93763981, "epoch": 3.7102320418054315, "grad_norm": 5.8467206954956055, "learning_rate": 1.4377570742459228e-07, "loss": 0.38823323, "memory(GiB)": 34.88, "step": 137030, "train_speed(iter/s)": 0.411521 }, { "acc": 0.94102745, "epoch": 3.710367422088647, "grad_norm": 12.802654266357422, "learning_rate": 1.4364296618599308e-07, "loss": 0.34296513, "memory(GiB)": 34.88, "step": 137035, "train_speed(iter/s)": 0.411522 }, { "acc": 0.95709772, "epoch": 3.7105028023718627, "grad_norm": 4.341460227966309, "learning_rate": 1.4351028578907207e-07, "loss": 0.27041819, "memory(GiB)": 34.88, "step": 137040, "train_speed(iter/s)": 0.411523 }, { "acc": 0.94116774, "epoch": 3.710638182655078, "grad_norm": 6.310842990875244, "learning_rate": 1.4337766623549105e-07, "loss": 0.34974742, "memory(GiB)": 34.88, "step": 137045, "train_speed(iter/s)": 0.411523 }, { "acc": 0.95018454, "epoch": 3.710773562938294, "grad_norm": 4.439884662628174, "learning_rate": 1.432451075269113e-07, "loss": 0.32974243, "memory(GiB)": 34.88, "step": 137050, "train_speed(iter/s)": 0.411524 }, { "acc": 0.94343367, "epoch": 3.710908943221509, "grad_norm": 2.8891308307647705, "learning_rate": 1.431126096649941e-07, "loss": 0.35740952, "memory(GiB)": 34.88, "step": 137055, "train_speed(iter/s)": 0.411525 }, { "acc": 0.95047569, "epoch": 3.7110443235047246, "grad_norm": 10.203262329101562, "learning_rate": 1.4298017265139802e-07, "loss": 0.28134308, "memory(GiB)": 34.88, "step": 137060, "train_speed(iter/s)": 0.411526 }, { "acc": 0.93998318, "epoch": 3.7111797037879404, "grad_norm": 5.892988681793213, "learning_rate": 1.4284779648778318e-07, "loss": 0.34647746, "memory(GiB)": 34.88, "step": 137065, "train_speed(iter/s)": 0.411526 }, { "acc": 0.9376565, "epoch": 3.7113150840711557, "grad_norm": 5.186348915100098, "learning_rate": 1.4271548117580697e-07, "loss": 0.35945003, "memory(GiB)": 34.88, "step": 137070, "train_speed(iter/s)": 0.411527 }, { "acc": 0.94814425, "epoch": 3.7114504643543715, "grad_norm": 9.468864440917969, "learning_rate": 1.4258322671712845e-07, "loss": 0.29471235, "memory(GiB)": 34.88, "step": 137075, "train_speed(iter/s)": 0.411527 }, { "acc": 0.95110111, "epoch": 3.711585844637587, "grad_norm": 3.2157044410705566, "learning_rate": 1.4245103311340283e-07, "loss": 0.3687696, "memory(GiB)": 34.88, "step": 137080, "train_speed(iter/s)": 0.411528 }, { "acc": 0.95141153, "epoch": 3.7117212249208027, "grad_norm": 9.177459716796875, "learning_rate": 1.4231890036628582e-07, "loss": 0.28262048, "memory(GiB)": 34.88, "step": 137085, "train_speed(iter/s)": 0.411529 }, { "acc": 0.9354948, "epoch": 3.711856605204018, "grad_norm": 5.091220378875732, "learning_rate": 1.4218682847743482e-07, "loss": 0.42585864, "memory(GiB)": 34.88, "step": 137090, "train_speed(iter/s)": 0.41153 }, { "acc": 0.93692846, "epoch": 3.7119919854872334, "grad_norm": 6.227503299713135, "learning_rate": 1.4205481744850168e-07, "loss": 0.39902654, "memory(GiB)": 34.88, "step": 137095, "train_speed(iter/s)": 0.411531 }, { "acc": 0.94823208, "epoch": 3.712127365770449, "grad_norm": 7.044669151306152, "learning_rate": 1.4192286728114158e-07, "loss": 0.3128859, "memory(GiB)": 34.88, "step": 137100, "train_speed(iter/s)": 0.411532 }, { "acc": 0.96050529, "epoch": 3.712262746053665, "grad_norm": 4.128150939941406, "learning_rate": 1.4179097797700692e-07, "loss": 0.28249195, "memory(GiB)": 34.88, "step": 137105, "train_speed(iter/s)": 0.411532 }, { "acc": 0.95669384, "epoch": 3.7123981263368804, "grad_norm": 12.161382675170898, "learning_rate": 1.4165914953775064e-07, "loss": 0.25082431, "memory(GiB)": 34.88, "step": 137110, "train_speed(iter/s)": 0.411533 }, { "acc": 0.94880257, "epoch": 3.7125335066200957, "grad_norm": 9.505112648010254, "learning_rate": 1.4152738196502347e-07, "loss": 0.3294313, "memory(GiB)": 34.88, "step": 137115, "train_speed(iter/s)": 0.411534 }, { "acc": 0.93539543, "epoch": 3.7126688869033115, "grad_norm": 17.28398895263672, "learning_rate": 1.4139567526047564e-07, "loss": 0.41715374, "memory(GiB)": 34.88, "step": 137120, "train_speed(iter/s)": 0.411534 }, { "acc": 0.95160923, "epoch": 3.712804267186527, "grad_norm": 4.432544231414795, "learning_rate": 1.4126402942575842e-07, "loss": 0.28943274, "memory(GiB)": 34.88, "step": 137125, "train_speed(iter/s)": 0.411535 }, { "acc": 0.95035095, "epoch": 3.7129396474697423, "grad_norm": 6.405579566955566, "learning_rate": 1.4113244446251977e-07, "loss": 0.35922861, "memory(GiB)": 34.88, "step": 137130, "train_speed(iter/s)": 0.411536 }, { "acc": 0.94297438, "epoch": 3.713075027752958, "grad_norm": 8.437220573425293, "learning_rate": 1.4100092037240818e-07, "loss": 0.36505618, "memory(GiB)": 34.88, "step": 137135, "train_speed(iter/s)": 0.411537 }, { "acc": 0.95132942, "epoch": 3.713210408036174, "grad_norm": 13.105172157287598, "learning_rate": 1.4086945715707162e-07, "loss": 0.34706624, "memory(GiB)": 34.88, "step": 137140, "train_speed(iter/s)": 0.411537 }, { "acc": 0.94489365, "epoch": 3.713345788319389, "grad_norm": 5.788237571716309, "learning_rate": 1.40738054818157e-07, "loss": 0.34552236, "memory(GiB)": 34.88, "step": 137145, "train_speed(iter/s)": 0.411538 }, { "acc": 0.95359154, "epoch": 3.7134811686026046, "grad_norm": 7.377135753631592, "learning_rate": 1.4060671335730997e-07, "loss": 0.35624948, "memory(GiB)": 34.88, "step": 137150, "train_speed(iter/s)": 0.411539 }, { "acc": 0.94470921, "epoch": 3.7136165488858204, "grad_norm": 7.277023792266846, "learning_rate": 1.4047543277617578e-07, "loss": 0.3049418, "memory(GiB)": 34.88, "step": 137155, "train_speed(iter/s)": 0.41154 }, { "acc": 0.95540237, "epoch": 3.7137519291690357, "grad_norm": 8.964974403381348, "learning_rate": 1.403442130763996e-07, "loss": 0.31040964, "memory(GiB)": 34.88, "step": 137160, "train_speed(iter/s)": 0.411541 }, { "acc": 0.94789667, "epoch": 3.7138873094522515, "grad_norm": 23.432506561279297, "learning_rate": 1.4021305425962494e-07, "loss": 0.29956934, "memory(GiB)": 34.88, "step": 137165, "train_speed(iter/s)": 0.411542 }, { "acc": 0.94958363, "epoch": 3.714022689735467, "grad_norm": 8.191186904907227, "learning_rate": 1.4008195632749422e-07, "loss": 0.27625704, "memory(GiB)": 34.88, "step": 137170, "train_speed(iter/s)": 0.411542 }, { "acc": 0.94706783, "epoch": 3.7141580700186827, "grad_norm": 4.738046169281006, "learning_rate": 1.399509192816504e-07, "loss": 0.37224774, "memory(GiB)": 34.88, "step": 137175, "train_speed(iter/s)": 0.411543 }, { "acc": 0.9569705, "epoch": 3.714293450301898, "grad_norm": 6.361300945281982, "learning_rate": 1.3981994312373536e-07, "loss": 0.27720423, "memory(GiB)": 34.88, "step": 137180, "train_speed(iter/s)": 0.411544 }, { "acc": 0.9583868, "epoch": 3.7144288305851134, "grad_norm": 4.6573076248168945, "learning_rate": 1.3968902785538868e-07, "loss": 0.2700675, "memory(GiB)": 34.88, "step": 137185, "train_speed(iter/s)": 0.411545 }, { "acc": 0.94513102, "epoch": 3.714564210868329, "grad_norm": 9.76167106628418, "learning_rate": 1.395581734782506e-07, "loss": 0.32318747, "memory(GiB)": 34.88, "step": 137190, "train_speed(iter/s)": 0.411546 }, { "acc": 0.94104824, "epoch": 3.7146995911515446, "grad_norm": 6.283471584320068, "learning_rate": 1.3942737999396076e-07, "loss": 0.380321, "memory(GiB)": 34.88, "step": 137195, "train_speed(iter/s)": 0.411546 }, { "acc": 0.94884529, "epoch": 3.7148349714347604, "grad_norm": 4.134864807128906, "learning_rate": 1.392966474041582e-07, "loss": 0.30774748, "memory(GiB)": 34.88, "step": 137200, "train_speed(iter/s)": 0.411547 }, { "acc": 0.96107121, "epoch": 3.7149703517179757, "grad_norm": 5.537215232849121, "learning_rate": 1.391659757104787e-07, "loss": 0.21510229, "memory(GiB)": 34.88, "step": 137205, "train_speed(iter/s)": 0.411548 }, { "acc": 0.94608603, "epoch": 3.7151057320011915, "grad_norm": 9.919493675231934, "learning_rate": 1.3903536491456132e-07, "loss": 0.32500229, "memory(GiB)": 34.88, "step": 137210, "train_speed(iter/s)": 0.411549 }, { "acc": 0.9500741, "epoch": 3.715241112284407, "grad_norm": 7.033121109008789, "learning_rate": 1.3890481501804183e-07, "loss": 0.29299998, "memory(GiB)": 34.88, "step": 137215, "train_speed(iter/s)": 0.411549 }, { "acc": 0.94509764, "epoch": 3.7153764925676223, "grad_norm": 8.393553733825684, "learning_rate": 1.3877432602255375e-07, "loss": 0.324264, "memory(GiB)": 34.88, "step": 137220, "train_speed(iter/s)": 0.41155 }, { "acc": 0.95328159, "epoch": 3.715511872850838, "grad_norm": 4.64728307723999, "learning_rate": 1.3864389792973338e-07, "loss": 0.2461627, "memory(GiB)": 34.88, "step": 137225, "train_speed(iter/s)": 0.411551 }, { "acc": 0.94603043, "epoch": 3.7156472531340534, "grad_norm": 3.327683687210083, "learning_rate": 1.3851353074121427e-07, "loss": 0.30651829, "memory(GiB)": 34.88, "step": 137230, "train_speed(iter/s)": 0.411552 }, { "acc": 0.95331688, "epoch": 3.715782633417269, "grad_norm": 5.321867942810059, "learning_rate": 1.3838322445862993e-07, "loss": 0.24186671, "memory(GiB)": 34.88, "step": 137235, "train_speed(iter/s)": 0.411552 }, { "acc": 0.96098652, "epoch": 3.7159180137004846, "grad_norm": 3.201446771621704, "learning_rate": 1.3825297908361112e-07, "loss": 0.20525291, "memory(GiB)": 34.88, "step": 137240, "train_speed(iter/s)": 0.411553 }, { "acc": 0.94788532, "epoch": 3.7160533939837004, "grad_norm": 4.182848930358887, "learning_rate": 1.3812279461779136e-07, "loss": 0.30812454, "memory(GiB)": 34.88, "step": 137245, "train_speed(iter/s)": 0.411554 }, { "acc": 0.9448864, "epoch": 3.7161887742669157, "grad_norm": 6.569131374359131, "learning_rate": 1.3799267106280033e-07, "loss": 0.26545458, "memory(GiB)": 34.88, "step": 137250, "train_speed(iter/s)": 0.411555 }, { "acc": 0.9441762, "epoch": 3.716324154550131, "grad_norm": 3.8691742420196533, "learning_rate": 1.378626084202693e-07, "loss": 0.26619215, "memory(GiB)": 34.88, "step": 137255, "train_speed(iter/s)": 0.411556 }, { "acc": 0.94865208, "epoch": 3.716459534833347, "grad_norm": 29.4081974029541, "learning_rate": 1.377326066918257e-07, "loss": 0.27755733, "memory(GiB)": 34.88, "step": 137260, "train_speed(iter/s)": 0.411557 }, { "acc": 0.93953133, "epoch": 3.7165949151165623, "grad_norm": 6.821064472198486, "learning_rate": 1.376026658790992e-07, "loss": 0.34132166, "memory(GiB)": 34.88, "step": 137265, "train_speed(iter/s)": 0.411557 }, { "acc": 0.92299519, "epoch": 3.716730295399778, "grad_norm": 10.717573165893555, "learning_rate": 1.3747278598371774e-07, "loss": 0.4987999, "memory(GiB)": 34.88, "step": 137270, "train_speed(iter/s)": 0.411558 }, { "acc": 0.94876547, "epoch": 3.7168656756829934, "grad_norm": 5.296248912811279, "learning_rate": 1.3734296700730712e-07, "loss": 0.30456898, "memory(GiB)": 34.88, "step": 137275, "train_speed(iter/s)": 0.411559 }, { "acc": 0.93864613, "epoch": 3.717001055966209, "grad_norm": 5.628945827484131, "learning_rate": 1.372132089514953e-07, "loss": 0.3153698, "memory(GiB)": 34.88, "step": 137280, "train_speed(iter/s)": 0.41156 }, { "acc": 0.92835817, "epoch": 3.7171364362494246, "grad_norm": 7.1222734451293945, "learning_rate": 1.3708351181790638e-07, "loss": 0.42027802, "memory(GiB)": 34.88, "step": 137285, "train_speed(iter/s)": 0.411561 }, { "acc": 0.9535882, "epoch": 3.71727181653264, "grad_norm": 3.597683906555176, "learning_rate": 1.369538756081661e-07, "loss": 0.28690042, "memory(GiB)": 34.88, "step": 137290, "train_speed(iter/s)": 0.411561 }, { "acc": 0.945191, "epoch": 3.7174071968158557, "grad_norm": 3.582017421722412, "learning_rate": 1.3682430032389744e-07, "loss": 0.33814721, "memory(GiB)": 34.88, "step": 137295, "train_speed(iter/s)": 0.411562 }, { "acc": 0.94946136, "epoch": 3.7175425770990715, "grad_norm": 16.850358963012695, "learning_rate": 1.3669478596672398e-07, "loss": 0.3475466, "memory(GiB)": 34.88, "step": 137300, "train_speed(iter/s)": 0.411563 }, { "acc": 0.94333868, "epoch": 3.717677957382287, "grad_norm": 11.62907600402832, "learning_rate": 1.3656533253826868e-07, "loss": 0.42344189, "memory(GiB)": 34.88, "step": 137305, "train_speed(iter/s)": 0.411564 }, { "acc": 0.9484108, "epoch": 3.7178133376655023, "grad_norm": 5.234957218170166, "learning_rate": 1.3643594004015176e-07, "loss": 0.29234281, "memory(GiB)": 34.88, "step": 137310, "train_speed(iter/s)": 0.411564 }, { "acc": 0.9601181, "epoch": 3.717948717948718, "grad_norm": 4.909444332122803, "learning_rate": 1.3630660847399618e-07, "loss": 0.26221373, "memory(GiB)": 34.88, "step": 137315, "train_speed(iter/s)": 0.411565 }, { "acc": 0.96305332, "epoch": 3.7180840982319334, "grad_norm": 3.309422254562378, "learning_rate": 1.361773378414205e-07, "loss": 0.17772887, "memory(GiB)": 34.88, "step": 137320, "train_speed(iter/s)": 0.411566 }, { "acc": 0.94193163, "epoch": 3.718219478515149, "grad_norm": 3.4560163021087646, "learning_rate": 1.3604812814404435e-07, "loss": 0.35595553, "memory(GiB)": 34.88, "step": 137325, "train_speed(iter/s)": 0.411567 }, { "acc": 0.94809113, "epoch": 3.7183548587983646, "grad_norm": 6.698103904724121, "learning_rate": 1.359189793834863e-07, "loss": 0.33485839, "memory(GiB)": 34.88, "step": 137330, "train_speed(iter/s)": 0.411568 }, { "acc": 0.92500725, "epoch": 3.7184902390815804, "grad_norm": 9.000263214111328, "learning_rate": 1.357898915613649e-07, "loss": 0.43001204, "memory(GiB)": 34.88, "step": 137335, "train_speed(iter/s)": 0.411568 }, { "acc": 0.94385929, "epoch": 3.7186256193647957, "grad_norm": 14.564131736755371, "learning_rate": 1.35660864679297e-07, "loss": 0.34081981, "memory(GiB)": 34.88, "step": 137340, "train_speed(iter/s)": 0.411569 }, { "acc": 0.94906349, "epoch": 3.718760999648011, "grad_norm": 7.255852699279785, "learning_rate": 1.355318987388978e-07, "loss": 0.30617774, "memory(GiB)": 34.88, "step": 137345, "train_speed(iter/s)": 0.41157 }, { "acc": 0.94842463, "epoch": 3.718896379931227, "grad_norm": 9.349610328674316, "learning_rate": 1.354029937417848e-07, "loss": 0.29741764, "memory(GiB)": 34.88, "step": 137350, "train_speed(iter/s)": 0.411571 }, { "acc": 0.95382805, "epoch": 3.7190317602144423, "grad_norm": 3.1090524196624756, "learning_rate": 1.3527414968957093e-07, "loss": 0.26534545, "memory(GiB)": 34.88, "step": 137355, "train_speed(iter/s)": 0.411571 }, { "acc": 0.94366665, "epoch": 3.719167140497658, "grad_norm": 4.339626789093018, "learning_rate": 1.3514536658387145e-07, "loss": 0.36141925, "memory(GiB)": 34.88, "step": 137360, "train_speed(iter/s)": 0.411572 }, { "acc": 0.95278454, "epoch": 3.7193025207808734, "grad_norm": 2.641700029373169, "learning_rate": 1.3501664442629819e-07, "loss": 0.31095757, "memory(GiB)": 34.88, "step": 137365, "train_speed(iter/s)": 0.411573 }, { "acc": 0.94636698, "epoch": 3.7194379010640892, "grad_norm": 26.90558624267578, "learning_rate": 1.348879832184653e-07, "loss": 0.37960277, "memory(GiB)": 34.88, "step": 137370, "train_speed(iter/s)": 0.411574 }, { "acc": 0.953302, "epoch": 3.7195732813473046, "grad_norm": 4.954983711242676, "learning_rate": 1.3475938296198408e-07, "loss": 0.27822886, "memory(GiB)": 34.88, "step": 137375, "train_speed(iter/s)": 0.411574 }, { "acc": 0.95502071, "epoch": 3.71970866163052, "grad_norm": 5.961387634277344, "learning_rate": 1.3463084365846475e-07, "loss": 0.27077432, "memory(GiB)": 34.88, "step": 137380, "train_speed(iter/s)": 0.411575 }, { "acc": 0.94948864, "epoch": 3.7198440419137357, "grad_norm": 7.36802339553833, "learning_rate": 1.3450236530951867e-07, "loss": 0.33716857, "memory(GiB)": 34.88, "step": 137385, "train_speed(iter/s)": 0.411576 }, { "acc": 0.9481369, "epoch": 3.719979422196951, "grad_norm": 11.05288314819336, "learning_rate": 1.343739479167538e-07, "loss": 0.32623518, "memory(GiB)": 34.88, "step": 137390, "train_speed(iter/s)": 0.411577 }, { "acc": 0.94178238, "epoch": 3.720114802480167, "grad_norm": 12.545991897583008, "learning_rate": 1.3424559148178034e-07, "loss": 0.40260572, "memory(GiB)": 34.88, "step": 137395, "train_speed(iter/s)": 0.411578 }, { "acc": 0.95759468, "epoch": 3.7202501827633823, "grad_norm": 14.803977966308594, "learning_rate": 1.3411729600620467e-07, "loss": 0.23959961, "memory(GiB)": 34.88, "step": 137400, "train_speed(iter/s)": 0.411579 }, { "acc": 0.93938141, "epoch": 3.720385563046598, "grad_norm": 11.134346008300781, "learning_rate": 1.339890614916359e-07, "loss": 0.40741186, "memory(GiB)": 34.88, "step": 137405, "train_speed(iter/s)": 0.41158 }, { "acc": 0.94677458, "epoch": 3.7205209433298134, "grad_norm": 5.595089912414551, "learning_rate": 1.3386088793967863e-07, "loss": 0.32228067, "memory(GiB)": 34.88, "step": 137410, "train_speed(iter/s)": 0.41158 }, { "acc": 0.95472403, "epoch": 3.720656323613029, "grad_norm": 3.557196617126465, "learning_rate": 1.3373277535193925e-07, "loss": 0.32332506, "memory(GiB)": 34.88, "step": 137415, "train_speed(iter/s)": 0.411581 }, { "acc": 0.94239407, "epoch": 3.7207917038962446, "grad_norm": 8.314501762390137, "learning_rate": 1.3360472373002298e-07, "loss": 0.37217107, "memory(GiB)": 34.88, "step": 137420, "train_speed(iter/s)": 0.411582 }, { "acc": 0.94925976, "epoch": 3.72092708417946, "grad_norm": 5.96070671081543, "learning_rate": 1.3347673307553282e-07, "loss": 0.36072092, "memory(GiB)": 34.88, "step": 137425, "train_speed(iter/s)": 0.411583 }, { "acc": 0.95431194, "epoch": 3.7210624644626757, "grad_norm": 6.7204670906066895, "learning_rate": 1.333488033900734e-07, "loss": 0.25057793, "memory(GiB)": 34.88, "step": 137430, "train_speed(iter/s)": 0.411584 }, { "acc": 0.93758507, "epoch": 3.721197844745891, "grad_norm": 9.960238456726074, "learning_rate": 1.3322093467524556e-07, "loss": 0.36626735, "memory(GiB)": 34.88, "step": 137435, "train_speed(iter/s)": 0.411584 }, { "acc": 0.9314641, "epoch": 3.721333225029107, "grad_norm": 5.556309700012207, "learning_rate": 1.3309312693265336e-07, "loss": 0.38134685, "memory(GiB)": 34.88, "step": 137440, "train_speed(iter/s)": 0.411585 }, { "acc": 0.9468565, "epoch": 3.7214686053123223, "grad_norm": 19.184946060180664, "learning_rate": 1.3296538016389593e-07, "loss": 0.30034621, "memory(GiB)": 34.88, "step": 137445, "train_speed(iter/s)": 0.411586 }, { "acc": 0.95013599, "epoch": 3.7216039855955376, "grad_norm": 3.766801357269287, "learning_rate": 1.3283769437057518e-07, "loss": 0.30419948, "memory(GiB)": 34.88, "step": 137450, "train_speed(iter/s)": 0.411587 }, { "acc": 0.95570297, "epoch": 3.7217393658787534, "grad_norm": 3.854623556137085, "learning_rate": 1.3271006955428855e-07, "loss": 0.24381037, "memory(GiB)": 34.88, "step": 137455, "train_speed(iter/s)": 0.411587 }, { "acc": 0.95613689, "epoch": 3.7218747461619692, "grad_norm": 5.076335430145264, "learning_rate": 1.3258250571663683e-07, "loss": 0.26847415, "memory(GiB)": 34.88, "step": 137460, "train_speed(iter/s)": 0.411588 }, { "acc": 0.95914631, "epoch": 3.7220101264451846, "grad_norm": 7.730368614196777, "learning_rate": 1.3245500285921745e-07, "loss": 0.25835397, "memory(GiB)": 34.88, "step": 137465, "train_speed(iter/s)": 0.411589 }, { "acc": 0.94925156, "epoch": 3.7221455067284, "grad_norm": 2.3218443393707275, "learning_rate": 1.323275609836262e-07, "loss": 0.29374261, "memory(GiB)": 34.88, "step": 137470, "train_speed(iter/s)": 0.41159 }, { "acc": 0.96603928, "epoch": 3.7222808870116157, "grad_norm": 3.4294445514678955, "learning_rate": 1.3220018009146166e-07, "loss": 0.2184701, "memory(GiB)": 34.88, "step": 137475, "train_speed(iter/s)": 0.411591 }, { "acc": 0.95180092, "epoch": 3.722416267294831, "grad_norm": 11.929742813110352, "learning_rate": 1.320728601843185e-07, "loss": 0.27676666, "memory(GiB)": 34.88, "step": 137480, "train_speed(iter/s)": 0.411591 }, { "acc": 0.93089085, "epoch": 3.7225516475780465, "grad_norm": 5.555819511413574, "learning_rate": 1.3194560126379194e-07, "loss": 0.43919611, "memory(GiB)": 34.88, "step": 137485, "train_speed(iter/s)": 0.411592 }, { "acc": 0.9576088, "epoch": 3.7226870278612623, "grad_norm": 4.120979309082031, "learning_rate": 1.3181840333147443e-07, "loss": 0.31866741, "memory(GiB)": 34.88, "step": 137490, "train_speed(iter/s)": 0.411593 }, { "acc": 0.94945087, "epoch": 3.722822408144478, "grad_norm": 6.996993064880371, "learning_rate": 1.3169126638896234e-07, "loss": 0.31669455, "memory(GiB)": 34.88, "step": 137495, "train_speed(iter/s)": 0.411594 }, { "acc": 0.94640999, "epoch": 3.7229577884276934, "grad_norm": 7.486442565917969, "learning_rate": 1.3156419043784642e-07, "loss": 0.29366536, "memory(GiB)": 34.88, "step": 137500, "train_speed(iter/s)": 0.411595 }, { "acc": 0.94539576, "epoch": 3.723093168710909, "grad_norm": 10.710503578186035, "learning_rate": 1.3143717547971863e-07, "loss": 0.41806526, "memory(GiB)": 34.88, "step": 137505, "train_speed(iter/s)": 0.411596 }, { "acc": 0.94688416, "epoch": 3.7232285489941246, "grad_norm": 6.807186126708984, "learning_rate": 1.3131022151617082e-07, "loss": 0.32932036, "memory(GiB)": 34.88, "step": 137510, "train_speed(iter/s)": 0.411596 }, { "acc": 0.93796196, "epoch": 3.72336392927734, "grad_norm": 15.728214263916016, "learning_rate": 1.3118332854879266e-07, "loss": 0.4339139, "memory(GiB)": 34.88, "step": 137515, "train_speed(iter/s)": 0.411597 }, { "acc": 0.95371265, "epoch": 3.7234993095605557, "grad_norm": 7.393154621124268, "learning_rate": 1.3105649657917445e-07, "loss": 0.22133374, "memory(GiB)": 34.88, "step": 137520, "train_speed(iter/s)": 0.411598 }, { "acc": 0.94994698, "epoch": 3.723634689843771, "grad_norm": 7.194732666015625, "learning_rate": 1.309297256089036e-07, "loss": 0.33573232, "memory(GiB)": 34.88, "step": 137525, "train_speed(iter/s)": 0.411599 }, { "acc": 0.94945984, "epoch": 3.723770070126987, "grad_norm": 5.555964946746826, "learning_rate": 1.3080301563957034e-07, "loss": 0.32660882, "memory(GiB)": 34.88, "step": 137530, "train_speed(iter/s)": 0.4116 }, { "acc": 0.93820248, "epoch": 3.7239054504102023, "grad_norm": 11.53536605834961, "learning_rate": 1.3067636667275994e-07, "loss": 0.4217598, "memory(GiB)": 34.88, "step": 137535, "train_speed(iter/s)": 0.4116 }, { "acc": 0.93403606, "epoch": 3.7240408306934176, "grad_norm": 11.080972671508789, "learning_rate": 1.3054977871005983e-07, "loss": 0.40194473, "memory(GiB)": 34.88, "step": 137540, "train_speed(iter/s)": 0.411601 }, { "acc": 0.95168514, "epoch": 3.7241762109766334, "grad_norm": 12.858755111694336, "learning_rate": 1.3042325175305585e-07, "loss": 0.27114978, "memory(GiB)": 34.88, "step": 137545, "train_speed(iter/s)": 0.411602 }, { "acc": 0.93554935, "epoch": 3.724311591259849, "grad_norm": 16.53740882873535, "learning_rate": 1.3029678580333262e-07, "loss": 0.3751987, "memory(GiB)": 34.88, "step": 137550, "train_speed(iter/s)": 0.411603 }, { "acc": 0.94808311, "epoch": 3.7244469715430646, "grad_norm": 6.517791271209717, "learning_rate": 1.3017038086247545e-07, "loss": 0.34788465, "memory(GiB)": 34.88, "step": 137555, "train_speed(iter/s)": 0.411603 }, { "acc": 0.94127846, "epoch": 3.72458235182628, "grad_norm": 5.305811882019043, "learning_rate": 1.3004403693206565e-07, "loss": 0.41644588, "memory(GiB)": 34.88, "step": 137560, "train_speed(iter/s)": 0.411604 }, { "acc": 0.94647789, "epoch": 3.7247177321094957, "grad_norm": 6.867119789123535, "learning_rate": 1.2991775401368791e-07, "loss": 0.31509309, "memory(GiB)": 34.88, "step": 137565, "train_speed(iter/s)": 0.411605 }, { "acc": 0.93629131, "epoch": 3.724853112392711, "grad_norm": 3.6117961406707764, "learning_rate": 1.2979153210892304e-07, "loss": 0.39696803, "memory(GiB)": 34.88, "step": 137570, "train_speed(iter/s)": 0.411606 }, { "acc": 0.94006243, "epoch": 3.7249884926759265, "grad_norm": 6.031683444976807, "learning_rate": 1.2966537121935296e-07, "loss": 0.42723336, "memory(GiB)": 34.88, "step": 137575, "train_speed(iter/s)": 0.411607 }, { "acc": 0.9483139, "epoch": 3.7251238729591423, "grad_norm": 8.294395446777344, "learning_rate": 1.2953927134655788e-07, "loss": 0.27681656, "memory(GiB)": 34.88, "step": 137580, "train_speed(iter/s)": 0.411607 }, { "acc": 0.95892105, "epoch": 3.7252592532423576, "grad_norm": 9.849032402038574, "learning_rate": 1.2941323249211748e-07, "loss": 0.2406002, "memory(GiB)": 34.88, "step": 137585, "train_speed(iter/s)": 0.411608 }, { "acc": 0.94827137, "epoch": 3.7253946335255734, "grad_norm": 3.8204421997070312, "learning_rate": 1.292872546576104e-07, "loss": 0.29613783, "memory(GiB)": 34.88, "step": 137590, "train_speed(iter/s)": 0.411609 }, { "acc": 0.95026283, "epoch": 3.725530013808789, "grad_norm": 3.2590181827545166, "learning_rate": 1.2916133784461465e-07, "loss": 0.30564396, "memory(GiB)": 34.88, "step": 137595, "train_speed(iter/s)": 0.41161 }, { "acc": 0.95254841, "epoch": 3.7256653940920046, "grad_norm": 8.001887321472168, "learning_rate": 1.2903548205470818e-07, "loss": 0.27514358, "memory(GiB)": 34.88, "step": 137600, "train_speed(iter/s)": 0.41161 }, { "acc": 0.95386124, "epoch": 3.72580077437522, "grad_norm": 3.769021987915039, "learning_rate": 1.2890968728946635e-07, "loss": 0.27334535, "memory(GiB)": 34.88, "step": 137605, "train_speed(iter/s)": 0.411611 }, { "acc": 0.96122446, "epoch": 3.7259361546584353, "grad_norm": 8.157443046569824, "learning_rate": 1.2878395355046655e-07, "loss": 0.21256421, "memory(GiB)": 34.88, "step": 137610, "train_speed(iter/s)": 0.411612 }, { "acc": 0.93734703, "epoch": 3.726071534941651, "grad_norm": 5.499059200286865, "learning_rate": 1.2865828083928296e-07, "loss": 0.41839857, "memory(GiB)": 34.88, "step": 137615, "train_speed(iter/s)": 0.411613 }, { "acc": 0.94803257, "epoch": 3.726206915224867, "grad_norm": 4.830799102783203, "learning_rate": 1.2853266915749023e-07, "loss": 0.32037566, "memory(GiB)": 34.88, "step": 137620, "train_speed(iter/s)": 0.411614 }, { "acc": 0.94978828, "epoch": 3.7263422955080823, "grad_norm": 7.019702911376953, "learning_rate": 1.2840711850666144e-07, "loss": 0.31848886, "memory(GiB)": 34.88, "step": 137625, "train_speed(iter/s)": 0.411614 }, { "acc": 0.9586935, "epoch": 3.7264776757912976, "grad_norm": 4.785254955291748, "learning_rate": 1.282816288883696e-07, "loss": 0.23935087, "memory(GiB)": 34.88, "step": 137630, "train_speed(iter/s)": 0.411615 }, { "acc": 0.94395571, "epoch": 3.7266130560745134, "grad_norm": 21.726987838745117, "learning_rate": 1.281562003041877e-07, "loss": 0.32788672, "memory(GiB)": 34.88, "step": 137635, "train_speed(iter/s)": 0.411616 }, { "acc": 0.94852915, "epoch": 3.726748436357729, "grad_norm": 2.546828031539917, "learning_rate": 1.2803083275568493e-07, "loss": 0.26198809, "memory(GiB)": 34.88, "step": 137640, "train_speed(iter/s)": 0.411617 }, { "acc": 0.94741325, "epoch": 3.726883816640944, "grad_norm": 10.05369758605957, "learning_rate": 1.279055262444332e-07, "loss": 0.31992202, "memory(GiB)": 34.88, "step": 137645, "train_speed(iter/s)": 0.411618 }, { "acc": 0.95179081, "epoch": 3.72701919692416, "grad_norm": 5.275589942932129, "learning_rate": 1.2778028077200164e-07, "loss": 0.26568689, "memory(GiB)": 34.88, "step": 137650, "train_speed(iter/s)": 0.411618 }, { "acc": 0.93911629, "epoch": 3.7271545772073758, "grad_norm": 11.701592445373535, "learning_rate": 1.2765509633995997e-07, "loss": 0.37904682, "memory(GiB)": 34.88, "step": 137655, "train_speed(iter/s)": 0.411619 }, { "acc": 0.94804764, "epoch": 3.727289957490591, "grad_norm": 8.239204406738281, "learning_rate": 1.2752997294987565e-07, "loss": 0.29162984, "memory(GiB)": 34.88, "step": 137660, "train_speed(iter/s)": 0.41162 }, { "acc": 0.95550938, "epoch": 3.7274253377738065, "grad_norm": 4.298457622528076, "learning_rate": 1.2740491060331614e-07, "loss": 0.28943782, "memory(GiB)": 34.88, "step": 137665, "train_speed(iter/s)": 0.411621 }, { "acc": 0.94620914, "epoch": 3.7275607180570223, "grad_norm": 5.450254440307617, "learning_rate": 1.2727990930184787e-07, "loss": 0.32686663, "memory(GiB)": 34.88, "step": 137670, "train_speed(iter/s)": 0.411622 }, { "acc": 0.94522858, "epoch": 3.7276960983402376, "grad_norm": 5.920205116271973, "learning_rate": 1.2715496904703828e-07, "loss": 0.39974833, "memory(GiB)": 34.88, "step": 137675, "train_speed(iter/s)": 0.411622 }, { "acc": 0.94813251, "epoch": 3.7278314786234534, "grad_norm": 6.291749477386475, "learning_rate": 1.2703008984045036e-07, "loss": 0.34852476, "memory(GiB)": 34.88, "step": 137680, "train_speed(iter/s)": 0.411623 }, { "acc": 0.93192444, "epoch": 3.727966858906669, "grad_norm": 12.180598258972168, "learning_rate": 1.2690527168364946e-07, "loss": 0.45573101, "memory(GiB)": 34.88, "step": 137685, "train_speed(iter/s)": 0.411624 }, { "acc": 0.94052334, "epoch": 3.7281022391898846, "grad_norm": 25.14789581298828, "learning_rate": 1.2678051457819966e-07, "loss": 0.39557071, "memory(GiB)": 34.88, "step": 137690, "train_speed(iter/s)": 0.411625 }, { "acc": 0.95128899, "epoch": 3.7282376194731, "grad_norm": 6.098106861114502, "learning_rate": 1.2665581852566237e-07, "loss": 0.31436648, "memory(GiB)": 34.88, "step": 137695, "train_speed(iter/s)": 0.411625 }, { "acc": 0.94215946, "epoch": 3.7283729997563153, "grad_norm": 6.401776313781738, "learning_rate": 1.2653118352760116e-07, "loss": 0.32994173, "memory(GiB)": 34.88, "step": 137700, "train_speed(iter/s)": 0.411626 }, { "acc": 0.968853, "epoch": 3.728508380039531, "grad_norm": 6.791891098022461, "learning_rate": 1.2640660958557628e-07, "loss": 0.19365308, "memory(GiB)": 34.88, "step": 137705, "train_speed(iter/s)": 0.411627 }, { "acc": 0.94399681, "epoch": 3.7286437603227465, "grad_norm": 7.716188430786133, "learning_rate": 1.2628209670114915e-07, "loss": 0.36710052, "memory(GiB)": 34.88, "step": 137710, "train_speed(iter/s)": 0.411628 }, { "acc": 0.94635134, "epoch": 3.7287791406059623, "grad_norm": 5.339968204498291, "learning_rate": 1.2615764487587888e-07, "loss": 0.29270587, "memory(GiB)": 34.88, "step": 137715, "train_speed(iter/s)": 0.411629 }, { "acc": 0.94835567, "epoch": 3.7289145208891776, "grad_norm": 6.8070783615112305, "learning_rate": 1.2603325411132464e-07, "loss": 0.36641679, "memory(GiB)": 34.88, "step": 137720, "train_speed(iter/s)": 0.411629 }, { "acc": 0.93163204, "epoch": 3.7290499011723934, "grad_norm": 5.57439661026001, "learning_rate": 1.2590892440904503e-07, "loss": 0.44758863, "memory(GiB)": 34.88, "step": 137725, "train_speed(iter/s)": 0.41163 }, { "acc": 0.94664669, "epoch": 3.729185281455609, "grad_norm": 5.8502397537231445, "learning_rate": 1.25784655770597e-07, "loss": 0.31708875, "memory(GiB)": 34.88, "step": 137730, "train_speed(iter/s)": 0.411631 }, { "acc": 0.96317406, "epoch": 3.729320661738824, "grad_norm": 8.393311500549316, "learning_rate": 1.256604481975374e-07, "loss": 0.27911563, "memory(GiB)": 34.88, "step": 137735, "train_speed(iter/s)": 0.411632 }, { "acc": 0.9565793, "epoch": 3.72945604202204, "grad_norm": 3.3143043518066406, "learning_rate": 1.2553630169142217e-07, "loss": 0.30440612, "memory(GiB)": 34.88, "step": 137740, "train_speed(iter/s)": 0.411633 }, { "acc": 0.94358387, "epoch": 3.7295914223052553, "grad_norm": 10.708890914916992, "learning_rate": 1.2541221625380704e-07, "loss": 0.39009185, "memory(GiB)": 34.88, "step": 137745, "train_speed(iter/s)": 0.411633 }, { "acc": 0.95583572, "epoch": 3.729726802588471, "grad_norm": 8.198981285095215, "learning_rate": 1.252881918862451e-07, "loss": 0.224154, "memory(GiB)": 34.88, "step": 137750, "train_speed(iter/s)": 0.411634 }, { "acc": 0.95513268, "epoch": 3.7298621828716865, "grad_norm": 4.167795181274414, "learning_rate": 1.2516422859029104e-07, "loss": 0.25080087, "memory(GiB)": 34.88, "step": 137755, "train_speed(iter/s)": 0.411635 }, { "acc": 0.95925045, "epoch": 3.7299975631549023, "grad_norm": 3.5708072185516357, "learning_rate": 1.2504032636749793e-07, "loss": 0.29306471, "memory(GiB)": 34.88, "step": 137760, "train_speed(iter/s)": 0.411636 }, { "acc": 0.94512205, "epoch": 3.7301329434381176, "grad_norm": 12.122079849243164, "learning_rate": 1.2491648521941715e-07, "loss": 0.39114406, "memory(GiB)": 34.88, "step": 137765, "train_speed(iter/s)": 0.411637 }, { "acc": 0.95331173, "epoch": 3.730268323721333, "grad_norm": 3.8811583518981934, "learning_rate": 1.247927051476006e-07, "loss": 0.25406895, "memory(GiB)": 34.88, "step": 137770, "train_speed(iter/s)": 0.411637 }, { "acc": 0.94850922, "epoch": 3.730403704004549, "grad_norm": 9.376418113708496, "learning_rate": 1.246689861535986e-07, "loss": 0.33180356, "memory(GiB)": 34.88, "step": 137775, "train_speed(iter/s)": 0.411638 }, { "acc": 0.94623327, "epoch": 3.7305390842877646, "grad_norm": 8.222868919372559, "learning_rate": 1.245453282389614e-07, "loss": 0.31150861, "memory(GiB)": 34.88, "step": 137780, "train_speed(iter/s)": 0.411639 }, { "acc": 0.94737778, "epoch": 3.73067446457098, "grad_norm": 16.3811092376709, "learning_rate": 1.2442173140523703e-07, "loss": 0.36144211, "memory(GiB)": 34.88, "step": 137785, "train_speed(iter/s)": 0.41164 }, { "acc": 0.94610138, "epoch": 3.7308098448541953, "grad_norm": 5.282751083374023, "learning_rate": 1.2429819565397467e-07, "loss": 0.33052344, "memory(GiB)": 34.88, "step": 137790, "train_speed(iter/s)": 0.411641 }, { "acc": 0.9403018, "epoch": 3.730945225137411, "grad_norm": 6.903412342071533, "learning_rate": 1.2417472098672184e-07, "loss": 0.40223022, "memory(GiB)": 34.88, "step": 137795, "train_speed(iter/s)": 0.411642 }, { "acc": 0.96110916, "epoch": 3.7310806054206265, "grad_norm": 5.695804595947266, "learning_rate": 1.2405130740502488e-07, "loss": 0.22531056, "memory(GiB)": 34.88, "step": 137800, "train_speed(iter/s)": 0.411642 }, { "acc": 0.95489082, "epoch": 3.731215985703842, "grad_norm": 7.4210076332092285, "learning_rate": 1.239279549104302e-07, "loss": 0.23879833, "memory(GiB)": 34.88, "step": 137805, "train_speed(iter/s)": 0.411643 }, { "acc": 0.95296364, "epoch": 3.7313513659870576, "grad_norm": 6.872493267059326, "learning_rate": 1.2380466350448254e-07, "loss": 0.25906467, "memory(GiB)": 34.88, "step": 137810, "train_speed(iter/s)": 0.411644 }, { "acc": 0.95339203, "epoch": 3.7314867462702734, "grad_norm": 6.2330121994018555, "learning_rate": 1.2368143318872716e-07, "loss": 0.25860062, "memory(GiB)": 34.88, "step": 137815, "train_speed(iter/s)": 0.411645 }, { "acc": 0.956458, "epoch": 3.731622126553489, "grad_norm": 8.705415725708008, "learning_rate": 1.2355826396470652e-07, "loss": 0.24664166, "memory(GiB)": 34.88, "step": 137820, "train_speed(iter/s)": 0.411646 }, { "acc": 0.95741816, "epoch": 3.731757506836704, "grad_norm": 8.66004467010498, "learning_rate": 1.2343515583396487e-07, "loss": 0.23388419, "memory(GiB)": 34.88, "step": 137825, "train_speed(iter/s)": 0.411646 }, { "acc": 0.94581451, "epoch": 3.73189288711992, "grad_norm": 4.498953819274902, "learning_rate": 1.2331210879804353e-07, "loss": 0.31733029, "memory(GiB)": 34.88, "step": 137830, "train_speed(iter/s)": 0.411647 }, { "acc": 0.95016384, "epoch": 3.7320282674031353, "grad_norm": 5.0629777908325195, "learning_rate": 1.2318912285848446e-07, "loss": 0.32796021, "memory(GiB)": 34.88, "step": 137835, "train_speed(iter/s)": 0.411648 }, { "acc": 0.9618371, "epoch": 3.732163647686351, "grad_norm": 4.976245403289795, "learning_rate": 1.2306619801682795e-07, "loss": 0.22277071, "memory(GiB)": 34.88, "step": 137840, "train_speed(iter/s)": 0.411649 }, { "acc": 0.94574776, "epoch": 3.7322990279695665, "grad_norm": 4.845256805419922, "learning_rate": 1.2294333427461373e-07, "loss": 0.30508263, "memory(GiB)": 34.88, "step": 137845, "train_speed(iter/s)": 0.41165 }, { "acc": 0.92464371, "epoch": 3.7324344082527823, "grad_norm": 13.70408821105957, "learning_rate": 1.2282053163338154e-07, "loss": 0.47927055, "memory(GiB)": 34.88, "step": 137850, "train_speed(iter/s)": 0.41165 }, { "acc": 0.94451714, "epoch": 3.7325697885359976, "grad_norm": 5.784082889556885, "learning_rate": 1.2269779009466886e-07, "loss": 0.33082252, "memory(GiB)": 34.88, "step": 137855, "train_speed(iter/s)": 0.411651 }, { "acc": 0.96841316, "epoch": 3.732705168819213, "grad_norm": 4.330078125, "learning_rate": 1.2257510966001373e-07, "loss": 0.18879905, "memory(GiB)": 34.88, "step": 137860, "train_speed(iter/s)": 0.411652 }, { "acc": 0.94774075, "epoch": 3.732840549102429, "grad_norm": 4.368444442749023, "learning_rate": 1.2245249033095313e-07, "loss": 0.28507781, "memory(GiB)": 34.88, "step": 137865, "train_speed(iter/s)": 0.411653 }, { "acc": 0.93614902, "epoch": 3.732975929385644, "grad_norm": 5.3606276512146, "learning_rate": 1.2232993210902347e-07, "loss": 0.42950335, "memory(GiB)": 34.88, "step": 137870, "train_speed(iter/s)": 0.411654 }, { "acc": 0.9593214, "epoch": 3.73311130966886, "grad_norm": 6.371967315673828, "learning_rate": 1.2220743499575834e-07, "loss": 0.247579, "memory(GiB)": 34.88, "step": 137875, "train_speed(iter/s)": 0.411655 }, { "acc": 0.9299489, "epoch": 3.7332466899520753, "grad_norm": 9.158716201782227, "learning_rate": 1.2208499899269415e-07, "loss": 0.44053001, "memory(GiB)": 34.88, "step": 137880, "train_speed(iter/s)": 0.411655 }, { "acc": 0.94497108, "epoch": 3.733382070235291, "grad_norm": 6.357004642486572, "learning_rate": 1.2196262410136393e-07, "loss": 0.37467484, "memory(GiB)": 34.88, "step": 137885, "train_speed(iter/s)": 0.411656 }, { "acc": 0.93743334, "epoch": 3.7335174505185065, "grad_norm": 17.160884857177734, "learning_rate": 1.2184031032330083e-07, "loss": 0.40081825, "memory(GiB)": 34.88, "step": 137890, "train_speed(iter/s)": 0.411657 }, { "acc": 0.94430084, "epoch": 3.733652830801722, "grad_norm": 3.0957179069519043, "learning_rate": 1.2171805766003616e-07, "loss": 0.36658175, "memory(GiB)": 34.88, "step": 137895, "train_speed(iter/s)": 0.411658 }, { "acc": 0.94613094, "epoch": 3.7337882110849376, "grad_norm": 3.4660089015960693, "learning_rate": 1.2159586611310245e-07, "loss": 0.30634627, "memory(GiB)": 34.88, "step": 137900, "train_speed(iter/s)": 0.411659 }, { "acc": 0.95654507, "epoch": 3.733923591368153, "grad_norm": 8.60897159576416, "learning_rate": 1.2147373568403057e-07, "loss": 0.23550484, "memory(GiB)": 34.88, "step": 137905, "train_speed(iter/s)": 0.411659 }, { "acc": 0.93854065, "epoch": 3.734058971651369, "grad_norm": 7.318507194519043, "learning_rate": 1.2135166637434859e-07, "loss": 0.39281766, "memory(GiB)": 34.88, "step": 137910, "train_speed(iter/s)": 0.41166 }, { "acc": 0.96097031, "epoch": 3.734194351934584, "grad_norm": 9.247979164123535, "learning_rate": 1.2122965818558845e-07, "loss": 0.22420163, "memory(GiB)": 34.88, "step": 137915, "train_speed(iter/s)": 0.411661 }, { "acc": 0.9432127, "epoch": 3.7343297322178, "grad_norm": 10.513540267944336, "learning_rate": 1.2110771111927654e-07, "loss": 0.32990053, "memory(GiB)": 34.88, "step": 137920, "train_speed(iter/s)": 0.411662 }, { "acc": 0.94950752, "epoch": 3.7344651125010153, "grad_norm": 5.8733954429626465, "learning_rate": 1.2098582517694094e-07, "loss": 0.29020486, "memory(GiB)": 34.88, "step": 137925, "train_speed(iter/s)": 0.411663 }, { "acc": 0.94852962, "epoch": 3.7346004927842307, "grad_norm": 4.895458698272705, "learning_rate": 1.2086400036010864e-07, "loss": 0.33633235, "memory(GiB)": 34.88, "step": 137930, "train_speed(iter/s)": 0.411663 }, { "acc": 0.9670433, "epoch": 3.7347358730674465, "grad_norm": 3.197505474090576, "learning_rate": 1.2074223667030544e-07, "loss": 0.17875283, "memory(GiB)": 34.88, "step": 137935, "train_speed(iter/s)": 0.411664 }, { "acc": 0.95994244, "epoch": 3.7348712533506623, "grad_norm": 3.828294277191162, "learning_rate": 1.2062053410905778e-07, "loss": 0.20629683, "memory(GiB)": 34.88, "step": 137940, "train_speed(iter/s)": 0.411665 }, { "acc": 0.95517168, "epoch": 3.7350066336338776, "grad_norm": 9.938660621643066, "learning_rate": 1.2049889267788817e-07, "loss": 0.2460887, "memory(GiB)": 34.88, "step": 137945, "train_speed(iter/s)": 0.411666 }, { "acc": 0.95228424, "epoch": 3.735142013917093, "grad_norm": 4.087471961975098, "learning_rate": 1.2037731237832302e-07, "loss": 0.27093499, "memory(GiB)": 34.88, "step": 137950, "train_speed(iter/s)": 0.411666 }, { "acc": 0.94417334, "epoch": 3.735277394200309, "grad_norm": 8.983922958374023, "learning_rate": 1.202557932118826e-07, "loss": 0.30864534, "memory(GiB)": 34.88, "step": 137955, "train_speed(iter/s)": 0.411667 }, { "acc": 0.96312819, "epoch": 3.735412774483524, "grad_norm": 6.359953880310059, "learning_rate": 1.2013433518009165e-07, "loss": 0.24242079, "memory(GiB)": 34.88, "step": 137960, "train_speed(iter/s)": 0.411668 }, { "acc": 0.94681358, "epoch": 3.7355481547667395, "grad_norm": 4.4221601486206055, "learning_rate": 1.2001293828446994e-07, "loss": 0.31202841, "memory(GiB)": 34.88, "step": 137965, "train_speed(iter/s)": 0.411669 }, { "acc": 0.95431499, "epoch": 3.7356835350499553, "grad_norm": 8.659242630004883, "learning_rate": 1.1989160252653888e-07, "loss": 0.30026727, "memory(GiB)": 34.88, "step": 137970, "train_speed(iter/s)": 0.411669 }, { "acc": 0.94751263, "epoch": 3.735818915333171, "grad_norm": 5.358920097351074, "learning_rate": 1.1977032790781877e-07, "loss": 0.28453362, "memory(GiB)": 34.88, "step": 137975, "train_speed(iter/s)": 0.41167 }, { "acc": 0.94970608, "epoch": 3.7359542956163865, "grad_norm": 11.07781982421875, "learning_rate": 1.1964911442982764e-07, "loss": 0.26904483, "memory(GiB)": 34.88, "step": 137980, "train_speed(iter/s)": 0.411671 }, { "acc": 0.95313282, "epoch": 3.736089675899602, "grad_norm": 15.851373672485352, "learning_rate": 1.195279620940853e-07, "loss": 0.25207286, "memory(GiB)": 34.88, "step": 137985, "train_speed(iter/s)": 0.411671 }, { "acc": 0.95215673, "epoch": 3.7362250561828176, "grad_norm": 5.494853496551514, "learning_rate": 1.1940687090210868e-07, "loss": 0.27857995, "memory(GiB)": 34.88, "step": 137990, "train_speed(iter/s)": 0.411672 }, { "acc": 0.94540081, "epoch": 3.736360436466033, "grad_norm": 33.404396057128906, "learning_rate": 1.1928584085541474e-07, "loss": 0.33321123, "memory(GiB)": 34.88, "step": 137995, "train_speed(iter/s)": 0.411673 }, { "acc": 0.94855871, "epoch": 3.736495816749249, "grad_norm": 9.280264854431152, "learning_rate": 1.1916487195551937e-07, "loss": 0.3419946, "memory(GiB)": 34.88, "step": 138000, "train_speed(iter/s)": 0.411674 }, { "acc": 0.94943676, "epoch": 3.736631197032464, "grad_norm": 8.488593101501465, "learning_rate": 1.1904396420393894e-07, "loss": 0.34569159, "memory(GiB)": 34.88, "step": 138005, "train_speed(iter/s)": 0.411674 }, { "acc": 0.94971485, "epoch": 3.73676657731568, "grad_norm": 4.408711910247803, "learning_rate": 1.1892311760218768e-07, "loss": 0.29753714, "memory(GiB)": 34.88, "step": 138010, "train_speed(iter/s)": 0.411675 }, { "acc": 0.95884724, "epoch": 3.7369019575988953, "grad_norm": 3.965729236602783, "learning_rate": 1.1880233215177866e-07, "loss": 0.24142222, "memory(GiB)": 34.88, "step": 138015, "train_speed(iter/s)": 0.411676 }, { "acc": 0.95459023, "epoch": 3.7370373378821107, "grad_norm": 7.303603172302246, "learning_rate": 1.1868160785422498e-07, "loss": 0.31064487, "memory(GiB)": 34.88, "step": 138020, "train_speed(iter/s)": 0.411677 }, { "acc": 0.94580994, "epoch": 3.7371727181653265, "grad_norm": 7.006532669067383, "learning_rate": 1.185609447110397e-07, "loss": 0.33783083, "memory(GiB)": 34.88, "step": 138025, "train_speed(iter/s)": 0.411678 }, { "acc": 0.94768524, "epoch": 3.737308098448542, "grad_norm": 8.486210823059082, "learning_rate": 1.1844034272373479e-07, "loss": 0.36204581, "memory(GiB)": 34.88, "step": 138030, "train_speed(iter/s)": 0.411678 }, { "acc": 0.94995251, "epoch": 3.7374434787317576, "grad_norm": 6.823775291442871, "learning_rate": 1.183198018938189e-07, "loss": 0.29192004, "memory(GiB)": 34.88, "step": 138035, "train_speed(iter/s)": 0.411679 }, { "acc": 0.9596777, "epoch": 3.737578859014973, "grad_norm": 4.716732978820801, "learning_rate": 1.1819932222280512e-07, "loss": 0.287116, "memory(GiB)": 34.88, "step": 138040, "train_speed(iter/s)": 0.41168 }, { "acc": 0.94310522, "epoch": 3.737714239298189, "grad_norm": 7.5149946212768555, "learning_rate": 1.1807890371219986e-07, "loss": 0.33002257, "memory(GiB)": 34.88, "step": 138045, "train_speed(iter/s)": 0.411681 }, { "acc": 0.96219177, "epoch": 3.737849619581404, "grad_norm": 3.9657483100891113, "learning_rate": 1.1795854636351344e-07, "loss": 0.2470824, "memory(GiB)": 34.88, "step": 138050, "train_speed(iter/s)": 0.411681 }, { "acc": 0.94395065, "epoch": 3.7379849998646195, "grad_norm": 7.110601902008057, "learning_rate": 1.1783825017825284e-07, "loss": 0.35369041, "memory(GiB)": 34.88, "step": 138055, "train_speed(iter/s)": 0.411682 }, { "acc": 0.94200611, "epoch": 3.7381203801478353, "grad_norm": 10.20274829864502, "learning_rate": 1.1771801515792445e-07, "loss": 0.32587588, "memory(GiB)": 34.88, "step": 138060, "train_speed(iter/s)": 0.411683 }, { "acc": 0.93632574, "epoch": 3.7382557604310507, "grad_norm": 10.2781982421875, "learning_rate": 1.1759784130403583e-07, "loss": 0.37665086, "memory(GiB)": 34.88, "step": 138065, "train_speed(iter/s)": 0.411684 }, { "acc": 0.94572096, "epoch": 3.7383911407142665, "grad_norm": 3.2074718475341797, "learning_rate": 1.1747772861809063e-07, "loss": 0.30183821, "memory(GiB)": 34.88, "step": 138070, "train_speed(iter/s)": 0.411684 }, { "acc": 0.93587742, "epoch": 3.738526520997482, "grad_norm": 10.846482276916504, "learning_rate": 1.1735767710159525e-07, "loss": 0.3865001, "memory(GiB)": 34.88, "step": 138075, "train_speed(iter/s)": 0.411685 }, { "acc": 0.95123196, "epoch": 3.7386619012806976, "grad_norm": 23.135971069335938, "learning_rate": 1.172376867560528e-07, "loss": 0.28746848, "memory(GiB)": 34.88, "step": 138080, "train_speed(iter/s)": 0.411686 }, { "acc": 0.9486228, "epoch": 3.738797281563913, "grad_norm": 5.456447124481201, "learning_rate": 1.1711775758296693e-07, "loss": 0.33376172, "memory(GiB)": 34.88, "step": 138085, "train_speed(iter/s)": 0.411687 }, { "acc": 0.94887428, "epoch": 3.7389326618471284, "grad_norm": 10.859280586242676, "learning_rate": 1.1699788958383794e-07, "loss": 0.37607009, "memory(GiB)": 34.88, "step": 138090, "train_speed(iter/s)": 0.411688 }, { "acc": 0.94648304, "epoch": 3.739068042130344, "grad_norm": 13.539793014526367, "learning_rate": 1.1687808276017005e-07, "loss": 0.39976974, "memory(GiB)": 34.88, "step": 138095, "train_speed(iter/s)": 0.411688 }, { "acc": 0.94194565, "epoch": 3.73920342241356, "grad_norm": 3.7666072845458984, "learning_rate": 1.1675833711346354e-07, "loss": 0.34260573, "memory(GiB)": 34.88, "step": 138100, "train_speed(iter/s)": 0.411689 }, { "acc": 0.95148277, "epoch": 3.7393388026967753, "grad_norm": 12.893589973449707, "learning_rate": 1.1663865264521655e-07, "loss": 0.32498736, "memory(GiB)": 34.88, "step": 138105, "train_speed(iter/s)": 0.41169 }, { "acc": 0.94305315, "epoch": 3.7394741829799907, "grad_norm": 18.16490936279297, "learning_rate": 1.1651902935693048e-07, "loss": 0.40262976, "memory(GiB)": 34.88, "step": 138110, "train_speed(iter/s)": 0.411691 }, { "acc": 0.95673714, "epoch": 3.7396095632632065, "grad_norm": 2.4555463790893555, "learning_rate": 1.1639946725010288e-07, "loss": 0.28498194, "memory(GiB)": 34.88, "step": 138115, "train_speed(iter/s)": 0.411691 }, { "acc": 0.94631176, "epoch": 3.739744943546422, "grad_norm": 9.965506553649902, "learning_rate": 1.1627996632623187e-07, "loss": 0.33730648, "memory(GiB)": 34.88, "step": 138120, "train_speed(iter/s)": 0.411692 }, { "acc": 0.94982166, "epoch": 3.739880323829637, "grad_norm": 3.9645535945892334, "learning_rate": 1.1616052658681327e-07, "loss": 0.31536469, "memory(GiB)": 34.88, "step": 138125, "train_speed(iter/s)": 0.411693 }, { "acc": 0.9417078, "epoch": 3.740015704112853, "grad_norm": 16.52392578125, "learning_rate": 1.1604114803334578e-07, "loss": 0.35479305, "memory(GiB)": 34.88, "step": 138130, "train_speed(iter/s)": 0.411694 }, { "acc": 0.94397116, "epoch": 3.740151084396069, "grad_norm": 4.4471235275268555, "learning_rate": 1.1592183066732304e-07, "loss": 0.34996371, "memory(GiB)": 34.88, "step": 138135, "train_speed(iter/s)": 0.411695 }, { "acc": 0.94855461, "epoch": 3.740286464679284, "grad_norm": 5.5705060958862305, "learning_rate": 1.1580257449023926e-07, "loss": 0.30657861, "memory(GiB)": 34.88, "step": 138140, "train_speed(iter/s)": 0.411695 }, { "acc": 0.95567188, "epoch": 3.7404218449624995, "grad_norm": 6.730690956115723, "learning_rate": 1.1568337950358974e-07, "loss": 0.27122972, "memory(GiB)": 34.88, "step": 138145, "train_speed(iter/s)": 0.411696 }, { "acc": 0.94718151, "epoch": 3.7405572252457153, "grad_norm": 15.868496894836426, "learning_rate": 1.1556424570886651e-07, "loss": 0.29096186, "memory(GiB)": 34.88, "step": 138150, "train_speed(iter/s)": 0.411697 }, { "acc": 0.95953064, "epoch": 3.7406926055289307, "grad_norm": 8.66899299621582, "learning_rate": 1.154451731075632e-07, "loss": 0.24807389, "memory(GiB)": 34.88, "step": 138155, "train_speed(iter/s)": 0.411698 }, { "acc": 0.95600243, "epoch": 3.7408279858121465, "grad_norm": 6.589353561401367, "learning_rate": 1.1532616170117014e-07, "loss": 0.26502616, "memory(GiB)": 34.88, "step": 138160, "train_speed(iter/s)": 0.411698 }, { "acc": 0.94787455, "epoch": 3.740963366095362, "grad_norm": 15.149564743041992, "learning_rate": 1.1520721149117932e-07, "loss": 0.34017372, "memory(GiB)": 34.88, "step": 138165, "train_speed(iter/s)": 0.411699 }, { "acc": 0.94064398, "epoch": 3.7410987463785776, "grad_norm": 4.217560291290283, "learning_rate": 1.150883224790794e-07, "loss": 0.3750576, "memory(GiB)": 34.88, "step": 138170, "train_speed(iter/s)": 0.4117 }, { "acc": 0.94175282, "epoch": 3.741234126661793, "grad_norm": 6.113550662994385, "learning_rate": 1.1496949466636128e-07, "loss": 0.31251242, "memory(GiB)": 34.88, "step": 138175, "train_speed(iter/s)": 0.411701 }, { "acc": 0.94820366, "epoch": 3.7413695069450084, "grad_norm": 15.61559772491455, "learning_rate": 1.1485072805451302e-07, "loss": 0.29487958, "memory(GiB)": 34.88, "step": 138180, "train_speed(iter/s)": 0.411702 }, { "acc": 0.95529499, "epoch": 3.741504887228224, "grad_norm": 5.9664764404296875, "learning_rate": 1.1473202264502111e-07, "loss": 0.25862443, "memory(GiB)": 34.88, "step": 138185, "train_speed(iter/s)": 0.411702 }, { "acc": 0.94303474, "epoch": 3.7416402675114395, "grad_norm": 11.267378807067871, "learning_rate": 1.1461337843937473e-07, "loss": 0.3554944, "memory(GiB)": 34.88, "step": 138190, "train_speed(iter/s)": 0.411703 }, { "acc": 0.95069017, "epoch": 3.7417756477946553, "grad_norm": 8.542159080505371, "learning_rate": 1.1449479543905814e-07, "loss": 0.31377819, "memory(GiB)": 34.88, "step": 138195, "train_speed(iter/s)": 0.411704 }, { "acc": 0.95898094, "epoch": 3.7419110280778707, "grad_norm": 6.782224655151367, "learning_rate": 1.1437627364555828e-07, "loss": 0.2478436, "memory(GiB)": 34.88, "step": 138200, "train_speed(iter/s)": 0.411705 }, { "acc": 0.95118637, "epoch": 3.7420464083610865, "grad_norm": 5.6478271484375, "learning_rate": 1.142578130603594e-07, "loss": 0.30911536, "memory(GiB)": 34.88, "step": 138205, "train_speed(iter/s)": 0.411705 }, { "acc": 0.95238895, "epoch": 3.742181788644302, "grad_norm": 6.03248405456543, "learning_rate": 1.1413941368494462e-07, "loss": 0.29807696, "memory(GiB)": 34.88, "step": 138210, "train_speed(iter/s)": 0.411706 }, { "acc": 0.94980526, "epoch": 3.742317168927517, "grad_norm": 3.9126696586608887, "learning_rate": 1.1402107552079869e-07, "loss": 0.29317963, "memory(GiB)": 34.88, "step": 138215, "train_speed(iter/s)": 0.411707 }, { "acc": 0.95366058, "epoch": 3.742452549210733, "grad_norm": 22.437843322753906, "learning_rate": 1.1390279856940308e-07, "loss": 0.27286763, "memory(GiB)": 34.88, "step": 138220, "train_speed(iter/s)": 0.411708 }, { "acc": 0.94116764, "epoch": 3.7425879294939484, "grad_norm": 4.2025675773620605, "learning_rate": 1.1378458283223976e-07, "loss": 0.36786356, "memory(GiB)": 34.88, "step": 138225, "train_speed(iter/s)": 0.411709 }, { "acc": 0.94549179, "epoch": 3.742723309777164, "grad_norm": 13.626083374023438, "learning_rate": 1.1366642831078854e-07, "loss": 0.28955622, "memory(GiB)": 34.88, "step": 138230, "train_speed(iter/s)": 0.411709 }, { "acc": 0.92808838, "epoch": 3.7428586900603795, "grad_norm": 4.33790397644043, "learning_rate": 1.1354833500653084e-07, "loss": 0.44971294, "memory(GiB)": 34.88, "step": 138235, "train_speed(iter/s)": 0.41171 }, { "acc": 0.94525642, "epoch": 3.7429940703435953, "grad_norm": 7.43603515625, "learning_rate": 1.1343030292094589e-07, "loss": 0.32880578, "memory(GiB)": 34.88, "step": 138240, "train_speed(iter/s)": 0.411711 }, { "acc": 0.95589104, "epoch": 3.7431294506268107, "grad_norm": 2.7705507278442383, "learning_rate": 1.1331233205551127e-07, "loss": 0.25536511, "memory(GiB)": 34.88, "step": 138245, "train_speed(iter/s)": 0.411712 }, { "acc": 0.93919792, "epoch": 3.743264830910026, "grad_norm": 4.939499378204346, "learning_rate": 1.1319442241170618e-07, "loss": 0.3430567, "memory(GiB)": 34.88, "step": 138250, "train_speed(iter/s)": 0.411712 }, { "acc": 0.95472136, "epoch": 3.743400211193242, "grad_norm": 6.576025485992432, "learning_rate": 1.1307657399100707e-07, "loss": 0.25770342, "memory(GiB)": 34.88, "step": 138255, "train_speed(iter/s)": 0.411713 }, { "acc": 0.94029636, "epoch": 3.743535591476457, "grad_norm": 10.461800575256348, "learning_rate": 1.1295878679488984e-07, "loss": 0.34112735, "memory(GiB)": 34.88, "step": 138260, "train_speed(iter/s)": 0.411714 }, { "acc": 0.95573063, "epoch": 3.743670971759673, "grad_norm": 5.384681224822998, "learning_rate": 1.1284106082482984e-07, "loss": 0.22367694, "memory(GiB)": 34.88, "step": 138265, "train_speed(iter/s)": 0.411715 }, { "acc": 0.93145313, "epoch": 3.7438063520428884, "grad_norm": 7.879177093505859, "learning_rate": 1.1272339608230294e-07, "loss": 0.40917435, "memory(GiB)": 34.88, "step": 138270, "train_speed(iter/s)": 0.411715 }, { "acc": 0.96164236, "epoch": 3.743941732326104, "grad_norm": 10.44852352142334, "learning_rate": 1.1260579256878228e-07, "loss": 0.26212745, "memory(GiB)": 34.88, "step": 138275, "train_speed(iter/s)": 0.411716 }, { "acc": 0.93971157, "epoch": 3.7440771126093195, "grad_norm": 5.1899566650390625, "learning_rate": 1.1248825028574098e-07, "loss": 0.2887888, "memory(GiB)": 34.88, "step": 138280, "train_speed(iter/s)": 0.411717 }, { "acc": 0.94153557, "epoch": 3.744212492892535, "grad_norm": 6.850107669830322, "learning_rate": 1.1237076923465214e-07, "loss": 0.37069321, "memory(GiB)": 34.88, "step": 138285, "train_speed(iter/s)": 0.411717 }, { "acc": 0.93785543, "epoch": 3.7443478731757507, "grad_norm": 10.521934509277344, "learning_rate": 1.1225334941698721e-07, "loss": 0.39505668, "memory(GiB)": 34.88, "step": 138290, "train_speed(iter/s)": 0.411718 }, { "acc": 0.94169931, "epoch": 3.7444832534589665, "grad_norm": 5.162271976470947, "learning_rate": 1.1213599083421655e-07, "loss": 0.34630044, "memory(GiB)": 34.88, "step": 138295, "train_speed(iter/s)": 0.411719 }, { "acc": 0.94925423, "epoch": 3.744618633742182, "grad_norm": 5.0647101402282715, "learning_rate": 1.1201869348781107e-07, "loss": 0.32993255, "memory(GiB)": 34.88, "step": 138300, "train_speed(iter/s)": 0.41172 }, { "acc": 0.94586678, "epoch": 3.744754014025397, "grad_norm": 6.558762550354004, "learning_rate": 1.1190145737923998e-07, "loss": 0.32666569, "memory(GiB)": 34.88, "step": 138305, "train_speed(iter/s)": 0.411721 }, { "acc": 0.95453148, "epoch": 3.744889394308613, "grad_norm": 3.8501663208007812, "learning_rate": 1.1178428250997084e-07, "loss": 0.26289263, "memory(GiB)": 34.88, "step": 138310, "train_speed(iter/s)": 0.411721 }, { "acc": 0.93684816, "epoch": 3.7450247745918284, "grad_norm": 2.7547643184661865, "learning_rate": 1.1166716888147291e-07, "loss": 0.45420818, "memory(GiB)": 34.88, "step": 138315, "train_speed(iter/s)": 0.411722 }, { "acc": 0.95022678, "epoch": 3.7451601548750437, "grad_norm": 2.698603391647339, "learning_rate": 1.1155011649521265e-07, "loss": 0.29904428, "memory(GiB)": 34.88, "step": 138320, "train_speed(iter/s)": 0.411723 }, { "acc": 0.94819012, "epoch": 3.7452955351582595, "grad_norm": 6.792470455169678, "learning_rate": 1.1143312535265705e-07, "loss": 0.3216177, "memory(GiB)": 34.88, "step": 138325, "train_speed(iter/s)": 0.411724 }, { "acc": 0.94816742, "epoch": 3.7454309154414753, "grad_norm": 5.578657150268555, "learning_rate": 1.1131619545527034e-07, "loss": 0.36976652, "memory(GiB)": 34.88, "step": 138330, "train_speed(iter/s)": 0.411725 }, { "acc": 0.95028009, "epoch": 3.7455662957246907, "grad_norm": 5.23089075088501, "learning_rate": 1.1119932680451791e-07, "loss": 0.28222461, "memory(GiB)": 34.88, "step": 138335, "train_speed(iter/s)": 0.411725 }, { "acc": 0.95088444, "epoch": 3.745701676007906, "grad_norm": 14.486047744750977, "learning_rate": 1.1108251940186396e-07, "loss": 0.30740557, "memory(GiB)": 34.88, "step": 138340, "train_speed(iter/s)": 0.411726 }, { "acc": 0.93366194, "epoch": 3.745837056291122, "grad_norm": 14.70625114440918, "learning_rate": 1.109657732487722e-07, "loss": 0.39236445, "memory(GiB)": 34.88, "step": 138345, "train_speed(iter/s)": 0.411727 }, { "acc": 0.93997564, "epoch": 3.745972436574337, "grad_norm": 4.818421363830566, "learning_rate": 1.1084908834670406e-07, "loss": 0.38534126, "memory(GiB)": 34.88, "step": 138350, "train_speed(iter/s)": 0.411727 }, { "acc": 0.94604721, "epoch": 3.746107816857553, "grad_norm": 12.782426834106445, "learning_rate": 1.1073246469712159e-07, "loss": 0.32349215, "memory(GiB)": 34.88, "step": 138355, "train_speed(iter/s)": 0.411728 }, { "acc": 0.9517272, "epoch": 3.7462431971407684, "grad_norm": 5.841545104980469, "learning_rate": 1.1061590230148678e-07, "loss": 0.32134991, "memory(GiB)": 34.88, "step": 138360, "train_speed(iter/s)": 0.411729 }, { "acc": 0.94349184, "epoch": 3.746378577423984, "grad_norm": 9.020658493041992, "learning_rate": 1.1049940116125833e-07, "loss": 0.36459508, "memory(GiB)": 34.88, "step": 138365, "train_speed(iter/s)": 0.41173 }, { "acc": 0.95342073, "epoch": 3.7465139577071995, "grad_norm": 11.1536283493042, "learning_rate": 1.1038296127789605e-07, "loss": 0.2612592, "memory(GiB)": 34.88, "step": 138370, "train_speed(iter/s)": 0.41173 }, { "acc": 0.9590477, "epoch": 3.746649337990415, "grad_norm": 3.4612743854522705, "learning_rate": 1.102665826528586e-07, "loss": 0.26385841, "memory(GiB)": 34.88, "step": 138375, "train_speed(iter/s)": 0.411731 }, { "acc": 0.93882923, "epoch": 3.7467847182736307, "grad_norm": 25.37451934814453, "learning_rate": 1.1015026528760469e-07, "loss": 0.39591558, "memory(GiB)": 34.88, "step": 138380, "train_speed(iter/s)": 0.411732 }, { "acc": 0.93936605, "epoch": 3.746920098556846, "grad_norm": 10.380975723266602, "learning_rate": 1.1003400918358968e-07, "loss": 0.3350045, "memory(GiB)": 34.88, "step": 138385, "train_speed(iter/s)": 0.411733 }, { "acc": 0.94573622, "epoch": 3.747055478840062, "grad_norm": 6.644733428955078, "learning_rate": 1.0991781434227169e-07, "loss": 0.30729012, "memory(GiB)": 34.88, "step": 138390, "train_speed(iter/s)": 0.411733 }, { "acc": 0.95555792, "epoch": 3.747190859123277, "grad_norm": 3.6018898487091064, "learning_rate": 1.098016807651055e-07, "loss": 0.24969969, "memory(GiB)": 34.88, "step": 138395, "train_speed(iter/s)": 0.411734 }, { "acc": 0.95889969, "epoch": 3.747326239406493, "grad_norm": 4.90847110748291, "learning_rate": 1.0968560845354595e-07, "loss": 0.29445868, "memory(GiB)": 34.88, "step": 138400, "train_speed(iter/s)": 0.411735 }, { "acc": 0.96092558, "epoch": 3.7474616196897084, "grad_norm": 3.4371140003204346, "learning_rate": 1.095695974090467e-07, "loss": 0.24546988, "memory(GiB)": 34.88, "step": 138405, "train_speed(iter/s)": 0.411736 }, { "acc": 0.93453121, "epoch": 3.7475969999729237, "grad_norm": 6.822627067565918, "learning_rate": 1.0945364763306148e-07, "loss": 0.39130206, "memory(GiB)": 34.88, "step": 138410, "train_speed(iter/s)": 0.411737 }, { "acc": 0.94203072, "epoch": 3.7477323802561395, "grad_norm": 7.469415664672852, "learning_rate": 1.093377591270434e-07, "loss": 0.36678836, "memory(GiB)": 34.88, "step": 138415, "train_speed(iter/s)": 0.411737 }, { "acc": 0.94445057, "epoch": 3.747867760539355, "grad_norm": 5.421360015869141, "learning_rate": 1.092219318924428e-07, "loss": 0.37741408, "memory(GiB)": 34.88, "step": 138420, "train_speed(iter/s)": 0.411738 }, { "acc": 0.94055157, "epoch": 3.7480031408225707, "grad_norm": 4.843326091766357, "learning_rate": 1.0910616593071176e-07, "loss": 0.33840368, "memory(GiB)": 34.88, "step": 138425, "train_speed(iter/s)": 0.411739 }, { "acc": 0.94888458, "epoch": 3.748138521105786, "grad_norm": 8.81087875366211, "learning_rate": 1.0899046124330007e-07, "loss": 0.37232513, "memory(GiB)": 34.88, "step": 138430, "train_speed(iter/s)": 0.41174 }, { "acc": 0.94784269, "epoch": 3.748273901389002, "grad_norm": 4.849631309509277, "learning_rate": 1.0887481783165694e-07, "loss": 0.34715943, "memory(GiB)": 34.88, "step": 138435, "train_speed(iter/s)": 0.41174 }, { "acc": 0.95148411, "epoch": 3.748409281672217, "grad_norm": 7.55394172668457, "learning_rate": 1.087592356972311e-07, "loss": 0.26145523, "memory(GiB)": 34.88, "step": 138440, "train_speed(iter/s)": 0.411741 }, { "acc": 0.95151749, "epoch": 3.7485446619554326, "grad_norm": 3.6223690509796143, "learning_rate": 1.086437148414707e-07, "loss": 0.28867528, "memory(GiB)": 34.88, "step": 138445, "train_speed(iter/s)": 0.411742 }, { "acc": 0.94318829, "epoch": 3.7486800422386484, "grad_norm": 7.462604522705078, "learning_rate": 1.0852825526582331e-07, "loss": 0.36351867, "memory(GiB)": 34.88, "step": 138450, "train_speed(iter/s)": 0.411743 }, { "acc": 0.96107864, "epoch": 3.748815422521864, "grad_norm": 7.658304691314697, "learning_rate": 1.0841285697173373e-07, "loss": 0.24220958, "memory(GiB)": 34.88, "step": 138455, "train_speed(iter/s)": 0.411744 }, { "acc": 0.95435371, "epoch": 3.7489508028050795, "grad_norm": 8.098109245300293, "learning_rate": 1.0829751996064902e-07, "loss": 0.28477597, "memory(GiB)": 34.88, "step": 138460, "train_speed(iter/s)": 0.411744 }, { "acc": 0.95839596, "epoch": 3.749086183088295, "grad_norm": 4.048122406005859, "learning_rate": 1.0818224423401343e-07, "loss": 0.25329905, "memory(GiB)": 34.88, "step": 138465, "train_speed(iter/s)": 0.411745 }, { "acc": 0.95765924, "epoch": 3.7492215633715107, "grad_norm": 4.439582824707031, "learning_rate": 1.0806702979327174e-07, "loss": 0.25416064, "memory(GiB)": 34.88, "step": 138470, "train_speed(iter/s)": 0.411746 }, { "acc": 0.95208998, "epoch": 3.749356943654726, "grad_norm": 12.631997108459473, "learning_rate": 1.07951876639866e-07, "loss": 0.30941842, "memory(GiB)": 34.88, "step": 138475, "train_speed(iter/s)": 0.411747 }, { "acc": 0.94739962, "epoch": 3.7494923239379414, "grad_norm": 39.857295989990234, "learning_rate": 1.0783678477523937e-07, "loss": 0.33342166, "memory(GiB)": 34.88, "step": 138480, "train_speed(iter/s)": 0.411748 }, { "acc": 0.96289997, "epoch": 3.749627704221157, "grad_norm": 16.95236587524414, "learning_rate": 1.0772175420083389e-07, "loss": 0.24682786, "memory(GiB)": 34.88, "step": 138485, "train_speed(iter/s)": 0.411748 }, { "acc": 0.95367908, "epoch": 3.749763084504373, "grad_norm": 4.48345947265625, "learning_rate": 1.0760678491808991e-07, "loss": 0.25587978, "memory(GiB)": 34.88, "step": 138490, "train_speed(iter/s)": 0.411749 }, { "acc": 0.93825264, "epoch": 3.7498984647875884, "grad_norm": 8.828876495361328, "learning_rate": 1.0749187692844782e-07, "loss": 0.42595744, "memory(GiB)": 34.88, "step": 138495, "train_speed(iter/s)": 0.41175 }, { "acc": 0.96183567, "epoch": 3.7500338450708037, "grad_norm": 7.222352027893066, "learning_rate": 1.0737703023334741e-07, "loss": 0.23908994, "memory(GiB)": 34.88, "step": 138500, "train_speed(iter/s)": 0.411751 }, { "acc": 0.95049744, "epoch": 3.7501692253540195, "grad_norm": 7.540526390075684, "learning_rate": 1.072622448342274e-07, "loss": 0.29149172, "memory(GiB)": 34.88, "step": 138505, "train_speed(iter/s)": 0.411752 }, { "acc": 0.93670053, "epoch": 3.750304605637235, "grad_norm": 7.473454475402832, "learning_rate": 1.071475207325254e-07, "loss": 0.43432703, "memory(GiB)": 34.88, "step": 138510, "train_speed(iter/s)": 0.411752 }, { "acc": 0.94014072, "epoch": 3.7504399859204507, "grad_norm": 10.37448787689209, "learning_rate": 1.0703285792967842e-07, "loss": 0.38141558, "memory(GiB)": 34.88, "step": 138515, "train_speed(iter/s)": 0.411753 }, { "acc": 0.94649439, "epoch": 3.750575366203666, "grad_norm": 3.3884289264678955, "learning_rate": 1.069182564271235e-07, "loss": 0.35226586, "memory(GiB)": 34.88, "step": 138520, "train_speed(iter/s)": 0.411754 }, { "acc": 0.96177464, "epoch": 3.750710746486882, "grad_norm": 5.4784932136535645, "learning_rate": 1.0680371622629549e-07, "loss": 0.24265108, "memory(GiB)": 34.88, "step": 138525, "train_speed(iter/s)": 0.411755 }, { "acc": 0.9444479, "epoch": 3.750846126770097, "grad_norm": 10.35633659362793, "learning_rate": 1.0668923732862919e-07, "loss": 0.37490306, "memory(GiB)": 34.88, "step": 138530, "train_speed(iter/s)": 0.411755 }, { "acc": 0.94963541, "epoch": 3.7509815070533126, "grad_norm": 8.610641479492188, "learning_rate": 1.0657481973555888e-07, "loss": 0.30073919, "memory(GiB)": 34.88, "step": 138535, "train_speed(iter/s)": 0.411756 }, { "acc": 0.95433788, "epoch": 3.7511168873365284, "grad_norm": 5.149384021759033, "learning_rate": 1.064604634485188e-07, "loss": 0.23861084, "memory(GiB)": 34.88, "step": 138540, "train_speed(iter/s)": 0.411757 }, { "acc": 0.94040661, "epoch": 3.7512522676197437, "grad_norm": 5.983617782592773, "learning_rate": 1.0634616846893989e-07, "loss": 0.35190959, "memory(GiB)": 34.88, "step": 138545, "train_speed(iter/s)": 0.411758 }, { "acc": 0.95589056, "epoch": 3.7513876479029595, "grad_norm": 3.3432540893554688, "learning_rate": 1.0623193479825532e-07, "loss": 0.25882564, "memory(GiB)": 34.88, "step": 138550, "train_speed(iter/s)": 0.411759 }, { "acc": 0.93282814, "epoch": 3.751523028186175, "grad_norm": 15.395368576049805, "learning_rate": 1.0611776243789546e-07, "loss": 0.38102722, "memory(GiB)": 34.88, "step": 138555, "train_speed(iter/s)": 0.411759 }, { "acc": 0.94506073, "epoch": 3.7516584084693907, "grad_norm": 10.053228378295898, "learning_rate": 1.0600365138929071e-07, "loss": 0.32355123, "memory(GiB)": 34.88, "step": 138560, "train_speed(iter/s)": 0.41176 }, { "acc": 0.94923773, "epoch": 3.751793788752606, "grad_norm": 2.9602861404418945, "learning_rate": 1.0588960165386974e-07, "loss": 0.32889242, "memory(GiB)": 34.88, "step": 138565, "train_speed(iter/s)": 0.411761 }, { "acc": 0.94244518, "epoch": 3.7519291690358214, "grad_norm": 8.944809913635254, "learning_rate": 1.0577561323306186e-07, "loss": 0.38500233, "memory(GiB)": 34.88, "step": 138570, "train_speed(iter/s)": 0.411762 }, { "acc": 0.94678898, "epoch": 3.752064549319037, "grad_norm": 16.15688133239746, "learning_rate": 1.0566168612829575e-07, "loss": 0.38989871, "memory(GiB)": 34.88, "step": 138575, "train_speed(iter/s)": 0.411763 }, { "acc": 0.95454445, "epoch": 3.7521999296022526, "grad_norm": 2.448011875152588, "learning_rate": 1.0554782034099738e-07, "loss": 0.21336446, "memory(GiB)": 34.88, "step": 138580, "train_speed(iter/s)": 0.411763 }, { "acc": 0.96433163, "epoch": 3.7523353098854684, "grad_norm": 5.3055853843688965, "learning_rate": 1.0543401587259379e-07, "loss": 0.22366009, "memory(GiB)": 34.88, "step": 138585, "train_speed(iter/s)": 0.411764 }, { "acc": 0.94322348, "epoch": 3.7524706901686837, "grad_norm": 5.2616801261901855, "learning_rate": 1.0532027272451035e-07, "loss": 0.33340456, "memory(GiB)": 34.88, "step": 138590, "train_speed(iter/s)": 0.411765 }, { "acc": 0.95563936, "epoch": 3.7526060704518995, "grad_norm": 3.5048601627349854, "learning_rate": 1.0520659089817245e-07, "loss": 0.26126204, "memory(GiB)": 34.88, "step": 138595, "train_speed(iter/s)": 0.411766 }, { "acc": 0.94215546, "epoch": 3.752741450735115, "grad_norm": 10.318052291870117, "learning_rate": 1.0509297039500326e-07, "loss": 0.40514736, "memory(GiB)": 34.88, "step": 138600, "train_speed(iter/s)": 0.411767 }, { "acc": 0.95714006, "epoch": 3.7528768310183303, "grad_norm": 4.339351177215576, "learning_rate": 1.049794112164265e-07, "loss": 0.20513806, "memory(GiB)": 34.88, "step": 138605, "train_speed(iter/s)": 0.411767 }, { "acc": 0.94904709, "epoch": 3.753012211301546, "grad_norm": 15.963553428649902, "learning_rate": 1.0486591336386533e-07, "loss": 0.32163935, "memory(GiB)": 34.88, "step": 138610, "train_speed(iter/s)": 0.411768 }, { "acc": 0.94794788, "epoch": 3.753147591584762, "grad_norm": 7.73496150970459, "learning_rate": 1.047524768387407e-07, "loss": 0.29331775, "memory(GiB)": 34.88, "step": 138615, "train_speed(iter/s)": 0.411769 }, { "acc": 0.95510597, "epoch": 3.753282971867977, "grad_norm": 3.716151237487793, "learning_rate": 1.0463910164247352e-07, "loss": 0.28073201, "memory(GiB)": 34.88, "step": 138620, "train_speed(iter/s)": 0.411769 }, { "acc": 0.95660763, "epoch": 3.7534183521511926, "grad_norm": 10.865528106689453, "learning_rate": 1.0452578777648478e-07, "loss": 0.25426509, "memory(GiB)": 34.88, "step": 138625, "train_speed(iter/s)": 0.41177 }, { "acc": 0.94416246, "epoch": 3.7535537324344084, "grad_norm": 8.229026794433594, "learning_rate": 1.0441253524219372e-07, "loss": 0.34889266, "memory(GiB)": 34.88, "step": 138630, "train_speed(iter/s)": 0.411771 }, { "acc": 0.93938675, "epoch": 3.7536891127176237, "grad_norm": 9.629523277282715, "learning_rate": 1.0429934404101855e-07, "loss": 0.33009582, "memory(GiB)": 34.88, "step": 138635, "train_speed(iter/s)": 0.411772 }, { "acc": 0.96328011, "epoch": 3.753824493000839, "grad_norm": 8.326825141906738, "learning_rate": 1.0418621417437794e-07, "loss": 0.20254664, "memory(GiB)": 34.88, "step": 138640, "train_speed(iter/s)": 0.411773 }, { "acc": 0.96322765, "epoch": 3.753959873284055, "grad_norm": 8.000921249389648, "learning_rate": 1.0407314564368901e-07, "loss": 0.21491382, "memory(GiB)": 34.88, "step": 138645, "train_speed(iter/s)": 0.411773 }, { "acc": 0.94177208, "epoch": 3.7540952535672707, "grad_norm": 13.915205001831055, "learning_rate": 1.0396013845036767e-07, "loss": 0.3493958, "memory(GiB)": 34.88, "step": 138650, "train_speed(iter/s)": 0.411774 }, { "acc": 0.94880152, "epoch": 3.754230633850486, "grad_norm": 7.306591033935547, "learning_rate": 1.038471925958293e-07, "loss": 0.34417305, "memory(GiB)": 34.88, "step": 138655, "train_speed(iter/s)": 0.411775 }, { "acc": 0.94806528, "epoch": 3.7543660141337014, "grad_norm": 14.224621772766113, "learning_rate": 1.0373430808148985e-07, "loss": 0.35467644, "memory(GiB)": 34.88, "step": 138660, "train_speed(iter/s)": 0.411776 }, { "acc": 0.96179028, "epoch": 3.754501394416917, "grad_norm": 6.544337749481201, "learning_rate": 1.0362148490876253e-07, "loss": 0.22853475, "memory(GiB)": 34.88, "step": 138665, "train_speed(iter/s)": 0.411776 }, { "acc": 0.95846548, "epoch": 3.7546367747001326, "grad_norm": 13.303601264953613, "learning_rate": 1.035087230790605e-07, "loss": 0.2756062, "memory(GiB)": 34.88, "step": 138670, "train_speed(iter/s)": 0.411777 }, { "acc": 0.9475975, "epoch": 3.7547721549833484, "grad_norm": 8.500771522521973, "learning_rate": 1.0339602259379803e-07, "loss": 0.32723937, "memory(GiB)": 34.88, "step": 138675, "train_speed(iter/s)": 0.411778 }, { "acc": 0.95695171, "epoch": 3.7549075352665637, "grad_norm": 4.535462856292725, "learning_rate": 1.0328338345438496e-07, "loss": 0.25475163, "memory(GiB)": 34.88, "step": 138680, "train_speed(iter/s)": 0.411779 }, { "acc": 0.92727804, "epoch": 3.7550429155497795, "grad_norm": 12.654762268066406, "learning_rate": 1.0317080566223335e-07, "loss": 0.50015564, "memory(GiB)": 34.88, "step": 138685, "train_speed(iter/s)": 0.411779 }, { "acc": 0.9465601, "epoch": 3.755178295832995, "grad_norm": 4.389534950256348, "learning_rate": 1.030582892187525e-07, "loss": 0.32791462, "memory(GiB)": 34.88, "step": 138690, "train_speed(iter/s)": 0.41178 }, { "acc": 0.9461216, "epoch": 3.7553136761162103, "grad_norm": 15.567208290100098, "learning_rate": 1.0294583412535278e-07, "loss": 0.30151839, "memory(GiB)": 34.88, "step": 138695, "train_speed(iter/s)": 0.411781 }, { "acc": 0.95336418, "epoch": 3.755449056399426, "grad_norm": 3.5354881286621094, "learning_rate": 1.0283344038344299e-07, "loss": 0.27060664, "memory(GiB)": 34.88, "step": 138700, "train_speed(iter/s)": 0.411782 }, { "acc": 0.94280529, "epoch": 3.7555844366826414, "grad_norm": 4.1553850173950195, "learning_rate": 1.0272110799442956e-07, "loss": 0.34917111, "memory(GiB)": 34.88, "step": 138705, "train_speed(iter/s)": 0.411782 }, { "acc": 0.95679169, "epoch": 3.755719816965857, "grad_norm": 7.231550693511963, "learning_rate": 1.0260883695972239e-07, "loss": 0.28667974, "memory(GiB)": 34.88, "step": 138710, "train_speed(iter/s)": 0.411783 }, { "acc": 0.94487524, "epoch": 3.7558551972490726, "grad_norm": 7.280836582183838, "learning_rate": 1.0249662728072573e-07, "loss": 0.35385945, "memory(GiB)": 34.88, "step": 138715, "train_speed(iter/s)": 0.411784 }, { "acc": 0.94598894, "epoch": 3.7559905775322884, "grad_norm": 9.57227611541748, "learning_rate": 1.0238447895884614e-07, "loss": 0.30713763, "memory(GiB)": 34.88, "step": 138720, "train_speed(iter/s)": 0.411785 }, { "acc": 0.9554369, "epoch": 3.7561259578155037, "grad_norm": 3.60823917388916, "learning_rate": 1.0227239199548785e-07, "loss": 0.27493625, "memory(GiB)": 34.88, "step": 138725, "train_speed(iter/s)": 0.411786 }, { "acc": 0.96089687, "epoch": 3.756261338098719, "grad_norm": 7.327028751373291, "learning_rate": 1.0216036639205578e-07, "loss": 0.20872586, "memory(GiB)": 34.88, "step": 138730, "train_speed(iter/s)": 0.411786 }, { "acc": 0.94754286, "epoch": 3.756396718381935, "grad_norm": 3.5501065254211426, "learning_rate": 1.0204840214995248e-07, "loss": 0.31790323, "memory(GiB)": 34.88, "step": 138735, "train_speed(iter/s)": 0.411787 }, { "acc": 0.94677887, "epoch": 3.7565320986651503, "grad_norm": 6.19187593460083, "learning_rate": 1.0193649927058064e-07, "loss": 0.3224092, "memory(GiB)": 34.88, "step": 138740, "train_speed(iter/s)": 0.411788 }, { "acc": 0.95301085, "epoch": 3.756667478948366, "grad_norm": 5.537020206451416, "learning_rate": 1.0182465775534284e-07, "loss": 0.28503389, "memory(GiB)": 34.88, "step": 138745, "train_speed(iter/s)": 0.411788 }, { "acc": 0.94649811, "epoch": 3.7568028592315814, "grad_norm": 6.774447441101074, "learning_rate": 1.0171287760563953e-07, "loss": 0.31446748, "memory(GiB)": 34.88, "step": 138750, "train_speed(iter/s)": 0.411789 }, { "acc": 0.94756737, "epoch": 3.756938239514797, "grad_norm": 11.603180885314941, "learning_rate": 1.0160115882287106e-07, "loss": 0.31152365, "memory(GiB)": 34.88, "step": 138755, "train_speed(iter/s)": 0.41179 }, { "acc": 0.93680401, "epoch": 3.7570736197980126, "grad_norm": 7.000705718994141, "learning_rate": 1.0148950140843622e-07, "loss": 0.39638813, "memory(GiB)": 34.88, "step": 138760, "train_speed(iter/s)": 0.41179 }, { "acc": 0.94647379, "epoch": 3.757209000081228, "grad_norm": 5.02723503112793, "learning_rate": 1.0137790536373541e-07, "loss": 0.32338128, "memory(GiB)": 34.88, "step": 138765, "train_speed(iter/s)": 0.411791 }, { "acc": 0.94058208, "epoch": 3.7573443803644437, "grad_norm": 6.010344982147217, "learning_rate": 1.0126637069016513e-07, "loss": 0.34360523, "memory(GiB)": 34.88, "step": 138770, "train_speed(iter/s)": 0.411792 }, { "acc": 0.96461639, "epoch": 3.7574797606476595, "grad_norm": 4.587764263153076, "learning_rate": 1.0115489738912247e-07, "loss": 0.21452987, "memory(GiB)": 34.88, "step": 138775, "train_speed(iter/s)": 0.411793 }, { "acc": 0.96256123, "epoch": 3.757615140930875, "grad_norm": 7.1416802406311035, "learning_rate": 1.0104348546200507e-07, "loss": 0.22705386, "memory(GiB)": 34.88, "step": 138780, "train_speed(iter/s)": 0.411794 }, { "acc": 0.95237494, "epoch": 3.7577505212140903, "grad_norm": 14.732544898986816, "learning_rate": 1.0093213491020775e-07, "loss": 0.32394328, "memory(GiB)": 34.88, "step": 138785, "train_speed(iter/s)": 0.411794 }, { "acc": 0.94656067, "epoch": 3.757885901497306, "grad_norm": 15.170291900634766, "learning_rate": 1.0082084573512595e-07, "loss": 0.36507001, "memory(GiB)": 34.88, "step": 138790, "train_speed(iter/s)": 0.411795 }, { "acc": 0.95071173, "epoch": 3.7580212817805214, "grad_norm": 17.73111343383789, "learning_rate": 1.0070961793815287e-07, "loss": 0.27483933, "memory(GiB)": 34.88, "step": 138795, "train_speed(iter/s)": 0.411796 }, { "acc": 0.95853901, "epoch": 3.7581566620637368, "grad_norm": 5.041857719421387, "learning_rate": 1.0059845152068278e-07, "loss": 0.21741683, "memory(GiB)": 34.88, "step": 138800, "train_speed(iter/s)": 0.411797 }, { "acc": 0.94526062, "epoch": 3.7582920423469526, "grad_norm": 7.100845813751221, "learning_rate": 1.0048734648410779e-07, "loss": 0.35280309, "memory(GiB)": 34.88, "step": 138805, "train_speed(iter/s)": 0.411798 }, { "acc": 0.93325891, "epoch": 3.7584274226301684, "grad_norm": 11.881121635437012, "learning_rate": 1.003763028298194e-07, "loss": 0.40722704, "memory(GiB)": 34.88, "step": 138810, "train_speed(iter/s)": 0.411798 }, { "acc": 0.95563698, "epoch": 3.7585628029133837, "grad_norm": 5.9076385498046875, "learning_rate": 1.0026532055920971e-07, "loss": 0.18295732, "memory(GiB)": 34.88, "step": 138815, "train_speed(iter/s)": 0.411799 }, { "acc": 0.95700893, "epoch": 3.758698183196599, "grad_norm": 5.31240701675415, "learning_rate": 1.0015439967366746e-07, "loss": 0.27020826, "memory(GiB)": 34.88, "step": 138820, "train_speed(iter/s)": 0.4118 }, { "acc": 0.94533358, "epoch": 3.758833563479815, "grad_norm": 5.801238059997559, "learning_rate": 1.0004354017458418e-07, "loss": 0.34545324, "memory(GiB)": 34.88, "step": 138825, "train_speed(iter/s)": 0.411801 }, { "acc": 0.94574528, "epoch": 3.7589689437630303, "grad_norm": 7.734127044677734, "learning_rate": 9.99327420633464e-08, "loss": 0.29602189, "memory(GiB)": 34.88, "step": 138830, "train_speed(iter/s)": 0.411802 }, { "acc": 0.94410305, "epoch": 3.759104324046246, "grad_norm": 3.4190969467163086, "learning_rate": 9.98220053413434e-08, "loss": 0.29008474, "memory(GiB)": 34.88, "step": 138835, "train_speed(iter/s)": 0.411802 }, { "acc": 0.96485806, "epoch": 3.7592397043294614, "grad_norm": 11.13119888305664, "learning_rate": 9.97113300099623e-08, "loss": 0.21648855, "memory(GiB)": 34.88, "step": 138840, "train_speed(iter/s)": 0.411803 }, { "acc": 0.95490513, "epoch": 3.759375084612677, "grad_norm": 4.696097373962402, "learning_rate": 9.960071607058909e-08, "loss": 0.26306078, "memory(GiB)": 34.88, "step": 138845, "train_speed(iter/s)": 0.411804 }, { "acc": 0.95635414, "epoch": 3.7595104648958926, "grad_norm": 3.9782800674438477, "learning_rate": 9.949016352461022e-08, "loss": 0.24428508, "memory(GiB)": 34.88, "step": 138850, "train_speed(iter/s)": 0.411805 }, { "acc": 0.9379509, "epoch": 3.759645845179108, "grad_norm": 11.308685302734375, "learning_rate": 9.937967237340953e-08, "loss": 0.43908935, "memory(GiB)": 34.88, "step": 138855, "train_speed(iter/s)": 0.411805 }, { "acc": 0.94989138, "epoch": 3.7597812254623237, "grad_norm": 3.405139207839966, "learning_rate": 9.926924261837186e-08, "loss": 0.32373013, "memory(GiB)": 34.88, "step": 138860, "train_speed(iter/s)": 0.411806 }, { "acc": 0.95882215, "epoch": 3.759916605745539, "grad_norm": 4.396327972412109, "learning_rate": 9.915887426087928e-08, "loss": 0.24107375, "memory(GiB)": 34.88, "step": 138865, "train_speed(iter/s)": 0.411807 }, { "acc": 0.95637302, "epoch": 3.760051986028755, "grad_norm": 5.318288803100586, "learning_rate": 9.904856730231665e-08, "loss": 0.25026975, "memory(GiB)": 34.88, "step": 138870, "train_speed(iter/s)": 0.411807 }, { "acc": 0.94062996, "epoch": 3.7601873663119703, "grad_norm": 4.738487720489502, "learning_rate": 9.893832174406443e-08, "loss": 0.3807898, "memory(GiB)": 34.88, "step": 138875, "train_speed(iter/s)": 0.411808 }, { "acc": 0.94008341, "epoch": 3.760322746595186, "grad_norm": 5.478139400482178, "learning_rate": 9.882813758750302e-08, "loss": 0.35834484, "memory(GiB)": 34.88, "step": 138880, "train_speed(iter/s)": 0.411809 }, { "acc": 0.95134869, "epoch": 3.7604581268784014, "grad_norm": 4.08527135848999, "learning_rate": 9.871801483401395e-08, "loss": 0.27392459, "memory(GiB)": 34.88, "step": 138885, "train_speed(iter/s)": 0.41181 }, { "acc": 0.9535696, "epoch": 3.7605935071616168, "grad_norm": 7.863076686859131, "learning_rate": 9.860795348497656e-08, "loss": 0.26465666, "memory(GiB)": 34.88, "step": 138890, "train_speed(iter/s)": 0.411811 }, { "acc": 0.94718084, "epoch": 3.7607288874448326, "grad_norm": 5.7853899002075195, "learning_rate": 9.849795354176958e-08, "loss": 0.28020945, "memory(GiB)": 34.88, "step": 138895, "train_speed(iter/s)": 0.411811 }, { "acc": 0.9345211, "epoch": 3.760864267728048, "grad_norm": 12.869997024536133, "learning_rate": 9.838801500576955e-08, "loss": 0.47108021, "memory(GiB)": 34.88, "step": 138900, "train_speed(iter/s)": 0.411812 }, { "acc": 0.94295845, "epoch": 3.7609996480112637, "grad_norm": 9.158198356628418, "learning_rate": 9.827813787835527e-08, "loss": 0.35258374, "memory(GiB)": 34.88, "step": 138905, "train_speed(iter/s)": 0.411813 }, { "acc": 0.96099796, "epoch": 3.761135028294479, "grad_norm": 2.1538233757019043, "learning_rate": 9.816832216090267e-08, "loss": 0.21959429, "memory(GiB)": 34.88, "step": 138910, "train_speed(iter/s)": 0.411814 }, { "acc": 0.93676882, "epoch": 3.761270408577695, "grad_norm": 6.406429767608643, "learning_rate": 9.805856785478721e-08, "loss": 0.36031694, "memory(GiB)": 34.88, "step": 138915, "train_speed(iter/s)": 0.411815 }, { "acc": 0.95200939, "epoch": 3.7614057888609103, "grad_norm": 7.719687461853027, "learning_rate": 9.794887496138375e-08, "loss": 0.28473659, "memory(GiB)": 34.88, "step": 138920, "train_speed(iter/s)": 0.411815 }, { "acc": 0.94516945, "epoch": 3.7615411691441256, "grad_norm": 9.693456649780273, "learning_rate": 9.783924348206718e-08, "loss": 0.36378114, "memory(GiB)": 34.88, "step": 138925, "train_speed(iter/s)": 0.411816 }, { "acc": 0.95131016, "epoch": 3.7616765494273414, "grad_norm": 5.894121170043945, "learning_rate": 9.772967341821069e-08, "loss": 0.33734307, "memory(GiB)": 34.88, "step": 138930, "train_speed(iter/s)": 0.411817 }, { "acc": 0.95105267, "epoch": 3.7618119297105572, "grad_norm": 3.642899990081787, "learning_rate": 9.762016477118529e-08, "loss": 0.32861598, "memory(GiB)": 34.88, "step": 138935, "train_speed(iter/s)": 0.411818 }, { "acc": 0.95792294, "epoch": 3.7619473099937726, "grad_norm": 8.799509048461914, "learning_rate": 9.751071754236471e-08, "loss": 0.25514596, "memory(GiB)": 34.88, "step": 138940, "train_speed(iter/s)": 0.411819 }, { "acc": 0.95010281, "epoch": 3.762082690276988, "grad_norm": 4.294586658477783, "learning_rate": 9.740133173311941e-08, "loss": 0.26604452, "memory(GiB)": 34.88, "step": 138945, "train_speed(iter/s)": 0.411819 }, { "acc": 0.9613451, "epoch": 3.7622180705602037, "grad_norm": 7.341086387634277, "learning_rate": 9.729200734481927e-08, "loss": 0.25243781, "memory(GiB)": 34.88, "step": 138950, "train_speed(iter/s)": 0.41182 }, { "acc": 0.95109072, "epoch": 3.762353450843419, "grad_norm": 9.725113868713379, "learning_rate": 9.71827443788336e-08, "loss": 0.31073456, "memory(GiB)": 34.88, "step": 138955, "train_speed(iter/s)": 0.411821 }, { "acc": 0.95673637, "epoch": 3.7624888311266345, "grad_norm": 7.318288803100586, "learning_rate": 9.707354283653173e-08, "loss": 0.27714014, "memory(GiB)": 34.88, "step": 138960, "train_speed(iter/s)": 0.411821 }, { "acc": 0.94520903, "epoch": 3.7626242114098503, "grad_norm": 6.872994422912598, "learning_rate": 9.696440271928189e-08, "loss": 0.28083973, "memory(GiB)": 34.88, "step": 138965, "train_speed(iter/s)": 0.411822 }, { "acc": 0.95368748, "epoch": 3.762759591693066, "grad_norm": 10.482115745544434, "learning_rate": 9.685532402845001e-08, "loss": 0.26940327, "memory(GiB)": 34.88, "step": 138970, "train_speed(iter/s)": 0.411823 }, { "acc": 0.9375989, "epoch": 3.7628949719762814, "grad_norm": 9.283504486083984, "learning_rate": 9.674630676540438e-08, "loss": 0.39609857, "memory(GiB)": 34.88, "step": 138975, "train_speed(iter/s)": 0.411824 }, { "acc": 0.94492207, "epoch": 3.763030352259497, "grad_norm": 7.8594889640808105, "learning_rate": 9.663735093150928e-08, "loss": 0.31728363, "memory(GiB)": 34.88, "step": 138980, "train_speed(iter/s)": 0.411824 }, { "acc": 0.94854155, "epoch": 3.7631657325427126, "grad_norm": 7.780961036682129, "learning_rate": 9.652845652813018e-08, "loss": 0.29503663, "memory(GiB)": 34.88, "step": 138985, "train_speed(iter/s)": 0.411825 }, { "acc": 0.95454187, "epoch": 3.763301112825928, "grad_norm": 4.349433422088623, "learning_rate": 9.641962355662971e-08, "loss": 0.29172554, "memory(GiB)": 34.88, "step": 138990, "train_speed(iter/s)": 0.411826 }, { "acc": 0.94841022, "epoch": 3.7634364931091437, "grad_norm": 7.53912353515625, "learning_rate": 9.631085201837389e-08, "loss": 0.32377768, "memory(GiB)": 34.88, "step": 138995, "train_speed(iter/s)": 0.411827 }, { "acc": 0.96941519, "epoch": 3.763571873392359, "grad_norm": 4.180508136749268, "learning_rate": 9.620214191472315e-08, "loss": 0.15094428, "memory(GiB)": 34.88, "step": 139000, "train_speed(iter/s)": 0.411828 }, { "acc": 0.93322639, "epoch": 3.763707253675575, "grad_norm": 9.423285484313965, "learning_rate": 9.609349324703958e-08, "loss": 0.42036567, "memory(GiB)": 34.88, "step": 139005, "train_speed(iter/s)": 0.411828 }, { "acc": 0.95210514, "epoch": 3.7638426339587903, "grad_norm": 5.240248680114746, "learning_rate": 9.598490601668532e-08, "loss": 0.2817976, "memory(GiB)": 34.88, "step": 139010, "train_speed(iter/s)": 0.411829 }, { "acc": 0.94434071, "epoch": 3.7639780142420056, "grad_norm": 9.802541732788086, "learning_rate": 9.587638022502022e-08, "loss": 0.33802781, "memory(GiB)": 34.88, "step": 139015, "train_speed(iter/s)": 0.41183 }, { "acc": 0.93553925, "epoch": 3.7641133945252214, "grad_norm": 8.995518684387207, "learning_rate": 9.57679158734031e-08, "loss": 0.39739623, "memory(GiB)": 34.88, "step": 139020, "train_speed(iter/s)": 0.411831 }, { "acc": 0.95308905, "epoch": 3.764248774808437, "grad_norm": 7.879946231842041, "learning_rate": 9.565951296319272e-08, "loss": 0.26707985, "memory(GiB)": 34.88, "step": 139025, "train_speed(iter/s)": 0.411831 }, { "acc": 0.95612364, "epoch": 3.7643841550916526, "grad_norm": 4.945912837982178, "learning_rate": 9.555117149574839e-08, "loss": 0.24455645, "memory(GiB)": 34.88, "step": 139030, "train_speed(iter/s)": 0.411832 }, { "acc": 0.94287672, "epoch": 3.764519535374868, "grad_norm": 7.838120460510254, "learning_rate": 9.54428914724256e-08, "loss": 0.36143711, "memory(GiB)": 34.88, "step": 139035, "train_speed(iter/s)": 0.411833 }, { "acc": 0.94592991, "epoch": 3.7646549156580837, "grad_norm": 5.3916730880737305, "learning_rate": 9.533467289458142e-08, "loss": 0.33683674, "memory(GiB)": 34.88, "step": 139040, "train_speed(iter/s)": 0.411834 }, { "acc": 0.94829807, "epoch": 3.764790295941299, "grad_norm": 6.107615947723389, "learning_rate": 9.522651576357189e-08, "loss": 0.34521184, "memory(GiB)": 34.88, "step": 139045, "train_speed(iter/s)": 0.411835 }, { "acc": 0.96636534, "epoch": 3.7649256762245145, "grad_norm": 4.710024356842041, "learning_rate": 9.511842008075185e-08, "loss": 0.18187467, "memory(GiB)": 34.88, "step": 139050, "train_speed(iter/s)": 0.411835 }, { "acc": 0.94968338, "epoch": 3.7650610565077303, "grad_norm": 6.3855109214782715, "learning_rate": 9.501038584747569e-08, "loss": 0.27578211, "memory(GiB)": 34.88, "step": 139055, "train_speed(iter/s)": 0.411836 }, { "acc": 0.95612783, "epoch": 3.7651964367909456, "grad_norm": 5.3130340576171875, "learning_rate": 9.490241306509493e-08, "loss": 0.26635811, "memory(GiB)": 34.88, "step": 139060, "train_speed(iter/s)": 0.411837 }, { "acc": 0.95103083, "epoch": 3.7653318170741614, "grad_norm": 9.260814666748047, "learning_rate": 9.479450173496394e-08, "loss": 0.29128711, "memory(GiB)": 34.88, "step": 139065, "train_speed(iter/s)": 0.411838 }, { "acc": 0.93554659, "epoch": 3.765467197357377, "grad_norm": 29.820125579833984, "learning_rate": 9.468665185843426e-08, "loss": 0.37453008, "memory(GiB)": 34.88, "step": 139070, "train_speed(iter/s)": 0.411839 }, { "acc": 0.94189663, "epoch": 3.7656025776405926, "grad_norm": 18.01385498046875, "learning_rate": 9.45788634368558e-08, "loss": 0.32819569, "memory(GiB)": 34.88, "step": 139075, "train_speed(iter/s)": 0.411839 }, { "acc": 0.95210857, "epoch": 3.765737957923808, "grad_norm": 4.572657585144043, "learning_rate": 9.447113647158066e-08, "loss": 0.28143475, "memory(GiB)": 34.88, "step": 139080, "train_speed(iter/s)": 0.41184 }, { "acc": 0.93515377, "epoch": 3.7658733382070233, "grad_norm": 7.470418930053711, "learning_rate": 9.436347096395708e-08, "loss": 0.36917865, "memory(GiB)": 34.88, "step": 139085, "train_speed(iter/s)": 0.411841 }, { "acc": 0.95925407, "epoch": 3.766008718490239, "grad_norm": 5.876923084259033, "learning_rate": 9.425586691533384e-08, "loss": 0.28308783, "memory(GiB)": 34.88, "step": 139090, "train_speed(iter/s)": 0.411842 }, { "acc": 0.94852161, "epoch": 3.766144098773455, "grad_norm": 4.9987335205078125, "learning_rate": 9.414832432705921e-08, "loss": 0.30896204, "memory(GiB)": 34.88, "step": 139095, "train_speed(iter/s)": 0.411842 }, { "acc": 0.95162907, "epoch": 3.7662794790566703, "grad_norm": 12.500537872314453, "learning_rate": 9.404084320048081e-08, "loss": 0.28425164, "memory(GiB)": 34.88, "step": 139100, "train_speed(iter/s)": 0.411843 }, { "acc": 0.94506626, "epoch": 3.7664148593398856, "grad_norm": 5.65007209777832, "learning_rate": 9.393342353694412e-08, "loss": 0.29379053, "memory(GiB)": 34.88, "step": 139105, "train_speed(iter/s)": 0.411843 }, { "acc": 0.9482028, "epoch": 3.7665502396231014, "grad_norm": 9.856670379638672, "learning_rate": 9.382606533779459e-08, "loss": 0.32202706, "memory(GiB)": 34.88, "step": 139110, "train_speed(iter/s)": 0.411844 }, { "acc": 0.94873791, "epoch": 3.766685619906317, "grad_norm": 8.946646690368652, "learning_rate": 9.371876860437824e-08, "loss": 0.29386358, "memory(GiB)": 34.88, "step": 139115, "train_speed(iter/s)": 0.411845 }, { "acc": 0.93761292, "epoch": 3.766821000189532, "grad_norm": 9.726285934448242, "learning_rate": 9.361153333803885e-08, "loss": 0.33792264, "memory(GiB)": 34.88, "step": 139120, "train_speed(iter/s)": 0.411846 }, { "acc": 0.94753208, "epoch": 3.766956380472748, "grad_norm": 5.495187759399414, "learning_rate": 9.350435954011913e-08, "loss": 0.35274837, "memory(GiB)": 34.88, "step": 139125, "train_speed(iter/s)": 0.411846 }, { "acc": 0.94812527, "epoch": 3.7670917607559637, "grad_norm": 4.495199680328369, "learning_rate": 9.339724721196227e-08, "loss": 0.27546721, "memory(GiB)": 34.88, "step": 139130, "train_speed(iter/s)": 0.411847 }, { "acc": 0.94640245, "epoch": 3.767227141039179, "grad_norm": 15.035886764526367, "learning_rate": 9.329019635490988e-08, "loss": 0.394648, "memory(GiB)": 34.88, "step": 139135, "train_speed(iter/s)": 0.411848 }, { "acc": 0.94296169, "epoch": 3.7673625213223945, "grad_norm": 9.40800952911377, "learning_rate": 9.318320697030294e-08, "loss": 0.39702394, "memory(GiB)": 34.88, "step": 139140, "train_speed(iter/s)": 0.411848 }, { "acc": 0.94137535, "epoch": 3.7674979016056103, "grad_norm": 15.021074295043945, "learning_rate": 9.307627905948138e-08, "loss": 0.34982283, "memory(GiB)": 34.88, "step": 139145, "train_speed(iter/s)": 0.411849 }, { "acc": 0.95098286, "epoch": 3.7676332818888256, "grad_norm": 10.366572380065918, "learning_rate": 9.29694126237851e-08, "loss": 0.33102496, "memory(GiB)": 34.88, "step": 139150, "train_speed(iter/s)": 0.41185 }, { "acc": 0.9310647, "epoch": 3.7677686621720414, "grad_norm": 7.868189334869385, "learning_rate": 9.286260766455289e-08, "loss": 0.4613472, "memory(GiB)": 34.88, "step": 139155, "train_speed(iter/s)": 0.411851 }, { "acc": 0.95836592, "epoch": 3.767904042455257, "grad_norm": 3.19142484664917, "learning_rate": 9.275586418312244e-08, "loss": 0.22670426, "memory(GiB)": 34.88, "step": 139160, "train_speed(iter/s)": 0.411852 }, { "acc": 0.94174185, "epoch": 3.7680394227384726, "grad_norm": 2.8918025493621826, "learning_rate": 9.264918218083088e-08, "loss": 0.28397598, "memory(GiB)": 34.88, "step": 139165, "train_speed(iter/s)": 0.411852 }, { "acc": 0.9500617, "epoch": 3.768174803021688, "grad_norm": 5.265313625335693, "learning_rate": 9.254256165901477e-08, "loss": 0.3188926, "memory(GiB)": 34.88, "step": 139170, "train_speed(iter/s)": 0.411853 }, { "acc": 0.95580368, "epoch": 3.7683101833049033, "grad_norm": 5.437661170959473, "learning_rate": 9.243600261901017e-08, "loss": 0.24108381, "memory(GiB)": 34.88, "step": 139175, "train_speed(iter/s)": 0.411853 }, { "acc": 0.95433846, "epoch": 3.768445563588119, "grad_norm": 4.231827735900879, "learning_rate": 9.232950506215083e-08, "loss": 0.27858834, "memory(GiB)": 34.88, "step": 139180, "train_speed(iter/s)": 0.411854 }, { "acc": 0.93829861, "epoch": 3.7685809438713345, "grad_norm": 4.66398286819458, "learning_rate": 9.222306898977114e-08, "loss": 0.39814942, "memory(GiB)": 34.88, "step": 139185, "train_speed(iter/s)": 0.411855 }, { "acc": 0.95473909, "epoch": 3.7687163241545503, "grad_norm": 8.54002857208252, "learning_rate": 9.211669440320599e-08, "loss": 0.26528525, "memory(GiB)": 34.88, "step": 139190, "train_speed(iter/s)": 0.411856 }, { "acc": 0.95639858, "epoch": 3.7688517044377656, "grad_norm": 7.228113651275635, "learning_rate": 9.201038130378586e-08, "loss": 0.24521582, "memory(GiB)": 34.88, "step": 139195, "train_speed(iter/s)": 0.411856 }, { "acc": 0.94090767, "epoch": 3.7689870847209814, "grad_norm": 5.150609016418457, "learning_rate": 9.190412969284343e-08, "loss": 0.31064754, "memory(GiB)": 34.88, "step": 139200, "train_speed(iter/s)": 0.411857 }, { "acc": 0.95378637, "epoch": 3.769122465004197, "grad_norm": 6.168449878692627, "learning_rate": 9.179793957170918e-08, "loss": 0.25128465, "memory(GiB)": 34.88, "step": 139205, "train_speed(iter/s)": 0.411858 }, { "acc": 0.94118729, "epoch": 3.769257845287412, "grad_norm": 10.767202377319336, "learning_rate": 9.169181094171468e-08, "loss": 0.37090788, "memory(GiB)": 34.88, "step": 139210, "train_speed(iter/s)": 0.411858 }, { "acc": 0.9492034, "epoch": 3.769393225570628, "grad_norm": 20.02873420715332, "learning_rate": 9.158574380418819e-08, "loss": 0.28828716, "memory(GiB)": 34.88, "step": 139215, "train_speed(iter/s)": 0.411859 }, { "acc": 0.95123253, "epoch": 3.7695286058538433, "grad_norm": 6.624102592468262, "learning_rate": 9.147973816045905e-08, "loss": 0.25700309, "memory(GiB)": 34.88, "step": 139220, "train_speed(iter/s)": 0.41186 }, { "acc": 0.95385818, "epoch": 3.769663986137059, "grad_norm": 5.722910404205322, "learning_rate": 9.137379401185443e-08, "loss": 0.29630332, "memory(GiB)": 34.88, "step": 139225, "train_speed(iter/s)": 0.411861 }, { "acc": 0.95136166, "epoch": 3.7697993664202745, "grad_norm": 6.2667036056518555, "learning_rate": 9.126791135970254e-08, "loss": 0.34021153, "memory(GiB)": 34.88, "step": 139230, "train_speed(iter/s)": 0.411861 }, { "acc": 0.95580473, "epoch": 3.7699347467034903, "grad_norm": 5.1815619468688965, "learning_rate": 9.116209020532833e-08, "loss": 0.25238371, "memory(GiB)": 34.88, "step": 139235, "train_speed(iter/s)": 0.411862 }, { "acc": 0.960079, "epoch": 3.7700701269867056, "grad_norm": 4.72391939163208, "learning_rate": 9.105633055005892e-08, "loss": 0.25919681, "memory(GiB)": 34.88, "step": 139240, "train_speed(iter/s)": 0.411863 }, { "acc": 0.95225697, "epoch": 3.770205507269921, "grad_norm": 8.237798690795898, "learning_rate": 9.095063239521813e-08, "loss": 0.33464019, "memory(GiB)": 34.88, "step": 139245, "train_speed(iter/s)": 0.411864 }, { "acc": 0.9499754, "epoch": 3.770340887553137, "grad_norm": 4.65528678894043, "learning_rate": 9.084499574213087e-08, "loss": 0.26846232, "memory(GiB)": 34.88, "step": 139250, "train_speed(iter/s)": 0.411865 }, { "acc": 0.96560678, "epoch": 3.7704762678363526, "grad_norm": 9.241389274597168, "learning_rate": 9.07394205921193e-08, "loss": 0.23817642, "memory(GiB)": 34.88, "step": 139255, "train_speed(iter/s)": 0.411865 }, { "acc": 0.95842419, "epoch": 3.770611648119568, "grad_norm": 4.624690532684326, "learning_rate": 9.06339069465072e-08, "loss": 0.32000551, "memory(GiB)": 34.88, "step": 139260, "train_speed(iter/s)": 0.411866 }, { "acc": 0.95648603, "epoch": 3.7707470284027833, "grad_norm": 3.586091995239258, "learning_rate": 9.05284548066162e-08, "loss": 0.24161553, "memory(GiB)": 34.88, "step": 139265, "train_speed(iter/s)": 0.411867 }, { "acc": 0.95980139, "epoch": 3.770882408685999, "grad_norm": 10.874587059020996, "learning_rate": 9.042306417376618e-08, "loss": 0.29800069, "memory(GiB)": 34.88, "step": 139270, "train_speed(iter/s)": 0.411867 }, { "acc": 0.94750586, "epoch": 3.7710177889692145, "grad_norm": 6.219023704528809, "learning_rate": 9.031773504927766e-08, "loss": 0.29733214, "memory(GiB)": 34.88, "step": 139275, "train_speed(iter/s)": 0.411868 }, { "acc": 0.96021824, "epoch": 3.77115316925243, "grad_norm": 4.923993110656738, "learning_rate": 9.02124674344716e-08, "loss": 0.24453063, "memory(GiB)": 34.88, "step": 139280, "train_speed(iter/s)": 0.411869 }, { "acc": 0.94595547, "epoch": 3.7712885495356456, "grad_norm": 6.832027912139893, "learning_rate": 9.010726133066469e-08, "loss": 0.32175241, "memory(GiB)": 34.88, "step": 139285, "train_speed(iter/s)": 0.41187 }, { "acc": 0.93550987, "epoch": 3.7714239298188614, "grad_norm": 18.83696746826172, "learning_rate": 9.000211673917623e-08, "loss": 0.4296195, "memory(GiB)": 34.88, "step": 139290, "train_speed(iter/s)": 0.41187 }, { "acc": 0.93630161, "epoch": 3.771559310102077, "grad_norm": 9.007051467895508, "learning_rate": 8.989703366132285e-08, "loss": 0.3935631, "memory(GiB)": 34.88, "step": 139295, "train_speed(iter/s)": 0.411871 }, { "acc": 0.94886112, "epoch": 3.771694690385292, "grad_norm": 4.413148880004883, "learning_rate": 8.97920120984211e-08, "loss": 0.26594677, "memory(GiB)": 34.88, "step": 139300, "train_speed(iter/s)": 0.411872 }, { "acc": 0.94181852, "epoch": 3.771830070668508, "grad_norm": 6.033566474914551, "learning_rate": 8.96870520517854e-08, "loss": 0.33135133, "memory(GiB)": 34.88, "step": 139305, "train_speed(iter/s)": 0.411873 }, { "acc": 0.95441589, "epoch": 3.7719654509517233, "grad_norm": 7.647029399871826, "learning_rate": 8.958215352273288e-08, "loss": 0.27994745, "memory(GiB)": 34.88, "step": 139310, "train_speed(iter/s)": 0.411873 }, { "acc": 0.95476246, "epoch": 3.7721008312349387, "grad_norm": 5.749154567718506, "learning_rate": 8.947731651257566e-08, "loss": 0.25540161, "memory(GiB)": 34.88, "step": 139315, "train_speed(iter/s)": 0.411874 }, { "acc": 0.95348711, "epoch": 3.7722362115181545, "grad_norm": 6.017442226409912, "learning_rate": 8.937254102262817e-08, "loss": 0.32116442, "memory(GiB)": 34.88, "step": 139320, "train_speed(iter/s)": 0.411875 }, { "acc": 0.93305597, "epoch": 3.7723715918013703, "grad_norm": 4.729089260101318, "learning_rate": 8.926782705420198e-08, "loss": 0.36113501, "memory(GiB)": 34.88, "step": 139325, "train_speed(iter/s)": 0.411875 }, { "acc": 0.95568781, "epoch": 3.7725069720845856, "grad_norm": 3.814546585083008, "learning_rate": 8.916317460860922e-08, "loss": 0.29792237, "memory(GiB)": 34.88, "step": 139330, "train_speed(iter/s)": 0.411876 }, { "acc": 0.95121822, "epoch": 3.772642352367801, "grad_norm": 4.686819076538086, "learning_rate": 8.905858368716153e-08, "loss": 0.29347043, "memory(GiB)": 34.88, "step": 139335, "train_speed(iter/s)": 0.411877 }, { "acc": 0.95919256, "epoch": 3.772777732651017, "grad_norm": 6.671909809112549, "learning_rate": 8.895405429116771e-08, "loss": 0.20549128, "memory(GiB)": 34.88, "step": 139340, "train_speed(iter/s)": 0.411877 }, { "acc": 0.94500561, "epoch": 3.772913112934232, "grad_norm": 4.826422214508057, "learning_rate": 8.88495864219388e-08, "loss": 0.37901623, "memory(GiB)": 34.88, "step": 139345, "train_speed(iter/s)": 0.411878 }, { "acc": 0.96074028, "epoch": 3.773048493217448, "grad_norm": 10.047897338867188, "learning_rate": 8.874518008078251e-08, "loss": 0.2176085, "memory(GiB)": 34.88, "step": 139350, "train_speed(iter/s)": 0.411879 }, { "acc": 0.94658356, "epoch": 3.7731838735006633, "grad_norm": 14.945243835449219, "learning_rate": 8.864083526900656e-08, "loss": 0.33017318, "memory(GiB)": 34.88, "step": 139355, "train_speed(iter/s)": 0.41188 }, { "acc": 0.95060377, "epoch": 3.773319253783879, "grad_norm": 9.32507038116455, "learning_rate": 8.853655198791866e-08, "loss": 0.33413446, "memory(GiB)": 34.88, "step": 139360, "train_speed(iter/s)": 0.41188 }, { "acc": 0.9480011, "epoch": 3.7734546340670945, "grad_norm": 3.8810596466064453, "learning_rate": 8.843233023882484e-08, "loss": 0.27043097, "memory(GiB)": 34.88, "step": 139365, "train_speed(iter/s)": 0.411881 }, { "acc": 0.95765076, "epoch": 3.77359001435031, "grad_norm": 4.688549041748047, "learning_rate": 8.832817002303119e-08, "loss": 0.21294737, "memory(GiB)": 34.88, "step": 139370, "train_speed(iter/s)": 0.411882 }, { "acc": 0.94688015, "epoch": 3.7737253946335256, "grad_norm": 6.7876176834106445, "learning_rate": 8.82240713418415e-08, "loss": 0.35610814, "memory(GiB)": 34.88, "step": 139375, "train_speed(iter/s)": 0.411883 }, { "acc": 0.95041313, "epoch": 3.773860774916741, "grad_norm": 4.431720733642578, "learning_rate": 8.81200341965607e-08, "loss": 0.29484112, "memory(GiB)": 34.88, "step": 139380, "train_speed(iter/s)": 0.411883 }, { "acc": 0.95146408, "epoch": 3.773996155199957, "grad_norm": 5.261757850646973, "learning_rate": 8.80160585884921e-08, "loss": 0.23260493, "memory(GiB)": 34.88, "step": 139385, "train_speed(iter/s)": 0.411884 }, { "acc": 0.94300766, "epoch": 3.774131535483172, "grad_norm": 6.730051040649414, "learning_rate": 8.791214451893783e-08, "loss": 0.34148641, "memory(GiB)": 34.88, "step": 139390, "train_speed(iter/s)": 0.411885 }, { "acc": 0.94499807, "epoch": 3.774266915766388, "grad_norm": 8.021027565002441, "learning_rate": 8.780829198919894e-08, "loss": 0.30306509, "memory(GiB)": 34.88, "step": 139395, "train_speed(iter/s)": 0.411885 }, { "acc": 0.95385685, "epoch": 3.7744022960496033, "grad_norm": 3.333538293838501, "learning_rate": 8.770450100057762e-08, "loss": 0.26017747, "memory(GiB)": 34.88, "step": 139400, "train_speed(iter/s)": 0.411886 }, { "acc": 0.95854397, "epoch": 3.7745376763328187, "grad_norm": 4.36199426651001, "learning_rate": 8.76007715543738e-08, "loss": 0.26258259, "memory(GiB)": 34.88, "step": 139405, "train_speed(iter/s)": 0.411887 }, { "acc": 0.95947285, "epoch": 3.7746730566160345, "grad_norm": 7.9048004150390625, "learning_rate": 8.749710365188628e-08, "loss": 0.20658722, "memory(GiB)": 34.88, "step": 139410, "train_speed(iter/s)": 0.411888 }, { "acc": 0.95502968, "epoch": 3.77480843689925, "grad_norm": 3.620818853378296, "learning_rate": 8.739349729441446e-08, "loss": 0.2722729, "memory(GiB)": 34.88, "step": 139415, "train_speed(iter/s)": 0.411888 }, { "acc": 0.93903198, "epoch": 3.7749438171824656, "grad_norm": 10.000368118286133, "learning_rate": 8.728995248325498e-08, "loss": 0.38662534, "memory(GiB)": 34.88, "step": 139420, "train_speed(iter/s)": 0.411889 }, { "acc": 0.96581717, "epoch": 3.775079197465681, "grad_norm": 5.529778003692627, "learning_rate": 8.718646921970665e-08, "loss": 0.2258182, "memory(GiB)": 34.88, "step": 139425, "train_speed(iter/s)": 0.41189 }, { "acc": 0.9597024, "epoch": 3.775214577748897, "grad_norm": 4.793250560760498, "learning_rate": 8.708304750506441e-08, "loss": 0.23914547, "memory(GiB)": 34.88, "step": 139430, "train_speed(iter/s)": 0.411891 }, { "acc": 0.95570316, "epoch": 3.775349958032112, "grad_norm": 4.504454612731934, "learning_rate": 8.69796873406249e-08, "loss": 0.25093446, "memory(GiB)": 34.88, "step": 139435, "train_speed(iter/s)": 0.411892 }, { "acc": 0.95517979, "epoch": 3.7754853383153275, "grad_norm": 6.4499053955078125, "learning_rate": 8.687638872768247e-08, "loss": 0.31693213, "memory(GiB)": 34.88, "step": 139440, "train_speed(iter/s)": 0.411892 }, { "acc": 0.95690212, "epoch": 3.7756207185985433, "grad_norm": 3.4012136459350586, "learning_rate": 8.677315166752987e-08, "loss": 0.23819582, "memory(GiB)": 34.88, "step": 139445, "train_speed(iter/s)": 0.411893 }, { "acc": 0.93892097, "epoch": 3.775756098881759, "grad_norm": 5.4580183029174805, "learning_rate": 8.666997616146259e-08, "loss": 0.3391979, "memory(GiB)": 34.88, "step": 139450, "train_speed(iter/s)": 0.411894 }, { "acc": 0.95770903, "epoch": 3.7758914791649745, "grad_norm": 3.615422487258911, "learning_rate": 8.65668622107717e-08, "loss": 0.32741728, "memory(GiB)": 34.88, "step": 139455, "train_speed(iter/s)": 0.411895 }, { "acc": 0.96103325, "epoch": 3.77602685944819, "grad_norm": 3.8867642879486084, "learning_rate": 8.646380981674936e-08, "loss": 0.23396502, "memory(GiB)": 34.88, "step": 139460, "train_speed(iter/s)": 0.411895 }, { "acc": 0.96272297, "epoch": 3.7761622397314056, "grad_norm": 7.896792888641357, "learning_rate": 8.636081898068551e-08, "loss": 0.1859077, "memory(GiB)": 34.88, "step": 139465, "train_speed(iter/s)": 0.411896 }, { "acc": 0.94794254, "epoch": 3.776297620014621, "grad_norm": 9.566136360168457, "learning_rate": 8.625788970387182e-08, "loss": 0.35395348, "memory(GiB)": 34.88, "step": 139470, "train_speed(iter/s)": 0.411897 }, { "acc": 0.94975777, "epoch": 3.7764330002978364, "grad_norm": 12.526422500610352, "learning_rate": 8.615502198759762e-08, "loss": 0.31334231, "memory(GiB)": 34.88, "step": 139475, "train_speed(iter/s)": 0.411897 }, { "acc": 0.95739946, "epoch": 3.776568380581052, "grad_norm": 5.147615909576416, "learning_rate": 8.605221583314956e-08, "loss": 0.24314189, "memory(GiB)": 34.88, "step": 139480, "train_speed(iter/s)": 0.411898 }, { "acc": 0.95055895, "epoch": 3.776703760864268, "grad_norm": 6.952652931213379, "learning_rate": 8.594947124181814e-08, "loss": 0.28981643, "memory(GiB)": 34.88, "step": 139485, "train_speed(iter/s)": 0.411899 }, { "acc": 0.95756855, "epoch": 3.7768391411474833, "grad_norm": 7.246483325958252, "learning_rate": 8.584678821488887e-08, "loss": 0.21876159, "memory(GiB)": 34.88, "step": 139490, "train_speed(iter/s)": 0.411899 }, { "acc": 0.93583107, "epoch": 3.7769745214306987, "grad_norm": 23.973752975463867, "learning_rate": 8.574416675364837e-08, "loss": 0.37905126, "memory(GiB)": 34.88, "step": 139495, "train_speed(iter/s)": 0.4119 }, { "acc": 0.94547558, "epoch": 3.7771099017139145, "grad_norm": 13.577544212341309, "learning_rate": 8.564160685938105e-08, "loss": 0.31434746, "memory(GiB)": 34.88, "step": 139500, "train_speed(iter/s)": 0.411901 }, { "acc": 0.95948696, "epoch": 3.77724528199713, "grad_norm": 7.118730068206787, "learning_rate": 8.553910853337406e-08, "loss": 0.29251282, "memory(GiB)": 34.88, "step": 139505, "train_speed(iter/s)": 0.411902 }, { "acc": 0.9459053, "epoch": 3.7773806622803456, "grad_norm": 7.365777492523193, "learning_rate": 8.543667177690961e-08, "loss": 0.31074448, "memory(GiB)": 34.88, "step": 139510, "train_speed(iter/s)": 0.411902 }, { "acc": 0.95824203, "epoch": 3.777516042563561, "grad_norm": 4.845423698425293, "learning_rate": 8.533429659127207e-08, "loss": 0.29467182, "memory(GiB)": 34.88, "step": 139515, "train_speed(iter/s)": 0.411903 }, { "acc": 0.95336037, "epoch": 3.777651422846777, "grad_norm": 8.169645309448242, "learning_rate": 8.523198297774252e-08, "loss": 0.27052624, "memory(GiB)": 34.88, "step": 139520, "train_speed(iter/s)": 0.411904 }, { "acc": 0.95049763, "epoch": 3.777786803129992, "grad_norm": 4.258324146270752, "learning_rate": 8.512973093760371e-08, "loss": 0.27345047, "memory(GiB)": 34.88, "step": 139525, "train_speed(iter/s)": 0.411905 }, { "acc": 0.94507389, "epoch": 3.7779221834132075, "grad_norm": 8.059320449829102, "learning_rate": 8.502754047213615e-08, "loss": 0.37454321, "memory(GiB)": 34.88, "step": 139530, "train_speed(iter/s)": 0.411905 }, { "acc": 0.94967985, "epoch": 3.7780575636964233, "grad_norm": 8.926859855651855, "learning_rate": 8.492541158261978e-08, "loss": 0.33012207, "memory(GiB)": 34.88, "step": 139535, "train_speed(iter/s)": 0.411906 }, { "acc": 0.9499691, "epoch": 3.7781929439796387, "grad_norm": 15.274662017822266, "learning_rate": 8.482334427033457e-08, "loss": 0.27952287, "memory(GiB)": 34.88, "step": 139540, "train_speed(iter/s)": 0.411907 }, { "acc": 0.944907, "epoch": 3.7783283242628545, "grad_norm": 16.93950843811035, "learning_rate": 8.472133853655825e-08, "loss": 0.35529296, "memory(GiB)": 34.88, "step": 139545, "train_speed(iter/s)": 0.411907 }, { "acc": 0.9485527, "epoch": 3.77846370454607, "grad_norm": 3.2429099082946777, "learning_rate": 8.46193943825697e-08, "loss": 0.31394172, "memory(GiB)": 34.88, "step": 139550, "train_speed(iter/s)": 0.411908 }, { "acc": 0.95494175, "epoch": 3.7785990848292856, "grad_norm": 12.63674545288086, "learning_rate": 8.451751180964494e-08, "loss": 0.31293464, "memory(GiB)": 34.88, "step": 139555, "train_speed(iter/s)": 0.411909 }, { "acc": 0.94584599, "epoch": 3.778734465112501, "grad_norm": 8.668525695800781, "learning_rate": 8.441569081906121e-08, "loss": 0.34318762, "memory(GiB)": 34.88, "step": 139560, "train_speed(iter/s)": 0.41191 }, { "acc": 0.94455986, "epoch": 3.7788698453957164, "grad_norm": 7.125657558441162, "learning_rate": 8.431393141209346e-08, "loss": 0.29349911, "memory(GiB)": 34.88, "step": 139565, "train_speed(iter/s)": 0.41191 }, { "acc": 0.96073141, "epoch": 3.779005225678932, "grad_norm": 4.392963409423828, "learning_rate": 8.421223359001606e-08, "loss": 0.25591345, "memory(GiB)": 34.88, "step": 139570, "train_speed(iter/s)": 0.411911 }, { "acc": 0.94385281, "epoch": 3.7791406059621475, "grad_norm": 4.933396816253662, "learning_rate": 8.411059735410401e-08, "loss": 0.34481914, "memory(GiB)": 34.88, "step": 139575, "train_speed(iter/s)": 0.411912 }, { "acc": 0.95222845, "epoch": 3.7792759862453633, "grad_norm": 8.588602066040039, "learning_rate": 8.40090227056295e-08, "loss": 0.26311102, "memory(GiB)": 34.88, "step": 139580, "train_speed(iter/s)": 0.411913 }, { "acc": 0.94730759, "epoch": 3.7794113665285787, "grad_norm": 5.318865776062012, "learning_rate": 8.39075096458658e-08, "loss": 0.31387265, "memory(GiB)": 34.88, "step": 139585, "train_speed(iter/s)": 0.411914 }, { "acc": 0.9478548, "epoch": 3.7795467468117945, "grad_norm": 3.2568840980529785, "learning_rate": 8.380605817608347e-08, "loss": 0.3304337, "memory(GiB)": 34.88, "step": 139590, "train_speed(iter/s)": 0.411914 }, { "acc": 0.94671364, "epoch": 3.77968212709501, "grad_norm": 8.459277153015137, "learning_rate": 8.370466829755469e-08, "loss": 0.33726244, "memory(GiB)": 34.88, "step": 139595, "train_speed(iter/s)": 0.411915 }, { "acc": 0.95443611, "epoch": 3.779817507378225, "grad_norm": 9.604618072509766, "learning_rate": 8.360334001154883e-08, "loss": 0.25953479, "memory(GiB)": 34.88, "step": 139600, "train_speed(iter/s)": 0.411916 }, { "acc": 0.94841576, "epoch": 3.779952887661441, "grad_norm": 4.482852458953857, "learning_rate": 8.35020733193348e-08, "loss": 0.36533775, "memory(GiB)": 34.88, "step": 139605, "train_speed(iter/s)": 0.411917 }, { "acc": 0.94913368, "epoch": 3.780088267944657, "grad_norm": 6.604671955108643, "learning_rate": 8.340086822218254e-08, "loss": 0.31132078, "memory(GiB)": 34.88, "step": 139610, "train_speed(iter/s)": 0.411918 }, { "acc": 0.95744915, "epoch": 3.780223648227872, "grad_norm": 4.082403659820557, "learning_rate": 8.329972472135816e-08, "loss": 0.25443485, "memory(GiB)": 34.88, "step": 139615, "train_speed(iter/s)": 0.411918 }, { "acc": 0.94349403, "epoch": 3.7803590285110875, "grad_norm": 11.59680461883545, "learning_rate": 8.319864281812992e-08, "loss": 0.35371244, "memory(GiB)": 34.88, "step": 139620, "train_speed(iter/s)": 0.411919 }, { "acc": 0.94039192, "epoch": 3.7804944087943033, "grad_norm": 7.316932201385498, "learning_rate": 8.309762251376339e-08, "loss": 0.36429234, "memory(GiB)": 34.88, "step": 139625, "train_speed(iter/s)": 0.41192 }, { "acc": 0.94722013, "epoch": 3.7806297890775187, "grad_norm": 4.4513139724731445, "learning_rate": 8.299666380952461e-08, "loss": 0.36139271, "memory(GiB)": 34.88, "step": 139630, "train_speed(iter/s)": 0.41192 }, { "acc": 0.9664875, "epoch": 3.780765169360734, "grad_norm": 5.037834644317627, "learning_rate": 8.289576670667805e-08, "loss": 0.15721757, "memory(GiB)": 34.88, "step": 139635, "train_speed(iter/s)": 0.411921 }, { "acc": 0.94810095, "epoch": 3.78090054964395, "grad_norm": 7.7006635665893555, "learning_rate": 8.279493120648699e-08, "loss": 0.28059196, "memory(GiB)": 34.88, "step": 139640, "train_speed(iter/s)": 0.411922 }, { "acc": 0.95002203, "epoch": 3.7810359299271656, "grad_norm": 7.595311641693115, "learning_rate": 8.269415731021582e-08, "loss": 0.34210796, "memory(GiB)": 34.88, "step": 139645, "train_speed(iter/s)": 0.411923 }, { "acc": 0.94181414, "epoch": 3.781171310210381, "grad_norm": 17.481273651123047, "learning_rate": 8.259344501912625e-08, "loss": 0.364678, "memory(GiB)": 34.88, "step": 139650, "train_speed(iter/s)": 0.411923 }, { "acc": 0.9496974, "epoch": 3.7813066904935964, "grad_norm": 18.276165008544922, "learning_rate": 8.249279433447987e-08, "loss": 0.32809608, "memory(GiB)": 34.88, "step": 139655, "train_speed(iter/s)": 0.411924 }, { "acc": 0.944765, "epoch": 3.781442070776812, "grad_norm": 8.867100715637207, "learning_rate": 8.239220525753721e-08, "loss": 0.33296986, "memory(GiB)": 34.88, "step": 139660, "train_speed(iter/s)": 0.411925 }, { "acc": 0.94613514, "epoch": 3.7815774510600275, "grad_norm": 7.451644420623779, "learning_rate": 8.229167778955938e-08, "loss": 0.34121714, "memory(GiB)": 34.88, "step": 139665, "train_speed(iter/s)": 0.411926 }, { "acc": 0.94489288, "epoch": 3.7817128313432433, "grad_norm": 6.386697769165039, "learning_rate": 8.219121193180469e-08, "loss": 0.30542743, "memory(GiB)": 34.88, "step": 139670, "train_speed(iter/s)": 0.411926 }, { "acc": 0.93733044, "epoch": 3.7818482116264587, "grad_norm": 5.42167329788208, "learning_rate": 8.209080768553199e-08, "loss": 0.40167713, "memory(GiB)": 34.88, "step": 139675, "train_speed(iter/s)": 0.411927 }, { "acc": 0.94691734, "epoch": 3.7819835919096745, "grad_norm": 12.27724838256836, "learning_rate": 8.199046505199905e-08, "loss": 0.3590847, "memory(GiB)": 34.88, "step": 139680, "train_speed(iter/s)": 0.411928 }, { "acc": 0.95087547, "epoch": 3.78211897219289, "grad_norm": 5.793245315551758, "learning_rate": 8.189018403246306e-08, "loss": 0.31486816, "memory(GiB)": 34.88, "step": 139685, "train_speed(iter/s)": 0.411929 }, { "acc": 0.93802357, "epoch": 3.782254352476105, "grad_norm": 8.609095573425293, "learning_rate": 8.178996462818013e-08, "loss": 0.36446099, "memory(GiB)": 34.88, "step": 139690, "train_speed(iter/s)": 0.411929 }, { "acc": 0.94736385, "epoch": 3.782389732759321, "grad_norm": 2.4320905208587646, "learning_rate": 8.168980684040524e-08, "loss": 0.33722262, "memory(GiB)": 34.88, "step": 139695, "train_speed(iter/s)": 0.41193 }, { "acc": 0.94659081, "epoch": 3.7825251130425364, "grad_norm": 3.122429132461548, "learning_rate": 8.158971067039391e-08, "loss": 0.29111724, "memory(GiB)": 34.88, "step": 139700, "train_speed(iter/s)": 0.411931 }, { "acc": 0.94346418, "epoch": 3.782660493325752, "grad_norm": 6.9220781326293945, "learning_rate": 8.148967611939947e-08, "loss": 0.37948346, "memory(GiB)": 34.88, "step": 139705, "train_speed(iter/s)": 0.411932 }, { "acc": 0.94526377, "epoch": 3.7827958736089675, "grad_norm": 4.136362552642822, "learning_rate": 8.138970318867524e-08, "loss": 0.32461321, "memory(GiB)": 34.88, "step": 139710, "train_speed(iter/s)": 0.411933 }, { "acc": 0.95285206, "epoch": 3.7829312538921833, "grad_norm": 4.591479778289795, "learning_rate": 8.128979187947284e-08, "loss": 0.27732008, "memory(GiB)": 34.88, "step": 139715, "train_speed(iter/s)": 0.411933 }, { "acc": 0.94235764, "epoch": 3.7830666341753987, "grad_norm": 8.34610652923584, "learning_rate": 8.118994219304564e-08, "loss": 0.36467195, "memory(GiB)": 34.88, "step": 139720, "train_speed(iter/s)": 0.411934 }, { "acc": 0.94359474, "epoch": 3.783202014458614, "grad_norm": 13.333986282348633, "learning_rate": 8.109015413064247e-08, "loss": 0.40783386, "memory(GiB)": 34.88, "step": 139725, "train_speed(iter/s)": 0.411935 }, { "acc": 0.9611557, "epoch": 3.78333739474183, "grad_norm": 9.612127304077148, "learning_rate": 8.099042769351445e-08, "loss": 0.22394595, "memory(GiB)": 34.88, "step": 139730, "train_speed(iter/s)": 0.411936 }, { "acc": 0.95909119, "epoch": 3.783472775025045, "grad_norm": 5.02263069152832, "learning_rate": 8.089076288291099e-08, "loss": 0.29480762, "memory(GiB)": 34.88, "step": 139735, "train_speed(iter/s)": 0.411936 }, { "acc": 0.93719158, "epoch": 3.783608155308261, "grad_norm": 8.803196907043457, "learning_rate": 8.079115970007987e-08, "loss": 0.31109037, "memory(GiB)": 34.88, "step": 139740, "train_speed(iter/s)": 0.411937 }, { "acc": 0.96490726, "epoch": 3.7837435355914764, "grad_norm": 16.529754638671875, "learning_rate": 8.069161814626943e-08, "loss": 0.22935438, "memory(GiB)": 34.88, "step": 139745, "train_speed(iter/s)": 0.411938 }, { "acc": 0.94237823, "epoch": 3.783878915874692, "grad_norm": 13.898580551147461, "learning_rate": 8.059213822272628e-08, "loss": 0.41420412, "memory(GiB)": 34.88, "step": 139750, "train_speed(iter/s)": 0.411939 }, { "acc": 0.95354786, "epoch": 3.7840142961579075, "grad_norm": 20.655866622924805, "learning_rate": 8.049271993069712e-08, "loss": 0.27502449, "memory(GiB)": 34.88, "step": 139755, "train_speed(iter/s)": 0.411939 }, { "acc": 0.9462163, "epoch": 3.784149676441123, "grad_norm": 8.30712604522705, "learning_rate": 8.039336327142638e-08, "loss": 0.33258584, "memory(GiB)": 34.88, "step": 139760, "train_speed(iter/s)": 0.41194 }, { "acc": 0.9500967, "epoch": 3.7842850567243387, "grad_norm": 6.8137125968933105, "learning_rate": 8.029406824615901e-08, "loss": 0.31700635, "memory(GiB)": 34.88, "step": 139765, "train_speed(iter/s)": 0.411941 }, { "acc": 0.93992138, "epoch": 3.7844204370075545, "grad_norm": 9.728145599365234, "learning_rate": 8.019483485614004e-08, "loss": 0.41399841, "memory(GiB)": 34.88, "step": 139770, "train_speed(iter/s)": 0.411942 }, { "acc": 0.94781723, "epoch": 3.78455581729077, "grad_norm": 7.826237201690674, "learning_rate": 8.009566310261056e-08, "loss": 0.27070892, "memory(GiB)": 34.88, "step": 139775, "train_speed(iter/s)": 0.411943 }, { "acc": 0.94084072, "epoch": 3.784691197573985, "grad_norm": 5.955742359161377, "learning_rate": 7.999655298681447e-08, "loss": 0.36071177, "memory(GiB)": 34.88, "step": 139780, "train_speed(iter/s)": 0.411943 }, { "acc": 0.9533349, "epoch": 3.784826577857201, "grad_norm": 10.823514938354492, "learning_rate": 7.989750450999284e-08, "loss": 0.3230135, "memory(GiB)": 34.88, "step": 139785, "train_speed(iter/s)": 0.411944 }, { "acc": 0.94560509, "epoch": 3.7849619581404164, "grad_norm": 4.248822212219238, "learning_rate": 7.979851767338679e-08, "loss": 0.27290068, "memory(GiB)": 34.88, "step": 139790, "train_speed(iter/s)": 0.411945 }, { "acc": 0.93709736, "epoch": 3.7850973384236317, "grad_norm": 7.080399513244629, "learning_rate": 7.969959247823579e-08, "loss": 0.37338963, "memory(GiB)": 34.88, "step": 139795, "train_speed(iter/s)": 0.411946 }, { "acc": 0.94847193, "epoch": 3.7852327187068475, "grad_norm": 9.248983383178711, "learning_rate": 7.960072892577868e-08, "loss": 0.29553127, "memory(GiB)": 34.88, "step": 139800, "train_speed(iter/s)": 0.411946 }, { "acc": 0.96219425, "epoch": 3.7853680989900633, "grad_norm": 12.355408668518066, "learning_rate": 7.950192701725493e-08, "loss": 0.25146396, "memory(GiB)": 34.88, "step": 139805, "train_speed(iter/s)": 0.411947 }, { "acc": 0.93175993, "epoch": 3.7855034792732787, "grad_norm": 5.708866596221924, "learning_rate": 7.940318675390175e-08, "loss": 0.47286654, "memory(GiB)": 34.88, "step": 139810, "train_speed(iter/s)": 0.411948 }, { "acc": 0.9485116, "epoch": 3.785638859556494, "grad_norm": 5.77220344543457, "learning_rate": 7.930450813695634e-08, "loss": 0.30223866, "memory(GiB)": 34.88, "step": 139815, "train_speed(iter/s)": 0.411949 }, { "acc": 0.95038681, "epoch": 3.78577423983971, "grad_norm": 20.290769577026367, "learning_rate": 7.92058911676543e-08, "loss": 0.3085464, "memory(GiB)": 34.88, "step": 139820, "train_speed(iter/s)": 0.411949 }, { "acc": 0.95737438, "epoch": 3.785909620122925, "grad_norm": 11.41008472442627, "learning_rate": 7.910733584723171e-08, "loss": 0.24988618, "memory(GiB)": 34.88, "step": 139825, "train_speed(iter/s)": 0.41195 }, { "acc": 0.94360485, "epoch": 3.786045000406141, "grad_norm": 14.193804740905762, "learning_rate": 7.90088421769219e-08, "loss": 0.36577976, "memory(GiB)": 34.88, "step": 139830, "train_speed(iter/s)": 0.411951 }, { "acc": 0.9629715, "epoch": 3.7861803806893564, "grad_norm": 6.1735687255859375, "learning_rate": 7.891041015796044e-08, "loss": 0.22942681, "memory(GiB)": 34.88, "step": 139835, "train_speed(iter/s)": 0.411952 }, { "acc": 0.97039471, "epoch": 3.786315760972572, "grad_norm": 4.018059730529785, "learning_rate": 7.881203979157897e-08, "loss": 0.17494535, "memory(GiB)": 34.88, "step": 139840, "train_speed(iter/s)": 0.411952 }, { "acc": 0.95223637, "epoch": 3.7864511412557875, "grad_norm": 9.94066047668457, "learning_rate": 7.871373107901032e-08, "loss": 0.3023556, "memory(GiB)": 34.88, "step": 139845, "train_speed(iter/s)": 0.411953 }, { "acc": 0.95135288, "epoch": 3.786586521539003, "grad_norm": 25.256057739257812, "learning_rate": 7.861548402148612e-08, "loss": 0.27907858, "memory(GiB)": 34.88, "step": 139850, "train_speed(iter/s)": 0.411954 }, { "acc": 0.93876104, "epoch": 3.7867219018222187, "grad_norm": 19.519119262695312, "learning_rate": 7.851729862023692e-08, "loss": 0.40130463, "memory(GiB)": 34.88, "step": 139855, "train_speed(iter/s)": 0.411955 }, { "acc": 0.95271034, "epoch": 3.786857282105434, "grad_norm": 7.840029239654541, "learning_rate": 7.841917487649333e-08, "loss": 0.31066737, "memory(GiB)": 34.88, "step": 139860, "train_speed(iter/s)": 0.411955 }, { "acc": 0.94101763, "epoch": 3.78699266238865, "grad_norm": 9.428201675415039, "learning_rate": 7.83211127914831e-08, "loss": 0.42971525, "memory(GiB)": 34.88, "step": 139865, "train_speed(iter/s)": 0.411956 }, { "acc": 0.94946423, "epoch": 3.787128042671865, "grad_norm": 3.87847900390625, "learning_rate": 7.822311236643567e-08, "loss": 0.3135715, "memory(GiB)": 34.88, "step": 139870, "train_speed(iter/s)": 0.411957 }, { "acc": 0.9375144, "epoch": 3.787263422955081, "grad_norm": 8.408001899719238, "learning_rate": 7.81251736025783e-08, "loss": 0.36771116, "memory(GiB)": 34.88, "step": 139875, "train_speed(iter/s)": 0.411958 }, { "acc": 0.96819744, "epoch": 3.7873988032382964, "grad_norm": 4.80691385269165, "learning_rate": 7.802729650113872e-08, "loss": 0.20716183, "memory(GiB)": 34.88, "step": 139880, "train_speed(iter/s)": 0.411958 }, { "acc": 0.95110378, "epoch": 3.7875341835215117, "grad_norm": 5.450089931488037, "learning_rate": 7.7929481063342e-08, "loss": 0.33574481, "memory(GiB)": 34.88, "step": 139885, "train_speed(iter/s)": 0.411959 }, { "acc": 0.9404686, "epoch": 3.7876695638047275, "grad_norm": 3.5682735443115234, "learning_rate": 7.783172729041367e-08, "loss": 0.38489645, "memory(GiB)": 34.88, "step": 139890, "train_speed(iter/s)": 0.41196 }, { "acc": 0.95348539, "epoch": 3.787804944087943, "grad_norm": 7.491944789886475, "learning_rate": 7.77340351835793e-08, "loss": 0.25383034, "memory(GiB)": 34.88, "step": 139895, "train_speed(iter/s)": 0.411961 }, { "acc": 0.95507765, "epoch": 3.7879403243711587, "grad_norm": 8.360172271728516, "learning_rate": 7.763640474406113e-08, "loss": 0.23750765, "memory(GiB)": 34.88, "step": 139900, "train_speed(iter/s)": 0.411961 }, { "acc": 0.95633335, "epoch": 3.788075704654374, "grad_norm": 6.729979038238525, "learning_rate": 7.753883597308304e-08, "loss": 0.26801136, "memory(GiB)": 34.88, "step": 139905, "train_speed(iter/s)": 0.411962 }, { "acc": 0.94836836, "epoch": 3.78821108493759, "grad_norm": 5.98062801361084, "learning_rate": 7.744132887186729e-08, "loss": 0.28617716, "memory(GiB)": 34.88, "step": 139910, "train_speed(iter/s)": 0.411963 }, { "acc": 0.95028896, "epoch": 3.788346465220805, "grad_norm": 3.285521984100342, "learning_rate": 7.734388344163498e-08, "loss": 0.38348386, "memory(GiB)": 34.88, "step": 139915, "train_speed(iter/s)": 0.411964 }, { "acc": 0.94601068, "epoch": 3.7884818455040206, "grad_norm": 4.092392921447754, "learning_rate": 7.724649968360724e-08, "loss": 0.36810231, "memory(GiB)": 34.88, "step": 139920, "train_speed(iter/s)": 0.411965 }, { "acc": 0.95948362, "epoch": 3.7886172257872364, "grad_norm": 7.873030185699463, "learning_rate": 7.714917759900353e-08, "loss": 0.24002781, "memory(GiB)": 34.88, "step": 139925, "train_speed(iter/s)": 0.411965 }, { "acc": 0.95331841, "epoch": 3.788752606070452, "grad_norm": 4.415834903717041, "learning_rate": 7.705191718904331e-08, "loss": 0.2360945, "memory(GiB)": 34.88, "step": 139930, "train_speed(iter/s)": 0.411966 }, { "acc": 0.94395905, "epoch": 3.7888879863536675, "grad_norm": 9.003225326538086, "learning_rate": 7.695471845494493e-08, "loss": 0.36904545, "memory(GiB)": 34.88, "step": 139935, "train_speed(iter/s)": 0.411967 }, { "acc": 0.93883286, "epoch": 3.789023366636883, "grad_norm": 4.78691291809082, "learning_rate": 7.685758139792618e-08, "loss": 0.31303124, "memory(GiB)": 34.88, "step": 139940, "train_speed(iter/s)": 0.411968 }, { "acc": 0.94811478, "epoch": 3.7891587469200987, "grad_norm": 4.673728942871094, "learning_rate": 7.676050601920319e-08, "loss": 0.2737251, "memory(GiB)": 34.88, "step": 139945, "train_speed(iter/s)": 0.411968 }, { "acc": 0.94037189, "epoch": 3.789294127203314, "grad_norm": 8.413725852966309, "learning_rate": 7.666349231999264e-08, "loss": 0.37751613, "memory(GiB)": 34.88, "step": 139950, "train_speed(iter/s)": 0.411969 }, { "acc": 0.95100851, "epoch": 3.7894295074865294, "grad_norm": 136.02090454101562, "learning_rate": 7.656654030151009e-08, "loss": 0.29043522, "memory(GiB)": 34.88, "step": 139955, "train_speed(iter/s)": 0.41197 }, { "acc": 0.95076275, "epoch": 3.789564887769745, "grad_norm": 10.114578247070312, "learning_rate": 7.646964996496891e-08, "loss": 0.33790336, "memory(GiB)": 34.88, "step": 139960, "train_speed(iter/s)": 0.411971 }, { "acc": 0.95140715, "epoch": 3.789700268052961, "grad_norm": 5.154987335205078, "learning_rate": 7.637282131158412e-08, "loss": 0.28193758, "memory(GiB)": 34.88, "step": 139965, "train_speed(iter/s)": 0.411971 }, { "acc": 0.93883886, "epoch": 3.7898356483361764, "grad_norm": 10.440805435180664, "learning_rate": 7.627605434256794e-08, "loss": 0.4394227, "memory(GiB)": 34.88, "step": 139970, "train_speed(iter/s)": 0.411972 }, { "acc": 0.95300646, "epoch": 3.7899710286193917, "grad_norm": 4.297529697418213, "learning_rate": 7.617934905913263e-08, "loss": 0.30184608, "memory(GiB)": 34.88, "step": 139975, "train_speed(iter/s)": 0.411973 }, { "acc": 0.96206627, "epoch": 3.7901064089026075, "grad_norm": 3.4960482120513916, "learning_rate": 7.608270546248988e-08, "loss": 0.23751643, "memory(GiB)": 34.88, "step": 139980, "train_speed(iter/s)": 0.411974 }, { "acc": 0.94398489, "epoch": 3.790241789185823, "grad_norm": 4.056634426116943, "learning_rate": 7.598612355385082e-08, "loss": 0.34270852, "memory(GiB)": 34.88, "step": 139985, "train_speed(iter/s)": 0.411975 }, { "acc": 0.94280548, "epoch": 3.7903771694690387, "grad_norm": 6.313058853149414, "learning_rate": 7.58896033344238e-08, "loss": 0.29294524, "memory(GiB)": 34.88, "step": 139990, "train_speed(iter/s)": 0.411975 }, { "acc": 0.94233761, "epoch": 3.790512549752254, "grad_norm": 8.07607364654541, "learning_rate": 7.579314480541884e-08, "loss": 0.35866983, "memory(GiB)": 34.88, "step": 139995, "train_speed(iter/s)": 0.411976 }, { "acc": 0.95898752, "epoch": 3.79064793003547, "grad_norm": 5.072564601898193, "learning_rate": 7.569674796804487e-08, "loss": 0.26420269, "memory(GiB)": 34.88, "step": 140000, "train_speed(iter/s)": 0.411977 }, { "epoch": 3.79064793003547, "eval_acc": 0.6266770969931375, "eval_loss": 1.2620960474014282, "eval_runtime": 1295.1445, "eval_samples_per_second": 66.637, "eval_steps_per_second": 2.083, "step": 140000 }, { "acc": 0.94993534, "epoch": 3.790783310318685, "grad_norm": 11.096416473388672, "learning_rate": 7.560041282350854e-08, "loss": 0.29452736, "memory(GiB)": 34.88, "step": 140005, "train_speed(iter/s)": 0.410386 }, { "acc": 0.9426033, "epoch": 3.7909186906019006, "grad_norm": 9.771284103393555, "learning_rate": 7.55041393730166e-08, "loss": 0.42898817, "memory(GiB)": 34.88, "step": 140010, "train_speed(iter/s)": 0.410387 }, { "acc": 0.95163479, "epoch": 3.7910540708851164, "grad_norm": 3.922511100769043, "learning_rate": 7.540792761777625e-08, "loss": 0.32675619, "memory(GiB)": 34.88, "step": 140015, "train_speed(iter/s)": 0.410388 }, { "acc": 0.949506, "epoch": 3.7911894511683317, "grad_norm": 9.805768966674805, "learning_rate": 7.531177755899143e-08, "loss": 0.30292354, "memory(GiB)": 34.88, "step": 140020, "train_speed(iter/s)": 0.410389 }, { "acc": 0.9501483, "epoch": 3.7913248314515475, "grad_norm": 9.264782905578613, "learning_rate": 7.521568919786716e-08, "loss": 0.38405759, "memory(GiB)": 34.88, "step": 140025, "train_speed(iter/s)": 0.410389 }, { "acc": 0.94822121, "epoch": 3.791460211734763, "grad_norm": 18.166446685791016, "learning_rate": 7.511966253560681e-08, "loss": 0.36143172, "memory(GiB)": 34.88, "step": 140030, "train_speed(iter/s)": 0.41039 }, { "acc": 0.94982386, "epoch": 3.7915955920179787, "grad_norm": 10.591548919677734, "learning_rate": 7.502369757341372e-08, "loss": 0.26330361, "memory(GiB)": 34.88, "step": 140035, "train_speed(iter/s)": 0.410391 }, { "acc": 0.95140018, "epoch": 3.791730972301194, "grad_norm": 7.581151008605957, "learning_rate": 7.492779431249016e-08, "loss": 0.26836195, "memory(GiB)": 34.88, "step": 140040, "train_speed(iter/s)": 0.410392 }, { "acc": 0.95908451, "epoch": 3.7918663525844094, "grad_norm": 8.1279935836792, "learning_rate": 7.483195275403616e-08, "loss": 0.21992252, "memory(GiB)": 34.88, "step": 140045, "train_speed(iter/s)": 0.410392 }, { "acc": 0.95850868, "epoch": 3.792001732867625, "grad_norm": 9.973796844482422, "learning_rate": 7.473617289925453e-08, "loss": 0.2403271, "memory(GiB)": 34.88, "step": 140050, "train_speed(iter/s)": 0.410393 }, { "acc": 0.95420685, "epoch": 3.7921371131508406, "grad_norm": 4.228525638580322, "learning_rate": 7.464045474934361e-08, "loss": 0.24515026, "memory(GiB)": 34.88, "step": 140055, "train_speed(iter/s)": 0.410394 }, { "acc": 0.95865841, "epoch": 3.7922724934340564, "grad_norm": 6.0219950675964355, "learning_rate": 7.454479830550291e-08, "loss": 0.24629483, "memory(GiB)": 34.88, "step": 140060, "train_speed(iter/s)": 0.410395 }, { "acc": 0.95679302, "epoch": 3.7924078737172717, "grad_norm": 6.821249961853027, "learning_rate": 7.444920356893021e-08, "loss": 0.28718958, "memory(GiB)": 34.88, "step": 140065, "train_speed(iter/s)": 0.410396 }, { "acc": 0.93551521, "epoch": 3.7925432540004875, "grad_norm": 14.935430526733398, "learning_rate": 7.435367054082334e-08, "loss": 0.39395251, "memory(GiB)": 34.88, "step": 140070, "train_speed(iter/s)": 0.410397 }, { "acc": 0.95834084, "epoch": 3.792678634283703, "grad_norm": 5.8863019943237305, "learning_rate": 7.425819922237955e-08, "loss": 0.22912998, "memory(GiB)": 34.88, "step": 140075, "train_speed(iter/s)": 0.410397 }, { "acc": 0.95429649, "epoch": 3.7928140145669182, "grad_norm": 3.998037338256836, "learning_rate": 7.416278961479388e-08, "loss": 0.24582241, "memory(GiB)": 34.88, "step": 140080, "train_speed(iter/s)": 0.410398 }, { "acc": 0.95379772, "epoch": 3.792949394850134, "grad_norm": 7.289853572845459, "learning_rate": 7.406744171926248e-08, "loss": 0.3101933, "memory(GiB)": 34.88, "step": 140085, "train_speed(iter/s)": 0.410399 }, { "acc": 0.94424953, "epoch": 3.79308477513335, "grad_norm": 11.062590599060059, "learning_rate": 7.397215553697871e-08, "loss": 0.30671499, "memory(GiB)": 34.88, "step": 140090, "train_speed(iter/s)": 0.4104 }, { "acc": 0.96261225, "epoch": 3.793220155416565, "grad_norm": 9.468091011047363, "learning_rate": 7.387693106913759e-08, "loss": 0.24868803, "memory(GiB)": 34.88, "step": 140095, "train_speed(iter/s)": 0.410401 }, { "acc": 0.94701376, "epoch": 3.7933555356997806, "grad_norm": 6.589534759521484, "learning_rate": 7.378176831693032e-08, "loss": 0.26628346, "memory(GiB)": 34.88, "step": 140100, "train_speed(iter/s)": 0.410402 }, { "acc": 0.93909721, "epoch": 3.7934909159829964, "grad_norm": 5.578462600708008, "learning_rate": 7.368666728155077e-08, "loss": 0.41514611, "memory(GiB)": 34.88, "step": 140105, "train_speed(iter/s)": 0.410402 }, { "acc": 0.94856834, "epoch": 3.7936262962662117, "grad_norm": 5.342732906341553, "learning_rate": 7.359162796418902e-08, "loss": 0.2630084, "memory(GiB)": 34.88, "step": 140110, "train_speed(iter/s)": 0.410403 }, { "acc": 0.94163122, "epoch": 3.793761676549427, "grad_norm": 9.27251148223877, "learning_rate": 7.34966503660362e-08, "loss": 0.36136212, "memory(GiB)": 34.88, "step": 140115, "train_speed(iter/s)": 0.410404 }, { "acc": 0.95788689, "epoch": 3.793897056832643, "grad_norm": 6.101715564727783, "learning_rate": 7.340173448828123e-08, "loss": 0.2985714, "memory(GiB)": 34.88, "step": 140120, "train_speed(iter/s)": 0.410405 }, { "acc": 0.95061989, "epoch": 3.7940324371158587, "grad_norm": 4.371507167816162, "learning_rate": 7.330688033211418e-08, "loss": 0.29225032, "memory(GiB)": 34.88, "step": 140125, "train_speed(iter/s)": 0.410406 }, { "acc": 0.955375, "epoch": 3.794167817399074, "grad_norm": 6.7491607666015625, "learning_rate": 7.32120878987234e-08, "loss": 0.29930196, "memory(GiB)": 34.88, "step": 140130, "train_speed(iter/s)": 0.410406 }, { "acc": 0.95566978, "epoch": 3.7943031976822894, "grad_norm": 2.6295480728149414, "learning_rate": 7.311735718929452e-08, "loss": 0.27773099, "memory(GiB)": 34.88, "step": 140135, "train_speed(iter/s)": 0.410407 }, { "acc": 0.94817867, "epoch": 3.794438577965505, "grad_norm": 6.552547931671143, "learning_rate": 7.302268820501642e-08, "loss": 0.29568315, "memory(GiB)": 34.88, "step": 140140, "train_speed(iter/s)": 0.410408 }, { "acc": 0.95744514, "epoch": 3.7945739582487206, "grad_norm": 3.7165863513946533, "learning_rate": 7.292808094707475e-08, "loss": 0.20496736, "memory(GiB)": 34.88, "step": 140145, "train_speed(iter/s)": 0.410409 }, { "acc": 0.94078722, "epoch": 3.7947093385319364, "grad_norm": 18.452726364135742, "learning_rate": 7.283353541665287e-08, "loss": 0.33945746, "memory(GiB)": 34.88, "step": 140150, "train_speed(iter/s)": 0.41041 }, { "acc": 0.95402985, "epoch": 3.7948447188151517, "grad_norm": 4.927412509918213, "learning_rate": 7.273905161493635e-08, "loss": 0.31894553, "memory(GiB)": 34.88, "step": 140155, "train_speed(iter/s)": 0.410411 }, { "acc": 0.94331322, "epoch": 3.7949800990983675, "grad_norm": 6.530355930328369, "learning_rate": 7.264462954310918e-08, "loss": 0.37999511, "memory(GiB)": 34.88, "step": 140160, "train_speed(iter/s)": 0.410411 }, { "acc": 0.943186, "epoch": 3.795115479381583, "grad_norm": 6.283402442932129, "learning_rate": 7.255026920235415e-08, "loss": 0.32582622, "memory(GiB)": 34.88, "step": 140165, "train_speed(iter/s)": 0.410412 }, { "acc": 0.96191654, "epoch": 3.7952508596647982, "grad_norm": 8.348520278930664, "learning_rate": 7.245597059385187e-08, "loss": 0.17378176, "memory(GiB)": 34.88, "step": 140170, "train_speed(iter/s)": 0.410413 }, { "acc": 0.94667368, "epoch": 3.795386239948014, "grad_norm": 5.683187484741211, "learning_rate": 7.236173371878515e-08, "loss": 0.31229873, "memory(GiB)": 34.88, "step": 140175, "train_speed(iter/s)": 0.410413 }, { "acc": 0.92366419, "epoch": 3.7955216202312294, "grad_norm": 18.831729888916016, "learning_rate": 7.226755857833408e-08, "loss": 0.42827106, "memory(GiB)": 34.88, "step": 140180, "train_speed(iter/s)": 0.410414 }, { "acc": 0.92930145, "epoch": 3.795657000514445, "grad_norm": 19.033613204956055, "learning_rate": 7.217344517367867e-08, "loss": 0.48419971, "memory(GiB)": 34.88, "step": 140185, "train_speed(iter/s)": 0.410415 }, { "acc": 0.95600128, "epoch": 3.7957923807976606, "grad_norm": 8.528098106384277, "learning_rate": 7.207939350599676e-08, "loss": 0.27319334, "memory(GiB)": 34.88, "step": 140190, "train_speed(iter/s)": 0.410416 }, { "acc": 0.93833637, "epoch": 3.7959277610808764, "grad_norm": 5.29727029800415, "learning_rate": 7.198540357646785e-08, "loss": 0.45253391, "memory(GiB)": 34.88, "step": 140195, "train_speed(iter/s)": 0.410417 }, { "acc": 0.95246925, "epoch": 3.7960631413640917, "grad_norm": 3.7652435302734375, "learning_rate": 7.189147538626864e-08, "loss": 0.28817403, "memory(GiB)": 34.88, "step": 140200, "train_speed(iter/s)": 0.410417 }, { "acc": 0.93810959, "epoch": 3.796198521647307, "grad_norm": 9.139227867126465, "learning_rate": 7.179760893657534e-08, "loss": 0.48290882, "memory(GiB)": 34.88, "step": 140205, "train_speed(iter/s)": 0.410418 }, { "acc": 0.95499401, "epoch": 3.796333901930523, "grad_norm": 12.047707557678223, "learning_rate": 7.170380422856517e-08, "loss": 0.29869168, "memory(GiB)": 34.88, "step": 140210, "train_speed(iter/s)": 0.410419 }, { "acc": 0.96256695, "epoch": 3.7964692822137382, "grad_norm": 2.352760076522827, "learning_rate": 7.161006126341154e-08, "loss": 0.24865465, "memory(GiB)": 34.88, "step": 140215, "train_speed(iter/s)": 0.410419 }, { "acc": 0.9461092, "epoch": 3.796604662496954, "grad_norm": 5.340507984161377, "learning_rate": 7.151638004229061e-08, "loss": 0.3179565, "memory(GiB)": 34.88, "step": 140220, "train_speed(iter/s)": 0.41042 }, { "acc": 0.95405264, "epoch": 3.7967400427801694, "grad_norm": 4.32565450668335, "learning_rate": 7.142276056637412e-08, "loss": 0.2769485, "memory(GiB)": 34.88, "step": 140225, "train_speed(iter/s)": 0.410421 }, { "acc": 0.94656782, "epoch": 3.796875423063385, "grad_norm": 3.3849551677703857, "learning_rate": 7.1329202836836e-08, "loss": 0.30278721, "memory(GiB)": 34.88, "step": 140230, "train_speed(iter/s)": 0.410422 }, { "acc": 0.94321194, "epoch": 3.7970108033466006, "grad_norm": 5.444540500640869, "learning_rate": 7.123570685484853e-08, "loss": 0.35542216, "memory(GiB)": 34.88, "step": 140235, "train_speed(iter/s)": 0.410422 }, { "acc": 0.93882866, "epoch": 3.797146183629816, "grad_norm": 4.864673614501953, "learning_rate": 7.11422726215812e-08, "loss": 0.39023209, "memory(GiB)": 34.88, "step": 140240, "train_speed(iter/s)": 0.410423 }, { "acc": 0.9510251, "epoch": 3.7972815639130317, "grad_norm": 6.0095696449279785, "learning_rate": 7.104890013820631e-08, "loss": 0.26514695, "memory(GiB)": 34.88, "step": 140245, "train_speed(iter/s)": 0.410424 }, { "acc": 0.93505898, "epoch": 3.7974169441962475, "grad_norm": 6.815162658691406, "learning_rate": 7.09555894058928e-08, "loss": 0.35593438, "memory(GiB)": 34.88, "step": 140250, "train_speed(iter/s)": 0.410424 }, { "acc": 0.95488062, "epoch": 3.797552324479463, "grad_norm": 8.225774765014648, "learning_rate": 7.086234042580961e-08, "loss": 0.2703547, "memory(GiB)": 34.88, "step": 140255, "train_speed(iter/s)": 0.410425 }, { "acc": 0.93873625, "epoch": 3.7976877047626783, "grad_norm": 3.66414213180542, "learning_rate": 7.076915319912403e-08, "loss": 0.40053353, "memory(GiB)": 34.88, "step": 140260, "train_speed(iter/s)": 0.410426 }, { "acc": 0.94589844, "epoch": 3.797823085045894, "grad_norm": 7.401453018188477, "learning_rate": 7.0676027727005e-08, "loss": 0.3336031, "memory(GiB)": 34.88, "step": 140265, "train_speed(iter/s)": 0.410427 }, { "acc": 0.95496349, "epoch": 3.7979584653291094, "grad_norm": 4.741908550262451, "learning_rate": 7.058296401061816e-08, "loss": 0.3197819, "memory(GiB)": 34.88, "step": 140270, "train_speed(iter/s)": 0.410428 }, { "acc": 0.95052986, "epoch": 3.7980938456123248, "grad_norm": 14.702985763549805, "learning_rate": 7.04899620511291e-08, "loss": 0.2696661, "memory(GiB)": 34.88, "step": 140275, "train_speed(iter/s)": 0.410429 }, { "acc": 0.94027596, "epoch": 3.7982292258955406, "grad_norm": 14.962361335754395, "learning_rate": 7.039702184970401e-08, "loss": 0.33916647, "memory(GiB)": 34.88, "step": 140280, "train_speed(iter/s)": 0.410429 }, { "acc": 0.9486249, "epoch": 3.7983646061787564, "grad_norm": 4.466559886932373, "learning_rate": 7.030414340750573e-08, "loss": 0.32582488, "memory(GiB)": 34.88, "step": 140285, "train_speed(iter/s)": 0.41043 }, { "acc": 0.94404202, "epoch": 3.7984999864619717, "grad_norm": 6.95077657699585, "learning_rate": 7.021132672569877e-08, "loss": 0.33672395, "memory(GiB)": 34.88, "step": 140290, "train_speed(iter/s)": 0.410431 }, { "acc": 0.95081358, "epoch": 3.798635366745187, "grad_norm": 8.39244270324707, "learning_rate": 7.011857180544485e-08, "loss": 0.28126659, "memory(GiB)": 34.88, "step": 140295, "train_speed(iter/s)": 0.410432 }, { "acc": 0.94764748, "epoch": 3.798770747028403, "grad_norm": 4.245874881744385, "learning_rate": 7.00258786479068e-08, "loss": 0.26617863, "memory(GiB)": 34.88, "step": 140300, "train_speed(iter/s)": 0.410433 }, { "acc": 0.95372324, "epoch": 3.7989061273116183, "grad_norm": 8.61175537109375, "learning_rate": 6.993324725424528e-08, "loss": 0.3049613, "memory(GiB)": 34.88, "step": 140305, "train_speed(iter/s)": 0.410433 }, { "acc": 0.95142488, "epoch": 3.7990415075948336, "grad_norm": 7.83504056930542, "learning_rate": 6.984067762562145e-08, "loss": 0.27724795, "memory(GiB)": 34.88, "step": 140310, "train_speed(iter/s)": 0.410434 }, { "acc": 0.93492622, "epoch": 3.7991768878780494, "grad_norm": 9.795654296875, "learning_rate": 6.974816976319426e-08, "loss": 0.39405715, "memory(GiB)": 34.88, "step": 140315, "train_speed(iter/s)": 0.410435 }, { "acc": 0.94443274, "epoch": 3.799312268161265, "grad_norm": 5.724494934082031, "learning_rate": 6.965572366812212e-08, "loss": 0.37582107, "memory(GiB)": 34.88, "step": 140320, "train_speed(iter/s)": 0.410436 }, { "acc": 0.94735689, "epoch": 3.7994476484444806, "grad_norm": 6.907604217529297, "learning_rate": 6.9563339341564e-08, "loss": 0.29476037, "memory(GiB)": 34.88, "step": 140325, "train_speed(iter/s)": 0.410437 }, { "acc": 0.95742798, "epoch": 3.799583028727696, "grad_norm": 9.11475658416748, "learning_rate": 6.94710167846766e-08, "loss": 0.27919199, "memory(GiB)": 34.88, "step": 140330, "train_speed(iter/s)": 0.410437 }, { "acc": 0.94163971, "epoch": 3.7997184090109117, "grad_norm": 8.716707229614258, "learning_rate": 6.937875599861724e-08, "loss": 0.38675225, "memory(GiB)": 34.88, "step": 140335, "train_speed(iter/s)": 0.410438 }, { "acc": 0.95135756, "epoch": 3.799853789294127, "grad_norm": 15.717527389526367, "learning_rate": 6.928655698454042e-08, "loss": 0.28206916, "memory(GiB)": 34.88, "step": 140340, "train_speed(iter/s)": 0.410439 }, { "acc": 0.95039158, "epoch": 3.799989169577343, "grad_norm": 5.393442630767822, "learning_rate": 6.919441974360177e-08, "loss": 0.25721488, "memory(GiB)": 34.88, "step": 140345, "train_speed(iter/s)": 0.41044 }, { "acc": 0.9532959, "epoch": 3.8001245498605583, "grad_norm": 4.264318943023682, "learning_rate": 6.910234427695583e-08, "loss": 0.36075778, "memory(GiB)": 34.88, "step": 140350, "train_speed(iter/s)": 0.410441 }, { "acc": 0.9449173, "epoch": 3.800259930143774, "grad_norm": 4.269277572631836, "learning_rate": 6.901033058575599e-08, "loss": 0.3070528, "memory(GiB)": 34.88, "step": 140355, "train_speed(iter/s)": 0.410441 }, { "acc": 0.9576417, "epoch": 3.8003953104269894, "grad_norm": 9.07179069519043, "learning_rate": 6.891837867115453e-08, "loss": 0.30180807, "memory(GiB)": 34.88, "step": 140360, "train_speed(iter/s)": 0.410442 }, { "acc": 0.95901566, "epoch": 3.8005306907102048, "grad_norm": 4.489217758178711, "learning_rate": 6.882648853430266e-08, "loss": 0.22610724, "memory(GiB)": 34.88, "step": 140365, "train_speed(iter/s)": 0.410443 }, { "acc": 0.94841814, "epoch": 3.8006660709934206, "grad_norm": 7.463180065155029, "learning_rate": 6.873466017635322e-08, "loss": 0.30578256, "memory(GiB)": 34.88, "step": 140370, "train_speed(iter/s)": 0.410443 }, { "acc": 0.95587292, "epoch": 3.800801451276636, "grad_norm": 8.369826316833496, "learning_rate": 6.864289359845462e-08, "loss": 0.28558075, "memory(GiB)": 34.88, "step": 140375, "train_speed(iter/s)": 0.410444 }, { "acc": 0.95237007, "epoch": 3.8009368315598517, "grad_norm": 5.60436487197876, "learning_rate": 6.855118880175805e-08, "loss": 0.34633217, "memory(GiB)": 34.88, "step": 140380, "train_speed(iter/s)": 0.410445 }, { "acc": 0.95276814, "epoch": 3.801072211843067, "grad_norm": 3.4641060829162598, "learning_rate": 6.845954578741138e-08, "loss": 0.25768762, "memory(GiB)": 34.88, "step": 140385, "train_speed(iter/s)": 0.410446 }, { "acc": 0.94684801, "epoch": 3.801207592126283, "grad_norm": 4.200867652893066, "learning_rate": 6.836796455656299e-08, "loss": 0.35141487, "memory(GiB)": 34.88, "step": 140390, "train_speed(iter/s)": 0.410447 }, { "acc": 0.93159733, "epoch": 3.8013429724094983, "grad_norm": 7.259403228759766, "learning_rate": 6.827644511035964e-08, "loss": 0.4346796, "memory(GiB)": 34.88, "step": 140395, "train_speed(iter/s)": 0.410447 }, { "acc": 0.95053234, "epoch": 3.8014783526927136, "grad_norm": 5.421949863433838, "learning_rate": 6.818498744994809e-08, "loss": 0.2881249, "memory(GiB)": 34.88, "step": 140400, "train_speed(iter/s)": 0.410448 }, { "acc": 0.9443512, "epoch": 3.8016137329759294, "grad_norm": 7.021649360656738, "learning_rate": 6.809359157647506e-08, "loss": 0.33122766, "memory(GiB)": 34.88, "step": 140405, "train_speed(iter/s)": 0.410449 }, { "acc": 0.9460968, "epoch": 3.8017491132591448, "grad_norm": 6.8690505027771, "learning_rate": 6.800225749108343e-08, "loss": 0.29962294, "memory(GiB)": 34.88, "step": 140410, "train_speed(iter/s)": 0.41045 }, { "acc": 0.9544241, "epoch": 3.8018844935423606, "grad_norm": 54.25581359863281, "learning_rate": 6.791098519491827e-08, "loss": 0.28186486, "memory(GiB)": 34.88, "step": 140415, "train_speed(iter/s)": 0.41045 }, { "acc": 0.9550396, "epoch": 3.802019873825576, "grad_norm": 3.0022950172424316, "learning_rate": 6.781977468912357e-08, "loss": 0.27495642, "memory(GiB)": 34.88, "step": 140420, "train_speed(iter/s)": 0.410451 }, { "acc": 0.93382473, "epoch": 3.8021552541087917, "grad_norm": 6.423088073730469, "learning_rate": 6.772862597484105e-08, "loss": 0.39467235, "memory(GiB)": 34.88, "step": 140425, "train_speed(iter/s)": 0.410452 }, { "acc": 0.95613861, "epoch": 3.802290634392007, "grad_norm": 5.167703151702881, "learning_rate": 6.763753905321301e-08, "loss": 0.24619465, "memory(GiB)": 34.88, "step": 140430, "train_speed(iter/s)": 0.410453 }, { "acc": 0.94440355, "epoch": 3.8024260146752225, "grad_norm": 8.524041175842285, "learning_rate": 6.754651392538014e-08, "loss": 0.28565755, "memory(GiB)": 34.88, "step": 140435, "train_speed(iter/s)": 0.410453 }, { "acc": 0.94006996, "epoch": 3.8025613949584383, "grad_norm": 8.786620140075684, "learning_rate": 6.74555505924836e-08, "loss": 0.38661916, "memory(GiB)": 34.88, "step": 140440, "train_speed(iter/s)": 0.410454 }, { "acc": 0.94904156, "epoch": 3.802696775241654, "grad_norm": 8.518967628479004, "learning_rate": 6.736464905566181e-08, "loss": 0.29353387, "memory(GiB)": 34.88, "step": 140445, "train_speed(iter/s)": 0.410455 }, { "acc": 0.95145855, "epoch": 3.8028321555248694, "grad_norm": 5.592769145965576, "learning_rate": 6.727380931605375e-08, "loss": 0.27581558, "memory(GiB)": 34.88, "step": 140450, "train_speed(iter/s)": 0.410456 }, { "acc": 0.94029436, "epoch": 3.8029675358080848, "grad_norm": 5.511663913726807, "learning_rate": 6.718303137479729e-08, "loss": 0.35840178, "memory(GiB)": 34.88, "step": 140455, "train_speed(iter/s)": 0.410457 }, { "acc": 0.93864346, "epoch": 3.8031029160913006, "grad_norm": 10.504756927490234, "learning_rate": 6.709231523303029e-08, "loss": 0.41348619, "memory(GiB)": 34.88, "step": 140460, "train_speed(iter/s)": 0.410457 }, { "acc": 0.95069199, "epoch": 3.803238296374516, "grad_norm": 17.131288528442383, "learning_rate": 6.700166089188838e-08, "loss": 0.31525452, "memory(GiB)": 34.88, "step": 140465, "train_speed(iter/s)": 0.410458 }, { "acc": 0.94938011, "epoch": 3.8033736766577313, "grad_norm": 6.091982364654541, "learning_rate": 6.691106835250779e-08, "loss": 0.27014108, "memory(GiB)": 34.88, "step": 140470, "train_speed(iter/s)": 0.410459 }, { "acc": 0.95491238, "epoch": 3.803509056940947, "grad_norm": 8.407505989074707, "learning_rate": 6.682053761602247e-08, "loss": 0.27912056, "memory(GiB)": 34.88, "step": 140475, "train_speed(iter/s)": 0.41046 }, { "acc": 0.95438147, "epoch": 3.803644437224163, "grad_norm": 10.40861701965332, "learning_rate": 6.673006868356752e-08, "loss": 0.28408921, "memory(GiB)": 34.88, "step": 140480, "train_speed(iter/s)": 0.410461 }, { "acc": 0.94917526, "epoch": 3.8037798175073783, "grad_norm": 18.282930374145508, "learning_rate": 6.663966155627639e-08, "loss": 0.363112, "memory(GiB)": 34.88, "step": 140485, "train_speed(iter/s)": 0.410462 }, { "acc": 0.95077353, "epoch": 3.8039151977905936, "grad_norm": 8.594281196594238, "learning_rate": 6.654931623527968e-08, "loss": 0.24712338, "memory(GiB)": 34.88, "step": 140490, "train_speed(iter/s)": 0.410462 }, { "acc": 0.96274624, "epoch": 3.8040505780738094, "grad_norm": 2.7027881145477295, "learning_rate": 6.645903272171142e-08, "loss": 0.21153061, "memory(GiB)": 34.88, "step": 140495, "train_speed(iter/s)": 0.410463 }, { "acc": 0.9497777, "epoch": 3.8041859583570248, "grad_norm": 5.036363124847412, "learning_rate": 6.63688110167011e-08, "loss": 0.34518058, "memory(GiB)": 34.88, "step": 140500, "train_speed(iter/s)": 0.410464 }, { "acc": 0.96280794, "epoch": 3.8043213386402406, "grad_norm": 16.41851043701172, "learning_rate": 6.627865112137995e-08, "loss": 0.266804, "memory(GiB)": 34.88, "step": 140505, "train_speed(iter/s)": 0.410465 }, { "acc": 0.94654827, "epoch": 3.804456718923456, "grad_norm": 7.765513896942139, "learning_rate": 6.618855303687642e-08, "loss": 0.30195293, "memory(GiB)": 34.88, "step": 140510, "train_speed(iter/s)": 0.410466 }, { "acc": 0.94771929, "epoch": 3.8045920992066717, "grad_norm": 4.195955753326416, "learning_rate": 6.609851676432e-08, "loss": 0.30587182, "memory(GiB)": 34.88, "step": 140515, "train_speed(iter/s)": 0.410466 }, { "acc": 0.9439559, "epoch": 3.804727479489887, "grad_norm": 7.937224864959717, "learning_rate": 6.60085423048386e-08, "loss": 0.2973932, "memory(GiB)": 34.88, "step": 140520, "train_speed(iter/s)": 0.410467 }, { "acc": 0.93921337, "epoch": 3.8048628597731025, "grad_norm": 11.514406204223633, "learning_rate": 6.591862965955842e-08, "loss": 0.35041411, "memory(GiB)": 34.88, "step": 140525, "train_speed(iter/s)": 0.410468 }, { "acc": 0.94694881, "epoch": 3.8049982400563183, "grad_norm": 41.37419509887695, "learning_rate": 6.582877882960678e-08, "loss": 0.36210845, "memory(GiB)": 34.88, "step": 140530, "train_speed(iter/s)": 0.410469 }, { "acc": 0.92620392, "epoch": 3.8051336203395336, "grad_norm": 13.440600395202637, "learning_rate": 6.573898981610823e-08, "loss": 0.51414723, "memory(GiB)": 34.88, "step": 140535, "train_speed(iter/s)": 0.41047 }, { "acc": 0.9488492, "epoch": 3.8052690006227494, "grad_norm": 3.703535318374634, "learning_rate": 6.56492626201884e-08, "loss": 0.27446771, "memory(GiB)": 34.88, "step": 140540, "train_speed(iter/s)": 0.41047 }, { "acc": 0.943256, "epoch": 3.805404380905965, "grad_norm": 5.995489120483398, "learning_rate": 6.555959724297073e-08, "loss": 0.3984077, "memory(GiB)": 34.88, "step": 140545, "train_speed(iter/s)": 0.410471 }, { "acc": 0.94229202, "epoch": 3.8055397611891806, "grad_norm": 5.519130229949951, "learning_rate": 6.546999368557923e-08, "loss": 0.32516086, "memory(GiB)": 34.88, "step": 140550, "train_speed(iter/s)": 0.410472 }, { "acc": 0.9549181, "epoch": 3.805675141472396, "grad_norm": 4.469600677490234, "learning_rate": 6.538045194913566e-08, "loss": 0.25727024, "memory(GiB)": 34.88, "step": 140555, "train_speed(iter/s)": 0.410473 }, { "acc": 0.94851341, "epoch": 3.8058105217556113, "grad_norm": 6.246426582336426, "learning_rate": 6.529097203476177e-08, "loss": 0.40724487, "memory(GiB)": 34.88, "step": 140560, "train_speed(iter/s)": 0.410474 }, { "acc": 0.94770937, "epoch": 3.805945902038827, "grad_norm": 2.7550063133239746, "learning_rate": 6.520155394357879e-08, "loss": 0.29883397, "memory(GiB)": 34.88, "step": 140565, "train_speed(iter/s)": 0.410474 }, { "acc": 0.94322643, "epoch": 3.8060812823220425, "grad_norm": 7.835140705108643, "learning_rate": 6.511219767670682e-08, "loss": 0.36287277, "memory(GiB)": 34.88, "step": 140570, "train_speed(iter/s)": 0.410475 }, { "acc": 0.93176785, "epoch": 3.8062166626052583, "grad_norm": 11.382162094116211, "learning_rate": 6.502290323526485e-08, "loss": 0.42164454, "memory(GiB)": 34.88, "step": 140575, "train_speed(iter/s)": 0.410476 }, { "acc": 0.95013208, "epoch": 3.8063520428884736, "grad_norm": 7.6084980964660645, "learning_rate": 6.493367062037188e-08, "loss": 0.33330617, "memory(GiB)": 34.88, "step": 140580, "train_speed(iter/s)": 0.410477 }, { "acc": 0.96202888, "epoch": 3.8064874231716894, "grad_norm": 7.348428249359131, "learning_rate": 6.484449983314578e-08, "loss": 0.27396145, "memory(GiB)": 34.88, "step": 140585, "train_speed(iter/s)": 0.410477 }, { "acc": 0.94769478, "epoch": 3.806622803454905, "grad_norm": 4.680403232574463, "learning_rate": 6.475539087470277e-08, "loss": 0.34477966, "memory(GiB)": 34.88, "step": 140590, "train_speed(iter/s)": 0.410478 }, { "acc": 0.95697012, "epoch": 3.80675818373812, "grad_norm": 2.6198501586914062, "learning_rate": 6.46663437461602e-08, "loss": 0.31121461, "memory(GiB)": 34.88, "step": 140595, "train_speed(iter/s)": 0.410479 }, { "acc": 0.95194092, "epoch": 3.806893564021336, "grad_norm": 9.974705696105957, "learning_rate": 6.457735844863259e-08, "loss": 0.27913787, "memory(GiB)": 34.88, "step": 140600, "train_speed(iter/s)": 0.41048 }, { "acc": 0.94890137, "epoch": 3.8070289443045517, "grad_norm": 9.788949012756348, "learning_rate": 6.448843498323562e-08, "loss": 0.28470874, "memory(GiB)": 34.88, "step": 140605, "train_speed(iter/s)": 0.41048 }, { "acc": 0.94222517, "epoch": 3.807164324587767, "grad_norm": 6.921773433685303, "learning_rate": 6.439957335108272e-08, "loss": 0.37749591, "memory(GiB)": 34.88, "step": 140610, "train_speed(iter/s)": 0.410481 }, { "acc": 0.95308933, "epoch": 3.8072997048709825, "grad_norm": 4.025513648986816, "learning_rate": 6.43107735532868e-08, "loss": 0.29677739, "memory(GiB)": 34.88, "step": 140615, "train_speed(iter/s)": 0.410482 }, { "acc": 0.93432961, "epoch": 3.8074350851541983, "grad_norm": 6.652773857116699, "learning_rate": 6.422203559096074e-08, "loss": 0.38739383, "memory(GiB)": 34.88, "step": 140620, "train_speed(iter/s)": 0.410483 }, { "acc": 0.9452589, "epoch": 3.8075704654374136, "grad_norm": 9.504668235778809, "learning_rate": 6.413335946521575e-08, "loss": 0.38079131, "memory(GiB)": 34.88, "step": 140625, "train_speed(iter/s)": 0.410484 }, { "acc": 0.95027685, "epoch": 3.807705845720629, "grad_norm": 6.9835944175720215, "learning_rate": 6.404474517716306e-08, "loss": 0.34423923, "memory(GiB)": 34.88, "step": 140630, "train_speed(iter/s)": 0.410484 }, { "acc": 0.95048809, "epoch": 3.807841226003845, "grad_norm": 4.879202842712402, "learning_rate": 6.395619272791222e-08, "loss": 0.25947423, "memory(GiB)": 34.88, "step": 140635, "train_speed(iter/s)": 0.410485 }, { "acc": 0.96218567, "epoch": 3.8079766062870606, "grad_norm": 5.512003421783447, "learning_rate": 6.386770211857335e-08, "loss": 0.21256561, "memory(GiB)": 34.88, "step": 140640, "train_speed(iter/s)": 0.410486 }, { "acc": 0.93664856, "epoch": 3.808111986570276, "grad_norm": 8.275047302246094, "learning_rate": 6.377927335025435e-08, "loss": 0.40069814, "memory(GiB)": 34.88, "step": 140645, "train_speed(iter/s)": 0.410487 }, { "acc": 0.94132252, "epoch": 3.8082473668534913, "grad_norm": 7.825107574462891, "learning_rate": 6.369090642406253e-08, "loss": 0.38270383, "memory(GiB)": 34.88, "step": 140650, "train_speed(iter/s)": 0.410487 }, { "acc": 0.93908596, "epoch": 3.808382747136707, "grad_norm": 9.615514755249023, "learning_rate": 6.360260134110636e-08, "loss": 0.37248948, "memory(GiB)": 34.88, "step": 140655, "train_speed(iter/s)": 0.410488 }, { "acc": 0.93302116, "epoch": 3.8085181274199225, "grad_norm": 7.508790969848633, "learning_rate": 6.35143581024904e-08, "loss": 0.44029775, "memory(GiB)": 34.88, "step": 140660, "train_speed(iter/s)": 0.410489 }, { "acc": 0.9474699, "epoch": 3.8086535077031383, "grad_norm": 12.005071640014648, "learning_rate": 6.342617670932087e-08, "loss": 0.30807116, "memory(GiB)": 34.88, "step": 140665, "train_speed(iter/s)": 0.41049 }, { "acc": 0.9619566, "epoch": 3.8087888879863536, "grad_norm": 8.580423355102539, "learning_rate": 6.333805716270233e-08, "loss": 0.24644208, "memory(GiB)": 34.88, "step": 140670, "train_speed(iter/s)": 0.410491 }, { "acc": 0.92752705, "epoch": 3.8089242682695694, "grad_norm": 4.417926788330078, "learning_rate": 6.324999946373878e-08, "loss": 0.46865587, "memory(GiB)": 34.88, "step": 140675, "train_speed(iter/s)": 0.410492 }, { "acc": 0.94437304, "epoch": 3.809059648552785, "grad_norm": 5.474340915679932, "learning_rate": 6.316200361353314e-08, "loss": 0.36503546, "memory(GiB)": 34.88, "step": 140680, "train_speed(iter/s)": 0.410492 }, { "acc": 0.94815369, "epoch": 3.809195028836, "grad_norm": 17.364458084106445, "learning_rate": 6.307406961318773e-08, "loss": 0.30421486, "memory(GiB)": 34.88, "step": 140685, "train_speed(iter/s)": 0.410493 }, { "acc": 0.95255337, "epoch": 3.809330409119216, "grad_norm": 8.862824440002441, "learning_rate": 6.298619746380434e-08, "loss": 0.298401, "memory(GiB)": 34.88, "step": 140690, "train_speed(iter/s)": 0.410494 }, { "acc": 0.95074024, "epoch": 3.8094657894024313, "grad_norm": 6.964070796966553, "learning_rate": 6.289838716648308e-08, "loss": 0.29708638, "memory(GiB)": 34.88, "step": 140695, "train_speed(iter/s)": 0.410495 }, { "acc": 0.95313644, "epoch": 3.809601169685647, "grad_norm": 12.275583267211914, "learning_rate": 6.281063872232466e-08, "loss": 0.29780679, "memory(GiB)": 34.88, "step": 140700, "train_speed(iter/s)": 0.410496 }, { "acc": 0.94429569, "epoch": 3.8097365499688625, "grad_norm": 4.9129838943481445, "learning_rate": 6.272295213242807e-08, "loss": 0.37935181, "memory(GiB)": 34.88, "step": 140705, "train_speed(iter/s)": 0.410496 }, { "acc": 0.95144615, "epoch": 3.8098719302520783, "grad_norm": 5.029883861541748, "learning_rate": 6.263532739789177e-08, "loss": 0.33066902, "memory(GiB)": 34.88, "step": 140710, "train_speed(iter/s)": 0.410497 }, { "acc": 0.95836849, "epoch": 3.8100073105352936, "grad_norm": 5.4547529220581055, "learning_rate": 6.254776451981308e-08, "loss": 0.21901855, "memory(GiB)": 34.88, "step": 140715, "train_speed(iter/s)": 0.410498 }, { "acc": 0.94911671, "epoch": 3.810142690818509, "grad_norm": 6.7087836265563965, "learning_rate": 6.246026349928939e-08, "loss": 0.34038987, "memory(GiB)": 34.88, "step": 140720, "train_speed(iter/s)": 0.410499 }, { "acc": 0.95721121, "epoch": 3.810278071101725, "grad_norm": 4.230130195617676, "learning_rate": 6.237282433741635e-08, "loss": 0.26346579, "memory(GiB)": 34.88, "step": 140725, "train_speed(iter/s)": 0.410499 }, { "acc": 0.95558224, "epoch": 3.81041345138494, "grad_norm": 7.6880879402160645, "learning_rate": 6.22854470352902e-08, "loss": 0.26578934, "memory(GiB)": 34.88, "step": 140730, "train_speed(iter/s)": 0.4105 }, { "acc": 0.94129181, "epoch": 3.810548831668156, "grad_norm": 25.361122131347656, "learning_rate": 6.21981315940044e-08, "loss": 0.3705394, "memory(GiB)": 34.88, "step": 140735, "train_speed(iter/s)": 0.410501 }, { "acc": 0.95370998, "epoch": 3.8106842119513713, "grad_norm": 6.299163341522217, "learning_rate": 6.211087801465298e-08, "loss": 0.32563868, "memory(GiB)": 34.88, "step": 140740, "train_speed(iter/s)": 0.410502 }, { "acc": 0.95282898, "epoch": 3.810819592234587, "grad_norm": 3.862856149673462, "learning_rate": 6.202368629832994e-08, "loss": 0.29624488, "memory(GiB)": 34.88, "step": 140745, "train_speed(iter/s)": 0.410503 }, { "acc": 0.94154768, "epoch": 3.8109549725178025, "grad_norm": 5.178787708282471, "learning_rate": 6.193655644612653e-08, "loss": 0.36458609, "memory(GiB)": 34.88, "step": 140750, "train_speed(iter/s)": 0.410503 }, { "acc": 0.95130672, "epoch": 3.811090352801018, "grad_norm": 7.528047561645508, "learning_rate": 6.184948845913454e-08, "loss": 0.31952729, "memory(GiB)": 34.88, "step": 140755, "train_speed(iter/s)": 0.410504 }, { "acc": 0.96200886, "epoch": 3.8112257330842336, "grad_norm": 3.8247923851013184, "learning_rate": 6.17624823384441e-08, "loss": 0.20448384, "memory(GiB)": 34.88, "step": 140760, "train_speed(iter/s)": 0.410505 }, { "acc": 0.93860455, "epoch": 3.8113611133674494, "grad_norm": 3.524763345718384, "learning_rate": 6.167553808514645e-08, "loss": 0.38993363, "memory(GiB)": 34.88, "step": 140765, "train_speed(iter/s)": 0.410506 }, { "acc": 0.94670897, "epoch": 3.811496493650665, "grad_norm": 13.84369945526123, "learning_rate": 6.158865570032949e-08, "loss": 0.36356905, "memory(GiB)": 34.88, "step": 140770, "train_speed(iter/s)": 0.410507 }, { "acc": 0.94475288, "epoch": 3.81163187393388, "grad_norm": 4.019561767578125, "learning_rate": 6.150183518508226e-08, "loss": 0.36874185, "memory(GiB)": 34.88, "step": 140775, "train_speed(iter/s)": 0.410507 }, { "acc": 0.94796476, "epoch": 3.811767254217096, "grad_norm": 11.231819152832031, "learning_rate": 6.141507654049266e-08, "loss": 0.30241642, "memory(GiB)": 34.88, "step": 140780, "train_speed(iter/s)": 0.410508 }, { "acc": 0.94708996, "epoch": 3.8119026345003113, "grad_norm": 6.434581279754639, "learning_rate": 6.132837976764638e-08, "loss": 0.34227443, "memory(GiB)": 34.88, "step": 140785, "train_speed(iter/s)": 0.410509 }, { "acc": 0.96186104, "epoch": 3.8120380147835267, "grad_norm": 3.1720917224884033, "learning_rate": 6.124174486762966e-08, "loss": 0.25445895, "memory(GiB)": 34.88, "step": 140790, "train_speed(iter/s)": 0.41051 }, { "acc": 0.94613256, "epoch": 3.8121733950667425, "grad_norm": 11.734065055847168, "learning_rate": 6.115517184152875e-08, "loss": 0.35363624, "memory(GiB)": 34.88, "step": 140795, "train_speed(iter/s)": 0.410511 }, { "acc": 0.94330444, "epoch": 3.8123087753499583, "grad_norm": 6.2638468742370605, "learning_rate": 6.106866069042767e-08, "loss": 0.34612656, "memory(GiB)": 34.88, "step": 140800, "train_speed(iter/s)": 0.410512 }, { "acc": 0.93226624, "epoch": 3.8124441556331736, "grad_norm": 6.316091060638428, "learning_rate": 6.098221141540989e-08, "loss": 0.36473818, "memory(GiB)": 34.88, "step": 140805, "train_speed(iter/s)": 0.410512 }, { "acc": 0.94181261, "epoch": 3.812579535916389, "grad_norm": 6.177065372467041, "learning_rate": 6.089582401755889e-08, "loss": 0.37082531, "memory(GiB)": 34.88, "step": 140810, "train_speed(iter/s)": 0.410513 }, { "acc": 0.96778431, "epoch": 3.812714916199605, "grad_norm": 1.185045599937439, "learning_rate": 6.08094984979559e-08, "loss": 0.21269867, "memory(GiB)": 34.88, "step": 140815, "train_speed(iter/s)": 0.410514 }, { "acc": 0.95718479, "epoch": 3.81285029648282, "grad_norm": 4.138547897338867, "learning_rate": 6.072323485768385e-08, "loss": 0.27906497, "memory(GiB)": 34.88, "step": 140820, "train_speed(iter/s)": 0.410515 }, { "acc": 0.94168482, "epoch": 3.812985676766036, "grad_norm": 16.604625701904297, "learning_rate": 6.063703309782176e-08, "loss": 0.38435154, "memory(GiB)": 34.88, "step": 140825, "train_speed(iter/s)": 0.410515 }, { "acc": 0.95781403, "epoch": 3.8131210570492513, "grad_norm": 21.489625930786133, "learning_rate": 6.055089321945033e-08, "loss": 0.28259807, "memory(GiB)": 34.88, "step": 140830, "train_speed(iter/s)": 0.410516 }, { "acc": 0.93454361, "epoch": 3.813256437332467, "grad_norm": 13.170011520385742, "learning_rate": 6.046481522364859e-08, "loss": 0.47772379, "memory(GiB)": 34.88, "step": 140835, "train_speed(iter/s)": 0.410517 }, { "acc": 0.94218636, "epoch": 3.8133918176156825, "grad_norm": 5.262960433959961, "learning_rate": 6.03787991114939e-08, "loss": 0.32744176, "memory(GiB)": 34.88, "step": 140840, "train_speed(iter/s)": 0.410518 }, { "acc": 0.94613914, "epoch": 3.813527197898898, "grad_norm": 5.267794609069824, "learning_rate": 6.029284488406529e-08, "loss": 0.33035407, "memory(GiB)": 34.88, "step": 140845, "train_speed(iter/s)": 0.410518 }, { "acc": 0.95515594, "epoch": 3.8136625781821136, "grad_norm": 14.429032325744629, "learning_rate": 6.020695254243846e-08, "loss": 0.3093437, "memory(GiB)": 34.88, "step": 140850, "train_speed(iter/s)": 0.410519 }, { "acc": 0.92707748, "epoch": 3.813797958465329, "grad_norm": 7.334653854370117, "learning_rate": 6.01211220876902e-08, "loss": 0.47142954, "memory(GiB)": 34.88, "step": 140855, "train_speed(iter/s)": 0.41052 }, { "acc": 0.94541864, "epoch": 3.813933338748545, "grad_norm": 6.487386703491211, "learning_rate": 6.003535352089458e-08, "loss": 0.3356122, "memory(GiB)": 34.88, "step": 140860, "train_speed(iter/s)": 0.410521 }, { "acc": 0.95390167, "epoch": 3.81406871903176, "grad_norm": 3.555563449859619, "learning_rate": 5.994964684312727e-08, "loss": 0.22796028, "memory(GiB)": 34.88, "step": 140865, "train_speed(iter/s)": 0.410522 }, { "acc": 0.95135136, "epoch": 3.814204099314976, "grad_norm": 5.279654502868652, "learning_rate": 5.98640020554612e-08, "loss": 0.26172132, "memory(GiB)": 34.88, "step": 140870, "train_speed(iter/s)": 0.410523 }, { "acc": 0.95179863, "epoch": 3.8143394795981913, "grad_norm": 6.4338698387146, "learning_rate": 5.977841915896874e-08, "loss": 0.31748424, "memory(GiB)": 34.88, "step": 140875, "train_speed(iter/s)": 0.410523 }, { "acc": 0.95664272, "epoch": 3.8144748598814067, "grad_norm": 6.115260124206543, "learning_rate": 5.969289815472281e-08, "loss": 0.24264529, "memory(GiB)": 34.88, "step": 140880, "train_speed(iter/s)": 0.410524 }, { "acc": 0.95607986, "epoch": 3.8146102401646225, "grad_norm": 5.326108455657959, "learning_rate": 5.960743904379468e-08, "loss": 0.22996111, "memory(GiB)": 34.88, "step": 140885, "train_speed(iter/s)": 0.410525 }, { "acc": 0.94724751, "epoch": 3.814745620447838, "grad_norm": 7.331274032592773, "learning_rate": 5.952204182725504e-08, "loss": 0.3535449, "memory(GiB)": 34.88, "step": 140890, "train_speed(iter/s)": 0.410526 }, { "acc": 0.95233479, "epoch": 3.8148810007310536, "grad_norm": 5.623767852783203, "learning_rate": 5.9436706506172374e-08, "loss": 0.29457901, "memory(GiB)": 34.88, "step": 140895, "train_speed(iter/s)": 0.410526 }, { "acc": 0.9510952, "epoch": 3.815016381014269, "grad_norm": 6.412258625030518, "learning_rate": 5.935143308161739e-08, "loss": 0.31045585, "memory(GiB)": 34.88, "step": 140900, "train_speed(iter/s)": 0.410527 }, { "acc": 0.94747562, "epoch": 3.815151761297485, "grad_norm": 5.578952789306641, "learning_rate": 5.926622155465801e-08, "loss": 0.28845413, "memory(GiB)": 34.88, "step": 140905, "train_speed(iter/s)": 0.410528 }, { "acc": 0.94641438, "epoch": 3.8152871415807, "grad_norm": 8.425430297851562, "learning_rate": 5.918107192635995e-08, "loss": 0.27027903, "memory(GiB)": 34.88, "step": 140910, "train_speed(iter/s)": 0.410529 }, { "acc": 0.95376015, "epoch": 3.8154225218639155, "grad_norm": 6.498802185058594, "learning_rate": 5.909598419779224e-08, "loss": 0.22319262, "memory(GiB)": 34.88, "step": 140915, "train_speed(iter/s)": 0.410529 }, { "acc": 0.94921579, "epoch": 3.8155579021471313, "grad_norm": 6.168856143951416, "learning_rate": 5.901095837001892e-08, "loss": 0.28384242, "memory(GiB)": 34.88, "step": 140920, "train_speed(iter/s)": 0.41053 }, { "acc": 0.95846004, "epoch": 3.815693282430347, "grad_norm": 3.0529847145080566, "learning_rate": 5.892599444410627e-08, "loss": 0.25092049, "memory(GiB)": 34.88, "step": 140925, "train_speed(iter/s)": 0.410531 }, { "acc": 0.96613493, "epoch": 3.8158286627135625, "grad_norm": 14.252816200256348, "learning_rate": 5.8841092421117756e-08, "loss": 0.24331055, "memory(GiB)": 34.88, "step": 140930, "train_speed(iter/s)": 0.410532 }, { "acc": 0.95416965, "epoch": 3.815964042996778, "grad_norm": 6.414220333099365, "learning_rate": 5.8756252302117986e-08, "loss": 0.28022952, "memory(GiB)": 34.88, "step": 140935, "train_speed(iter/s)": 0.410532 }, { "acc": 0.93998985, "epoch": 3.8160994232799936, "grad_norm": 14.97608757019043, "learning_rate": 5.867147408816878e-08, "loss": 0.38614635, "memory(GiB)": 34.88, "step": 140940, "train_speed(iter/s)": 0.410533 }, { "acc": 0.94490738, "epoch": 3.816234803563209, "grad_norm": 5.147488594055176, "learning_rate": 5.8586757780332516e-08, "loss": 0.37360935, "memory(GiB)": 34.88, "step": 140945, "train_speed(iter/s)": 0.410534 }, { "acc": 0.94364176, "epoch": 3.8163701838464243, "grad_norm": 10.786177635192871, "learning_rate": 5.8502103379671024e-08, "loss": 0.34634824, "memory(GiB)": 34.88, "step": 140950, "train_speed(iter/s)": 0.410535 }, { "acc": 0.93284435, "epoch": 3.81650556412964, "grad_norm": 9.424530982971191, "learning_rate": 5.8417510887243894e-08, "loss": 0.45680447, "memory(GiB)": 34.88, "step": 140955, "train_speed(iter/s)": 0.410535 }, { "acc": 0.95065441, "epoch": 3.816640944412856, "grad_norm": 9.148509979248047, "learning_rate": 5.8332980304110726e-08, "loss": 0.28056831, "memory(GiB)": 34.88, "step": 140960, "train_speed(iter/s)": 0.410536 }, { "acc": 0.94178791, "epoch": 3.8167763246960713, "grad_norm": 6.099749565124512, "learning_rate": 5.824851163133113e-08, "loss": 0.40035195, "memory(GiB)": 34.88, "step": 140965, "train_speed(iter/s)": 0.410537 }, { "acc": 0.96455517, "epoch": 3.8169117049792867, "grad_norm": 4.599610805511475, "learning_rate": 5.816410486996304e-08, "loss": 0.23054132, "memory(GiB)": 34.88, "step": 140970, "train_speed(iter/s)": 0.410538 }, { "acc": 0.94789324, "epoch": 3.8170470852625025, "grad_norm": 4.400567531585693, "learning_rate": 5.807976002106329e-08, "loss": 0.29236226, "memory(GiB)": 34.88, "step": 140975, "train_speed(iter/s)": 0.410539 }, { "acc": 0.94760151, "epoch": 3.817182465545718, "grad_norm": 8.151190757751465, "learning_rate": 5.799547708568925e-08, "loss": 0.34239247, "memory(GiB)": 34.88, "step": 140980, "train_speed(iter/s)": 0.410539 }, { "acc": 0.95145731, "epoch": 3.8173178458289336, "grad_norm": 7.4871392250061035, "learning_rate": 5.791125606489664e-08, "loss": 0.28567529, "memory(GiB)": 34.88, "step": 140985, "train_speed(iter/s)": 0.41054 }, { "acc": 0.95156937, "epoch": 3.817453226112149, "grad_norm": 10.412261962890625, "learning_rate": 5.782709695973951e-08, "loss": 0.33204694, "memory(GiB)": 34.88, "step": 140990, "train_speed(iter/s)": 0.410541 }, { "acc": 0.9594759, "epoch": 3.817588606395365, "grad_norm": 8.073508262634277, "learning_rate": 5.7742999771273585e-08, "loss": 0.2490932, "memory(GiB)": 34.88, "step": 140995, "train_speed(iter/s)": 0.410542 }, { "acc": 0.95062542, "epoch": 3.81772398667858, "grad_norm": 2.3083200454711914, "learning_rate": 5.765896450055069e-08, "loss": 0.35150268, "memory(GiB)": 34.88, "step": 141000, "train_speed(iter/s)": 0.410542 }, { "acc": 0.9733799, "epoch": 3.8178593669617955, "grad_norm": 1.9608484506607056, "learning_rate": 5.757499114862487e-08, "loss": 0.15559919, "memory(GiB)": 34.88, "step": 141005, "train_speed(iter/s)": 0.410543 }, { "acc": 0.94930172, "epoch": 3.8179947472450113, "grad_norm": 2.2270069122314453, "learning_rate": 5.749107971654741e-08, "loss": 0.30800171, "memory(GiB)": 34.88, "step": 141010, "train_speed(iter/s)": 0.410544 }, { "acc": 0.94874115, "epoch": 3.8181301275282267, "grad_norm": 13.459336280822754, "learning_rate": 5.740723020537015e-08, "loss": 0.3325114, "memory(GiB)": 34.88, "step": 141015, "train_speed(iter/s)": 0.410544 }, { "acc": 0.95613089, "epoch": 3.8182655078114425, "grad_norm": 6.1963725090026855, "learning_rate": 5.7323442616142676e-08, "loss": 0.26876464, "memory(GiB)": 34.88, "step": 141020, "train_speed(iter/s)": 0.410545 }, { "acc": 0.94278784, "epoch": 3.818400888094658, "grad_norm": 4.816529750823975, "learning_rate": 5.7239716949915174e-08, "loss": 0.33258991, "memory(GiB)": 34.88, "step": 141025, "train_speed(iter/s)": 0.410546 }, { "acc": 0.94849224, "epoch": 3.8185362683778736, "grad_norm": 4.612685203552246, "learning_rate": 5.7156053207736133e-08, "loss": 0.29814391, "memory(GiB)": 34.88, "step": 141030, "train_speed(iter/s)": 0.410547 }, { "acc": 0.95551395, "epoch": 3.818671648661089, "grad_norm": 6.159861087799072, "learning_rate": 5.7072451390652956e-08, "loss": 0.29636869, "memory(GiB)": 34.88, "step": 141035, "train_speed(iter/s)": 0.410548 }, { "acc": 0.95460701, "epoch": 3.8188070289443043, "grad_norm": 10.750658988952637, "learning_rate": 5.6988911499714694e-08, "loss": 0.29316635, "memory(GiB)": 34.88, "step": 141040, "train_speed(iter/s)": 0.410548 }, { "acc": 0.9537941, "epoch": 3.81894240922752, "grad_norm": 3.992952823638916, "learning_rate": 5.690543353596651e-08, "loss": 0.27307122, "memory(GiB)": 34.88, "step": 141045, "train_speed(iter/s)": 0.410549 }, { "acc": 0.95566893, "epoch": 3.8190777895107355, "grad_norm": 6.139481067657471, "learning_rate": 5.682201750045413e-08, "loss": 0.27379227, "memory(GiB)": 34.88, "step": 141050, "train_speed(iter/s)": 0.41055 }, { "acc": 0.94549713, "epoch": 3.8192131697939513, "grad_norm": 9.204462051391602, "learning_rate": 5.673866339422274e-08, "loss": 0.328002, "memory(GiB)": 34.88, "step": 141055, "train_speed(iter/s)": 0.410551 }, { "acc": 0.95026426, "epoch": 3.8193485500771667, "grad_norm": 9.373745918273926, "learning_rate": 5.6655371218316935e-08, "loss": 0.27398069, "memory(GiB)": 34.88, "step": 141060, "train_speed(iter/s)": 0.410551 }, { "acc": 0.94829044, "epoch": 3.8194839303603825, "grad_norm": 4.996529579162598, "learning_rate": 5.657214097377912e-08, "loss": 0.3165257, "memory(GiB)": 34.88, "step": 141065, "train_speed(iter/s)": 0.410552 }, { "acc": 0.94859295, "epoch": 3.819619310643598, "grad_norm": 7.579776287078857, "learning_rate": 5.648897266165337e-08, "loss": 0.34321961, "memory(GiB)": 34.88, "step": 141070, "train_speed(iter/s)": 0.410553 }, { "acc": 0.93539486, "epoch": 3.819754690926813, "grad_norm": 10.445947647094727, "learning_rate": 5.6405866282980386e-08, "loss": 0.43288674, "memory(GiB)": 34.88, "step": 141075, "train_speed(iter/s)": 0.410553 }, { "acc": 0.93515987, "epoch": 3.819890071210029, "grad_norm": 13.963226318359375, "learning_rate": 5.632282183880147e-08, "loss": 0.39330959, "memory(GiB)": 34.88, "step": 141080, "train_speed(iter/s)": 0.410554 }, { "acc": 0.96068964, "epoch": 3.820025451493245, "grad_norm": 5.304033279418945, "learning_rate": 5.623983933015679e-08, "loss": 0.25138502, "memory(GiB)": 34.88, "step": 141085, "train_speed(iter/s)": 0.410555 }, { "acc": 0.95496502, "epoch": 3.82016083177646, "grad_norm": 4.6325531005859375, "learning_rate": 5.6156918758085976e-08, "loss": 0.3045584, "memory(GiB)": 34.88, "step": 141090, "train_speed(iter/s)": 0.410556 }, { "acc": 0.94402485, "epoch": 3.8202962120596755, "grad_norm": 10.72393798828125, "learning_rate": 5.607406012362864e-08, "loss": 0.35132539, "memory(GiB)": 34.88, "step": 141095, "train_speed(iter/s)": 0.410556 }, { "acc": 0.94275141, "epoch": 3.8204315923428913, "grad_norm": 4.991587162017822, "learning_rate": 5.5991263427821085e-08, "loss": 0.38101158, "memory(GiB)": 34.88, "step": 141100, "train_speed(iter/s)": 0.410557 }, { "acc": 0.94637642, "epoch": 3.8205669726261067, "grad_norm": 9.702062606811523, "learning_rate": 5.5908528671701795e-08, "loss": 0.31711183, "memory(GiB)": 34.88, "step": 141105, "train_speed(iter/s)": 0.410558 }, { "acc": 0.9447504, "epoch": 3.820702352909322, "grad_norm": 6.796533107757568, "learning_rate": 5.5825855856307074e-08, "loss": 0.29233761, "memory(GiB)": 34.88, "step": 141110, "train_speed(iter/s)": 0.410559 }, { "acc": 0.93823643, "epoch": 3.820837733192538, "grad_norm": 12.45954418182373, "learning_rate": 5.5743244982672115e-08, "loss": 0.46293736, "memory(GiB)": 34.88, "step": 141115, "train_speed(iter/s)": 0.410559 }, { "acc": 0.93661175, "epoch": 3.8209731134757536, "grad_norm": 5.161795139312744, "learning_rate": 5.566069605183207e-08, "loss": 0.43364658, "memory(GiB)": 34.88, "step": 141120, "train_speed(iter/s)": 0.41056 }, { "acc": 0.95180588, "epoch": 3.821108493758969, "grad_norm": 8.647345542907715, "learning_rate": 5.557820906482047e-08, "loss": 0.34006617, "memory(GiB)": 34.88, "step": 141125, "train_speed(iter/s)": 0.410561 }, { "acc": 0.94218979, "epoch": 3.8212438740421844, "grad_norm": 6.02755880355835, "learning_rate": 5.5495784022671386e-08, "loss": 0.34820073, "memory(GiB)": 34.88, "step": 141130, "train_speed(iter/s)": 0.410561 }, { "acc": 0.94821815, "epoch": 3.8213792543254, "grad_norm": 4.034059047698975, "learning_rate": 5.541342092641666e-08, "loss": 0.28242857, "memory(GiB)": 34.88, "step": 141135, "train_speed(iter/s)": 0.410562 }, { "acc": 0.95777454, "epoch": 3.8215146346086155, "grad_norm": 5.8361334800720215, "learning_rate": 5.5331119777088695e-08, "loss": 0.24915528, "memory(GiB)": 34.88, "step": 141140, "train_speed(iter/s)": 0.410563 }, { "acc": 0.9600193, "epoch": 3.8216500148918313, "grad_norm": 7.590743541717529, "learning_rate": 5.524888057571768e-08, "loss": 0.20748811, "memory(GiB)": 34.88, "step": 141145, "train_speed(iter/s)": 0.410564 }, { "acc": 0.94951229, "epoch": 3.8217853951750467, "grad_norm": 7.3548994064331055, "learning_rate": 5.5166703323335467e-08, "loss": 0.34091997, "memory(GiB)": 34.88, "step": 141150, "train_speed(iter/s)": 0.410565 }, { "acc": 0.94914989, "epoch": 3.8219207754582625, "grad_norm": 6.701798439025879, "learning_rate": 5.508458802097002e-08, "loss": 0.28531699, "memory(GiB)": 34.88, "step": 141155, "train_speed(iter/s)": 0.410565 }, { "acc": 0.95725145, "epoch": 3.822056155741478, "grad_norm": 20.644794464111328, "learning_rate": 5.500253466964929e-08, "loss": 0.24094591, "memory(GiB)": 34.88, "step": 141160, "train_speed(iter/s)": 0.410566 }, { "acc": 0.96019115, "epoch": 3.822191536024693, "grad_norm": 2.88283109664917, "learning_rate": 5.4920543270402916e-08, "loss": 0.23818874, "memory(GiB)": 34.88, "step": 141165, "train_speed(iter/s)": 0.410567 }, { "acc": 0.94784222, "epoch": 3.822326916307909, "grad_norm": 6.380087375640869, "learning_rate": 5.4838613824257195e-08, "loss": 0.30506916, "memory(GiB)": 34.88, "step": 141170, "train_speed(iter/s)": 0.410568 }, { "acc": 0.93050327, "epoch": 3.8224622965911244, "grad_norm": 29.269174575805664, "learning_rate": 5.475674633223898e-08, "loss": 0.45220318, "memory(GiB)": 34.88, "step": 141175, "train_speed(iter/s)": 0.410568 }, { "acc": 0.96538563, "epoch": 3.82259767687434, "grad_norm": 3.7924129962921143, "learning_rate": 5.46749407953729e-08, "loss": 0.17860587, "memory(GiB)": 34.88, "step": 141180, "train_speed(iter/s)": 0.410569 }, { "acc": 0.9573452, "epoch": 3.8227330571575555, "grad_norm": 3.418865442276001, "learning_rate": 5.4593197214684714e-08, "loss": 0.24926562, "memory(GiB)": 34.88, "step": 141185, "train_speed(iter/s)": 0.41057 }, { "acc": 0.95302134, "epoch": 3.8228684374407713, "grad_norm": 5.873898506164551, "learning_rate": 5.4511515591197365e-08, "loss": 0.26230578, "memory(GiB)": 34.88, "step": 141190, "train_speed(iter/s)": 0.410571 }, { "acc": 0.93133602, "epoch": 3.8230038177239867, "grad_norm": 8.952056884765625, "learning_rate": 5.44298959259344e-08, "loss": 0.47076521, "memory(GiB)": 34.88, "step": 141195, "train_speed(iter/s)": 0.410571 }, { "acc": 0.93126726, "epoch": 3.823139198007202, "grad_norm": 5.4612650871276855, "learning_rate": 5.434833821991933e-08, "loss": 0.44468975, "memory(GiB)": 34.88, "step": 141200, "train_speed(iter/s)": 0.410572 }, { "acc": 0.95569267, "epoch": 3.823274578290418, "grad_norm": 4.026248931884766, "learning_rate": 5.426684247417235e-08, "loss": 0.24558778, "memory(GiB)": 34.88, "step": 141205, "train_speed(iter/s)": 0.410573 }, { "acc": 0.93639002, "epoch": 3.823409958573633, "grad_norm": 19.069833755493164, "learning_rate": 5.418540868971532e-08, "loss": 0.37618275, "memory(GiB)": 34.88, "step": 141210, "train_speed(iter/s)": 0.410574 }, { "acc": 0.95238934, "epoch": 3.823545338856849, "grad_norm": 10.452779769897461, "learning_rate": 5.4104036867567876e-08, "loss": 0.26584897, "memory(GiB)": 34.88, "step": 141215, "train_speed(iter/s)": 0.410574 }, { "acc": 0.94710312, "epoch": 3.8236807191400644, "grad_norm": 7.993650436401367, "learning_rate": 5.402272700874966e-08, "loss": 0.32920563, "memory(GiB)": 34.88, "step": 141220, "train_speed(iter/s)": 0.410575 }, { "acc": 0.94213104, "epoch": 3.82381609942328, "grad_norm": 13.017641067504883, "learning_rate": 5.394147911427919e-08, "loss": 0.36632624, "memory(GiB)": 34.88, "step": 141225, "train_speed(iter/s)": 0.410576 }, { "acc": 0.95874929, "epoch": 3.8239514797064955, "grad_norm": 6.2115325927734375, "learning_rate": 5.3860293185173894e-08, "loss": 0.23561213, "memory(GiB)": 34.88, "step": 141230, "train_speed(iter/s)": 0.410577 }, { "acc": 0.93419151, "epoch": 3.824086859989711, "grad_norm": 14.318674087524414, "learning_rate": 5.377916922245119e-08, "loss": 0.43719158, "memory(GiB)": 34.88, "step": 141235, "train_speed(iter/s)": 0.410577 }, { "acc": 0.94993324, "epoch": 3.8242222402729267, "grad_norm": 6.40705680847168, "learning_rate": 5.369810722712737e-08, "loss": 0.28505793, "memory(GiB)": 34.88, "step": 141240, "train_speed(iter/s)": 0.410578 }, { "acc": 0.96223011, "epoch": 3.8243576205561425, "grad_norm": 6.354692459106445, "learning_rate": 5.3617107200217096e-08, "loss": 0.22838788, "memory(GiB)": 34.88, "step": 141245, "train_speed(iter/s)": 0.410579 }, { "acc": 0.93915138, "epoch": 3.824493000839358, "grad_norm": 5.87041711807251, "learning_rate": 5.353616914273612e-08, "loss": 0.35164337, "memory(GiB)": 34.88, "step": 141250, "train_speed(iter/s)": 0.410579 }, { "acc": 0.94541779, "epoch": 3.824628381122573, "grad_norm": 4.790182113647461, "learning_rate": 5.345529305569796e-08, "loss": 0.32903395, "memory(GiB)": 34.88, "step": 141255, "train_speed(iter/s)": 0.41058 }, { "acc": 0.95512314, "epoch": 3.824763761405789, "grad_norm": 3.7032346725463867, "learning_rate": 5.3374478940115594e-08, "loss": 0.23257487, "memory(GiB)": 34.88, "step": 141260, "train_speed(iter/s)": 0.410581 }, { "acc": 0.93593788, "epoch": 3.8248991416890044, "grad_norm": 5.49221134185791, "learning_rate": 5.3293726797001443e-08, "loss": 0.31825426, "memory(GiB)": 34.88, "step": 141265, "train_speed(iter/s)": 0.410582 }, { "acc": 0.95393438, "epoch": 3.8250345219722197, "grad_norm": 9.074613571166992, "learning_rate": 5.321303662736684e-08, "loss": 0.31746728, "memory(GiB)": 34.88, "step": 141270, "train_speed(iter/s)": 0.410583 }, { "acc": 0.94282646, "epoch": 3.8251699022554355, "grad_norm": 6.463446617126465, "learning_rate": 5.313240843222308e-08, "loss": 0.39430811, "memory(GiB)": 34.88, "step": 141275, "train_speed(iter/s)": 0.410584 }, { "acc": 0.96149321, "epoch": 3.8253052825386513, "grad_norm": 4.985572814941406, "learning_rate": 5.305184221258036e-08, "loss": 0.2351589, "memory(GiB)": 34.88, "step": 141280, "train_speed(iter/s)": 0.410584 }, { "acc": 0.93776665, "epoch": 3.8254406628218667, "grad_norm": 6.552615642547607, "learning_rate": 5.2971337969446675e-08, "loss": 0.360357, "memory(GiB)": 34.88, "step": 141285, "train_speed(iter/s)": 0.410585 }, { "acc": 0.94615164, "epoch": 3.825576043105082, "grad_norm": 10.214506149291992, "learning_rate": 5.2890895703832224e-08, "loss": 0.33767624, "memory(GiB)": 34.88, "step": 141290, "train_speed(iter/s)": 0.410586 }, { "acc": 0.95210819, "epoch": 3.825711423388298, "grad_norm": 10.794337272644043, "learning_rate": 5.281051541674331e-08, "loss": 0.21985021, "memory(GiB)": 34.88, "step": 141295, "train_speed(iter/s)": 0.410587 }, { "acc": 0.95118484, "epoch": 3.825846803671513, "grad_norm": 6.090012073516846, "learning_rate": 5.273019710918738e-08, "loss": 0.26674676, "memory(GiB)": 34.88, "step": 141300, "train_speed(iter/s)": 0.410588 }, { "acc": 0.95266247, "epoch": 3.825982183954729, "grad_norm": 11.857946395874023, "learning_rate": 5.264994078217074e-08, "loss": 0.26329589, "memory(GiB)": 34.88, "step": 141305, "train_speed(iter/s)": 0.410589 }, { "acc": 0.94802437, "epoch": 3.8261175642379444, "grad_norm": 8.066407203674316, "learning_rate": 5.2569746436698595e-08, "loss": 0.32429113, "memory(GiB)": 34.88, "step": 141310, "train_speed(iter/s)": 0.410589 }, { "acc": 0.95145273, "epoch": 3.82625294452116, "grad_norm": 8.563536643981934, "learning_rate": 5.24896140737756e-08, "loss": 0.31223602, "memory(GiB)": 34.88, "step": 141315, "train_speed(iter/s)": 0.41059 }, { "acc": 0.96280975, "epoch": 3.8263883248043755, "grad_norm": 7.122782230377197, "learning_rate": 5.24095436944053e-08, "loss": 0.20764017, "memory(GiB)": 34.88, "step": 141320, "train_speed(iter/s)": 0.41059 }, { "acc": 0.96329622, "epoch": 3.826523705087591, "grad_norm": 3.7372887134552, "learning_rate": 5.2329535299591226e-08, "loss": 0.22177248, "memory(GiB)": 34.88, "step": 141325, "train_speed(iter/s)": 0.410591 }, { "acc": 0.93961239, "epoch": 3.8266590853708067, "grad_norm": 9.312870979309082, "learning_rate": 5.224958889033525e-08, "loss": 0.37811718, "memory(GiB)": 34.88, "step": 141330, "train_speed(iter/s)": 0.410592 }, { "acc": 0.95016289, "epoch": 3.826794465654022, "grad_norm": 5.308924198150635, "learning_rate": 5.216970446763871e-08, "loss": 0.30542932, "memory(GiB)": 34.88, "step": 141335, "train_speed(iter/s)": 0.410593 }, { "acc": 0.95169754, "epoch": 3.826929845937238, "grad_norm": 5.31019926071167, "learning_rate": 5.208988203250293e-08, "loss": 0.23958845, "memory(GiB)": 34.88, "step": 141340, "train_speed(iter/s)": 0.410594 }, { "acc": 0.93783989, "epoch": 3.827065226220453, "grad_norm": 9.25975227355957, "learning_rate": 5.2010121585927546e-08, "loss": 0.40300236, "memory(GiB)": 34.88, "step": 141345, "train_speed(iter/s)": 0.410594 }, { "acc": 0.94855213, "epoch": 3.827200606503669, "grad_norm": 4.6017584800720215, "learning_rate": 5.193042312891111e-08, "loss": 0.32027497, "memory(GiB)": 34.88, "step": 141350, "train_speed(iter/s)": 0.410595 }, { "acc": 0.94556236, "epoch": 3.8273359867868844, "grad_norm": 10.709040641784668, "learning_rate": 5.185078666245329e-08, "loss": 0.34981222, "memory(GiB)": 34.88, "step": 141355, "train_speed(iter/s)": 0.410596 }, { "acc": 0.95748205, "epoch": 3.8274713670700997, "grad_norm": 10.64987564086914, "learning_rate": 5.1771212187550395e-08, "loss": 0.32507219, "memory(GiB)": 34.88, "step": 141360, "train_speed(iter/s)": 0.410596 }, { "acc": 0.95099134, "epoch": 3.8276067473533155, "grad_norm": 10.399184226989746, "learning_rate": 5.1691699705199865e-08, "loss": 0.29697261, "memory(GiB)": 34.88, "step": 141365, "train_speed(iter/s)": 0.410597 }, { "acc": 0.94968243, "epoch": 3.827742127636531, "grad_norm": 5.9337992668151855, "learning_rate": 5.161224921639804e-08, "loss": 0.28517592, "memory(GiB)": 34.88, "step": 141370, "train_speed(iter/s)": 0.410598 }, { "acc": 0.95267868, "epoch": 3.8278775079197467, "grad_norm": 5.029179096221924, "learning_rate": 5.153286072213956e-08, "loss": 0.30658731, "memory(GiB)": 34.88, "step": 141375, "train_speed(iter/s)": 0.410598 }, { "acc": 0.9575305, "epoch": 3.828012888202962, "grad_norm": 2.7109949588775635, "learning_rate": 5.145353422341909e-08, "loss": 0.28802352, "memory(GiB)": 34.88, "step": 141380, "train_speed(iter/s)": 0.410599 }, { "acc": 0.93246269, "epoch": 3.828148268486178, "grad_norm": 8.78370189666748, "learning_rate": 5.137426972123075e-08, "loss": 0.41315603, "memory(GiB)": 34.88, "step": 141385, "train_speed(iter/s)": 0.4106 }, { "acc": 0.93807354, "epoch": 3.828283648769393, "grad_norm": 3.272609233856201, "learning_rate": 5.1295067216566964e-08, "loss": 0.38163252, "memory(GiB)": 34.88, "step": 141390, "train_speed(iter/s)": 0.4106 }, { "acc": 0.94695663, "epoch": 3.8284190290526086, "grad_norm": 18.609514236450195, "learning_rate": 5.121592671042018e-08, "loss": 0.37698796, "memory(GiB)": 34.88, "step": 141395, "train_speed(iter/s)": 0.410601 }, { "acc": 0.96245441, "epoch": 3.8285544093358244, "grad_norm": 11.830657005310059, "learning_rate": 5.113684820378172e-08, "loss": 0.22423267, "memory(GiB)": 34.88, "step": 141400, "train_speed(iter/s)": 0.410602 }, { "acc": 0.96096048, "epoch": 3.8286897896190397, "grad_norm": 22.333703994750977, "learning_rate": 5.105783169764238e-08, "loss": 0.25720317, "memory(GiB)": 34.88, "step": 141405, "train_speed(iter/s)": 0.410603 }, { "acc": 0.94828138, "epoch": 3.8288251699022555, "grad_norm": 4.162295341491699, "learning_rate": 5.097887719299179e-08, "loss": 0.33905084, "memory(GiB)": 34.88, "step": 141410, "train_speed(iter/s)": 0.410604 }, { "acc": 0.96297283, "epoch": 3.828960550185471, "grad_norm": 2.022021770477295, "learning_rate": 5.0899984690819095e-08, "loss": 0.22707195, "memory(GiB)": 34.88, "step": 141415, "train_speed(iter/s)": 0.410605 }, { "acc": 0.94551439, "epoch": 3.8290959304686867, "grad_norm": 4.847963333129883, "learning_rate": 5.0821154192112266e-08, "loss": 0.36395667, "memory(GiB)": 34.88, "step": 141420, "train_speed(iter/s)": 0.410605 }, { "acc": 0.93896103, "epoch": 3.829231310751902, "grad_norm": 7.537235260009766, "learning_rate": 5.074238569785933e-08, "loss": 0.32674685, "memory(GiB)": 34.88, "step": 141425, "train_speed(iter/s)": 0.410606 }, { "acc": 0.95617704, "epoch": 3.8293666910351174, "grad_norm": 5.626035213470459, "learning_rate": 5.0663679209046596e-08, "loss": 0.2381464, "memory(GiB)": 34.88, "step": 141430, "train_speed(iter/s)": 0.410607 }, { "acc": 0.95314159, "epoch": 3.829502071318333, "grad_norm": 11.61021900177002, "learning_rate": 5.058503472666041e-08, "loss": 0.25668435, "memory(GiB)": 34.88, "step": 141435, "train_speed(iter/s)": 0.410608 }, { "acc": 0.94771976, "epoch": 3.829637451601549, "grad_norm": 10.707503318786621, "learning_rate": 5.0506452251684896e-08, "loss": 0.32197914, "memory(GiB)": 34.88, "step": 141440, "train_speed(iter/s)": 0.410609 }, { "acc": 0.9604517, "epoch": 3.8297728318847644, "grad_norm": 4.336618900299072, "learning_rate": 5.042793178510637e-08, "loss": 0.21640048, "memory(GiB)": 34.88, "step": 141445, "train_speed(iter/s)": 0.410609 }, { "acc": 0.94523888, "epoch": 3.8299082121679797, "grad_norm": 5.885317802429199, "learning_rate": 5.034947332790729e-08, "loss": 0.37691591, "memory(GiB)": 34.88, "step": 141450, "train_speed(iter/s)": 0.41061 }, { "acc": 0.96041403, "epoch": 3.8300435924511955, "grad_norm": 2.3166356086730957, "learning_rate": 5.027107688107011e-08, "loss": 0.21235607, "memory(GiB)": 34.88, "step": 141455, "train_speed(iter/s)": 0.410611 }, { "acc": 0.94111757, "epoch": 3.830178972734411, "grad_norm": 6.294549465179443, "learning_rate": 5.019274244557727e-08, "loss": 0.38671675, "memory(GiB)": 34.88, "step": 141460, "train_speed(iter/s)": 0.410612 }, { "acc": 0.95895386, "epoch": 3.8303143530176262, "grad_norm": 7.104058742523193, "learning_rate": 5.0114470022410664e-08, "loss": 0.19647142, "memory(GiB)": 34.88, "step": 141465, "train_speed(iter/s)": 0.410612 }, { "acc": 0.95391836, "epoch": 3.830449733300842, "grad_norm": 18.331222534179688, "learning_rate": 5.003625961254998e-08, "loss": 0.29649265, "memory(GiB)": 34.88, "step": 141470, "train_speed(iter/s)": 0.410613 }, { "acc": 0.94171658, "epoch": 3.830585113584058, "grad_norm": 2.8648056983947754, "learning_rate": 4.995811121697543e-08, "loss": 0.35959692, "memory(GiB)": 34.88, "step": 141475, "train_speed(iter/s)": 0.410614 }, { "acc": 0.94160271, "epoch": 3.830720493867273, "grad_norm": 9.576526641845703, "learning_rate": 4.988002483666559e-08, "loss": 0.36860843, "memory(GiB)": 34.88, "step": 141480, "train_speed(iter/s)": 0.410615 }, { "acc": 0.94631634, "epoch": 3.8308558741504886, "grad_norm": 8.59821891784668, "learning_rate": 4.980200047259901e-08, "loss": 0.37576232, "memory(GiB)": 34.88, "step": 141485, "train_speed(iter/s)": 0.410615 }, { "acc": 0.96274128, "epoch": 3.8309912544337044, "grad_norm": 7.0556488037109375, "learning_rate": 4.9724038125753715e-08, "loss": 0.22498589, "memory(GiB)": 34.88, "step": 141490, "train_speed(iter/s)": 0.410616 }, { "acc": 0.95430393, "epoch": 3.8311266347169197, "grad_norm": 6.719142913818359, "learning_rate": 4.9646137797104366e-08, "loss": 0.3139349, "memory(GiB)": 34.88, "step": 141495, "train_speed(iter/s)": 0.410617 }, { "acc": 0.95333366, "epoch": 3.8312620150001355, "grad_norm": 5.83986234664917, "learning_rate": 4.956829948762899e-08, "loss": 0.26984453, "memory(GiB)": 34.88, "step": 141500, "train_speed(iter/s)": 0.410618 }, { "acc": 0.95199423, "epoch": 3.831397395283351, "grad_norm": 17.88225555419922, "learning_rate": 4.949052319830114e-08, "loss": 0.32413568, "memory(GiB)": 34.88, "step": 141505, "train_speed(iter/s)": 0.410618 }, { "acc": 0.95645866, "epoch": 3.8315327755665667, "grad_norm": 7.9883198738098145, "learning_rate": 4.941280893009606e-08, "loss": 0.25852981, "memory(GiB)": 34.88, "step": 141510, "train_speed(iter/s)": 0.410619 }, { "acc": 0.94299641, "epoch": 3.831668155849782, "grad_norm": 11.227608680725098, "learning_rate": 4.9335156683986756e-08, "loss": 0.31923189, "memory(GiB)": 34.88, "step": 141515, "train_speed(iter/s)": 0.41062 }, { "acc": 0.95473385, "epoch": 3.8318035361329974, "grad_norm": 6.546919345855713, "learning_rate": 4.925756646094624e-08, "loss": 0.27329905, "memory(GiB)": 34.88, "step": 141520, "train_speed(iter/s)": 0.410621 }, { "acc": 0.94473772, "epoch": 3.831938916416213, "grad_norm": 7.835392951965332, "learning_rate": 4.9180038261946434e-08, "loss": 0.31470675, "memory(GiB)": 34.88, "step": 141525, "train_speed(iter/s)": 0.410621 }, { "acc": 0.94293671, "epoch": 3.8320742966994286, "grad_norm": 4.657116413116455, "learning_rate": 4.91025720879581e-08, "loss": 0.31253612, "memory(GiB)": 34.88, "step": 141530, "train_speed(iter/s)": 0.410622 }, { "acc": 0.93919268, "epoch": 3.8322096769826444, "grad_norm": 5.973331451416016, "learning_rate": 4.902516793995205e-08, "loss": 0.33717537, "memory(GiB)": 34.88, "step": 141535, "train_speed(iter/s)": 0.410623 }, { "acc": 0.95483437, "epoch": 3.8323450572658597, "grad_norm": 6.707361698150635, "learning_rate": 4.894782581889852e-08, "loss": 0.2644712, "memory(GiB)": 34.88, "step": 141540, "train_speed(iter/s)": 0.410624 }, { "acc": 0.95188084, "epoch": 3.8324804375490755, "grad_norm": 8.839396476745605, "learning_rate": 4.887054572576498e-08, "loss": 0.33092389, "memory(GiB)": 34.88, "step": 141545, "train_speed(iter/s)": 0.410625 }, { "acc": 0.96176825, "epoch": 3.832615817832291, "grad_norm": 5.111814498901367, "learning_rate": 4.879332766152109e-08, "loss": 0.24643803, "memory(GiB)": 34.88, "step": 141550, "train_speed(iter/s)": 0.410625 }, { "acc": 0.94660196, "epoch": 3.8327511981155062, "grad_norm": 5.761546611785889, "learning_rate": 4.8716171627132675e-08, "loss": 0.32077644, "memory(GiB)": 34.88, "step": 141555, "train_speed(iter/s)": 0.410626 }, { "acc": 0.92992096, "epoch": 3.832886578398722, "grad_norm": 5.753607749938965, "learning_rate": 4.8639077623567733e-08, "loss": 0.39981723, "memory(GiB)": 34.88, "step": 141560, "train_speed(iter/s)": 0.410627 }, { "acc": 0.94317646, "epoch": 3.8330219586819374, "grad_norm": 8.905831336975098, "learning_rate": 4.856204565178984e-08, "loss": 0.36723104, "memory(GiB)": 34.88, "step": 141565, "train_speed(iter/s)": 0.410628 }, { "acc": 0.95723152, "epoch": 3.833157338965153, "grad_norm": 6.835576057434082, "learning_rate": 4.848507571276648e-08, "loss": 0.29117012, "memory(GiB)": 34.88, "step": 141570, "train_speed(iter/s)": 0.410629 }, { "acc": 0.9578661, "epoch": 3.8332927192483686, "grad_norm": 4.790050983428955, "learning_rate": 4.840816780746065e-08, "loss": 0.26529665, "memory(GiB)": 34.88, "step": 141575, "train_speed(iter/s)": 0.410629 }, { "acc": 0.93448849, "epoch": 3.8334280995315844, "grad_norm": 5.597969055175781, "learning_rate": 4.833132193683594e-08, "loss": 0.39015551, "memory(GiB)": 34.88, "step": 141580, "train_speed(iter/s)": 0.41063 }, { "acc": 0.93508205, "epoch": 3.8335634798147997, "grad_norm": 7.592906475067139, "learning_rate": 4.8254538101854265e-08, "loss": 0.36060801, "memory(GiB)": 34.88, "step": 141585, "train_speed(iter/s)": 0.410631 }, { "acc": 0.94213066, "epoch": 3.833698860098015, "grad_norm": 7.244316577911377, "learning_rate": 4.817781630347865e-08, "loss": 0.3500036, "memory(GiB)": 34.88, "step": 141590, "train_speed(iter/s)": 0.410632 }, { "acc": 0.9409874, "epoch": 3.833834240381231, "grad_norm": 13.084147453308105, "learning_rate": 4.8101156542669874e-08, "loss": 0.4206934, "memory(GiB)": 34.88, "step": 141595, "train_speed(iter/s)": 0.410633 }, { "acc": 0.94390984, "epoch": 3.8339696206644467, "grad_norm": 7.725250720977783, "learning_rate": 4.802455882038765e-08, "loss": 0.34161029, "memory(GiB)": 34.88, "step": 141600, "train_speed(iter/s)": 0.410634 }, { "acc": 0.95714684, "epoch": 3.834105000947662, "grad_norm": 8.996435165405273, "learning_rate": 4.794802313759222e-08, "loss": 0.24764757, "memory(GiB)": 34.88, "step": 141605, "train_speed(iter/s)": 0.410634 }, { "acc": 0.95139751, "epoch": 3.8342403812308774, "grad_norm": 5.220604419708252, "learning_rate": 4.7871549495241614e-08, "loss": 0.29415834, "memory(GiB)": 34.88, "step": 141610, "train_speed(iter/s)": 0.410635 }, { "acc": 0.95754986, "epoch": 3.834375761514093, "grad_norm": 7.230320930480957, "learning_rate": 4.779513789429496e-08, "loss": 0.33235552, "memory(GiB)": 34.88, "step": 141615, "train_speed(iter/s)": 0.410636 }, { "acc": 0.95398083, "epoch": 3.8345111417973086, "grad_norm": 7.003270149230957, "learning_rate": 4.7718788335708066e-08, "loss": 0.2589237, "memory(GiB)": 34.88, "step": 141620, "train_speed(iter/s)": 0.410637 }, { "acc": 0.95577526, "epoch": 3.834646522080524, "grad_norm": 4.906877517700195, "learning_rate": 4.7642500820437864e-08, "loss": 0.2463182, "memory(GiB)": 34.88, "step": 141625, "train_speed(iter/s)": 0.410638 }, { "acc": 0.9411644, "epoch": 3.8347819023637397, "grad_norm": 25.2806396484375, "learning_rate": 4.756627534944124e-08, "loss": 0.36517539, "memory(GiB)": 34.88, "step": 141630, "train_speed(iter/s)": 0.410638 }, { "acc": 0.95434475, "epoch": 3.8349172826469555, "grad_norm": 12.796518325805664, "learning_rate": 4.7490111923670704e-08, "loss": 0.30901442, "memory(GiB)": 34.88, "step": 141635, "train_speed(iter/s)": 0.410639 }, { "acc": 0.94122734, "epoch": 3.835052662930171, "grad_norm": 5.057336807250977, "learning_rate": 4.7414010544082594e-08, "loss": 0.3874361, "memory(GiB)": 34.88, "step": 141640, "train_speed(iter/s)": 0.41064 }, { "acc": 0.94456654, "epoch": 3.8351880432133862, "grad_norm": 3.5819175243377686, "learning_rate": 4.7337971211628843e-08, "loss": 0.39686198, "memory(GiB)": 34.88, "step": 141645, "train_speed(iter/s)": 0.410641 }, { "acc": 0.95295753, "epoch": 3.835323423496602, "grad_norm": 12.367154121398926, "learning_rate": 4.726199392726304e-08, "loss": 0.23110237, "memory(GiB)": 34.88, "step": 141650, "train_speed(iter/s)": 0.410642 }, { "acc": 0.93231583, "epoch": 3.8354588037798174, "grad_norm": 7.514230728149414, "learning_rate": 4.7186078691935975e-08, "loss": 0.4224185, "memory(GiB)": 34.88, "step": 141655, "train_speed(iter/s)": 0.410642 }, { "acc": 0.95577221, "epoch": 3.835594184063033, "grad_norm": 3.1515958309173584, "learning_rate": 4.711022550659904e-08, "loss": 0.25008144, "memory(GiB)": 34.88, "step": 141660, "train_speed(iter/s)": 0.410643 }, { "acc": 0.95102081, "epoch": 3.8357295643462486, "grad_norm": 7.795410633087158, "learning_rate": 4.703443437220247e-08, "loss": 0.29213629, "memory(GiB)": 34.88, "step": 141665, "train_speed(iter/s)": 0.410644 }, { "acc": 0.9383791, "epoch": 3.8358649446294644, "grad_norm": 10.298036575317383, "learning_rate": 4.695870528969486e-08, "loss": 0.34599214, "memory(GiB)": 34.88, "step": 141670, "train_speed(iter/s)": 0.410645 }, { "acc": 0.95113506, "epoch": 3.8360003249126797, "grad_norm": 7.768913745880127, "learning_rate": 4.6883038260027034e-08, "loss": 0.26038747, "memory(GiB)": 34.88, "step": 141675, "train_speed(iter/s)": 0.410646 }, { "acc": 0.94140205, "epoch": 3.836135705195895, "grad_norm": 16.995935440063477, "learning_rate": 4.680743328414425e-08, "loss": 0.35692706, "memory(GiB)": 34.88, "step": 141680, "train_speed(iter/s)": 0.410646 }, { "acc": 0.94802189, "epoch": 3.836271085479111, "grad_norm": 6.226894378662109, "learning_rate": 4.673189036299564e-08, "loss": 0.29132257, "memory(GiB)": 34.88, "step": 141685, "train_speed(iter/s)": 0.410647 }, { "acc": 0.9557003, "epoch": 3.8364064657623262, "grad_norm": 10.853270530700684, "learning_rate": 4.665640949752647e-08, "loss": 0.31235912, "memory(GiB)": 34.88, "step": 141690, "train_speed(iter/s)": 0.410648 }, { "acc": 0.94898453, "epoch": 3.836541846045542, "grad_norm": 14.441444396972656, "learning_rate": 4.6580990688682555e-08, "loss": 0.33476298, "memory(GiB)": 34.88, "step": 141695, "train_speed(iter/s)": 0.410649 }, { "acc": 0.95850067, "epoch": 3.8366772263287574, "grad_norm": 4.212387561798096, "learning_rate": 4.650563393740917e-08, "loss": 0.25822828, "memory(GiB)": 34.88, "step": 141700, "train_speed(iter/s)": 0.41065 }, { "acc": 0.94696302, "epoch": 3.836812606611973, "grad_norm": 7.890814781188965, "learning_rate": 4.6430339244648776e-08, "loss": 0.3226182, "memory(GiB)": 34.88, "step": 141705, "train_speed(iter/s)": 0.410651 }, { "acc": 0.93831062, "epoch": 3.8369479868951886, "grad_norm": 2.6433258056640625, "learning_rate": 4.635510661134665e-08, "loss": 0.46820288, "memory(GiB)": 34.88, "step": 141710, "train_speed(iter/s)": 0.410651 }, { "acc": 0.9469265, "epoch": 3.837083367178404, "grad_norm": 6.45164680480957, "learning_rate": 4.627993603844362e-08, "loss": 0.34637668, "memory(GiB)": 34.88, "step": 141715, "train_speed(iter/s)": 0.410652 }, { "acc": 0.95862942, "epoch": 3.8372187474616197, "grad_norm": 11.116667747497559, "learning_rate": 4.620482752688214e-08, "loss": 0.29979248, "memory(GiB)": 34.88, "step": 141720, "train_speed(iter/s)": 0.410653 }, { "acc": 0.94083853, "epoch": 3.837354127744835, "grad_norm": 6.461864471435547, "learning_rate": 4.61297810776025e-08, "loss": 0.35490875, "memory(GiB)": 34.88, "step": 141725, "train_speed(iter/s)": 0.410654 }, { "acc": 0.94884691, "epoch": 3.837489508028051, "grad_norm": 20.293960571289062, "learning_rate": 4.605479669154551e-08, "loss": 0.27052345, "memory(GiB)": 34.88, "step": 141730, "train_speed(iter/s)": 0.410655 }, { "acc": 0.93892813, "epoch": 3.8376248883112662, "grad_norm": 3.53778338432312, "learning_rate": 4.597987436964977e-08, "loss": 0.30712357, "memory(GiB)": 34.88, "step": 141735, "train_speed(iter/s)": 0.410655 }, { "acc": 0.94104404, "epoch": 3.837760268594482, "grad_norm": 26.533897399902344, "learning_rate": 4.5905014112854446e-08, "loss": 0.34576893, "memory(GiB)": 34.88, "step": 141740, "train_speed(iter/s)": 0.410656 }, { "acc": 0.95787449, "epoch": 3.8378956488776974, "grad_norm": 4.297191619873047, "learning_rate": 4.583021592209757e-08, "loss": 0.26032112, "memory(GiB)": 34.88, "step": 141745, "train_speed(iter/s)": 0.410657 }, { "acc": 0.96630945, "epoch": 3.8380310291609128, "grad_norm": 3.8058414459228516, "learning_rate": 4.575547979831498e-08, "loss": 0.23354158, "memory(GiB)": 34.88, "step": 141750, "train_speed(iter/s)": 0.410658 }, { "acc": 0.93533611, "epoch": 3.8381664094441286, "grad_norm": 9.293259620666504, "learning_rate": 4.568080574244414e-08, "loss": 0.38236113, "memory(GiB)": 34.88, "step": 141755, "train_speed(iter/s)": 0.410659 }, { "acc": 0.95618114, "epoch": 3.8383017897273444, "grad_norm": 7.018805027008057, "learning_rate": 4.560619375541924e-08, "loss": 0.23034425, "memory(GiB)": 34.88, "step": 141760, "train_speed(iter/s)": 0.410659 }, { "acc": 0.95567036, "epoch": 3.8384371700105597, "grad_norm": 4.922165870666504, "learning_rate": 4.5531643838176075e-08, "loss": 0.28159437, "memory(GiB)": 34.88, "step": 141765, "train_speed(iter/s)": 0.41066 }, { "acc": 0.94793453, "epoch": 3.838572550293775, "grad_norm": 5.012023448944092, "learning_rate": 4.545715599164827e-08, "loss": 0.40272102, "memory(GiB)": 34.88, "step": 141770, "train_speed(iter/s)": 0.410661 }, { "acc": 0.94995995, "epoch": 3.838707930576991, "grad_norm": 4.880342960357666, "learning_rate": 4.538273021676832e-08, "loss": 0.31853018, "memory(GiB)": 34.88, "step": 141775, "train_speed(iter/s)": 0.410662 }, { "acc": 0.94699717, "epoch": 3.8388433108602062, "grad_norm": 10.381771087646484, "learning_rate": 4.530836651446926e-08, "loss": 0.32483504, "memory(GiB)": 34.88, "step": 141780, "train_speed(iter/s)": 0.410663 }, { "acc": 0.95538464, "epoch": 3.8389786911434216, "grad_norm": 6.887238502502441, "learning_rate": 4.523406488568193e-08, "loss": 0.30055356, "memory(GiB)": 34.88, "step": 141785, "train_speed(iter/s)": 0.410663 }, { "acc": 0.93413486, "epoch": 3.8391140714266374, "grad_norm": 7.459469318389893, "learning_rate": 4.515982533133771e-08, "loss": 0.3664876, "memory(GiB)": 34.88, "step": 141790, "train_speed(iter/s)": 0.410664 }, { "acc": 0.95876217, "epoch": 3.839249451709853, "grad_norm": 16.624828338623047, "learning_rate": 4.508564785236632e-08, "loss": 0.32516019, "memory(GiB)": 34.88, "step": 141795, "train_speed(iter/s)": 0.410665 }, { "acc": 0.93729553, "epoch": 3.8393848319930686, "grad_norm": 6.613325119018555, "learning_rate": 4.501153244969692e-08, "loss": 0.35200548, "memory(GiB)": 34.88, "step": 141800, "train_speed(iter/s)": 0.410666 }, { "acc": 0.94342041, "epoch": 3.839520212276284, "grad_norm": 6.256124496459961, "learning_rate": 4.493747912425868e-08, "loss": 0.27322626, "memory(GiB)": 34.88, "step": 141805, "train_speed(iter/s)": 0.410667 }, { "acc": 0.94923668, "epoch": 3.8396555925594997, "grad_norm": 2.186461925506592, "learning_rate": 4.486348787697798e-08, "loss": 0.3090735, "memory(GiB)": 34.88, "step": 141810, "train_speed(iter/s)": 0.410668 }, { "acc": 0.95113544, "epoch": 3.839790972842715, "grad_norm": 9.406451225280762, "learning_rate": 4.478955870878288e-08, "loss": 0.30199444, "memory(GiB)": 34.88, "step": 141815, "train_speed(iter/s)": 0.410668 }, { "acc": 0.96239891, "epoch": 3.839926353125931, "grad_norm": 3.606714963912964, "learning_rate": 4.471569162059866e-08, "loss": 0.22909393, "memory(GiB)": 34.88, "step": 141820, "train_speed(iter/s)": 0.410669 }, { "acc": 0.94873314, "epoch": 3.8400617334091462, "grad_norm": 5.271295547485352, "learning_rate": 4.4641886613351705e-08, "loss": 0.25884569, "memory(GiB)": 34.88, "step": 141825, "train_speed(iter/s)": 0.41067 }, { "acc": 0.95244579, "epoch": 3.840197113692362, "grad_norm": 6.72697639465332, "learning_rate": 4.4568143687965065e-08, "loss": 0.29270205, "memory(GiB)": 34.88, "step": 141830, "train_speed(iter/s)": 0.410671 }, { "acc": 0.95577269, "epoch": 3.8403324939755774, "grad_norm": 6.124117851257324, "learning_rate": 4.449446284536348e-08, "loss": 0.25656333, "memory(GiB)": 34.88, "step": 141835, "train_speed(iter/s)": 0.410671 }, { "acc": 0.94450245, "epoch": 3.8404678742587928, "grad_norm": 14.124917984008789, "learning_rate": 4.442084408647e-08, "loss": 0.35236917, "memory(GiB)": 34.88, "step": 141840, "train_speed(iter/s)": 0.410672 }, { "acc": 0.94588909, "epoch": 3.8406032545420086, "grad_norm": 9.091044425964355, "learning_rate": 4.434728741220657e-08, "loss": 0.34079022, "memory(GiB)": 34.88, "step": 141845, "train_speed(iter/s)": 0.410673 }, { "acc": 0.94507542, "epoch": 3.840738634825224, "grad_norm": 4.301249027252197, "learning_rate": 4.4273792823494586e-08, "loss": 0.28387389, "memory(GiB)": 34.88, "step": 141850, "train_speed(iter/s)": 0.410674 }, { "acc": 0.94770393, "epoch": 3.8408740151084397, "grad_norm": 5.269134521484375, "learning_rate": 4.420036032125489e-08, "loss": 0.29551926, "memory(GiB)": 34.88, "step": 141855, "train_speed(iter/s)": 0.410674 }, { "acc": 0.95919113, "epoch": 3.841009395391655, "grad_norm": 4.413309574127197, "learning_rate": 4.4126989906407206e-08, "loss": 0.22407236, "memory(GiB)": 34.88, "step": 141860, "train_speed(iter/s)": 0.410675 }, { "acc": 0.95441628, "epoch": 3.841144775674871, "grad_norm": 8.709885597229004, "learning_rate": 4.4053681579870695e-08, "loss": 0.21940503, "memory(GiB)": 34.88, "step": 141865, "train_speed(iter/s)": 0.410676 }, { "acc": 0.97420006, "epoch": 3.8412801559580863, "grad_norm": 4.643728256225586, "learning_rate": 4.3980435342564e-08, "loss": 0.17600436, "memory(GiB)": 34.88, "step": 141870, "train_speed(iter/s)": 0.410677 }, { "acc": 0.9547987, "epoch": 3.8414155362413016, "grad_norm": 19.44068145751953, "learning_rate": 4.390725119540461e-08, "loss": 0.25364254, "memory(GiB)": 34.88, "step": 141875, "train_speed(iter/s)": 0.410678 }, { "acc": 0.95328026, "epoch": 3.8415509165245174, "grad_norm": 3.5430943965911865, "learning_rate": 4.3834129139308364e-08, "loss": 0.29671257, "memory(GiB)": 34.88, "step": 141880, "train_speed(iter/s)": 0.410678 }, { "acc": 0.93852615, "epoch": 3.8416862968077328, "grad_norm": 9.184981346130371, "learning_rate": 4.376106917519224e-08, "loss": 0.35461597, "memory(GiB)": 34.88, "step": 141885, "train_speed(iter/s)": 0.410679 }, { "acc": 0.96095371, "epoch": 3.8418216770909486, "grad_norm": 7.196130752563477, "learning_rate": 4.368807130397149e-08, "loss": 0.24632452, "memory(GiB)": 34.88, "step": 141890, "train_speed(iter/s)": 0.41068 }, { "acc": 0.94847965, "epoch": 3.841957057374164, "grad_norm": 6.466761112213135, "learning_rate": 4.361513552656032e-08, "loss": 0.30275698, "memory(GiB)": 34.88, "step": 141895, "train_speed(iter/s)": 0.410681 }, { "acc": 0.95219994, "epoch": 3.8420924376573797, "grad_norm": 5.834844589233398, "learning_rate": 4.354226184387178e-08, "loss": 0.30921707, "memory(GiB)": 34.88, "step": 141900, "train_speed(iter/s)": 0.410682 }, { "acc": 0.94084339, "epoch": 3.842227817940595, "grad_norm": 15.412415504455566, "learning_rate": 4.3469450256819504e-08, "loss": 0.37306645, "memory(GiB)": 34.88, "step": 141905, "train_speed(iter/s)": 0.410682 }, { "acc": 0.94679813, "epoch": 3.8423631982238104, "grad_norm": 4.718830585479736, "learning_rate": 4.339670076631544e-08, "loss": 0.32827847, "memory(GiB)": 34.88, "step": 141910, "train_speed(iter/s)": 0.410683 }, { "acc": 0.93759842, "epoch": 3.8424985785070263, "grad_norm": 8.301324844360352, "learning_rate": 4.3324013373270994e-08, "loss": 0.36683474, "memory(GiB)": 34.88, "step": 141915, "train_speed(iter/s)": 0.410684 }, { "acc": 0.95757027, "epoch": 3.842633958790242, "grad_norm": 4.50015926361084, "learning_rate": 4.325138807859645e-08, "loss": 0.24074318, "memory(GiB)": 34.88, "step": 141920, "train_speed(iter/s)": 0.410685 }, { "acc": 0.95068731, "epoch": 3.8427693390734574, "grad_norm": 7.290815830230713, "learning_rate": 4.3178824883202124e-08, "loss": 0.31777158, "memory(GiB)": 34.88, "step": 141925, "train_speed(iter/s)": 0.410685 }, { "acc": 0.93506107, "epoch": 3.8429047193566728, "grad_norm": 9.251753807067871, "learning_rate": 4.310632378799661e-08, "loss": 0.41704912, "memory(GiB)": 34.88, "step": 141930, "train_speed(iter/s)": 0.410686 }, { "acc": 0.94500809, "epoch": 3.8430400996398886, "grad_norm": 2.8889784812927246, "learning_rate": 4.3033884793888e-08, "loss": 0.34159305, "memory(GiB)": 34.88, "step": 141935, "train_speed(iter/s)": 0.410687 }, { "acc": 0.95085764, "epoch": 3.843175479923104, "grad_norm": 8.362885475158691, "learning_rate": 4.2961507901783254e-08, "loss": 0.33816645, "memory(GiB)": 34.88, "step": 141940, "train_speed(iter/s)": 0.410688 }, { "acc": 0.94557419, "epoch": 3.8433108602063193, "grad_norm": 7.213193893432617, "learning_rate": 4.288919311259044e-08, "loss": 0.32515373, "memory(GiB)": 34.88, "step": 141945, "train_speed(iter/s)": 0.410688 }, { "acc": 0.9592926, "epoch": 3.843446240489535, "grad_norm": 5.244096755981445, "learning_rate": 4.281694042721486e-08, "loss": 0.26839969, "memory(GiB)": 34.88, "step": 141950, "train_speed(iter/s)": 0.410689 }, { "acc": 0.9536665, "epoch": 3.843581620772751, "grad_norm": 3.7463152408599854, "learning_rate": 4.2744749846560686e-08, "loss": 0.31699684, "memory(GiB)": 34.88, "step": 141955, "train_speed(iter/s)": 0.41069 }, { "acc": 0.96109886, "epoch": 3.8437170010559663, "grad_norm": 5.320306301116943, "learning_rate": 4.267262137153377e-08, "loss": 0.25575399, "memory(GiB)": 34.88, "step": 141960, "train_speed(iter/s)": 0.410691 }, { "acc": 0.95139465, "epoch": 3.8438523813391816, "grad_norm": 11.014052391052246, "learning_rate": 4.26005550030361e-08, "loss": 0.30071695, "memory(GiB)": 34.88, "step": 141965, "train_speed(iter/s)": 0.410692 }, { "acc": 0.95508041, "epoch": 3.8439877616223974, "grad_norm": 4.955997943878174, "learning_rate": 4.252855074197185e-08, "loss": 0.2661994, "memory(GiB)": 34.88, "step": 141970, "train_speed(iter/s)": 0.410693 }, { "acc": 0.95619259, "epoch": 3.8441231419056128, "grad_norm": 7.478403568267822, "learning_rate": 4.2456608589241866e-08, "loss": 0.25306218, "memory(GiB)": 34.88, "step": 141975, "train_speed(iter/s)": 0.410693 }, { "acc": 0.94146242, "epoch": 3.8442585221888286, "grad_norm": 5.8810577392578125, "learning_rate": 4.238472854574813e-08, "loss": 0.38315611, "memory(GiB)": 34.88, "step": 141980, "train_speed(iter/s)": 0.410694 }, { "acc": 0.93704357, "epoch": 3.844393902472044, "grad_norm": 11.831148147583008, "learning_rate": 4.231291061239093e-08, "loss": 0.45590591, "memory(GiB)": 34.88, "step": 141985, "train_speed(iter/s)": 0.410695 }, { "acc": 0.95333443, "epoch": 3.8445292827552597, "grad_norm": 22.550209045410156, "learning_rate": 4.2241154790070025e-08, "loss": 0.28000944, "memory(GiB)": 34.88, "step": 141990, "train_speed(iter/s)": 0.410696 }, { "acc": 0.93644104, "epoch": 3.844664663038475, "grad_norm": 19.83323860168457, "learning_rate": 4.216946107968404e-08, "loss": 0.44595814, "memory(GiB)": 34.88, "step": 141995, "train_speed(iter/s)": 0.410697 }, { "acc": 0.95228233, "epoch": 3.8448000433216905, "grad_norm": 5.096916198730469, "learning_rate": 4.20978294821305e-08, "loss": 0.2906975, "memory(GiB)": 34.88, "step": 142000, "train_speed(iter/s)": 0.410697 }, { "acc": 0.95239601, "epoch": 3.8449354236049063, "grad_norm": 4.163034915924072, "learning_rate": 4.2026259998308044e-08, "loss": 0.25375111, "memory(GiB)": 34.88, "step": 142005, "train_speed(iter/s)": 0.410698 }, { "acc": 0.950177, "epoch": 3.8450708038881216, "grad_norm": 6.653735160827637, "learning_rate": 4.195475262911253e-08, "loss": 0.33075902, "memory(GiB)": 34.88, "step": 142010, "train_speed(iter/s)": 0.410699 }, { "acc": 0.93767939, "epoch": 3.8452061841713374, "grad_norm": 15.856712341308594, "learning_rate": 4.1883307375439824e-08, "loss": 0.40183907, "memory(GiB)": 34.88, "step": 142015, "train_speed(iter/s)": 0.4107 }, { "acc": 0.95552292, "epoch": 3.8453415644545528, "grad_norm": 3.2919392585754395, "learning_rate": 4.181192423818467e-08, "loss": 0.26127915, "memory(GiB)": 34.88, "step": 142020, "train_speed(iter/s)": 0.4107 }, { "acc": 0.95180283, "epoch": 3.8454769447377686, "grad_norm": 13.618666648864746, "learning_rate": 4.174060321824127e-08, "loss": 0.27508349, "memory(GiB)": 34.88, "step": 142025, "train_speed(iter/s)": 0.410701 }, { "acc": 0.93972702, "epoch": 3.845612325020984, "grad_norm": 6.128233909606934, "learning_rate": 4.166934431650327e-08, "loss": 0.35786624, "memory(GiB)": 34.88, "step": 142030, "train_speed(iter/s)": 0.410702 }, { "acc": 0.96545486, "epoch": 3.8457477053041993, "grad_norm": 3.923596143722534, "learning_rate": 4.1598147533863734e-08, "loss": 0.20956423, "memory(GiB)": 34.88, "step": 142035, "train_speed(iter/s)": 0.410703 }, { "acc": 0.93645077, "epoch": 3.845883085587415, "grad_norm": 6.451686859130859, "learning_rate": 4.152701287121353e-08, "loss": 0.36695364, "memory(GiB)": 34.88, "step": 142040, "train_speed(iter/s)": 0.410704 }, { "acc": 0.95087776, "epoch": 3.8460184658706305, "grad_norm": 11.135098457336426, "learning_rate": 4.145594032944466e-08, "loss": 0.33485456, "memory(GiB)": 34.88, "step": 142045, "train_speed(iter/s)": 0.410704 }, { "acc": 0.94012566, "epoch": 3.8461538461538463, "grad_norm": 4.721510410308838, "learning_rate": 4.13849299094474e-08, "loss": 0.41234674, "memory(GiB)": 34.88, "step": 142050, "train_speed(iter/s)": 0.410705 }, { "acc": 0.96044903, "epoch": 3.8462892264370616, "grad_norm": 11.90928840637207, "learning_rate": 4.131398161211097e-08, "loss": 0.27374396, "memory(GiB)": 34.88, "step": 142055, "train_speed(iter/s)": 0.410706 }, { "acc": 0.94367437, "epoch": 3.8464246067202774, "grad_norm": 5.805508136749268, "learning_rate": 4.124309543832402e-08, "loss": 0.30347857, "memory(GiB)": 34.88, "step": 142060, "train_speed(iter/s)": 0.410707 }, { "acc": 0.97201843, "epoch": 3.8465599870034928, "grad_norm": 5.414918899536133, "learning_rate": 4.117227138897462e-08, "loss": 0.1628716, "memory(GiB)": 34.88, "step": 142065, "train_speed(iter/s)": 0.410707 }, { "acc": 0.93010979, "epoch": 3.846695367286708, "grad_norm": 3.6642277240753174, "learning_rate": 4.110150946495031e-08, "loss": 0.46081882, "memory(GiB)": 34.88, "step": 142070, "train_speed(iter/s)": 0.410708 }, { "acc": 0.96198006, "epoch": 3.846830747569924, "grad_norm": 4.631441116333008, "learning_rate": 4.103080966713752e-08, "loss": 0.30041444, "memory(GiB)": 34.88, "step": 142075, "train_speed(iter/s)": 0.410709 }, { "acc": 0.95590096, "epoch": 3.8469661278531397, "grad_norm": 2.9948036670684814, "learning_rate": 4.0960171996421014e-08, "loss": 0.30954552, "memory(GiB)": 34.88, "step": 142080, "train_speed(iter/s)": 0.41071 }, { "acc": 0.96167755, "epoch": 3.847101508136355, "grad_norm": 8.741917610168457, "learning_rate": 4.088959645368721e-08, "loss": 0.21185799, "memory(GiB)": 34.88, "step": 142085, "train_speed(iter/s)": 0.410711 }, { "acc": 0.95694857, "epoch": 3.8472368884195705, "grad_norm": 7.47243070602417, "learning_rate": 4.081908303981864e-08, "loss": 0.21299665, "memory(GiB)": 34.88, "step": 142090, "train_speed(iter/s)": 0.410711 }, { "acc": 0.9507493, "epoch": 3.8473722687027863, "grad_norm": 7.134352684020996, "learning_rate": 4.0748631755699517e-08, "loss": 0.30374687, "memory(GiB)": 34.88, "step": 142095, "train_speed(iter/s)": 0.410712 }, { "acc": 0.95013237, "epoch": 3.8475076489860016, "grad_norm": 9.327139854431152, "learning_rate": 4.067824260221183e-08, "loss": 0.34972916, "memory(GiB)": 34.88, "step": 142100, "train_speed(iter/s)": 0.410713 }, { "acc": 0.95373487, "epoch": 3.847643029269217, "grad_norm": 4.475837707519531, "learning_rate": 4.06079155802381e-08, "loss": 0.31746078, "memory(GiB)": 34.88, "step": 142105, "train_speed(iter/s)": 0.410714 }, { "acc": 0.9475563, "epoch": 3.8477784095524328, "grad_norm": 6.728930473327637, "learning_rate": 4.053765069065867e-08, "loss": 0.33630509, "memory(GiB)": 34.88, "step": 142110, "train_speed(iter/s)": 0.410715 }, { "acc": 0.959828, "epoch": 3.8479137898356486, "grad_norm": 5.673083305358887, "learning_rate": 4.046744793435439e-08, "loss": 0.21518953, "memory(GiB)": 34.88, "step": 142115, "train_speed(iter/s)": 0.410715 }, { "acc": 0.95817585, "epoch": 3.848049170118864, "grad_norm": 2.5790135860443115, "learning_rate": 4.0397307312203926e-08, "loss": 0.29289148, "memory(GiB)": 34.88, "step": 142120, "train_speed(iter/s)": 0.410716 }, { "acc": 0.94985209, "epoch": 3.8481845504020793, "grad_norm": 3.630786895751953, "learning_rate": 4.032722882508649e-08, "loss": 0.27794492, "memory(GiB)": 34.88, "step": 142125, "train_speed(iter/s)": 0.410717 }, { "acc": 0.94602156, "epoch": 3.848319930685295, "grad_norm": 9.759378433227539, "learning_rate": 4.0257212473879626e-08, "loss": 0.34431939, "memory(GiB)": 34.88, "step": 142130, "train_speed(iter/s)": 0.410718 }, { "acc": 0.93808079, "epoch": 3.8484553109685105, "grad_norm": 4.615995407104492, "learning_rate": 4.0187258259460304e-08, "loss": 0.33584156, "memory(GiB)": 34.88, "step": 142135, "train_speed(iter/s)": 0.410719 }, { "acc": 0.93257179, "epoch": 3.8485906912517263, "grad_norm": 10.114691734313965, "learning_rate": 4.0117366182705535e-08, "loss": 0.34209352, "memory(GiB)": 34.88, "step": 142140, "train_speed(iter/s)": 0.41072 }, { "acc": 0.94466724, "epoch": 3.8487260715349416, "grad_norm": 8.368897438049316, "learning_rate": 4.0047536244490065e-08, "loss": 0.39206717, "memory(GiB)": 34.88, "step": 142145, "train_speed(iter/s)": 0.41072 }, { "acc": 0.95119133, "epoch": 3.8488614518181574, "grad_norm": 5.336561679840088, "learning_rate": 3.997776844568869e-08, "loss": 0.34345202, "memory(GiB)": 34.88, "step": 142150, "train_speed(iter/s)": 0.410721 }, { "acc": 0.96348648, "epoch": 3.848996832101373, "grad_norm": 5.690680503845215, "learning_rate": 3.9908062787176155e-08, "loss": 0.22318742, "memory(GiB)": 34.88, "step": 142155, "train_speed(iter/s)": 0.410722 }, { "acc": 0.93151779, "epoch": 3.849132212384588, "grad_norm": 8.10462474822998, "learning_rate": 3.9838419269825026e-08, "loss": 0.41796808, "memory(GiB)": 34.88, "step": 142160, "train_speed(iter/s)": 0.410723 }, { "acc": 0.966572, "epoch": 3.849267592667804, "grad_norm": 1.5182392597198486, "learning_rate": 3.9768837894507835e-08, "loss": 0.18282926, "memory(GiB)": 34.88, "step": 142165, "train_speed(iter/s)": 0.410724 }, { "acc": 0.94662342, "epoch": 3.8494029729510193, "grad_norm": 3.017885208129883, "learning_rate": 3.969931866209602e-08, "loss": 0.36941905, "memory(GiB)": 34.88, "step": 142170, "train_speed(iter/s)": 0.410724 }, { "acc": 0.96309166, "epoch": 3.849538353234235, "grad_norm": 5.924679756164551, "learning_rate": 3.9629861573461035e-08, "loss": 0.21709132, "memory(GiB)": 34.88, "step": 142175, "train_speed(iter/s)": 0.410725 }, { "acc": 0.95599947, "epoch": 3.8496737335174505, "grad_norm": 9.741782188415527, "learning_rate": 3.9560466629472635e-08, "loss": 0.29046879, "memory(GiB)": 34.88, "step": 142180, "train_speed(iter/s)": 0.410726 }, { "acc": 0.94226646, "epoch": 3.8498091138006663, "grad_norm": 8.293316841125488, "learning_rate": 3.9491133831000055e-08, "loss": 0.36345177, "memory(GiB)": 34.88, "step": 142185, "train_speed(iter/s)": 0.410727 }, { "acc": 0.95317497, "epoch": 3.8499444940838816, "grad_norm": 5.589733600616455, "learning_rate": 3.942186317891195e-08, "loss": 0.28994908, "memory(GiB)": 34.88, "step": 142190, "train_speed(iter/s)": 0.410727 }, { "acc": 0.93989277, "epoch": 3.850079874367097, "grad_norm": 8.584625244140625, "learning_rate": 3.9352654674075864e-08, "loss": 0.38983135, "memory(GiB)": 34.88, "step": 142195, "train_speed(iter/s)": 0.410728 }, { "acc": 0.95166225, "epoch": 3.850215254650313, "grad_norm": 2.315416097640991, "learning_rate": 3.9283508317358805e-08, "loss": 0.26908998, "memory(GiB)": 34.88, "step": 142200, "train_speed(iter/s)": 0.410729 }, { "acc": 0.94561701, "epoch": 3.850350634933528, "grad_norm": 3.5326998233795166, "learning_rate": 3.921442410962721e-08, "loss": 0.29093118, "memory(GiB)": 34.88, "step": 142205, "train_speed(iter/s)": 0.41073 }, { "acc": 0.95173244, "epoch": 3.850486015216744, "grad_norm": 5.437047958374023, "learning_rate": 3.914540205174697e-08, "loss": 0.32219169, "memory(GiB)": 34.88, "step": 142210, "train_speed(iter/s)": 0.410731 }, { "acc": 0.94782171, "epoch": 3.8506213954999593, "grad_norm": 12.307018280029297, "learning_rate": 3.907644214458119e-08, "loss": 0.28155286, "memory(GiB)": 34.88, "step": 142215, "train_speed(iter/s)": 0.410731 }, { "acc": 0.93228617, "epoch": 3.850756775783175, "grad_norm": 5.876535892486572, "learning_rate": 3.900754438899522e-08, "loss": 0.41732731, "memory(GiB)": 34.88, "step": 142220, "train_speed(iter/s)": 0.410732 }, { "acc": 0.96055737, "epoch": 3.8508921560663905, "grad_norm": 11.013399124145508, "learning_rate": 3.893870878585104e-08, "loss": 0.26727304, "memory(GiB)": 34.88, "step": 142225, "train_speed(iter/s)": 0.410733 }, { "acc": 0.95843048, "epoch": 3.851027536349606, "grad_norm": 6.585314750671387, "learning_rate": 3.8869935336012325e-08, "loss": 0.25727131, "memory(GiB)": 34.88, "step": 142230, "train_speed(iter/s)": 0.410734 }, { "acc": 0.95094357, "epoch": 3.8511629166328216, "grad_norm": 2.4469401836395264, "learning_rate": 3.880122404033886e-08, "loss": 0.28627896, "memory(GiB)": 34.88, "step": 142235, "train_speed(iter/s)": 0.410735 }, { "acc": 0.96172476, "epoch": 3.8512982969160374, "grad_norm": 5.243625640869141, "learning_rate": 3.873257489969265e-08, "loss": 0.20656543, "memory(GiB)": 34.88, "step": 142240, "train_speed(iter/s)": 0.410736 }, { "acc": 0.951299, "epoch": 3.851433677199253, "grad_norm": 7.041720390319824, "learning_rate": 3.866398791493347e-08, "loss": 0.38991995, "memory(GiB)": 34.88, "step": 142245, "train_speed(iter/s)": 0.410736 }, { "acc": 0.93769608, "epoch": 3.851569057482468, "grad_norm": 6.137423038482666, "learning_rate": 3.8595463086919994e-08, "loss": 0.36808691, "memory(GiB)": 34.88, "step": 142250, "train_speed(iter/s)": 0.410737 }, { "acc": 0.95168228, "epoch": 3.851704437765684, "grad_norm": 6.562366962432861, "learning_rate": 3.852700041651145e-08, "loss": 0.24237165, "memory(GiB)": 34.88, "step": 142255, "train_speed(iter/s)": 0.410738 }, { "acc": 0.95712318, "epoch": 3.8518398180488993, "grad_norm": 2.768144369125366, "learning_rate": 3.845859990456429e-08, "loss": 0.19799752, "memory(GiB)": 34.88, "step": 142260, "train_speed(iter/s)": 0.410739 }, { "acc": 0.96498327, "epoch": 3.8519751983321147, "grad_norm": 2.4040560722351074, "learning_rate": 3.839026155193662e-08, "loss": 0.23240259, "memory(GiB)": 34.88, "step": 142265, "train_speed(iter/s)": 0.41074 }, { "acc": 0.96316528, "epoch": 3.8521105786153305, "grad_norm": 11.32825756072998, "learning_rate": 3.8321985359483796e-08, "loss": 0.23914661, "memory(GiB)": 34.88, "step": 142270, "train_speed(iter/s)": 0.41074 }, { "acc": 0.93724575, "epoch": 3.8522459588985463, "grad_norm": 8.991975784301758, "learning_rate": 3.825377132806116e-08, "loss": 0.42385826, "memory(GiB)": 34.88, "step": 142275, "train_speed(iter/s)": 0.410741 }, { "acc": 0.95082111, "epoch": 3.8523813391817616, "grad_norm": 5.270808219909668, "learning_rate": 3.818561945852348e-08, "loss": 0.31683753, "memory(GiB)": 34.88, "step": 142280, "train_speed(iter/s)": 0.410742 }, { "acc": 0.96302242, "epoch": 3.852516719464977, "grad_norm": 4.19677734375, "learning_rate": 3.8117529751724446e-08, "loss": 0.25770867, "memory(GiB)": 34.88, "step": 142285, "train_speed(iter/s)": 0.410743 }, { "acc": 0.931847, "epoch": 3.852652099748193, "grad_norm": 14.26453685760498, "learning_rate": 3.804950220851662e-08, "loss": 0.41108742, "memory(GiB)": 34.88, "step": 142290, "train_speed(iter/s)": 0.410744 }, { "acc": 0.9517374, "epoch": 3.852787480031408, "grad_norm": 6.992935657501221, "learning_rate": 3.7981536829752575e-08, "loss": 0.32386842, "memory(GiB)": 34.88, "step": 142295, "train_speed(iter/s)": 0.410744 }, { "acc": 0.95335245, "epoch": 3.852922860314624, "grad_norm": 3.7626466751098633, "learning_rate": 3.7913633616283765e-08, "loss": 0.26417308, "memory(GiB)": 34.88, "step": 142300, "train_speed(iter/s)": 0.410745 }, { "acc": 0.95397148, "epoch": 3.8530582405978393, "grad_norm": 5.248626232147217, "learning_rate": 3.784579256896052e-08, "loss": 0.2391634, "memory(GiB)": 34.88, "step": 142305, "train_speed(iter/s)": 0.410746 }, { "acc": 0.95830841, "epoch": 3.853193620881055, "grad_norm": 6.837652206420898, "learning_rate": 3.7778013688632654e-08, "loss": 0.26833916, "memory(GiB)": 34.88, "step": 142310, "train_speed(iter/s)": 0.410747 }, { "acc": 0.94620438, "epoch": 3.8533290011642705, "grad_norm": 4.349966526031494, "learning_rate": 3.7710296976149945e-08, "loss": 0.30608349, "memory(GiB)": 34.88, "step": 142315, "train_speed(iter/s)": 0.410748 }, { "acc": 0.95642567, "epoch": 3.853464381447486, "grad_norm": 5.071239948272705, "learning_rate": 3.7642642432359954e-08, "loss": 0.25498278, "memory(GiB)": 34.88, "step": 142320, "train_speed(iter/s)": 0.410749 }, { "acc": 0.95310783, "epoch": 3.8535997617307016, "grad_norm": 8.395157814025879, "learning_rate": 3.757505005810972e-08, "loss": 0.27562532, "memory(GiB)": 34.88, "step": 142325, "train_speed(iter/s)": 0.410749 }, { "acc": 0.94884005, "epoch": 3.853735142013917, "grad_norm": 8.077445983886719, "learning_rate": 3.7507519854247355e-08, "loss": 0.28346028, "memory(GiB)": 34.88, "step": 142330, "train_speed(iter/s)": 0.41075 }, { "acc": 0.95419159, "epoch": 3.853870522297133, "grad_norm": 2.6948466300964355, "learning_rate": 3.7440051821617654e-08, "loss": 0.28826945, "memory(GiB)": 34.88, "step": 142335, "train_speed(iter/s)": 0.410751 }, { "acc": 0.9468586, "epoch": 3.854005902580348, "grad_norm": 5.143019676208496, "learning_rate": 3.737264596106597e-08, "loss": 0.32065821, "memory(GiB)": 34.88, "step": 142340, "train_speed(iter/s)": 0.410752 }, { "acc": 0.96016598, "epoch": 3.854141282863564, "grad_norm": 7.70639181137085, "learning_rate": 3.7305302273437647e-08, "loss": 0.26103599, "memory(GiB)": 34.88, "step": 142345, "train_speed(iter/s)": 0.410753 }, { "acc": 0.94644661, "epoch": 3.8542766631467793, "grad_norm": 5.594942569732666, "learning_rate": 3.7238020759575266e-08, "loss": 0.32554827, "memory(GiB)": 34.88, "step": 142350, "train_speed(iter/s)": 0.410753 }, { "acc": 0.93769951, "epoch": 3.8544120434299947, "grad_norm": 5.81949520111084, "learning_rate": 3.717080142032196e-08, "loss": 0.40470285, "memory(GiB)": 34.88, "step": 142355, "train_speed(iter/s)": 0.410754 }, { "acc": 0.95463943, "epoch": 3.8545474237132105, "grad_norm": 8.79634952545166, "learning_rate": 3.7103644256519744e-08, "loss": 0.29835794, "memory(GiB)": 34.88, "step": 142360, "train_speed(iter/s)": 0.410755 }, { "acc": 0.95611706, "epoch": 3.854682803996426, "grad_norm": 7.50833797454834, "learning_rate": 3.703654926901008e-08, "loss": 0.26049519, "memory(GiB)": 34.88, "step": 142365, "train_speed(iter/s)": 0.410756 }, { "acc": 0.93792953, "epoch": 3.8548181842796416, "grad_norm": 4.6823344230651855, "learning_rate": 3.696951645863334e-08, "loss": 0.38383405, "memory(GiB)": 34.88, "step": 142370, "train_speed(iter/s)": 0.410757 }, { "acc": 0.94766445, "epoch": 3.854953564562857, "grad_norm": 4.593275547027588, "learning_rate": 3.6902545826228746e-08, "loss": 0.32284846, "memory(GiB)": 34.88, "step": 142375, "train_speed(iter/s)": 0.410757 }, { "acc": 0.95086212, "epoch": 3.855088944846073, "grad_norm": 3.883647918701172, "learning_rate": 3.6835637372636104e-08, "loss": 0.31769924, "memory(GiB)": 34.88, "step": 142380, "train_speed(iter/s)": 0.410758 }, { "acc": 0.94618797, "epoch": 3.855224325129288, "grad_norm": 4.209205627441406, "learning_rate": 3.6768791098692996e-08, "loss": 0.30642552, "memory(GiB)": 34.88, "step": 142385, "train_speed(iter/s)": 0.410759 }, { "acc": 0.96507483, "epoch": 3.8553597054125035, "grad_norm": 8.244595527648926, "learning_rate": 3.670200700523756e-08, "loss": 0.17884725, "memory(GiB)": 34.88, "step": 142390, "train_speed(iter/s)": 0.41076 }, { "acc": 0.94794159, "epoch": 3.8554950856957193, "grad_norm": 7.214570999145508, "learning_rate": 3.66352850931046e-08, "loss": 0.32128885, "memory(GiB)": 34.88, "step": 142395, "train_speed(iter/s)": 0.41076 }, { "acc": 0.9408268, "epoch": 3.8556304659789347, "grad_norm": 5.155523300170898, "learning_rate": 3.656862536313225e-08, "loss": 0.34560323, "memory(GiB)": 34.88, "step": 142400, "train_speed(iter/s)": 0.410761 }, { "acc": 0.94836845, "epoch": 3.8557658462621505, "grad_norm": 10.981475830078125, "learning_rate": 3.65020278161542e-08, "loss": 0.30442109, "memory(GiB)": 34.88, "step": 142405, "train_speed(iter/s)": 0.410762 }, { "acc": 0.9514636, "epoch": 3.855901226545366, "grad_norm": 20.683589935302734, "learning_rate": 3.643549245300526e-08, "loss": 0.28656714, "memory(GiB)": 34.88, "step": 142410, "train_speed(iter/s)": 0.410763 }, { "acc": 0.9381052, "epoch": 3.8560366068285816, "grad_norm": 5.252739429473877, "learning_rate": 3.6369019274518556e-08, "loss": 0.40790992, "memory(GiB)": 34.88, "step": 142415, "train_speed(iter/s)": 0.410764 }, { "acc": 0.95940285, "epoch": 3.856171987111797, "grad_norm": 3.599478006362915, "learning_rate": 3.6302608281526135e-08, "loss": 0.23562651, "memory(GiB)": 34.88, "step": 142420, "train_speed(iter/s)": 0.410764 }, { "acc": 0.93731632, "epoch": 3.8563073673950123, "grad_norm": 9.051593780517578, "learning_rate": 3.623625947486168e-08, "loss": 0.36928301, "memory(GiB)": 34.88, "step": 142425, "train_speed(iter/s)": 0.410765 }, { "acc": 0.9393425, "epoch": 3.856442747678228, "grad_norm": 6.737217903137207, "learning_rate": 3.616997285535445e-08, "loss": 0.35928895, "memory(GiB)": 34.88, "step": 142430, "train_speed(iter/s)": 0.410766 }, { "acc": 0.93447056, "epoch": 3.856578127961444, "grad_norm": 7.719276428222656, "learning_rate": 3.610374842383646e-08, "loss": 0.38584707, "memory(GiB)": 34.88, "step": 142435, "train_speed(iter/s)": 0.410767 }, { "acc": 0.9372385, "epoch": 3.8567135082446593, "grad_norm": 7.1100945472717285, "learning_rate": 3.6037586181135873e-08, "loss": 0.36009288, "memory(GiB)": 34.88, "step": 142440, "train_speed(iter/s)": 0.410768 }, { "acc": 0.94117727, "epoch": 3.8568488885278747, "grad_norm": 8.498638153076172, "learning_rate": 3.597148612808249e-08, "loss": 0.35400882, "memory(GiB)": 34.88, "step": 142445, "train_speed(iter/s)": 0.410768 }, { "acc": 0.95382481, "epoch": 3.8569842688110905, "grad_norm": 4.228249549865723, "learning_rate": 3.59054482655039e-08, "loss": 0.27340908, "memory(GiB)": 34.88, "step": 142450, "train_speed(iter/s)": 0.410769 }, { "acc": 0.95034943, "epoch": 3.857119649094306, "grad_norm": 10.813346862792969, "learning_rate": 3.583947259422824e-08, "loss": 0.34187665, "memory(GiB)": 34.88, "step": 142455, "train_speed(iter/s)": 0.41077 }, { "acc": 0.9529129, "epoch": 3.857255029377521, "grad_norm": 4.849495887756348, "learning_rate": 3.577355911508089e-08, "loss": 0.2710479, "memory(GiB)": 34.88, "step": 142460, "train_speed(iter/s)": 0.410771 }, { "acc": 0.94567633, "epoch": 3.857390409660737, "grad_norm": 24.41642951965332, "learning_rate": 3.5707707828887206e-08, "loss": 0.28806112, "memory(GiB)": 34.88, "step": 142465, "train_speed(iter/s)": 0.410772 }, { "acc": 0.95485201, "epoch": 3.857525789943953, "grad_norm": 8.614691734313965, "learning_rate": 3.564191873647312e-08, "loss": 0.28050485, "memory(GiB)": 34.88, "step": 142470, "train_speed(iter/s)": 0.410772 }, { "acc": 0.94671602, "epoch": 3.857661170227168, "grad_norm": 4.239534378051758, "learning_rate": 3.5576191838662334e-08, "loss": 0.25728254, "memory(GiB)": 34.88, "step": 142475, "train_speed(iter/s)": 0.410773 }, { "acc": 0.94293976, "epoch": 3.8577965505103835, "grad_norm": 4.808462619781494, "learning_rate": 3.5510527136278546e-08, "loss": 0.2956789, "memory(GiB)": 34.88, "step": 142480, "train_speed(iter/s)": 0.410774 }, { "acc": 0.94563656, "epoch": 3.8579319307935993, "grad_norm": 5.532456874847412, "learning_rate": 3.54449246301438e-08, "loss": 0.27968526, "memory(GiB)": 34.88, "step": 142485, "train_speed(iter/s)": 0.410775 }, { "acc": 0.9374218, "epoch": 3.8580673110768147, "grad_norm": 12.680087089538574, "learning_rate": 3.5379384321080695e-08, "loss": 0.41111755, "memory(GiB)": 34.88, "step": 142490, "train_speed(iter/s)": 0.410775 }, { "acc": 0.96037836, "epoch": 3.8582026913600305, "grad_norm": 6.688870906829834, "learning_rate": 3.5313906209909585e-08, "loss": 0.31348028, "memory(GiB)": 34.88, "step": 142495, "train_speed(iter/s)": 0.410776 }, { "acc": 0.95781889, "epoch": 3.858338071643246, "grad_norm": 7.597288608551025, "learning_rate": 3.52484902974503e-08, "loss": 0.24820304, "memory(GiB)": 34.88, "step": 142500, "train_speed(iter/s)": 0.410777 }, { "acc": 0.94661388, "epoch": 3.8584734519264616, "grad_norm": 4.7638092041015625, "learning_rate": 3.518313658452376e-08, "loss": 0.4105473, "memory(GiB)": 34.88, "step": 142505, "train_speed(iter/s)": 0.410778 }, { "acc": 0.94365273, "epoch": 3.858608832209677, "grad_norm": 9.235472679138184, "learning_rate": 3.511784507194702e-08, "loss": 0.39475241, "memory(GiB)": 34.88, "step": 142510, "train_speed(iter/s)": 0.410778 }, { "acc": 0.93394899, "epoch": 3.8587442124928923, "grad_norm": 10.959502220153809, "learning_rate": 3.505261576053934e-08, "loss": 0.43230791, "memory(GiB)": 34.88, "step": 142515, "train_speed(iter/s)": 0.410779 }, { "acc": 0.9587245, "epoch": 3.858879592776108, "grad_norm": 5.621120452880859, "learning_rate": 3.498744865111664e-08, "loss": 0.29727788, "memory(GiB)": 34.88, "step": 142520, "train_speed(iter/s)": 0.41078 }, { "acc": 0.94330711, "epoch": 3.8590149730593235, "grad_norm": 6.112793445587158, "learning_rate": 3.492234374449653e-08, "loss": 0.3426887, "memory(GiB)": 34.88, "step": 142525, "train_speed(iter/s)": 0.410781 }, { "acc": 0.9448782, "epoch": 3.8591503533425393, "grad_norm": 5.090467929840088, "learning_rate": 3.485730104149327e-08, "loss": 0.35316727, "memory(GiB)": 34.88, "step": 142530, "train_speed(iter/s)": 0.410782 }, { "acc": 0.94535065, "epoch": 3.8592857336257547, "grad_norm": 3.764873743057251, "learning_rate": 3.4792320542922246e-08, "loss": 0.31629057, "memory(GiB)": 34.88, "step": 142535, "train_speed(iter/s)": 0.410782 }, { "acc": 0.94492321, "epoch": 3.8594211139089705, "grad_norm": 8.342449188232422, "learning_rate": 3.472740224959827e-08, "loss": 0.3382498, "memory(GiB)": 34.88, "step": 142540, "train_speed(iter/s)": 0.410783 }, { "acc": 0.9617856, "epoch": 3.859556494192186, "grad_norm": 3.5948679447174072, "learning_rate": 3.466254616233284e-08, "loss": 0.2259115, "memory(GiB)": 34.88, "step": 142545, "train_speed(iter/s)": 0.410784 }, { "acc": 0.94413433, "epoch": 3.859691874475401, "grad_norm": 8.343116760253906, "learning_rate": 3.4597752281940214e-08, "loss": 0.41524711, "memory(GiB)": 34.88, "step": 142550, "train_speed(iter/s)": 0.410785 }, { "acc": 0.951478, "epoch": 3.859827254758617, "grad_norm": 4.9039998054504395, "learning_rate": 3.453302060923079e-08, "loss": 0.32827606, "memory(GiB)": 34.88, "step": 142555, "train_speed(iter/s)": 0.410785 }, { "acc": 0.95183964, "epoch": 3.8599626350418323, "grad_norm": 5.516860008239746, "learning_rate": 3.446835114501605e-08, "loss": 0.2511734, "memory(GiB)": 34.88, "step": 142560, "train_speed(iter/s)": 0.410786 }, { "acc": 0.95452805, "epoch": 3.860098015325048, "grad_norm": 11.19328784942627, "learning_rate": 3.440374389010582e-08, "loss": 0.29062502, "memory(GiB)": 34.88, "step": 142565, "train_speed(iter/s)": 0.410787 }, { "acc": 0.94466352, "epoch": 3.8602333956082635, "grad_norm": 8.550036430358887, "learning_rate": 3.433919884530937e-08, "loss": 0.2746336, "memory(GiB)": 34.88, "step": 142570, "train_speed(iter/s)": 0.410788 }, { "acc": 0.94989195, "epoch": 3.8603687758914793, "grad_norm": 3.547455072402954, "learning_rate": 3.4274716011435424e-08, "loss": 0.30450296, "memory(GiB)": 34.88, "step": 142575, "train_speed(iter/s)": 0.410789 }, { "acc": 0.94791183, "epoch": 3.8605041561746947, "grad_norm": 5.78062629699707, "learning_rate": 3.421029538929214e-08, "loss": 0.35141025, "memory(GiB)": 34.88, "step": 142580, "train_speed(iter/s)": 0.410789 }, { "acc": 0.95763235, "epoch": 3.86063953645791, "grad_norm": 6.192610740661621, "learning_rate": 3.4145936979686565e-08, "loss": 0.26428008, "memory(GiB)": 34.88, "step": 142585, "train_speed(iter/s)": 0.41079 }, { "acc": 0.93568983, "epoch": 3.860774916741126, "grad_norm": 25.33830451965332, "learning_rate": 3.408164078342354e-08, "loss": 0.3958606, "memory(GiB)": 34.88, "step": 142590, "train_speed(iter/s)": 0.410791 }, { "acc": 0.94851379, "epoch": 3.8609102970243416, "grad_norm": 17.978715896606445, "learning_rate": 3.4017406801310105e-08, "loss": 0.32109718, "memory(GiB)": 34.88, "step": 142595, "train_speed(iter/s)": 0.410791 }, { "acc": 0.95604343, "epoch": 3.861045677307557, "grad_norm": 7.16514778137207, "learning_rate": 3.3953235034149985e-08, "loss": 0.28724904, "memory(GiB)": 34.88, "step": 142600, "train_speed(iter/s)": 0.410792 }, { "acc": 0.95559082, "epoch": 3.8611810575907723, "grad_norm": 4.845772743225098, "learning_rate": 3.3889125482747466e-08, "loss": 0.24489477, "memory(GiB)": 34.88, "step": 142605, "train_speed(iter/s)": 0.410793 }, { "acc": 0.94909391, "epoch": 3.861316437873988, "grad_norm": 3.3895857334136963, "learning_rate": 3.3825078147905146e-08, "loss": 0.37453313, "memory(GiB)": 34.88, "step": 142610, "train_speed(iter/s)": 0.410794 }, { "acc": 0.95066414, "epoch": 3.8614518181572035, "grad_norm": 8.03355884552002, "learning_rate": 3.37610930304262e-08, "loss": 0.3601011, "memory(GiB)": 34.88, "step": 142615, "train_speed(iter/s)": 0.410794 }, { "acc": 0.94580803, "epoch": 3.861587198440419, "grad_norm": 7.502189636230469, "learning_rate": 3.3697170131111563e-08, "loss": 0.36488323, "memory(GiB)": 34.88, "step": 142620, "train_speed(iter/s)": 0.410795 }, { "acc": 0.95848598, "epoch": 3.8617225787236347, "grad_norm": 4.159196376800537, "learning_rate": 3.363330945076219e-08, "loss": 0.28609767, "memory(GiB)": 34.88, "step": 142625, "train_speed(iter/s)": 0.410796 }, { "acc": 0.959974, "epoch": 3.8618579590068505, "grad_norm": 3.731199026107788, "learning_rate": 3.3569510990177914e-08, "loss": 0.23541656, "memory(GiB)": 34.88, "step": 142630, "train_speed(iter/s)": 0.410797 }, { "acc": 0.94089966, "epoch": 3.861993339290066, "grad_norm": 3.6529347896575928, "learning_rate": 3.350577475015802e-08, "loss": 0.33345807, "memory(GiB)": 34.88, "step": 142635, "train_speed(iter/s)": 0.410798 }, { "acc": 0.95589142, "epoch": 3.862128719573281, "grad_norm": 11.011698722839355, "learning_rate": 3.3442100731500666e-08, "loss": 0.26669645, "memory(GiB)": 34.88, "step": 142640, "train_speed(iter/s)": 0.410798 }, { "acc": 0.96066723, "epoch": 3.862264099856497, "grad_norm": 6.4268388748168945, "learning_rate": 3.3378488935004035e-08, "loss": 0.26146023, "memory(GiB)": 34.88, "step": 142645, "train_speed(iter/s)": 0.410799 }, { "acc": 0.94688473, "epoch": 3.8623994801397123, "grad_norm": 6.978339672088623, "learning_rate": 3.3314939361464635e-08, "loss": 0.28107729, "memory(GiB)": 34.88, "step": 142650, "train_speed(iter/s)": 0.4108 }, { "acc": 0.95073032, "epoch": 3.862534860422928, "grad_norm": 3.6271347999572754, "learning_rate": 3.32514520116784e-08, "loss": 0.29247601, "memory(GiB)": 34.88, "step": 142655, "train_speed(iter/s)": 0.410801 }, { "acc": 0.96223869, "epoch": 3.8626702407061435, "grad_norm": 7.8394975662231445, "learning_rate": 3.3188026886441295e-08, "loss": 0.24479342, "memory(GiB)": 34.88, "step": 142660, "train_speed(iter/s)": 0.410802 }, { "acc": 0.9391655, "epoch": 3.8628056209893593, "grad_norm": 17.35016441345215, "learning_rate": 3.3124663986547045e-08, "loss": 0.26531544, "memory(GiB)": 34.88, "step": 142665, "train_speed(iter/s)": 0.410802 }, { "acc": 0.93796663, "epoch": 3.8629410012725747, "grad_norm": 2.4018423557281494, "learning_rate": 3.3061363312789925e-08, "loss": 0.36106267, "memory(GiB)": 34.88, "step": 142670, "train_speed(iter/s)": 0.410803 }, { "acc": 0.96288643, "epoch": 3.86307638155579, "grad_norm": 12.66396713256836, "learning_rate": 3.2998124865962574e-08, "loss": 0.18690523, "memory(GiB)": 34.88, "step": 142675, "train_speed(iter/s)": 0.410804 }, { "acc": 0.94752178, "epoch": 3.863211761839006, "grad_norm": 4.64614725112915, "learning_rate": 3.29349486468576e-08, "loss": 0.32484055, "memory(GiB)": 34.88, "step": 142680, "train_speed(iter/s)": 0.410805 }, { "acc": 0.95059052, "epoch": 3.863347142122221, "grad_norm": 3.7786078453063965, "learning_rate": 3.2871834656265965e-08, "loss": 0.34300058, "memory(GiB)": 34.88, "step": 142685, "train_speed(iter/s)": 0.410805 }, { "acc": 0.93859386, "epoch": 3.863482522405437, "grad_norm": 7.428451061248779, "learning_rate": 3.280878289497862e-08, "loss": 0.31395431, "memory(GiB)": 34.88, "step": 142690, "train_speed(iter/s)": 0.410806 }, { "acc": 0.95192642, "epoch": 3.8636179026886524, "grad_norm": 6.681700706481934, "learning_rate": 3.2745793363784846e-08, "loss": 0.29752393, "memory(GiB)": 34.88, "step": 142695, "train_speed(iter/s)": 0.410807 }, { "acc": 0.94832249, "epoch": 3.863753282971868, "grad_norm": 11.27700424194336, "learning_rate": 3.2682866063474504e-08, "loss": 0.29773662, "memory(GiB)": 34.88, "step": 142700, "train_speed(iter/s)": 0.410808 }, { "acc": 0.9591424, "epoch": 3.8638886632550835, "grad_norm": 6.436357021331787, "learning_rate": 3.262000099483576e-08, "loss": 0.2106442, "memory(GiB)": 34.88, "step": 142705, "train_speed(iter/s)": 0.410809 }, { "acc": 0.96236286, "epoch": 3.864024043538299, "grad_norm": 4.62717342376709, "learning_rate": 3.255719815865514e-08, "loss": 0.21605256, "memory(GiB)": 34.88, "step": 142710, "train_speed(iter/s)": 0.410809 }, { "acc": 0.95490217, "epoch": 3.8641594238215147, "grad_norm": 3.2311408519744873, "learning_rate": 3.2494457555720817e-08, "loss": 0.23614724, "memory(GiB)": 34.88, "step": 142715, "train_speed(iter/s)": 0.41081 }, { "acc": 0.96152229, "epoch": 3.86429480410473, "grad_norm": 7.110428810119629, "learning_rate": 3.243177918681764e-08, "loss": 0.24551909, "memory(GiB)": 34.88, "step": 142720, "train_speed(iter/s)": 0.410811 }, { "acc": 0.95264931, "epoch": 3.864430184387946, "grad_norm": 4.275679588317871, "learning_rate": 3.236916305273101e-08, "loss": 0.30691657, "memory(GiB)": 34.88, "step": 142725, "train_speed(iter/s)": 0.410812 }, { "acc": 0.94868126, "epoch": 3.864565564671161, "grad_norm": 4.413687229156494, "learning_rate": 3.2306609154245776e-08, "loss": 0.30875793, "memory(GiB)": 34.88, "step": 142730, "train_speed(iter/s)": 0.410813 }, { "acc": 0.94606495, "epoch": 3.864700944954377, "grad_norm": 7.4173994064331055, "learning_rate": 3.224411749214513e-08, "loss": 0.35873265, "memory(GiB)": 34.88, "step": 142735, "train_speed(iter/s)": 0.410813 }, { "acc": 0.93869123, "epoch": 3.8648363252375924, "grad_norm": 8.826386451721191, "learning_rate": 3.218168806721225e-08, "loss": 0.40335665, "memory(GiB)": 34.88, "step": 142740, "train_speed(iter/s)": 0.410814 }, { "acc": 0.95453835, "epoch": 3.8649717055208077, "grad_norm": 3.911830186843872, "learning_rate": 3.2119320880228665e-08, "loss": 0.2386107, "memory(GiB)": 34.88, "step": 142745, "train_speed(iter/s)": 0.410815 }, { "acc": 0.93582201, "epoch": 3.8651070858040235, "grad_norm": 5.517884254455566, "learning_rate": 3.2057015931975875e-08, "loss": 0.38249657, "memory(GiB)": 34.88, "step": 142750, "train_speed(iter/s)": 0.410816 }, { "acc": 0.95428505, "epoch": 3.8652424660872393, "grad_norm": 4.401127338409424, "learning_rate": 3.1994773223234865e-08, "loss": 0.31704369, "memory(GiB)": 34.88, "step": 142755, "train_speed(iter/s)": 0.410816 }, { "acc": 0.96039429, "epoch": 3.8653778463704547, "grad_norm": 9.544715881347656, "learning_rate": 3.193259275478436e-08, "loss": 0.24634991, "memory(GiB)": 34.88, "step": 142760, "train_speed(iter/s)": 0.410817 }, { "acc": 0.94672527, "epoch": 3.86551322665367, "grad_norm": 3.937659978866577, "learning_rate": 3.187047452740422e-08, "loss": 0.38873703, "memory(GiB)": 34.88, "step": 142765, "train_speed(iter/s)": 0.410818 }, { "acc": 0.95223694, "epoch": 3.865648606936886, "grad_norm": 3.264765501022339, "learning_rate": 3.1808418541872086e-08, "loss": 0.28988166, "memory(GiB)": 34.88, "step": 142770, "train_speed(iter/s)": 0.410819 }, { "acc": 0.94733486, "epoch": 3.865783987220101, "grad_norm": 4.400981426239014, "learning_rate": 3.174642479896559e-08, "loss": 0.24421341, "memory(GiB)": 34.88, "step": 142775, "train_speed(iter/s)": 0.410819 }, { "acc": 0.95840292, "epoch": 3.8659193675033166, "grad_norm": 6.078117370605469, "learning_rate": 3.168449329946124e-08, "loss": 0.20638309, "memory(GiB)": 34.88, "step": 142780, "train_speed(iter/s)": 0.41082 }, { "acc": 0.945644, "epoch": 3.8660547477865324, "grad_norm": 6.253000259399414, "learning_rate": 3.1622624044134464e-08, "loss": 0.35999293, "memory(GiB)": 34.88, "step": 142785, "train_speed(iter/s)": 0.410821 }, { "acc": 0.95026512, "epoch": 3.866190128069748, "grad_norm": 2.432074546813965, "learning_rate": 3.156081703376123e-08, "loss": 0.32059255, "memory(GiB)": 34.88, "step": 142790, "train_speed(iter/s)": 0.410822 }, { "acc": 0.93712263, "epoch": 3.8663255083529635, "grad_norm": 11.752252578735352, "learning_rate": 3.149907226911472e-08, "loss": 0.43061948, "memory(GiB)": 34.88, "step": 142795, "train_speed(iter/s)": 0.410823 }, { "acc": 0.94055214, "epoch": 3.866460888636179, "grad_norm": 7.843044281005859, "learning_rate": 3.1437389750969254e-08, "loss": 0.38936183, "memory(GiB)": 34.88, "step": 142800, "train_speed(iter/s)": 0.410823 }, { "acc": 0.94802628, "epoch": 3.8665962689193947, "grad_norm": 9.188129425048828, "learning_rate": 3.13757694800969e-08, "loss": 0.30882745, "memory(GiB)": 34.88, "step": 142805, "train_speed(iter/s)": 0.410824 }, { "acc": 0.95044689, "epoch": 3.86673164920261, "grad_norm": 10.219772338867188, "learning_rate": 3.131421145726975e-08, "loss": 0.32298131, "memory(GiB)": 34.88, "step": 142810, "train_speed(iter/s)": 0.410825 }, { "acc": 0.95397024, "epoch": 3.866867029485826, "grad_norm": 5.825860023498535, "learning_rate": 3.1252715683259326e-08, "loss": 0.2738874, "memory(GiB)": 34.88, "step": 142815, "train_speed(iter/s)": 0.410826 }, { "acc": 0.94461384, "epoch": 3.867002409769041, "grad_norm": 3.0142898559570312, "learning_rate": 3.119128215883549e-08, "loss": 0.31700096, "memory(GiB)": 34.88, "step": 142820, "train_speed(iter/s)": 0.410826 }, { "acc": 0.94736824, "epoch": 3.867137790052257, "grad_norm": 8.045660018920898, "learning_rate": 3.112991088476811e-08, "loss": 0.26356535, "memory(GiB)": 34.88, "step": 142825, "train_speed(iter/s)": 0.410827 }, { "acc": 0.96392231, "epoch": 3.8672731703354724, "grad_norm": 6.855795383453369, "learning_rate": 3.1068601861825935e-08, "loss": 0.21657026, "memory(GiB)": 34.88, "step": 142830, "train_speed(iter/s)": 0.410828 }, { "acc": 0.934408, "epoch": 3.8674085506186877, "grad_norm": 12.48859977722168, "learning_rate": 3.100735509077661e-08, "loss": 0.39885333, "memory(GiB)": 34.88, "step": 142835, "train_speed(iter/s)": 0.410829 }, { "acc": 0.94528122, "epoch": 3.8675439309019035, "grad_norm": 6.362425804138184, "learning_rate": 3.094617057238778e-08, "loss": 0.28091183, "memory(GiB)": 34.88, "step": 142840, "train_speed(iter/s)": 0.410829 }, { "acc": 0.95650234, "epoch": 3.867679311185119, "grad_norm": 6.047194004058838, "learning_rate": 3.088504830742652e-08, "loss": 0.25386534, "memory(GiB)": 34.88, "step": 142845, "train_speed(iter/s)": 0.41083 }, { "acc": 0.94838905, "epoch": 3.8678146914683347, "grad_norm": 6.453434944152832, "learning_rate": 3.082398829665715e-08, "loss": 0.32110291, "memory(GiB)": 34.88, "step": 142850, "train_speed(iter/s)": 0.410831 }, { "acc": 0.9537529, "epoch": 3.86795007175155, "grad_norm": 10.207969665527344, "learning_rate": 3.07629905408451e-08, "loss": 0.28850513, "memory(GiB)": 34.88, "step": 142855, "train_speed(iter/s)": 0.410832 }, { "acc": 0.95864868, "epoch": 3.868085452034766, "grad_norm": 14.963048934936523, "learning_rate": 3.070205504075468e-08, "loss": 0.20688825, "memory(GiB)": 34.88, "step": 142860, "train_speed(iter/s)": 0.410832 }, { "acc": 0.94571457, "epoch": 3.868220832317981, "grad_norm": 7.197706699371338, "learning_rate": 3.064118179714963e-08, "loss": 0.35265927, "memory(GiB)": 34.88, "step": 142865, "train_speed(iter/s)": 0.410833 }, { "acc": 0.94148073, "epoch": 3.8683562126011966, "grad_norm": 4.291308403015137, "learning_rate": 3.058037081079206e-08, "loss": 0.35158057, "memory(GiB)": 34.88, "step": 142870, "train_speed(iter/s)": 0.410834 }, { "acc": 0.96034946, "epoch": 3.8684915928844124, "grad_norm": 6.984014511108398, "learning_rate": 3.0519622082442944e-08, "loss": 0.21451678, "memory(GiB)": 34.88, "step": 142875, "train_speed(iter/s)": 0.410835 }, { "acc": 0.96178589, "epoch": 3.8686269731676277, "grad_norm": 6.900387763977051, "learning_rate": 3.045893561286493e-08, "loss": 0.23700299, "memory(GiB)": 34.88, "step": 142880, "train_speed(iter/s)": 0.410836 }, { "acc": 0.95784311, "epoch": 3.8687623534508435, "grad_norm": 5.376119613647461, "learning_rate": 3.039831140281678e-08, "loss": 0.23959565, "memory(GiB)": 34.88, "step": 142885, "train_speed(iter/s)": 0.410836 }, { "acc": 0.9556015, "epoch": 3.868897733734059, "grad_norm": 10.654492378234863, "learning_rate": 3.033774945305836e-08, "loss": 0.29527435, "memory(GiB)": 34.88, "step": 142890, "train_speed(iter/s)": 0.410837 }, { "acc": 0.95153894, "epoch": 3.8690331140172747, "grad_norm": 6.286839485168457, "learning_rate": 3.0277249764349e-08, "loss": 0.27314091, "memory(GiB)": 34.88, "step": 142895, "train_speed(iter/s)": 0.410838 }, { "acc": 0.95656614, "epoch": 3.86916849430049, "grad_norm": 4.00994348526001, "learning_rate": 3.021681233744579e-08, "loss": 0.23622928, "memory(GiB)": 34.88, "step": 142900, "train_speed(iter/s)": 0.410839 }, { "acc": 0.94423027, "epoch": 3.8693038745837054, "grad_norm": 6.03908634185791, "learning_rate": 3.0156437173105826e-08, "loss": 0.32099957, "memory(GiB)": 34.88, "step": 142905, "train_speed(iter/s)": 0.410839 }, { "acc": 0.94247417, "epoch": 3.869439254866921, "grad_norm": 15.702592849731445, "learning_rate": 3.00961242720862e-08, "loss": 0.39537067, "memory(GiB)": 34.88, "step": 142910, "train_speed(iter/s)": 0.41084 }, { "acc": 0.94252472, "epoch": 3.869574635150137, "grad_norm": 4.240677356719971, "learning_rate": 3.003587363514234e-08, "loss": 0.33250022, "memory(GiB)": 34.88, "step": 142915, "train_speed(iter/s)": 0.410841 }, { "acc": 0.94200726, "epoch": 3.8697100154333524, "grad_norm": 4.52264404296875, "learning_rate": 2.997568526302803e-08, "loss": 0.30417981, "memory(GiB)": 34.88, "step": 142920, "train_speed(iter/s)": 0.410842 }, { "acc": 0.9480835, "epoch": 3.8698453957165677, "grad_norm": 4.106483459472656, "learning_rate": 2.991555915649758e-08, "loss": 0.35318744, "memory(GiB)": 34.88, "step": 142925, "train_speed(iter/s)": 0.410842 }, { "acc": 0.9512538, "epoch": 3.8699807759997835, "grad_norm": 3.4196624755859375, "learning_rate": 2.985549531630475e-08, "loss": 0.25202799, "memory(GiB)": 34.88, "step": 142930, "train_speed(iter/s)": 0.410843 }, { "acc": 0.96740255, "epoch": 3.870116156282999, "grad_norm": 4.117350101470947, "learning_rate": 2.979549374320164e-08, "loss": 0.21235688, "memory(GiB)": 34.88, "step": 142935, "train_speed(iter/s)": 0.410844 }, { "acc": 0.95658684, "epoch": 3.8702515365662142, "grad_norm": 7.546271324157715, "learning_rate": 2.973555443793926e-08, "loss": 0.2498873, "memory(GiB)": 34.88, "step": 142940, "train_speed(iter/s)": 0.410845 }, { "acc": 0.94854984, "epoch": 3.87038691684943, "grad_norm": 44.86015319824219, "learning_rate": 2.9675677401269696e-08, "loss": 0.34864826, "memory(GiB)": 34.88, "step": 142945, "train_speed(iter/s)": 0.410845 }, { "acc": 0.94338398, "epoch": 3.870522297132646, "grad_norm": 28.578636169433594, "learning_rate": 2.9615862633942828e-08, "loss": 0.37662597, "memory(GiB)": 34.88, "step": 142950, "train_speed(iter/s)": 0.410846 }, { "acc": 0.94740629, "epoch": 3.870657677415861, "grad_norm": 17.958812713623047, "learning_rate": 2.9556110136706875e-08, "loss": 0.2780149, "memory(GiB)": 34.88, "step": 142955, "train_speed(iter/s)": 0.410847 }, { "acc": 0.94755306, "epoch": 3.8707930576990766, "grad_norm": 9.269598960876465, "learning_rate": 2.9496419910311165e-08, "loss": 0.27075243, "memory(GiB)": 34.88, "step": 142960, "train_speed(iter/s)": 0.410848 }, { "acc": 0.94936771, "epoch": 3.8709284379822924, "grad_norm": 4.948190212249756, "learning_rate": 2.9436791955503355e-08, "loss": 0.3131043, "memory(GiB)": 34.88, "step": 142965, "train_speed(iter/s)": 0.410848 }, { "acc": 0.94602842, "epoch": 3.8710638182655077, "grad_norm": 6.6984782218933105, "learning_rate": 2.937722627303055e-08, "loss": 0.37378702, "memory(GiB)": 34.88, "step": 142970, "train_speed(iter/s)": 0.410849 }, { "acc": 0.95299034, "epoch": 3.8711991985487235, "grad_norm": 7.582155704498291, "learning_rate": 2.9317722863638193e-08, "loss": 0.29351892, "memory(GiB)": 34.88, "step": 142975, "train_speed(iter/s)": 0.41085 }, { "acc": 0.95313635, "epoch": 3.871334578831939, "grad_norm": 5.062314987182617, "learning_rate": 2.9258281728072828e-08, "loss": 0.32952774, "memory(GiB)": 34.88, "step": 142980, "train_speed(iter/s)": 0.410851 }, { "acc": 0.95372181, "epoch": 3.8714699591151547, "grad_norm": 3.4151740074157715, "learning_rate": 2.919890286707824e-08, "loss": 0.20903583, "memory(GiB)": 34.88, "step": 142985, "train_speed(iter/s)": 0.410851 }, { "acc": 0.94806414, "epoch": 3.87160533939837, "grad_norm": 12.930829048156738, "learning_rate": 2.913958628139875e-08, "loss": 0.32664702, "memory(GiB)": 34.88, "step": 142990, "train_speed(iter/s)": 0.410852 }, { "acc": 0.93531857, "epoch": 3.8717407196815854, "grad_norm": 9.996091842651367, "learning_rate": 2.908033197177647e-08, "loss": 0.38476429, "memory(GiB)": 34.88, "step": 142995, "train_speed(iter/s)": 0.410853 }, { "acc": 0.96575069, "epoch": 3.871876099964801, "grad_norm": 5.135995864868164, "learning_rate": 2.9021139938954627e-08, "loss": 0.19012207, "memory(GiB)": 34.88, "step": 143000, "train_speed(iter/s)": 0.410854 }, { "acc": 0.95321112, "epoch": 3.8720114802480166, "grad_norm": 18.148080825805664, "learning_rate": 2.8962010183674768e-08, "loss": 0.30897779, "memory(GiB)": 34.88, "step": 143005, "train_speed(iter/s)": 0.410855 }, { "acc": 0.94602871, "epoch": 3.8721468605312324, "grad_norm": 3.8451552391052246, "learning_rate": 2.8902942706676784e-08, "loss": 0.40422363, "memory(GiB)": 34.88, "step": 143010, "train_speed(iter/s)": 0.410855 }, { "acc": 0.95801668, "epoch": 3.8722822408144477, "grad_norm": 6.63261604309082, "learning_rate": 2.8843937508701677e-08, "loss": 0.27688255, "memory(GiB)": 34.88, "step": 143015, "train_speed(iter/s)": 0.410856 }, { "acc": 0.9393672, "epoch": 3.8724176210976635, "grad_norm": 5.239362716674805, "learning_rate": 2.8784994590487672e-08, "loss": 0.38492763, "memory(GiB)": 34.88, "step": 143020, "train_speed(iter/s)": 0.410857 }, { "acc": 0.95852051, "epoch": 3.872553001380879, "grad_norm": 12.917545318603516, "learning_rate": 2.8726113952773548e-08, "loss": 0.2783926, "memory(GiB)": 34.88, "step": 143025, "train_speed(iter/s)": 0.410858 }, { "acc": 0.94641056, "epoch": 3.8726883816640942, "grad_norm": 17.972021102905273, "learning_rate": 2.8667295596296967e-08, "loss": 0.34318631, "memory(GiB)": 34.88, "step": 143030, "train_speed(iter/s)": 0.410858 }, { "acc": 0.94457388, "epoch": 3.87282376194731, "grad_norm": 2.3217101097106934, "learning_rate": 2.8608539521794497e-08, "loss": 0.42089834, "memory(GiB)": 34.88, "step": 143035, "train_speed(iter/s)": 0.410859 }, { "acc": 0.9587656, "epoch": 3.8729591422305254, "grad_norm": 5.791938781738281, "learning_rate": 2.8549845730002694e-08, "loss": 0.26209559, "memory(GiB)": 34.88, "step": 143040, "train_speed(iter/s)": 0.41086 }, { "acc": 0.95029879, "epoch": 3.873094522513741, "grad_norm": 4.814164161682129, "learning_rate": 2.8491214221655902e-08, "loss": 0.32111712, "memory(GiB)": 34.88, "step": 143045, "train_speed(iter/s)": 0.410861 }, { "acc": 0.95938768, "epoch": 3.8732299027969566, "grad_norm": 10.293665885925293, "learning_rate": 2.8432644997489563e-08, "loss": 0.23476181, "memory(GiB)": 34.88, "step": 143050, "train_speed(iter/s)": 0.410862 }, { "acc": 0.9447998, "epoch": 3.8733652830801724, "grad_norm": 13.153787612915039, "learning_rate": 2.8374138058236355e-08, "loss": 0.31578698, "memory(GiB)": 34.88, "step": 143055, "train_speed(iter/s)": 0.410862 }, { "acc": 0.94681683, "epoch": 3.8735006633633877, "grad_norm": 3.7797043323516846, "learning_rate": 2.8315693404630615e-08, "loss": 0.31419225, "memory(GiB)": 34.88, "step": 143060, "train_speed(iter/s)": 0.410863 }, { "acc": 0.94056501, "epoch": 3.873636043646603, "grad_norm": 5.574778079986572, "learning_rate": 2.82573110374028e-08, "loss": 0.35298259, "memory(GiB)": 34.88, "step": 143065, "train_speed(iter/s)": 0.410864 }, { "acc": 0.9211874, "epoch": 3.873771423929819, "grad_norm": 26.252687454223633, "learning_rate": 2.8198990957285578e-08, "loss": 0.47886677, "memory(GiB)": 34.88, "step": 143070, "train_speed(iter/s)": 0.410865 }, { "acc": 0.93749838, "epoch": 3.8739068042130347, "grad_norm": 12.239673614501953, "learning_rate": 2.814073316500886e-08, "loss": 0.40342469, "memory(GiB)": 34.88, "step": 143075, "train_speed(iter/s)": 0.410866 }, { "acc": 0.95364485, "epoch": 3.87404218449625, "grad_norm": 6.604697227478027, "learning_rate": 2.8082537661303085e-08, "loss": 0.28316135, "memory(GiB)": 34.88, "step": 143080, "train_speed(iter/s)": 0.410866 }, { "acc": 0.95842972, "epoch": 3.8741775647794654, "grad_norm": 1.6735979318618774, "learning_rate": 2.8024404446896492e-08, "loss": 0.23450329, "memory(GiB)": 34.88, "step": 143085, "train_speed(iter/s)": 0.410867 }, { "acc": 0.95181866, "epoch": 3.874312945062681, "grad_norm": 10.93055248260498, "learning_rate": 2.7966333522517318e-08, "loss": 0.34357073, "memory(GiB)": 34.88, "step": 143090, "train_speed(iter/s)": 0.410868 }, { "acc": 0.96609612, "epoch": 3.8744483253458966, "grad_norm": 5.400277614593506, "learning_rate": 2.790832488889379e-08, "loss": 0.23733821, "memory(GiB)": 34.88, "step": 143095, "train_speed(iter/s)": 0.410869 }, { "acc": 0.95643759, "epoch": 3.874583705629112, "grad_norm": 3.8829989433288574, "learning_rate": 2.7850378546751925e-08, "loss": 0.24676776, "memory(GiB)": 34.88, "step": 143100, "train_speed(iter/s)": 0.410869 }, { "acc": 0.94659824, "epoch": 3.8747190859123277, "grad_norm": 11.87022590637207, "learning_rate": 2.779249449681774e-08, "loss": 0.32038817, "memory(GiB)": 34.88, "step": 143105, "train_speed(iter/s)": 0.41087 }, { "acc": 0.9342885, "epoch": 3.8748544661955435, "grad_norm": 16.401718139648438, "learning_rate": 2.7734672739816683e-08, "loss": 0.41113162, "memory(GiB)": 34.88, "step": 143110, "train_speed(iter/s)": 0.410871 }, { "acc": 0.96507797, "epoch": 3.874989846478759, "grad_norm": 3.819124698638916, "learning_rate": 2.767691327647256e-08, "loss": 0.20214636, "memory(GiB)": 34.88, "step": 143115, "train_speed(iter/s)": 0.410871 }, { "acc": 0.95840158, "epoch": 3.8751252267619742, "grad_norm": 8.245043754577637, "learning_rate": 2.7619216107509154e-08, "loss": 0.26357775, "memory(GiB)": 34.88, "step": 143120, "train_speed(iter/s)": 0.410872 }, { "acc": 0.96226025, "epoch": 3.87526060704519, "grad_norm": 6.566937446594238, "learning_rate": 2.7561581233649153e-08, "loss": 0.24034553, "memory(GiB)": 34.88, "step": 143125, "train_speed(iter/s)": 0.410873 }, { "acc": 0.94907246, "epoch": 3.8753959873284054, "grad_norm": 3.839315176010132, "learning_rate": 2.7504008655615232e-08, "loss": 0.33111491, "memory(GiB)": 34.88, "step": 143130, "train_speed(iter/s)": 0.410874 }, { "acc": 0.94205456, "epoch": 3.875531367611621, "grad_norm": 8.367258071899414, "learning_rate": 2.744649837412731e-08, "loss": 0.40178108, "memory(GiB)": 34.88, "step": 143135, "train_speed(iter/s)": 0.410875 }, { "acc": 0.95176697, "epoch": 3.8756667478948366, "grad_norm": 4.299475193023682, "learning_rate": 2.7389050389906958e-08, "loss": 0.26894636, "memory(GiB)": 34.88, "step": 143140, "train_speed(iter/s)": 0.410875 }, { "acc": 0.95946302, "epoch": 3.8758021281780524, "grad_norm": 2.9460813999176025, "learning_rate": 2.733166470367352e-08, "loss": 0.2207983, "memory(GiB)": 34.88, "step": 143145, "train_speed(iter/s)": 0.410876 }, { "acc": 0.93522015, "epoch": 3.8759375084612677, "grad_norm": 3.8161842823028564, "learning_rate": 2.727434131614524e-08, "loss": 0.42969875, "memory(GiB)": 34.88, "step": 143150, "train_speed(iter/s)": 0.410877 }, { "acc": 0.96171064, "epoch": 3.876072888744483, "grad_norm": 3.4888439178466797, "learning_rate": 2.7217080228040927e-08, "loss": 0.21593599, "memory(GiB)": 34.88, "step": 143155, "train_speed(iter/s)": 0.410878 }, { "acc": 0.95340691, "epoch": 3.876208269027699, "grad_norm": 5.246004104614258, "learning_rate": 2.7159881440077703e-08, "loss": 0.26387281, "memory(GiB)": 34.88, "step": 143160, "train_speed(iter/s)": 0.410878 }, { "acc": 0.94659576, "epoch": 3.8763436493109142, "grad_norm": 3.7515971660614014, "learning_rate": 2.7102744952972148e-08, "loss": 0.29331594, "memory(GiB)": 34.88, "step": 143165, "train_speed(iter/s)": 0.410879 }, { "acc": 0.94957571, "epoch": 3.87647902959413, "grad_norm": 11.748897552490234, "learning_rate": 2.7045670767439726e-08, "loss": 0.3433845, "memory(GiB)": 34.88, "step": 143170, "train_speed(iter/s)": 0.41088 }, { "acc": 0.94914427, "epoch": 3.8766144098773454, "grad_norm": 9.453571319580078, "learning_rate": 2.698865888419591e-08, "loss": 0.30264149, "memory(GiB)": 34.88, "step": 143175, "train_speed(iter/s)": 0.410881 }, { "acc": 0.94942513, "epoch": 3.876749790160561, "grad_norm": 6.186557769775391, "learning_rate": 2.69317093039545e-08, "loss": 0.33272457, "memory(GiB)": 34.88, "step": 143180, "train_speed(iter/s)": 0.410882 }, { "acc": 0.94843426, "epoch": 3.8768851704437766, "grad_norm": 8.59932804107666, "learning_rate": 2.687482202742874e-08, "loss": 0.37202487, "memory(GiB)": 34.88, "step": 143185, "train_speed(iter/s)": 0.410882 }, { "acc": 0.93689289, "epoch": 3.877020550726992, "grad_norm": 10.977431297302246, "learning_rate": 2.6817997055331324e-08, "loss": 0.42506061, "memory(GiB)": 34.88, "step": 143190, "train_speed(iter/s)": 0.410883 }, { "acc": 0.95599918, "epoch": 3.8771559310102077, "grad_norm": 8.110782623291016, "learning_rate": 2.6761234388374945e-08, "loss": 0.27188768, "memory(GiB)": 34.88, "step": 143195, "train_speed(iter/s)": 0.410884 }, { "acc": 0.9398633, "epoch": 3.877291311293423, "grad_norm": 5.880649089813232, "learning_rate": 2.670453402726952e-08, "loss": 0.4425931, "memory(GiB)": 34.88, "step": 143200, "train_speed(iter/s)": 0.410884 }, { "acc": 0.9591713, "epoch": 3.877426691576639, "grad_norm": 9.715030670166016, "learning_rate": 2.6647895972725514e-08, "loss": 0.23129802, "memory(GiB)": 34.88, "step": 143205, "train_speed(iter/s)": 0.410885 }, { "acc": 0.94595509, "epoch": 3.8775620718598542, "grad_norm": 11.012405395507812, "learning_rate": 2.6591320225453412e-08, "loss": 0.38221438, "memory(GiB)": 34.88, "step": 143210, "train_speed(iter/s)": 0.410886 }, { "acc": 0.9579649, "epoch": 3.87769745214307, "grad_norm": 38.43534851074219, "learning_rate": 2.6534806786160895e-08, "loss": 0.277354, "memory(GiB)": 34.88, "step": 143215, "train_speed(iter/s)": 0.410887 }, { "acc": 0.95298872, "epoch": 3.8778328324262854, "grad_norm": 7.268393516540527, "learning_rate": 2.6478355655556224e-08, "loss": 0.27923932, "memory(GiB)": 34.88, "step": 143220, "train_speed(iter/s)": 0.410888 }, { "acc": 0.93140202, "epoch": 3.8779682127095008, "grad_norm": 3.8444511890411377, "learning_rate": 2.642196683434654e-08, "loss": 0.4539135, "memory(GiB)": 34.88, "step": 143225, "train_speed(iter/s)": 0.410888 }, { "acc": 0.95917444, "epoch": 3.8781035929927166, "grad_norm": 12.915297508239746, "learning_rate": 2.6365640323238424e-08, "loss": 0.25786016, "memory(GiB)": 34.88, "step": 143230, "train_speed(iter/s)": 0.410889 }, { "acc": 0.94949188, "epoch": 3.8782389732759324, "grad_norm": 4.480046272277832, "learning_rate": 2.6309376122936802e-08, "loss": 0.29203749, "memory(GiB)": 34.88, "step": 143235, "train_speed(iter/s)": 0.41089 }, { "acc": 0.95545464, "epoch": 3.8783743535591477, "grad_norm": 9.986331939697266, "learning_rate": 2.6253174234147706e-08, "loss": 0.28778756, "memory(GiB)": 34.88, "step": 143240, "train_speed(iter/s)": 0.410891 }, { "acc": 0.95486097, "epoch": 3.878509733842363, "grad_norm": 6.882600784301758, "learning_rate": 2.619703465757383e-08, "loss": 0.29521556, "memory(GiB)": 34.88, "step": 143245, "train_speed(iter/s)": 0.410891 }, { "acc": 0.95061321, "epoch": 3.878645114125579, "grad_norm": 3.9496757984161377, "learning_rate": 2.614095739391955e-08, "loss": 0.3425997, "memory(GiB)": 34.88, "step": 143250, "train_speed(iter/s)": 0.410892 }, { "acc": 0.95410042, "epoch": 3.8787804944087942, "grad_norm": 4.57256555557251, "learning_rate": 2.6084942443887004e-08, "loss": 0.30165379, "memory(GiB)": 34.88, "step": 143255, "train_speed(iter/s)": 0.410893 }, { "acc": 0.94000168, "epoch": 3.8789158746920096, "grad_norm": 9.578461647033691, "learning_rate": 2.602898980817723e-08, "loss": 0.37727621, "memory(GiB)": 34.88, "step": 143260, "train_speed(iter/s)": 0.410894 }, { "acc": 0.94028616, "epoch": 3.8790512549752254, "grad_norm": 13.797974586486816, "learning_rate": 2.5973099487492378e-08, "loss": 0.25870643, "memory(GiB)": 34.88, "step": 143265, "train_speed(iter/s)": 0.410894 }, { "acc": 0.94458246, "epoch": 3.879186635258441, "grad_norm": 5.183185577392578, "learning_rate": 2.5917271482531254e-08, "loss": 0.29436707, "memory(GiB)": 34.88, "step": 143270, "train_speed(iter/s)": 0.410895 }, { "acc": 0.95434227, "epoch": 3.8793220155416566, "grad_norm": 8.472208023071289, "learning_rate": 2.5861505793994346e-08, "loss": 0.33319719, "memory(GiB)": 34.88, "step": 143275, "train_speed(iter/s)": 0.410896 }, { "acc": 0.94696798, "epoch": 3.879457395824872, "grad_norm": 9.492934226989746, "learning_rate": 2.580580242257936e-08, "loss": 0.34953051, "memory(GiB)": 34.88, "step": 143280, "train_speed(iter/s)": 0.410897 }, { "acc": 0.95338116, "epoch": 3.8795927761080877, "grad_norm": 5.9193115234375, "learning_rate": 2.5750161368984553e-08, "loss": 0.23461266, "memory(GiB)": 34.88, "step": 143285, "train_speed(iter/s)": 0.410897 }, { "acc": 0.94019909, "epoch": 3.879728156391303, "grad_norm": 6.101923942565918, "learning_rate": 2.5694582633907075e-08, "loss": 0.35770061, "memory(GiB)": 34.88, "step": 143290, "train_speed(iter/s)": 0.410898 }, { "acc": 0.95510197, "epoch": 3.879863536674519, "grad_norm": 6.89568567276001, "learning_rate": 2.5639066218042414e-08, "loss": 0.27505779, "memory(GiB)": 34.88, "step": 143295, "train_speed(iter/s)": 0.410899 }, { "acc": 0.94522629, "epoch": 3.8799989169577342, "grad_norm": 6.7920002937316895, "learning_rate": 2.558361212208661e-08, "loss": 0.31985087, "memory(GiB)": 34.88, "step": 143300, "train_speed(iter/s)": 0.4109 }, { "acc": 0.95611382, "epoch": 3.88013429724095, "grad_norm": 9.02003002166748, "learning_rate": 2.5528220346734594e-08, "loss": 0.28852692, "memory(GiB)": 34.88, "step": 143305, "train_speed(iter/s)": 0.4109 }, { "acc": 0.95406094, "epoch": 3.8802696775241654, "grad_norm": 9.745111465454102, "learning_rate": 2.5472890892679626e-08, "loss": 0.32619328, "memory(GiB)": 34.88, "step": 143310, "train_speed(iter/s)": 0.410901 }, { "acc": 0.95790596, "epoch": 3.8804050578073808, "grad_norm": 42.6947021484375, "learning_rate": 2.5417623760615528e-08, "loss": 0.29148362, "memory(GiB)": 34.88, "step": 143315, "train_speed(iter/s)": 0.410902 }, { "acc": 0.93903723, "epoch": 3.8805404380905966, "grad_norm": 9.071946144104004, "learning_rate": 2.5362418951233898e-08, "loss": 0.37887466, "memory(GiB)": 34.88, "step": 143320, "train_speed(iter/s)": 0.410903 }, { "acc": 0.94418049, "epoch": 3.880675818373812, "grad_norm": 4.939565658569336, "learning_rate": 2.5307276465226343e-08, "loss": 0.42982416, "memory(GiB)": 34.88, "step": 143325, "train_speed(iter/s)": 0.410903 }, { "acc": 0.94244156, "epoch": 3.8808111986570277, "grad_norm": 3.9001758098602295, "learning_rate": 2.5252196303284458e-08, "loss": 0.38340421, "memory(GiB)": 34.88, "step": 143330, "train_speed(iter/s)": 0.410904 }, { "acc": 0.95933723, "epoch": 3.880946578940243, "grad_norm": 2.5997297763824463, "learning_rate": 2.5197178466097625e-08, "loss": 0.21868353, "memory(GiB)": 34.88, "step": 143335, "train_speed(iter/s)": 0.410905 }, { "acc": 0.95079269, "epoch": 3.881081959223459, "grad_norm": 7.9064154624938965, "learning_rate": 2.514222295435466e-08, "loss": 0.27486753, "memory(GiB)": 34.88, "step": 143340, "train_speed(iter/s)": 0.410906 }, { "acc": 0.95265322, "epoch": 3.8812173395066742, "grad_norm": 5.61386775970459, "learning_rate": 2.5087329768744402e-08, "loss": 0.25038028, "memory(GiB)": 34.88, "step": 143345, "train_speed(iter/s)": 0.410907 }, { "acc": 0.9469346, "epoch": 3.8813527197898896, "grad_norm": 2.772388219833374, "learning_rate": 2.503249890995455e-08, "loss": 0.36919854, "memory(GiB)": 34.88, "step": 143350, "train_speed(iter/s)": 0.410907 }, { "acc": 0.94067869, "epoch": 3.8814881000731054, "grad_norm": 5.8819122314453125, "learning_rate": 2.4977730378672273e-08, "loss": 0.37214191, "memory(GiB)": 34.88, "step": 143355, "train_speed(iter/s)": 0.410908 }, { "acc": 0.93913136, "epoch": 3.8816234803563208, "grad_norm": 8.78036117553711, "learning_rate": 2.4923024175583063e-08, "loss": 0.39680836, "memory(GiB)": 34.88, "step": 143360, "train_speed(iter/s)": 0.410909 }, { "acc": 0.95453205, "epoch": 3.8817588606395366, "grad_norm": 5.604403018951416, "learning_rate": 2.4868380301372414e-08, "loss": 0.25007553, "memory(GiB)": 34.88, "step": 143365, "train_speed(iter/s)": 0.41091 }, { "acc": 0.95359821, "epoch": 3.881894240922752, "grad_norm": 7.060423851013184, "learning_rate": 2.481379875672471e-08, "loss": 0.30469363, "memory(GiB)": 34.88, "step": 143370, "train_speed(iter/s)": 0.410911 }, { "acc": 0.94745302, "epoch": 3.8820296212059677, "grad_norm": 11.954801559448242, "learning_rate": 2.4759279542324335e-08, "loss": 0.33175368, "memory(GiB)": 34.88, "step": 143375, "train_speed(iter/s)": 0.410911 }, { "acc": 0.95854702, "epoch": 3.882165001489183, "grad_norm": 5.963400363922119, "learning_rate": 2.4704822658854008e-08, "loss": 0.24393539, "memory(GiB)": 34.88, "step": 143380, "train_speed(iter/s)": 0.410912 }, { "acc": 0.95205965, "epoch": 3.8823003817723984, "grad_norm": 5.181604862213135, "learning_rate": 2.4650428106994783e-08, "loss": 0.29769988, "memory(GiB)": 34.88, "step": 143385, "train_speed(iter/s)": 0.410913 }, { "acc": 0.94253845, "epoch": 3.8824357620556142, "grad_norm": 6.688575267791748, "learning_rate": 2.459609588742993e-08, "loss": 0.31300116, "memory(GiB)": 34.88, "step": 143390, "train_speed(iter/s)": 0.410914 }, { "acc": 0.95724831, "epoch": 3.88257114233883, "grad_norm": 3.767157793045044, "learning_rate": 2.4541826000838847e-08, "loss": 0.23970053, "memory(GiB)": 34.88, "step": 143395, "train_speed(iter/s)": 0.410914 }, { "acc": 0.95336609, "epoch": 3.8827065226220454, "grad_norm": 3.5968642234802246, "learning_rate": 2.4487618447900912e-08, "loss": 0.25016844, "memory(GiB)": 34.88, "step": 143400, "train_speed(iter/s)": 0.410915 }, { "acc": 0.95366001, "epoch": 3.8828419029052608, "grad_norm": 17.813030242919922, "learning_rate": 2.443347322929663e-08, "loss": 0.26711612, "memory(GiB)": 34.88, "step": 143405, "train_speed(iter/s)": 0.410916 }, { "acc": 0.94756403, "epoch": 3.8829772831884766, "grad_norm": 7.629741668701172, "learning_rate": 2.4379390345703724e-08, "loss": 0.32730331, "memory(GiB)": 34.88, "step": 143410, "train_speed(iter/s)": 0.410916 }, { "acc": 0.95307827, "epoch": 3.883112663471692, "grad_norm": 6.621034622192383, "learning_rate": 2.4325369797798803e-08, "loss": 0.27982101, "memory(GiB)": 34.88, "step": 143415, "train_speed(iter/s)": 0.410917 }, { "acc": 0.95138197, "epoch": 3.8832480437549073, "grad_norm": 7.357531547546387, "learning_rate": 2.4271411586259595e-08, "loss": 0.23815598, "memory(GiB)": 34.88, "step": 143420, "train_speed(iter/s)": 0.410918 }, { "acc": 0.95821991, "epoch": 3.883383424038123, "grad_norm": 4.02260684967041, "learning_rate": 2.4217515711762156e-08, "loss": 0.28234673, "memory(GiB)": 34.88, "step": 143425, "train_speed(iter/s)": 0.410919 }, { "acc": 0.9472888, "epoch": 3.883518804321339, "grad_norm": 10.762871742248535, "learning_rate": 2.416368217498088e-08, "loss": 0.32144966, "memory(GiB)": 34.88, "step": 143430, "train_speed(iter/s)": 0.410919 }, { "acc": 0.9558322, "epoch": 3.8836541846045542, "grad_norm": 14.312851905822754, "learning_rate": 2.410991097659016e-08, "loss": 0.25605347, "memory(GiB)": 34.88, "step": 143435, "train_speed(iter/s)": 0.41092 }, { "acc": 0.95557947, "epoch": 3.8837895648877696, "grad_norm": 7.6041741371154785, "learning_rate": 2.4056202117264388e-08, "loss": 0.23578236, "memory(GiB)": 34.88, "step": 143440, "train_speed(iter/s)": 0.410921 }, { "acc": 0.94400425, "epoch": 3.8839249451709854, "grad_norm": 3.0028679370880127, "learning_rate": 2.4002555597675736e-08, "loss": 0.33777742, "memory(GiB)": 34.88, "step": 143445, "train_speed(iter/s)": 0.410922 }, { "acc": 0.9541975, "epoch": 3.8840603254542008, "grad_norm": 11.331123352050781, "learning_rate": 2.3948971418495827e-08, "loss": 0.32091818, "memory(GiB)": 34.88, "step": 143450, "train_speed(iter/s)": 0.410922 }, { "acc": 0.94155121, "epoch": 3.884195705737416, "grad_norm": 8.437200546264648, "learning_rate": 2.3895449580396823e-08, "loss": 0.35683806, "memory(GiB)": 34.88, "step": 143455, "train_speed(iter/s)": 0.410923 }, { "acc": 0.95241814, "epoch": 3.884331086020632, "grad_norm": 4.024819374084473, "learning_rate": 2.3841990084049246e-08, "loss": 0.24688063, "memory(GiB)": 34.88, "step": 143460, "train_speed(iter/s)": 0.410924 }, { "acc": 0.95663567, "epoch": 3.8844664663038477, "grad_norm": 4.570215702056885, "learning_rate": 2.378859293012193e-08, "loss": 0.2969233, "memory(GiB)": 34.88, "step": 143465, "train_speed(iter/s)": 0.410925 }, { "acc": 0.94647179, "epoch": 3.884601846587063, "grad_norm": 7.044765472412109, "learning_rate": 2.3735258119284272e-08, "loss": 0.33573737, "memory(GiB)": 34.88, "step": 143470, "train_speed(iter/s)": 0.410925 }, { "acc": 0.95359497, "epoch": 3.8847372268702784, "grad_norm": 7.78817892074585, "learning_rate": 2.3681985652204558e-08, "loss": 0.29450834, "memory(GiB)": 34.88, "step": 143475, "train_speed(iter/s)": 0.410926 }, { "acc": 0.94123526, "epoch": 3.8848726071534943, "grad_norm": 12.146845817565918, "learning_rate": 2.3628775529549418e-08, "loss": 0.33693347, "memory(GiB)": 34.88, "step": 143480, "train_speed(iter/s)": 0.410927 }, { "acc": 0.96184273, "epoch": 3.8850079874367096, "grad_norm": 3.410688638687134, "learning_rate": 2.357562775198658e-08, "loss": 0.18082235, "memory(GiB)": 34.88, "step": 143485, "train_speed(iter/s)": 0.410928 }, { "acc": 0.95597334, "epoch": 3.8851433677199254, "grad_norm": 7.080206394195557, "learning_rate": 2.352254232018044e-08, "loss": 0.32616839, "memory(GiB)": 34.88, "step": 143490, "train_speed(iter/s)": 0.410928 }, { "acc": 0.94580956, "epoch": 3.8852787480031408, "grad_norm": 8.818442344665527, "learning_rate": 2.3469519234797076e-08, "loss": 0.32296062, "memory(GiB)": 34.88, "step": 143495, "train_speed(iter/s)": 0.410929 }, { "acc": 0.95541992, "epoch": 3.8854141282863566, "grad_norm": 3.77217960357666, "learning_rate": 2.341655849650033e-08, "loss": 0.33584468, "memory(GiB)": 34.88, "step": 143500, "train_speed(iter/s)": 0.41093 }, { "acc": 0.94599686, "epoch": 3.885549508569572, "grad_norm": 5.98244047164917, "learning_rate": 2.3363660105953493e-08, "loss": 0.35559733, "memory(GiB)": 34.88, "step": 143505, "train_speed(iter/s)": 0.410931 }, { "acc": 0.94061804, "epoch": 3.8856848888527873, "grad_norm": 4.63596773147583, "learning_rate": 2.3310824063819303e-08, "loss": 0.27696507, "memory(GiB)": 34.88, "step": 143510, "train_speed(iter/s)": 0.410931 }, { "acc": 0.96205826, "epoch": 3.885820269136003, "grad_norm": 4.353492259979248, "learning_rate": 2.325805037075994e-08, "loss": 0.22326255, "memory(GiB)": 34.88, "step": 143515, "train_speed(iter/s)": 0.410932 }, { "acc": 0.95664158, "epoch": 3.8859556494192184, "grad_norm": 5.043235778808594, "learning_rate": 2.320533902743648e-08, "loss": 0.24347427, "memory(GiB)": 34.88, "step": 143520, "train_speed(iter/s)": 0.410933 }, { "acc": 0.9357502, "epoch": 3.8860910297024343, "grad_norm": 25.132369995117188, "learning_rate": 2.3152690034508327e-08, "loss": 0.36806109, "memory(GiB)": 34.88, "step": 143525, "train_speed(iter/s)": 0.410933 }, { "acc": 0.93704815, "epoch": 3.8862264099856496, "grad_norm": 7.100741863250732, "learning_rate": 2.3100103392635996e-08, "loss": 0.37274997, "memory(GiB)": 34.88, "step": 143530, "train_speed(iter/s)": 0.410934 }, { "acc": 0.94567375, "epoch": 3.8863617902688654, "grad_norm": 9.12315845489502, "learning_rate": 2.3047579102478338e-08, "loss": 0.35117497, "memory(GiB)": 34.88, "step": 143535, "train_speed(iter/s)": 0.410935 }, { "acc": 0.95005035, "epoch": 3.8864971705520808, "grad_norm": 4.366055488586426, "learning_rate": 2.299511716469254e-08, "loss": 0.31359491, "memory(GiB)": 34.88, "step": 143540, "train_speed(iter/s)": 0.410936 }, { "acc": 0.95879498, "epoch": 3.886632550835296, "grad_norm": 8.50672721862793, "learning_rate": 2.2942717579936352e-08, "loss": 0.21204722, "memory(GiB)": 34.88, "step": 143545, "train_speed(iter/s)": 0.410937 }, { "acc": 0.95552168, "epoch": 3.886767931118512, "grad_norm": 7.009454727172852, "learning_rate": 2.2890380348865836e-08, "loss": 0.2852071, "memory(GiB)": 34.88, "step": 143550, "train_speed(iter/s)": 0.410937 }, { "acc": 0.94337072, "epoch": 3.8869033114017273, "grad_norm": 7.321228981018066, "learning_rate": 2.2838105472137074e-08, "loss": 0.25323858, "memory(GiB)": 34.88, "step": 143555, "train_speed(iter/s)": 0.410938 }, { "acc": 0.95073967, "epoch": 3.887038691684943, "grad_norm": 6.493494033813477, "learning_rate": 2.2785892950404483e-08, "loss": 0.27249885, "memory(GiB)": 34.88, "step": 143560, "train_speed(iter/s)": 0.410939 }, { "acc": 0.95103064, "epoch": 3.8871740719681585, "grad_norm": 6.734795093536377, "learning_rate": 2.273374278432191e-08, "loss": 0.30260291, "memory(GiB)": 34.88, "step": 143565, "train_speed(iter/s)": 0.41094 }, { "acc": 0.95704718, "epoch": 3.8873094522513743, "grad_norm": 11.133027076721191, "learning_rate": 2.268165497454322e-08, "loss": 0.320596, "memory(GiB)": 34.88, "step": 143570, "train_speed(iter/s)": 0.41094 }, { "acc": 0.9524168, "epoch": 3.8874448325345896, "grad_norm": 9.677528381347656, "learning_rate": 2.2629629521720596e-08, "loss": 0.32289295, "memory(GiB)": 34.88, "step": 143575, "train_speed(iter/s)": 0.410941 }, { "acc": 0.93883038, "epoch": 3.887580212817805, "grad_norm": 3.619046211242676, "learning_rate": 2.257766642650623e-08, "loss": 0.38434939, "memory(GiB)": 34.88, "step": 143580, "train_speed(iter/s)": 0.410942 }, { "acc": 0.94926338, "epoch": 3.8877155931010208, "grad_norm": 8.426538467407227, "learning_rate": 2.2525765689550647e-08, "loss": 0.28764424, "memory(GiB)": 34.88, "step": 143585, "train_speed(iter/s)": 0.410943 }, { "acc": 0.94924917, "epoch": 3.8878509733842366, "grad_norm": 7.774574279785156, "learning_rate": 2.2473927311503827e-08, "loss": 0.29523141, "memory(GiB)": 34.88, "step": 143590, "train_speed(iter/s)": 0.410944 }, { "acc": 0.95087738, "epoch": 3.887986353667452, "grad_norm": 7.89022159576416, "learning_rate": 2.2422151293015728e-08, "loss": 0.31650491, "memory(GiB)": 34.88, "step": 143595, "train_speed(iter/s)": 0.410944 }, { "acc": 0.95180721, "epoch": 3.8881217339506673, "grad_norm": 6.469570159912109, "learning_rate": 2.2370437634734106e-08, "loss": 0.32872736, "memory(GiB)": 34.88, "step": 143600, "train_speed(iter/s)": 0.410945 }, { "acc": 0.95153313, "epoch": 3.888257114233883, "grad_norm": 4.498136520385742, "learning_rate": 2.2318786337307264e-08, "loss": 0.26511872, "memory(GiB)": 34.88, "step": 143605, "train_speed(iter/s)": 0.410946 }, { "acc": 0.9564045, "epoch": 3.8883924945170985, "grad_norm": 11.541711807250977, "learning_rate": 2.2267197401382408e-08, "loss": 0.24162757, "memory(GiB)": 34.88, "step": 143610, "train_speed(iter/s)": 0.410946 }, { "acc": 0.95209141, "epoch": 3.888527874800314, "grad_norm": 6.213220119476318, "learning_rate": 2.2215670827605604e-08, "loss": 0.27274404, "memory(GiB)": 34.88, "step": 143615, "train_speed(iter/s)": 0.410947 }, { "acc": 0.95405836, "epoch": 3.8886632550835296, "grad_norm": 4.368659019470215, "learning_rate": 2.2164206616622402e-08, "loss": 0.31615891, "memory(GiB)": 34.88, "step": 143620, "train_speed(iter/s)": 0.410948 }, { "acc": 0.95221691, "epoch": 3.8887986353667454, "grad_norm": 6.219069004058838, "learning_rate": 2.211280476907776e-08, "loss": 0.25719008, "memory(GiB)": 34.88, "step": 143625, "train_speed(iter/s)": 0.410949 }, { "acc": 0.95157566, "epoch": 3.8889340156499608, "grad_norm": 5.897956848144531, "learning_rate": 2.2061465285614445e-08, "loss": 0.31713419, "memory(GiB)": 34.88, "step": 143630, "train_speed(iter/s)": 0.410949 }, { "acc": 0.95383415, "epoch": 3.889069395933176, "grad_norm": 8.771621704101562, "learning_rate": 2.2010188166876873e-08, "loss": 0.33939209, "memory(GiB)": 34.88, "step": 143635, "train_speed(iter/s)": 0.41095 }, { "acc": 0.952281, "epoch": 3.889204776216392, "grad_norm": 6.806012153625488, "learning_rate": 2.195897341350724e-08, "loss": 0.27711639, "memory(GiB)": 34.88, "step": 143640, "train_speed(iter/s)": 0.410951 }, { "acc": 0.96980076, "epoch": 3.8893401564996073, "grad_norm": 3.7712225914001465, "learning_rate": 2.1907821026146083e-08, "loss": 0.18698965, "memory(GiB)": 34.88, "step": 143645, "train_speed(iter/s)": 0.410952 }, { "acc": 0.95676632, "epoch": 3.889475536782823, "grad_norm": 20.866174697875977, "learning_rate": 2.185673100543561e-08, "loss": 0.26139805, "memory(GiB)": 34.88, "step": 143650, "train_speed(iter/s)": 0.410952 }, { "acc": 0.95292025, "epoch": 3.8896109170660385, "grad_norm": 12.372298240661621, "learning_rate": 2.1805703352014123e-08, "loss": 0.29805965, "memory(GiB)": 34.88, "step": 143655, "train_speed(iter/s)": 0.410953 }, { "acc": 0.95773354, "epoch": 3.8897462973492543, "grad_norm": 3.198617696762085, "learning_rate": 2.175473806652272e-08, "loss": 0.26793118, "memory(GiB)": 34.88, "step": 143660, "train_speed(iter/s)": 0.410954 }, { "acc": 0.96465673, "epoch": 3.8898816776324696, "grad_norm": 16.38202667236328, "learning_rate": 2.17038351495986e-08, "loss": 0.24724746, "memory(GiB)": 34.88, "step": 143665, "train_speed(iter/s)": 0.410955 }, { "acc": 0.94757833, "epoch": 3.890017057915685, "grad_norm": 3.3635733127593994, "learning_rate": 2.165299460187952e-08, "loss": 0.27818511, "memory(GiB)": 34.88, "step": 143670, "train_speed(iter/s)": 0.410956 }, { "acc": 0.94837894, "epoch": 3.8901524381989008, "grad_norm": 14.522722244262695, "learning_rate": 2.160221642400325e-08, "loss": 0.27595358, "memory(GiB)": 34.88, "step": 143675, "train_speed(iter/s)": 0.410956 }, { "acc": 0.94193611, "epoch": 3.890287818482116, "grad_norm": 5.31891393661499, "learning_rate": 2.1551500616604767e-08, "loss": 0.38036766, "memory(GiB)": 34.88, "step": 143680, "train_speed(iter/s)": 0.410957 }, { "acc": 0.92225571, "epoch": 3.890423198765332, "grad_norm": 18.852998733520508, "learning_rate": 2.150084718031961e-08, "loss": 0.4561223, "memory(GiB)": 34.88, "step": 143685, "train_speed(iter/s)": 0.410958 }, { "acc": 0.96007557, "epoch": 3.8905585790485473, "grad_norm": 6.262617588043213, "learning_rate": 2.1450256115782766e-08, "loss": 0.26286626, "memory(GiB)": 34.88, "step": 143690, "train_speed(iter/s)": 0.410958 }, { "acc": 0.94707203, "epoch": 3.890693959331763, "grad_norm": 5.243504524230957, "learning_rate": 2.1399727423628102e-08, "loss": 0.30535131, "memory(GiB)": 34.88, "step": 143695, "train_speed(iter/s)": 0.410959 }, { "acc": 0.96014404, "epoch": 3.8908293396149785, "grad_norm": 3.0644783973693848, "learning_rate": 2.1349261104487836e-08, "loss": 0.14090201, "memory(GiB)": 34.88, "step": 143700, "train_speed(iter/s)": 0.41096 }, { "acc": 0.94731483, "epoch": 3.890964719898194, "grad_norm": 6.845761299133301, "learning_rate": 2.1298857158995282e-08, "loss": 0.34313807, "memory(GiB)": 34.88, "step": 143705, "train_speed(iter/s)": 0.41096 }, { "acc": 0.94887352, "epoch": 3.8911001001814096, "grad_norm": 2.8349835872650146, "learning_rate": 2.1248515587780433e-08, "loss": 0.2921649, "memory(GiB)": 34.88, "step": 143710, "train_speed(iter/s)": 0.410961 }, { "acc": 0.956742, "epoch": 3.891235480464625, "grad_norm": 10.584028244018555, "learning_rate": 2.1198236391474936e-08, "loss": 0.26098404, "memory(GiB)": 34.88, "step": 143715, "train_speed(iter/s)": 0.410962 }, { "acc": 0.94902782, "epoch": 3.8913708607478408, "grad_norm": 8.122138977050781, "learning_rate": 2.114801957070823e-08, "loss": 0.35732975, "memory(GiB)": 34.88, "step": 143720, "train_speed(iter/s)": 0.410963 }, { "acc": 0.95246563, "epoch": 3.891506241031056, "grad_norm": 5.59113073348999, "learning_rate": 2.1097865126109185e-08, "loss": 0.33298302, "memory(GiB)": 34.88, "step": 143725, "train_speed(iter/s)": 0.410963 }, { "acc": 0.94516058, "epoch": 3.891641621314272, "grad_norm": 3.125096321105957, "learning_rate": 2.1047773058307245e-08, "loss": 0.38523524, "memory(GiB)": 34.88, "step": 143730, "train_speed(iter/s)": 0.410964 }, { "acc": 0.94394951, "epoch": 3.8917770015974873, "grad_norm": 6.465800762176514, "learning_rate": 2.0997743367927953e-08, "loss": 0.38068523, "memory(GiB)": 34.88, "step": 143735, "train_speed(iter/s)": 0.410965 }, { "acc": 0.95585537, "epoch": 3.8919123818807027, "grad_norm": 12.144295692443848, "learning_rate": 2.0947776055600184e-08, "loss": 0.25184197, "memory(GiB)": 34.88, "step": 143740, "train_speed(iter/s)": 0.410966 }, { "acc": 0.95140467, "epoch": 3.8920477621639185, "grad_norm": 6.121469020843506, "learning_rate": 2.0897871121948387e-08, "loss": 0.30675545, "memory(GiB)": 34.88, "step": 143745, "train_speed(iter/s)": 0.410966 }, { "acc": 0.95589886, "epoch": 3.8921831424471343, "grad_norm": 5.026204586029053, "learning_rate": 2.0848028567598096e-08, "loss": 0.22844062, "memory(GiB)": 34.88, "step": 143750, "train_speed(iter/s)": 0.410967 }, { "acc": 0.94579172, "epoch": 3.8923185227303496, "grad_norm": 5.357739448547363, "learning_rate": 2.0798248393173763e-08, "loss": 0.31612339, "memory(GiB)": 34.88, "step": 143755, "train_speed(iter/s)": 0.410968 }, { "acc": 0.95317135, "epoch": 3.892453903013565, "grad_norm": 4.743892669677734, "learning_rate": 2.0748530599298705e-08, "loss": 0.33504062, "memory(GiB)": 34.88, "step": 143760, "train_speed(iter/s)": 0.410968 }, { "acc": 0.94907122, "epoch": 3.8925892832967808, "grad_norm": 6.540404319763184, "learning_rate": 2.0698875186596258e-08, "loss": 0.22628045, "memory(GiB)": 34.88, "step": 143765, "train_speed(iter/s)": 0.410969 }, { "acc": 0.96475325, "epoch": 3.892724663579996, "grad_norm": 4.666040420532227, "learning_rate": 2.0649282155688076e-08, "loss": 0.25504966, "memory(GiB)": 34.88, "step": 143770, "train_speed(iter/s)": 0.41097 }, { "acc": 0.94830685, "epoch": 3.8928600438632115, "grad_norm": 7.63338041305542, "learning_rate": 2.0599751507195827e-08, "loss": 0.31033411, "memory(GiB)": 34.88, "step": 143775, "train_speed(iter/s)": 0.410971 }, { "acc": 0.96107044, "epoch": 3.8929954241464273, "grad_norm": 4.772355556488037, "learning_rate": 2.0550283241739514e-08, "loss": 0.1972543, "memory(GiB)": 34.88, "step": 143780, "train_speed(iter/s)": 0.410972 }, { "acc": 0.93035069, "epoch": 3.893130804429643, "grad_norm": 14.441824913024902, "learning_rate": 2.050087735993912e-08, "loss": 0.48732319, "memory(GiB)": 34.88, "step": 143785, "train_speed(iter/s)": 0.410972 }, { "acc": 0.96029778, "epoch": 3.8932661847128585, "grad_norm": 4.869199275970459, "learning_rate": 2.045153386241299e-08, "loss": 0.27637732, "memory(GiB)": 34.88, "step": 143790, "train_speed(iter/s)": 0.410973 }, { "acc": 0.93364687, "epoch": 3.893401564996074, "grad_norm": 10.011857032775879, "learning_rate": 2.0402252749780006e-08, "loss": 0.42808199, "memory(GiB)": 34.88, "step": 143795, "train_speed(iter/s)": 0.410974 }, { "acc": 0.95299387, "epoch": 3.8935369452792896, "grad_norm": 6.126399993896484, "learning_rate": 2.0353034022657395e-08, "loss": 0.3044745, "memory(GiB)": 34.88, "step": 143800, "train_speed(iter/s)": 0.410975 }, { "acc": 0.94164886, "epoch": 3.893672325562505, "grad_norm": 17.32938003540039, "learning_rate": 2.030387768166071e-08, "loss": 0.38562055, "memory(GiB)": 34.88, "step": 143805, "train_speed(iter/s)": 0.410975 }, { "acc": 0.96176014, "epoch": 3.893807705845721, "grad_norm": 6.746133327484131, "learning_rate": 2.025478372740717e-08, "loss": 0.21665339, "memory(GiB)": 34.88, "step": 143810, "train_speed(iter/s)": 0.410976 }, { "acc": 0.95599489, "epoch": 3.893943086128936, "grad_norm": 18.31918716430664, "learning_rate": 2.020575216051123e-08, "loss": 0.31793213, "memory(GiB)": 34.88, "step": 143815, "train_speed(iter/s)": 0.410977 }, { "acc": 0.95902882, "epoch": 3.894078466412152, "grad_norm": 2.484562397003174, "learning_rate": 2.0156782981586774e-08, "loss": 0.32876673, "memory(GiB)": 34.88, "step": 143820, "train_speed(iter/s)": 0.410978 }, { "acc": 0.95653858, "epoch": 3.8942138466953673, "grad_norm": 4.775691986083984, "learning_rate": 2.0107876191247146e-08, "loss": 0.24407451, "memory(GiB)": 34.88, "step": 143825, "train_speed(iter/s)": 0.410979 }, { "acc": 0.92955503, "epoch": 3.8943492269785827, "grad_norm": 8.559301376342773, "learning_rate": 2.005903179010568e-08, "loss": 0.42426386, "memory(GiB)": 34.88, "step": 143830, "train_speed(iter/s)": 0.410979 }, { "acc": 0.94587898, "epoch": 3.8944846072617985, "grad_norm": 6.584301471710205, "learning_rate": 2.0010249778774048e-08, "loss": 0.35427432, "memory(GiB)": 34.88, "step": 143835, "train_speed(iter/s)": 0.41098 }, { "acc": 0.94122763, "epoch": 3.894619987545014, "grad_norm": 4.167773723602295, "learning_rate": 1.996153015786281e-08, "loss": 0.40276399, "memory(GiB)": 34.88, "step": 143840, "train_speed(iter/s)": 0.410981 }, { "acc": 0.94019089, "epoch": 3.8947553678282296, "grad_norm": 7.694977760314941, "learning_rate": 1.9912872927983082e-08, "loss": 0.42883325, "memory(GiB)": 34.88, "step": 143845, "train_speed(iter/s)": 0.410982 }, { "acc": 0.94925966, "epoch": 3.894890748111445, "grad_norm": 5.020596981048584, "learning_rate": 1.986427808974376e-08, "loss": 0.31124856, "memory(GiB)": 34.88, "step": 143850, "train_speed(iter/s)": 0.410982 }, { "acc": 0.94138107, "epoch": 3.895026128394661, "grad_norm": 10.60039234161377, "learning_rate": 1.981574564375375e-08, "loss": 0.31728547, "memory(GiB)": 34.88, "step": 143855, "train_speed(iter/s)": 0.410983 }, { "acc": 0.94399586, "epoch": 3.895161508677876, "grad_norm": 3.9210071563720703, "learning_rate": 1.9767275590620827e-08, "loss": 0.29163067, "memory(GiB)": 34.88, "step": 143860, "train_speed(iter/s)": 0.410984 }, { "acc": 0.94394913, "epoch": 3.8952968889610915, "grad_norm": 7.377146244049072, "learning_rate": 1.971886793095278e-08, "loss": 0.39164071, "memory(GiB)": 34.88, "step": 143865, "train_speed(iter/s)": 0.410985 }, { "acc": 0.94068813, "epoch": 3.8954322692443073, "grad_norm": 8.118443489074707, "learning_rate": 1.9670522665355178e-08, "loss": 0.36217546, "memory(GiB)": 34.88, "step": 143870, "train_speed(iter/s)": 0.410985 }, { "acc": 0.95293694, "epoch": 3.8955676495275227, "grad_norm": 11.49039077758789, "learning_rate": 1.9622239794434136e-08, "loss": 0.3566318, "memory(GiB)": 34.88, "step": 143875, "train_speed(iter/s)": 0.410986 }, { "acc": 0.95340452, "epoch": 3.8957030298107385, "grad_norm": 4.928892135620117, "learning_rate": 1.957401931879523e-08, "loss": 0.27328897, "memory(GiB)": 34.88, "step": 143880, "train_speed(iter/s)": 0.410987 }, { "acc": 0.95150652, "epoch": 3.895838410093954, "grad_norm": 5.068301200866699, "learning_rate": 1.9525861239041245e-08, "loss": 0.32572033, "memory(GiB)": 34.88, "step": 143885, "train_speed(iter/s)": 0.410988 }, { "acc": 0.95330029, "epoch": 3.8959737903771696, "grad_norm": 3.3505172729492188, "learning_rate": 1.9477765555775525e-08, "loss": 0.27401094, "memory(GiB)": 34.88, "step": 143890, "train_speed(iter/s)": 0.410988 }, { "acc": 0.95670042, "epoch": 3.896109170660385, "grad_norm": 4.521144390106201, "learning_rate": 1.942973226960142e-08, "loss": 0.24542944, "memory(GiB)": 34.88, "step": 143895, "train_speed(iter/s)": 0.410989 }, { "acc": 0.94738693, "epoch": 3.8962445509436003, "grad_norm": 14.677136421203613, "learning_rate": 1.938176138112005e-08, "loss": 0.27480748, "memory(GiB)": 34.88, "step": 143900, "train_speed(iter/s)": 0.41099 }, { "acc": 0.94607391, "epoch": 3.896379931226816, "grad_norm": 9.156312942504883, "learning_rate": 1.933385289093255e-08, "loss": 0.33064499, "memory(GiB)": 34.88, "step": 143905, "train_speed(iter/s)": 0.410991 }, { "acc": 0.95792065, "epoch": 3.896515311510032, "grad_norm": 3.967813730239868, "learning_rate": 1.9286006799638926e-08, "loss": 0.23493948, "memory(GiB)": 34.88, "step": 143910, "train_speed(iter/s)": 0.410991 }, { "acc": 0.94392853, "epoch": 3.8966506917932473, "grad_norm": 4.673129081726074, "learning_rate": 1.923822310783864e-08, "loss": 0.33985403, "memory(GiB)": 34.88, "step": 143915, "train_speed(iter/s)": 0.410992 }, { "acc": 0.94608097, "epoch": 3.8967860720764627, "grad_norm": 14.465021133422852, "learning_rate": 1.9190501816130604e-08, "loss": 0.34553666, "memory(GiB)": 34.88, "step": 143920, "train_speed(iter/s)": 0.410993 }, { "acc": 0.96424274, "epoch": 3.8969214523596785, "grad_norm": 2.585582733154297, "learning_rate": 1.9142842925112055e-08, "loss": 0.20830069, "memory(GiB)": 34.88, "step": 143925, "train_speed(iter/s)": 0.410994 }, { "acc": 0.95507755, "epoch": 3.897056832642894, "grad_norm": 6.029293537139893, "learning_rate": 1.909524643538023e-08, "loss": 0.25466137, "memory(GiB)": 34.88, "step": 143930, "train_speed(iter/s)": 0.410995 }, { "acc": 0.95596809, "epoch": 3.897192212926109, "grad_norm": 8.315141677856445, "learning_rate": 1.9047712347531263e-08, "loss": 0.22473984, "memory(GiB)": 34.88, "step": 143935, "train_speed(iter/s)": 0.410995 }, { "acc": 0.95765371, "epoch": 3.897327593209325, "grad_norm": 4.489548683166504, "learning_rate": 1.9000240662161282e-08, "loss": 0.29362416, "memory(GiB)": 34.88, "step": 143940, "train_speed(iter/s)": 0.410996 }, { "acc": 0.96023312, "epoch": 3.897462973492541, "grad_norm": 1.7506698369979858, "learning_rate": 1.895283137986364e-08, "loss": 0.20303583, "memory(GiB)": 34.88, "step": 143945, "train_speed(iter/s)": 0.410997 }, { "acc": 0.95331163, "epoch": 3.897598353775756, "grad_norm": 10.845284461975098, "learning_rate": 1.8905484501233363e-08, "loss": 0.31288848, "memory(GiB)": 34.88, "step": 143950, "train_speed(iter/s)": 0.410998 }, { "acc": 0.94647856, "epoch": 3.8977337340589715, "grad_norm": 10.012564659118652, "learning_rate": 1.8858200026863246e-08, "loss": 0.31134362, "memory(GiB)": 34.88, "step": 143955, "train_speed(iter/s)": 0.410998 }, { "acc": 0.94638853, "epoch": 3.8978691143421873, "grad_norm": 8.103297233581543, "learning_rate": 1.8810977957345533e-08, "loss": 0.31293857, "memory(GiB)": 34.88, "step": 143960, "train_speed(iter/s)": 0.410999 }, { "acc": 0.94263554, "epoch": 3.8980044946254027, "grad_norm": 7.455796718597412, "learning_rate": 1.876381829327192e-08, "loss": 0.40274439, "memory(GiB)": 34.88, "step": 143965, "train_speed(iter/s)": 0.411 }, { "acc": 0.93772373, "epoch": 3.8981398749086185, "grad_norm": 12.952598571777344, "learning_rate": 1.8716721035232982e-08, "loss": 0.4061039, "memory(GiB)": 34.88, "step": 143970, "train_speed(iter/s)": 0.411001 }, { "acc": 0.94637814, "epoch": 3.898275255191834, "grad_norm": 7.40800666809082, "learning_rate": 1.8669686183818743e-08, "loss": 0.34033427, "memory(GiB)": 34.88, "step": 143975, "train_speed(iter/s)": 0.411001 }, { "acc": 0.95346422, "epoch": 3.8984106354750496, "grad_norm": 5.249136447906494, "learning_rate": 1.862271373961867e-08, "loss": 0.30344591, "memory(GiB)": 34.88, "step": 143980, "train_speed(iter/s)": 0.411002 }, { "acc": 0.95741949, "epoch": 3.898546015758265, "grad_norm": 3.1213018894195557, "learning_rate": 1.8575803703220574e-08, "loss": 0.26867414, "memory(GiB)": 34.88, "step": 143985, "train_speed(iter/s)": 0.411003 }, { "acc": 0.94360075, "epoch": 3.8986813960414803, "grad_norm": 7.513248443603516, "learning_rate": 1.852895607521281e-08, "loss": 0.40413914, "memory(GiB)": 34.88, "step": 143990, "train_speed(iter/s)": 0.411004 }, { "acc": 0.95073061, "epoch": 3.898816776324696, "grad_norm": 4.903057098388672, "learning_rate": 1.8482170856182075e-08, "loss": 0.30389547, "memory(GiB)": 34.88, "step": 143995, "train_speed(iter/s)": 0.411005 }, { "acc": 0.95501814, "epoch": 3.8989521566079115, "grad_norm": 8.648576736450195, "learning_rate": 1.843544804671395e-08, "loss": 0.25002813, "memory(GiB)": 34.88, "step": 144000, "train_speed(iter/s)": 0.411005 }, { "acc": 0.92467461, "epoch": 3.8990875368911273, "grad_norm": 6.06081485748291, "learning_rate": 1.8388787647394576e-08, "loss": 0.45952497, "memory(GiB)": 34.88, "step": 144005, "train_speed(iter/s)": 0.411006 }, { "acc": 0.94251614, "epoch": 3.8992229171743427, "grad_norm": 5.986910820007324, "learning_rate": 1.8342189658807314e-08, "loss": 0.37262659, "memory(GiB)": 34.88, "step": 144010, "train_speed(iter/s)": 0.411007 }, { "acc": 0.95118027, "epoch": 3.8993582974575585, "grad_norm": 5.016576766967773, "learning_rate": 1.8295654081537197e-08, "loss": 0.27464418, "memory(GiB)": 34.88, "step": 144015, "train_speed(iter/s)": 0.411008 }, { "acc": 0.96159306, "epoch": 3.899493677740774, "grad_norm": 7.903813362121582, "learning_rate": 1.824918091616592e-08, "loss": 0.20934372, "memory(GiB)": 34.88, "step": 144020, "train_speed(iter/s)": 0.411009 }, { "acc": 0.95047321, "epoch": 3.899629058023989, "grad_norm": 5.392567157745361, "learning_rate": 1.8202770163276854e-08, "loss": 0.27223155, "memory(GiB)": 34.88, "step": 144025, "train_speed(iter/s)": 0.411009 }, { "acc": 0.93558769, "epoch": 3.899764438307205, "grad_norm": 32.5146369934082, "learning_rate": 1.8156421823450024e-08, "loss": 0.39573243, "memory(GiB)": 34.88, "step": 144030, "train_speed(iter/s)": 0.41101 }, { "acc": 0.94918633, "epoch": 3.8998998185904203, "grad_norm": 9.645766258239746, "learning_rate": 1.8110135897267134e-08, "loss": 0.33826332, "memory(GiB)": 34.88, "step": 144035, "train_speed(iter/s)": 0.411011 }, { "acc": 0.94425287, "epoch": 3.900035198873636, "grad_norm": 4.570131778717041, "learning_rate": 1.806391238530766e-08, "loss": 0.30562963, "memory(GiB)": 34.88, "step": 144040, "train_speed(iter/s)": 0.411012 }, { "acc": 0.9474144, "epoch": 3.9001705791568515, "grad_norm": 13.760194778442383, "learning_rate": 1.801775128815108e-08, "loss": 0.29571548, "memory(GiB)": 34.88, "step": 144045, "train_speed(iter/s)": 0.411012 }, { "acc": 0.95007935, "epoch": 3.9003059594400673, "grad_norm": 8.171223640441895, "learning_rate": 1.7971652606375206e-08, "loss": 0.27998872, "memory(GiB)": 34.88, "step": 144050, "train_speed(iter/s)": 0.411013 }, { "acc": 0.94930525, "epoch": 3.9004413397232827, "grad_norm": 8.84579086303711, "learning_rate": 1.7925616340556748e-08, "loss": 0.30660384, "memory(GiB)": 34.88, "step": 144055, "train_speed(iter/s)": 0.411014 }, { "acc": 0.94630041, "epoch": 3.900576720006498, "grad_norm": 6.838658332824707, "learning_rate": 1.7879642491274067e-08, "loss": 0.38509703, "memory(GiB)": 34.88, "step": 144060, "train_speed(iter/s)": 0.411015 }, { "acc": 0.96382437, "epoch": 3.900712100289714, "grad_norm": 4.59950590133667, "learning_rate": 1.783373105910165e-08, "loss": 0.24516232, "memory(GiB)": 34.88, "step": 144065, "train_speed(iter/s)": 0.411015 }, { "acc": 0.94606133, "epoch": 3.9008474805729296, "grad_norm": 4.860442161560059, "learning_rate": 1.778788204461509e-08, "loss": 0.30280507, "memory(GiB)": 34.88, "step": 144070, "train_speed(iter/s)": 0.411016 }, { "acc": 0.93616657, "epoch": 3.900982860856145, "grad_norm": 4.681828022003174, "learning_rate": 1.7742095448389422e-08, "loss": 0.39196534, "memory(GiB)": 34.88, "step": 144075, "train_speed(iter/s)": 0.411017 }, { "acc": 0.95849915, "epoch": 3.9011182411393603, "grad_norm": 7.010324001312256, "learning_rate": 1.769637127099691e-08, "loss": 0.23880048, "memory(GiB)": 34.88, "step": 144080, "train_speed(iter/s)": 0.411018 }, { "acc": 0.95828552, "epoch": 3.901253621422576, "grad_norm": 8.376120567321777, "learning_rate": 1.7650709513011487e-08, "loss": 0.28000164, "memory(GiB)": 34.88, "step": 144085, "train_speed(iter/s)": 0.411018 }, { "acc": 0.94806194, "epoch": 3.9013890017057915, "grad_norm": 6.8385844230651855, "learning_rate": 1.7605110175004296e-08, "loss": 0.28550012, "memory(GiB)": 34.88, "step": 144090, "train_speed(iter/s)": 0.411019 }, { "acc": 0.94020271, "epoch": 3.901524381989007, "grad_norm": 9.658279418945312, "learning_rate": 1.755957325754761e-08, "loss": 0.3673645, "memory(GiB)": 34.88, "step": 144095, "train_speed(iter/s)": 0.41102 }, { "acc": 0.95200729, "epoch": 3.9016597622722227, "grad_norm": 11.354328155517578, "learning_rate": 1.7514098761210906e-08, "loss": 0.2831038, "memory(GiB)": 34.88, "step": 144100, "train_speed(iter/s)": 0.411021 }, { "acc": 0.94095058, "epoch": 3.9017951425554385, "grad_norm": 5.838396072387695, "learning_rate": 1.7468686686564235e-08, "loss": 0.36762748, "memory(GiB)": 34.88, "step": 144105, "train_speed(iter/s)": 0.411021 }, { "acc": 0.93715048, "epoch": 3.901930522838654, "grad_norm": 3.561004638671875, "learning_rate": 1.7423337034175968e-08, "loss": 0.36739979, "memory(GiB)": 34.88, "step": 144110, "train_speed(iter/s)": 0.411022 }, { "acc": 0.94074726, "epoch": 3.902065903121869, "grad_norm": 6.955238342285156, "learning_rate": 1.7378049804615044e-08, "loss": 0.35062995, "memory(GiB)": 34.88, "step": 144115, "train_speed(iter/s)": 0.411023 }, { "acc": 0.94967222, "epoch": 3.902201283405085, "grad_norm": 10.381691932678223, "learning_rate": 1.7332824998448174e-08, "loss": 0.31865523, "memory(GiB)": 34.88, "step": 144120, "train_speed(iter/s)": 0.411024 }, { "acc": 0.95185204, "epoch": 3.9023366636883003, "grad_norm": 4.273218154907227, "learning_rate": 1.728766261624262e-08, "loss": 0.26608777, "memory(GiB)": 34.88, "step": 144125, "train_speed(iter/s)": 0.411025 }, { "acc": 0.94831419, "epoch": 3.902472043971516, "grad_norm": 6.707284927368164, "learning_rate": 1.7242562658562872e-08, "loss": 0.32283506, "memory(GiB)": 34.88, "step": 144130, "train_speed(iter/s)": 0.411025 }, { "acc": 0.96289301, "epoch": 3.9026074242547315, "grad_norm": 3.008267402648926, "learning_rate": 1.71975251259751e-08, "loss": 0.20867445, "memory(GiB)": 34.88, "step": 144135, "train_speed(iter/s)": 0.411026 }, { "acc": 0.95202885, "epoch": 3.9027428045379473, "grad_norm": 3.2991490364074707, "learning_rate": 1.7152550019042677e-08, "loss": 0.2716233, "memory(GiB)": 34.88, "step": 144140, "train_speed(iter/s)": 0.411027 }, { "acc": 0.94308014, "epoch": 3.9028781848211627, "grad_norm": 6.0800676345825195, "learning_rate": 1.7107637338329536e-08, "loss": 0.32630191, "memory(GiB)": 34.88, "step": 144145, "train_speed(iter/s)": 0.411027 }, { "acc": 0.9553463, "epoch": 3.903013565104378, "grad_norm": 3.567683219909668, "learning_rate": 1.7062787084397956e-08, "loss": 0.25002952, "memory(GiB)": 34.88, "step": 144150, "train_speed(iter/s)": 0.411028 }, { "acc": 0.93948803, "epoch": 3.903148945387594, "grad_norm": 12.927515029907227, "learning_rate": 1.7017999257809655e-08, "loss": 0.39551353, "memory(GiB)": 34.88, "step": 144155, "train_speed(iter/s)": 0.411029 }, { "acc": 0.94377213, "epoch": 3.903284325670809, "grad_norm": 6.0288872718811035, "learning_rate": 1.697327385912634e-08, "loss": 0.37648239, "memory(GiB)": 34.88, "step": 144160, "train_speed(iter/s)": 0.41103 }, { "acc": 0.94502926, "epoch": 3.903419705954025, "grad_norm": 5.4503631591796875, "learning_rate": 1.6928610888907517e-08, "loss": 0.35171208, "memory(GiB)": 34.88, "step": 144165, "train_speed(iter/s)": 0.41103 }, { "acc": 0.94821558, "epoch": 3.9035550862372403, "grad_norm": 14.322266578674316, "learning_rate": 1.688401034771379e-08, "loss": 0.33883367, "memory(GiB)": 34.88, "step": 144170, "train_speed(iter/s)": 0.411031 }, { "acc": 0.95832281, "epoch": 3.903690466520456, "grad_norm": 4.899475574493408, "learning_rate": 1.683947223610243e-08, "loss": 0.24876695, "memory(GiB)": 34.88, "step": 144175, "train_speed(iter/s)": 0.411032 }, { "acc": 0.9357357, "epoch": 3.9038258468036715, "grad_norm": 9.823586463928223, "learning_rate": 1.6794996554632387e-08, "loss": 0.40892034, "memory(GiB)": 34.88, "step": 144180, "train_speed(iter/s)": 0.411033 }, { "acc": 0.93093529, "epoch": 3.903961227086887, "grad_norm": 7.096496105194092, "learning_rate": 1.6750583303860376e-08, "loss": 0.44896059, "memory(GiB)": 34.88, "step": 144185, "train_speed(iter/s)": 0.411034 }, { "acc": 0.95527849, "epoch": 3.9040966073701027, "grad_norm": 9.554094314575195, "learning_rate": 1.6706232484342563e-08, "loss": 0.25432904, "memory(GiB)": 34.88, "step": 144190, "train_speed(iter/s)": 0.411034 }, { "acc": 0.95124149, "epoch": 3.904231987653318, "grad_norm": 7.591261386871338, "learning_rate": 1.666194409663512e-08, "loss": 0.32036963, "memory(GiB)": 34.88, "step": 144195, "train_speed(iter/s)": 0.411035 }, { "acc": 0.95603781, "epoch": 3.904367367936534, "grad_norm": 9.592411994934082, "learning_rate": 1.661771814129254e-08, "loss": 0.28668118, "memory(GiB)": 34.88, "step": 144200, "train_speed(iter/s)": 0.411036 }, { "acc": 0.95924816, "epoch": 3.904502748219749, "grad_norm": 6.327455043792725, "learning_rate": 1.6573554618868774e-08, "loss": 0.19925019, "memory(GiB)": 34.88, "step": 144205, "train_speed(iter/s)": 0.411037 }, { "acc": 0.95686655, "epoch": 3.904638128502965, "grad_norm": 12.415234565734863, "learning_rate": 1.6529453529917213e-08, "loss": 0.29690061, "memory(GiB)": 34.88, "step": 144210, "train_speed(iter/s)": 0.411037 }, { "acc": 0.95968933, "epoch": 3.9047735087861803, "grad_norm": 11.10532283782959, "learning_rate": 1.6485414874990135e-08, "loss": 0.26558795, "memory(GiB)": 34.88, "step": 144215, "train_speed(iter/s)": 0.411038 }, { "acc": 0.95928917, "epoch": 3.9049088890693957, "grad_norm": 9.515541076660156, "learning_rate": 1.6441438654639265e-08, "loss": 0.2360239, "memory(GiB)": 34.88, "step": 144220, "train_speed(iter/s)": 0.411039 }, { "acc": 0.95404949, "epoch": 3.9050442693526115, "grad_norm": 6.276346683502197, "learning_rate": 1.6397524869415782e-08, "loss": 0.36228764, "memory(GiB)": 34.88, "step": 144225, "train_speed(iter/s)": 0.411039 }, { "acc": 0.94659805, "epoch": 3.9051796496358273, "grad_norm": 16.436553955078125, "learning_rate": 1.6353673519869178e-08, "loss": 0.34967427, "memory(GiB)": 34.88, "step": 144230, "train_speed(iter/s)": 0.41104 }, { "acc": 0.95946779, "epoch": 3.9053150299190427, "grad_norm": 5.031011581420898, "learning_rate": 1.6309884606549525e-08, "loss": 0.21905272, "memory(GiB)": 34.88, "step": 144235, "train_speed(iter/s)": 0.411041 }, { "acc": 0.95028057, "epoch": 3.905450410202258, "grad_norm": 6.287610054016113, "learning_rate": 1.6266158130004657e-08, "loss": 0.30049877, "memory(GiB)": 34.88, "step": 144240, "train_speed(iter/s)": 0.411042 }, { "acc": 0.9352849, "epoch": 3.905585790485474, "grad_norm": 3.44454288482666, "learning_rate": 1.6222494090782973e-08, "loss": 0.4593534, "memory(GiB)": 34.88, "step": 144245, "train_speed(iter/s)": 0.411043 }, { "acc": 0.94241171, "epoch": 3.905721170768689, "grad_norm": 11.622101783752441, "learning_rate": 1.61788924894312e-08, "loss": 0.35105214, "memory(GiB)": 34.88, "step": 144250, "train_speed(iter/s)": 0.411043 }, { "acc": 0.92770004, "epoch": 3.9058565510519045, "grad_norm": 2.6738638877868652, "learning_rate": 1.613535332649496e-08, "loss": 0.43974733, "memory(GiB)": 34.88, "step": 144255, "train_speed(iter/s)": 0.411044 }, { "acc": 0.96633139, "epoch": 3.9059919313351203, "grad_norm": 2.907317638397217, "learning_rate": 1.6091876602520422e-08, "loss": 0.18157141, "memory(GiB)": 34.88, "step": 144260, "train_speed(iter/s)": 0.411045 }, { "acc": 0.94915485, "epoch": 3.906127311618336, "grad_norm": 5.715895175933838, "learning_rate": 1.6048462318052108e-08, "loss": 0.26498935, "memory(GiB)": 34.88, "step": 144265, "train_speed(iter/s)": 0.411046 }, { "acc": 0.93948212, "epoch": 3.9062626919015515, "grad_norm": 9.244030952453613, "learning_rate": 1.600511047363341e-08, "loss": 0.31340511, "memory(GiB)": 34.88, "step": 144270, "train_speed(iter/s)": 0.411046 }, { "acc": 0.93852472, "epoch": 3.906398072184767, "grad_norm": 5.015626430511475, "learning_rate": 1.5961821069808283e-08, "loss": 0.38309605, "memory(GiB)": 34.88, "step": 144275, "train_speed(iter/s)": 0.411047 }, { "acc": 0.95512285, "epoch": 3.9065334524679827, "grad_norm": 9.3636474609375, "learning_rate": 1.591859410711791e-08, "loss": 0.24302287, "memory(GiB)": 34.88, "step": 144280, "train_speed(iter/s)": 0.411048 }, { "acc": 0.93438072, "epoch": 3.906668832751198, "grad_norm": 12.623695373535156, "learning_rate": 1.5875429586104576e-08, "loss": 0.39824398, "memory(GiB)": 34.88, "step": 144285, "train_speed(iter/s)": 0.411049 }, { "acc": 0.94018669, "epoch": 3.906804213034414, "grad_norm": 9.43960952758789, "learning_rate": 1.583232750730891e-08, "loss": 0.37413011, "memory(GiB)": 34.88, "step": 144290, "train_speed(iter/s)": 0.411049 }, { "acc": 0.94738197, "epoch": 3.906939593317629, "grad_norm": 2.9587583541870117, "learning_rate": 1.5789287871270423e-08, "loss": 0.32988677, "memory(GiB)": 34.88, "step": 144295, "train_speed(iter/s)": 0.41105 }, { "acc": 0.96022606, "epoch": 3.907074973600845, "grad_norm": 4.012160301208496, "learning_rate": 1.574631067852863e-08, "loss": 0.20089216, "memory(GiB)": 34.88, "step": 144300, "train_speed(iter/s)": 0.411051 }, { "acc": 0.93541794, "epoch": 3.9072103538840603, "grad_norm": 3.997554063796997, "learning_rate": 1.5703395929621937e-08, "loss": 0.38156447, "memory(GiB)": 34.88, "step": 144305, "train_speed(iter/s)": 0.411052 }, { "acc": 0.94948845, "epoch": 3.9073457341672757, "grad_norm": 5.33001708984375, "learning_rate": 1.5660543625087637e-08, "loss": 0.28996673, "memory(GiB)": 34.88, "step": 144310, "train_speed(iter/s)": 0.411052 }, { "acc": 0.9282465, "epoch": 3.9074811144504915, "grad_norm": 12.780360221862793, "learning_rate": 1.5617753765463025e-08, "loss": 0.43185277, "memory(GiB)": 34.88, "step": 144315, "train_speed(iter/s)": 0.411053 }, { "acc": 0.94749775, "epoch": 3.907616494733707, "grad_norm": 5.050021648406982, "learning_rate": 1.5575026351283175e-08, "loss": 0.26209888, "memory(GiB)": 34.88, "step": 144320, "train_speed(iter/s)": 0.411054 }, { "acc": 0.94279575, "epoch": 3.9077518750169227, "grad_norm": 4.960206031799316, "learning_rate": 1.553236138308427e-08, "loss": 0.36711664, "memory(GiB)": 34.88, "step": 144325, "train_speed(iter/s)": 0.411054 }, { "acc": 0.95399914, "epoch": 3.907887255300138, "grad_norm": 5.476809024810791, "learning_rate": 1.5489758861400834e-08, "loss": 0.26253695, "memory(GiB)": 34.88, "step": 144330, "train_speed(iter/s)": 0.411055 }, { "acc": 0.94153862, "epoch": 3.908022635583354, "grad_norm": 18.828710556030273, "learning_rate": 1.5447218786765712e-08, "loss": 0.34809942, "memory(GiB)": 34.88, "step": 144335, "train_speed(iter/s)": 0.411056 }, { "acc": 0.93753042, "epoch": 3.908158015866569, "grad_norm": 20.8583927154541, "learning_rate": 1.540474115971287e-08, "loss": 0.42386885, "memory(GiB)": 34.88, "step": 144340, "train_speed(iter/s)": 0.411057 }, { "acc": 0.94978504, "epoch": 3.9082933961497845, "grad_norm": 9.53033447265625, "learning_rate": 1.5362325980773504e-08, "loss": 0.29498267, "memory(GiB)": 34.88, "step": 144345, "train_speed(iter/s)": 0.411057 }, { "acc": 0.96190891, "epoch": 3.9084287764330004, "grad_norm": 17.84958267211914, "learning_rate": 1.5319973250479348e-08, "loss": 0.27092929, "memory(GiB)": 34.88, "step": 144350, "train_speed(iter/s)": 0.411058 }, { "acc": 0.95439281, "epoch": 3.9085641567162157, "grad_norm": 13.289615631103516, "learning_rate": 1.527768296936049e-08, "loss": 0.22924271, "memory(GiB)": 34.88, "step": 144355, "train_speed(iter/s)": 0.411059 }, { "acc": 0.95808706, "epoch": 3.9086995369994315, "grad_norm": 9.954504013061523, "learning_rate": 1.5235455137947554e-08, "loss": 0.29326055, "memory(GiB)": 34.88, "step": 144360, "train_speed(iter/s)": 0.411059 }, { "acc": 0.94244375, "epoch": 3.908834917282647, "grad_norm": 5.369852542877197, "learning_rate": 1.519328975676896e-08, "loss": 0.45107126, "memory(GiB)": 34.88, "step": 144365, "train_speed(iter/s)": 0.41106 }, { "acc": 0.94912605, "epoch": 3.9089702975658627, "grad_norm": 7.6060004234313965, "learning_rate": 1.5151186826353115e-08, "loss": 0.35048642, "memory(GiB)": 34.88, "step": 144370, "train_speed(iter/s)": 0.411061 }, { "acc": 0.94438915, "epoch": 3.909105677849078, "grad_norm": 7.022029876708984, "learning_rate": 1.5109146347227333e-08, "loss": 0.28202956, "memory(GiB)": 34.88, "step": 144375, "train_speed(iter/s)": 0.411062 }, { "acc": 0.9540803, "epoch": 3.9092410581322934, "grad_norm": 4.264486789703369, "learning_rate": 1.5067168319917795e-08, "loss": 0.27468841, "memory(GiB)": 34.88, "step": 144380, "train_speed(iter/s)": 0.411063 }, { "acc": 0.96165104, "epoch": 3.909376438415509, "grad_norm": 4.279427528381348, "learning_rate": 1.5025252744951258e-08, "loss": 0.21007802, "memory(GiB)": 34.88, "step": 144385, "train_speed(iter/s)": 0.411063 }, { "acc": 0.94984894, "epoch": 3.909511818698725, "grad_norm": 4.351297378540039, "learning_rate": 1.4983399622852245e-08, "loss": 0.32622213, "memory(GiB)": 34.88, "step": 144390, "train_speed(iter/s)": 0.411064 }, { "acc": 0.95902214, "epoch": 3.9096471989819404, "grad_norm": 4.505231857299805, "learning_rate": 1.4941608954145286e-08, "loss": 0.24522848, "memory(GiB)": 34.88, "step": 144395, "train_speed(iter/s)": 0.411065 }, { "acc": 0.94943953, "epoch": 3.9097825792651557, "grad_norm": 5.673437595367432, "learning_rate": 1.4899880739353799e-08, "loss": 0.32723823, "memory(GiB)": 34.88, "step": 144400, "train_speed(iter/s)": 0.411066 }, { "acc": 0.95754986, "epoch": 3.9099179595483715, "grad_norm": 5.827794551849365, "learning_rate": 1.4858214979000088e-08, "loss": 0.2823014, "memory(GiB)": 34.88, "step": 144405, "train_speed(iter/s)": 0.411066 }, { "acc": 0.97091436, "epoch": 3.910053339831587, "grad_norm": 3.6359903812408447, "learning_rate": 1.4816611673607022e-08, "loss": 0.21280882, "memory(GiB)": 34.88, "step": 144410, "train_speed(iter/s)": 0.411067 }, { "acc": 0.93901978, "epoch": 3.9101887201148022, "grad_norm": 8.904678344726562, "learning_rate": 1.4775070823695237e-08, "loss": 0.4282763, "memory(GiB)": 34.88, "step": 144415, "train_speed(iter/s)": 0.411068 }, { "acc": 0.94615755, "epoch": 3.910324100398018, "grad_norm": 3.8218994140625, "learning_rate": 1.4733592429784827e-08, "loss": 0.2735064, "memory(GiB)": 34.88, "step": 144420, "train_speed(iter/s)": 0.411069 }, { "acc": 0.94101219, "epoch": 3.910459480681234, "grad_norm": 6.5981526374816895, "learning_rate": 1.4692176492395874e-08, "loss": 0.36375189, "memory(GiB)": 34.88, "step": 144425, "train_speed(iter/s)": 0.411069 }, { "acc": 0.94354877, "epoch": 3.910594860964449, "grad_norm": 7.577504634857178, "learning_rate": 1.4650823012046803e-08, "loss": 0.3772824, "memory(GiB)": 34.88, "step": 144430, "train_speed(iter/s)": 0.41107 }, { "acc": 0.95574417, "epoch": 3.9107302412476646, "grad_norm": 10.128632545471191, "learning_rate": 1.4609531989256037e-08, "loss": 0.26989369, "memory(GiB)": 34.88, "step": 144435, "train_speed(iter/s)": 0.411071 }, { "acc": 0.9511343, "epoch": 3.9108656215308804, "grad_norm": 7.633115291595459, "learning_rate": 1.4568303424540885e-08, "loss": 0.34012926, "memory(GiB)": 34.88, "step": 144440, "train_speed(iter/s)": 0.411072 }, { "acc": 0.94091482, "epoch": 3.9110010018140957, "grad_norm": 5.409462928771973, "learning_rate": 1.4527137318416995e-08, "loss": 0.30939293, "memory(GiB)": 34.88, "step": 144445, "train_speed(iter/s)": 0.411073 }, { "acc": 0.9529768, "epoch": 3.911136382097311, "grad_norm": 2.7803657054901123, "learning_rate": 1.4486033671401128e-08, "loss": 0.29530785, "memory(GiB)": 34.88, "step": 144450, "train_speed(iter/s)": 0.411073 }, { "acc": 0.94740934, "epoch": 3.911271762380527, "grad_norm": 3.2940518856048584, "learning_rate": 1.4444992484007262e-08, "loss": 0.29412298, "memory(GiB)": 34.88, "step": 144455, "train_speed(iter/s)": 0.411074 }, { "acc": 0.9354948, "epoch": 3.9114071426637427, "grad_norm": 4.919028282165527, "learning_rate": 1.4404013756749934e-08, "loss": 0.41764746, "memory(GiB)": 34.88, "step": 144460, "train_speed(iter/s)": 0.411075 }, { "acc": 0.95405788, "epoch": 3.911542522946958, "grad_norm": 9.83076286315918, "learning_rate": 1.4363097490143126e-08, "loss": 0.28938642, "memory(GiB)": 34.88, "step": 144465, "train_speed(iter/s)": 0.411075 }, { "acc": 0.9538538, "epoch": 3.9116779032301734, "grad_norm": 8.263213157653809, "learning_rate": 1.4322243684698604e-08, "loss": 0.26505032, "memory(GiB)": 34.88, "step": 144470, "train_speed(iter/s)": 0.411076 }, { "acc": 0.94837561, "epoch": 3.911813283513389, "grad_norm": 6.944247245788574, "learning_rate": 1.4281452340927567e-08, "loss": 0.30428436, "memory(GiB)": 34.88, "step": 144475, "train_speed(iter/s)": 0.411077 }, { "acc": 0.95263309, "epoch": 3.9119486637966046, "grad_norm": 8.015833854675293, "learning_rate": 1.4240723459342339e-08, "loss": 0.3131012, "memory(GiB)": 34.88, "step": 144480, "train_speed(iter/s)": 0.411078 }, { "acc": 0.94616795, "epoch": 3.9120840440798204, "grad_norm": 6.9496049880981445, "learning_rate": 1.4200057040452457e-08, "loss": 0.36077502, "memory(GiB)": 34.88, "step": 144485, "train_speed(iter/s)": 0.411079 }, { "acc": 0.95240784, "epoch": 3.9122194243630357, "grad_norm": 6.478215217590332, "learning_rate": 1.4159453084767461e-08, "loss": 0.29435022, "memory(GiB)": 34.88, "step": 144490, "train_speed(iter/s)": 0.411079 }, { "acc": 0.95222187, "epoch": 3.9123548046462515, "grad_norm": 4.396241188049316, "learning_rate": 1.4118911592795785e-08, "loss": 0.27605386, "memory(GiB)": 34.88, "step": 144495, "train_speed(iter/s)": 0.41108 }, { "acc": 0.93932457, "epoch": 3.912490184929467, "grad_norm": 8.62061882019043, "learning_rate": 1.4078432565045309e-08, "loss": 0.41486177, "memory(GiB)": 34.88, "step": 144500, "train_speed(iter/s)": 0.411081 }, { "acc": 0.95419006, "epoch": 3.9126255652126822, "grad_norm": 6.718169212341309, "learning_rate": 1.4038016002023902e-08, "loss": 0.28704441, "memory(GiB)": 34.88, "step": 144505, "train_speed(iter/s)": 0.411082 }, { "acc": 0.95370579, "epoch": 3.912760945495898, "grad_norm": 7.780877113342285, "learning_rate": 1.3997661904236669e-08, "loss": 0.22678659, "memory(GiB)": 34.88, "step": 144510, "train_speed(iter/s)": 0.411082 }, { "acc": 0.94749737, "epoch": 3.9128963257791134, "grad_norm": 15.183984756469727, "learning_rate": 1.395737027218982e-08, "loss": 0.28777781, "memory(GiB)": 34.88, "step": 144515, "train_speed(iter/s)": 0.411083 }, { "acc": 0.96033564, "epoch": 3.913031706062329, "grad_norm": 6.918079376220703, "learning_rate": 1.3917141106387902e-08, "loss": 0.24413073, "memory(GiB)": 34.88, "step": 144520, "train_speed(iter/s)": 0.411084 }, { "acc": 0.95582829, "epoch": 3.9131670863455446, "grad_norm": 3.6052870750427246, "learning_rate": 1.3876974407334904e-08, "loss": 0.27837, "memory(GiB)": 34.88, "step": 144525, "train_speed(iter/s)": 0.411084 }, { "acc": 0.9553627, "epoch": 3.9133024666287604, "grad_norm": 3.426745891571045, "learning_rate": 1.3836870175534262e-08, "loss": 0.27511892, "memory(GiB)": 34.88, "step": 144530, "train_speed(iter/s)": 0.411085 }, { "acc": 0.96371861, "epoch": 3.9134378469119757, "grad_norm": 4.91554594039917, "learning_rate": 1.379682841148775e-08, "loss": 0.21370606, "memory(GiB)": 34.88, "step": 144535, "train_speed(iter/s)": 0.411086 }, { "acc": 0.95897827, "epoch": 3.913573227195191, "grad_norm": 4.47344446182251, "learning_rate": 1.3756849115697688e-08, "loss": 0.24448819, "memory(GiB)": 34.88, "step": 144540, "train_speed(iter/s)": 0.411086 }, { "acc": 0.93847733, "epoch": 3.913708607478407, "grad_norm": 8.807502746582031, "learning_rate": 1.3716932288664187e-08, "loss": 0.34913735, "memory(GiB)": 34.88, "step": 144545, "train_speed(iter/s)": 0.411087 }, { "acc": 0.93533716, "epoch": 3.9138439877616222, "grad_norm": 15.909652709960938, "learning_rate": 1.3677077930887904e-08, "loss": 0.39726095, "memory(GiB)": 34.88, "step": 144550, "train_speed(iter/s)": 0.411088 }, { "acc": 0.95060425, "epoch": 3.913979368044838, "grad_norm": 3.151005983352661, "learning_rate": 1.3637286042867282e-08, "loss": 0.32534194, "memory(GiB)": 34.88, "step": 144555, "train_speed(iter/s)": 0.411089 }, { "acc": 0.95252657, "epoch": 3.9141147483280534, "grad_norm": 5.606015682220459, "learning_rate": 1.3597556625101869e-08, "loss": 0.26774759, "memory(GiB)": 34.88, "step": 144560, "train_speed(iter/s)": 0.41109 }, { "acc": 0.94338675, "epoch": 3.914250128611269, "grad_norm": 19.989545822143555, "learning_rate": 1.355788967808844e-08, "loss": 0.35195458, "memory(GiB)": 34.88, "step": 144565, "train_speed(iter/s)": 0.41109 }, { "acc": 0.95950747, "epoch": 3.9143855088944846, "grad_norm": 5.17636251449585, "learning_rate": 1.3518285202324325e-08, "loss": 0.22160661, "memory(GiB)": 34.88, "step": 144570, "train_speed(iter/s)": 0.411091 }, { "acc": 0.95397377, "epoch": 3.9145208891777, "grad_norm": 7.492120742797852, "learning_rate": 1.3478743198305747e-08, "loss": 0.29785774, "memory(GiB)": 34.88, "step": 144575, "train_speed(iter/s)": 0.411092 }, { "acc": 0.94873285, "epoch": 3.9146562694609157, "grad_norm": 4.571600437164307, "learning_rate": 1.343926366652837e-08, "loss": 0.34932718, "memory(GiB)": 34.88, "step": 144580, "train_speed(iter/s)": 0.411093 }, { "acc": 0.94753609, "epoch": 3.9147916497441315, "grad_norm": 15.71760368347168, "learning_rate": 1.3399846607485081e-08, "loss": 0.36741097, "memory(GiB)": 34.88, "step": 144585, "train_speed(iter/s)": 0.411093 }, { "acc": 0.96243725, "epoch": 3.914927030027347, "grad_norm": 6.621090412139893, "learning_rate": 1.3360492021671548e-08, "loss": 0.2023596, "memory(GiB)": 34.88, "step": 144590, "train_speed(iter/s)": 0.411094 }, { "acc": 0.94278755, "epoch": 3.9150624103105622, "grad_norm": 8.967630386352539, "learning_rate": 1.3321199909580659e-08, "loss": 0.39726992, "memory(GiB)": 34.88, "step": 144595, "train_speed(iter/s)": 0.411095 }, { "acc": 0.94978752, "epoch": 3.915197790593778, "grad_norm": 8.244693756103516, "learning_rate": 1.3281970271703084e-08, "loss": 0.32196975, "memory(GiB)": 34.88, "step": 144600, "train_speed(iter/s)": 0.411096 }, { "acc": 0.94531975, "epoch": 3.9153331708769934, "grad_norm": 5.8118743896484375, "learning_rate": 1.3242803108531715e-08, "loss": 0.37954905, "memory(GiB)": 34.88, "step": 144605, "train_speed(iter/s)": 0.411097 }, { "acc": 0.95590229, "epoch": 3.9154685511602088, "grad_norm": 11.349263191223145, "learning_rate": 1.3203698420556105e-08, "loss": 0.25135126, "memory(GiB)": 34.88, "step": 144610, "train_speed(iter/s)": 0.411097 }, { "acc": 0.9438158, "epoch": 3.9156039314434246, "grad_norm": 10.338996887207031, "learning_rate": 1.3164656208267485e-08, "loss": 0.29265063, "memory(GiB)": 34.88, "step": 144615, "train_speed(iter/s)": 0.411098 }, { "acc": 0.94955158, "epoch": 3.9157393117266404, "grad_norm": 5.3175811767578125, "learning_rate": 1.3125676472153194e-08, "loss": 0.3193295, "memory(GiB)": 34.88, "step": 144620, "train_speed(iter/s)": 0.411099 }, { "acc": 0.93360729, "epoch": 3.9158746920098557, "grad_norm": 9.086416244506836, "learning_rate": 1.3086759212703343e-08, "loss": 0.41553335, "memory(GiB)": 34.88, "step": 144625, "train_speed(iter/s)": 0.4111 }, { "acc": 0.95527763, "epoch": 3.916010072293071, "grad_norm": 7.148542881011963, "learning_rate": 1.3047904430404161e-08, "loss": 0.30969877, "memory(GiB)": 34.88, "step": 144630, "train_speed(iter/s)": 0.411101 }, { "acc": 0.95193176, "epoch": 3.916145452576287, "grad_norm": 10.318184852600098, "learning_rate": 1.300911212574299e-08, "loss": 0.29479446, "memory(GiB)": 34.88, "step": 144635, "train_speed(iter/s)": 0.411101 }, { "acc": 0.94803505, "epoch": 3.9162808328595022, "grad_norm": 3.99312424659729, "learning_rate": 1.2970382299205504e-08, "loss": 0.2434129, "memory(GiB)": 34.88, "step": 144640, "train_speed(iter/s)": 0.411102 }, { "acc": 0.93986053, "epoch": 3.916416213142718, "grad_norm": 16.21892738342285, "learning_rate": 1.2931714951276817e-08, "loss": 0.35405216, "memory(GiB)": 34.88, "step": 144645, "train_speed(iter/s)": 0.411103 }, { "acc": 0.95841484, "epoch": 3.9165515934259334, "grad_norm": 13.414131164550781, "learning_rate": 1.2893110082441499e-08, "loss": 0.20081019, "memory(GiB)": 34.88, "step": 144650, "train_speed(iter/s)": 0.411104 }, { "acc": 0.94766493, "epoch": 3.916686973709149, "grad_norm": 13.275036811828613, "learning_rate": 1.2854567693183e-08, "loss": 0.31046231, "memory(GiB)": 34.88, "step": 144655, "train_speed(iter/s)": 0.411104 }, { "acc": 0.93763971, "epoch": 3.9168223539923646, "grad_norm": 8.640686988830566, "learning_rate": 1.2816087783984776e-08, "loss": 0.32790568, "memory(GiB)": 34.88, "step": 144660, "train_speed(iter/s)": 0.411105 }, { "acc": 0.9616375, "epoch": 3.91695773427558, "grad_norm": 6.775055885314941, "learning_rate": 1.2777670355327505e-08, "loss": 0.21661587, "memory(GiB)": 34.88, "step": 144665, "train_speed(iter/s)": 0.411106 }, { "acc": 0.94852905, "epoch": 3.9170931145587957, "grad_norm": 9.462621688842773, "learning_rate": 1.2739315407694084e-08, "loss": 0.34999537, "memory(GiB)": 34.88, "step": 144670, "train_speed(iter/s)": 0.411107 }, { "acc": 0.94865494, "epoch": 3.917228494842011, "grad_norm": 8.041054725646973, "learning_rate": 1.2701022941563528e-08, "loss": 0.30078995, "memory(GiB)": 34.88, "step": 144675, "train_speed(iter/s)": 0.411108 }, { "acc": 0.95617809, "epoch": 3.917363875125227, "grad_norm": 5.57160758972168, "learning_rate": 1.2662792957416515e-08, "loss": 0.26752286, "memory(GiB)": 34.88, "step": 144680, "train_speed(iter/s)": 0.411108 }, { "acc": 0.94605827, "epoch": 3.9174992554084422, "grad_norm": 9.91391372680664, "learning_rate": 1.2624625455732058e-08, "loss": 0.32029181, "memory(GiB)": 34.88, "step": 144685, "train_speed(iter/s)": 0.411109 }, { "acc": 0.94292431, "epoch": 3.917634635691658, "grad_norm": 8.862571716308594, "learning_rate": 1.2586520436986952e-08, "loss": 0.3718894, "memory(GiB)": 34.88, "step": 144690, "train_speed(iter/s)": 0.41111 }, { "acc": 0.94544601, "epoch": 3.9177700159748734, "grad_norm": 4.752620697021484, "learning_rate": 1.2548477901659655e-08, "loss": 0.37182245, "memory(GiB)": 34.88, "step": 144695, "train_speed(iter/s)": 0.411111 }, { "acc": 0.93568945, "epoch": 3.9179053962580888, "grad_norm": 16.0825138092041, "learning_rate": 1.2510497850226955e-08, "loss": 0.41538897, "memory(GiB)": 34.88, "step": 144700, "train_speed(iter/s)": 0.411111 }, { "acc": 0.9397707, "epoch": 3.9180407765413046, "grad_norm": 23.728696823120117, "learning_rate": 1.247258028316343e-08, "loss": 0.39118268, "memory(GiB)": 34.88, "step": 144705, "train_speed(iter/s)": 0.411112 }, { "acc": 0.95925694, "epoch": 3.91817615682452, "grad_norm": 6.745834827423096, "learning_rate": 1.2434725200945319e-08, "loss": 0.25739417, "memory(GiB)": 34.88, "step": 144710, "train_speed(iter/s)": 0.411113 }, { "acc": 0.94947567, "epoch": 3.9183115371077357, "grad_norm": 8.50220012664795, "learning_rate": 1.239693260404608e-08, "loss": 0.27312343, "memory(GiB)": 34.88, "step": 144715, "train_speed(iter/s)": 0.411113 }, { "acc": 0.95605192, "epoch": 3.918446917390951, "grad_norm": 13.783875465393066, "learning_rate": 1.2359202492939736e-08, "loss": 0.25594008, "memory(GiB)": 34.88, "step": 144720, "train_speed(iter/s)": 0.411114 }, { "acc": 0.94258032, "epoch": 3.918582297674167, "grad_norm": 7.236969470977783, "learning_rate": 1.232153486809808e-08, "loss": 0.35673156, "memory(GiB)": 34.88, "step": 144725, "train_speed(iter/s)": 0.411115 }, { "acc": 0.94752197, "epoch": 3.9187176779573822, "grad_norm": 6.96904993057251, "learning_rate": 1.2283929729993467e-08, "loss": 0.3301116, "memory(GiB)": 34.88, "step": 144730, "train_speed(iter/s)": 0.411116 }, { "acc": 0.95075836, "epoch": 3.9188530582405976, "grad_norm": 2.784001111984253, "learning_rate": 1.224638707909714e-08, "loss": 0.2650224, "memory(GiB)": 34.88, "step": 144735, "train_speed(iter/s)": 0.411117 }, { "acc": 0.96281042, "epoch": 3.9189884385238134, "grad_norm": 4.703571796417236, "learning_rate": 1.220890691587923e-08, "loss": 0.19959582, "memory(GiB)": 34.88, "step": 144740, "train_speed(iter/s)": 0.411117 }, { "acc": 0.95801115, "epoch": 3.919123818807029, "grad_norm": 5.907223224639893, "learning_rate": 1.2171489240809314e-08, "loss": 0.29205017, "memory(GiB)": 34.88, "step": 144745, "train_speed(iter/s)": 0.411118 }, { "acc": 0.93932228, "epoch": 3.9192591990902446, "grad_norm": 13.532694816589355, "learning_rate": 1.2134134054355862e-08, "loss": 0.37545633, "memory(GiB)": 34.88, "step": 144750, "train_speed(iter/s)": 0.411119 }, { "acc": 0.9533865, "epoch": 3.91939457937346, "grad_norm": 8.494585990905762, "learning_rate": 1.2096841356987339e-08, "loss": 0.24182141, "memory(GiB)": 34.88, "step": 144755, "train_speed(iter/s)": 0.41112 }, { "acc": 0.94119329, "epoch": 3.9195299596566757, "grad_norm": 5.846691608428955, "learning_rate": 1.2059611149169992e-08, "loss": 0.35930805, "memory(GiB)": 34.88, "step": 144760, "train_speed(iter/s)": 0.41112 }, { "acc": 0.94188433, "epoch": 3.919665339939891, "grad_norm": 9.750163078308105, "learning_rate": 1.202244343137118e-08, "loss": 0.37575662, "memory(GiB)": 34.88, "step": 144765, "train_speed(iter/s)": 0.411121 }, { "acc": 0.95735607, "epoch": 3.9198007202231064, "grad_norm": 7.573452949523926, "learning_rate": 1.1985338204056037e-08, "loss": 0.26678753, "memory(GiB)": 34.88, "step": 144770, "train_speed(iter/s)": 0.411122 }, { "acc": 0.95247879, "epoch": 3.9199361005063222, "grad_norm": 32.949588775634766, "learning_rate": 1.194829546768915e-08, "loss": 0.30910082, "memory(GiB)": 34.88, "step": 144775, "train_speed(iter/s)": 0.411123 }, { "acc": 0.95560684, "epoch": 3.920071480789538, "grad_norm": 6.672965049743652, "learning_rate": 1.1911315222735093e-08, "loss": 0.25407794, "memory(GiB)": 34.88, "step": 144780, "train_speed(iter/s)": 0.411124 }, { "acc": 0.93978348, "epoch": 3.9202068610727534, "grad_norm": 6.67568302154541, "learning_rate": 1.1874397469657343e-08, "loss": 0.34370837, "memory(GiB)": 34.88, "step": 144785, "train_speed(iter/s)": 0.411124 }, { "acc": 0.93311262, "epoch": 3.9203422413559688, "grad_norm": 12.098069190979004, "learning_rate": 1.1837542208917154e-08, "loss": 0.38479495, "memory(GiB)": 34.88, "step": 144790, "train_speed(iter/s)": 0.411125 }, { "acc": 0.96202631, "epoch": 3.9204776216391846, "grad_norm": 8.539461135864258, "learning_rate": 1.1800749440977436e-08, "loss": 0.19056872, "memory(GiB)": 34.88, "step": 144795, "train_speed(iter/s)": 0.411126 }, { "acc": 0.94986467, "epoch": 3.9206130019224, "grad_norm": 13.136784553527832, "learning_rate": 1.1764019166298336e-08, "loss": 0.36229773, "memory(GiB)": 34.88, "step": 144800, "train_speed(iter/s)": 0.411127 }, { "acc": 0.95845356, "epoch": 3.9207483822056157, "grad_norm": 4.314175128936768, "learning_rate": 1.1727351385340549e-08, "loss": 0.22465057, "memory(GiB)": 34.88, "step": 144805, "train_speed(iter/s)": 0.411127 }, { "acc": 0.95546017, "epoch": 3.920883762488831, "grad_norm": 4.462937831878662, "learning_rate": 1.1690746098562547e-08, "loss": 0.28796263, "memory(GiB)": 34.88, "step": 144810, "train_speed(iter/s)": 0.411128 }, { "acc": 0.94647818, "epoch": 3.921019142772047, "grad_norm": 7.364465713500977, "learning_rate": 1.1654203306423924e-08, "loss": 0.32452054, "memory(GiB)": 34.88, "step": 144815, "train_speed(iter/s)": 0.411129 }, { "acc": 0.9478714, "epoch": 3.9211545230552622, "grad_norm": 7.469962120056152, "learning_rate": 1.161772300938204e-08, "loss": 0.32891219, "memory(GiB)": 34.88, "step": 144820, "train_speed(iter/s)": 0.41113 }, { "acc": 0.94679966, "epoch": 3.9212899033384776, "grad_norm": 33.25247573852539, "learning_rate": 1.1581305207893708e-08, "loss": 0.29062171, "memory(GiB)": 34.88, "step": 144825, "train_speed(iter/s)": 0.41113 }, { "acc": 0.95180702, "epoch": 3.9214252836216934, "grad_norm": 8.208342552185059, "learning_rate": 1.154494990241463e-08, "loss": 0.29641671, "memory(GiB)": 34.88, "step": 144830, "train_speed(iter/s)": 0.411131 }, { "acc": 0.95614834, "epoch": 3.9215606639049088, "grad_norm": 8.352198600769043, "learning_rate": 1.1508657093401614e-08, "loss": 0.2634419, "memory(GiB)": 34.88, "step": 144835, "train_speed(iter/s)": 0.411132 }, { "acc": 0.94823456, "epoch": 3.9216960441881246, "grad_norm": 7.415560245513916, "learning_rate": 1.1472426781308146e-08, "loss": 0.34439297, "memory(GiB)": 34.88, "step": 144840, "train_speed(iter/s)": 0.411133 }, { "acc": 0.9484643, "epoch": 3.92183142447134, "grad_norm": 6.960803508758545, "learning_rate": 1.1436258966588812e-08, "loss": 0.28706753, "memory(GiB)": 34.88, "step": 144845, "train_speed(iter/s)": 0.411134 }, { "acc": 0.94219255, "epoch": 3.9219668047545557, "grad_norm": 6.903055667877197, "learning_rate": 1.1400153649695986e-08, "loss": 0.42862792, "memory(GiB)": 34.88, "step": 144850, "train_speed(iter/s)": 0.411134 }, { "acc": 0.94194202, "epoch": 3.922102185037771, "grad_norm": 13.676762580871582, "learning_rate": 1.1364110831082037e-08, "loss": 0.35403175, "memory(GiB)": 34.88, "step": 144855, "train_speed(iter/s)": 0.411135 }, { "acc": 0.94786577, "epoch": 3.9222375653209864, "grad_norm": 11.784008026123047, "learning_rate": 1.1328130511199335e-08, "loss": 0.25392356, "memory(GiB)": 34.88, "step": 144860, "train_speed(iter/s)": 0.411136 }, { "acc": 0.94548836, "epoch": 3.9223729456042022, "grad_norm": 5.5120697021484375, "learning_rate": 1.1292212690497476e-08, "loss": 0.32676511, "memory(GiB)": 34.88, "step": 144865, "train_speed(iter/s)": 0.411137 }, { "acc": 0.94724846, "epoch": 3.9225083258874176, "grad_norm": 5.949798583984375, "learning_rate": 1.1256357369427166e-08, "loss": 0.35032215, "memory(GiB)": 34.88, "step": 144870, "train_speed(iter/s)": 0.411137 }, { "acc": 0.95628719, "epoch": 3.9226437061706334, "grad_norm": 10.88342571258545, "learning_rate": 1.1220564548437445e-08, "loss": 0.25428648, "memory(GiB)": 34.88, "step": 144875, "train_speed(iter/s)": 0.411138 }, { "acc": 0.95757408, "epoch": 3.9227790864538488, "grad_norm": 3.0524110794067383, "learning_rate": 1.1184834227976797e-08, "loss": 0.29028482, "memory(GiB)": 34.88, "step": 144880, "train_speed(iter/s)": 0.411139 }, { "acc": 0.95786095, "epoch": 3.9229144667370646, "grad_norm": 4.292303562164307, "learning_rate": 1.1149166408492046e-08, "loss": 0.23581784, "memory(GiB)": 34.88, "step": 144885, "train_speed(iter/s)": 0.41114 }, { "acc": 0.96175156, "epoch": 3.92304984702028, "grad_norm": 17.0555477142334, "learning_rate": 1.1113561090430563e-08, "loss": 0.22962065, "memory(GiB)": 34.88, "step": 144890, "train_speed(iter/s)": 0.41114 }, { "acc": 0.95398502, "epoch": 3.9231852273034953, "grad_norm": 6.27984619140625, "learning_rate": 1.1078018274238614e-08, "loss": 0.24296927, "memory(GiB)": 34.88, "step": 144895, "train_speed(iter/s)": 0.411141 }, { "acc": 0.95167255, "epoch": 3.923320607586711, "grad_norm": 6.610672950744629, "learning_rate": 1.1042537960361354e-08, "loss": 0.27253838, "memory(GiB)": 34.88, "step": 144900, "train_speed(iter/s)": 0.411142 }, { "acc": 0.96553774, "epoch": 3.923455987869927, "grad_norm": 3.43632173538208, "learning_rate": 1.100712014924283e-08, "loss": 0.19459386, "memory(GiB)": 34.88, "step": 144905, "train_speed(iter/s)": 0.411142 }, { "acc": 0.9531292, "epoch": 3.9235913681531422, "grad_norm": 5.126562118530273, "learning_rate": 1.0971764841327082e-08, "loss": 0.2592881, "memory(GiB)": 34.88, "step": 144910, "train_speed(iter/s)": 0.411143 }, { "acc": 0.9474514, "epoch": 3.9237267484363576, "grad_norm": 9.911836624145508, "learning_rate": 1.0936472037056495e-08, "loss": 0.28276954, "memory(GiB)": 34.88, "step": 144915, "train_speed(iter/s)": 0.411144 }, { "acc": 0.9382102, "epoch": 3.9238621287195734, "grad_norm": 6.677847385406494, "learning_rate": 1.0901241736874002e-08, "loss": 0.42013206, "memory(GiB)": 34.88, "step": 144920, "train_speed(iter/s)": 0.411145 }, { "acc": 0.96372004, "epoch": 3.9239975090027888, "grad_norm": 3.6284546852111816, "learning_rate": 1.0866073941220314e-08, "loss": 0.18730564, "memory(GiB)": 34.88, "step": 144925, "train_speed(iter/s)": 0.411145 }, { "acc": 0.9590044, "epoch": 3.924132889286004, "grad_norm": 5.800140380859375, "learning_rate": 1.0830968650536149e-08, "loss": 0.26794462, "memory(GiB)": 34.88, "step": 144930, "train_speed(iter/s)": 0.411146 }, { "acc": 0.9450737, "epoch": 3.92426826956922, "grad_norm": 8.306562423706055, "learning_rate": 1.0795925865261113e-08, "loss": 0.36730547, "memory(GiB)": 34.88, "step": 144935, "train_speed(iter/s)": 0.411147 }, { "acc": 0.95726328, "epoch": 3.9244036498524357, "grad_norm": 3.370439052581787, "learning_rate": 1.0760945585834804e-08, "loss": 0.2696846, "memory(GiB)": 34.88, "step": 144940, "train_speed(iter/s)": 0.411148 }, { "acc": 0.96439152, "epoch": 3.924539030135651, "grad_norm": 8.598664283752441, "learning_rate": 1.0726027812694614e-08, "loss": 0.19358906, "memory(GiB)": 34.88, "step": 144945, "train_speed(iter/s)": 0.411148 }, { "acc": 0.95800457, "epoch": 3.9246744104188664, "grad_norm": 7.349966049194336, "learning_rate": 1.0691172546278476e-08, "loss": 0.26147943, "memory(GiB)": 34.88, "step": 144950, "train_speed(iter/s)": 0.411149 }, { "acc": 0.94186592, "epoch": 3.9248097907020822, "grad_norm": 6.607134819030762, "learning_rate": 1.0656379787022666e-08, "loss": 0.34045918, "memory(GiB)": 34.88, "step": 144955, "train_speed(iter/s)": 0.41115 }, { "acc": 0.94276085, "epoch": 3.9249451709852976, "grad_norm": 10.97901725769043, "learning_rate": 1.0621649535363458e-08, "loss": 0.36793013, "memory(GiB)": 34.88, "step": 144960, "train_speed(iter/s)": 0.411151 }, { "acc": 0.94898701, "epoch": 3.9250805512685134, "grad_norm": 5.989368438720703, "learning_rate": 1.0586981791735463e-08, "loss": 0.30453873, "memory(GiB)": 34.88, "step": 144965, "train_speed(iter/s)": 0.411152 }, { "acc": 0.94956903, "epoch": 3.9252159315517288, "grad_norm": 7.225808620452881, "learning_rate": 1.0552376556573288e-08, "loss": 0.32209301, "memory(GiB)": 34.88, "step": 144970, "train_speed(iter/s)": 0.411152 }, { "acc": 0.95958128, "epoch": 3.9253513118349446, "grad_norm": 3.991250991821289, "learning_rate": 1.0517833830310429e-08, "loss": 0.2435205, "memory(GiB)": 34.88, "step": 144975, "train_speed(iter/s)": 0.411153 }, { "acc": 0.94400635, "epoch": 3.92548669211816, "grad_norm": 19.143030166625977, "learning_rate": 1.0483353613379834e-08, "loss": 0.3933651, "memory(GiB)": 34.88, "step": 144980, "train_speed(iter/s)": 0.411154 }, { "acc": 0.95942755, "epoch": 3.9256220724013753, "grad_norm": 11.432997703552246, "learning_rate": 1.0448935906212225e-08, "loss": 0.22665734, "memory(GiB)": 34.88, "step": 144985, "train_speed(iter/s)": 0.411155 }, { "acc": 0.94251423, "epoch": 3.925757452684591, "grad_norm": 6.030850887298584, "learning_rate": 1.0414580709240545e-08, "loss": 0.34341025, "memory(GiB)": 34.88, "step": 144990, "train_speed(iter/s)": 0.411156 }, { "acc": 0.95436678, "epoch": 3.9258928329678064, "grad_norm": 4.2579755783081055, "learning_rate": 1.0380288022893297e-08, "loss": 0.31254478, "memory(GiB)": 34.88, "step": 144995, "train_speed(iter/s)": 0.411156 }, { "acc": 0.95475531, "epoch": 3.9260282132510222, "grad_norm": 4.813900470733643, "learning_rate": 1.0346057847601763e-08, "loss": 0.27325995, "memory(GiB)": 34.88, "step": 145000, "train_speed(iter/s)": 0.411157 }, { "acc": 0.95118008, "epoch": 3.9261635935342376, "grad_norm": 7.378691673278809, "learning_rate": 1.0311890183793331e-08, "loss": 0.30664525, "memory(GiB)": 34.88, "step": 145005, "train_speed(iter/s)": 0.411158 }, { "acc": 0.9438158, "epoch": 3.9262989738174534, "grad_norm": 9.108192443847656, "learning_rate": 1.0277785031897617e-08, "loss": 0.35134811, "memory(GiB)": 34.88, "step": 145010, "train_speed(iter/s)": 0.411159 }, { "acc": 0.95462351, "epoch": 3.9264343541006688, "grad_norm": 4.90524435043335, "learning_rate": 1.024374239234035e-08, "loss": 0.26548071, "memory(GiB)": 34.88, "step": 145015, "train_speed(iter/s)": 0.411159 }, { "acc": 0.95611401, "epoch": 3.926569734383884, "grad_norm": 6.669713973999023, "learning_rate": 1.0209762265548366e-08, "loss": 0.23869004, "memory(GiB)": 34.88, "step": 145020, "train_speed(iter/s)": 0.41116 }, { "acc": 0.97305498, "epoch": 3.9267051146671, "grad_norm": 4.4129509925842285, "learning_rate": 1.0175844651947948e-08, "loss": 0.16461359, "memory(GiB)": 34.88, "step": 145025, "train_speed(iter/s)": 0.411161 }, { "acc": 0.9454752, "epoch": 3.9268404949503153, "grad_norm": 10.34423828125, "learning_rate": 1.0141989551963161e-08, "loss": 0.31445775, "memory(GiB)": 34.88, "step": 145030, "train_speed(iter/s)": 0.411162 }, { "acc": 0.94064655, "epoch": 3.926975875233531, "grad_norm": 5.451000213623047, "learning_rate": 1.010819696601862e-08, "loss": 0.31919315, "memory(GiB)": 34.88, "step": 145035, "train_speed(iter/s)": 0.411162 }, { "acc": 0.9542943, "epoch": 3.9271112555167464, "grad_norm": 2.7636585235595703, "learning_rate": 1.0074466894537279e-08, "loss": 0.33097448, "memory(GiB)": 34.88, "step": 145040, "train_speed(iter/s)": 0.411163 }, { "acc": 0.94526634, "epoch": 3.9272466357999622, "grad_norm": 5.180414199829102, "learning_rate": 1.004079933794209e-08, "loss": 0.33057022, "memory(GiB)": 34.88, "step": 145045, "train_speed(iter/s)": 0.411164 }, { "acc": 0.9619935, "epoch": 3.9273820160831776, "grad_norm": 3.1342673301696777, "learning_rate": 1.0007194296654899e-08, "loss": 0.20248394, "memory(GiB)": 34.88, "step": 145050, "train_speed(iter/s)": 0.411165 }, { "acc": 0.95715923, "epoch": 3.927517396366393, "grad_norm": 5.299818992614746, "learning_rate": 9.973651771096435e-09, "loss": 0.26890523, "memory(GiB)": 34.88, "step": 145055, "train_speed(iter/s)": 0.411165 }, { "acc": 0.9483633, "epoch": 3.9276527766496088, "grad_norm": 7.908660888671875, "learning_rate": 9.94017176168632e-09, "loss": 0.39829624, "memory(GiB)": 34.88, "step": 145060, "train_speed(iter/s)": 0.411166 }, { "acc": 0.9476203, "epoch": 3.9277881569328246, "grad_norm": 5.2217607498168945, "learning_rate": 9.906754268844732e-09, "loss": 0.3696687, "memory(GiB)": 34.88, "step": 145065, "train_speed(iter/s)": 0.411167 }, { "acc": 0.95117769, "epoch": 3.92792353721604, "grad_norm": 3.5105228424072266, "learning_rate": 9.873399292990185e-09, "loss": 0.37044833, "memory(GiB)": 34.88, "step": 145070, "train_speed(iter/s)": 0.411168 }, { "acc": 0.95181408, "epoch": 3.9280589174992553, "grad_norm": 8.762616157531738, "learning_rate": 9.84010683454008e-09, "loss": 0.24667258, "memory(GiB)": 34.88, "step": 145075, "train_speed(iter/s)": 0.411169 }, { "acc": 0.94650993, "epoch": 3.928194297782471, "grad_norm": 5.280268669128418, "learning_rate": 9.806876893912371e-09, "loss": 0.31421399, "memory(GiB)": 34.88, "step": 145080, "train_speed(iter/s)": 0.411169 }, { "acc": 0.94434891, "epoch": 3.9283296780656864, "grad_norm": 13.200051307678223, "learning_rate": 9.773709471522245e-09, "loss": 0.45373454, "memory(GiB)": 34.88, "step": 145085, "train_speed(iter/s)": 0.41117 }, { "acc": 0.94984016, "epoch": 3.928465058348902, "grad_norm": 3.852590560913086, "learning_rate": 9.740604567785438e-09, "loss": 0.30445738, "memory(GiB)": 34.88, "step": 145090, "train_speed(iter/s)": 0.411171 }, { "acc": 0.96143723, "epoch": 3.9286004386321176, "grad_norm": 4.239938735961914, "learning_rate": 9.70756218311713e-09, "loss": 0.2540988, "memory(GiB)": 34.88, "step": 145095, "train_speed(iter/s)": 0.411172 }, { "acc": 0.94936714, "epoch": 3.9287358189153334, "grad_norm": 6.6201701164245605, "learning_rate": 9.674582317931393e-09, "loss": 0.29097528, "memory(GiB)": 34.88, "step": 145100, "train_speed(iter/s)": 0.411172 }, { "acc": 0.94530611, "epoch": 3.9288711991985488, "grad_norm": 8.57754135131836, "learning_rate": 9.641664972640634e-09, "loss": 0.34957671, "memory(GiB)": 34.88, "step": 145105, "train_speed(iter/s)": 0.411173 }, { "acc": 0.94788704, "epoch": 3.929006579481764, "grad_norm": 5.645911693572998, "learning_rate": 9.608810147657259e-09, "loss": 0.35354905, "memory(GiB)": 34.88, "step": 145110, "train_speed(iter/s)": 0.411174 }, { "acc": 0.9617301, "epoch": 3.92914195976498, "grad_norm": 5.4661736488342285, "learning_rate": 9.576017843393676e-09, "loss": 0.22101159, "memory(GiB)": 34.88, "step": 145115, "train_speed(iter/s)": 0.411175 }, { "acc": 0.95688057, "epoch": 3.9292773400481953, "grad_norm": 4.362009048461914, "learning_rate": 9.543288060259516e-09, "loss": 0.26748104, "memory(GiB)": 34.88, "step": 145120, "train_speed(iter/s)": 0.411175 }, { "acc": 0.96137962, "epoch": 3.929412720331411, "grad_norm": 5.415336608886719, "learning_rate": 9.510620798666075e-09, "loss": 0.20245852, "memory(GiB)": 34.88, "step": 145125, "train_speed(iter/s)": 0.411176 }, { "acc": 0.9350317, "epoch": 3.9295481006146264, "grad_norm": 4.102105140686035, "learning_rate": 9.47801605902132e-09, "loss": 0.39345334, "memory(GiB)": 34.88, "step": 145130, "train_speed(iter/s)": 0.411177 }, { "acc": 0.95088472, "epoch": 3.9296834808978423, "grad_norm": 5.085043907165527, "learning_rate": 9.445473841734326e-09, "loss": 0.33841219, "memory(GiB)": 34.88, "step": 145135, "train_speed(iter/s)": 0.411178 }, { "acc": 0.94192438, "epoch": 3.9298188611810576, "grad_norm": 5.165767192840576, "learning_rate": 9.412994147213058e-09, "loss": 0.37688961, "memory(GiB)": 34.88, "step": 145140, "train_speed(iter/s)": 0.411179 }, { "acc": 0.95683794, "epoch": 3.929954241464273, "grad_norm": 7.3195881843566895, "learning_rate": 9.380576975863266e-09, "loss": 0.29872341, "memory(GiB)": 34.88, "step": 145145, "train_speed(iter/s)": 0.411179 }, { "acc": 0.94441652, "epoch": 3.9300896217474888, "grad_norm": 4.6402788162231445, "learning_rate": 9.348222328092357e-09, "loss": 0.33533521, "memory(GiB)": 34.88, "step": 145150, "train_speed(iter/s)": 0.41118 }, { "acc": 0.94635582, "epoch": 3.930225002030704, "grad_norm": 6.329823970794678, "learning_rate": 9.31593020430497e-09, "loss": 0.3254632, "memory(GiB)": 34.88, "step": 145155, "train_speed(iter/s)": 0.411181 }, { "acc": 0.95382786, "epoch": 3.93036038231392, "grad_norm": 7.961163520812988, "learning_rate": 9.283700604905738e-09, "loss": 0.24016144, "memory(GiB)": 34.88, "step": 145160, "train_speed(iter/s)": 0.411182 }, { "acc": 0.94874878, "epoch": 3.9304957625971353, "grad_norm": 4.563310146331787, "learning_rate": 9.251533530298189e-09, "loss": 0.32145932, "memory(GiB)": 34.88, "step": 145165, "train_speed(iter/s)": 0.411183 }, { "acc": 0.94107132, "epoch": 3.930631142880351, "grad_norm": 5.0465826988220215, "learning_rate": 9.219428980885849e-09, "loss": 0.35017896, "memory(GiB)": 34.88, "step": 145170, "train_speed(iter/s)": 0.411183 }, { "acc": 0.94736042, "epoch": 3.9307665231635665, "grad_norm": 2.4245193004608154, "learning_rate": 9.18738695707002e-09, "loss": 0.2952348, "memory(GiB)": 34.88, "step": 145175, "train_speed(iter/s)": 0.411184 }, { "acc": 0.94551582, "epoch": 3.930901903446782, "grad_norm": 13.119233131408691, "learning_rate": 9.155407459253122e-09, "loss": 0.330305, "memory(GiB)": 34.88, "step": 145180, "train_speed(iter/s)": 0.411185 }, { "acc": 0.95001059, "epoch": 3.9310372837299976, "grad_norm": 5.1602253913879395, "learning_rate": 9.123490487834791e-09, "loss": 0.30916753, "memory(GiB)": 34.88, "step": 145185, "train_speed(iter/s)": 0.411185 }, { "acc": 0.95218563, "epoch": 3.931172664013213, "grad_norm": 9.57368278503418, "learning_rate": 9.091636043215227e-09, "loss": 0.20671654, "memory(GiB)": 34.88, "step": 145190, "train_speed(iter/s)": 0.411186 }, { "acc": 0.95432358, "epoch": 3.9313080442964288, "grad_norm": 17.02521324157715, "learning_rate": 9.059844125794065e-09, "loss": 0.27666972, "memory(GiB)": 34.88, "step": 145195, "train_speed(iter/s)": 0.411187 }, { "acc": 0.94995041, "epoch": 3.931443424579644, "grad_norm": 8.199732780456543, "learning_rate": 9.028114735968175e-09, "loss": 0.30503576, "memory(GiB)": 34.88, "step": 145200, "train_speed(iter/s)": 0.411188 }, { "acc": 0.9495739, "epoch": 3.93157880486286, "grad_norm": 7.354626655578613, "learning_rate": 8.99644787413664e-09, "loss": 0.28830421, "memory(GiB)": 34.88, "step": 145205, "train_speed(iter/s)": 0.411188 }, { "acc": 0.9458086, "epoch": 3.9317141851460753, "grad_norm": 8.502373695373535, "learning_rate": 8.964843540695216e-09, "loss": 0.35132883, "memory(GiB)": 34.88, "step": 145210, "train_speed(iter/s)": 0.411189 }, { "acc": 0.94742861, "epoch": 3.9318495654292906, "grad_norm": 7.138285160064697, "learning_rate": 8.933301736039655e-09, "loss": 0.34017148, "memory(GiB)": 34.88, "step": 145215, "train_speed(iter/s)": 0.41119 }, { "acc": 0.96152248, "epoch": 3.9319849457125065, "grad_norm": 7.4657979011535645, "learning_rate": 8.901822460565715e-09, "loss": 0.22523968, "memory(GiB)": 34.88, "step": 145220, "train_speed(iter/s)": 0.411191 }, { "acc": 0.93412704, "epoch": 3.9321203259957223, "grad_norm": 14.233712196350098, "learning_rate": 8.870405714667487e-09, "loss": 0.43872719, "memory(GiB)": 34.88, "step": 145225, "train_speed(iter/s)": 0.411191 }, { "acc": 0.96361237, "epoch": 3.9322557062789376, "grad_norm": 3.3146615028381348, "learning_rate": 8.8390514987385e-09, "loss": 0.18949475, "memory(GiB)": 34.88, "step": 145230, "train_speed(iter/s)": 0.411192 }, { "acc": 0.93903809, "epoch": 3.932391086562153, "grad_norm": 11.87862777709961, "learning_rate": 8.807759813170629e-09, "loss": 0.40517149, "memory(GiB)": 34.88, "step": 145235, "train_speed(iter/s)": 0.411193 }, { "acc": 0.93938313, "epoch": 3.9325264668453688, "grad_norm": 8.489248275756836, "learning_rate": 8.776530658357962e-09, "loss": 0.36902871, "memory(GiB)": 34.88, "step": 145240, "train_speed(iter/s)": 0.411194 }, { "acc": 0.95313139, "epoch": 3.932661847128584, "grad_norm": 7.175848007202148, "learning_rate": 8.745364034689591e-09, "loss": 0.27314928, "memory(GiB)": 34.88, "step": 145245, "train_speed(iter/s)": 0.411194 }, { "acc": 0.94943123, "epoch": 3.9327972274117995, "grad_norm": 6.116784572601318, "learning_rate": 8.71425994255739e-09, "loss": 0.30978999, "memory(GiB)": 34.88, "step": 145250, "train_speed(iter/s)": 0.411195 }, { "acc": 0.95291166, "epoch": 3.9329326076950153, "grad_norm": 9.049723625183105, "learning_rate": 8.683218382349894e-09, "loss": 0.34888148, "memory(GiB)": 34.88, "step": 145255, "train_speed(iter/s)": 0.411196 }, { "acc": 0.9435051, "epoch": 3.933067987978231, "grad_norm": 3.9582910537719727, "learning_rate": 8.652239354456756e-09, "loss": 0.29153743, "memory(GiB)": 34.88, "step": 145260, "train_speed(iter/s)": 0.411197 }, { "acc": 0.9485218, "epoch": 3.9332033682614465, "grad_norm": 4.414675235748291, "learning_rate": 8.621322859265956e-09, "loss": 0.33182836, "memory(GiB)": 34.88, "step": 145265, "train_speed(iter/s)": 0.411198 }, { "acc": 0.94279432, "epoch": 3.933338748544662, "grad_norm": 6.021327495574951, "learning_rate": 8.590468897163818e-09, "loss": 0.42663984, "memory(GiB)": 34.88, "step": 145270, "train_speed(iter/s)": 0.411198 }, { "acc": 0.95649529, "epoch": 3.9334741288278776, "grad_norm": 7.0720391273498535, "learning_rate": 8.55967746853832e-09, "loss": 0.25864449, "memory(GiB)": 34.88, "step": 145275, "train_speed(iter/s)": 0.411199 }, { "acc": 0.94868565, "epoch": 3.933609509111093, "grad_norm": 7.595559597015381, "learning_rate": 8.528948573774677e-09, "loss": 0.38291607, "memory(GiB)": 34.88, "step": 145280, "train_speed(iter/s)": 0.4112 }, { "acc": 0.95700836, "epoch": 3.9337448893943088, "grad_norm": 5.647276401519775, "learning_rate": 8.498282213257538e-09, "loss": 0.22966592, "memory(GiB)": 34.88, "step": 145285, "train_speed(iter/s)": 0.411201 }, { "acc": 0.95131245, "epoch": 3.933880269677524, "grad_norm": 10.798325538635254, "learning_rate": 8.467678387371005e-09, "loss": 0.30344188, "memory(GiB)": 34.88, "step": 145290, "train_speed(iter/s)": 0.411201 }, { "acc": 0.95201702, "epoch": 3.93401564996074, "grad_norm": 5.749500751495361, "learning_rate": 8.437137096498621e-09, "loss": 0.2727355, "memory(GiB)": 34.88, "step": 145295, "train_speed(iter/s)": 0.411202 }, { "acc": 0.94897156, "epoch": 3.9341510302439553, "grad_norm": 5.679221153259277, "learning_rate": 8.406658341022817e-09, "loss": 0.28716643, "memory(GiB)": 34.88, "step": 145300, "train_speed(iter/s)": 0.411203 }, { "acc": 0.92943316, "epoch": 3.9342864105271707, "grad_norm": 14.392196655273438, "learning_rate": 8.376242121326031e-09, "loss": 0.41886787, "memory(GiB)": 34.88, "step": 145305, "train_speed(iter/s)": 0.411204 }, { "acc": 0.94264593, "epoch": 3.9344217908103865, "grad_norm": 4.786600112915039, "learning_rate": 8.345888437788472e-09, "loss": 0.35796645, "memory(GiB)": 34.88, "step": 145310, "train_speed(iter/s)": 0.411204 }, { "acc": 0.95232744, "epoch": 3.934557171093602, "grad_norm": 6.761532783508301, "learning_rate": 8.315597290790913e-09, "loss": 0.29794707, "memory(GiB)": 34.88, "step": 145315, "train_speed(iter/s)": 0.411205 }, { "acc": 0.95106087, "epoch": 3.9346925513768176, "grad_norm": 4.481639862060547, "learning_rate": 8.285368680712452e-09, "loss": 0.34398363, "memory(GiB)": 34.88, "step": 145320, "train_speed(iter/s)": 0.411206 }, { "acc": 0.9532793, "epoch": 3.934827931660033, "grad_norm": 5.41131067276001, "learning_rate": 8.255202607931642e-09, "loss": 0.28145308, "memory(GiB)": 34.88, "step": 145325, "train_speed(iter/s)": 0.411207 }, { "acc": 0.95564442, "epoch": 3.9349633119432488, "grad_norm": 6.464156150817871, "learning_rate": 8.225099072827027e-09, "loss": 0.27576776, "memory(GiB)": 34.88, "step": 145330, "train_speed(iter/s)": 0.411208 }, { "acc": 0.94371157, "epoch": 3.935098692226464, "grad_norm": 5.523088455200195, "learning_rate": 8.195058075775493e-09, "loss": 0.34941776, "memory(GiB)": 34.88, "step": 145335, "train_speed(iter/s)": 0.411208 }, { "acc": 0.94011879, "epoch": 3.9352340725096795, "grad_norm": 9.56696605682373, "learning_rate": 8.165079617153368e-09, "loss": 0.37596736, "memory(GiB)": 34.88, "step": 145340, "train_speed(iter/s)": 0.411209 }, { "acc": 0.94545555, "epoch": 3.9353694527928953, "grad_norm": 5.360219955444336, "learning_rate": 8.135163697335313e-09, "loss": 0.35730217, "memory(GiB)": 34.88, "step": 145345, "train_speed(iter/s)": 0.41121 }, { "acc": 0.94119053, "epoch": 3.9355048330761107, "grad_norm": 11.83139705657959, "learning_rate": 8.105310316697657e-09, "loss": 0.37055051, "memory(GiB)": 34.88, "step": 145350, "train_speed(iter/s)": 0.411211 }, { "acc": 0.95119209, "epoch": 3.9356402133593265, "grad_norm": 10.967248916625977, "learning_rate": 8.075519475612842e-09, "loss": 0.29384112, "memory(GiB)": 34.88, "step": 145355, "train_speed(iter/s)": 0.411211 }, { "acc": 0.95552635, "epoch": 3.935775593642542, "grad_norm": 7.6447601318359375, "learning_rate": 8.045791174454978e-09, "loss": 0.26966152, "memory(GiB)": 34.88, "step": 145360, "train_speed(iter/s)": 0.411212 }, { "acc": 0.94547319, "epoch": 3.9359109739257576, "grad_norm": 8.046186447143555, "learning_rate": 8.016125413596504e-09, "loss": 0.33799303, "memory(GiB)": 34.88, "step": 145365, "train_speed(iter/s)": 0.411213 }, { "acc": 0.942764, "epoch": 3.936046354208973, "grad_norm": 9.162327766418457, "learning_rate": 7.9865221934082e-09, "loss": 0.35531139, "memory(GiB)": 34.88, "step": 145370, "train_speed(iter/s)": 0.411214 }, { "acc": 0.94577065, "epoch": 3.9361817344921883, "grad_norm": 7.72119665145874, "learning_rate": 7.95698151426195e-09, "loss": 0.33540988, "memory(GiB)": 34.88, "step": 145375, "train_speed(iter/s)": 0.411214 }, { "acc": 0.94780922, "epoch": 3.936317114775404, "grad_norm": 42.92070770263672, "learning_rate": 7.927503376527427e-09, "loss": 0.36276541, "memory(GiB)": 34.88, "step": 145380, "train_speed(iter/s)": 0.411215 }, { "acc": 0.94571533, "epoch": 3.93645249505862, "grad_norm": 10.435545921325684, "learning_rate": 7.898087780573184e-09, "loss": 0.36488857, "memory(GiB)": 34.88, "step": 145385, "train_speed(iter/s)": 0.411216 }, { "acc": 0.95362101, "epoch": 3.9365878753418353, "grad_norm": 6.906425476074219, "learning_rate": 7.86873472676889e-09, "loss": 0.30417802, "memory(GiB)": 34.88, "step": 145390, "train_speed(iter/s)": 0.411217 }, { "acc": 0.95956249, "epoch": 3.9367232556250507, "grad_norm": 4.60251522064209, "learning_rate": 7.839444215481437e-09, "loss": 0.2669626, "memory(GiB)": 34.88, "step": 145395, "train_speed(iter/s)": 0.411217 }, { "acc": 0.9385498, "epoch": 3.9368586359082665, "grad_norm": 4.277284622192383, "learning_rate": 7.81021624707827e-09, "loss": 0.3919699, "memory(GiB)": 34.88, "step": 145400, "train_speed(iter/s)": 0.411218 }, { "acc": 0.95757122, "epoch": 3.936994016191482, "grad_norm": 3.8148200511932373, "learning_rate": 7.781050821925173e-09, "loss": 0.26012228, "memory(GiB)": 34.88, "step": 145405, "train_speed(iter/s)": 0.411219 }, { "acc": 0.9557313, "epoch": 3.937129396474697, "grad_norm": 7.841725826263428, "learning_rate": 7.751947940387372e-09, "loss": 0.26783373, "memory(GiB)": 34.88, "step": 145410, "train_speed(iter/s)": 0.41122 }, { "acc": 0.93287029, "epoch": 3.937264776757913, "grad_norm": 4.792372226715088, "learning_rate": 7.72290760282954e-09, "loss": 0.38109896, "memory(GiB)": 34.88, "step": 145415, "train_speed(iter/s)": 0.41122 }, { "acc": 0.94569092, "epoch": 3.937400157041129, "grad_norm": 7.105413913726807, "learning_rate": 7.693929809616347e-09, "loss": 0.34486773, "memory(GiB)": 34.88, "step": 145420, "train_speed(iter/s)": 0.411221 }, { "acc": 0.9568491, "epoch": 3.937535537324344, "grad_norm": 4.707596302032471, "learning_rate": 7.665014561109135e-09, "loss": 0.28321202, "memory(GiB)": 34.88, "step": 145425, "train_speed(iter/s)": 0.411222 }, { "acc": 0.9549118, "epoch": 3.9376709176075595, "grad_norm": 6.111218452453613, "learning_rate": 7.636161857671467e-09, "loss": 0.19203024, "memory(GiB)": 34.88, "step": 145430, "train_speed(iter/s)": 0.411222 }, { "acc": 0.94402742, "epoch": 3.9378062978907753, "grad_norm": 6.583484172821045, "learning_rate": 7.607371699664128e-09, "loss": 0.43213286, "memory(GiB)": 34.88, "step": 145435, "train_speed(iter/s)": 0.411223 }, { "acc": 0.96551485, "epoch": 3.9379416781739907, "grad_norm": 10.769896507263184, "learning_rate": 7.57864408744846e-09, "loss": 0.21862555, "memory(GiB)": 34.88, "step": 145440, "train_speed(iter/s)": 0.411224 }, { "acc": 0.95305233, "epoch": 3.9380770584572065, "grad_norm": 3.755115509033203, "learning_rate": 7.549979021383029e-09, "loss": 0.28519111, "memory(GiB)": 34.88, "step": 145445, "train_speed(iter/s)": 0.411225 }, { "acc": 0.9653985, "epoch": 3.938212438740422, "grad_norm": 5.930480003356934, "learning_rate": 7.521376501828621e-09, "loss": 0.22980742, "memory(GiB)": 34.88, "step": 145450, "train_speed(iter/s)": 0.411226 }, { "acc": 0.94997568, "epoch": 3.9383478190236376, "grad_norm": 3.6643178462982178, "learning_rate": 7.492836529142138e-09, "loss": 0.297083, "memory(GiB)": 34.88, "step": 145455, "train_speed(iter/s)": 0.411226 }, { "acc": 0.94036522, "epoch": 3.938483199306853, "grad_norm": 4.543370723724365, "learning_rate": 7.464359103681591e-09, "loss": 0.34989088, "memory(GiB)": 34.88, "step": 145460, "train_speed(iter/s)": 0.411227 }, { "acc": 0.95351944, "epoch": 3.9386185795900683, "grad_norm": 10.83403491973877, "learning_rate": 7.435944225803879e-09, "loss": 0.30723615, "memory(GiB)": 34.88, "step": 145465, "train_speed(iter/s)": 0.411228 }, { "acc": 0.94406509, "epoch": 3.938753959873284, "grad_norm": 3.612537145614624, "learning_rate": 7.4075918958647966e-09, "loss": 0.41841855, "memory(GiB)": 34.88, "step": 145470, "train_speed(iter/s)": 0.411229 }, { "acc": 0.95536852, "epoch": 3.9388893401564995, "grad_norm": 4.069185733795166, "learning_rate": 7.379302114219575e-09, "loss": 0.26151297, "memory(GiB)": 34.88, "step": 145475, "train_speed(iter/s)": 0.411229 }, { "acc": 0.9552001, "epoch": 3.9390247204397153, "grad_norm": 4.507733345031738, "learning_rate": 7.351074881222345e-09, "loss": 0.27655144, "memory(GiB)": 34.88, "step": 145480, "train_speed(iter/s)": 0.41123 }, { "acc": 0.94690037, "epoch": 3.9391601007229307, "grad_norm": 9.212708473205566, "learning_rate": 7.322910197227228e-09, "loss": 0.30069509, "memory(GiB)": 34.88, "step": 145485, "train_speed(iter/s)": 0.411231 }, { "acc": 0.96512213, "epoch": 3.9392954810061465, "grad_norm": 3.719412326812744, "learning_rate": 7.294808062586687e-09, "loss": 0.24295485, "memory(GiB)": 34.88, "step": 145490, "train_speed(iter/s)": 0.411232 }, { "acc": 0.94260521, "epoch": 3.939430861289362, "grad_norm": 25.495159149169922, "learning_rate": 7.266768477652627e-09, "loss": 0.35497458, "memory(GiB)": 34.88, "step": 145495, "train_speed(iter/s)": 0.411232 }, { "acc": 0.95716515, "epoch": 3.939566241572577, "grad_norm": 13.61359977722168, "learning_rate": 7.238791442776953e-09, "loss": 0.22469571, "memory(GiB)": 34.88, "step": 145500, "train_speed(iter/s)": 0.411233 }, { "acc": 0.93438997, "epoch": 3.939701621855793, "grad_norm": 8.76844310760498, "learning_rate": 7.210876958308796e-09, "loss": 0.43865948, "memory(GiB)": 34.88, "step": 145505, "train_speed(iter/s)": 0.411234 }, { "acc": 0.94964533, "epoch": 3.9398370021390083, "grad_norm": 3.999896764755249, "learning_rate": 7.1830250245995054e-09, "loss": 0.2830179, "memory(GiB)": 34.88, "step": 145510, "train_speed(iter/s)": 0.411235 }, { "acc": 0.94912043, "epoch": 3.939972382422224, "grad_norm": 5.983317852020264, "learning_rate": 7.155235641997103e-09, "loss": 0.28508234, "memory(GiB)": 34.88, "step": 145515, "train_speed(iter/s)": 0.411235 }, { "acc": 0.94770174, "epoch": 3.9401077627054395, "grad_norm": 6.961846828460693, "learning_rate": 7.127508810849051e-09, "loss": 0.34698086, "memory(GiB)": 34.88, "step": 145520, "train_speed(iter/s)": 0.411236 }, { "acc": 0.95103016, "epoch": 3.9402431429886553, "grad_norm": 12.651910781860352, "learning_rate": 7.099844531503928e-09, "loss": 0.28923664, "memory(GiB)": 34.88, "step": 145525, "train_speed(iter/s)": 0.411237 }, { "acc": 0.94433708, "epoch": 3.9403785232718707, "grad_norm": 6.916222095489502, "learning_rate": 7.0722428043080865e-09, "loss": 0.38114474, "memory(GiB)": 34.88, "step": 145530, "train_speed(iter/s)": 0.411237 }, { "acc": 0.96317768, "epoch": 3.940513903555086, "grad_norm": 6.072014808654785, "learning_rate": 7.044703629606217e-09, "loss": 0.21940458, "memory(GiB)": 34.88, "step": 145535, "train_speed(iter/s)": 0.411238 }, { "acc": 0.95721207, "epoch": 3.940649283838302, "grad_norm": 5.061744213104248, "learning_rate": 7.017227007744675e-09, "loss": 0.23916905, "memory(GiB)": 34.88, "step": 145540, "train_speed(iter/s)": 0.411239 }, { "acc": 0.9574276, "epoch": 3.940784664121517, "grad_norm": 3.0844080448150635, "learning_rate": 6.98981293906704e-09, "loss": 0.26521301, "memory(GiB)": 34.88, "step": 145545, "train_speed(iter/s)": 0.41124 }, { "acc": 0.95369263, "epoch": 3.940920044404733, "grad_norm": 4.360368728637695, "learning_rate": 6.962461423916335e-09, "loss": 0.29424825, "memory(GiB)": 34.88, "step": 145550, "train_speed(iter/s)": 0.41124 }, { "acc": 0.95129147, "epoch": 3.9410554246879483, "grad_norm": 16.192312240600586, "learning_rate": 6.935172462635587e-09, "loss": 0.30585515, "memory(GiB)": 34.88, "step": 145555, "train_speed(iter/s)": 0.411241 }, { "acc": 0.94094467, "epoch": 3.941190804971164, "grad_norm": 13.517410278320312, "learning_rate": 6.907946055567264e-09, "loss": 0.35915918, "memory(GiB)": 34.88, "step": 145560, "train_speed(iter/s)": 0.411242 }, { "acc": 0.95268784, "epoch": 3.9413261852543795, "grad_norm": 13.297431945800781, "learning_rate": 6.880782203051061e-09, "loss": 0.25000873, "memory(GiB)": 34.88, "step": 145565, "train_speed(iter/s)": 0.411243 }, { "acc": 0.95541248, "epoch": 3.941461565537595, "grad_norm": 5.062201976776123, "learning_rate": 6.853680905428337e-09, "loss": 0.25165265, "memory(GiB)": 34.88, "step": 145570, "train_speed(iter/s)": 0.411244 }, { "acc": 0.95021992, "epoch": 3.9415969458208107, "grad_norm": 3.5442261695861816, "learning_rate": 6.8266421630382315e-09, "loss": 0.33133173, "memory(GiB)": 34.88, "step": 145575, "train_speed(iter/s)": 0.411244 }, { "acc": 0.93848763, "epoch": 3.9417323261040265, "grad_norm": 19.68655776977539, "learning_rate": 6.799665976219328e-09, "loss": 0.42566333, "memory(GiB)": 34.88, "step": 145580, "train_speed(iter/s)": 0.411245 }, { "acc": 0.95395145, "epoch": 3.941867706387242, "grad_norm": 6.854740142822266, "learning_rate": 6.772752345310211e-09, "loss": 0.2890919, "memory(GiB)": 34.88, "step": 145585, "train_speed(iter/s)": 0.411246 }, { "acc": 0.93565426, "epoch": 3.942003086670457, "grad_norm": 10.35285472869873, "learning_rate": 6.7459012706466916e-09, "loss": 0.4761425, "memory(GiB)": 34.88, "step": 145590, "train_speed(iter/s)": 0.411246 }, { "acc": 0.94751167, "epoch": 3.942138466953673, "grad_norm": 16.89838409423828, "learning_rate": 6.719112752566795e-09, "loss": 0.32659321, "memory(GiB)": 34.88, "step": 145595, "train_speed(iter/s)": 0.411247 }, { "acc": 0.93667326, "epoch": 3.9422738472368883, "grad_norm": 11.154122352600098, "learning_rate": 6.6923867914046675e-09, "loss": 0.3933286, "memory(GiB)": 34.88, "step": 145600, "train_speed(iter/s)": 0.411248 }, { "acc": 0.95347061, "epoch": 3.9424092275201037, "grad_norm": 7.247643947601318, "learning_rate": 6.6657233874961164e-09, "loss": 0.25407314, "memory(GiB)": 34.88, "step": 145605, "train_speed(iter/s)": 0.411249 }, { "acc": 0.94243279, "epoch": 3.9425446078033195, "grad_norm": 11.245746612548828, "learning_rate": 6.639122541174733e-09, "loss": 0.36606388, "memory(GiB)": 34.88, "step": 145610, "train_speed(iter/s)": 0.411249 }, { "acc": 0.94958086, "epoch": 3.9426799880865353, "grad_norm": 14.504217147827148, "learning_rate": 6.612584252773548e-09, "loss": 0.33343334, "memory(GiB)": 34.88, "step": 145615, "train_speed(iter/s)": 0.41125 }, { "acc": 0.9550602, "epoch": 3.9428153683697507, "grad_norm": 5.00182580947876, "learning_rate": 6.586108522625041e-09, "loss": 0.29534154, "memory(GiB)": 34.88, "step": 145620, "train_speed(iter/s)": 0.411251 }, { "acc": 0.94580078, "epoch": 3.942950748652966, "grad_norm": 6.469937801361084, "learning_rate": 6.5596953510616915e-09, "loss": 0.30394425, "memory(GiB)": 34.88, "step": 145625, "train_speed(iter/s)": 0.411252 }, { "acc": 0.96234283, "epoch": 3.943086128936182, "grad_norm": 3.7121963500976562, "learning_rate": 6.533344738413201e-09, "loss": 0.25827448, "memory(GiB)": 34.88, "step": 145630, "train_speed(iter/s)": 0.411252 }, { "acc": 0.96237392, "epoch": 3.943221509219397, "grad_norm": 8.768507957458496, "learning_rate": 6.5070566850103845e-09, "loss": 0.26969872, "memory(GiB)": 34.88, "step": 145635, "train_speed(iter/s)": 0.411253 }, { "acc": 0.95183659, "epoch": 3.943356889502613, "grad_norm": 7.672860145568848, "learning_rate": 6.480831191182388e-09, "loss": 0.34336915, "memory(GiB)": 34.88, "step": 145640, "train_speed(iter/s)": 0.411254 }, { "acc": 0.94935265, "epoch": 3.9434922697858283, "grad_norm": 4.938450336456299, "learning_rate": 6.454668257257253e-09, "loss": 0.29865854, "memory(GiB)": 34.88, "step": 145645, "train_speed(iter/s)": 0.411255 }, { "acc": 0.95070324, "epoch": 3.943627650069044, "grad_norm": 10.646568298339844, "learning_rate": 6.428567883563569e-09, "loss": 0.32376404, "memory(GiB)": 34.88, "step": 145650, "train_speed(iter/s)": 0.411255 }, { "acc": 0.96185112, "epoch": 3.9437630303522595, "grad_norm": 5.488478183746338, "learning_rate": 6.402530070428267e-09, "loss": 0.20280674, "memory(GiB)": 34.88, "step": 145655, "train_speed(iter/s)": 0.411256 }, { "acc": 0.95139809, "epoch": 3.943898410635475, "grad_norm": 6.566799640655518, "learning_rate": 6.3765548181766076e-09, "loss": 0.25893908, "memory(GiB)": 34.88, "step": 145660, "train_speed(iter/s)": 0.411257 }, { "acc": 0.93823204, "epoch": 3.9440337909186907, "grad_norm": 5.361673831939697, "learning_rate": 6.35064212713441e-09, "loss": 0.39804792, "memory(GiB)": 34.88, "step": 145665, "train_speed(iter/s)": 0.411258 }, { "acc": 0.94468584, "epoch": 3.944169171201906, "grad_norm": 6.393182277679443, "learning_rate": 6.324791997626937e-09, "loss": 0.34766028, "memory(GiB)": 34.88, "step": 145670, "train_speed(iter/s)": 0.411259 }, { "acc": 0.95657177, "epoch": 3.944304551485122, "grad_norm": 5.434373378753662, "learning_rate": 6.299004429977232e-09, "loss": 0.25706813, "memory(GiB)": 34.88, "step": 145675, "train_speed(iter/s)": 0.411259 }, { "acc": 0.94232826, "epoch": 3.944439931768337, "grad_norm": 3.77888560295105, "learning_rate": 6.273279424508889e-09, "loss": 0.35108039, "memory(GiB)": 34.88, "step": 145680, "train_speed(iter/s)": 0.41126 }, { "acc": 0.95435162, "epoch": 3.944575312051553, "grad_norm": 7.8343939781188965, "learning_rate": 6.24761698154329e-09, "loss": 0.24874666, "memory(GiB)": 34.88, "step": 145685, "train_speed(iter/s)": 0.411261 }, { "acc": 0.95048313, "epoch": 3.9447106923347683, "grad_norm": 5.5266523361206055, "learning_rate": 6.222017101402919e-09, "loss": 0.31633646, "memory(GiB)": 34.88, "step": 145690, "train_speed(iter/s)": 0.411262 }, { "acc": 0.93170662, "epoch": 3.9448460726179837, "grad_norm": 4.141900539398193, "learning_rate": 6.196479784407489e-09, "loss": 0.46501865, "memory(GiB)": 34.88, "step": 145695, "train_speed(iter/s)": 0.411262 }, { "acc": 0.92412796, "epoch": 3.9449814529011995, "grad_norm": 10.671146392822266, "learning_rate": 6.171005030877825e-09, "loss": 0.53119321, "memory(GiB)": 34.88, "step": 145700, "train_speed(iter/s)": 0.411263 }, { "acc": 0.97002792, "epoch": 3.945116833184415, "grad_norm": 2.7474234104156494, "learning_rate": 6.1455928411330814e-09, "loss": 0.16453357, "memory(GiB)": 34.88, "step": 145705, "train_speed(iter/s)": 0.411264 }, { "acc": 0.95828342, "epoch": 3.9452522134676307, "grad_norm": 5.577897071838379, "learning_rate": 6.1202432154907504e-09, "loss": 0.26700835, "memory(GiB)": 34.88, "step": 145710, "train_speed(iter/s)": 0.411264 }, { "acc": 0.96311207, "epoch": 3.945387593750846, "grad_norm": 5.249964714050293, "learning_rate": 6.094956154268325e-09, "loss": 0.23307118, "memory(GiB)": 34.88, "step": 145715, "train_speed(iter/s)": 0.411265 }, { "acc": 0.94617357, "epoch": 3.945522974034062, "grad_norm": 7.318244934082031, "learning_rate": 6.0697316577838535e-09, "loss": 0.29327149, "memory(GiB)": 34.88, "step": 145720, "train_speed(iter/s)": 0.411266 }, { "acc": 0.94821529, "epoch": 3.945658354317277, "grad_norm": 4.65299129486084, "learning_rate": 6.044569726352049e-09, "loss": 0.36591003, "memory(GiB)": 34.88, "step": 145725, "train_speed(iter/s)": 0.411267 }, { "acc": 0.94514952, "epoch": 3.9457937346004925, "grad_norm": 9.251133918762207, "learning_rate": 6.0194703602887415e-09, "loss": 0.28413808, "memory(GiB)": 34.88, "step": 145730, "train_speed(iter/s)": 0.411267 }, { "acc": 0.95043344, "epoch": 3.9459291148837083, "grad_norm": 8.510140419006348, "learning_rate": 5.9944335599080925e-09, "loss": 0.35855136, "memory(GiB)": 34.88, "step": 145735, "train_speed(iter/s)": 0.411268 }, { "acc": 0.92932119, "epoch": 3.946064495166924, "grad_norm": 5.133184432983398, "learning_rate": 5.969459325524263e-09, "loss": 0.45759897, "memory(GiB)": 34.88, "step": 145740, "train_speed(iter/s)": 0.411269 }, { "acc": 0.95647058, "epoch": 3.9461998754501395, "grad_norm": 6.797302722930908, "learning_rate": 5.9445476574491926e-09, "loss": 0.27254498, "memory(GiB)": 34.88, "step": 145745, "train_speed(iter/s)": 0.411269 }, { "acc": 0.94830246, "epoch": 3.946335255733355, "grad_norm": 9.5663423538208, "learning_rate": 5.919698555995936e-09, "loss": 0.34591322, "memory(GiB)": 34.88, "step": 145750, "train_speed(iter/s)": 0.41127 }, { "acc": 0.93889141, "epoch": 3.9464706360165707, "grad_norm": 9.330978393554688, "learning_rate": 5.894912021474768e-09, "loss": 0.33393416, "memory(GiB)": 34.88, "step": 145755, "train_speed(iter/s)": 0.411271 }, { "acc": 0.93968611, "epoch": 3.946606016299786, "grad_norm": 8.483360290527344, "learning_rate": 5.870188054197075e-09, "loss": 0.39975181, "memory(GiB)": 34.88, "step": 145760, "train_speed(iter/s)": 0.411272 }, { "acc": 0.95476131, "epoch": 3.9467413965830014, "grad_norm": 6.9543232917785645, "learning_rate": 5.8455266544720225e-09, "loss": 0.31017232, "memory(GiB)": 34.88, "step": 145765, "train_speed(iter/s)": 0.411272 }, { "acc": 0.94454803, "epoch": 3.946876776866217, "grad_norm": 5.761573314666748, "learning_rate": 5.82092782260878e-09, "loss": 0.37962332, "memory(GiB)": 34.88, "step": 145770, "train_speed(iter/s)": 0.411273 }, { "acc": 0.93688679, "epoch": 3.947012157149433, "grad_norm": 11.545515060424805, "learning_rate": 5.796391558915399e-09, "loss": 0.48515115, "memory(GiB)": 34.88, "step": 145775, "train_speed(iter/s)": 0.411274 }, { "acc": 0.95579243, "epoch": 3.9471475374326483, "grad_norm": 5.608665943145752, "learning_rate": 5.771917863699383e-09, "loss": 0.27896597, "memory(GiB)": 34.88, "step": 145780, "train_speed(iter/s)": 0.411275 }, { "acc": 0.94300804, "epoch": 3.9472829177158637, "grad_norm": 7.628198146820068, "learning_rate": 5.747506737267123e-09, "loss": 0.39389572, "memory(GiB)": 34.88, "step": 145785, "train_speed(iter/s)": 0.411275 }, { "acc": 0.94581051, "epoch": 3.9474182979990795, "grad_norm": 5.985300540924072, "learning_rate": 5.723158179924454e-09, "loss": 0.31066709, "memory(GiB)": 34.88, "step": 145790, "train_speed(iter/s)": 0.411276 }, { "acc": 0.95073433, "epoch": 3.947553678282295, "grad_norm": 5.013388156890869, "learning_rate": 5.698872191976658e-09, "loss": 0.37546148, "memory(GiB)": 34.88, "step": 145795, "train_speed(iter/s)": 0.411277 }, { "acc": 0.95818062, "epoch": 3.9476890585655107, "grad_norm": 28.64259147644043, "learning_rate": 5.674648773727905e-09, "loss": 0.27524374, "memory(GiB)": 34.88, "step": 145800, "train_speed(iter/s)": 0.411278 }, { "acc": 0.94904203, "epoch": 3.947824438848726, "grad_norm": 13.044023513793945, "learning_rate": 5.650487925481256e-09, "loss": 0.30334287, "memory(GiB)": 34.88, "step": 145805, "train_speed(iter/s)": 0.411278 }, { "acc": 0.94945126, "epoch": 3.947959819131942, "grad_norm": 4.590601444244385, "learning_rate": 5.626389647539769e-09, "loss": 0.35616896, "memory(GiB)": 34.88, "step": 145810, "train_speed(iter/s)": 0.411279 }, { "acc": 0.95373898, "epoch": 3.948095199415157, "grad_norm": 7.3195576667785645, "learning_rate": 5.602353940204843e-09, "loss": 0.28368959, "memory(GiB)": 34.88, "step": 145815, "train_speed(iter/s)": 0.41128 }, { "acc": 0.94850521, "epoch": 3.9482305796983725, "grad_norm": 9.995990753173828, "learning_rate": 5.578380803777872e-09, "loss": 0.31617103, "memory(GiB)": 34.88, "step": 145820, "train_speed(iter/s)": 0.41128 }, { "acc": 0.9524437, "epoch": 3.9483659599815883, "grad_norm": 3.4799771308898926, "learning_rate": 5.554470238559141e-09, "loss": 0.28131723, "memory(GiB)": 34.88, "step": 145825, "train_speed(iter/s)": 0.411281 }, { "acc": 0.95680103, "epoch": 3.9485013402648037, "grad_norm": 2.5505635738372803, "learning_rate": 5.53062224484838e-09, "loss": 0.23547487, "memory(GiB)": 34.88, "step": 145830, "train_speed(iter/s)": 0.411282 }, { "acc": 0.96054239, "epoch": 3.9486367205480195, "grad_norm": 5.914462566375732, "learning_rate": 5.50683682294421e-09, "loss": 0.19474244, "memory(GiB)": 34.88, "step": 145835, "train_speed(iter/s)": 0.411282 }, { "acc": 0.95486917, "epoch": 3.948772100831235, "grad_norm": 3.7838962078094482, "learning_rate": 5.483113973144696e-09, "loss": 0.30992246, "memory(GiB)": 34.88, "step": 145840, "train_speed(iter/s)": 0.411283 }, { "acc": 0.95952301, "epoch": 3.9489074811144507, "grad_norm": 7.56773567199707, "learning_rate": 5.459453695746792e-09, "loss": 0.26330085, "memory(GiB)": 34.88, "step": 145845, "train_speed(iter/s)": 0.411284 }, { "acc": 0.96021452, "epoch": 3.949042861397666, "grad_norm": 6.515152931213379, "learning_rate": 5.435855991046899e-09, "loss": 0.21263859, "memory(GiB)": 34.88, "step": 145850, "train_speed(iter/s)": 0.411284 }, { "acc": 0.95048351, "epoch": 3.9491782416808814, "grad_norm": 18.514822006225586, "learning_rate": 5.412320859340861e-09, "loss": 0.30985851, "memory(GiB)": 34.88, "step": 145855, "train_speed(iter/s)": 0.411285 }, { "acc": 0.94051552, "epoch": 3.949313621964097, "grad_norm": 7.032402038574219, "learning_rate": 5.388848300922857e-09, "loss": 0.31832545, "memory(GiB)": 34.88, "step": 145860, "train_speed(iter/s)": 0.411286 }, { "acc": 0.9442112, "epoch": 3.9494490022473125, "grad_norm": 5.187738418579102, "learning_rate": 5.365438316087625e-09, "loss": 0.29982319, "memory(GiB)": 34.88, "step": 145865, "train_speed(iter/s)": 0.411286 }, { "acc": 0.95788822, "epoch": 3.9495843825305283, "grad_norm": 4.26370906829834, "learning_rate": 5.342090905128231e-09, "loss": 0.26533966, "memory(GiB)": 34.88, "step": 145870, "train_speed(iter/s)": 0.411287 }, { "acc": 0.94233713, "epoch": 3.9497197628137437, "grad_norm": 3.9695146083831787, "learning_rate": 5.318806068337192e-09, "loss": 0.40843511, "memory(GiB)": 34.88, "step": 145875, "train_speed(iter/s)": 0.411288 }, { "acc": 0.95265064, "epoch": 3.9498551430969595, "grad_norm": 31.117996215820312, "learning_rate": 5.295583806006465e-09, "loss": 0.21203249, "memory(GiB)": 34.88, "step": 145880, "train_speed(iter/s)": 0.411288 }, { "acc": 0.939538, "epoch": 3.949990523380175, "grad_norm": 4.8657612800598145, "learning_rate": 5.272424118426346e-09, "loss": 0.37516246, "memory(GiB)": 34.88, "step": 145885, "train_speed(iter/s)": 0.411289 }, { "acc": 0.94651222, "epoch": 3.9501259036633902, "grad_norm": 12.23676872253418, "learning_rate": 5.249327005887129e-09, "loss": 0.31809244, "memory(GiB)": 34.88, "step": 145890, "train_speed(iter/s)": 0.41129 }, { "acc": 0.95944252, "epoch": 3.950261283946606, "grad_norm": 6.861852645874023, "learning_rate": 5.2262924686779985e-09, "loss": 0.24696558, "memory(GiB)": 34.88, "step": 145895, "train_speed(iter/s)": 0.411291 }, { "acc": 0.96039944, "epoch": 3.950396664229822, "grad_norm": 4.543176174163818, "learning_rate": 5.203320507088138e-09, "loss": 0.28030891, "memory(GiB)": 34.88, "step": 145900, "train_speed(iter/s)": 0.411291 }, { "acc": 0.95410128, "epoch": 3.950532044513037, "grad_norm": 7.543820858001709, "learning_rate": 5.1804111214045104e-09, "loss": 0.30870376, "memory(GiB)": 34.88, "step": 145905, "train_speed(iter/s)": 0.411292 }, { "acc": 0.96428194, "epoch": 3.9506674247962525, "grad_norm": 7.536491394042969, "learning_rate": 5.157564311914638e-09, "loss": 0.2261781, "memory(GiB)": 34.88, "step": 145910, "train_speed(iter/s)": 0.411293 }, { "acc": 0.9465292, "epoch": 3.9508028050794683, "grad_norm": 8.40860652923584, "learning_rate": 5.134780078904927e-09, "loss": 0.33379388, "memory(GiB)": 34.88, "step": 145915, "train_speed(iter/s)": 0.411294 }, { "acc": 0.95305767, "epoch": 3.9509381853626837, "grad_norm": 7.123035907745361, "learning_rate": 5.112058422660121e-09, "loss": 0.28889453, "memory(GiB)": 34.88, "step": 145920, "train_speed(iter/s)": 0.411294 }, { "acc": 0.96122208, "epoch": 3.951073565645899, "grad_norm": 6.24220085144043, "learning_rate": 5.089399343465521e-09, "loss": 0.22663264, "memory(GiB)": 34.88, "step": 145925, "train_speed(iter/s)": 0.411295 }, { "acc": 0.95515938, "epoch": 3.951208945929115, "grad_norm": 16.507232666015625, "learning_rate": 5.066802841604758e-09, "loss": 0.29321938, "memory(GiB)": 34.88, "step": 145930, "train_speed(iter/s)": 0.411296 }, { "acc": 0.95524416, "epoch": 3.9513443262123307, "grad_norm": 9.822347640991211, "learning_rate": 5.044268917360357e-09, "loss": 0.25836744, "memory(GiB)": 34.88, "step": 145935, "train_speed(iter/s)": 0.411296 }, { "acc": 0.9529727, "epoch": 3.951479706495546, "grad_norm": 5.289950847625732, "learning_rate": 5.021797571014842e-09, "loss": 0.26068347, "memory(GiB)": 34.88, "step": 145940, "train_speed(iter/s)": 0.411297 }, { "acc": 0.96139736, "epoch": 3.9516150867787614, "grad_norm": 4.571329116821289, "learning_rate": 4.999388802850736e-09, "loss": 0.21563754, "memory(GiB)": 34.88, "step": 145945, "train_speed(iter/s)": 0.411298 }, { "acc": 0.9527503, "epoch": 3.951750467061977, "grad_norm": 15.414897918701172, "learning_rate": 4.977042613147231e-09, "loss": 0.321118, "memory(GiB)": 34.88, "step": 145950, "train_speed(iter/s)": 0.411299 }, { "acc": 0.95215263, "epoch": 3.9518858473451925, "grad_norm": 4.027799606323242, "learning_rate": 4.954759002185187e-09, "loss": 0.34084041, "memory(GiB)": 34.88, "step": 145955, "train_speed(iter/s)": 0.411299 }, { "acc": 0.95588875, "epoch": 3.9520212276284084, "grad_norm": 5.801690101623535, "learning_rate": 4.932537970243795e-09, "loss": 0.2722086, "memory(GiB)": 34.88, "step": 145960, "train_speed(iter/s)": 0.4113 }, { "acc": 0.94613857, "epoch": 3.9521566079116237, "grad_norm": 6.379257678985596, "learning_rate": 4.910379517600585e-09, "loss": 0.33069777, "memory(GiB)": 34.88, "step": 145965, "train_speed(iter/s)": 0.411301 }, { "acc": 0.94728384, "epoch": 3.9522919881948395, "grad_norm": 12.73306655883789, "learning_rate": 4.8882836445341956e-09, "loss": 0.38914573, "memory(GiB)": 34.88, "step": 145970, "train_speed(iter/s)": 0.411302 }, { "acc": 0.95848637, "epoch": 3.952427368478055, "grad_norm": 3.2129576206207275, "learning_rate": 4.866250351320488e-09, "loss": 0.28143837, "memory(GiB)": 34.88, "step": 145975, "train_speed(iter/s)": 0.411302 }, { "acc": 0.95549889, "epoch": 3.9525627487612702, "grad_norm": 8.218271255493164, "learning_rate": 4.8442796382364354e-09, "loss": 0.30785301, "memory(GiB)": 34.88, "step": 145980, "train_speed(iter/s)": 0.411303 }, { "acc": 0.95018673, "epoch": 3.952698129044486, "grad_norm": 16.047557830810547, "learning_rate": 4.822371505556236e-09, "loss": 0.30189567, "memory(GiB)": 34.88, "step": 145985, "train_speed(iter/s)": 0.411304 }, { "acc": 0.94845181, "epoch": 3.9528335093277014, "grad_norm": 8.572348594665527, "learning_rate": 4.800525953555198e-09, "loss": 0.35557899, "memory(GiB)": 34.88, "step": 145990, "train_speed(iter/s)": 0.411304 }, { "acc": 0.95645552, "epoch": 3.952968889610917, "grad_norm": 4.373044490814209, "learning_rate": 4.778742982505852e-09, "loss": 0.30288997, "memory(GiB)": 34.88, "step": 145995, "train_speed(iter/s)": 0.411305 }, { "acc": 0.93967075, "epoch": 3.9531042698941325, "grad_norm": 3.515683889389038, "learning_rate": 4.757022592682398e-09, "loss": 0.41150432, "memory(GiB)": 34.88, "step": 146000, "train_speed(iter/s)": 0.411306 }, { "acc": 0.95309639, "epoch": 3.9532396501773484, "grad_norm": 8.555167198181152, "learning_rate": 4.735364784356256e-09, "loss": 0.2706368, "memory(GiB)": 34.88, "step": 146005, "train_speed(iter/s)": 0.411307 }, { "acc": 0.95093813, "epoch": 3.9533750304605637, "grad_norm": 7.559267044067383, "learning_rate": 4.713769557798852e-09, "loss": 0.27981648, "memory(GiB)": 34.88, "step": 146010, "train_speed(iter/s)": 0.411307 }, { "acc": 0.95495481, "epoch": 3.953510410743779, "grad_norm": 11.591612815856934, "learning_rate": 4.692236913281049e-09, "loss": 0.21649246, "memory(GiB)": 34.88, "step": 146015, "train_speed(iter/s)": 0.411308 }, { "acc": 0.96628256, "epoch": 3.953645791026995, "grad_norm": 4.80660343170166, "learning_rate": 4.670766851072052e-09, "loss": 0.18062758, "memory(GiB)": 34.88, "step": 146020, "train_speed(iter/s)": 0.411309 }, { "acc": 0.94933138, "epoch": 3.9537811713102102, "grad_norm": 8.064249992370605, "learning_rate": 4.649359371440507e-09, "loss": 0.33856332, "memory(GiB)": 34.88, "step": 146025, "train_speed(iter/s)": 0.41131 }, { "acc": 0.96366692, "epoch": 3.953916551593426, "grad_norm": 5.91876745223999, "learning_rate": 4.628014474655617e-09, "loss": 0.21094892, "memory(GiB)": 34.88, "step": 146030, "train_speed(iter/s)": 0.41131 }, { "acc": 0.96318207, "epoch": 3.9540519318766414, "grad_norm": 7.181726455688477, "learning_rate": 4.606732160983807e-09, "loss": 0.23161871, "memory(GiB)": 34.88, "step": 146035, "train_speed(iter/s)": 0.411311 }, { "acc": 0.94647055, "epoch": 3.954187312159857, "grad_norm": 5.4153923988342285, "learning_rate": 4.585512430692061e-09, "loss": 0.29161718, "memory(GiB)": 34.88, "step": 146040, "train_speed(iter/s)": 0.411312 }, { "acc": 0.95207624, "epoch": 3.9543226924430726, "grad_norm": 4.461413383483887, "learning_rate": 4.564355284046806e-09, "loss": 0.29177899, "memory(GiB)": 34.88, "step": 146045, "train_speed(iter/s)": 0.411313 }, { "acc": 0.92898712, "epoch": 3.954458072726288, "grad_norm": 10.74235725402832, "learning_rate": 4.5432607213116915e-09, "loss": 0.46897249, "memory(GiB)": 34.88, "step": 146050, "train_speed(iter/s)": 0.411313 }, { "acc": 0.95001545, "epoch": 3.9545934530095037, "grad_norm": 5.651196479797363, "learning_rate": 4.522228742752591e-09, "loss": 0.31144509, "memory(GiB)": 34.88, "step": 146055, "train_speed(iter/s)": 0.411314 }, { "acc": 0.94300919, "epoch": 3.9547288332927195, "grad_norm": 4.15231466293335, "learning_rate": 4.501259348631491e-09, "loss": 0.32800083, "memory(GiB)": 34.88, "step": 146060, "train_speed(iter/s)": 0.411315 }, { "acc": 0.9639308, "epoch": 3.954864213575935, "grad_norm": 8.901144027709961, "learning_rate": 4.480352539212041e-09, "loss": 0.23024502, "memory(GiB)": 34.88, "step": 146065, "train_speed(iter/s)": 0.411316 }, { "acc": 0.95725784, "epoch": 3.9549995938591502, "grad_norm": 6.933360576629639, "learning_rate": 4.459508314755676e-09, "loss": 0.2969023, "memory(GiB)": 34.88, "step": 146070, "train_speed(iter/s)": 0.411317 }, { "acc": 0.95094566, "epoch": 3.955134974142366, "grad_norm": 3.0594875812530518, "learning_rate": 4.438726675523825e-09, "loss": 0.29223483, "memory(GiB)": 34.88, "step": 146075, "train_speed(iter/s)": 0.411317 }, { "acc": 0.95327301, "epoch": 3.9552703544255814, "grad_norm": 13.1749267578125, "learning_rate": 4.418007621776808e-09, "loss": 0.25191488, "memory(GiB)": 34.88, "step": 146080, "train_speed(iter/s)": 0.411318 }, { "acc": 0.95325565, "epoch": 3.9554057347087967, "grad_norm": 6.866179943084717, "learning_rate": 4.397351153773839e-09, "loss": 0.25165517, "memory(GiB)": 34.88, "step": 146085, "train_speed(iter/s)": 0.411318 }, { "acc": 0.94729061, "epoch": 3.9555411149920126, "grad_norm": 9.591423034667969, "learning_rate": 4.376757271774128e-09, "loss": 0.32205091, "memory(GiB)": 34.88, "step": 146090, "train_speed(iter/s)": 0.411319 }, { "acc": 0.95486565, "epoch": 3.9556764952752284, "grad_norm": 7.153317928314209, "learning_rate": 4.356225976035221e-09, "loss": 0.2716794, "memory(GiB)": 34.88, "step": 146095, "train_speed(iter/s)": 0.41132 }, { "acc": 0.94832716, "epoch": 3.9558118755584437, "grad_norm": 7.258706092834473, "learning_rate": 4.335757266814108e-09, "loss": 0.30583324, "memory(GiB)": 34.88, "step": 146100, "train_speed(iter/s)": 0.41132 }, { "acc": 0.95292416, "epoch": 3.955947255841659, "grad_norm": 5.2738471031188965, "learning_rate": 4.315351144367781e-09, "loss": 0.31140127, "memory(GiB)": 34.88, "step": 146105, "train_speed(iter/s)": 0.411321 }, { "acc": 0.96247311, "epoch": 3.956082636124875, "grad_norm": 5.14329719543457, "learning_rate": 4.295007608952122e-09, "loss": 0.19945954, "memory(GiB)": 34.88, "step": 146110, "train_speed(iter/s)": 0.411322 }, { "acc": 0.94464674, "epoch": 3.9562180164080902, "grad_norm": 2.9833738803863525, "learning_rate": 4.274726660821344e-09, "loss": 0.35519838, "memory(GiB)": 34.88, "step": 146115, "train_speed(iter/s)": 0.411323 }, { "acc": 0.95458326, "epoch": 3.956353396691306, "grad_norm": 9.190608024597168, "learning_rate": 4.254508300229109e-09, "loss": 0.22850969, "memory(GiB)": 34.88, "step": 146120, "train_speed(iter/s)": 0.411323 }, { "acc": 0.96750011, "epoch": 3.9564887769745214, "grad_norm": 5.666362285614014, "learning_rate": 4.234352527429632e-09, "loss": 0.1928246, "memory(GiB)": 34.88, "step": 146125, "train_speed(iter/s)": 0.411324 }, { "acc": 0.95723839, "epoch": 3.956624157257737, "grad_norm": 4.011950492858887, "learning_rate": 4.214259342675464e-09, "loss": 0.23701968, "memory(GiB)": 34.88, "step": 146130, "train_speed(iter/s)": 0.411325 }, { "acc": 0.96097145, "epoch": 3.9567595375409526, "grad_norm": 5.328360080718994, "learning_rate": 4.194228746216936e-09, "loss": 0.25176203, "memory(GiB)": 34.88, "step": 146135, "train_speed(iter/s)": 0.411325 }, { "acc": 0.94625778, "epoch": 3.956894917824168, "grad_norm": 8.24421215057373, "learning_rate": 4.174260738306598e-09, "loss": 0.27698002, "memory(GiB)": 34.88, "step": 146140, "train_speed(iter/s)": 0.411326 }, { "acc": 0.94674845, "epoch": 3.9570302981073837, "grad_norm": 8.996766090393066, "learning_rate": 4.1543553191936684e-09, "loss": 0.27850447, "memory(GiB)": 34.88, "step": 146145, "train_speed(iter/s)": 0.411327 }, { "acc": 0.95232983, "epoch": 3.957165678390599, "grad_norm": 7.0886077880859375, "learning_rate": 4.134512489127925e-09, "loss": 0.29047127, "memory(GiB)": 34.88, "step": 146150, "train_speed(iter/s)": 0.411328 }, { "acc": 0.9570488, "epoch": 3.957301058673815, "grad_norm": 3.733860969543457, "learning_rate": 4.1147322483580306e-09, "loss": 0.26225293, "memory(GiB)": 34.88, "step": 146155, "train_speed(iter/s)": 0.411328 }, { "acc": 0.95671415, "epoch": 3.9574364389570302, "grad_norm": 7.256443500518799, "learning_rate": 4.095014597130987e-09, "loss": 0.22840314, "memory(GiB)": 34.88, "step": 146160, "train_speed(iter/s)": 0.411329 }, { "acc": 0.9450222, "epoch": 3.957571819240246, "grad_norm": 9.340621948242188, "learning_rate": 4.075359535694349e-09, "loss": 0.32112048, "memory(GiB)": 34.88, "step": 146165, "train_speed(iter/s)": 0.411329 }, { "acc": 0.95645103, "epoch": 3.9577071995234614, "grad_norm": 4.304732799530029, "learning_rate": 4.05576706429456e-09, "loss": 0.27235084, "memory(GiB)": 34.88, "step": 146170, "train_speed(iter/s)": 0.41133 }, { "acc": 0.94811258, "epoch": 3.9578425798066768, "grad_norm": 5.783386707305908, "learning_rate": 4.036237183176401e-09, "loss": 0.29961951, "memory(GiB)": 34.88, "step": 146175, "train_speed(iter/s)": 0.411331 }, { "acc": 0.95656433, "epoch": 3.9579779600898926, "grad_norm": 4.199314594268799, "learning_rate": 4.016769892585206e-09, "loss": 0.2610435, "memory(GiB)": 34.88, "step": 146180, "train_speed(iter/s)": 0.411332 }, { "acc": 0.95517216, "epoch": 3.958113340373108, "grad_norm": 10.315899848937988, "learning_rate": 3.997365192764645e-09, "loss": 0.27171953, "memory(GiB)": 34.88, "step": 146185, "train_speed(iter/s)": 0.411333 }, { "acc": 0.94610291, "epoch": 3.9582487206563237, "grad_norm": 3.53560733795166, "learning_rate": 3.978023083957278e-09, "loss": 0.341644, "memory(GiB)": 34.88, "step": 146190, "train_speed(iter/s)": 0.411333 }, { "acc": 0.9434906, "epoch": 3.958384100939539, "grad_norm": 2.5935330390930176, "learning_rate": 3.958743566406218e-09, "loss": 0.38695273, "memory(GiB)": 34.88, "step": 146195, "train_speed(iter/s)": 0.411334 }, { "acc": 0.94746609, "epoch": 3.958519481222755, "grad_norm": 4.874824047088623, "learning_rate": 3.939526640352361e-09, "loss": 0.28974361, "memory(GiB)": 34.88, "step": 146200, "train_speed(iter/s)": 0.411335 }, { "acc": 0.94023342, "epoch": 3.9586548615059702, "grad_norm": 5.244492530822754, "learning_rate": 3.9203723060366e-09, "loss": 0.38281138, "memory(GiB)": 34.88, "step": 146205, "train_speed(iter/s)": 0.411336 }, { "acc": 0.93550663, "epoch": 3.9587902417891856, "grad_norm": 6.737128257751465, "learning_rate": 3.9012805636987195e-09, "loss": 0.39820704, "memory(GiB)": 34.88, "step": 146210, "train_speed(iter/s)": 0.411336 }, { "acc": 0.93949776, "epoch": 3.9589256220724014, "grad_norm": 5.401917457580566, "learning_rate": 3.882251413578504e-09, "loss": 0.38297286, "memory(GiB)": 34.88, "step": 146215, "train_speed(iter/s)": 0.411337 }, { "acc": 0.94944735, "epoch": 3.959061002355617, "grad_norm": 5.009105205535889, "learning_rate": 3.863284855914073e-09, "loss": 0.31135325, "memory(GiB)": 34.88, "step": 146220, "train_speed(iter/s)": 0.411338 }, { "acc": 0.95664349, "epoch": 3.9591963826388326, "grad_norm": 4.384483814239502, "learning_rate": 3.8443808909424345e-09, "loss": 0.1928417, "memory(GiB)": 34.88, "step": 146225, "train_speed(iter/s)": 0.411338 }, { "acc": 0.94877443, "epoch": 3.959331762922048, "grad_norm": 8.368244171142578, "learning_rate": 3.825539518901153e-09, "loss": 0.26973736, "memory(GiB)": 34.88, "step": 146230, "train_speed(iter/s)": 0.411339 }, { "acc": 0.93417263, "epoch": 3.9594671432052637, "grad_norm": 19.86539649963379, "learning_rate": 3.806760740026127e-09, "loss": 0.3963356, "memory(GiB)": 34.88, "step": 146235, "train_speed(iter/s)": 0.41134 }, { "acc": 0.9477684, "epoch": 3.959602523488479, "grad_norm": 7.682170391082764, "learning_rate": 3.78804455455159e-09, "loss": 0.36055114, "memory(GiB)": 34.88, "step": 146240, "train_speed(iter/s)": 0.411341 }, { "acc": 0.95934181, "epoch": 3.9597379037716944, "grad_norm": 4.757131576538086, "learning_rate": 3.769390962713441e-09, "loss": 0.24929092, "memory(GiB)": 34.88, "step": 146245, "train_speed(iter/s)": 0.411341 }, { "acc": 0.95652475, "epoch": 3.9598732840549102, "grad_norm": 5.544425964355469, "learning_rate": 3.7507999647448045e-09, "loss": 0.26851945, "memory(GiB)": 34.88, "step": 146250, "train_speed(iter/s)": 0.411342 }, { "acc": 0.95591106, "epoch": 3.960008664338126, "grad_norm": 12.249235153198242, "learning_rate": 3.732271560878247e-09, "loss": 0.31234632, "memory(GiB)": 34.88, "step": 146255, "train_speed(iter/s)": 0.411343 }, { "acc": 0.95137939, "epoch": 3.9601440446213414, "grad_norm": 27.90804100036621, "learning_rate": 3.713805751346339e-09, "loss": 0.24495144, "memory(GiB)": 34.88, "step": 146260, "train_speed(iter/s)": 0.411344 }, { "acc": 0.95476494, "epoch": 3.9602794249045568, "grad_norm": 11.834735870361328, "learning_rate": 3.695402536379982e-09, "loss": 0.25459538, "memory(GiB)": 34.88, "step": 146265, "train_speed(iter/s)": 0.411345 }, { "acc": 0.95184536, "epoch": 3.9604148051877726, "grad_norm": 5.236227512359619, "learning_rate": 3.67706191621008e-09, "loss": 0.29765759, "memory(GiB)": 34.88, "step": 146270, "train_speed(iter/s)": 0.411345 }, { "acc": 0.95211391, "epoch": 3.960550185470988, "grad_norm": 10.654982566833496, "learning_rate": 3.658783891065872e-09, "loss": 0.30787578, "memory(GiB)": 34.88, "step": 146275, "train_speed(iter/s)": 0.411346 }, { "acc": 0.95438623, "epoch": 3.9606855657542037, "grad_norm": 7.831201553344727, "learning_rate": 3.640568461176595e-09, "loss": 0.2554908, "memory(GiB)": 34.88, "step": 146280, "train_speed(iter/s)": 0.411347 }, { "acc": 0.94888468, "epoch": 3.960820946037419, "grad_norm": 5.585099220275879, "learning_rate": 3.6224156267709323e-09, "loss": 0.26423779, "memory(GiB)": 34.88, "step": 146285, "train_speed(iter/s)": 0.411348 }, { "acc": 0.95968552, "epoch": 3.960956326320635, "grad_norm": 6.945546627044678, "learning_rate": 3.604325388075902e-09, "loss": 0.26516709, "memory(GiB)": 34.88, "step": 146290, "train_speed(iter/s)": 0.411348 }, { "acc": 0.94253654, "epoch": 3.9610917066038502, "grad_norm": 4.561420440673828, "learning_rate": 3.5862977453179667e-09, "loss": 0.41666431, "memory(GiB)": 34.88, "step": 146295, "train_speed(iter/s)": 0.411349 }, { "acc": 0.95325184, "epoch": 3.9612270868870656, "grad_norm": 7.281069278717041, "learning_rate": 3.5683326987230344e-09, "loss": 0.30266943, "memory(GiB)": 34.88, "step": 146300, "train_speed(iter/s)": 0.41135 }, { "acc": 0.94449711, "epoch": 3.9613624671702814, "grad_norm": 8.489655494689941, "learning_rate": 3.550430248515903e-09, "loss": 0.3472532, "memory(GiB)": 34.88, "step": 146305, "train_speed(iter/s)": 0.411351 }, { "acc": 0.95433102, "epoch": 3.9614978474534968, "grad_norm": 32.86834716796875, "learning_rate": 3.5325903949213693e-09, "loss": 0.33066473, "memory(GiB)": 34.88, "step": 146310, "train_speed(iter/s)": 0.411351 }, { "acc": 0.96879778, "epoch": 3.9616332277367126, "grad_norm": 7.442050457000732, "learning_rate": 3.514813138162567e-09, "loss": 0.18105812, "memory(GiB)": 34.88, "step": 146315, "train_speed(iter/s)": 0.411352 }, { "acc": 0.94014664, "epoch": 3.961768608019928, "grad_norm": 11.285621643066406, "learning_rate": 3.4970984784626283e-09, "loss": 0.40219698, "memory(GiB)": 34.88, "step": 146320, "train_speed(iter/s)": 0.411353 }, { "acc": 0.94250546, "epoch": 3.9619039883031437, "grad_norm": 10.518949508666992, "learning_rate": 3.47944641604302e-09, "loss": 0.36686525, "memory(GiB)": 34.88, "step": 146325, "train_speed(iter/s)": 0.411354 }, { "acc": 0.95836773, "epoch": 3.962039368586359, "grad_norm": 4.836029052734375, "learning_rate": 3.4618569511246543e-09, "loss": 0.251074, "memory(GiB)": 34.88, "step": 146330, "train_speed(iter/s)": 0.411355 }, { "acc": 0.94691277, "epoch": 3.9621747488695744, "grad_norm": 5.560122489929199, "learning_rate": 3.4443300839284443e-09, "loss": 0.37394199, "memory(GiB)": 34.88, "step": 146335, "train_speed(iter/s)": 0.411355 }, { "acc": 0.95768023, "epoch": 3.9623101291527902, "grad_norm": 7.2621002197265625, "learning_rate": 3.426865814673637e-09, "loss": 0.30051632, "memory(GiB)": 34.88, "step": 146340, "train_speed(iter/s)": 0.411356 }, { "acc": 0.94668865, "epoch": 3.9624455094360056, "grad_norm": 6.07549524307251, "learning_rate": 3.4094641435789237e-09, "loss": 0.33257771, "memory(GiB)": 34.88, "step": 146345, "train_speed(iter/s)": 0.411357 }, { "acc": 0.94931755, "epoch": 3.9625808897192214, "grad_norm": 5.500799179077148, "learning_rate": 3.392125070862442e-09, "loss": 0.32005358, "memory(GiB)": 34.88, "step": 146350, "train_speed(iter/s)": 0.411358 }, { "acc": 0.95388927, "epoch": 3.9627162700024368, "grad_norm": 5.287686347961426, "learning_rate": 3.3748485967417738e-09, "loss": 0.32939649, "memory(GiB)": 34.88, "step": 146355, "train_speed(iter/s)": 0.411358 }, { "acc": 0.94212818, "epoch": 3.9628516502856526, "grad_norm": 3.5078845024108887, "learning_rate": 3.3576347214322807e-09, "loss": 0.31898823, "memory(GiB)": 34.88, "step": 146360, "train_speed(iter/s)": 0.411359 }, { "acc": 0.95703716, "epoch": 3.962987030568868, "grad_norm": 5.085070610046387, "learning_rate": 3.34048344515099e-09, "loss": 0.25191414, "memory(GiB)": 34.88, "step": 146365, "train_speed(iter/s)": 0.41136 }, { "acc": 0.94897213, "epoch": 3.9631224108520833, "grad_norm": 9.264090538024902, "learning_rate": 3.323394768111597e-09, "loss": 0.34323301, "memory(GiB)": 34.88, "step": 146370, "train_speed(iter/s)": 0.41136 }, { "acc": 0.96194315, "epoch": 3.963257791135299, "grad_norm": 3.206796407699585, "learning_rate": 3.30636869052891e-09, "loss": 0.22327352, "memory(GiB)": 34.88, "step": 146375, "train_speed(iter/s)": 0.411361 }, { "acc": 0.94884634, "epoch": 3.963393171418515, "grad_norm": 15.677213668823242, "learning_rate": 3.289405212615514e-09, "loss": 0.35987899, "memory(GiB)": 34.88, "step": 146380, "train_speed(iter/s)": 0.411362 }, { "acc": 0.94264126, "epoch": 3.9635285517017302, "grad_norm": 5.355716705322266, "learning_rate": 3.272504334584552e-09, "loss": 0.41134734, "memory(GiB)": 34.88, "step": 146385, "train_speed(iter/s)": 0.411363 }, { "acc": 0.94566631, "epoch": 3.9636639319849456, "grad_norm": 28.872549057006836, "learning_rate": 3.2556660566474994e-09, "loss": 0.37771707, "memory(GiB)": 34.88, "step": 146390, "train_speed(iter/s)": 0.411363 }, { "acc": 0.95263243, "epoch": 3.9637993122681614, "grad_norm": 4.962930202484131, "learning_rate": 3.2388903790158327e-09, "loss": 0.29475284, "memory(GiB)": 34.88, "step": 146395, "train_speed(iter/s)": 0.411364 }, { "acc": 0.94342871, "epoch": 3.9639346925513768, "grad_norm": 7.208549499511719, "learning_rate": 3.2221773018982526e-09, "loss": 0.42377934, "memory(GiB)": 34.88, "step": 146400, "train_speed(iter/s)": 0.411365 }, { "acc": 0.95026617, "epoch": 3.964070072834592, "grad_norm": 7.565720558166504, "learning_rate": 3.205526825505681e-09, "loss": 0.28193097, "memory(GiB)": 34.88, "step": 146405, "train_speed(iter/s)": 0.411366 }, { "acc": 0.95015869, "epoch": 3.964205453117808, "grad_norm": 4.164715766906738, "learning_rate": 3.188938950045709e-09, "loss": 0.3414753, "memory(GiB)": 34.88, "step": 146410, "train_speed(iter/s)": 0.411366 }, { "acc": 0.93655491, "epoch": 3.9643408334010237, "grad_norm": 9.401202201843262, "learning_rate": 3.172413675726482e-09, "loss": 0.44249845, "memory(GiB)": 34.88, "step": 146415, "train_speed(iter/s)": 0.411367 }, { "acc": 0.96168289, "epoch": 3.964476213684239, "grad_norm": 5.281679630279541, "learning_rate": 3.155951002755591e-09, "loss": 0.21599116, "memory(GiB)": 34.88, "step": 146420, "train_speed(iter/s)": 0.411368 }, { "acc": 0.9586689, "epoch": 3.9646115939674544, "grad_norm": 5.602936744689941, "learning_rate": 3.1395509313378515e-09, "loss": 0.25243001, "memory(GiB)": 34.88, "step": 146425, "train_speed(iter/s)": 0.411368 }, { "acc": 0.96473484, "epoch": 3.9647469742506702, "grad_norm": 3.2543766498565674, "learning_rate": 3.1232134616802997e-09, "loss": 0.2034802, "memory(GiB)": 34.88, "step": 146430, "train_speed(iter/s)": 0.411369 }, { "acc": 0.94181948, "epoch": 3.9648823545338856, "grad_norm": 8.109498977661133, "learning_rate": 3.1069385939860863e-09, "loss": 0.36873579, "memory(GiB)": 34.88, "step": 146435, "train_speed(iter/s)": 0.41137 }, { "acc": 0.93340912, "epoch": 3.9650177348171014, "grad_norm": 5.759860038757324, "learning_rate": 3.0907263284600272e-09, "loss": 0.38658848, "memory(GiB)": 34.88, "step": 146440, "train_speed(iter/s)": 0.411371 }, { "acc": 0.95602093, "epoch": 3.9651531151003168, "grad_norm": 12.72315788269043, "learning_rate": 3.0745766653052717e-09, "loss": 0.27268977, "memory(GiB)": 34.88, "step": 146445, "train_speed(iter/s)": 0.411371 }, { "acc": 0.94487858, "epoch": 3.9652884953835326, "grad_norm": 5.0678935050964355, "learning_rate": 3.058489604723861e-09, "loss": 0.33941982, "memory(GiB)": 34.88, "step": 146450, "train_speed(iter/s)": 0.411372 }, { "acc": 0.93557901, "epoch": 3.965423875666748, "grad_norm": 10.79423999786377, "learning_rate": 3.042465146917281e-09, "loss": 0.38245864, "memory(GiB)": 34.88, "step": 146455, "train_speed(iter/s)": 0.411373 }, { "acc": 0.96477242, "epoch": 3.9655592559499633, "grad_norm": 21.27910041809082, "learning_rate": 3.026503292085906e-09, "loss": 0.25457969, "memory(GiB)": 34.88, "step": 146460, "train_speed(iter/s)": 0.411373 }, { "acc": 0.94470568, "epoch": 3.965694636233179, "grad_norm": 8.9302396774292, "learning_rate": 3.0106040404306663e-09, "loss": 0.30219288, "memory(GiB)": 34.88, "step": 146465, "train_speed(iter/s)": 0.411374 }, { "acc": 0.93359175, "epoch": 3.9658300165163944, "grad_norm": 7.402441501617432, "learning_rate": 2.9947673921497172e-09, "loss": 0.43471408, "memory(GiB)": 34.88, "step": 146470, "train_speed(iter/s)": 0.411375 }, { "acc": 0.94699268, "epoch": 3.9659653967996102, "grad_norm": 8.441940307617188, "learning_rate": 2.9789933474423237e-09, "loss": 0.32088637, "memory(GiB)": 34.88, "step": 146475, "train_speed(iter/s)": 0.411376 }, { "acc": 0.94644051, "epoch": 3.9661007770828256, "grad_norm": 7.025849342346191, "learning_rate": 2.963281906504976e-09, "loss": 0.2980901, "memory(GiB)": 34.88, "step": 146480, "train_speed(iter/s)": 0.411377 }, { "acc": 0.9552206, "epoch": 3.9662361573660414, "grad_norm": 8.714749336242676, "learning_rate": 2.9476330695358283e-09, "loss": 0.32273233, "memory(GiB)": 34.88, "step": 146485, "train_speed(iter/s)": 0.411377 }, { "acc": 0.95500488, "epoch": 3.9663715376492568, "grad_norm": 5.020088195800781, "learning_rate": 2.9320468367297063e-09, "loss": 0.29764674, "memory(GiB)": 34.88, "step": 146490, "train_speed(iter/s)": 0.411378 }, { "acc": 0.9375392, "epoch": 3.966506917932472, "grad_norm": 7.652962684631348, "learning_rate": 2.9165232082830983e-09, "loss": 0.37434461, "memory(GiB)": 34.88, "step": 146495, "train_speed(iter/s)": 0.411379 }, { "acc": 0.9404933, "epoch": 3.966642298215688, "grad_norm": 5.7861647605896, "learning_rate": 2.9010621843891656e-09, "loss": 0.341324, "memory(GiB)": 34.88, "step": 146500, "train_speed(iter/s)": 0.41138 }, { "acc": 0.94410362, "epoch": 3.9667776784989033, "grad_norm": 11.147140502929688, "learning_rate": 2.885663765242177e-09, "loss": 0.35011485, "memory(GiB)": 34.88, "step": 146505, "train_speed(iter/s)": 0.41138 }, { "acc": 0.96495218, "epoch": 3.966913058782119, "grad_norm": 4.909529685974121, "learning_rate": 2.870327951035291e-09, "loss": 0.18205495, "memory(GiB)": 34.88, "step": 146510, "train_speed(iter/s)": 0.411381 }, { "acc": 0.93174934, "epoch": 3.9670484390653344, "grad_norm": 5.617671489715576, "learning_rate": 2.8550547419605583e-09, "loss": 0.40027204, "memory(GiB)": 34.88, "step": 146515, "train_speed(iter/s)": 0.411382 }, { "acc": 0.94916296, "epoch": 3.9671838193485502, "grad_norm": 9.217804908752441, "learning_rate": 2.8398441382089185e-09, "loss": 0.26594338, "memory(GiB)": 34.88, "step": 146520, "train_speed(iter/s)": 0.411383 }, { "acc": 0.93160267, "epoch": 3.9673191996317656, "grad_norm": 10.972739219665527, "learning_rate": 2.8246961399713094e-09, "loss": 0.38258834, "memory(GiB)": 34.88, "step": 146525, "train_speed(iter/s)": 0.411384 }, { "acc": 0.95039959, "epoch": 3.967454579914981, "grad_norm": 6.8935112953186035, "learning_rate": 2.8096107474370053e-09, "loss": 0.31435924, "memory(GiB)": 34.88, "step": 146530, "train_speed(iter/s)": 0.411384 }, { "acc": 0.9455018, "epoch": 3.9675899601981968, "grad_norm": 5.238434791564941, "learning_rate": 2.7945879607952816e-09, "loss": 0.33547134, "memory(GiB)": 34.88, "step": 146535, "train_speed(iter/s)": 0.411385 }, { "acc": 0.94932718, "epoch": 3.967725340481412, "grad_norm": 8.278548240661621, "learning_rate": 2.7796277802343013e-09, "loss": 0.31006589, "memory(GiB)": 34.88, "step": 146540, "train_speed(iter/s)": 0.411386 }, { "acc": 0.94062452, "epoch": 3.967860720764628, "grad_norm": 3.290194511413574, "learning_rate": 2.764730205941674e-09, "loss": 0.39562383, "memory(GiB)": 34.88, "step": 146545, "train_speed(iter/s)": 0.411387 }, { "acc": 0.96522598, "epoch": 3.9679961010478433, "grad_norm": 4.995103359222412, "learning_rate": 2.7498952381038974e-09, "loss": 0.21939278, "memory(GiB)": 34.88, "step": 146550, "train_speed(iter/s)": 0.411387 }, { "acc": 0.95219402, "epoch": 3.968131481331059, "grad_norm": 6.202208042144775, "learning_rate": 2.735122876906361e-09, "loss": 0.297224, "memory(GiB)": 34.88, "step": 146555, "train_speed(iter/s)": 0.411388 }, { "acc": 0.95740471, "epoch": 3.9682668616142744, "grad_norm": 13.00493335723877, "learning_rate": 2.7204131225344532e-09, "loss": 0.28098688, "memory(GiB)": 34.88, "step": 146560, "train_speed(iter/s)": 0.411389 }, { "acc": 0.95766525, "epoch": 3.96840224189749, "grad_norm": 4.970932960510254, "learning_rate": 2.7057659751730084e-09, "loss": 0.26800787, "memory(GiB)": 34.88, "step": 146565, "train_speed(iter/s)": 0.41139 }, { "acc": 0.93563843, "epoch": 3.9685376221807056, "grad_norm": 8.811793327331543, "learning_rate": 2.6911814350046393e-09, "loss": 0.47755408, "memory(GiB)": 34.88, "step": 146570, "train_speed(iter/s)": 0.411391 }, { "acc": 0.9410923, "epoch": 3.9686730024639214, "grad_norm": 7.095344543457031, "learning_rate": 2.676659502212514e-09, "loss": 0.38819015, "memory(GiB)": 34.88, "step": 146575, "train_speed(iter/s)": 0.411391 }, { "acc": 0.95091019, "epoch": 3.9688083827471368, "grad_norm": 3.8428192138671875, "learning_rate": 2.6622001769786915e-09, "loss": 0.28233917, "memory(GiB)": 34.88, "step": 146580, "train_speed(iter/s)": 0.411392 }, { "acc": 0.95843792, "epoch": 3.968943763030352, "grad_norm": 7.165865421295166, "learning_rate": 2.647803459483565e-09, "loss": 0.26882634, "memory(GiB)": 34.88, "step": 146585, "train_speed(iter/s)": 0.411393 }, { "acc": 0.94923124, "epoch": 3.969079143313568, "grad_norm": 21.683612823486328, "learning_rate": 2.633469349908083e-09, "loss": 0.29028831, "memory(GiB)": 34.88, "step": 146590, "train_speed(iter/s)": 0.411394 }, { "acc": 0.93563995, "epoch": 3.9692145235967833, "grad_norm": 26.600858688354492, "learning_rate": 2.6191978484320824e-09, "loss": 0.37148645, "memory(GiB)": 34.88, "step": 146595, "train_speed(iter/s)": 0.411394 }, { "acc": 0.95458593, "epoch": 3.9693499038799986, "grad_norm": 4.685667991638184, "learning_rate": 2.6049889552331827e-09, "loss": 0.24180927, "memory(GiB)": 34.88, "step": 146600, "train_speed(iter/s)": 0.411395 }, { "acc": 0.9508956, "epoch": 3.9694852841632144, "grad_norm": 6.947119235992432, "learning_rate": 2.590842670490666e-09, "loss": 0.20160966, "memory(GiB)": 34.88, "step": 146605, "train_speed(iter/s)": 0.411396 }, { "acc": 0.9405077, "epoch": 3.9696206644464302, "grad_norm": 4.663018703460693, "learning_rate": 2.5767589943810405e-09, "loss": 0.39548044, "memory(GiB)": 34.88, "step": 146610, "train_speed(iter/s)": 0.411397 }, { "acc": 0.95494213, "epoch": 3.9697560447296456, "grad_norm": 5.377068042755127, "learning_rate": 2.5627379270813688e-09, "loss": 0.22595768, "memory(GiB)": 34.88, "step": 146615, "train_speed(iter/s)": 0.411397 }, { "acc": 0.96043863, "epoch": 3.969891425012861, "grad_norm": 3.8448288440704346, "learning_rate": 2.548779468766494e-09, "loss": 0.21935868, "memory(GiB)": 34.88, "step": 146620, "train_speed(iter/s)": 0.411398 }, { "acc": 0.94950161, "epoch": 3.9700268052960768, "grad_norm": 4.450403690338135, "learning_rate": 2.534883619611258e-09, "loss": 0.29095547, "memory(GiB)": 34.88, "step": 146625, "train_speed(iter/s)": 0.411399 }, { "acc": 0.95069284, "epoch": 3.970162185579292, "grad_norm": 6.177492618560791, "learning_rate": 2.5210503797905047e-09, "loss": 0.30347385, "memory(GiB)": 34.88, "step": 146630, "train_speed(iter/s)": 0.4114 }, { "acc": 0.94848385, "epoch": 3.970297565862508, "grad_norm": 9.272049903869629, "learning_rate": 2.50727974947741e-09, "loss": 0.30029891, "memory(GiB)": 34.88, "step": 146635, "train_speed(iter/s)": 0.4114 }, { "acc": 0.93338118, "epoch": 3.9704329461457233, "grad_norm": 9.591551780700684, "learning_rate": 2.493571728843487e-09, "loss": 0.47464542, "memory(GiB)": 34.88, "step": 146640, "train_speed(iter/s)": 0.411401 }, { "acc": 0.95008888, "epoch": 3.970568326428939, "grad_norm": 9.071688652038574, "learning_rate": 2.4799263180613586e-09, "loss": 0.31955898, "memory(GiB)": 34.88, "step": 146645, "train_speed(iter/s)": 0.411402 }, { "acc": 0.94885349, "epoch": 3.9707037067121544, "grad_norm": 6.513902187347412, "learning_rate": 2.466343517301981e-09, "loss": 0.24909058, "memory(GiB)": 34.88, "step": 146650, "train_speed(iter/s)": 0.411403 }, { "acc": 0.93475695, "epoch": 3.97083908699537, "grad_norm": 5.837486267089844, "learning_rate": 2.452823326735202e-09, "loss": 0.48870125, "memory(GiB)": 34.88, "step": 146655, "train_speed(iter/s)": 0.411403 }, { "acc": 0.938095, "epoch": 3.9709744672785856, "grad_norm": 4.700238227844238, "learning_rate": 2.4393657465303135e-09, "loss": 0.42960138, "memory(GiB)": 34.88, "step": 146660, "train_speed(iter/s)": 0.411404 }, { "acc": 0.94617205, "epoch": 3.971109847561801, "grad_norm": 3.8700366020202637, "learning_rate": 2.4259707768560526e-09, "loss": 0.33020558, "memory(GiB)": 34.88, "step": 146665, "train_speed(iter/s)": 0.411405 }, { "acc": 0.9456605, "epoch": 3.9712452278450168, "grad_norm": 5.534514904022217, "learning_rate": 2.412638417880046e-09, "loss": 0.32170477, "memory(GiB)": 34.88, "step": 146670, "train_speed(iter/s)": 0.411405 }, { "acc": 0.9476738, "epoch": 3.971380608128232, "grad_norm": 5.347234725952148, "learning_rate": 2.399368669769921e-09, "loss": 0.33711724, "memory(GiB)": 34.88, "step": 146675, "train_speed(iter/s)": 0.411406 }, { "acc": 0.95404282, "epoch": 3.971515988411448, "grad_norm": 4.711752891540527, "learning_rate": 2.386161532691084e-09, "loss": 0.2848937, "memory(GiB)": 34.88, "step": 146680, "train_speed(iter/s)": 0.411407 }, { "acc": 0.95514135, "epoch": 3.9716513686946633, "grad_norm": 23.62051010131836, "learning_rate": 2.373017006809497e-09, "loss": 0.2575841, "memory(GiB)": 34.88, "step": 146685, "train_speed(iter/s)": 0.411408 }, { "acc": 0.94673824, "epoch": 3.9717867489778786, "grad_norm": 8.81387710571289, "learning_rate": 2.359935092290011e-09, "loss": 0.30527678, "memory(GiB)": 34.88, "step": 146690, "train_speed(iter/s)": 0.411408 }, { "acc": 0.96133614, "epoch": 3.9719221292610944, "grad_norm": 4.536450386047363, "learning_rate": 2.3469157892958138e-09, "loss": 0.217068, "memory(GiB)": 34.88, "step": 146695, "train_speed(iter/s)": 0.411409 }, { "acc": 0.94722157, "epoch": 3.97205750954431, "grad_norm": 4.406680583953857, "learning_rate": 2.33395909799009e-09, "loss": 0.34563975, "memory(GiB)": 34.88, "step": 146700, "train_speed(iter/s)": 0.41141 }, { "acc": 0.95994797, "epoch": 3.9721928898275256, "grad_norm": 4.52159309387207, "learning_rate": 2.3210650185354725e-09, "loss": 0.25866709, "memory(GiB)": 34.88, "step": 146705, "train_speed(iter/s)": 0.411411 }, { "acc": 0.93580265, "epoch": 3.972328270110741, "grad_norm": 34.39369583129883, "learning_rate": 2.3082335510940372e-09, "loss": 0.40251083, "memory(GiB)": 34.88, "step": 146710, "train_speed(iter/s)": 0.411411 }, { "acc": 0.94781637, "epoch": 3.9724636503939568, "grad_norm": 8.077105522155762, "learning_rate": 2.2954646958250847e-09, "loss": 0.30266895, "memory(GiB)": 34.88, "step": 146715, "train_speed(iter/s)": 0.411412 }, { "acc": 0.94914284, "epoch": 3.972599030677172, "grad_norm": 10.722116470336914, "learning_rate": 2.282758452890137e-09, "loss": 0.37577541, "memory(GiB)": 34.88, "step": 146720, "train_speed(iter/s)": 0.411413 }, { "acc": 0.94563951, "epoch": 3.9727344109603875, "grad_norm": 4.622705936431885, "learning_rate": 2.2701148224468297e-09, "loss": 0.33973351, "memory(GiB)": 34.88, "step": 146725, "train_speed(iter/s)": 0.411414 }, { "acc": 0.95313969, "epoch": 3.9728697912436033, "grad_norm": 2.2617058753967285, "learning_rate": 2.2575338046550186e-09, "loss": 0.27548547, "memory(GiB)": 34.88, "step": 146730, "train_speed(iter/s)": 0.411415 }, { "acc": 0.95275259, "epoch": 3.973005171526819, "grad_norm": 5.373500347137451, "learning_rate": 2.24501539967123e-09, "loss": 0.2936847, "memory(GiB)": 34.88, "step": 146735, "train_speed(iter/s)": 0.411415 }, { "acc": 0.94518051, "epoch": 3.9731405518100344, "grad_norm": 3.595337390899658, "learning_rate": 2.2325596076531e-09, "loss": 0.34838896, "memory(GiB)": 34.88, "step": 146740, "train_speed(iter/s)": 0.411416 }, { "acc": 0.95141277, "epoch": 3.97327593209325, "grad_norm": 3.4656498432159424, "learning_rate": 2.2201664287554884e-09, "loss": 0.27463558, "memory(GiB)": 34.88, "step": 146745, "train_speed(iter/s)": 0.411417 }, { "acc": 0.95880013, "epoch": 3.9734113123764656, "grad_norm": 7.620558261871338, "learning_rate": 2.2078358631349217e-09, "loss": 0.30948377, "memory(GiB)": 34.88, "step": 146750, "train_speed(iter/s)": 0.411417 }, { "acc": 0.96317921, "epoch": 3.973546692659681, "grad_norm": 2.5225675106048584, "learning_rate": 2.1955679109457054e-09, "loss": 0.24964337, "memory(GiB)": 34.88, "step": 146755, "train_speed(iter/s)": 0.411418 }, { "acc": 0.93803329, "epoch": 3.9736820729428963, "grad_norm": 6.711739540100098, "learning_rate": 2.1833625723404797e-09, "loss": 0.32608523, "memory(GiB)": 34.88, "step": 146760, "train_speed(iter/s)": 0.411419 }, { "acc": 0.94024887, "epoch": 3.973817453226112, "grad_norm": 8.82175350189209, "learning_rate": 2.171219847472995e-09, "loss": 0.3794158, "memory(GiB)": 34.88, "step": 146765, "train_speed(iter/s)": 0.41142 }, { "acc": 0.94933834, "epoch": 3.973952833509328, "grad_norm": 7.169397830963135, "learning_rate": 2.159139736495337e-09, "loss": 0.29094057, "memory(GiB)": 34.88, "step": 146770, "train_speed(iter/s)": 0.41142 }, { "acc": 0.94507751, "epoch": 3.9740882137925433, "grad_norm": 4.372786045074463, "learning_rate": 2.14712223955848e-09, "loss": 0.29087214, "memory(GiB)": 34.88, "step": 146775, "train_speed(iter/s)": 0.411421 }, { "acc": 0.96827164, "epoch": 3.9742235940757586, "grad_norm": 4.448966979980469, "learning_rate": 2.1351673568134004e-09, "loss": 0.18964199, "memory(GiB)": 34.88, "step": 146780, "train_speed(iter/s)": 0.411422 }, { "acc": 0.95845566, "epoch": 3.9743589743589745, "grad_norm": 2.079421281814575, "learning_rate": 2.1232750884094078e-09, "loss": 0.23347149, "memory(GiB)": 34.88, "step": 146785, "train_speed(iter/s)": 0.411423 }, { "acc": 0.95011463, "epoch": 3.97449435464219, "grad_norm": 8.180519104003906, "learning_rate": 2.111445434495812e-09, "loss": 0.3226902, "memory(GiB)": 34.88, "step": 146790, "train_speed(iter/s)": 0.411423 }, { "acc": 0.9445365, "epoch": 3.9746297349254056, "grad_norm": 7.72609806060791, "learning_rate": 2.0996783952208133e-09, "loss": 0.32000051, "memory(GiB)": 34.88, "step": 146795, "train_speed(iter/s)": 0.411424 }, { "acc": 0.96437626, "epoch": 3.974765115208621, "grad_norm": 24.644311904907227, "learning_rate": 2.0879739707315017e-09, "loss": 0.21721127, "memory(GiB)": 34.88, "step": 146800, "train_speed(iter/s)": 0.411425 }, { "acc": 0.95659981, "epoch": 3.9749004954918368, "grad_norm": 5.261700630187988, "learning_rate": 2.076332161174412e-09, "loss": 0.26275992, "memory(GiB)": 34.88, "step": 146805, "train_speed(iter/s)": 0.411425 }, { "acc": 0.95728703, "epoch": 3.975035875775052, "grad_norm": 5.5083818435668945, "learning_rate": 2.0647529666960786e-09, "loss": 0.29689851, "memory(GiB)": 34.88, "step": 146810, "train_speed(iter/s)": 0.411426 }, { "acc": 0.95551777, "epoch": 3.9751712560582675, "grad_norm": 4.714534282684326, "learning_rate": 2.053236387440816e-09, "loss": 0.22985613, "memory(GiB)": 34.88, "step": 146815, "train_speed(iter/s)": 0.411427 }, { "acc": 0.93740749, "epoch": 3.9753066363414833, "grad_norm": 13.474522590637207, "learning_rate": 2.0417824235534943e-09, "loss": 0.40916367, "memory(GiB)": 34.88, "step": 146820, "train_speed(iter/s)": 0.411428 }, { "acc": 0.94290695, "epoch": 3.9754420166246986, "grad_norm": 6.494436740875244, "learning_rate": 2.0303910751767623e-09, "loss": 0.32706926, "memory(GiB)": 34.88, "step": 146825, "train_speed(iter/s)": 0.411428 }, { "acc": 0.95251141, "epoch": 3.9755773969079145, "grad_norm": 14.368388175964355, "learning_rate": 2.0190623424543796e-09, "loss": 0.23345973, "memory(GiB)": 34.88, "step": 146830, "train_speed(iter/s)": 0.411429 }, { "acc": 0.96689911, "epoch": 3.97571277719113, "grad_norm": 3.786466598510742, "learning_rate": 2.007796225527331e-09, "loss": 0.19427688, "memory(GiB)": 34.88, "step": 146835, "train_speed(iter/s)": 0.41143 }, { "acc": 0.94959774, "epoch": 3.9758481574743456, "grad_norm": 8.423852920532227, "learning_rate": 1.9965927245377103e-09, "loss": 0.27644083, "memory(GiB)": 34.88, "step": 146840, "train_speed(iter/s)": 0.411431 }, { "acc": 0.95580082, "epoch": 3.975983537757561, "grad_norm": 7.582814693450928, "learning_rate": 1.9854518396248367e-09, "loss": 0.27931828, "memory(GiB)": 34.88, "step": 146845, "train_speed(iter/s)": 0.411431 }, { "acc": 0.95437269, "epoch": 3.9761189180407763, "grad_norm": 3.029919385910034, "learning_rate": 1.9743735709291398e-09, "loss": 0.28704247, "memory(GiB)": 34.88, "step": 146850, "train_speed(iter/s)": 0.411432 }, { "acc": 0.94832306, "epoch": 3.976254298323992, "grad_norm": 6.5744709968566895, "learning_rate": 1.9633579185888274e-09, "loss": 0.31665096, "memory(GiB)": 34.88, "step": 146855, "train_speed(iter/s)": 0.411433 }, { "acc": 0.93549423, "epoch": 3.9763896786072075, "grad_norm": 21.780555725097656, "learning_rate": 1.9524048827415547e-09, "loss": 0.3907804, "memory(GiB)": 34.88, "step": 146860, "train_speed(iter/s)": 0.411434 }, { "acc": 0.95834332, "epoch": 3.9765250588904233, "grad_norm": 18.259075164794922, "learning_rate": 1.94151446352553e-09, "loss": 0.22324023, "memory(GiB)": 34.88, "step": 146865, "train_speed(iter/s)": 0.411434 }, { "acc": 0.9380435, "epoch": 3.9766604391736387, "grad_norm": 3.2450780868530273, "learning_rate": 1.930686661076742e-09, "loss": 0.3373796, "memory(GiB)": 34.88, "step": 146870, "train_speed(iter/s)": 0.411435 }, { "acc": 0.97033501, "epoch": 3.9767958194568545, "grad_norm": 3.9821760654449463, "learning_rate": 1.9199214755306246e-09, "loss": 0.19089876, "memory(GiB)": 34.88, "step": 146875, "train_speed(iter/s)": 0.411436 }, { "acc": 0.94481335, "epoch": 3.97693119974007, "grad_norm": 4.08106803894043, "learning_rate": 1.9092189070226106e-09, "loss": 0.37632728, "memory(GiB)": 34.88, "step": 146880, "train_speed(iter/s)": 0.411437 }, { "acc": 0.95189495, "epoch": 3.977066580023285, "grad_norm": 12.537613868713379, "learning_rate": 1.898578955685915e-09, "loss": 0.29681566, "memory(GiB)": 34.88, "step": 146885, "train_speed(iter/s)": 0.411438 }, { "acc": 0.9561348, "epoch": 3.977201960306501, "grad_norm": 3.009430170059204, "learning_rate": 1.8880016216543053e-09, "loss": 0.27676651, "memory(GiB)": 34.88, "step": 146890, "train_speed(iter/s)": 0.411438 }, { "acc": 0.95059996, "epoch": 3.9773373405897168, "grad_norm": 3.744354724884033, "learning_rate": 1.8774869050598846e-09, "loss": 0.33160677, "memory(GiB)": 34.88, "step": 146895, "train_speed(iter/s)": 0.411439 }, { "acc": 0.95332842, "epoch": 3.977472720872932, "grad_norm": 12.719446182250977, "learning_rate": 1.8670348060353118e-09, "loss": 0.26690078, "memory(GiB)": 34.88, "step": 146900, "train_speed(iter/s)": 0.41144 }, { "acc": 0.95005875, "epoch": 3.9776081011561475, "grad_norm": 4.767933368682861, "learning_rate": 1.8566453247104702e-09, "loss": 0.29084163, "memory(GiB)": 34.88, "step": 146905, "train_speed(iter/s)": 0.411441 }, { "acc": 0.94572792, "epoch": 3.9777434814393633, "grad_norm": 8.34227180480957, "learning_rate": 1.8463184612163526e-09, "loss": 0.32871146, "memory(GiB)": 34.88, "step": 146910, "train_speed(iter/s)": 0.411441 }, { "acc": 0.94706964, "epoch": 3.9778788617225787, "grad_norm": 6.4062113761901855, "learning_rate": 1.836054215681732e-09, "loss": 0.37009552, "memory(GiB)": 34.88, "step": 146915, "train_speed(iter/s)": 0.411442 }, { "acc": 0.94046936, "epoch": 3.978014242005794, "grad_norm": 4.629499912261963, "learning_rate": 1.8258525882353814e-09, "loss": 0.41416154, "memory(GiB)": 34.88, "step": 146920, "train_speed(iter/s)": 0.411443 }, { "acc": 0.9508666, "epoch": 3.97814962228901, "grad_norm": 4.099119186401367, "learning_rate": 1.8157135790049637e-09, "loss": 0.30264907, "memory(GiB)": 34.88, "step": 146925, "train_speed(iter/s)": 0.411444 }, { "acc": 0.94708443, "epoch": 3.9782850025722256, "grad_norm": 3.548009157180786, "learning_rate": 1.8056371881181419e-09, "loss": 0.29337916, "memory(GiB)": 34.88, "step": 146930, "train_speed(iter/s)": 0.411444 }, { "acc": 0.95806313, "epoch": 3.978420382855441, "grad_norm": 6.968631267547607, "learning_rate": 1.7956234156998035e-09, "loss": 0.22862594, "memory(GiB)": 34.88, "step": 146935, "train_speed(iter/s)": 0.411445 }, { "acc": 0.95200005, "epoch": 3.9785557631386563, "grad_norm": 15.28158950805664, "learning_rate": 1.7856722618770565e-09, "loss": 0.28612037, "memory(GiB)": 34.88, "step": 146940, "train_speed(iter/s)": 0.411446 }, { "acc": 0.94058704, "epoch": 3.978691143421872, "grad_norm": 3.543073892593384, "learning_rate": 1.7757837267731233e-09, "loss": 0.38852451, "memory(GiB)": 34.88, "step": 146945, "train_speed(iter/s)": 0.411447 }, { "acc": 0.94421673, "epoch": 3.9788265237050875, "grad_norm": 6.936471462249756, "learning_rate": 1.765957810512891e-09, "loss": 0.40729423, "memory(GiB)": 34.88, "step": 146950, "train_speed(iter/s)": 0.411447 }, { "acc": 0.94759626, "epoch": 3.9789619039883033, "grad_norm": 5.035553932189941, "learning_rate": 1.7561945132190275e-09, "loss": 0.27328167, "memory(GiB)": 34.88, "step": 146955, "train_speed(iter/s)": 0.411448 }, { "acc": 0.94806099, "epoch": 3.9790972842715187, "grad_norm": 6.085476398468018, "learning_rate": 1.7464938350136452e-09, "loss": 0.31153417, "memory(GiB)": 34.88, "step": 146960, "train_speed(iter/s)": 0.411449 }, { "acc": 0.94393101, "epoch": 3.9792326645547345, "grad_norm": 11.852261543273926, "learning_rate": 1.736855776018301e-09, "loss": 0.32690382, "memory(GiB)": 34.88, "step": 146965, "train_speed(iter/s)": 0.41145 }, { "acc": 0.94297218, "epoch": 3.97936804483795, "grad_norm": 9.035820007324219, "learning_rate": 1.7272803363539975e-09, "loss": 0.37001586, "memory(GiB)": 34.88, "step": 146970, "train_speed(iter/s)": 0.41145 }, { "acc": 0.94343662, "epoch": 3.979503425121165, "grad_norm": 5.017974376678467, "learning_rate": 1.7177675161406262e-09, "loss": 0.3357429, "memory(GiB)": 34.88, "step": 146975, "train_speed(iter/s)": 0.411451 }, { "acc": 0.94700003, "epoch": 3.979638805404381, "grad_norm": 5.1312785148620605, "learning_rate": 1.7083173154975246e-09, "loss": 0.27562268, "memory(GiB)": 34.88, "step": 146980, "train_speed(iter/s)": 0.411452 }, { "acc": 0.9315877, "epoch": 3.9797741856875963, "grad_norm": 7.069364547729492, "learning_rate": 1.6989297345423644e-09, "loss": 0.41536942, "memory(GiB)": 34.88, "step": 146985, "train_speed(iter/s)": 0.411453 }, { "acc": 0.94019775, "epoch": 3.979909565970812, "grad_norm": 12.120108604431152, "learning_rate": 1.6896047733933724e-09, "loss": 0.32273178, "memory(GiB)": 34.88, "step": 146990, "train_speed(iter/s)": 0.411453 }, { "acc": 0.96977444, "epoch": 3.9800449462540275, "grad_norm": 8.212615013122559, "learning_rate": 1.680342432167666e-09, "loss": 0.15292456, "memory(GiB)": 34.88, "step": 146995, "train_speed(iter/s)": 0.411454 }, { "acc": 0.95238934, "epoch": 3.9801803265372433, "grad_norm": 6.901371479034424, "learning_rate": 1.6711427109806962e-09, "loss": 0.27485149, "memory(GiB)": 34.88, "step": 147000, "train_speed(iter/s)": 0.411454 }, { "acc": 0.95949116, "epoch": 3.9803157068204587, "grad_norm": 3.8214938640594482, "learning_rate": 1.6620056099473601e-09, "loss": 0.28013082, "memory(GiB)": 34.88, "step": 147005, "train_speed(iter/s)": 0.411455 }, { "acc": 0.95185976, "epoch": 3.980451087103674, "grad_norm": 4.782111167907715, "learning_rate": 1.652931129183109e-09, "loss": 0.29633858, "memory(GiB)": 34.88, "step": 147010, "train_speed(iter/s)": 0.411456 }, { "acc": 0.95510807, "epoch": 3.98058646738689, "grad_norm": 5.27842378616333, "learning_rate": 1.6439192688011743e-09, "loss": 0.25554914, "memory(GiB)": 34.88, "step": 147015, "train_speed(iter/s)": 0.411457 }, { "acc": 0.95025539, "epoch": 3.980721847670105, "grad_norm": 5.784901142120361, "learning_rate": 1.6349700289136779e-09, "loss": 0.27701643, "memory(GiB)": 34.88, "step": 147020, "train_speed(iter/s)": 0.411457 }, { "acc": 0.95694847, "epoch": 3.980857227953321, "grad_norm": 5.384755611419678, "learning_rate": 1.6260834096344053e-09, "loss": 0.26892092, "memory(GiB)": 34.88, "step": 147025, "train_speed(iter/s)": 0.411458 }, { "acc": 0.95484533, "epoch": 3.9809926082365363, "grad_norm": 5.928882598876953, "learning_rate": 1.6172594110727034e-09, "loss": 0.29036436, "memory(GiB)": 34.88, "step": 147030, "train_speed(iter/s)": 0.411459 }, { "acc": 0.94467688, "epoch": 3.981127988519752, "grad_norm": 20.521949768066406, "learning_rate": 1.6084980333406927e-09, "loss": 0.32888725, "memory(GiB)": 34.88, "step": 147035, "train_speed(iter/s)": 0.41146 }, { "acc": 0.94982452, "epoch": 3.9812633688029675, "grad_norm": 11.875128746032715, "learning_rate": 1.59979927654772e-09, "loss": 0.32562435, "memory(GiB)": 34.88, "step": 147040, "train_speed(iter/s)": 0.41146 }, { "acc": 0.9511076, "epoch": 3.981398749086183, "grad_norm": 5.889266490936279, "learning_rate": 1.591163140802021e-09, "loss": 0.29038134, "memory(GiB)": 34.88, "step": 147045, "train_speed(iter/s)": 0.411461 }, { "acc": 0.94068031, "epoch": 3.9815341293693987, "grad_norm": 5.478423118591309, "learning_rate": 1.5825896262129416e-09, "loss": 0.33258207, "memory(GiB)": 34.88, "step": 147050, "train_speed(iter/s)": 0.411462 }, { "acc": 0.95236282, "epoch": 3.9816695096526145, "grad_norm": 11.909345626831055, "learning_rate": 1.5740787328870526e-09, "loss": 0.30491536, "memory(GiB)": 34.88, "step": 147055, "train_speed(iter/s)": 0.411463 }, { "acc": 0.95984364, "epoch": 3.98180488993583, "grad_norm": 7.627721309661865, "learning_rate": 1.5656304609309247e-09, "loss": 0.2240284, "memory(GiB)": 34.88, "step": 147060, "train_speed(iter/s)": 0.411463 }, { "acc": 0.93185139, "epoch": 3.981940270219045, "grad_norm": 5.311542510986328, "learning_rate": 1.5572448104511286e-09, "loss": 0.40673356, "memory(GiB)": 34.88, "step": 147065, "train_speed(iter/s)": 0.411464 }, { "acc": 0.95087509, "epoch": 3.982075650502261, "grad_norm": 11.9375638961792, "learning_rate": 1.548921781552015e-09, "loss": 0.30371938, "memory(GiB)": 34.88, "step": 147070, "train_speed(iter/s)": 0.411465 }, { "acc": 0.94620113, "epoch": 3.9822110307854763, "grad_norm": 6.182928085327148, "learning_rate": 1.5406613743379343e-09, "loss": 0.30864198, "memory(GiB)": 34.88, "step": 147075, "train_speed(iter/s)": 0.411466 }, { "acc": 0.94902916, "epoch": 3.9823464110686917, "grad_norm": 11.778186798095703, "learning_rate": 1.5324635889126822e-09, "loss": 0.35822058, "memory(GiB)": 34.88, "step": 147080, "train_speed(iter/s)": 0.411466 }, { "acc": 0.94736919, "epoch": 3.9824817913519075, "grad_norm": 5.743735313415527, "learning_rate": 1.5243284253783888e-09, "loss": 0.33227632, "memory(GiB)": 34.88, "step": 147085, "train_speed(iter/s)": 0.411467 }, { "acc": 0.94616966, "epoch": 3.9826171716351233, "grad_norm": 10.239026069641113, "learning_rate": 1.5162558838377393e-09, "loss": 0.36933522, "memory(GiB)": 34.88, "step": 147090, "train_speed(iter/s)": 0.411468 }, { "acc": 0.94610252, "epoch": 3.9827525519183387, "grad_norm": 9.20882511138916, "learning_rate": 1.5082459643917544e-09, "loss": 0.28235283, "memory(GiB)": 34.88, "step": 147095, "train_speed(iter/s)": 0.411469 }, { "acc": 0.94667139, "epoch": 3.982887932201554, "grad_norm": 5.517665386199951, "learning_rate": 1.5002986671397887e-09, "loss": 0.33313375, "memory(GiB)": 34.88, "step": 147100, "train_speed(iter/s)": 0.411469 }, { "acc": 0.94123125, "epoch": 3.98302331248477, "grad_norm": 5.183467864990234, "learning_rate": 1.4924139921828624e-09, "loss": 0.3863225, "memory(GiB)": 34.88, "step": 147105, "train_speed(iter/s)": 0.41147 }, { "acc": 0.9494688, "epoch": 3.983158692767985, "grad_norm": 4.834721565246582, "learning_rate": 1.4845919396181105e-09, "loss": 0.29070492, "memory(GiB)": 34.88, "step": 147110, "train_speed(iter/s)": 0.411471 }, { "acc": 0.9451026, "epoch": 3.983294073051201, "grad_norm": 4.153716564178467, "learning_rate": 1.4768325095448877e-09, "loss": 0.32110796, "memory(GiB)": 34.88, "step": 147115, "train_speed(iter/s)": 0.411472 }, { "acc": 0.95640011, "epoch": 3.9834294533344163, "grad_norm": 11.578579902648926, "learning_rate": 1.4691357020603293e-09, "loss": 0.30218487, "memory(GiB)": 34.88, "step": 147120, "train_speed(iter/s)": 0.411472 }, { "acc": 0.94318619, "epoch": 3.983564833617632, "grad_norm": 5.77000617980957, "learning_rate": 1.4615015172599046e-09, "loss": 0.37649484, "memory(GiB)": 34.88, "step": 147125, "train_speed(iter/s)": 0.411473 }, { "acc": 0.95609179, "epoch": 3.9837002139008475, "grad_norm": 6.95629358291626, "learning_rate": 1.4539299552396384e-09, "loss": 0.30292141, "memory(GiB)": 34.88, "step": 147130, "train_speed(iter/s)": 0.411474 }, { "acc": 0.93068695, "epoch": 3.983835594184063, "grad_norm": 8.460564613342285, "learning_rate": 1.4464210160950001e-09, "loss": 0.49813623, "memory(GiB)": 34.88, "step": 147135, "train_speed(iter/s)": 0.411475 }, { "acc": 0.95134678, "epoch": 3.9839709744672787, "grad_norm": 4.688092231750488, "learning_rate": 1.4389746999192396e-09, "loss": 0.25391574, "memory(GiB)": 34.88, "step": 147140, "train_speed(iter/s)": 0.411476 }, { "acc": 0.94344568, "epoch": 3.984106354750494, "grad_norm": 12.447735786437988, "learning_rate": 1.4315910068056057e-09, "loss": 0.35625072, "memory(GiB)": 34.88, "step": 147145, "train_speed(iter/s)": 0.411476 }, { "acc": 0.93506813, "epoch": 3.98424173503371, "grad_norm": 7.347886562347412, "learning_rate": 1.4242699368473482e-09, "loss": 0.39035618, "memory(GiB)": 34.88, "step": 147150, "train_speed(iter/s)": 0.411477 }, { "acc": 0.95944319, "epoch": 3.984377115316925, "grad_norm": 3.916494607925415, "learning_rate": 1.4170114901360514e-09, "loss": 0.25712421, "memory(GiB)": 34.88, "step": 147155, "train_speed(iter/s)": 0.411478 }, { "acc": 0.95759754, "epoch": 3.984512495600141, "grad_norm": 7.068069934844971, "learning_rate": 1.409815666761634e-09, "loss": 0.23517332, "memory(GiB)": 34.88, "step": 147160, "train_speed(iter/s)": 0.411479 }, { "acc": 0.94381876, "epoch": 3.9846478758833563, "grad_norm": 7.229661464691162, "learning_rate": 1.4026824668156808e-09, "loss": 0.36127396, "memory(GiB)": 34.88, "step": 147165, "train_speed(iter/s)": 0.411479 }, { "acc": 0.94548645, "epoch": 3.9847832561665717, "grad_norm": 7.037766456604004, "learning_rate": 1.395611890386445e-09, "loss": 0.32015338, "memory(GiB)": 34.88, "step": 147170, "train_speed(iter/s)": 0.41148 }, { "acc": 0.9591795, "epoch": 3.9849186364497875, "grad_norm": 1.7320688962936401, "learning_rate": 1.3886039375632909e-09, "loss": 0.23136132, "memory(GiB)": 34.88, "step": 147175, "train_speed(iter/s)": 0.411481 }, { "acc": 0.93669109, "epoch": 3.985054016733003, "grad_norm": 10.86111831665039, "learning_rate": 1.3816586084328075e-09, "loss": 0.43236017, "memory(GiB)": 34.88, "step": 147180, "train_speed(iter/s)": 0.411482 }, { "acc": 0.9530241, "epoch": 3.9851893970162187, "grad_norm": 3.8135013580322266, "learning_rate": 1.3747759030838036e-09, "loss": 0.31654291, "memory(GiB)": 34.88, "step": 147185, "train_speed(iter/s)": 0.411482 }, { "acc": 0.9417592, "epoch": 3.985324777299434, "grad_norm": 10.371927261352539, "learning_rate": 1.3679558216006475e-09, "loss": 0.41268425, "memory(GiB)": 34.88, "step": 147190, "train_speed(iter/s)": 0.411483 }, { "acc": 0.94853792, "epoch": 3.98546015758265, "grad_norm": 4.595308303833008, "learning_rate": 1.3611983640699285e-09, "loss": 0.26168876, "memory(GiB)": 34.88, "step": 147195, "train_speed(iter/s)": 0.411484 }, { "acc": 0.95172567, "epoch": 3.985595537865865, "grad_norm": 4.2180256843566895, "learning_rate": 1.3545035305760147e-09, "loss": 0.35168738, "memory(GiB)": 34.88, "step": 147200, "train_speed(iter/s)": 0.411484 }, { "acc": 0.941185, "epoch": 3.9857309181490805, "grad_norm": 9.897196769714355, "learning_rate": 1.347871321202719e-09, "loss": 0.37588663, "memory(GiB)": 34.88, "step": 147205, "train_speed(iter/s)": 0.411485 }, { "acc": 0.95288162, "epoch": 3.9858662984322963, "grad_norm": 4.5775227546691895, "learning_rate": 1.3413017360333009e-09, "loss": 0.26169984, "memory(GiB)": 34.88, "step": 147210, "train_speed(iter/s)": 0.411486 }, { "acc": 0.95374489, "epoch": 3.986001678715512, "grad_norm": 2.370793104171753, "learning_rate": 1.3347947751499083e-09, "loss": 0.23349042, "memory(GiB)": 34.88, "step": 147215, "train_speed(iter/s)": 0.411487 }, { "acc": 0.95895596, "epoch": 3.9861370589987275, "grad_norm": 13.65135669708252, "learning_rate": 1.328350438634134e-09, "loss": 0.27853193, "memory(GiB)": 34.88, "step": 147220, "train_speed(iter/s)": 0.411487 }, { "acc": 0.93986359, "epoch": 3.986272439281943, "grad_norm": 10.979406356811523, "learning_rate": 1.3219687265664614e-09, "loss": 0.39496565, "memory(GiB)": 34.88, "step": 147225, "train_speed(iter/s)": 0.411488 }, { "acc": 0.94764271, "epoch": 3.9864078195651587, "grad_norm": 5.623220443725586, "learning_rate": 1.3156496390268189e-09, "loss": 0.29563525, "memory(GiB)": 34.88, "step": 147230, "train_speed(iter/s)": 0.411489 }, { "acc": 0.94760628, "epoch": 3.986543199848374, "grad_norm": 6.830828666687012, "learning_rate": 1.3093931760945789e-09, "loss": 0.3726686, "memory(GiB)": 34.88, "step": 147235, "train_speed(iter/s)": 0.41149 }, { "acc": 0.93563595, "epoch": 3.9866785801315894, "grad_norm": 6.3395676612854, "learning_rate": 1.3031993378485599e-09, "loss": 0.4452157, "memory(GiB)": 34.88, "step": 147240, "train_speed(iter/s)": 0.41149 }, { "acc": 0.94528236, "epoch": 3.986813960414805, "grad_norm": 13.434293746948242, "learning_rate": 1.2970681243653593e-09, "loss": 0.30400562, "memory(GiB)": 34.88, "step": 147245, "train_speed(iter/s)": 0.411491 }, { "acc": 0.95479927, "epoch": 3.986949340698021, "grad_norm": 20.85428810119629, "learning_rate": 1.2909995357221298e-09, "loss": 0.34290006, "memory(GiB)": 34.88, "step": 147250, "train_speed(iter/s)": 0.411492 }, { "acc": 0.95105543, "epoch": 3.9870847209812363, "grad_norm": 12.260530471801758, "learning_rate": 1.2849935719954695e-09, "loss": 0.28152347, "memory(GiB)": 34.88, "step": 147255, "train_speed(iter/s)": 0.411493 }, { "acc": 0.95649548, "epoch": 3.9872201012644517, "grad_norm": 4.706213474273682, "learning_rate": 1.2790502332603107e-09, "loss": 0.24026709, "memory(GiB)": 34.88, "step": 147260, "train_speed(iter/s)": 0.411493 }, { "acc": 0.95046091, "epoch": 3.9873554815476675, "grad_norm": 4.563572883605957, "learning_rate": 1.2731695195904761e-09, "loss": 0.29504366, "memory(GiB)": 34.88, "step": 147265, "train_speed(iter/s)": 0.411494 }, { "acc": 0.94863939, "epoch": 3.987490861830883, "grad_norm": 9.431985855102539, "learning_rate": 1.267351431060898e-09, "loss": 0.34795363, "memory(GiB)": 34.88, "step": 147270, "train_speed(iter/s)": 0.411495 }, { "acc": 0.95125141, "epoch": 3.9876262421140987, "grad_norm": 4.842323303222656, "learning_rate": 1.2615959677431784e-09, "loss": 0.29722426, "memory(GiB)": 34.88, "step": 147275, "train_speed(iter/s)": 0.411496 }, { "acc": 0.96000395, "epoch": 3.987761622397314, "grad_norm": 9.141371726989746, "learning_rate": 1.2559031297100302e-09, "loss": 0.24360521, "memory(GiB)": 34.88, "step": 147280, "train_speed(iter/s)": 0.411496 }, { "acc": 0.95394917, "epoch": 3.98789700268053, "grad_norm": 13.451923370361328, "learning_rate": 1.250272917033055e-09, "loss": 0.3021915, "memory(GiB)": 34.88, "step": 147285, "train_speed(iter/s)": 0.411497 }, { "acc": 0.9616045, "epoch": 3.988032382963745, "grad_norm": 4.406621932983398, "learning_rate": 1.2447053297816355e-09, "loss": 0.23301952, "memory(GiB)": 34.88, "step": 147290, "train_speed(iter/s)": 0.411498 }, { "acc": 0.94051609, "epoch": 3.9881677632469605, "grad_norm": 7.537387371063232, "learning_rate": 1.2392003680268184e-09, "loss": 0.33559713, "memory(GiB)": 34.88, "step": 147295, "train_speed(iter/s)": 0.411499 }, { "acc": 0.93967943, "epoch": 3.9883031435301763, "grad_norm": 11.724502563476562, "learning_rate": 1.233758031836876e-09, "loss": 0.35480163, "memory(GiB)": 34.88, "step": 147300, "train_speed(iter/s)": 0.411499 }, { "acc": 0.95685263, "epoch": 3.9884385238133917, "grad_norm": 4.88943338394165, "learning_rate": 1.2283783212806344e-09, "loss": 0.30979929, "memory(GiB)": 34.88, "step": 147305, "train_speed(iter/s)": 0.4115 }, { "acc": 0.94632397, "epoch": 3.9885739040966075, "grad_norm": 5.983670711517334, "learning_rate": 1.223061236424701e-09, "loss": 0.28806129, "memory(GiB)": 34.88, "step": 147310, "train_speed(iter/s)": 0.411501 }, { "acc": 0.95510988, "epoch": 3.988709284379823, "grad_norm": 6.974583625793457, "learning_rate": 1.2178067773356828e-09, "loss": 0.33195267, "memory(GiB)": 34.88, "step": 147315, "train_speed(iter/s)": 0.411501 }, { "acc": 0.95380974, "epoch": 3.9888446646630387, "grad_norm": 13.302170753479004, "learning_rate": 1.2126149440801857e-09, "loss": 0.2464889, "memory(GiB)": 34.88, "step": 147320, "train_speed(iter/s)": 0.411502 }, { "acc": 0.93510237, "epoch": 3.988980044946254, "grad_norm": 34.859867095947266, "learning_rate": 1.2074857367225969e-09, "loss": 0.4126379, "memory(GiB)": 34.88, "step": 147325, "train_speed(iter/s)": 0.411503 }, { "acc": 0.95810709, "epoch": 3.9891154252294694, "grad_norm": 6.141111850738525, "learning_rate": 1.2024191553273025e-09, "loss": 0.25732975, "memory(GiB)": 34.88, "step": 147330, "train_speed(iter/s)": 0.411504 }, { "acc": 0.96043997, "epoch": 3.989250805512685, "grad_norm": 10.464800834655762, "learning_rate": 1.197415199957579e-09, "loss": 0.2480382, "memory(GiB)": 34.88, "step": 147335, "train_speed(iter/s)": 0.411505 }, { "acc": 0.95567493, "epoch": 3.9893861857959005, "grad_norm": 2.5499236583709717, "learning_rate": 1.1924738706767028e-09, "loss": 0.29350922, "memory(GiB)": 34.88, "step": 147340, "train_speed(iter/s)": 0.411505 }, { "acc": 0.94515285, "epoch": 3.9895215660791163, "grad_norm": 9.384469032287598, "learning_rate": 1.1875951675457298e-09, "loss": 0.3383163, "memory(GiB)": 34.88, "step": 147345, "train_speed(iter/s)": 0.411506 }, { "acc": 0.95034361, "epoch": 3.9896569463623317, "grad_norm": 10.86827564239502, "learning_rate": 1.1827790906268267e-09, "loss": 0.32009134, "memory(GiB)": 34.88, "step": 147350, "train_speed(iter/s)": 0.411507 }, { "acc": 0.94173946, "epoch": 3.9897923266455475, "grad_norm": 7.9376726150512695, "learning_rate": 1.1780256399793843e-09, "loss": 0.38033161, "memory(GiB)": 34.88, "step": 147355, "train_speed(iter/s)": 0.411507 }, { "acc": 0.95323324, "epoch": 3.989927706928763, "grad_norm": 7.014735221862793, "learning_rate": 1.1733348156633488e-09, "loss": 0.24645069, "memory(GiB)": 34.88, "step": 147360, "train_speed(iter/s)": 0.411508 }, { "acc": 0.94040346, "epoch": 3.990063087211978, "grad_norm": 7.142581462860107, "learning_rate": 1.1687066177375556e-09, "loss": 0.37704649, "memory(GiB)": 34.88, "step": 147365, "train_speed(iter/s)": 0.411509 }, { "acc": 0.9583518, "epoch": 3.990198467495194, "grad_norm": 1.9137998819351196, "learning_rate": 1.1641410462602863e-09, "loss": 0.22890587, "memory(GiB)": 34.88, "step": 147370, "train_speed(iter/s)": 0.41151 }, { "acc": 0.95609684, "epoch": 3.99033384777841, "grad_norm": 7.440188884735107, "learning_rate": 1.159638101287601e-09, "loss": 0.29855061, "memory(GiB)": 34.88, "step": 147375, "train_speed(iter/s)": 0.41151 }, { "acc": 0.95491953, "epoch": 3.990469228061625, "grad_norm": 10.824739456176758, "learning_rate": 1.1551977828772259e-09, "loss": 0.30184278, "memory(GiB)": 34.88, "step": 147380, "train_speed(iter/s)": 0.411511 }, { "acc": 0.94103489, "epoch": 3.9906046083448405, "grad_norm": 8.784626960754395, "learning_rate": 1.1508200910835563e-09, "loss": 0.33704123, "memory(GiB)": 34.88, "step": 147385, "train_speed(iter/s)": 0.411512 }, { "acc": 0.95104666, "epoch": 3.9907399886280563, "grad_norm": 12.547748565673828, "learning_rate": 1.1465050259626526e-09, "loss": 0.3135004, "memory(GiB)": 34.88, "step": 147390, "train_speed(iter/s)": 0.411512 }, { "acc": 0.94698315, "epoch": 3.9908753689112717, "grad_norm": 15.153465270996094, "learning_rate": 1.142252587567801e-09, "loss": 0.34020152, "memory(GiB)": 34.88, "step": 147395, "train_speed(iter/s)": 0.411513 }, { "acc": 0.95149899, "epoch": 3.991010749194487, "grad_norm": 8.704910278320312, "learning_rate": 1.1380627759522858e-09, "loss": 0.33120544, "memory(GiB)": 34.88, "step": 147400, "train_speed(iter/s)": 0.411514 }, { "acc": 0.96335897, "epoch": 3.991146129477703, "grad_norm": 4.712035179138184, "learning_rate": 1.1339355911688382e-09, "loss": 0.24114053, "memory(GiB)": 34.88, "step": 147405, "train_speed(iter/s)": 0.411514 }, { "acc": 0.94404984, "epoch": 3.9912815097609187, "grad_norm": 10.50847339630127, "learning_rate": 1.129871033269078e-09, "loss": 0.36985893, "memory(GiB)": 34.88, "step": 147410, "train_speed(iter/s)": 0.411515 }, { "acc": 0.94572067, "epoch": 3.991416890044134, "grad_norm": 9.324174880981445, "learning_rate": 1.1258691023040703e-09, "loss": 0.36551585, "memory(GiB)": 34.88, "step": 147415, "train_speed(iter/s)": 0.411516 }, { "acc": 0.94028778, "epoch": 3.9915522703273494, "grad_norm": 7.76725959777832, "learning_rate": 1.1219297983237704e-09, "loss": 0.4158968, "memory(GiB)": 34.88, "step": 147420, "train_speed(iter/s)": 0.411517 }, { "acc": 0.93788795, "epoch": 3.991687650610565, "grad_norm": 5.796679973602295, "learning_rate": 1.118053121377578e-09, "loss": 0.42984138, "memory(GiB)": 34.88, "step": 147425, "train_speed(iter/s)": 0.411518 }, { "acc": 0.96633005, "epoch": 3.9918230308937805, "grad_norm": 10.386791229248047, "learning_rate": 1.1142390715143384e-09, "loss": 0.21723466, "memory(GiB)": 34.88, "step": 147430, "train_speed(iter/s)": 0.411518 }, { "acc": 0.95127287, "epoch": 3.9919584111769963, "grad_norm": 5.940688610076904, "learning_rate": 1.1104876487806758e-09, "loss": 0.28574953, "memory(GiB)": 34.88, "step": 147435, "train_speed(iter/s)": 0.411519 }, { "acc": 0.95474186, "epoch": 3.9920937914602117, "grad_norm": 9.68445873260498, "learning_rate": 1.1067988532248804e-09, "loss": 0.31842022, "memory(GiB)": 34.88, "step": 147440, "train_speed(iter/s)": 0.41152 }, { "acc": 0.94803324, "epoch": 3.9922291717434275, "grad_norm": 21.520029067993164, "learning_rate": 1.1031726848930219e-09, "loss": 0.29739287, "memory(GiB)": 34.88, "step": 147445, "train_speed(iter/s)": 0.411521 }, { "acc": 0.96009159, "epoch": 3.992364552026643, "grad_norm": 5.144447326660156, "learning_rate": 1.0996091438300598e-09, "loss": 0.19224224, "memory(GiB)": 34.88, "step": 147450, "train_speed(iter/s)": 0.411521 }, { "acc": 0.94608593, "epoch": 3.9924999323098582, "grad_norm": 27.54301643371582, "learning_rate": 1.0961082300803983e-09, "loss": 0.33901191, "memory(GiB)": 34.88, "step": 147455, "train_speed(iter/s)": 0.411522 }, { "acc": 0.94926987, "epoch": 3.992635312593074, "grad_norm": 12.7952880859375, "learning_rate": 1.0926699436889973e-09, "loss": 0.29305568, "memory(GiB)": 34.88, "step": 147460, "train_speed(iter/s)": 0.411523 }, { "acc": 0.96149817, "epoch": 3.9927706928762894, "grad_norm": 8.235223770141602, "learning_rate": 1.0892942846974858e-09, "loss": 0.22172136, "memory(GiB)": 34.88, "step": 147465, "train_speed(iter/s)": 0.411524 }, { "acc": 0.95204554, "epoch": 3.992906073159505, "grad_norm": 8.767952919006348, "learning_rate": 1.0859812531491583e-09, "loss": 0.28941126, "memory(GiB)": 34.88, "step": 147470, "train_speed(iter/s)": 0.411524 }, { "acc": 0.93829594, "epoch": 3.9930414534427205, "grad_norm": 4.863874912261963, "learning_rate": 1.0827308490856438e-09, "loss": 0.40036774, "memory(GiB)": 34.88, "step": 147475, "train_speed(iter/s)": 0.411525 }, { "acc": 0.95585709, "epoch": 3.9931768337259363, "grad_norm": 6.453416347503662, "learning_rate": 1.0795430725469065e-09, "loss": 0.23648641, "memory(GiB)": 34.88, "step": 147480, "train_speed(iter/s)": 0.411526 }, { "acc": 0.95724277, "epoch": 3.9933122140091517, "grad_norm": 7.183837413787842, "learning_rate": 1.0764179235734655e-09, "loss": 0.23034935, "memory(GiB)": 34.88, "step": 147485, "train_speed(iter/s)": 0.411527 }, { "acc": 0.95850019, "epoch": 3.993447594292367, "grad_norm": 6.118348121643066, "learning_rate": 1.0733554022036194e-09, "loss": 0.28370898, "memory(GiB)": 34.88, "step": 147490, "train_speed(iter/s)": 0.411527 }, { "acc": 0.96169834, "epoch": 3.993582974575583, "grad_norm": 9.511422157287598, "learning_rate": 1.0703555084767776e-09, "loss": 0.25033896, "memory(GiB)": 34.88, "step": 147495, "train_speed(iter/s)": 0.411528 }, { "acc": 0.95017071, "epoch": 3.9937183548587982, "grad_norm": 5.794778347015381, "learning_rate": 1.0674182424301285e-09, "loss": 0.28919072, "memory(GiB)": 34.88, "step": 147500, "train_speed(iter/s)": 0.411528 }, { "acc": 0.94522047, "epoch": 3.993853735142014, "grad_norm": 8.357047080993652, "learning_rate": 1.064543604100306e-09, "loss": 0.37953341, "memory(GiB)": 34.88, "step": 147505, "train_speed(iter/s)": 0.411529 }, { "acc": 0.94463959, "epoch": 3.9939891154252294, "grad_norm": 4.755257606506348, "learning_rate": 1.0617315935239437e-09, "loss": 0.29927757, "memory(GiB)": 34.88, "step": 147510, "train_speed(iter/s)": 0.41153 }, { "acc": 0.94890957, "epoch": 3.994124495708445, "grad_norm": 8.230944633483887, "learning_rate": 1.0589822107349e-09, "loss": 0.28786917, "memory(GiB)": 34.88, "step": 147515, "train_speed(iter/s)": 0.411531 }, { "acc": 0.94576645, "epoch": 3.9942598759916605, "grad_norm": 6.507256984710693, "learning_rate": 1.0562954557692537e-09, "loss": 0.32268019, "memory(GiB)": 34.88, "step": 147520, "train_speed(iter/s)": 0.411531 }, { "acc": 0.95418415, "epoch": 3.994395256274876, "grad_norm": 9.619542121887207, "learning_rate": 1.0536713286597528e-09, "loss": 0.28876362, "memory(GiB)": 34.88, "step": 147525, "train_speed(iter/s)": 0.411532 }, { "acc": 0.93435907, "epoch": 3.9945306365580917, "grad_norm": 6.214610576629639, "learning_rate": 1.0511098294391459e-09, "loss": 0.39301517, "memory(GiB)": 34.88, "step": 147530, "train_speed(iter/s)": 0.411533 }, { "acc": 0.94920082, "epoch": 3.994666016841307, "grad_norm": 15.333037376403809, "learning_rate": 1.048610958139626e-09, "loss": 0.29341395, "memory(GiB)": 34.88, "step": 147535, "train_speed(iter/s)": 0.411534 }, { "acc": 0.94291458, "epoch": 3.994801397124523, "grad_norm": 10.463624954223633, "learning_rate": 1.0461747147928315e-09, "loss": 0.33478696, "memory(GiB)": 34.88, "step": 147540, "train_speed(iter/s)": 0.411534 }, { "acc": 0.95823374, "epoch": 3.9949367774077382, "grad_norm": 5.766233921051025, "learning_rate": 1.0438010994292905e-09, "loss": 0.23141375, "memory(GiB)": 34.88, "step": 147545, "train_speed(iter/s)": 0.411535 }, { "acc": 0.95888424, "epoch": 3.995072157690954, "grad_norm": 2.7876880168914795, "learning_rate": 1.0414901120784207e-09, "loss": 0.24626598, "memory(GiB)": 34.88, "step": 147550, "train_speed(iter/s)": 0.411536 }, { "acc": 0.9525835, "epoch": 3.9952075379741694, "grad_norm": 10.084832191467285, "learning_rate": 1.0392417527696403e-09, "loss": 0.27739458, "memory(GiB)": 34.88, "step": 147555, "train_speed(iter/s)": 0.411536 }, { "acc": 0.95286198, "epoch": 3.9953429182573847, "grad_norm": 8.903100967407227, "learning_rate": 1.0370560215301472e-09, "loss": 0.32495043, "memory(GiB)": 34.88, "step": 147560, "train_speed(iter/s)": 0.411537 }, { "acc": 0.95751362, "epoch": 3.9954782985406005, "grad_norm": 5.743078708648682, "learning_rate": 1.0349329183882488e-09, "loss": 0.27504444, "memory(GiB)": 34.88, "step": 147565, "train_speed(iter/s)": 0.411538 }, { "acc": 0.94603214, "epoch": 3.9956136788238164, "grad_norm": 6.652879238128662, "learning_rate": 1.032872443370588e-09, "loss": 0.34037387, "memory(GiB)": 34.88, "step": 147570, "train_speed(iter/s)": 0.411538 }, { "acc": 0.9517334, "epoch": 3.9957490591070317, "grad_norm": 5.803960800170898, "learning_rate": 1.0308745965021425e-09, "loss": 0.31638281, "memory(GiB)": 34.88, "step": 147575, "train_speed(iter/s)": 0.411539 }, { "acc": 0.94800711, "epoch": 3.995884439390247, "grad_norm": 6.333062648773193, "learning_rate": 1.0289393778089997e-09, "loss": 0.37076216, "memory(GiB)": 34.88, "step": 147580, "train_speed(iter/s)": 0.41154 }, { "acc": 0.95094137, "epoch": 3.996019819673463, "grad_norm": 5.028812408447266, "learning_rate": 1.027066787314472e-09, "loss": 0.26057444, "memory(GiB)": 34.88, "step": 147585, "train_speed(iter/s)": 0.411541 }, { "acc": 0.95370903, "epoch": 3.9961551999566782, "grad_norm": 6.554811954498291, "learning_rate": 1.0252568250418717e-09, "loss": 0.32065086, "memory(GiB)": 34.88, "step": 147590, "train_speed(iter/s)": 0.411542 }, { "acc": 0.95588188, "epoch": 3.9962905802398936, "grad_norm": 7.427274227142334, "learning_rate": 1.0235094910150665e-09, "loss": 0.27737763, "memory(GiB)": 34.88, "step": 147595, "train_speed(iter/s)": 0.411542 }, { "acc": 0.92697754, "epoch": 3.9964259605231094, "grad_norm": 10.68136215209961, "learning_rate": 1.0218247852551482e-09, "loss": 0.51776152, "memory(GiB)": 34.88, "step": 147600, "train_speed(iter/s)": 0.411543 }, { "acc": 0.96208286, "epoch": 3.996561340806325, "grad_norm": 5.476965427398682, "learning_rate": 1.0202027077832092e-09, "loss": 0.22782626, "memory(GiB)": 34.88, "step": 147605, "train_speed(iter/s)": 0.411544 }, { "acc": 0.95086136, "epoch": 3.9966967210895405, "grad_norm": 7.68976354598999, "learning_rate": 1.0186432586197862e-09, "loss": 0.3001338, "memory(GiB)": 34.88, "step": 147610, "train_speed(iter/s)": 0.411544 }, { "acc": 0.95303326, "epoch": 3.996832101372756, "grad_norm": 5.968775749206543, "learning_rate": 1.0171464377843066e-09, "loss": 0.29578795, "memory(GiB)": 34.88, "step": 147615, "train_speed(iter/s)": 0.411545 }, { "acc": 0.9351799, "epoch": 3.9969674816559717, "grad_norm": 6.86013126373291, "learning_rate": 1.015712245295642e-09, "loss": 0.35927176, "memory(GiB)": 34.88, "step": 147620, "train_speed(iter/s)": 0.411546 }, { "acc": 0.95368814, "epoch": 3.997102861939187, "grad_norm": 4.704466342926025, "learning_rate": 1.0143406811709992e-09, "loss": 0.26611929, "memory(GiB)": 34.88, "step": 147625, "train_speed(iter/s)": 0.411546 }, { "acc": 0.9468812, "epoch": 3.997238242222403, "grad_norm": 15.638044357299805, "learning_rate": 1.013031745428695e-09, "loss": 0.36550932, "memory(GiB)": 34.88, "step": 147630, "train_speed(iter/s)": 0.411547 }, { "acc": 0.94671612, "epoch": 3.9973736225056182, "grad_norm": 3.925715684890747, "learning_rate": 1.0117854380848263e-09, "loss": 0.37788761, "memory(GiB)": 34.88, "step": 147635, "train_speed(iter/s)": 0.411548 }, { "acc": 0.94884653, "epoch": 3.997509002788834, "grad_norm": 5.118407726287842, "learning_rate": 1.0106017591549345e-09, "loss": 0.35470603, "memory(GiB)": 34.88, "step": 147640, "train_speed(iter/s)": 0.411549 }, { "acc": 0.96359034, "epoch": 3.9976443830720494, "grad_norm": 4.261331081390381, "learning_rate": 1.0094807086534508e-09, "loss": 0.23839359, "memory(GiB)": 34.88, "step": 147645, "train_speed(iter/s)": 0.41155 }, { "acc": 0.9473074, "epoch": 3.9977797633552647, "grad_norm": 13.487349510192871, "learning_rate": 1.0084222865942517e-09, "loss": 0.28276048, "memory(GiB)": 34.88, "step": 147650, "train_speed(iter/s)": 0.41155 }, { "acc": 0.94994316, "epoch": 3.9979151436384806, "grad_norm": 5.057847023010254, "learning_rate": 1.007426492991769e-09, "loss": 0.29626942, "memory(GiB)": 34.88, "step": 147655, "train_speed(iter/s)": 0.411551 }, { "acc": 0.92040119, "epoch": 3.998050523921696, "grad_norm": 19.620494842529297, "learning_rate": 1.0064933278576587e-09, "loss": 0.50493608, "memory(GiB)": 34.88, "step": 147660, "train_speed(iter/s)": 0.411552 }, { "acc": 0.93963737, "epoch": 3.9981859042049117, "grad_norm": 8.133827209472656, "learning_rate": 1.0056227912035766e-09, "loss": 0.38872347, "memory(GiB)": 34.88, "step": 147665, "train_speed(iter/s)": 0.411552 }, { "acc": 0.94227724, "epoch": 3.998321284488127, "grad_norm": 3.547074556350708, "learning_rate": 1.0048148830400695e-09, "loss": 0.37951636, "memory(GiB)": 34.88, "step": 147670, "train_speed(iter/s)": 0.411553 }, { "acc": 0.95030556, "epoch": 3.998456664771343, "grad_norm": 4.965321063995361, "learning_rate": 1.0040696033782378e-09, "loss": 0.33866694, "memory(GiB)": 34.88, "step": 147675, "train_speed(iter/s)": 0.411554 }, { "acc": 0.9474164, "epoch": 3.9985920450545582, "grad_norm": 5.513444423675537, "learning_rate": 1.0033869522269628e-09, "loss": 0.2892971, "memory(GiB)": 34.88, "step": 147680, "train_speed(iter/s)": 0.411555 }, { "acc": 0.94696503, "epoch": 3.9987274253377736, "grad_norm": 4.8685622215271, "learning_rate": 1.0027669295945703e-09, "loss": 0.33566203, "memory(GiB)": 34.88, "step": 147685, "train_speed(iter/s)": 0.411555 }, { "acc": 0.94049063, "epoch": 3.9988628056209894, "grad_norm": 4.691930294036865, "learning_rate": 1.0022095354893862e-09, "loss": 0.32279291, "memory(GiB)": 34.88, "step": 147690, "train_speed(iter/s)": 0.411556 }, { "acc": 0.95590649, "epoch": 3.9989981859042047, "grad_norm": 3.746195077896118, "learning_rate": 1.0017147699175158e-09, "loss": 0.23758223, "memory(GiB)": 34.88, "step": 147695, "train_speed(iter/s)": 0.411557 }, { "acc": 0.94370451, "epoch": 3.9991335661874206, "grad_norm": 4.62047815322876, "learning_rate": 1.001282632886175e-09, "loss": 0.34807866, "memory(GiB)": 34.88, "step": 147700, "train_speed(iter/s)": 0.411557 }, { "acc": 0.94895449, "epoch": 3.999268946470636, "grad_norm": 6.638709545135498, "learning_rate": 1.0009131243998045e-09, "loss": 0.31155128, "memory(GiB)": 34.88, "step": 147705, "train_speed(iter/s)": 0.411558 }, { "acc": 0.96750946, "epoch": 3.9994043267538517, "grad_norm": 8.790685653686523, "learning_rate": 1.0006062444639545e-09, "loss": 0.19332095, "memory(GiB)": 34.88, "step": 147710, "train_speed(iter/s)": 0.411559 }, { "acc": 0.96121578, "epoch": 3.999539707037067, "grad_norm": 4.535000324249268, "learning_rate": 1.0003619930814004e-09, "loss": 0.23318148, "memory(GiB)": 34.88, "step": 147715, "train_speed(iter/s)": 0.411559 }, { "acc": 0.93454285, "epoch": 3.9996750873202824, "grad_norm": 6.8689446449279785, "learning_rate": 1.0001803702560276e-09, "loss": 0.41760101, "memory(GiB)": 34.88, "step": 147720, "train_speed(iter/s)": 0.41156 }, { "acc": 0.94453535, "epoch": 3.9998104676034982, "grad_norm": 3.557826042175293, "learning_rate": 1.0000613759900564e-09, "loss": 0.38825948, "memory(GiB)": 34.88, "step": 147725, "train_speed(iter/s)": 0.411561 }, { "acc": 0.93979759, "epoch": 3.999945847886714, "grad_norm": 7.917184352874756, "learning_rate": 1.0000050102851517e-09, "loss": 0.3615696, "memory(GiB)": 34.88, "step": 147730, "train_speed(iter/s)": 0.411561 }, { "epoch": 4.0, "eval_acc": 0.6270209094608292, "eval_loss": 1.261208415031433, "eval_runtime": 1297.9175, "eval_samples_per_second": 66.495, "eval_steps_per_second": 2.079, "step": 147732 } ], "logging_steps": 5, "max_steps": 147732, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.926674959155016e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }