{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0010077840822604, "eval_steps": 500, "global_step": 2980, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00033592802742012524, "grad_norm": 3.860774278640747, "learning_rate": 0.0, "loss": 1.1374, "memory/device_reserved (GiB)": 64.06, "memory/max_active (GiB)": 51.06, "memory/max_allocated (GiB)": 51.06, "step": 1 }, { "epoch": 0.0006718560548402505, "grad_norm": 3.8414366245269775, "learning_rate": 4.4843049327354265e-08, "loss": 1.1336, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2 }, { "epoch": 0.0010077840822603757, "grad_norm": 3.8133888244628906, "learning_rate": 8.968609865470853e-08, "loss": 1.14, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 3 }, { "epoch": 0.001343712109680501, "grad_norm": 3.817488431930542, "learning_rate": 1.345291479820628e-07, "loss": 1.1333, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 4 }, { "epoch": 0.0016796401371006262, "grad_norm": 3.87361741065979, "learning_rate": 1.7937219730941706e-07, "loss": 1.1607, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 5 }, { "epoch": 0.0020155681645207513, "grad_norm": 3.836265802383423, "learning_rate": 2.242152466367713e-07, "loss": 1.1298, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 6 }, { "epoch": 0.0023514961919408766, "grad_norm": 3.855699062347412, "learning_rate": 2.690582959641256e-07, "loss": 1.1196, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 7 }, { "epoch": 0.002687424219361002, "grad_norm": 3.9243767261505127, "learning_rate": 3.1390134529147985e-07, "loss": 1.1726, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 8 }, { "epoch": 0.003023352246781127, "grad_norm": 3.785141706466675, "learning_rate": 3.587443946188341e-07, "loss": 1.1412, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 9 }, { "epoch": 0.0033592802742012525, "grad_norm": 3.7415523529052734, "learning_rate": 4.0358744394618834e-07, "loss": 1.1342, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 10 }, { "epoch": 0.003695208301621378, "grad_norm": 3.538712739944458, "learning_rate": 4.484304932735426e-07, "loss": 1.1405, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 11 }, { "epoch": 0.004031136329041503, "grad_norm": 3.515927791595459, "learning_rate": 4.932735426008969e-07, "loss": 1.1448, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 12 }, { "epoch": 0.004367064356461628, "grad_norm": 3.455198287963867, "learning_rate": 5.381165919282512e-07, "loss": 1.1312, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 13 }, { "epoch": 0.004702992383881753, "grad_norm": 3.4612064361572266, "learning_rate": 5.829596412556054e-07, "loss": 1.1429, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 14 }, { "epoch": 0.0050389204113018785, "grad_norm": 2.794950485229492, "learning_rate": 6.278026905829597e-07, "loss": 1.1212, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 15 }, { "epoch": 0.005374848438722004, "grad_norm": 2.719169855117798, "learning_rate": 6.72645739910314e-07, "loss": 1.1224, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 16 }, { "epoch": 0.005710776466142129, "grad_norm": 2.6153249740600586, "learning_rate": 7.174887892376682e-07, "loss": 1.0909, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 17 }, { "epoch": 0.006046704493562254, "grad_norm": 2.4956769943237305, "learning_rate": 7.623318385650225e-07, "loss": 1.0978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 18 }, { "epoch": 0.00638263252098238, "grad_norm": 2.4908862113952637, "learning_rate": 8.071748878923767e-07, "loss": 1.1034, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 19 }, { "epoch": 0.006718560548402505, "grad_norm": 1.891632318496704, "learning_rate": 8.520179372197309e-07, "loss": 1.1026, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 20 }, { "epoch": 0.00705448857582263, "grad_norm": 1.67598295211792, "learning_rate": 8.968609865470852e-07, "loss": 1.0758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 21 }, { "epoch": 0.007390416603242756, "grad_norm": 1.6698436737060547, "learning_rate": 9.417040358744395e-07, "loss": 1.0845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 22 }, { "epoch": 0.007726344630662881, "grad_norm": 1.6616350412368774, "learning_rate": 9.865470852017938e-07, "loss": 1.096, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 23 }, { "epoch": 0.008062272658083005, "grad_norm": 1.6018822193145752, "learning_rate": 1.031390134529148e-06, "loss": 1.0802, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 24 }, { "epoch": 0.008398200685503131, "grad_norm": 1.5432353019714355, "learning_rate": 1.0762331838565023e-06, "loss": 1.0805, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 25 }, { "epoch": 0.008734128712923256, "grad_norm": 1.5558946132659912, "learning_rate": 1.1210762331838566e-06, "loss": 1.0657, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 26 }, { "epoch": 0.009070056740343382, "grad_norm": 1.2005615234375, "learning_rate": 1.1659192825112108e-06, "loss": 1.0545, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 27 }, { "epoch": 0.009405984767763506, "grad_norm": 1.0795950889587402, "learning_rate": 1.2107623318385651e-06, "loss": 1.0341, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 28 }, { "epoch": 0.009741912795183633, "grad_norm": 1.0501154661178589, "learning_rate": 1.2556053811659194e-06, "loss": 1.0337, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 29 }, { "epoch": 0.010077840822603757, "grad_norm": 1.056551456451416, "learning_rate": 1.3004484304932734e-06, "loss": 1.0194, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 30 }, { "epoch": 0.010413768850023883, "grad_norm": 1.0988292694091797, "learning_rate": 1.345291479820628e-06, "loss": 1.0276, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 31 }, { "epoch": 0.010749696877444008, "grad_norm": 1.0233908891677856, "learning_rate": 1.3901345291479822e-06, "loss": 1.0053, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 32 }, { "epoch": 0.011085624904864132, "grad_norm": 0.9269795417785645, "learning_rate": 1.4349775784753365e-06, "loss": 1.0266, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 33 }, { "epoch": 0.011421552932284258, "grad_norm": 0.8556009531021118, "learning_rate": 1.4798206278026907e-06, "loss": 1.0253, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 34 }, { "epoch": 0.011757480959704383, "grad_norm": 0.7898334860801697, "learning_rate": 1.524663677130045e-06, "loss": 0.9778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 35 }, { "epoch": 0.012093408987124509, "grad_norm": 0.7093439102172852, "learning_rate": 1.5695067264573993e-06, "loss": 0.999, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 36 }, { "epoch": 0.012429337014544633, "grad_norm": 0.7873974442481995, "learning_rate": 1.6143497757847533e-06, "loss": 0.9941, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 37 }, { "epoch": 0.01276526504196476, "grad_norm": 0.8581106662750244, "learning_rate": 1.6591928251121078e-06, "loss": 0.9845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 38 }, { "epoch": 0.013101193069384884, "grad_norm": 0.8075152635574341, "learning_rate": 1.7040358744394619e-06, "loss": 0.9687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 39 }, { "epoch": 0.01343712109680501, "grad_norm": 0.6883738040924072, "learning_rate": 1.7488789237668164e-06, "loss": 0.9605, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 40 }, { "epoch": 0.013773049124225134, "grad_norm": 0.6211533546447754, "learning_rate": 1.7937219730941704e-06, "loss": 0.9601, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 41 }, { "epoch": 0.01410897715164526, "grad_norm": 0.6408723592758179, "learning_rate": 1.838565022421525e-06, "loss": 0.9427, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 42 }, { "epoch": 0.014444905179065385, "grad_norm": 0.6525437831878662, "learning_rate": 1.883408071748879e-06, "loss": 0.9271, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 43 }, { "epoch": 0.014780833206485511, "grad_norm": 0.6580013036727905, "learning_rate": 1.9282511210762332e-06, "loss": 0.9216, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 44 }, { "epoch": 0.015116761233905636, "grad_norm": 0.6258417367935181, "learning_rate": 1.9730941704035875e-06, "loss": 0.9165, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 45 }, { "epoch": 0.015452689261325762, "grad_norm": 0.556971549987793, "learning_rate": 2.0179372197309418e-06, "loss": 0.9355, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 46 }, { "epoch": 0.015788617288745886, "grad_norm": 0.49705031514167786, "learning_rate": 2.062780269058296e-06, "loss": 0.9348, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 47 }, { "epoch": 0.01612454531616601, "grad_norm": 0.45353949069976807, "learning_rate": 2.1076233183856503e-06, "loss": 0.8993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 48 }, { "epoch": 0.016460473343586135, "grad_norm": 0.44959601759910583, "learning_rate": 2.1524663677130046e-06, "loss": 0.9062, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 49 }, { "epoch": 0.016796401371006263, "grad_norm": 0.433074951171875, "learning_rate": 2.197309417040359e-06, "loss": 0.9191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 50 }, { "epoch": 0.017132329398426387, "grad_norm": 0.42735880613327026, "learning_rate": 2.242152466367713e-06, "loss": 0.8958, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 51 }, { "epoch": 0.017468257425846512, "grad_norm": 0.4511914551258087, "learning_rate": 2.2869955156950674e-06, "loss": 0.9087, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 52 }, { "epoch": 0.017804185453266636, "grad_norm": 0.4472465217113495, "learning_rate": 2.3318385650224217e-06, "loss": 0.9242, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 53 }, { "epoch": 0.018140113480686764, "grad_norm": 0.45299163460731506, "learning_rate": 2.376681614349776e-06, "loss": 0.8855, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 54 }, { "epoch": 0.01847604150810689, "grad_norm": 0.3957361876964569, "learning_rate": 2.4215246636771302e-06, "loss": 0.9027, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 55 }, { "epoch": 0.018811969535527013, "grad_norm": 0.3683765232563019, "learning_rate": 2.4663677130044845e-06, "loss": 0.8879, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 56 }, { "epoch": 0.019147897562947137, "grad_norm": 0.36740103363990784, "learning_rate": 2.5112107623318388e-06, "loss": 0.8905, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 57 }, { "epoch": 0.019483825590367265, "grad_norm": 0.443046897649765, "learning_rate": 2.556053811659193e-06, "loss": 0.8811, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 58 }, { "epoch": 0.01981975361778739, "grad_norm": 0.41546308994293213, "learning_rate": 2.600896860986547e-06, "loss": 0.8775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 59 }, { "epoch": 0.020155681645207514, "grad_norm": 0.341630220413208, "learning_rate": 2.6457399103139016e-06, "loss": 0.8872, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 60 }, { "epoch": 0.02049160967262764, "grad_norm": 0.37675705552101135, "learning_rate": 2.690582959641256e-06, "loss": 0.8798, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 61 }, { "epoch": 0.020827537700047766, "grad_norm": 0.3483141362667084, "learning_rate": 2.73542600896861e-06, "loss": 0.8599, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 62 }, { "epoch": 0.02116346572746789, "grad_norm": 0.37884631752967834, "learning_rate": 2.7802690582959644e-06, "loss": 0.8813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 63 }, { "epoch": 0.021499393754888015, "grad_norm": 0.3274795413017273, "learning_rate": 2.8251121076233182e-06, "loss": 0.8635, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 64 }, { "epoch": 0.02183532178230814, "grad_norm": 0.3572888970375061, "learning_rate": 2.869955156950673e-06, "loss": 0.8853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 65 }, { "epoch": 0.022171249809728264, "grad_norm": 0.2987566590309143, "learning_rate": 2.9147982062780272e-06, "loss": 0.8505, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 66 }, { "epoch": 0.022507177837148392, "grad_norm": 0.25988075137138367, "learning_rate": 2.9596412556053815e-06, "loss": 0.8645, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 67 }, { "epoch": 0.022843105864568516, "grad_norm": 0.26829901337623596, "learning_rate": 3.0044843049327353e-06, "loss": 0.851, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 68 }, { "epoch": 0.02317903389198864, "grad_norm": 0.2941244840621948, "learning_rate": 3.04932735426009e-06, "loss": 0.8461, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 69 }, { "epoch": 0.023514961919408765, "grad_norm": 0.27264806628227234, "learning_rate": 3.0941704035874443e-06, "loss": 0.8611, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 70 }, { "epoch": 0.023850889946828893, "grad_norm": 0.296802282333374, "learning_rate": 3.1390134529147986e-06, "loss": 0.8485, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 71 }, { "epoch": 0.024186817974249018, "grad_norm": 0.2517745792865753, "learning_rate": 3.1838565022421524e-06, "loss": 0.8606, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 72 }, { "epoch": 0.024522746001669142, "grad_norm": 0.27232834696769714, "learning_rate": 3.2286995515695067e-06, "loss": 0.8561, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 73 }, { "epoch": 0.024858674029089266, "grad_norm": 0.2444978952407837, "learning_rate": 3.2735426008968614e-06, "loss": 0.8318, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 74 }, { "epoch": 0.025194602056509394, "grad_norm": 0.26987606287002563, "learning_rate": 3.3183856502242157e-06, "loss": 0.857, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 75 }, { "epoch": 0.02553053008392952, "grad_norm": 0.23352600634098053, "learning_rate": 3.36322869955157e-06, "loss": 0.8491, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 76 }, { "epoch": 0.025866458111349643, "grad_norm": 0.22162917256355286, "learning_rate": 3.4080717488789238e-06, "loss": 0.8465, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 77 }, { "epoch": 0.026202386138769768, "grad_norm": 0.21253058314323425, "learning_rate": 3.4529147982062785e-06, "loss": 0.839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 78 }, { "epoch": 0.026538314166189896, "grad_norm": 0.2052978277206421, "learning_rate": 3.4977578475336327e-06, "loss": 0.8351, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 79 }, { "epoch": 0.02687424219361002, "grad_norm": 0.195331871509552, "learning_rate": 3.542600896860987e-06, "loss": 0.8391, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 80 }, { "epoch": 0.027210170221030144, "grad_norm": 0.30454620718955994, "learning_rate": 3.587443946188341e-06, "loss": 0.8328, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 81 }, { "epoch": 0.02754609824845027, "grad_norm": 0.18643732368946075, "learning_rate": 3.632286995515695e-06, "loss": 0.842, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 82 }, { "epoch": 0.027882026275870393, "grad_norm": 0.20035171508789062, "learning_rate": 3.67713004484305e-06, "loss": 0.8422, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 83 }, { "epoch": 0.02821795430329052, "grad_norm": 0.19473004341125488, "learning_rate": 3.721973094170404e-06, "loss": 0.8165, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 84 }, { "epoch": 0.028553882330710646, "grad_norm": 0.1979731321334839, "learning_rate": 3.766816143497758e-06, "loss": 0.8307, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 85 }, { "epoch": 0.02888981035813077, "grad_norm": 0.1841324120759964, "learning_rate": 3.8116591928251122e-06, "loss": 0.8222, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 86 }, { "epoch": 0.029225738385550894, "grad_norm": 0.18352875113487244, "learning_rate": 3.8565022421524665e-06, "loss": 0.8362, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 87 }, { "epoch": 0.029561666412971022, "grad_norm": 0.20249532163143158, "learning_rate": 3.901345291479821e-06, "loss": 0.8362, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 88 }, { "epoch": 0.029897594440391147, "grad_norm": 0.2030438482761383, "learning_rate": 3.946188340807175e-06, "loss": 0.8346, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 89 }, { "epoch": 0.03023352246781127, "grad_norm": 0.19077160954475403, "learning_rate": 3.991031390134529e-06, "loss": 0.8193, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 90 }, { "epoch": 0.030569450495231396, "grad_norm": 0.19612085819244385, "learning_rate": 4.0358744394618836e-06, "loss": 0.8299, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 91 }, { "epoch": 0.030905378522651523, "grad_norm": 0.17948150634765625, "learning_rate": 4.080717488789238e-06, "loss": 0.8042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 92 }, { "epoch": 0.031241306550071648, "grad_norm": 0.2010924071073532, "learning_rate": 4.125560538116592e-06, "loss": 0.7926, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 93 }, { "epoch": 0.03157723457749177, "grad_norm": 0.18491479754447937, "learning_rate": 4.170403587443946e-06, "loss": 0.7955, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 94 }, { "epoch": 0.0319131626049119, "grad_norm": 0.1907823383808136, "learning_rate": 4.215246636771301e-06, "loss": 0.8309, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 95 }, { "epoch": 0.03224909063233202, "grad_norm": 0.16708433628082275, "learning_rate": 4.260089686098655e-06, "loss": 0.8133, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 96 }, { "epoch": 0.03258501865975215, "grad_norm": 0.20673176646232605, "learning_rate": 4.304932735426009e-06, "loss": 0.8196, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 97 }, { "epoch": 0.03292094668717227, "grad_norm": 0.2057620733976364, "learning_rate": 4.3497757847533635e-06, "loss": 0.8162, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 98 }, { "epoch": 0.0332568747145924, "grad_norm": 0.1991223841905594, "learning_rate": 4.394618834080718e-06, "loss": 0.7927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 99 }, { "epoch": 0.033592802742012526, "grad_norm": 0.20465736091136932, "learning_rate": 4.439461883408072e-06, "loss": 0.814, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 100 }, { "epoch": 0.03392873076943265, "grad_norm": 0.1927819401025772, "learning_rate": 4.484304932735426e-06, "loss": 0.8131, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 101 }, { "epoch": 0.034264658796852775, "grad_norm": 0.19050125777721405, "learning_rate": 4.5291479820627806e-06, "loss": 0.8159, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 102 }, { "epoch": 0.0346005868242729, "grad_norm": 0.19135244190692902, "learning_rate": 4.573991031390135e-06, "loss": 0.8338, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 103 }, { "epoch": 0.034936514851693024, "grad_norm": 0.2137562781572342, "learning_rate": 4.618834080717489e-06, "loss": 0.833, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 104 }, { "epoch": 0.03527244287911315, "grad_norm": 0.18024782836437225, "learning_rate": 4.663677130044843e-06, "loss": 0.822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 105 }, { "epoch": 0.03560837090653327, "grad_norm": 0.19821949303150177, "learning_rate": 4.708520179372198e-06, "loss": 0.8272, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 106 }, { "epoch": 0.0359442989339534, "grad_norm": 0.19121558964252472, "learning_rate": 4.753363228699552e-06, "loss": 0.7881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 107 }, { "epoch": 0.03628022696137353, "grad_norm": 0.18633952736854553, "learning_rate": 4.798206278026906e-06, "loss": 0.7999, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 108 }, { "epoch": 0.03661615498879365, "grad_norm": 0.2002972960472107, "learning_rate": 4.8430493273542605e-06, "loss": 0.8211, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 109 }, { "epoch": 0.03695208301621378, "grad_norm": 0.22815167903900146, "learning_rate": 4.887892376681615e-06, "loss": 0.8194, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 110 }, { "epoch": 0.037288011043633905, "grad_norm": 0.2170150876045227, "learning_rate": 4.932735426008969e-06, "loss": 0.8184, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 111 }, { "epoch": 0.037623939071054026, "grad_norm": 0.23426048457622528, "learning_rate": 4.977578475336323e-06, "loss": 0.8174, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 112 }, { "epoch": 0.037959867098474154, "grad_norm": 0.2489815205335617, "learning_rate": 5.0224215246636775e-06, "loss": 0.7815, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 113 }, { "epoch": 0.038295795125894275, "grad_norm": 0.23439639806747437, "learning_rate": 5.067264573991032e-06, "loss": 0.7959, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 114 }, { "epoch": 0.0386317231533144, "grad_norm": 0.18249954283237457, "learning_rate": 5.112107623318386e-06, "loss": 0.8032, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 115 }, { "epoch": 0.03896765118073453, "grad_norm": 0.3929659128189087, "learning_rate": 5.156950672645741e-06, "loss": 0.8258, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 116 }, { "epoch": 0.03930357920815465, "grad_norm": 0.2119808942079544, "learning_rate": 5.201793721973094e-06, "loss": 0.7987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 117 }, { "epoch": 0.03963950723557478, "grad_norm": 0.21135388314723969, "learning_rate": 5.246636771300448e-06, "loss": 0.8112, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 118 }, { "epoch": 0.0399754352629949, "grad_norm": 0.2291570007801056, "learning_rate": 5.291479820627803e-06, "loss": 0.814, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 119 }, { "epoch": 0.04031136329041503, "grad_norm": 0.21816393733024597, "learning_rate": 5.3363228699551574e-06, "loss": 0.7985, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 120 }, { "epoch": 0.040647291317835156, "grad_norm": 0.206037700176239, "learning_rate": 5.381165919282512e-06, "loss": 0.8092, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 121 }, { "epoch": 0.04098321934525528, "grad_norm": 0.21665015816688538, "learning_rate": 5.426008968609866e-06, "loss": 0.8087, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 122 }, { "epoch": 0.041319147372675405, "grad_norm": 0.21161220967769623, "learning_rate": 5.47085201793722e-06, "loss": 0.7694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 123 }, { "epoch": 0.04165507540009553, "grad_norm": 0.2165442258119583, "learning_rate": 5.5156950672645745e-06, "loss": 0.7985, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 124 }, { "epoch": 0.041991003427515654, "grad_norm": 0.18957743048667908, "learning_rate": 5.560538116591929e-06, "loss": 0.771, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 125 }, { "epoch": 0.04232693145493578, "grad_norm": 0.2394307404756546, "learning_rate": 5.605381165919282e-06, "loss": 0.7981, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 126 }, { "epoch": 0.0426628594823559, "grad_norm": 0.2289624959230423, "learning_rate": 5.6502242152466365e-06, "loss": 0.8051, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 127 }, { "epoch": 0.04299878750977603, "grad_norm": 0.23757943511009216, "learning_rate": 5.695067264573992e-06, "loss": 0.7759, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 128 }, { "epoch": 0.04333471553719616, "grad_norm": 0.2629515826702118, "learning_rate": 5.739910313901346e-06, "loss": 0.787, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 129 }, { "epoch": 0.04367064356461628, "grad_norm": 0.20363588631153107, "learning_rate": 5.7847533632287e-06, "loss": 0.7777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 130 }, { "epoch": 0.04400657159203641, "grad_norm": 0.27970632910728455, "learning_rate": 5.8295964125560544e-06, "loss": 0.7861, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 131 }, { "epoch": 0.04434249961945653, "grad_norm": 0.22093512117862701, "learning_rate": 5.874439461883409e-06, "loss": 0.7829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 132 }, { "epoch": 0.044678427646876656, "grad_norm": 0.2382962852716446, "learning_rate": 5.919282511210763e-06, "loss": 0.7978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 133 }, { "epoch": 0.045014355674296784, "grad_norm": 0.2365463227033615, "learning_rate": 5.964125560538116e-06, "loss": 0.7718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 134 }, { "epoch": 0.045350283701716905, "grad_norm": 0.24860383570194244, "learning_rate": 6.008968609865471e-06, "loss": 0.8151, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 135 }, { "epoch": 0.04568621172913703, "grad_norm": 0.22541092336177826, "learning_rate": 6.053811659192825e-06, "loss": 0.7882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 136 }, { "epoch": 0.04602213975655716, "grad_norm": 0.22145599126815796, "learning_rate": 6.09865470852018e-06, "loss": 0.8171, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 137 }, { "epoch": 0.04635806778397728, "grad_norm": 0.22580771148204803, "learning_rate": 6.143497757847534e-06, "loss": 0.7675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 138 }, { "epoch": 0.04669399581139741, "grad_norm": 0.24297337234020233, "learning_rate": 6.188340807174889e-06, "loss": 0.7694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 139 }, { "epoch": 0.04702992383881753, "grad_norm": 0.24827122688293457, "learning_rate": 6.233183856502243e-06, "loss": 0.7882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 140 }, { "epoch": 0.04736585186623766, "grad_norm": 0.2127746194601059, "learning_rate": 6.278026905829597e-06, "loss": 0.7866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 141 }, { "epoch": 0.047701779893657786, "grad_norm": 0.28129422664642334, "learning_rate": 6.322869955156951e-06, "loss": 0.8026, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 142 }, { "epoch": 0.04803770792107791, "grad_norm": 0.2397235929965973, "learning_rate": 6.367713004484305e-06, "loss": 0.7943, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 143 }, { "epoch": 0.048373635948498035, "grad_norm": 0.22738862037658691, "learning_rate": 6.412556053811659e-06, "loss": 0.7814, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 144 }, { "epoch": 0.048709563975918156, "grad_norm": 0.26011911034584045, "learning_rate": 6.457399103139013e-06, "loss": 0.791, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 145 }, { "epoch": 0.049045492003338284, "grad_norm": 0.4176780581474304, "learning_rate": 6.5022421524663685e-06, "loss": 0.763, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 146 }, { "epoch": 0.04938142003075841, "grad_norm": 0.40030360221862793, "learning_rate": 6.547085201793723e-06, "loss": 0.7733, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 147 }, { "epoch": 0.04971734805817853, "grad_norm": 0.28602272272109985, "learning_rate": 6.591928251121077e-06, "loss": 0.7947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 148 }, { "epoch": 0.05005327608559866, "grad_norm": 0.3586910367012024, "learning_rate": 6.636771300448431e-06, "loss": 0.786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 149 }, { "epoch": 0.05038920411301879, "grad_norm": 0.3394688367843628, "learning_rate": 6.681614349775786e-06, "loss": 0.7834, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 150 }, { "epoch": 0.05072513214043891, "grad_norm": 0.2922273874282837, "learning_rate": 6.72645739910314e-06, "loss": 0.7837, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 151 }, { "epoch": 0.05106106016785904, "grad_norm": 0.3014032244682312, "learning_rate": 6.771300448430493e-06, "loss": 0.8002, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 152 }, { "epoch": 0.05139698819527916, "grad_norm": 0.2510310113430023, "learning_rate": 6.8161434977578476e-06, "loss": 0.7584, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 153 }, { "epoch": 0.051732916222699286, "grad_norm": 0.23485828936100006, "learning_rate": 6.860986547085202e-06, "loss": 0.7946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 154 }, { "epoch": 0.052068844250119414, "grad_norm": 0.34359028935432434, "learning_rate": 6.905829596412557e-06, "loss": 0.7715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 155 }, { "epoch": 0.052404772277539535, "grad_norm": 0.363484650850296, "learning_rate": 6.950672645739911e-06, "loss": 0.779, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 156 }, { "epoch": 0.05274070030495966, "grad_norm": 0.2708715796470642, "learning_rate": 6.9955156950672655e-06, "loss": 0.7816, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 157 }, { "epoch": 0.05307662833237979, "grad_norm": 0.2988828122615814, "learning_rate": 7.04035874439462e-06, "loss": 0.7626, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 158 }, { "epoch": 0.05341255635979991, "grad_norm": 0.23665396869182587, "learning_rate": 7.085201793721974e-06, "loss": 0.7739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 159 }, { "epoch": 0.05374848438722004, "grad_norm": 0.2721739709377289, "learning_rate": 7.1300448430493275e-06, "loss": 0.7741, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 160 }, { "epoch": 0.05408441241464016, "grad_norm": 0.2932314872741699, "learning_rate": 7.174887892376682e-06, "loss": 0.775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 161 }, { "epoch": 0.05442034044206029, "grad_norm": 0.30246731638908386, "learning_rate": 7.219730941704036e-06, "loss": 0.7765, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 162 }, { "epoch": 0.05475626846948042, "grad_norm": 0.2727848291397095, "learning_rate": 7.26457399103139e-06, "loss": 0.7619, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 163 }, { "epoch": 0.05509219649690054, "grad_norm": 0.37808355689048767, "learning_rate": 7.309417040358745e-06, "loss": 0.7937, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 164 }, { "epoch": 0.055428124524320666, "grad_norm": 0.34578219056129456, "learning_rate": 7.3542600896861e-06, "loss": 0.764, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 165 }, { "epoch": 0.055764052551740786, "grad_norm": 0.31241750717163086, "learning_rate": 7.399103139013454e-06, "loss": 0.7827, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 166 }, { "epoch": 0.056099980579160914, "grad_norm": 0.25097721815109253, "learning_rate": 7.443946188340808e-06, "loss": 0.7682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 167 }, { "epoch": 0.05643590860658104, "grad_norm": 0.27261844277381897, "learning_rate": 7.4887892376681625e-06, "loss": 0.7719, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 168 }, { "epoch": 0.05677183663400116, "grad_norm": 0.24054524302482605, "learning_rate": 7.533632286995516e-06, "loss": 0.7887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 169 }, { "epoch": 0.05710776466142129, "grad_norm": 0.4097549617290497, "learning_rate": 7.57847533632287e-06, "loss": 0.778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 170 }, { "epoch": 0.05744369268884142, "grad_norm": 0.3837467133998871, "learning_rate": 7.6233183856502244e-06, "loss": 0.789, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 171 }, { "epoch": 0.05777962071626154, "grad_norm": 0.5347974300384521, "learning_rate": 7.668161434977579e-06, "loss": 0.7734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 172 }, { "epoch": 0.05811554874368167, "grad_norm": 0.48915672302246094, "learning_rate": 7.713004484304933e-06, "loss": 0.778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 173 }, { "epoch": 0.05845147677110179, "grad_norm": 0.3636902868747711, "learning_rate": 7.757847533632287e-06, "loss": 0.7497, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 174 }, { "epoch": 0.05878740479852192, "grad_norm": 0.29025304317474365, "learning_rate": 7.802690582959642e-06, "loss": 0.778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 175 }, { "epoch": 0.059123332825942045, "grad_norm": 0.32287493348121643, "learning_rate": 7.847533632286996e-06, "loss": 0.7698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 176 }, { "epoch": 0.059459260853362166, "grad_norm": 0.413186252117157, "learning_rate": 7.89237668161435e-06, "loss": 0.747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 177 }, { "epoch": 0.05979518888078229, "grad_norm": 0.3426903188228607, "learning_rate": 7.937219730941704e-06, "loss": 0.7938, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 178 }, { "epoch": 0.060131116908202414, "grad_norm": 0.3655383586883545, "learning_rate": 7.982062780269059e-06, "loss": 0.7666, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 179 }, { "epoch": 0.06046704493562254, "grad_norm": 0.24941660463809967, "learning_rate": 8.026905829596413e-06, "loss": 0.779, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 180 }, { "epoch": 0.06080297296304267, "grad_norm": 0.2791953980922699, "learning_rate": 8.071748878923767e-06, "loss": 0.7672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 181 }, { "epoch": 0.06113890099046279, "grad_norm": 0.3397291600704193, "learning_rate": 8.116591928251121e-06, "loss": 0.7665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 182 }, { "epoch": 0.06147482901788292, "grad_norm": 0.2946860194206238, "learning_rate": 8.161434977578476e-06, "loss": 0.7631, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 183 }, { "epoch": 0.06181075704530305, "grad_norm": 0.2854785919189453, "learning_rate": 8.20627802690583e-06, "loss": 0.7579, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 184 }, { "epoch": 0.06214668507272317, "grad_norm": 0.22444851696491241, "learning_rate": 8.251121076233184e-06, "loss": 0.7903, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 185 }, { "epoch": 0.062482613100143296, "grad_norm": 0.28160595893859863, "learning_rate": 8.295964125560539e-06, "loss": 0.7457, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 186 }, { "epoch": 0.06281854112756342, "grad_norm": 0.31287112832069397, "learning_rate": 8.340807174887893e-06, "loss": 0.7685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 187 }, { "epoch": 0.06315446915498354, "grad_norm": 0.3024842441082001, "learning_rate": 8.385650224215247e-06, "loss": 0.746, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 188 }, { "epoch": 0.06349039718240367, "grad_norm": 0.2338876575231552, "learning_rate": 8.430493273542601e-06, "loss": 0.7531, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 189 }, { "epoch": 0.0638263252098238, "grad_norm": 0.2685573101043701, "learning_rate": 8.475336322869956e-06, "loss": 0.7554, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 190 }, { "epoch": 0.06416225323724392, "grad_norm": 0.2742181420326233, "learning_rate": 8.52017937219731e-06, "loss": 0.75, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 191 }, { "epoch": 0.06449818126466404, "grad_norm": 0.36952659487724304, "learning_rate": 8.565022421524664e-06, "loss": 0.7828, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 192 }, { "epoch": 0.06483410929208418, "grad_norm": 0.4237433671951294, "learning_rate": 8.609865470852018e-06, "loss": 0.7436, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 193 }, { "epoch": 0.0651700373195043, "grad_norm": 0.35980141162872314, "learning_rate": 8.654708520179373e-06, "loss": 0.7796, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 194 }, { "epoch": 0.06550596534692442, "grad_norm": 0.25641289353370667, "learning_rate": 8.699551569506727e-06, "loss": 0.7846, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 195 }, { "epoch": 0.06584189337434454, "grad_norm": 0.2813802659511566, "learning_rate": 8.744394618834081e-06, "loss": 0.7716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 196 }, { "epoch": 0.06617782140176467, "grad_norm": 0.32241153717041016, "learning_rate": 8.789237668161435e-06, "loss": 0.7634, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 197 }, { "epoch": 0.0665137494291848, "grad_norm": 0.37342357635498047, "learning_rate": 8.83408071748879e-06, "loss": 0.7749, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 198 }, { "epoch": 0.06684967745660492, "grad_norm": 0.5185228586196899, "learning_rate": 8.878923766816144e-06, "loss": 0.7621, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 199 }, { "epoch": 0.06718560548402505, "grad_norm": 0.38124582171440125, "learning_rate": 8.923766816143498e-06, "loss": 0.7817, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 200 }, { "epoch": 0.06752153351144517, "grad_norm": 0.59307461977005, "learning_rate": 8.968609865470853e-06, "loss": 0.7729, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 201 }, { "epoch": 0.0678574615388653, "grad_norm": 0.42220306396484375, "learning_rate": 9.013452914798207e-06, "loss": 0.7545, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 202 }, { "epoch": 0.06819338956628543, "grad_norm": 0.5302855968475342, "learning_rate": 9.058295964125561e-06, "loss": 0.7628, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 203 }, { "epoch": 0.06852931759370555, "grad_norm": 0.3327699601650238, "learning_rate": 9.103139013452915e-06, "loss": 0.7491, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 204 }, { "epoch": 0.06886524562112567, "grad_norm": 0.4224725067615509, "learning_rate": 9.14798206278027e-06, "loss": 0.7718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 205 }, { "epoch": 0.0692011736485458, "grad_norm": 0.35874143242836, "learning_rate": 9.192825112107624e-06, "loss": 0.7647, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 206 }, { "epoch": 0.06953710167596593, "grad_norm": 0.3626912236213684, "learning_rate": 9.237668161434978e-06, "loss": 0.756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 207 }, { "epoch": 0.06987302970338605, "grad_norm": 0.30916160345077515, "learning_rate": 9.282511210762332e-06, "loss": 0.7563, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 208 }, { "epoch": 0.07020895773080617, "grad_norm": 0.3367006778717041, "learning_rate": 9.327354260089687e-06, "loss": 0.7643, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 209 }, { "epoch": 0.0705448857582263, "grad_norm": 0.2956836521625519, "learning_rate": 9.372197309417041e-06, "loss": 0.7696, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 210 }, { "epoch": 0.07088081378564642, "grad_norm": 0.3731004297733307, "learning_rate": 9.417040358744395e-06, "loss": 0.7425, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 211 }, { "epoch": 0.07121674181306654, "grad_norm": 0.3679034113883972, "learning_rate": 9.46188340807175e-06, "loss": 0.7542, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 212 }, { "epoch": 0.07155266984048668, "grad_norm": 0.41871514916419983, "learning_rate": 9.506726457399104e-06, "loss": 0.7584, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 213 }, { "epoch": 0.0718885978679068, "grad_norm": 0.34097611904144287, "learning_rate": 9.551569506726458e-06, "loss": 0.7814, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 214 }, { "epoch": 0.07222452589532692, "grad_norm": 0.2977738082408905, "learning_rate": 9.596412556053812e-06, "loss": 0.753, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 215 }, { "epoch": 0.07256045392274706, "grad_norm": 0.21634522080421448, "learning_rate": 9.641255605381167e-06, "loss": 0.7334, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 216 }, { "epoch": 0.07289638195016718, "grad_norm": 0.32392317056655884, "learning_rate": 9.686098654708521e-06, "loss": 0.7554, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 217 }, { "epoch": 0.0732323099775873, "grad_norm": 0.3450508713722229, "learning_rate": 9.730941704035875e-06, "loss": 0.7666, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 218 }, { "epoch": 0.07356823800500743, "grad_norm": 0.3506256341934204, "learning_rate": 9.77578475336323e-06, "loss": 0.7589, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 219 }, { "epoch": 0.07390416603242755, "grad_norm": 0.2918302118778229, "learning_rate": 9.820627802690584e-06, "loss": 0.7614, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 220 }, { "epoch": 0.07424009405984767, "grad_norm": 0.2806834280490875, "learning_rate": 9.865470852017938e-06, "loss": 0.7576, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 221 }, { "epoch": 0.07457602208726781, "grad_norm": 0.26966360211372375, "learning_rate": 9.910313901345292e-06, "loss": 0.7461, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 222 }, { "epoch": 0.07491195011468793, "grad_norm": 0.3353106379508972, "learning_rate": 9.955156950672647e-06, "loss": 0.7466, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 223 }, { "epoch": 0.07524787814210805, "grad_norm": 0.34950312972068787, "learning_rate": 1e-05, "loss": 0.7764, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 224 }, { "epoch": 0.07558380616952817, "grad_norm": 0.33150148391723633, "learning_rate": 1.0044843049327355e-05, "loss": 0.7766, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 225 }, { "epoch": 0.07591973419694831, "grad_norm": 0.2445436716079712, "learning_rate": 1.008968609865471e-05, "loss": 0.7739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 226 }, { "epoch": 0.07625566222436843, "grad_norm": 0.2229977399110794, "learning_rate": 1.0134529147982064e-05, "loss": 0.747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 227 }, { "epoch": 0.07659159025178855, "grad_norm": 0.26681801676750183, "learning_rate": 1.0179372197309418e-05, "loss": 0.7771, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 228 }, { "epoch": 0.07692751827920868, "grad_norm": 0.29688164591789246, "learning_rate": 1.0224215246636772e-05, "loss": 0.7478, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 229 }, { "epoch": 0.0772634463066288, "grad_norm": 0.31546106934547424, "learning_rate": 1.0269058295964126e-05, "loss": 0.7703, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 230 }, { "epoch": 0.07759937433404893, "grad_norm": 0.27454447746276855, "learning_rate": 1.0313901345291482e-05, "loss": 0.7488, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 231 }, { "epoch": 0.07793530236146906, "grad_norm": 0.3007771372795105, "learning_rate": 1.0358744394618837e-05, "loss": 0.7456, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 232 }, { "epoch": 0.07827123038888918, "grad_norm": 0.2504352927207947, "learning_rate": 1.0403587443946188e-05, "loss": 0.7508, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 233 }, { "epoch": 0.0786071584163093, "grad_norm": 0.328306645154953, "learning_rate": 1.0448430493273542e-05, "loss": 0.7412, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 234 }, { "epoch": 0.07894308644372944, "grad_norm": 0.24571651220321655, "learning_rate": 1.0493273542600896e-05, "loss": 0.7437, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 235 }, { "epoch": 0.07927901447114956, "grad_norm": 0.27028197050094604, "learning_rate": 1.0538116591928252e-05, "loss": 0.7508, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 236 }, { "epoch": 0.07961494249856968, "grad_norm": 0.2421112358570099, "learning_rate": 1.0582959641255606e-05, "loss": 0.7724, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 237 }, { "epoch": 0.0799508705259898, "grad_norm": 0.2603287100791931, "learning_rate": 1.062780269058296e-05, "loss": 0.7354, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 238 }, { "epoch": 0.08028679855340994, "grad_norm": 0.25885719060897827, "learning_rate": 1.0672645739910315e-05, "loss": 0.7388, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 239 }, { "epoch": 0.08062272658083006, "grad_norm": 0.23663648962974548, "learning_rate": 1.071748878923767e-05, "loss": 0.7673, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 240 }, { "epoch": 0.08095865460825018, "grad_norm": 0.22866694629192352, "learning_rate": 1.0762331838565023e-05, "loss": 0.7966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 241 }, { "epoch": 0.08129458263567031, "grad_norm": 0.2709510624408722, "learning_rate": 1.0807174887892378e-05, "loss": 0.7452, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 242 }, { "epoch": 0.08163051066309043, "grad_norm": 0.2355438470840454, "learning_rate": 1.0852017937219732e-05, "loss": 0.7468, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 243 }, { "epoch": 0.08196643869051055, "grad_norm": 0.24850589036941528, "learning_rate": 1.0896860986547086e-05, "loss": 0.7568, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 244 }, { "epoch": 0.08230236671793069, "grad_norm": 0.2316807210445404, "learning_rate": 1.094170403587444e-05, "loss": 0.7698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 245 }, { "epoch": 0.08263829474535081, "grad_norm": 0.25488361716270447, "learning_rate": 1.0986547085201795e-05, "loss": 0.7639, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 246 }, { "epoch": 0.08297422277277093, "grad_norm": 0.24398262798786163, "learning_rate": 1.1031390134529149e-05, "loss": 0.7498, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 247 }, { "epoch": 0.08331015080019107, "grad_norm": 0.2506527900695801, "learning_rate": 1.1076233183856503e-05, "loss": 0.7411, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 248 }, { "epoch": 0.08364607882761119, "grad_norm": 0.2662225663661957, "learning_rate": 1.1121076233183858e-05, "loss": 0.7454, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 249 }, { "epoch": 0.08398200685503131, "grad_norm": 0.2969960868358612, "learning_rate": 1.116591928251121e-05, "loss": 0.7646, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 250 }, { "epoch": 0.08431793488245143, "grad_norm": 0.3448776304721832, "learning_rate": 1.1210762331838564e-05, "loss": 0.7465, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 251 }, { "epoch": 0.08465386290987156, "grad_norm": 0.45635470747947693, "learning_rate": 1.1255605381165919e-05, "loss": 0.7564, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 252 }, { "epoch": 0.08498979093729168, "grad_norm": 0.5502303838729858, "learning_rate": 1.1300448430493273e-05, "loss": 0.7466, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 253 }, { "epoch": 0.0853257189647118, "grad_norm": 0.5761117339134216, "learning_rate": 1.1345291479820629e-05, "loss": 0.7578, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 254 }, { "epoch": 0.08566164699213194, "grad_norm": 0.503360390663147, "learning_rate": 1.1390134529147983e-05, "loss": 0.7474, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 255 }, { "epoch": 0.08599757501955206, "grad_norm": 0.4525907039642334, "learning_rate": 1.1434977578475338e-05, "loss": 0.7391, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 256 }, { "epoch": 0.08633350304697218, "grad_norm": 0.4241446554660797, "learning_rate": 1.1479820627802692e-05, "loss": 0.7439, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 257 }, { "epoch": 0.08666943107439232, "grad_norm": 0.358236700296402, "learning_rate": 1.1524663677130046e-05, "loss": 0.754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 258 }, { "epoch": 0.08700535910181244, "grad_norm": 0.31197747588157654, "learning_rate": 1.15695067264574e-05, "loss": 0.768, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 259 }, { "epoch": 0.08734128712923256, "grad_norm": 0.3198027014732361, "learning_rate": 1.1614349775784755e-05, "loss": 0.7502, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 260 }, { "epoch": 0.0876772151566527, "grad_norm": 0.342817485332489, "learning_rate": 1.1659192825112109e-05, "loss": 0.7235, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 261 }, { "epoch": 0.08801314318407281, "grad_norm": 0.269522488117218, "learning_rate": 1.1704035874439463e-05, "loss": 0.7377, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 262 }, { "epoch": 0.08834907121149294, "grad_norm": 0.2761046588420868, "learning_rate": 1.1748878923766817e-05, "loss": 0.7511, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 263 }, { "epoch": 0.08868499923891306, "grad_norm": 0.32368004322052, "learning_rate": 1.1793721973094172e-05, "loss": 0.7501, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 264 }, { "epoch": 0.08902092726633319, "grad_norm": 0.3630366921424866, "learning_rate": 1.1838565022421526e-05, "loss": 0.7368, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 265 }, { "epoch": 0.08935685529375331, "grad_norm": 0.3047336935997009, "learning_rate": 1.188340807174888e-05, "loss": 0.763, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 266 }, { "epoch": 0.08969278332117343, "grad_norm": 0.29261645674705505, "learning_rate": 1.1928251121076233e-05, "loss": 0.7707, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 267 }, { "epoch": 0.09002871134859357, "grad_norm": 0.22888028621673584, "learning_rate": 1.1973094170403587e-05, "loss": 0.7533, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 268 }, { "epoch": 0.09036463937601369, "grad_norm": 0.2985810339450836, "learning_rate": 1.2017937219730941e-05, "loss": 0.7326, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 269 }, { "epoch": 0.09070056740343381, "grad_norm": 0.3387927711009979, "learning_rate": 1.2062780269058296e-05, "loss": 0.7467, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 270 }, { "epoch": 0.09103649543085394, "grad_norm": 0.32486864924430847, "learning_rate": 1.210762331838565e-05, "loss": 0.747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 271 }, { "epoch": 0.09137242345827407, "grad_norm": 0.33888542652130127, "learning_rate": 1.2152466367713006e-05, "loss": 0.7616, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 272 }, { "epoch": 0.09170835148569419, "grad_norm": 0.3376010060310364, "learning_rate": 1.219730941704036e-05, "loss": 0.7493, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 273 }, { "epoch": 0.09204427951311432, "grad_norm": 0.34437698125839233, "learning_rate": 1.2242152466367714e-05, "loss": 0.7534, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 274 }, { "epoch": 0.09238020754053444, "grad_norm": 0.3245162069797516, "learning_rate": 1.2286995515695069e-05, "loss": 0.7577, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 275 }, { "epoch": 0.09271613556795456, "grad_norm": 0.3036693334579468, "learning_rate": 1.2331838565022423e-05, "loss": 0.751, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 276 }, { "epoch": 0.09305206359537468, "grad_norm": 0.3565311133861542, "learning_rate": 1.2376681614349777e-05, "loss": 0.7672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 277 }, { "epoch": 0.09338799162279482, "grad_norm": 0.4365248680114746, "learning_rate": 1.2421524663677131e-05, "loss": 0.7511, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 278 }, { "epoch": 0.09372391965021494, "grad_norm": 0.3977624773979187, "learning_rate": 1.2466367713004486e-05, "loss": 0.7551, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 279 }, { "epoch": 0.09405984767763506, "grad_norm": 0.350166380405426, "learning_rate": 1.251121076233184e-05, "loss": 0.7417, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 280 }, { "epoch": 0.0943957757050552, "grad_norm": 0.4158853590488434, "learning_rate": 1.2556053811659194e-05, "loss": 0.7385, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 281 }, { "epoch": 0.09473170373247532, "grad_norm": 0.390340119600296, "learning_rate": 1.2600896860986549e-05, "loss": 0.7099, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 282 }, { "epoch": 0.09506763175989544, "grad_norm": 0.35540148615837097, "learning_rate": 1.2645739910313903e-05, "loss": 0.7441, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 283 }, { "epoch": 0.09540355978731557, "grad_norm": 0.39935165643692017, "learning_rate": 1.2690582959641257e-05, "loss": 0.7244, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 284 }, { "epoch": 0.0957394878147357, "grad_norm": 0.2923356890678406, "learning_rate": 1.273542600896861e-05, "loss": 0.7354, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 285 }, { "epoch": 0.09607541584215581, "grad_norm": 0.2789289355278015, "learning_rate": 1.2780269058295964e-05, "loss": 0.7623, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 286 }, { "epoch": 0.09641134386957595, "grad_norm": 0.3072217106819153, "learning_rate": 1.2825112107623318e-05, "loss": 0.7431, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 287 }, { "epoch": 0.09674727189699607, "grad_norm": 0.31003618240356445, "learning_rate": 1.2869955156950672e-05, "loss": 0.7624, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 288 }, { "epoch": 0.09708319992441619, "grad_norm": 0.30320051312446594, "learning_rate": 1.2914798206278027e-05, "loss": 0.7387, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 289 }, { "epoch": 0.09741912795183631, "grad_norm": 0.25342392921447754, "learning_rate": 1.2959641255605383e-05, "loss": 0.7334, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 290 }, { "epoch": 0.09775505597925645, "grad_norm": 0.25648149847984314, "learning_rate": 1.3004484304932737e-05, "loss": 0.7301, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 291 }, { "epoch": 0.09809098400667657, "grad_norm": 0.3587304949760437, "learning_rate": 1.3049327354260091e-05, "loss": 0.7688, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 292 }, { "epoch": 0.09842691203409669, "grad_norm": 0.37391534447669983, "learning_rate": 1.3094170403587446e-05, "loss": 0.7524, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 293 }, { "epoch": 0.09876284006151682, "grad_norm": 0.34183064103126526, "learning_rate": 1.31390134529148e-05, "loss": 0.7389, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 294 }, { "epoch": 0.09909876808893694, "grad_norm": 0.34093812108039856, "learning_rate": 1.3183856502242154e-05, "loss": 0.7541, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 295 }, { "epoch": 0.09943469611635707, "grad_norm": 0.3083878457546234, "learning_rate": 1.3228699551569508e-05, "loss": 0.7639, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 296 }, { "epoch": 0.0997706241437772, "grad_norm": 0.31219786405563354, "learning_rate": 1.3273542600896863e-05, "loss": 0.7336, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 297 }, { "epoch": 0.10010655217119732, "grad_norm": 0.3397115468978882, "learning_rate": 1.3318385650224217e-05, "loss": 0.7259, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 298 }, { "epoch": 0.10044248019861744, "grad_norm": 0.3546832501888275, "learning_rate": 1.3363228699551571e-05, "loss": 0.7383, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 299 }, { "epoch": 0.10077840822603758, "grad_norm": 0.2705903947353363, "learning_rate": 1.3408071748878925e-05, "loss": 0.7358, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 300 }, { "epoch": 0.1011143362534577, "grad_norm": 0.2981431484222412, "learning_rate": 1.345291479820628e-05, "loss": 0.7521, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 301 }, { "epoch": 0.10145026428087782, "grad_norm": 0.34352046251296997, "learning_rate": 1.3497757847533632e-05, "loss": 0.7382, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 302 }, { "epoch": 0.10178619230829794, "grad_norm": 0.44481974840164185, "learning_rate": 1.3542600896860987e-05, "loss": 0.7702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 303 }, { "epoch": 0.10212212033571808, "grad_norm": 0.3619122803211212, "learning_rate": 1.358744394618834e-05, "loss": 0.7328, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 304 }, { "epoch": 0.1024580483631382, "grad_norm": 0.42312827706336975, "learning_rate": 1.3632286995515695e-05, "loss": 0.7539, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 305 }, { "epoch": 0.10279397639055832, "grad_norm": 0.5381478071212769, "learning_rate": 1.367713004484305e-05, "loss": 0.7658, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 306 }, { "epoch": 0.10312990441797845, "grad_norm": 0.6038787961006165, "learning_rate": 1.3721973094170404e-05, "loss": 0.73, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 307 }, { "epoch": 0.10346583244539857, "grad_norm": 0.6978864669799805, "learning_rate": 1.3766816143497758e-05, "loss": 0.7225, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 308 }, { "epoch": 0.1038017604728187, "grad_norm": 0.69808429479599, "learning_rate": 1.3811659192825114e-05, "loss": 0.7574, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 309 }, { "epoch": 0.10413768850023883, "grad_norm": 0.5132732391357422, "learning_rate": 1.3856502242152468e-05, "loss": 0.7387, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 310 }, { "epoch": 0.10447361652765895, "grad_norm": 0.49070775508880615, "learning_rate": 1.3901345291479822e-05, "loss": 0.7393, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 311 }, { "epoch": 0.10480954455507907, "grad_norm": 0.5676565766334534, "learning_rate": 1.3946188340807177e-05, "loss": 0.7463, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 312 }, { "epoch": 0.1051454725824992, "grad_norm": 0.4991559684276581, "learning_rate": 1.3991031390134531e-05, "loss": 0.744, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 313 }, { "epoch": 0.10548140060991933, "grad_norm": 0.4894642233848572, "learning_rate": 1.4035874439461885e-05, "loss": 0.7531, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 314 }, { "epoch": 0.10581732863733945, "grad_norm": 0.41843560338020325, "learning_rate": 1.408071748878924e-05, "loss": 0.7493, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 315 }, { "epoch": 0.10615325666475958, "grad_norm": 0.3178127408027649, "learning_rate": 1.4125560538116594e-05, "loss": 0.7397, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 316 }, { "epoch": 0.1064891846921797, "grad_norm": 0.4183482229709625, "learning_rate": 1.4170403587443948e-05, "loss": 0.7083, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 317 }, { "epoch": 0.10682511271959982, "grad_norm": 0.3971503973007202, "learning_rate": 1.4215246636771302e-05, "loss": 0.7123, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 318 }, { "epoch": 0.10716104074701994, "grad_norm": 0.2594365179538727, "learning_rate": 1.4260089686098655e-05, "loss": 0.7215, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 319 }, { "epoch": 0.10749696877444008, "grad_norm": 0.40541791915893555, "learning_rate": 1.430493273542601e-05, "loss": 0.7466, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 320 }, { "epoch": 0.1078328968018602, "grad_norm": 0.36365875601768494, "learning_rate": 1.4349775784753363e-05, "loss": 0.7495, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 321 }, { "epoch": 0.10816882482928032, "grad_norm": 0.2962689995765686, "learning_rate": 1.4394618834080718e-05, "loss": 0.7286, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 322 }, { "epoch": 0.10850475285670046, "grad_norm": 0.341854453086853, "learning_rate": 1.4439461883408072e-05, "loss": 0.7547, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 323 }, { "epoch": 0.10884068088412058, "grad_norm": 0.35176631808280945, "learning_rate": 1.4484304932735426e-05, "loss": 0.7391, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 324 }, { "epoch": 0.1091766089115407, "grad_norm": 0.256756067276001, "learning_rate": 1.452914798206278e-05, "loss": 0.7207, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 325 }, { "epoch": 0.10951253693896083, "grad_norm": 0.3201386332511902, "learning_rate": 1.4573991031390135e-05, "loss": 0.7306, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 326 }, { "epoch": 0.10984846496638095, "grad_norm": 0.35865697264671326, "learning_rate": 1.461883408071749e-05, "loss": 0.7383, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 327 }, { "epoch": 0.11018439299380108, "grad_norm": 0.2734122574329376, "learning_rate": 1.4663677130044845e-05, "loss": 0.7297, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 328 }, { "epoch": 0.11052032102122121, "grad_norm": 0.32462388277053833, "learning_rate": 1.47085201793722e-05, "loss": 0.7352, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 329 }, { "epoch": 0.11085624904864133, "grad_norm": 0.2905089557170868, "learning_rate": 1.4753363228699554e-05, "loss": 0.7401, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 330 }, { "epoch": 0.11119217707606145, "grad_norm": 0.26488345861434937, "learning_rate": 1.4798206278026908e-05, "loss": 0.7258, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 331 }, { "epoch": 0.11152810510348157, "grad_norm": 0.3152569830417633, "learning_rate": 1.4843049327354262e-05, "loss": 0.7185, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 332 }, { "epoch": 0.11186403313090171, "grad_norm": 0.37020164728164673, "learning_rate": 1.4887892376681616e-05, "loss": 0.7447, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 333 }, { "epoch": 0.11219996115832183, "grad_norm": 0.3870641589164734, "learning_rate": 1.493273542600897e-05, "loss": 0.7473, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 334 }, { "epoch": 0.11253588918574195, "grad_norm": 0.39520522952079773, "learning_rate": 1.4977578475336325e-05, "loss": 0.7317, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 335 }, { "epoch": 0.11287181721316208, "grad_norm": 0.3331829607486725, "learning_rate": 1.5022421524663678e-05, "loss": 0.7212, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 336 }, { "epoch": 0.1132077452405822, "grad_norm": 0.2510432302951813, "learning_rate": 1.5067264573991032e-05, "loss": 0.7341, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 337 }, { "epoch": 0.11354367326800233, "grad_norm": 0.3175155818462372, "learning_rate": 1.5112107623318386e-05, "loss": 0.7488, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 338 }, { "epoch": 0.11387960129542246, "grad_norm": 0.29336267709732056, "learning_rate": 1.515695067264574e-05, "loss": 0.7192, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 339 }, { "epoch": 0.11421552932284258, "grad_norm": 0.3267248570919037, "learning_rate": 1.5201793721973095e-05, "loss": 0.7208, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 340 }, { "epoch": 0.1145514573502627, "grad_norm": 0.3193361163139343, "learning_rate": 1.5246636771300449e-05, "loss": 0.7595, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 341 }, { "epoch": 0.11488738537768284, "grad_norm": 0.32993587851524353, "learning_rate": 1.5291479820627803e-05, "loss": 0.7294, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 342 }, { "epoch": 0.11522331340510296, "grad_norm": 0.3010084927082062, "learning_rate": 1.5336322869955157e-05, "loss": 0.7261, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 343 }, { "epoch": 0.11555924143252308, "grad_norm": 0.3054952919483185, "learning_rate": 1.538116591928251e-05, "loss": 0.7186, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 344 }, { "epoch": 0.1158951694599432, "grad_norm": 0.3478594422340393, "learning_rate": 1.5426008968609866e-05, "loss": 0.7435, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 345 }, { "epoch": 0.11623109748736334, "grad_norm": 0.3793451488018036, "learning_rate": 1.547085201793722e-05, "loss": 0.7297, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 346 }, { "epoch": 0.11656702551478346, "grad_norm": 0.43494248390197754, "learning_rate": 1.5515695067264575e-05, "loss": 0.735, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 347 }, { "epoch": 0.11690295354220358, "grad_norm": 0.5383706092834473, "learning_rate": 1.556053811659193e-05, "loss": 0.7367, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 348 }, { "epoch": 0.11723888156962371, "grad_norm": 0.5652306079864502, "learning_rate": 1.5605381165919283e-05, "loss": 0.7414, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 349 }, { "epoch": 0.11757480959704383, "grad_norm": 0.48770326375961304, "learning_rate": 1.5650224215246637e-05, "loss": 0.7502, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 350 }, { "epoch": 0.11791073762446395, "grad_norm": 0.3837064206600189, "learning_rate": 1.569506726457399e-05, "loss": 0.7443, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 351 }, { "epoch": 0.11824666565188409, "grad_norm": 0.5008506774902344, "learning_rate": 1.573991031390135e-05, "loss": 0.7454, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 352 }, { "epoch": 0.11858259367930421, "grad_norm": 0.5485404133796692, "learning_rate": 1.57847533632287e-05, "loss": 0.769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 353 }, { "epoch": 0.11891852170672433, "grad_norm": 0.5221803784370422, "learning_rate": 1.5829596412556054e-05, "loss": 0.7305, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 354 }, { "epoch": 0.11925444973414447, "grad_norm": 0.4037081003189087, "learning_rate": 1.587443946188341e-05, "loss": 0.7491, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 355 }, { "epoch": 0.11959037776156459, "grad_norm": 0.4075310528278351, "learning_rate": 1.5919282511210763e-05, "loss": 0.7336, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 356 }, { "epoch": 0.11992630578898471, "grad_norm": 0.4597092568874359, "learning_rate": 1.5964125560538117e-05, "loss": 0.7508, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 357 }, { "epoch": 0.12026223381640483, "grad_norm": 0.5157509446144104, "learning_rate": 1.600896860986547e-05, "loss": 0.7346, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 358 }, { "epoch": 0.12059816184382496, "grad_norm": 0.35816600918769836, "learning_rate": 1.6053811659192826e-05, "loss": 0.7243, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 359 }, { "epoch": 0.12093408987124508, "grad_norm": 0.3856704831123352, "learning_rate": 1.609865470852018e-05, "loss": 0.7283, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 360 }, { "epoch": 0.1212700178986652, "grad_norm": 0.4324001371860504, "learning_rate": 1.6143497757847534e-05, "loss": 0.7542, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 361 }, { "epoch": 0.12160594592608534, "grad_norm": 0.4222812056541443, "learning_rate": 1.618834080717489e-05, "loss": 0.7258, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 362 }, { "epoch": 0.12194187395350546, "grad_norm": 0.29570016264915466, "learning_rate": 1.6233183856502243e-05, "loss": 0.7313, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 363 }, { "epoch": 0.12227780198092558, "grad_norm": 0.3093317449092865, "learning_rate": 1.6278026905829597e-05, "loss": 0.72, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 364 }, { "epoch": 0.12261373000834572, "grad_norm": 0.4275853633880615, "learning_rate": 1.632286995515695e-05, "loss": 0.7401, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 365 }, { "epoch": 0.12294965803576584, "grad_norm": 0.35608187317848206, "learning_rate": 1.6367713004484306e-05, "loss": 0.7245, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 366 }, { "epoch": 0.12328558606318596, "grad_norm": 0.2896178364753723, "learning_rate": 1.641255605381166e-05, "loss": 0.7418, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 367 }, { "epoch": 0.1236215140906061, "grad_norm": 0.31735217571258545, "learning_rate": 1.6457399103139014e-05, "loss": 0.7367, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 368 }, { "epoch": 0.12395744211802621, "grad_norm": 0.36096370220184326, "learning_rate": 1.650224215246637e-05, "loss": 0.7402, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 369 }, { "epoch": 0.12429337014544634, "grad_norm": 0.3733314573764801, "learning_rate": 1.6547085201793723e-05, "loss": 0.7109, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 370 }, { "epoch": 0.12462929817286646, "grad_norm": 0.30303943157196045, "learning_rate": 1.6591928251121077e-05, "loss": 0.719, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 371 }, { "epoch": 0.12496522620028659, "grad_norm": 0.375674843788147, "learning_rate": 1.663677130044843e-05, "loss": 0.7346, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 372 }, { "epoch": 0.1253011542277067, "grad_norm": 0.43827179074287415, "learning_rate": 1.6681614349775786e-05, "loss": 0.7533, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 373 }, { "epoch": 0.12563708225512685, "grad_norm": 0.4023887813091278, "learning_rate": 1.672645739910314e-05, "loss": 0.7429, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 374 }, { "epoch": 0.12597301028254695, "grad_norm": 0.4471234679222107, "learning_rate": 1.6771300448430494e-05, "loss": 0.756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 375 }, { "epoch": 0.1263089383099671, "grad_norm": 0.31126150488853455, "learning_rate": 1.681614349775785e-05, "loss": 0.7349, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 376 }, { "epoch": 0.12664486633738722, "grad_norm": 0.3177163898944855, "learning_rate": 1.6860986547085203e-05, "loss": 0.7267, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 377 }, { "epoch": 0.12698079436480733, "grad_norm": 0.3261823058128357, "learning_rate": 1.6905829596412557e-05, "loss": 0.7199, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 378 }, { "epoch": 0.12731672239222747, "grad_norm": 0.2811271548271179, "learning_rate": 1.695067264573991e-05, "loss": 0.7272, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 379 }, { "epoch": 0.1276526504196476, "grad_norm": 0.29284271597862244, "learning_rate": 1.6995515695067265e-05, "loss": 0.7376, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 380 }, { "epoch": 0.1279885784470677, "grad_norm": 0.35814058780670166, "learning_rate": 1.704035874439462e-05, "loss": 0.7236, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 381 }, { "epoch": 0.12832450647448784, "grad_norm": 0.4016336500644684, "learning_rate": 1.7085201793721974e-05, "loss": 0.7293, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 382 }, { "epoch": 0.12866043450190798, "grad_norm": 0.42085495591163635, "learning_rate": 1.7130044843049328e-05, "loss": 0.7369, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 383 }, { "epoch": 0.12899636252932808, "grad_norm": 0.3750099837779999, "learning_rate": 1.7174887892376683e-05, "loss": 0.7293, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 384 }, { "epoch": 0.12933229055674822, "grad_norm": 0.33130115270614624, "learning_rate": 1.7219730941704037e-05, "loss": 0.7258, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 385 }, { "epoch": 0.12966821858416835, "grad_norm": 0.32911717891693115, "learning_rate": 1.726457399103139e-05, "loss": 0.7425, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 386 }, { "epoch": 0.13000414661158846, "grad_norm": 0.36555182933807373, "learning_rate": 1.7309417040358745e-05, "loss": 0.7413, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 387 }, { "epoch": 0.1303400746390086, "grad_norm": 0.33170798420906067, "learning_rate": 1.73542600896861e-05, "loss": 0.7324, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 388 }, { "epoch": 0.13067600266642873, "grad_norm": 0.2833818793296814, "learning_rate": 1.7399103139013454e-05, "loss": 0.7345, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 389 }, { "epoch": 0.13101193069384884, "grad_norm": 0.30276745557785034, "learning_rate": 1.7443946188340808e-05, "loss": 0.7329, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 390 }, { "epoch": 0.13134785872126897, "grad_norm": 0.33039918541908264, "learning_rate": 1.7488789237668162e-05, "loss": 0.7474, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 391 }, { "epoch": 0.13168378674868908, "grad_norm": 0.27673622965812683, "learning_rate": 1.7533632286995517e-05, "loss": 0.7296, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 392 }, { "epoch": 0.13201971477610921, "grad_norm": 0.3336869776248932, "learning_rate": 1.757847533632287e-05, "loss": 0.7171, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 393 }, { "epoch": 0.13235564280352935, "grad_norm": 0.3779367506504059, "learning_rate": 1.7623318385650225e-05, "loss": 0.731, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 394 }, { "epoch": 0.13269157083094946, "grad_norm": 0.4090037941932678, "learning_rate": 1.766816143497758e-05, "loss": 0.723, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 395 }, { "epoch": 0.1330274988583696, "grad_norm": 0.36003103852272034, "learning_rate": 1.7713004484304934e-05, "loss": 0.7217, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 396 }, { "epoch": 0.13336342688578973, "grad_norm": 0.3721216022968292, "learning_rate": 1.7757847533632288e-05, "loss": 0.7247, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 397 }, { "epoch": 0.13369935491320983, "grad_norm": 0.43336623907089233, "learning_rate": 1.7802690582959642e-05, "loss": 0.7514, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 398 }, { "epoch": 0.13403528294062997, "grad_norm": 0.5095004439353943, "learning_rate": 1.7847533632286997e-05, "loss": 0.7489, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 399 }, { "epoch": 0.1343712109680501, "grad_norm": 0.40897345542907715, "learning_rate": 1.789237668161435e-05, "loss": 0.736, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 400 }, { "epoch": 0.1347071389954702, "grad_norm": 0.3651188611984253, "learning_rate": 1.7937219730941705e-05, "loss": 0.7035, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 401 }, { "epoch": 0.13504306702289035, "grad_norm": 0.4935242235660553, "learning_rate": 1.798206278026906e-05, "loss": 0.7381, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.33, "memory/max_allocated (GiB)": 64.33, "step": 402 }, { "epoch": 0.13537899505031048, "grad_norm": 0.46836796402931213, "learning_rate": 1.8026905829596414e-05, "loss": 0.7339, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 403 }, { "epoch": 0.1357149230777306, "grad_norm": 0.47353893518447876, "learning_rate": 1.8071748878923768e-05, "loss": 0.7319, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 404 }, { "epoch": 0.13605085110515072, "grad_norm": 0.47695696353912354, "learning_rate": 1.8116591928251122e-05, "loss": 0.7486, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 405 }, { "epoch": 0.13638677913257086, "grad_norm": 0.46619582176208496, "learning_rate": 1.8161434977578477e-05, "loss": 0.7418, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 406 }, { "epoch": 0.13672270715999096, "grad_norm": 0.4392988383769989, "learning_rate": 1.820627802690583e-05, "loss": 0.7446, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 407 }, { "epoch": 0.1370586351874111, "grad_norm": 0.49036988615989685, "learning_rate": 1.8251121076233185e-05, "loss": 0.7321, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 408 }, { "epoch": 0.13739456321483123, "grad_norm": 0.5554208755493164, "learning_rate": 1.829596412556054e-05, "loss": 0.7296, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 409 }, { "epoch": 0.13773049124225134, "grad_norm": 0.49399396777153015, "learning_rate": 1.8340807174887894e-05, "loss": 0.734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 410 }, { "epoch": 0.13806641926967148, "grad_norm": 0.39668038487434387, "learning_rate": 1.8385650224215248e-05, "loss": 0.7485, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 411 }, { "epoch": 0.1384023472970916, "grad_norm": 0.3053132891654968, "learning_rate": 1.8430493273542602e-05, "loss": 0.7389, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 412 }, { "epoch": 0.13873827532451172, "grad_norm": 0.34494221210479736, "learning_rate": 1.8475336322869956e-05, "loss": 0.7489, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 413 }, { "epoch": 0.13907420335193185, "grad_norm": 0.5764032602310181, "learning_rate": 1.852017937219731e-05, "loss": 0.7388, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 414 }, { "epoch": 0.139410131379352, "grad_norm": 0.7140698432922363, "learning_rate": 1.8565022421524665e-05, "loss": 0.7239, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 415 }, { "epoch": 0.1397460594067721, "grad_norm": 0.8699458241462708, "learning_rate": 1.860986547085202e-05, "loss": 0.7261, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 416 }, { "epoch": 0.14008198743419223, "grad_norm": 0.9290391802787781, "learning_rate": 1.8654708520179373e-05, "loss": 0.7291, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 417 }, { "epoch": 0.14041791546161234, "grad_norm": 0.6528632640838623, "learning_rate": 1.8699551569506728e-05, "loss": 0.7329, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 418 }, { "epoch": 0.14075384348903247, "grad_norm": 0.5138803720474243, "learning_rate": 1.8744394618834082e-05, "loss": 0.7246, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 419 }, { "epoch": 0.1410897715164526, "grad_norm": 0.5890071392059326, "learning_rate": 1.8789237668161436e-05, "loss": 0.7185, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 420 }, { "epoch": 0.1414256995438727, "grad_norm": 0.3960420787334442, "learning_rate": 1.883408071748879e-05, "loss": 0.7409, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 421 }, { "epoch": 0.14176162757129285, "grad_norm": 0.46508699655532837, "learning_rate": 1.8878923766816145e-05, "loss": 0.7421, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 422 }, { "epoch": 0.14209755559871298, "grad_norm": 0.4286838173866272, "learning_rate": 1.89237668161435e-05, "loss": 0.7453, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 423 }, { "epoch": 0.1424334836261331, "grad_norm": 0.3711211085319519, "learning_rate": 1.8968609865470853e-05, "loss": 0.7006, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 424 }, { "epoch": 0.14276941165355322, "grad_norm": 0.4176449477672577, "learning_rate": 1.9013452914798208e-05, "loss": 0.7204, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 425 }, { "epoch": 0.14310533968097336, "grad_norm": 0.37529975175857544, "learning_rate": 1.9058295964125562e-05, "loss": 0.7384, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 426 }, { "epoch": 0.14344126770839347, "grad_norm": 0.33932608366012573, "learning_rate": 1.9103139013452916e-05, "loss": 0.7142, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 427 }, { "epoch": 0.1437771957358136, "grad_norm": 0.32268038392066956, "learning_rate": 1.914798206278027e-05, "loss": 0.7352, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 428 }, { "epoch": 0.14411312376323374, "grad_norm": 0.40566569566726685, "learning_rate": 1.9192825112107625e-05, "loss": 0.7486, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 429 }, { "epoch": 0.14444905179065384, "grad_norm": 0.3235589563846588, "learning_rate": 1.923766816143498e-05, "loss": 0.7134, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 430 }, { "epoch": 0.14478497981807398, "grad_norm": 0.3987158238887787, "learning_rate": 1.9282511210762333e-05, "loss": 0.7328, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 431 }, { "epoch": 0.1451209078454941, "grad_norm": 0.4701874256134033, "learning_rate": 1.9327354260089688e-05, "loss": 0.7309, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 432 }, { "epoch": 0.14545683587291422, "grad_norm": 0.3964593708515167, "learning_rate": 1.9372197309417042e-05, "loss": 0.7177, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 433 }, { "epoch": 0.14579276390033435, "grad_norm": 0.3896920382976532, "learning_rate": 1.9417040358744396e-05, "loss": 0.7187, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 434 }, { "epoch": 0.1461286919277545, "grad_norm": 0.3533352315425873, "learning_rate": 1.946188340807175e-05, "loss": 0.7357, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 435 }, { "epoch": 0.1464646199551746, "grad_norm": 0.43628570437431335, "learning_rate": 1.9506726457399105e-05, "loss": 0.7401, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 436 }, { "epoch": 0.14680054798259473, "grad_norm": 0.41418662667274475, "learning_rate": 1.955156950672646e-05, "loss": 0.7202, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 437 }, { "epoch": 0.14713647601001487, "grad_norm": 0.2758164405822754, "learning_rate": 1.9596412556053813e-05, "loss": 0.72, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 438 }, { "epoch": 0.14747240403743497, "grad_norm": 0.3343847990036011, "learning_rate": 1.9641255605381167e-05, "loss": 0.7161, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 439 }, { "epoch": 0.1478083320648551, "grad_norm": 0.4442453980445862, "learning_rate": 1.9686098654708522e-05, "loss": 0.7325, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 440 }, { "epoch": 0.14814426009227524, "grad_norm": 0.3318524956703186, "learning_rate": 1.9730941704035876e-05, "loss": 0.7207, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 441 }, { "epoch": 0.14848018811969535, "grad_norm": 0.3878946900367737, "learning_rate": 1.977578475336323e-05, "loss": 0.73, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 442 }, { "epoch": 0.14881611614711548, "grad_norm": 0.3518621623516083, "learning_rate": 1.9820627802690585e-05, "loss": 0.7413, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 443 }, { "epoch": 0.14915204417453562, "grad_norm": 0.39418190717697144, "learning_rate": 1.986547085201794e-05, "loss": 0.7301, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 444 }, { "epoch": 0.14948797220195573, "grad_norm": 0.38835054636001587, "learning_rate": 1.9910313901345293e-05, "loss": 0.7184, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 445 }, { "epoch": 0.14982390022937586, "grad_norm": 0.3121154010295868, "learning_rate": 1.9955156950672647e-05, "loss": 0.7238, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 446 }, { "epoch": 0.15015982825679597, "grad_norm": 0.35748159885406494, "learning_rate": 2e-05, "loss": 0.7263, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 447 }, { "epoch": 0.1504957562842161, "grad_norm": 0.38774681091308594, "learning_rate": 1.999999931456536e-05, "loss": 0.7418, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 448 }, { "epoch": 0.15083168431163624, "grad_norm": 0.2831073999404907, "learning_rate": 1.9999997258261518e-05, "loss": 0.7412, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 449 }, { "epoch": 0.15116761233905635, "grad_norm": 0.3552665710449219, "learning_rate": 1.9999993831088767e-05, "loss": 0.7125, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 450 }, { "epoch": 0.15150354036647648, "grad_norm": 0.29511550068855286, "learning_rate": 1.999998903304757e-05, "loss": 0.7339, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 451 }, { "epoch": 0.15183946839389661, "grad_norm": 0.3137618899345398, "learning_rate": 1.999998286413859e-05, "loss": 0.716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 452 }, { "epoch": 0.15217539642131672, "grad_norm": 0.3489477336406708, "learning_rate": 1.999997532436267e-05, "loss": 0.7202, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 453 }, { "epoch": 0.15251132444873686, "grad_norm": 0.3613846004009247, "learning_rate": 1.9999966413720844e-05, "loss": 0.739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 454 }, { "epoch": 0.152847252476157, "grad_norm": 0.38756445050239563, "learning_rate": 1.9999956132214336e-05, "loss": 0.7519, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 455 }, { "epoch": 0.1531831805035771, "grad_norm": 0.4111584424972534, "learning_rate": 1.999994447984455e-05, "loss": 0.7167, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 456 }, { "epoch": 0.15351910853099723, "grad_norm": 0.32919639348983765, "learning_rate": 1.999993145661309e-05, "loss": 0.7267, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 457 }, { "epoch": 0.15385503655841737, "grad_norm": 0.25709068775177, "learning_rate": 1.9999917062521736e-05, "loss": 0.7298, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 458 }, { "epoch": 0.15419096458583748, "grad_norm": 0.33127444982528687, "learning_rate": 1.999990129757246e-05, "loss": 0.73, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 459 }, { "epoch": 0.1545268926132576, "grad_norm": 0.45778465270996094, "learning_rate": 1.9999884161767427e-05, "loss": 0.7191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 460 }, { "epoch": 0.15486282064067775, "grad_norm": 0.38967353105545044, "learning_rate": 1.999986565510899e-05, "loss": 0.7298, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 461 }, { "epoch": 0.15519874866809785, "grad_norm": 0.3549347221851349, "learning_rate": 1.9999845777599675e-05, "loss": 0.7254, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 462 }, { "epoch": 0.155534676695518, "grad_norm": 0.3497800827026367, "learning_rate": 1.9999824529242215e-05, "loss": 0.7238, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 463 }, { "epoch": 0.15587060472293812, "grad_norm": 0.3459688425064087, "learning_rate": 1.9999801910039524e-05, "loss": 0.7212, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 464 }, { "epoch": 0.15620653275035823, "grad_norm": 0.3101448118686676, "learning_rate": 1.9999777919994697e-05, "loss": 0.7399, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 465 }, { "epoch": 0.15654246077777836, "grad_norm": 0.3298377990722656, "learning_rate": 1.999975255911103e-05, "loss": 0.7376, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 466 }, { "epoch": 0.1568783888051985, "grad_norm": 0.3561359643936157, "learning_rate": 1.999972582739199e-05, "loss": 0.7184, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 467 }, { "epoch": 0.1572143168326186, "grad_norm": 0.26435932517051697, "learning_rate": 1.9999697724841248e-05, "loss": 0.7353, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 468 }, { "epoch": 0.15755024486003874, "grad_norm": 0.3041613698005676, "learning_rate": 1.9999668251462652e-05, "loss": 0.7177, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 469 }, { "epoch": 0.15788617288745888, "grad_norm": 0.3483664095401764, "learning_rate": 1.9999637407260253e-05, "loss": 0.7222, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 470 }, { "epoch": 0.15822210091487898, "grad_norm": 0.3272634744644165, "learning_rate": 1.999960519223827e-05, "loss": 0.7239, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 471 }, { "epoch": 0.15855802894229912, "grad_norm": 0.35206225514411926, "learning_rate": 1.9999571606401117e-05, "loss": 0.7222, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 472 }, { "epoch": 0.15889395696971922, "grad_norm": 0.3038778007030487, "learning_rate": 1.9999536649753406e-05, "loss": 0.7362, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 473 }, { "epoch": 0.15922988499713936, "grad_norm": 0.3074759840965271, "learning_rate": 1.9999500322299927e-05, "loss": 0.7427, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 474 }, { "epoch": 0.1595658130245595, "grad_norm": 0.29791179299354553, "learning_rate": 1.9999462624045652e-05, "loss": 0.6886, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 475 }, { "epoch": 0.1599017410519796, "grad_norm": 0.336703360080719, "learning_rate": 1.9999423554995763e-05, "loss": 0.7215, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 476 }, { "epoch": 0.16023766907939974, "grad_norm": 0.2917664647102356, "learning_rate": 1.9999383115155607e-05, "loss": 0.7108, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 477 }, { "epoch": 0.16057359710681987, "grad_norm": 0.29729849100112915, "learning_rate": 1.9999341304530725e-05, "loss": 0.7242, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 478 }, { "epoch": 0.16090952513423998, "grad_norm": 0.2855307459831238, "learning_rate": 1.9999298123126855e-05, "loss": 0.723, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 479 }, { "epoch": 0.1612454531616601, "grad_norm": 0.3037993907928467, "learning_rate": 1.9999253570949915e-05, "loss": 0.7508, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 480 }, { "epoch": 0.16158138118908025, "grad_norm": 0.2842923402786255, "learning_rate": 1.9999207648006014e-05, "loss": 0.7294, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 481 }, { "epoch": 0.16191730921650035, "grad_norm": 0.22936661541461945, "learning_rate": 1.9999160354301444e-05, "loss": 0.726, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 482 }, { "epoch": 0.1622532372439205, "grad_norm": 0.2882472574710846, "learning_rate": 1.999911168984269e-05, "loss": 0.6967, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 483 }, { "epoch": 0.16258916527134062, "grad_norm": 0.27045097947120667, "learning_rate": 1.9999061654636423e-05, "loss": 0.7179, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 484 }, { "epoch": 0.16292509329876073, "grad_norm": 0.28623464703559875, "learning_rate": 1.99990102486895e-05, "loss": 0.714, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 485 }, { "epoch": 0.16326102132618087, "grad_norm": 0.2980729937553406, "learning_rate": 1.9998957472008973e-05, "loss": 0.7251, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 486 }, { "epoch": 0.163596949353601, "grad_norm": 0.253939151763916, "learning_rate": 1.999890332460207e-05, "loss": 0.7251, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 487 }, { "epoch": 0.1639328773810211, "grad_norm": 0.3057451844215393, "learning_rate": 1.9998847806476225e-05, "loss": 0.7065, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 488 }, { "epoch": 0.16426880540844124, "grad_norm": 0.29479166865348816, "learning_rate": 1.9998790917639037e-05, "loss": 0.7336, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 489 }, { "epoch": 0.16460473343586138, "grad_norm": 0.27997326850891113, "learning_rate": 1.999873265809831e-05, "loss": 0.7329, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 490 }, { "epoch": 0.16494066146328148, "grad_norm": 0.2989906966686249, "learning_rate": 1.9998673027862032e-05, "loss": 0.7181, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 491 }, { "epoch": 0.16527658949070162, "grad_norm": 0.3235163390636444, "learning_rate": 1.9998612026938377e-05, "loss": 0.738, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 492 }, { "epoch": 0.16561251751812175, "grad_norm": 0.3035833537578583, "learning_rate": 1.9998549655335702e-05, "loss": 0.7348, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 493 }, { "epoch": 0.16594844554554186, "grad_norm": 0.3057865500450134, "learning_rate": 1.9998485913062568e-05, "loss": 0.7302, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 494 }, { "epoch": 0.166284373572962, "grad_norm": 0.32259151339530945, "learning_rate": 1.99984208001277e-05, "loss": 0.722, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 495 }, { "epoch": 0.16662030160038213, "grad_norm": 0.23317746818065643, "learning_rate": 1.9998354316540033e-05, "loss": 0.7098, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 496 }, { "epoch": 0.16695622962780224, "grad_norm": 0.3072696030139923, "learning_rate": 1.999828646230868e-05, "loss": 0.7467, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 497 }, { "epoch": 0.16729215765522237, "grad_norm": 0.32270392775535583, "learning_rate": 1.9998217237442943e-05, "loss": 0.7144, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 498 }, { "epoch": 0.16762808568264248, "grad_norm": 0.2438098043203354, "learning_rate": 1.9998146641952308e-05, "loss": 0.7253, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 499 }, { "epoch": 0.16796401371006262, "grad_norm": 0.2466651350259781, "learning_rate": 1.999807467584646e-05, "loss": 0.7125, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 500 }, { "epoch": 0.16829994173748275, "grad_norm": 0.2679581046104431, "learning_rate": 1.9998001339135258e-05, "loss": 0.734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 501 }, { "epoch": 0.16863586976490286, "grad_norm": 0.3029324412345886, "learning_rate": 1.999792663182876e-05, "loss": 0.724, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 502 }, { "epoch": 0.168971797792323, "grad_norm": 0.26291990280151367, "learning_rate": 1.9997850553937196e-05, "loss": 0.7368, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 503 }, { "epoch": 0.16930772581974313, "grad_norm": 0.2831627428531647, "learning_rate": 1.999777310547101e-05, "loss": 0.7229, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 504 }, { "epoch": 0.16964365384716323, "grad_norm": 0.3265828788280487, "learning_rate": 1.9997694286440814e-05, "loss": 0.7202, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 505 }, { "epoch": 0.16997958187458337, "grad_norm": 0.2547825872898102, "learning_rate": 1.9997614096857413e-05, "loss": 0.7171, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 506 }, { "epoch": 0.1703155099020035, "grad_norm": 0.3246128261089325, "learning_rate": 1.9997532536731802e-05, "loss": 0.7284, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 507 }, { "epoch": 0.1706514379294236, "grad_norm": 0.3258628249168396, "learning_rate": 1.9997449606075154e-05, "loss": 0.7166, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 508 }, { "epoch": 0.17098736595684375, "grad_norm": 0.3618040382862091, "learning_rate": 1.9997365304898847e-05, "loss": 0.7295, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 509 }, { "epoch": 0.17132329398426388, "grad_norm": 0.3233949542045593, "learning_rate": 1.9997279633214426e-05, "loss": 0.7214, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 510 }, { "epoch": 0.171659222011684, "grad_norm": 0.26061853766441345, "learning_rate": 1.999719259103365e-05, "loss": 0.7349, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 511 }, { "epoch": 0.17199515003910412, "grad_norm": 0.3738609552383423, "learning_rate": 1.9997104178368445e-05, "loss": 0.7247, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 512 }, { "epoch": 0.17233107806652426, "grad_norm": 0.3553234040737152, "learning_rate": 1.999701439523093e-05, "loss": 0.7368, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 513 }, { "epoch": 0.17266700609394436, "grad_norm": 0.32908791303634644, "learning_rate": 1.9996923241633412e-05, "loss": 0.7218, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 514 }, { "epoch": 0.1730029341213645, "grad_norm": 0.28924527764320374, "learning_rate": 1.9996830717588386e-05, "loss": 0.726, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 515 }, { "epoch": 0.17333886214878463, "grad_norm": 0.2767105996608734, "learning_rate": 1.9996736823108542e-05, "loss": 0.7283, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 516 }, { "epoch": 0.17367479017620474, "grad_norm": 0.26268863677978516, "learning_rate": 1.999664155820675e-05, "loss": 0.7123, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 517 }, { "epoch": 0.17401071820362488, "grad_norm": 0.2727372348308563, "learning_rate": 1.9996544922896065e-05, "loss": 0.7034, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 518 }, { "epoch": 0.174346646231045, "grad_norm": 0.260593444108963, "learning_rate": 1.9996446917189738e-05, "loss": 0.7258, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 519 }, { "epoch": 0.17468257425846512, "grad_norm": 0.2931241989135742, "learning_rate": 1.9996347541101203e-05, "loss": 0.7118, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 520 }, { "epoch": 0.17501850228588525, "grad_norm": 0.34496572613716125, "learning_rate": 1.9996246794644084e-05, "loss": 0.706, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 521 }, { "epoch": 0.1753544303133054, "grad_norm": 0.3949694037437439, "learning_rate": 1.9996144677832194e-05, "loss": 0.7344, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 522 }, { "epoch": 0.1756903583407255, "grad_norm": 0.3518350422382355, "learning_rate": 1.9996041190679526e-05, "loss": 0.7243, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 523 }, { "epoch": 0.17602628636814563, "grad_norm": 0.294494092464447, "learning_rate": 1.9995936333200272e-05, "loss": 0.7026, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 524 }, { "epoch": 0.17636221439556574, "grad_norm": 0.2833363711833954, "learning_rate": 1.9995830105408803e-05, "loss": 0.7202, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 525 }, { "epoch": 0.17669814242298587, "grad_norm": 0.2603585720062256, "learning_rate": 1.9995722507319687e-05, "loss": 0.7144, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 526 }, { "epoch": 0.177034070450406, "grad_norm": 0.28672003746032715, "learning_rate": 1.999561353894767e-05, "loss": 0.7392, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 527 }, { "epoch": 0.1773699984778261, "grad_norm": 0.27522560954093933, "learning_rate": 1.999550320030769e-05, "loss": 0.7178, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 528 }, { "epoch": 0.17770592650524625, "grad_norm": 0.29231297969818115, "learning_rate": 1.9995391491414872e-05, "loss": 0.7038, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 529 }, { "epoch": 0.17804185453266638, "grad_norm": 0.2779041826725006, "learning_rate": 1.9995278412284533e-05, "loss": 0.7236, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 530 }, { "epoch": 0.1783777825600865, "grad_norm": 0.3222300410270691, "learning_rate": 1.9995163962932174e-05, "loss": 0.7403, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 531 }, { "epoch": 0.17871371058750662, "grad_norm": 0.23828525841236115, "learning_rate": 1.9995048143373485e-05, "loss": 0.7365, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 532 }, { "epoch": 0.17904963861492676, "grad_norm": 0.38513311743736267, "learning_rate": 1.999493095362434e-05, "loss": 0.7219, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 533 }, { "epoch": 0.17938556664234687, "grad_norm": 0.26188743114471436, "learning_rate": 1.9994812393700805e-05, "loss": 0.7121, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 534 }, { "epoch": 0.179721494669767, "grad_norm": 0.31856611371040344, "learning_rate": 1.9994692463619135e-05, "loss": 0.7309, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 535 }, { "epoch": 0.18005742269718714, "grad_norm": 0.304776132106781, "learning_rate": 1.999457116339577e-05, "loss": 0.7403, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 536 }, { "epoch": 0.18039335072460724, "grad_norm": 0.3158477246761322, "learning_rate": 1.999444849304734e-05, "loss": 0.7319, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 537 }, { "epoch": 0.18072927875202738, "grad_norm": 0.28986498713493347, "learning_rate": 1.999432445259066e-05, "loss": 0.7083, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 538 }, { "epoch": 0.1810652067794475, "grad_norm": 0.31781911849975586, "learning_rate": 1.9994199042042734e-05, "loss": 0.7379, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 539 }, { "epoch": 0.18140113480686762, "grad_norm": 0.3165777027606964, "learning_rate": 1.9994072261420757e-05, "loss": 0.7175, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 540 }, { "epoch": 0.18173706283428775, "grad_norm": 0.23072701692581177, "learning_rate": 1.9993944110742106e-05, "loss": 0.7041, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 541 }, { "epoch": 0.1820729908617079, "grad_norm": 0.29177820682525635, "learning_rate": 1.999381459002435e-05, "loss": 0.7005, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 542 }, { "epoch": 0.182408918889128, "grad_norm": 0.32971999049186707, "learning_rate": 1.9993683699285246e-05, "loss": 0.6745, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 543 }, { "epoch": 0.18274484691654813, "grad_norm": 0.22389306128025055, "learning_rate": 1.999355143854273e-05, "loss": 0.7129, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 544 }, { "epoch": 0.18308077494396827, "grad_norm": 0.2565278708934784, "learning_rate": 1.999341780781494e-05, "loss": 0.7191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 545 }, { "epoch": 0.18341670297138837, "grad_norm": 0.25061213970184326, "learning_rate": 1.9993282807120197e-05, "loss": 0.7092, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 546 }, { "epoch": 0.1837526309988085, "grad_norm": 0.23322808742523193, "learning_rate": 1.9993146436477005e-05, "loss": 0.7127, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 547 }, { "epoch": 0.18408855902622864, "grad_norm": 0.2510436773300171, "learning_rate": 1.9993008695904053e-05, "loss": 0.7385, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 548 }, { "epoch": 0.18442448705364875, "grad_norm": 0.26053091883659363, "learning_rate": 1.9992869585420233e-05, "loss": 0.7107, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 549 }, { "epoch": 0.18476041508106889, "grad_norm": 0.2515242397785187, "learning_rate": 1.9992729105044613e-05, "loss": 0.7322, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 550 }, { "epoch": 0.18509634310848902, "grad_norm": 0.2984708249568939, "learning_rate": 1.999258725479645e-05, "loss": 0.718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 551 }, { "epoch": 0.18543227113590913, "grad_norm": 0.24227239191532135, "learning_rate": 1.999244403469518e-05, "loss": 0.7104, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 552 }, { "epoch": 0.18576819916332926, "grad_norm": 0.2649383246898651, "learning_rate": 1.9992299444760452e-05, "loss": 0.729, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 553 }, { "epoch": 0.18610412719074937, "grad_norm": 0.28723978996276855, "learning_rate": 1.999215348501208e-05, "loss": 0.7193, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 554 }, { "epoch": 0.1864400552181695, "grad_norm": 0.27749767899513245, "learning_rate": 1.9992006155470073e-05, "loss": 0.7281, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 555 }, { "epoch": 0.18677598324558964, "grad_norm": 0.250492662191391, "learning_rate": 1.9991857456154628e-05, "loss": 0.6972, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 556 }, { "epoch": 0.18711191127300975, "grad_norm": 0.2487351894378662, "learning_rate": 1.9991707387086135e-05, "loss": 0.7014, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 557 }, { "epoch": 0.18744783930042988, "grad_norm": 0.270300954580307, "learning_rate": 1.999155594828516e-05, "loss": 0.7517, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 558 }, { "epoch": 0.18778376732785002, "grad_norm": 0.2922115921974182, "learning_rate": 1.9991403139772462e-05, "loss": 0.72, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 559 }, { "epoch": 0.18811969535527012, "grad_norm": 0.22579067945480347, "learning_rate": 1.9991248961568994e-05, "loss": 0.7226, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 560 }, { "epoch": 0.18845562338269026, "grad_norm": 0.27574554085731506, "learning_rate": 1.9991093413695892e-05, "loss": 0.6914, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 561 }, { "epoch": 0.1887915514101104, "grad_norm": 0.3169195055961609, "learning_rate": 1.999093649617448e-05, "loss": 0.7017, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 562 }, { "epoch": 0.1891274794375305, "grad_norm": 0.2539238929748535, "learning_rate": 1.9990778209026262e-05, "loss": 0.7155, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 563 }, { "epoch": 0.18946340746495063, "grad_norm": 0.27041199803352356, "learning_rate": 1.9990618552272946e-05, "loss": 0.7262, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 564 }, { "epoch": 0.18979933549237077, "grad_norm": 0.24458210170269012, "learning_rate": 1.999045752593641e-05, "loss": 0.709, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 565 }, { "epoch": 0.19013526351979088, "grad_norm": 0.31953996419906616, "learning_rate": 1.9990295130038738e-05, "loss": 0.7302, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 566 }, { "epoch": 0.190471191547211, "grad_norm": 0.2709726393222809, "learning_rate": 1.999013136460219e-05, "loss": 0.731, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 567 }, { "epoch": 0.19080711957463115, "grad_norm": 0.2620944082736969, "learning_rate": 1.9989966229649213e-05, "loss": 0.6823, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 568 }, { "epoch": 0.19114304760205125, "grad_norm": 0.2631066143512726, "learning_rate": 1.9989799725202444e-05, "loss": 0.7397, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 569 }, { "epoch": 0.1914789756294714, "grad_norm": 0.2912420928478241, "learning_rate": 1.998963185128471e-05, "loss": 0.7235, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 570 }, { "epoch": 0.19181490365689152, "grad_norm": 0.3184170126914978, "learning_rate": 1.998946260791903e-05, "loss": 0.7192, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 571 }, { "epoch": 0.19215083168431163, "grad_norm": 0.311979740858078, "learning_rate": 1.9989291995128598e-05, "loss": 0.736, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 572 }, { "epoch": 0.19248675971173176, "grad_norm": 0.27513575553894043, "learning_rate": 1.99891200129368e-05, "loss": 0.7035, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 573 }, { "epoch": 0.1928226877391519, "grad_norm": 0.28691285848617554, "learning_rate": 1.9988946661367222e-05, "loss": 0.6977, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.36, "memory/max_allocated (GiB)": 63.36, "step": 574 }, { "epoch": 0.193158615766572, "grad_norm": 0.2906058132648468, "learning_rate": 1.9988771940443624e-05, "loss": 0.7226, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 575 }, { "epoch": 0.19349454379399214, "grad_norm": 0.32878226041793823, "learning_rate": 1.9988595850189954e-05, "loss": 0.7056, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 576 }, { "epoch": 0.19383047182141228, "grad_norm": 0.3280046284198761, "learning_rate": 1.998841839063036e-05, "loss": 0.7266, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 577 }, { "epoch": 0.19416639984883238, "grad_norm": 0.2796948552131653, "learning_rate": 1.9988239561789164e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 578 }, { "epoch": 0.19450232787625252, "grad_norm": 0.3493218421936035, "learning_rate": 1.9988059363690876e-05, "loss": 0.7205, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 579 }, { "epoch": 0.19483825590367262, "grad_norm": 0.328278511762619, "learning_rate": 1.998787779636021e-05, "loss": 0.7197, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 580 }, { "epoch": 0.19517418393109276, "grad_norm": 0.23837560415267944, "learning_rate": 1.9987694859822052e-05, "loss": 0.7289, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 581 }, { "epoch": 0.1955101119585129, "grad_norm": 0.3295779526233673, "learning_rate": 1.998751055410147e-05, "loss": 0.6985, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 582 }, { "epoch": 0.195846039985933, "grad_norm": 0.3139457404613495, "learning_rate": 1.998732487922375e-05, "loss": 0.7058, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 583 }, { "epoch": 0.19618196801335314, "grad_norm": 0.2812606990337372, "learning_rate": 1.998713783521433e-05, "loss": 0.7321, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 584 }, { "epoch": 0.19651789604077327, "grad_norm": 0.3028532862663269, "learning_rate": 1.9986949422098856e-05, "loss": 0.714, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 585 }, { "epoch": 0.19685382406819338, "grad_norm": 0.2679670453071594, "learning_rate": 1.9986759639903157e-05, "loss": 0.7224, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 586 }, { "epoch": 0.1971897520956135, "grad_norm": 0.29672771692276, "learning_rate": 1.998656848865325e-05, "loss": 0.7045, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 587 }, { "epoch": 0.19752568012303365, "grad_norm": 0.30539610981941223, "learning_rate": 1.9986375968375337e-05, "loss": 0.7238, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 588 }, { "epoch": 0.19786160815045375, "grad_norm": 0.24463790655136108, "learning_rate": 1.9986182079095817e-05, "loss": 0.711, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 589 }, { "epoch": 0.1981975361778739, "grad_norm": 0.29681357741355896, "learning_rate": 1.9985986820841262e-05, "loss": 0.7184, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 590 }, { "epoch": 0.19853346420529402, "grad_norm": 0.28029459714889526, "learning_rate": 1.998579019363844e-05, "loss": 0.706, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 591 }, { "epoch": 0.19886939223271413, "grad_norm": 0.26092830300331116, "learning_rate": 1.998559219751431e-05, "loss": 0.7092, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 592 }, { "epoch": 0.19920532026013427, "grad_norm": 0.2481130212545395, "learning_rate": 1.9985392832496014e-05, "loss": 0.7153, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 593 }, { "epoch": 0.1995412482875544, "grad_norm": 0.21734526753425598, "learning_rate": 1.9985192098610876e-05, "loss": 0.7, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 594 }, { "epoch": 0.1998771763149745, "grad_norm": 0.23615452647209167, "learning_rate": 1.9984989995886424e-05, "loss": 0.7163, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 595 }, { "epoch": 0.20021310434239464, "grad_norm": 0.2578044831752777, "learning_rate": 1.9984786524350356e-05, "loss": 0.7222, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 596 }, { "epoch": 0.20054903236981478, "grad_norm": 0.2530650496482849, "learning_rate": 1.9984581684030567e-05, "loss": 0.7168, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 597 }, { "epoch": 0.20088496039723489, "grad_norm": 0.22140806913375854, "learning_rate": 1.9984375474955144e-05, "loss": 0.7127, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 598 }, { "epoch": 0.20122088842465502, "grad_norm": 0.2527979016304016, "learning_rate": 1.9984167897152348e-05, "loss": 0.734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 599 }, { "epoch": 0.20155681645207515, "grad_norm": 0.2699756324291229, "learning_rate": 1.9983958950650636e-05, "loss": 0.71, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 600 }, { "epoch": 0.20189274447949526, "grad_norm": 0.26130616664886475, "learning_rate": 1.9983748635478653e-05, "loss": 0.7213, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 601 }, { "epoch": 0.2022286725069154, "grad_norm": 0.220911905169487, "learning_rate": 1.9983536951665234e-05, "loss": 0.7307, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 602 }, { "epoch": 0.20256460053433553, "grad_norm": 0.24936221539974213, "learning_rate": 1.9983323899239395e-05, "loss": 0.7091, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 603 }, { "epoch": 0.20290052856175564, "grad_norm": 0.278812438249588, "learning_rate": 1.998310947823034e-05, "loss": 0.729, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 604 }, { "epoch": 0.20323645658917577, "grad_norm": 0.23632901906967163, "learning_rate": 1.998289368866747e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 605 }, { "epoch": 0.20357238461659588, "grad_norm": 0.2952718436717987, "learning_rate": 1.998267653058036e-05, "loss": 0.7069, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 606 }, { "epoch": 0.20390831264401602, "grad_norm": 0.2594706118106842, "learning_rate": 1.9982458003998784e-05, "loss": 0.716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 607 }, { "epoch": 0.20424424067143615, "grad_norm": 0.2591499388217926, "learning_rate": 1.9982238108952697e-05, "loss": 0.7126, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 608 }, { "epoch": 0.20458016869885626, "grad_norm": 0.21990391612052917, "learning_rate": 1.998201684547224e-05, "loss": 0.7112, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 609 }, { "epoch": 0.2049160967262764, "grad_norm": 0.2563396394252777, "learning_rate": 1.9981794213587756e-05, "loss": 0.7007, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 610 }, { "epoch": 0.20525202475369653, "grad_norm": 0.25910407304763794, "learning_rate": 1.9981570213329757e-05, "loss": 0.7157, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 611 }, { "epoch": 0.20558795278111663, "grad_norm": 0.26716098189353943, "learning_rate": 1.998134484472895e-05, "loss": 0.7152, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 612 }, { "epoch": 0.20592388080853677, "grad_norm": 0.2045973390340805, "learning_rate": 1.9981118107816235e-05, "loss": 0.7325, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 613 }, { "epoch": 0.2062598088359569, "grad_norm": 0.2199867218732834, "learning_rate": 1.998089000262269e-05, "loss": 0.7089, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 614 }, { "epoch": 0.206595736863377, "grad_norm": 0.2266997992992401, "learning_rate": 1.9980660529179592e-05, "loss": 0.6885, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 615 }, { "epoch": 0.20693166489079715, "grad_norm": 0.2260930985212326, "learning_rate": 1.9980429687518387e-05, "loss": 0.7209, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 616 }, { "epoch": 0.20726759291821728, "grad_norm": 0.2586844563484192, "learning_rate": 1.9980197477670733e-05, "loss": 0.7402, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 617 }, { "epoch": 0.2076035209456374, "grad_norm": 0.3053637444972992, "learning_rate": 1.9979963899668456e-05, "loss": 0.7101, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 618 }, { "epoch": 0.20793944897305752, "grad_norm": 0.2510787844657898, "learning_rate": 1.9979728953543577e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 619 }, { "epoch": 0.20827537700047766, "grad_norm": 0.2602149546146393, "learning_rate": 1.99794926393283e-05, "loss": 0.721, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 620 }, { "epoch": 0.20861130502789776, "grad_norm": 0.6731862425804138, "learning_rate": 1.9979254957055034e-05, "loss": 0.6992, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 621 }, { "epoch": 0.2089472330553179, "grad_norm": 1.436149001121521, "learning_rate": 1.997901590675635e-05, "loss": 0.7194, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 622 }, { "epoch": 0.20928316108273803, "grad_norm": 0.6975224614143372, "learning_rate": 1.997877548846502e-05, "loss": 0.7378, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 623 }, { "epoch": 0.20961908911015814, "grad_norm": 0.8406168222427368, "learning_rate": 1.9978533702214005e-05, "loss": 0.7323, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 624 }, { "epoch": 0.20995501713757828, "grad_norm": 0.7829836010932922, "learning_rate": 1.9978290548036454e-05, "loss": 0.7147, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 625 }, { "epoch": 0.2102909451649984, "grad_norm": 1.0757533311843872, "learning_rate": 1.9978046025965693e-05, "loss": 0.7406, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.88, "memory/max_allocated (GiB)": 64.88, "step": 626 }, { "epoch": 0.21062687319241852, "grad_norm": 0.8185653686523438, "learning_rate": 1.9977800136035247e-05, "loss": 0.735, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 627 }, { "epoch": 0.21096280121983865, "grad_norm": 1.0882409811019897, "learning_rate": 1.9977552878278822e-05, "loss": 0.7309, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 628 }, { "epoch": 0.2112987292472588, "grad_norm": 0.47995564341545105, "learning_rate": 1.997730425273032e-05, "loss": 0.7403, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 629 }, { "epoch": 0.2116346572746789, "grad_norm": 0.7009016275405884, "learning_rate": 1.9977054259423813e-05, "loss": 0.7149, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 630 }, { "epoch": 0.21197058530209903, "grad_norm": 0.5795793533325195, "learning_rate": 1.9976802898393583e-05, "loss": 0.7153, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 631 }, { "epoch": 0.21230651332951916, "grad_norm": 0.5094826221466064, "learning_rate": 1.9976550169674083e-05, "loss": 0.7249, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 632 }, { "epoch": 0.21264244135693927, "grad_norm": 0.5086947083473206, "learning_rate": 1.997629607329996e-05, "loss": 0.716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 633 }, { "epoch": 0.2129783693843594, "grad_norm": 0.4335854947566986, "learning_rate": 1.9976040609306045e-05, "loss": 0.7082, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 634 }, { "epoch": 0.2133142974117795, "grad_norm": 0.42688247561454773, "learning_rate": 1.9975783777727363e-05, "loss": 0.738, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 635 }, { "epoch": 0.21365022543919965, "grad_norm": 0.43436095118522644, "learning_rate": 1.9975525578599116e-05, "loss": 0.7484, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 636 }, { "epoch": 0.21398615346661978, "grad_norm": 0.3987245261669159, "learning_rate": 1.997526601195671e-05, "loss": 0.7292, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 637 }, { "epoch": 0.2143220814940399, "grad_norm": 0.3843563199043274, "learning_rate": 1.9975005077835716e-05, "loss": 0.7181, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 638 }, { "epoch": 0.21465800952146002, "grad_norm": 0.32646074891090393, "learning_rate": 1.997474277627191e-05, "loss": 0.7174, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 639 }, { "epoch": 0.21499393754888016, "grad_norm": 0.3831422030925751, "learning_rate": 1.9974479107301255e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 640 }, { "epoch": 0.21532986557630027, "grad_norm": 0.33058542013168335, "learning_rate": 1.997421407095989e-05, "loss": 0.7222, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 641 }, { "epoch": 0.2156657936037204, "grad_norm": 0.3712143003940582, "learning_rate": 1.997394766728415e-05, "loss": 0.7016, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 642 }, { "epoch": 0.21600172163114054, "grad_norm": 0.3189653754234314, "learning_rate": 1.9973679896310557e-05, "loss": 0.7361, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 643 }, { "epoch": 0.21633764965856064, "grad_norm": 0.37882012128829956, "learning_rate": 1.9973410758075814e-05, "loss": 0.7131, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 644 }, { "epoch": 0.21667357768598078, "grad_norm": 0.3176257014274597, "learning_rate": 1.9973140252616826e-05, "loss": 0.7299, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 645 }, { "epoch": 0.2170095057134009, "grad_norm": 0.35314032435417175, "learning_rate": 1.9972868379970665e-05, "loss": 0.7147, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 646 }, { "epoch": 0.21734543374082102, "grad_norm": 0.28123602271080017, "learning_rate": 1.9972595140174604e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 647 }, { "epoch": 0.21768136176824116, "grad_norm": 0.31134942173957825, "learning_rate": 1.997232053326611e-05, "loss": 0.7142, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 648 }, { "epoch": 0.2180172897956613, "grad_norm": 0.29849308729171753, "learning_rate": 1.997204455928281e-05, "loss": 0.7157, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 649 }, { "epoch": 0.2183532178230814, "grad_norm": 0.2814985513687134, "learning_rate": 1.9971767218262556e-05, "loss": 0.7326, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 650 }, { "epoch": 0.21868914585050153, "grad_norm": 0.23290561139583588, "learning_rate": 1.9971488510243354e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 651 }, { "epoch": 0.21902507387792167, "grad_norm": 0.34760603308677673, "learning_rate": 1.9971208435263416e-05, "loss": 0.7, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 652 }, { "epoch": 0.21936100190534177, "grad_norm": 0.29134753346443176, "learning_rate": 1.9970926993361135e-05, "loss": 0.7128, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 653 }, { "epoch": 0.2196969299327619, "grad_norm": 0.2638290524482727, "learning_rate": 1.9970644184575098e-05, "loss": 0.7108, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 654 }, { "epoch": 0.22003285796018204, "grad_norm": 0.3046759366989136, "learning_rate": 1.997036000894407e-05, "loss": 0.729, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 655 }, { "epoch": 0.22036878598760215, "grad_norm": 0.23522737622261047, "learning_rate": 1.997007446650701e-05, "loss": 0.7109, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 656 }, { "epoch": 0.22070471401502229, "grad_norm": 0.28842100501060486, "learning_rate": 1.996978755730306e-05, "loss": 0.7122, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 657 }, { "epoch": 0.22104064204244242, "grad_norm": 0.24596646428108215, "learning_rate": 1.9969499281371548e-05, "loss": 0.6988, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 658 }, { "epoch": 0.22137657006986253, "grad_norm": 0.2617340385913849, "learning_rate": 1.9969209638752e-05, "loss": 0.7167, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 659 }, { "epoch": 0.22171249809728266, "grad_norm": 0.25839555263519287, "learning_rate": 1.9968918629484123e-05, "loss": 0.7214, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 660 }, { "epoch": 0.22204842612470277, "grad_norm": 0.24943840503692627, "learning_rate": 1.9968626253607802e-05, "loss": 0.7049, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 661 }, { "epoch": 0.2223843541521229, "grad_norm": 0.24434857070446014, "learning_rate": 1.9968332511163127e-05, "loss": 0.6965, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 662 }, { "epoch": 0.22272028217954304, "grad_norm": 0.23494330048561096, "learning_rate": 1.996803740219036e-05, "loss": 0.6889, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 663 }, { "epoch": 0.22305621020696315, "grad_norm": 0.20974037051200867, "learning_rate": 1.996774092672996e-05, "loss": 0.7184, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 664 }, { "epoch": 0.22339213823438328, "grad_norm": 0.27879008650779724, "learning_rate": 1.9967443084822563e-05, "loss": 0.7084, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 665 }, { "epoch": 0.22372806626180342, "grad_norm": 0.2272268682718277, "learning_rate": 1.996714387650901e-05, "loss": 0.6846, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 666 }, { "epoch": 0.22406399428922352, "grad_norm": 0.6010203957557678, "learning_rate": 1.9966843301830315e-05, "loss": 0.7148, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 667 }, { "epoch": 0.22439992231664366, "grad_norm": 0.29136037826538086, "learning_rate": 1.996654136082768e-05, "loss": 0.6994, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 668 }, { "epoch": 0.2247358503440638, "grad_norm": 0.2758272886276245, "learning_rate": 1.9966238053542493e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 669 }, { "epoch": 0.2250717783714839, "grad_norm": 0.27251964807510376, "learning_rate": 1.9965933380016346e-05, "loss": 0.7069, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 670 }, { "epoch": 0.22540770639890403, "grad_norm": 0.2654094696044922, "learning_rate": 1.9965627340290995e-05, "loss": 0.7077, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 671 }, { "epoch": 0.22574363442632417, "grad_norm": 0.2608623206615448, "learning_rate": 1.99653199344084e-05, "loss": 0.7045, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 672 }, { "epoch": 0.22607956245374428, "grad_norm": 0.2746700346469879, "learning_rate": 1.9965011162410697e-05, "loss": 0.733, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 673 }, { "epoch": 0.2264154904811644, "grad_norm": 0.25305619835853577, "learning_rate": 1.996470102434022e-05, "loss": 0.7156, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 674 }, { "epoch": 0.22675141850858455, "grad_norm": 0.308815598487854, "learning_rate": 1.9964389520239482e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 675 }, { "epoch": 0.22708734653600465, "grad_norm": 0.25653842091560364, "learning_rate": 1.9964076650151186e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 676 }, { "epoch": 0.2274232745634248, "grad_norm": 0.2752304673194885, "learning_rate": 1.9963762414118223e-05, "loss": 0.7097, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 677 }, { "epoch": 0.22775920259084492, "grad_norm": 0.31443309783935547, "learning_rate": 1.9963446812183672e-05, "loss": 0.7191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 678 }, { "epoch": 0.22809513061826503, "grad_norm": 0.2320452183485031, "learning_rate": 1.9963129844390795e-05, "loss": 0.7097, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 679 }, { "epoch": 0.22843105864568516, "grad_norm": 0.2593803107738495, "learning_rate": 1.9962811510783044e-05, "loss": 0.7318, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 680 }, { "epoch": 0.2287669866731053, "grad_norm": 0.22918346524238586, "learning_rate": 1.9962491811404064e-05, "loss": 0.7321, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 681 }, { "epoch": 0.2291029147005254, "grad_norm": 0.2619474232196808, "learning_rate": 1.9962170746297675e-05, "loss": 0.6987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 682 }, { "epoch": 0.22943884272794554, "grad_norm": 0.23632533848285675, "learning_rate": 1.9961848315507898e-05, "loss": 0.7253, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 683 }, { "epoch": 0.22977477075536568, "grad_norm": 0.26205384731292725, "learning_rate": 1.9961524519078925e-05, "loss": 0.7264, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 684 }, { "epoch": 0.23011069878278578, "grad_norm": 0.2608633041381836, "learning_rate": 1.9961199357055147e-05, "loss": 0.7199, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 685 }, { "epoch": 0.23044662681020592, "grad_norm": 0.22771219909191132, "learning_rate": 1.9960872829481145e-05, "loss": 0.7119, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 686 }, { "epoch": 0.23078255483762602, "grad_norm": 0.25368234515190125, "learning_rate": 1.996054493640168e-05, "loss": 0.688, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 687 }, { "epoch": 0.23111848286504616, "grad_norm": 0.24484597146511078, "learning_rate": 1.9960215677861698e-05, "loss": 0.7206, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 688 }, { "epoch": 0.2314544108924663, "grad_norm": 0.24626462161540985, "learning_rate": 1.9959885053906336e-05, "loss": 0.7098, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 689 }, { "epoch": 0.2317903389198864, "grad_norm": 0.1921827346086502, "learning_rate": 1.995955306458092e-05, "loss": 0.7161, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 690 }, { "epoch": 0.23212626694730654, "grad_norm": 0.26237455010414124, "learning_rate": 1.9959219709930963e-05, "loss": 0.703, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 691 }, { "epoch": 0.23246219497472667, "grad_norm": 0.262185662984848, "learning_rate": 1.9958884990002164e-05, "loss": 0.704, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 692 }, { "epoch": 0.23279812300214678, "grad_norm": 0.20468053221702576, "learning_rate": 1.99585489048404e-05, "loss": 0.7419, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 693 }, { "epoch": 0.2331340510295669, "grad_norm": 0.256884902715683, "learning_rate": 1.995821145449176e-05, "loss": 0.7138, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 694 }, { "epoch": 0.23346997905698705, "grad_norm": 0.2271503508090973, "learning_rate": 1.9957872639002486e-05, "loss": 0.7147, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 695 }, { "epoch": 0.23380590708440716, "grad_norm": 0.27514851093292236, "learning_rate": 1.995753245841904e-05, "loss": 0.725, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 696 }, { "epoch": 0.2341418351118273, "grad_norm": 0.22524109482765198, "learning_rate": 1.9957190912788046e-05, "loss": 0.7187, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 697 }, { "epoch": 0.23447776313924742, "grad_norm": 0.22807642817497253, "learning_rate": 1.9956848002156332e-05, "loss": 0.7278, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 698 }, { "epoch": 0.23481369116666753, "grad_norm": 0.2409074753522873, "learning_rate": 1.9956503726570902e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 699 }, { "epoch": 0.23514961919408767, "grad_norm": 0.19400405883789062, "learning_rate": 1.995615808607896e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 700 }, { "epoch": 0.2354855472215078, "grad_norm": 0.21078944206237793, "learning_rate": 1.9955811080727877e-05, "loss": 0.6955, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 701 }, { "epoch": 0.2358214752489279, "grad_norm": 0.22065654397010803, "learning_rate": 1.9955462710565228e-05, "loss": 0.7331, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 702 }, { "epoch": 0.23615740327634804, "grad_norm": 0.20569057762622833, "learning_rate": 1.9955112975638774e-05, "loss": 0.7072, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 703 }, { "epoch": 0.23649333130376818, "grad_norm": 0.2571563124656677, "learning_rate": 1.9954761875996454e-05, "loss": 0.7004, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 704 }, { "epoch": 0.23682925933118829, "grad_norm": 0.2388155162334442, "learning_rate": 1.99544094116864e-05, "loss": 0.7297, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 705 }, { "epoch": 0.23716518735860842, "grad_norm": 0.2695233225822449, "learning_rate": 1.9954055582756932e-05, "loss": 0.7234, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 706 }, { "epoch": 0.23750111538602856, "grad_norm": 0.26092514395713806, "learning_rate": 1.9953700389256554e-05, "loss": 0.7136, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 707 }, { "epoch": 0.23783704341344866, "grad_norm": 0.19189240038394928, "learning_rate": 1.995334383123396e-05, "loss": 0.7169, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 708 }, { "epoch": 0.2381729714408688, "grad_norm": 0.23817986249923706, "learning_rate": 1.9952985908738026e-05, "loss": 0.7231, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 709 }, { "epoch": 0.23850889946828893, "grad_norm": 0.23666416108608246, "learning_rate": 1.9952626621817824e-05, "loss": 0.7314, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 710 }, { "epoch": 0.23884482749570904, "grad_norm": 0.23566284775733948, "learning_rate": 1.99522659705226e-05, "loss": 0.7144, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 711 }, { "epoch": 0.23918075552312917, "grad_norm": 0.2143888622522354, "learning_rate": 1.99519039549018e-05, "loss": 0.7112, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 712 }, { "epoch": 0.2395166835505493, "grad_norm": 0.24613995850086212, "learning_rate": 1.9951540575005053e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 713 }, { "epoch": 0.23985261157796942, "grad_norm": 0.24876317381858826, "learning_rate": 1.995117583088217e-05, "loss": 0.6952, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 714 }, { "epoch": 0.24018853960538955, "grad_norm": 0.25643855333328247, "learning_rate": 1.9950809722583153e-05, "loss": 0.7126, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 715 }, { "epoch": 0.24052446763280966, "grad_norm": 0.24656090140342712, "learning_rate": 1.995044225015819e-05, "loss": 0.7086, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 716 }, { "epoch": 0.2408603956602298, "grad_norm": 0.2330516129732132, "learning_rate": 1.995007341365766e-05, "loss": 0.7305, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 717 }, { "epoch": 0.24119632368764993, "grad_norm": 0.2644723355770111, "learning_rate": 1.9949703213132126e-05, "loss": 0.7104, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 718 }, { "epoch": 0.24153225171507003, "grad_norm": 0.20497214794158936, "learning_rate": 1.9949331648632332e-05, "loss": 0.7191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 719 }, { "epoch": 0.24186817974249017, "grad_norm": 0.2772520184516907, "learning_rate": 1.9948958720209218e-05, "loss": 0.7087, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 720 }, { "epoch": 0.2422041077699103, "grad_norm": 0.24417628347873688, "learning_rate": 1.994858442791391e-05, "loss": 0.6919, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 721 }, { "epoch": 0.2425400357973304, "grad_norm": 0.20549242198467255, "learning_rate": 1.9948208771797714e-05, "loss": 0.7123, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 722 }, { "epoch": 0.24287596382475055, "grad_norm": 0.20696179568767548, "learning_rate": 1.994783175191213e-05, "loss": 0.7148, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 723 }, { "epoch": 0.24321189185217068, "grad_norm": 0.21624034643173218, "learning_rate": 1.994745336830884e-05, "loss": 0.7196, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 724 }, { "epoch": 0.2435478198795908, "grad_norm": 0.25332891941070557, "learning_rate": 1.9947073621039722e-05, "loss": 0.7211, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 725 }, { "epoch": 0.24388374790701092, "grad_norm": 0.18615300953388214, "learning_rate": 1.9946692510156832e-05, "loss": 0.7272, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 726 }, { "epoch": 0.24421967593443106, "grad_norm": 0.2135411500930786, "learning_rate": 1.994631003571241e-05, "loss": 0.7271, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 727 }, { "epoch": 0.24455560396185116, "grad_norm": 0.21986354887485504, "learning_rate": 1.994592619775889e-05, "loss": 0.7175, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 728 }, { "epoch": 0.2448915319892713, "grad_norm": 0.20808365941047668, "learning_rate": 1.9945540996348894e-05, "loss": 0.7019, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 729 }, { "epoch": 0.24522746001669143, "grad_norm": 0.21725249290466309, "learning_rate": 1.9945154431535227e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 730 }, { "epoch": 0.24556338804411154, "grad_norm": 0.2540811002254486, "learning_rate": 1.994476650337088e-05, "loss": 0.7127, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 731 }, { "epoch": 0.24589931607153168, "grad_norm": 0.196614071726799, "learning_rate": 1.9944377211909038e-05, "loss": 0.7237, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 732 }, { "epoch": 0.2462352440989518, "grad_norm": 0.20969711244106293, "learning_rate": 1.9943986557203066e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 733 }, { "epoch": 0.24657117212637192, "grad_norm": 0.26558682322502136, "learning_rate": 1.994359453930651e-05, "loss": 0.7154, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 734 }, { "epoch": 0.24690710015379205, "grad_norm": 0.1975460797548294, "learning_rate": 1.9943201158273122e-05, "loss": 0.7077, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 735 }, { "epoch": 0.2472430281812122, "grad_norm": 0.20939072966575623, "learning_rate": 1.9942806414156822e-05, "loss": 0.7245, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 736 }, { "epoch": 0.2475789562086323, "grad_norm": 0.23814813792705536, "learning_rate": 1.9942410307011725e-05, "loss": 0.7364, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 737 }, { "epoch": 0.24791488423605243, "grad_norm": 0.1984374076128006, "learning_rate": 1.9942012836892136e-05, "loss": 0.7145, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 738 }, { "epoch": 0.24825081226347256, "grad_norm": 0.21602284908294678, "learning_rate": 1.994161400385254e-05, "loss": 0.7034, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 739 }, { "epoch": 0.24858674029089267, "grad_norm": 0.21132531762123108, "learning_rate": 1.9941213807947612e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 740 }, { "epoch": 0.2489226683183128, "grad_norm": 0.19361485540866852, "learning_rate": 1.9940812249232215e-05, "loss": 0.7021, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 741 }, { "epoch": 0.2492585963457329, "grad_norm": 0.19168317317962646, "learning_rate": 1.9940409327761396e-05, "loss": 0.7099, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 742 }, { "epoch": 0.24959452437315305, "grad_norm": 0.20364715158939362, "learning_rate": 1.9940005043590388e-05, "loss": 0.7093, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 743 }, { "epoch": 0.24993045240057318, "grad_norm": 0.2314276397228241, "learning_rate": 1.9939599396774615e-05, "loss": 0.7171, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 744 }, { "epoch": 0.2502663804279933, "grad_norm": 0.17658016085624695, "learning_rate": 1.993919238736969e-05, "loss": 0.7032, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 745 }, { "epoch": 0.2506023084554134, "grad_norm": 0.21495448052883148, "learning_rate": 1.9938784015431405e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 746 }, { "epoch": 0.25093823648283353, "grad_norm": 0.22540000081062317, "learning_rate": 1.993837428101574e-05, "loss": 0.7121, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 747 }, { "epoch": 0.2512741645102537, "grad_norm": 0.2515198886394501, "learning_rate": 1.993796318417887e-05, "loss": 0.7282, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 748 }, { "epoch": 0.2516100925376738, "grad_norm": 0.25299009680747986, "learning_rate": 1.9937550724977146e-05, "loss": 0.7083, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 749 }, { "epoch": 0.2519460205650939, "grad_norm": 0.20238816738128662, "learning_rate": 1.9937136903467115e-05, "loss": 0.6941, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 750 }, { "epoch": 0.25228194859251407, "grad_norm": 0.20544445514678955, "learning_rate": 1.99367217197055e-05, "loss": 0.7127, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 751 }, { "epoch": 0.2526178766199342, "grad_norm": 0.22645455598831177, "learning_rate": 1.9936305173749224e-05, "loss": 0.7095, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 752 }, { "epoch": 0.2529538046473543, "grad_norm": 0.2075236737728119, "learning_rate": 1.9935887265655388e-05, "loss": 0.7208, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 753 }, { "epoch": 0.25328973267477445, "grad_norm": 0.19221891462802887, "learning_rate": 1.993546799548128e-05, "loss": 0.7252, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 754 }, { "epoch": 0.25362566070219456, "grad_norm": 0.19825902581214905, "learning_rate": 1.9935047363284377e-05, "loss": 0.7008, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 755 }, { "epoch": 0.25396158872961466, "grad_norm": 0.20288710296154022, "learning_rate": 1.9934625369122344e-05, "loss": 0.7233, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 756 }, { "epoch": 0.2542975167570348, "grad_norm": 0.22206221520900726, "learning_rate": 1.993420201305303e-05, "loss": 0.7122, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 757 }, { "epoch": 0.25463344478445493, "grad_norm": 0.1859467476606369, "learning_rate": 1.9933777295134467e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 758 }, { "epoch": 0.25496937281187504, "grad_norm": 0.21380417048931122, "learning_rate": 1.993335121542489e-05, "loss": 0.7153, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 759 }, { "epoch": 0.2553053008392952, "grad_norm": 0.20895697176456451, "learning_rate": 1.9932923773982698e-05, "loss": 0.7048, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 760 }, { "epoch": 0.2556412288667153, "grad_norm": 0.2304764688014984, "learning_rate": 1.993249497086649e-05, "loss": 0.6939, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 761 }, { "epoch": 0.2559771568941354, "grad_norm": 0.2257523238658905, "learning_rate": 1.9932064806135052e-05, "loss": 0.6942, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 762 }, { "epoch": 0.2563130849215556, "grad_norm": 0.20516656339168549, "learning_rate": 1.993163327984735e-05, "loss": 0.7074, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 763 }, { "epoch": 0.2566490129489757, "grad_norm": 0.22641189396381378, "learning_rate": 1.9931200392062545e-05, "loss": 0.7179, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 764 }, { "epoch": 0.2569849409763958, "grad_norm": 0.2312820553779602, "learning_rate": 1.993076614283998e-05, "loss": 0.7147, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 765 }, { "epoch": 0.25732086900381596, "grad_norm": 0.21879225969314575, "learning_rate": 1.9930330532239183e-05, "loss": 0.7016, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 766 }, { "epoch": 0.25765679703123606, "grad_norm": 0.20371049642562866, "learning_rate": 1.992989356031987e-05, "loss": 0.7058, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 767 }, { "epoch": 0.25799272505865617, "grad_norm": 0.22896867990493774, "learning_rate": 1.992945522714194e-05, "loss": 0.7067, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 768 }, { "epoch": 0.25832865308607633, "grad_norm": 0.22281751036643982, "learning_rate": 1.9929015532765492e-05, "loss": 0.7141, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 769 }, { "epoch": 0.25866458111349644, "grad_norm": 0.23501227796077728, "learning_rate": 1.9928574477250797e-05, "loss": 0.694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 770 }, { "epoch": 0.25900050914091655, "grad_norm": 0.2713620066642761, "learning_rate": 1.992813206065832e-05, "loss": 0.7159, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 771 }, { "epoch": 0.2593364371683367, "grad_norm": 0.23935948312282562, "learning_rate": 1.992768828304871e-05, "loss": 0.7157, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 772 }, { "epoch": 0.2596723651957568, "grad_norm": 0.2163105458021164, "learning_rate": 1.99272431444828e-05, "loss": 0.7148, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 773 }, { "epoch": 0.2600082932231769, "grad_norm": 0.22502820193767548, "learning_rate": 1.9926796645021616e-05, "loss": 0.7079, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 774 }, { "epoch": 0.2603442212505971, "grad_norm": 0.22338055074214935, "learning_rate": 1.992634878472637e-05, "loss": 0.7228, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 775 }, { "epoch": 0.2606801492780172, "grad_norm": 0.2577083706855774, "learning_rate": 1.992589956365845e-05, "loss": 0.7139, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 776 }, { "epoch": 0.2610160773054373, "grad_norm": 0.2621099650859833, "learning_rate": 1.9925448981879443e-05, "loss": 0.6819, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 777 }, { "epoch": 0.26135200533285746, "grad_norm": 0.20656061172485352, "learning_rate": 1.9924997039451117e-05, "loss": 0.6938, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 778 }, { "epoch": 0.26168793336027757, "grad_norm": 0.27024373412132263, "learning_rate": 1.9924543736435428e-05, "loss": 0.6914, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 779 }, { "epoch": 0.2620238613876977, "grad_norm": 0.18014219403266907, "learning_rate": 1.9924089072894518e-05, "loss": 0.7081, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 780 }, { "epoch": 0.2623597894151178, "grad_norm": 0.2392546832561493, "learning_rate": 1.992363304889072e-05, "loss": 0.6846, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 781 }, { "epoch": 0.26269571744253795, "grad_norm": 0.2399860918521881, "learning_rate": 1.9923175664486536e-05, "loss": 0.7059, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 782 }, { "epoch": 0.26303164546995805, "grad_norm": 0.2417343705892563, "learning_rate": 1.9922716919744683e-05, "loss": 0.7413, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 783 }, { "epoch": 0.26336757349737816, "grad_norm": 0.2638155519962311, "learning_rate": 1.9922256814728036e-05, "loss": 0.7216, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 784 }, { "epoch": 0.2637035015247983, "grad_norm": 0.2904418110847473, "learning_rate": 1.9921795349499678e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 785 }, { "epoch": 0.26403942955221843, "grad_norm": 0.22370462119579315, "learning_rate": 1.992133252412287e-05, "loss": 0.7089, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 786 }, { "epoch": 0.26437535757963854, "grad_norm": 0.2620202600955963, "learning_rate": 1.992086833866105e-05, "loss": 0.7201, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 787 }, { "epoch": 0.2647112856070587, "grad_norm": 0.22119149565696716, "learning_rate": 1.992040279317786e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 788 }, { "epoch": 0.2650472136344788, "grad_norm": 0.24984396994113922, "learning_rate": 1.9919935887737113e-05, "loss": 0.7142, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 789 }, { "epoch": 0.2653831416618989, "grad_norm": 0.24000073969364166, "learning_rate": 1.9919467622402826e-05, "loss": 0.7035, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 790 }, { "epoch": 0.2657190696893191, "grad_norm": 0.2274143099784851, "learning_rate": 1.9918997997239186e-05, "loss": 0.7155, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 791 }, { "epoch": 0.2660549977167392, "grad_norm": 0.23579862713813782, "learning_rate": 1.991852701231057e-05, "loss": 0.7018, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 792 }, { "epoch": 0.2663909257441593, "grad_norm": 0.21612533926963806, "learning_rate": 1.9918054667681553e-05, "loss": 0.7041, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 793 }, { "epoch": 0.26672685377157945, "grad_norm": 0.22698023915290833, "learning_rate": 1.9917580963416876e-05, "loss": 0.7173, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 794 }, { "epoch": 0.26706278179899956, "grad_norm": 0.21362797915935516, "learning_rate": 1.9917105899581485e-05, "loss": 0.6855, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 795 }, { "epoch": 0.26739870982641967, "grad_norm": 0.20813031494617462, "learning_rate": 1.9916629476240505e-05, "loss": 0.7007, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 796 }, { "epoch": 0.26773463785383983, "grad_norm": 0.2702796459197998, "learning_rate": 1.9916151693459243e-05, "loss": 0.7018, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 797 }, { "epoch": 0.26807056588125994, "grad_norm": 0.24850773811340332, "learning_rate": 1.99156725513032e-05, "loss": 0.7148, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 798 }, { "epoch": 0.26840649390868004, "grad_norm": 0.26115962862968445, "learning_rate": 1.991519204983806e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 799 }, { "epoch": 0.2687424219361002, "grad_norm": 0.2280198186635971, "learning_rate": 1.991471018912969e-05, "loss": 0.7101, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 800 }, { "epoch": 0.2690783499635203, "grad_norm": 0.2713622450828552, "learning_rate": 1.991422696924415e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 801 }, { "epoch": 0.2694142779909404, "grad_norm": 0.33529016375541687, "learning_rate": 1.9913742390247683e-05, "loss": 0.694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 802 }, { "epoch": 0.2697502060183606, "grad_norm": 0.31363144516944885, "learning_rate": 1.991325645220672e-05, "loss": 0.7201, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 803 }, { "epoch": 0.2700861340457807, "grad_norm": 0.2858002185821533, "learning_rate": 1.9912769155187873e-05, "loss": 0.7121, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 804 }, { "epoch": 0.2704220620732008, "grad_norm": 0.21364323794841766, "learning_rate": 1.9912280499257947e-05, "loss": 0.7236, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 805 }, { "epoch": 0.27075799010062096, "grad_norm": 0.25752902030944824, "learning_rate": 1.9911790484483927e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 806 }, { "epoch": 0.27109391812804107, "grad_norm": 0.26068854331970215, "learning_rate": 1.9911299110932996e-05, "loss": 0.7132, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 807 }, { "epoch": 0.2714298461554612, "grad_norm": 0.21932169795036316, "learning_rate": 1.9910806378672502e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 808 }, { "epoch": 0.27176577418288134, "grad_norm": 0.1910598874092102, "learning_rate": 1.9910312287770002e-05, "loss": 0.7069, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 809 }, { "epoch": 0.27210170221030144, "grad_norm": 0.17579004168510437, "learning_rate": 1.9909816838293228e-05, "loss": 0.7127, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 810 }, { "epoch": 0.27243763023772155, "grad_norm": 0.20364148914813995, "learning_rate": 1.9909320030310096e-05, "loss": 0.7026, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 811 }, { "epoch": 0.2727735582651417, "grad_norm": 0.1985860913991928, "learning_rate": 1.9908821863888712e-05, "loss": 0.7164, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 812 }, { "epoch": 0.2731094862925618, "grad_norm": 0.23132288455963135, "learning_rate": 1.9908322339097373e-05, "loss": 0.7061, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 813 }, { "epoch": 0.2734454143199819, "grad_norm": 0.16978834569454193, "learning_rate": 1.990782145600455e-05, "loss": 0.7116, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 814 }, { "epoch": 0.2737813423474021, "grad_norm": 0.18511927127838135, "learning_rate": 1.990731921467892e-05, "loss": 0.7003, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 815 }, { "epoch": 0.2741172703748222, "grad_norm": 0.1960989236831665, "learning_rate": 1.9906815615189318e-05, "loss": 0.7115, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 816 }, { "epoch": 0.2744531984022423, "grad_norm": 0.21849356591701508, "learning_rate": 1.990631065760479e-05, "loss": 0.6963, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 817 }, { "epoch": 0.27478912642966247, "grad_norm": 0.18424612283706665, "learning_rate": 1.9905804341994563e-05, "loss": 0.7071, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 818 }, { "epoch": 0.2751250544570826, "grad_norm": 0.2039215862751007, "learning_rate": 1.9905296668428034e-05, "loss": 0.7013, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 819 }, { "epoch": 0.2754609824845027, "grad_norm": 0.2014269232749939, "learning_rate": 1.9904787636974808e-05, "loss": 0.7201, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 820 }, { "epoch": 0.27579691051192284, "grad_norm": 0.19675973057746887, "learning_rate": 1.9904277247704664e-05, "loss": 0.718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 821 }, { "epoch": 0.27613283853934295, "grad_norm": 0.1907966136932373, "learning_rate": 1.9903765500687567e-05, "loss": 0.7036, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 822 }, { "epoch": 0.27646876656676306, "grad_norm": 0.2230924814939499, "learning_rate": 1.9903252395993675e-05, "loss": 0.6818, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 823 }, { "epoch": 0.2768046945941832, "grad_norm": 0.20777219533920288, "learning_rate": 1.9902737933693328e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 824 }, { "epoch": 0.2771406226216033, "grad_norm": 0.1924518495798111, "learning_rate": 1.9902222113857048e-05, "loss": 0.6899, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 825 }, { "epoch": 0.27747655064902343, "grad_norm": 0.21956555545330048, "learning_rate": 1.990170493655555e-05, "loss": 0.7076, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 826 }, { "epoch": 0.2778124786764436, "grad_norm": 0.21287444233894348, "learning_rate": 1.9901186401859732e-05, "loss": 0.7015, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 827 }, { "epoch": 0.2781484067038637, "grad_norm": 0.25627419352531433, "learning_rate": 1.990066650984068e-05, "loss": 0.711, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 828 }, { "epoch": 0.2784843347312838, "grad_norm": 0.24478359520435333, "learning_rate": 1.9900145260569658e-05, "loss": 0.7113, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.88, "memory/max_allocated (GiB)": 64.88, "step": 829 }, { "epoch": 0.278820262758704, "grad_norm": 0.20776227116584778, "learning_rate": 1.989962265411813e-05, "loss": 0.7083, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 830 }, { "epoch": 0.2791561907861241, "grad_norm": 0.25500836968421936, "learning_rate": 1.9899098690557734e-05, "loss": 0.706, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 831 }, { "epoch": 0.2794921188135442, "grad_norm": 0.2429259568452835, "learning_rate": 1.98985733699603e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 832 }, { "epoch": 0.27982804684096435, "grad_norm": 0.21822884678840637, "learning_rate": 1.9898046692397844e-05, "loss": 0.714, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 833 }, { "epoch": 0.28016397486838446, "grad_norm": 0.24174237251281738, "learning_rate": 1.9897518657942565e-05, "loss": 0.6805, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 834 }, { "epoch": 0.28049990289580456, "grad_norm": 0.2364027053117752, "learning_rate": 1.989698926666685e-05, "loss": 0.7067, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 835 }, { "epoch": 0.28083583092322467, "grad_norm": 0.23798900842666626, "learning_rate": 1.9896458518643268e-05, "loss": 0.7054, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 836 }, { "epoch": 0.28117175895064483, "grad_norm": 0.21158860623836517, "learning_rate": 1.9895926413944583e-05, "loss": 0.7036, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.33, "memory/max_allocated (GiB)": 64.33, "step": 837 }, { "epoch": 0.28150768697806494, "grad_norm": 0.2143760472536087, "learning_rate": 1.989539295264374e-05, "loss": 0.7082, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 838 }, { "epoch": 0.28184361500548505, "grad_norm": 0.24484501779079437, "learning_rate": 1.9894858134813866e-05, "loss": 0.7049, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 839 }, { "epoch": 0.2821795430329052, "grad_norm": 0.24839910864830017, "learning_rate": 1.989432196052828e-05, "loss": 0.7128, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 840 }, { "epoch": 0.2825154710603253, "grad_norm": 0.17565467953681946, "learning_rate": 1.9893784429860478e-05, "loss": 0.737, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 841 }, { "epoch": 0.2828513990877454, "grad_norm": 0.2560480833053589, "learning_rate": 1.9893245542884156e-05, "loss": 0.6983, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 842 }, { "epoch": 0.2831873271151656, "grad_norm": 0.264873743057251, "learning_rate": 1.9892705299673186e-05, "loss": 0.695, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 843 }, { "epoch": 0.2835232551425857, "grad_norm": 0.2233102172613144, "learning_rate": 1.989216370030163e-05, "loss": 0.7191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 844 }, { "epoch": 0.2838591831700058, "grad_norm": 0.2373446822166443, "learning_rate": 1.989162074484373e-05, "loss": 0.7021, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 845 }, { "epoch": 0.28419511119742596, "grad_norm": 0.21703708171844482, "learning_rate": 1.9891076433373922e-05, "loss": 0.6945, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 846 }, { "epoch": 0.28453103922484607, "grad_norm": 0.18749774992465973, "learning_rate": 1.9890530765966823e-05, "loss": 0.7161, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 847 }, { "epoch": 0.2848669672522662, "grad_norm": 0.22870956361293793, "learning_rate": 1.988998374269724e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 848 }, { "epoch": 0.28520289527968634, "grad_norm": 0.23865585029125214, "learning_rate": 1.9889435363640154e-05, "loss": 0.7312, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 849 }, { "epoch": 0.28553882330710645, "grad_norm": 0.2357468456029892, "learning_rate": 1.9888885628870746e-05, "loss": 0.6949, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 850 }, { "epoch": 0.28587475133452656, "grad_norm": 0.26059865951538086, "learning_rate": 1.988833453846438e-05, "loss": 0.7279, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 851 }, { "epoch": 0.2862106793619467, "grad_norm": 0.25471925735473633, "learning_rate": 1.9887782092496598e-05, "loss": 0.7172, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 852 }, { "epoch": 0.2865466073893668, "grad_norm": 0.2026413381099701, "learning_rate": 1.9887228291043134e-05, "loss": 0.738, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 853 }, { "epoch": 0.28688253541678693, "grad_norm": 0.20447924733161926, "learning_rate": 1.9886673134179912e-05, "loss": 0.7004, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 854 }, { "epoch": 0.2872184634442071, "grad_norm": 0.2857646048069, "learning_rate": 1.9886116621983035e-05, "loss": 0.7181, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 855 }, { "epoch": 0.2875543914716272, "grad_norm": 0.28440600633621216, "learning_rate": 1.9885558754528787e-05, "loss": 0.7041, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 856 }, { "epoch": 0.2878903194990473, "grad_norm": 0.2817153036594391, "learning_rate": 1.9884999531893652e-05, "loss": 0.7043, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 857 }, { "epoch": 0.28822624752646747, "grad_norm": 0.3072553873062134, "learning_rate": 1.988443895415429e-05, "loss": 0.7304, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 858 }, { "epoch": 0.2885621755538876, "grad_norm": 0.3040078580379486, "learning_rate": 1.9883877021387546e-05, "loss": 0.7216, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 859 }, { "epoch": 0.2888981035813077, "grad_norm": 0.1878989040851593, "learning_rate": 1.9883313733670454e-05, "loss": 0.7078, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 860 }, { "epoch": 0.28923403160872785, "grad_norm": 0.2911360561847687, "learning_rate": 1.9882749091080243e-05, "loss": 0.7152, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 861 }, { "epoch": 0.28956995963614796, "grad_norm": 0.23838651180267334, "learning_rate": 1.9882183093694303e-05, "loss": 0.7147, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 862 }, { "epoch": 0.28990588766356806, "grad_norm": 0.21449866890907288, "learning_rate": 1.9881615741590234e-05, "loss": 0.7303, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 863 }, { "epoch": 0.2902418156909882, "grad_norm": 0.24682728946208954, "learning_rate": 1.9881047034845814e-05, "loss": 0.7109, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 864 }, { "epoch": 0.29057774371840833, "grad_norm": 0.1929609179496765, "learning_rate": 1.9880476973538997e-05, "loss": 0.6976, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 865 }, { "epoch": 0.29091367174582844, "grad_norm": 0.22822022438049316, "learning_rate": 1.987990555774794e-05, "loss": 0.7146, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 866 }, { "epoch": 0.2912495997732486, "grad_norm": 0.22590182721614838, "learning_rate": 1.987933278755097e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 867 }, { "epoch": 0.2915855278006687, "grad_norm": 0.21149161458015442, "learning_rate": 1.987875866302661e-05, "loss": 0.6876, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 868 }, { "epoch": 0.2919214558280888, "grad_norm": 0.18582719564437866, "learning_rate": 1.9878183184253565e-05, "loss": 0.7038, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 869 }, { "epoch": 0.292257383855509, "grad_norm": 0.22504571080207825, "learning_rate": 1.987760635131072e-05, "loss": 0.7035, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 870 }, { "epoch": 0.2925933118829291, "grad_norm": 0.25057491660118103, "learning_rate": 1.9877028164277157e-05, "loss": 0.7112, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 871 }, { "epoch": 0.2929292399103492, "grad_norm": 0.20263908803462982, "learning_rate": 1.987644862323214e-05, "loss": 0.7061, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 872 }, { "epoch": 0.29326516793776936, "grad_norm": 0.25321272015571594, "learning_rate": 1.9875867728255113e-05, "loss": 0.7187, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 873 }, { "epoch": 0.29360109596518946, "grad_norm": 0.2271207571029663, "learning_rate": 1.9875285479425708e-05, "loss": 0.6922, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 874 }, { "epoch": 0.29393702399260957, "grad_norm": 0.24387258291244507, "learning_rate": 1.9874701876823744e-05, "loss": 0.6908, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 875 }, { "epoch": 0.29427295202002973, "grad_norm": 0.20313531160354614, "learning_rate": 1.9874116920529227e-05, "loss": 0.7111, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 876 }, { "epoch": 0.29460888004744984, "grad_norm": 0.2283056080341339, "learning_rate": 1.9873530610622346e-05, "loss": 0.7315, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 877 }, { "epoch": 0.29494480807486995, "grad_norm": 0.2123650163412094, "learning_rate": 1.9872942947183476e-05, "loss": 0.7062, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 878 }, { "epoch": 0.2952807361022901, "grad_norm": 0.22730810940265656, "learning_rate": 1.987235393029318e-05, "loss": 0.7115, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 879 }, { "epoch": 0.2956166641297102, "grad_norm": 0.20364241302013397, "learning_rate": 1.98717635600322e-05, "loss": 0.6989, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 880 }, { "epoch": 0.2959525921571303, "grad_norm": 0.18921135365962982, "learning_rate": 1.9871171836481476e-05, "loss": 0.7093, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 881 }, { "epoch": 0.2962885201845505, "grad_norm": 0.18614009022712708, "learning_rate": 1.9870578759722118e-05, "loss": 0.7079, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 882 }, { "epoch": 0.2966244482119706, "grad_norm": 0.21734708547592163, "learning_rate": 1.9869984329835433e-05, "loss": 0.699, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 883 }, { "epoch": 0.2969603762393907, "grad_norm": 0.22311677038669586, "learning_rate": 1.9869388546902906e-05, "loss": 0.7003, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 884 }, { "epoch": 0.29729630426681086, "grad_norm": 0.21207013726234436, "learning_rate": 1.986879141100621e-05, "loss": 0.6933, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.88, "memory/max_allocated (GiB)": 64.88, "step": 885 }, { "epoch": 0.29763223229423097, "grad_norm": 0.2211737483739853, "learning_rate": 1.9868192922227215e-05, "loss": 0.7089, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 886 }, { "epoch": 0.2979681603216511, "grad_norm": 0.2039278894662857, "learning_rate": 1.9867593080647952e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 887 }, { "epoch": 0.29830408834907124, "grad_norm": 0.23748478293418884, "learning_rate": 1.9866991886350665e-05, "loss": 0.7089, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 888 }, { "epoch": 0.29864001637649135, "grad_norm": 0.20315399765968323, "learning_rate": 1.986638933941776e-05, "loss": 0.7039, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 889 }, { "epoch": 0.29897594440391145, "grad_norm": 0.21712535619735718, "learning_rate": 1.986578543993184e-05, "loss": 0.6995, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 890 }, { "epoch": 0.29931187243133156, "grad_norm": 0.23007312417030334, "learning_rate": 1.9865180187975696e-05, "loss": 0.7082, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 891 }, { "epoch": 0.2996478004587517, "grad_norm": 0.20234467089176178, "learning_rate": 1.9864573583632296e-05, "loss": 0.6979, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 892 }, { "epoch": 0.29998372848617183, "grad_norm": 0.22285524010658264, "learning_rate": 1.98639656269848e-05, "loss": 0.7037, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 893 }, { "epoch": 0.30031965651359194, "grad_norm": 0.2026350349187851, "learning_rate": 1.9863356318116545e-05, "loss": 0.7226, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 894 }, { "epoch": 0.3006555845410121, "grad_norm": 0.25353869795799255, "learning_rate": 1.9862745657111073e-05, "loss": 0.7242, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 895 }, { "epoch": 0.3009915125684322, "grad_norm": 0.22221441566944122, "learning_rate": 1.986213364405208e-05, "loss": 0.6887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 896 }, { "epoch": 0.3013274405958523, "grad_norm": 0.19834984838962555, "learning_rate": 1.986152027902348e-05, "loss": 0.6957, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 897 }, { "epoch": 0.3016633686232725, "grad_norm": 0.20635148882865906, "learning_rate": 1.9860905562109348e-05, "loss": 0.7116, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 898 }, { "epoch": 0.3019992966506926, "grad_norm": 0.24739904701709747, "learning_rate": 1.9860289493393954e-05, "loss": 0.7233, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 899 }, { "epoch": 0.3023352246781127, "grad_norm": 0.2110607922077179, "learning_rate": 1.985967207296176e-05, "loss": 0.7186, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 900 }, { "epoch": 0.30267115270553285, "grad_norm": 0.2284429669380188, "learning_rate": 1.9859053300897398e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 901 }, { "epoch": 0.30300708073295296, "grad_norm": 0.24928246438503265, "learning_rate": 1.98584331772857e-05, "loss": 0.707, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 902 }, { "epoch": 0.30334300876037307, "grad_norm": 0.2727356553077698, "learning_rate": 1.9857811702211674e-05, "loss": 0.7036, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 903 }, { "epoch": 0.30367893678779323, "grad_norm": 0.21596474945545197, "learning_rate": 1.9857188875760518e-05, "loss": 0.6981, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 904 }, { "epoch": 0.30401486481521334, "grad_norm": 0.21135996282100677, "learning_rate": 1.985656469801761e-05, "loss": 0.691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 905 }, { "epoch": 0.30435079284263344, "grad_norm": 0.2388085126876831, "learning_rate": 1.9855939169068516e-05, "loss": 0.7234, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 906 }, { "epoch": 0.3046867208700536, "grad_norm": 0.24545812606811523, "learning_rate": 1.9855312288998994e-05, "loss": 0.7094, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 907 }, { "epoch": 0.3050226488974737, "grad_norm": 0.24142342805862427, "learning_rate": 1.9854684057894974e-05, "loss": 0.6988, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 908 }, { "epoch": 0.3053585769248938, "grad_norm": 0.20108214020729065, "learning_rate": 1.9854054475842582e-05, "loss": 0.7267, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 909 }, { "epoch": 0.305694504952314, "grad_norm": 0.23008401691913605, "learning_rate": 1.9853423542928127e-05, "loss": 0.7014, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 910 }, { "epoch": 0.3060304329797341, "grad_norm": 0.22917543351650238, "learning_rate": 1.98527912592381e-05, "loss": 0.7296, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 911 }, { "epoch": 0.3063663610071542, "grad_norm": 0.19841435551643372, "learning_rate": 1.985215762485918e-05, "loss": 0.7176, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 912 }, { "epoch": 0.30670228903457436, "grad_norm": 0.26442745327949524, "learning_rate": 1.9851522639878224e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 913 }, { "epoch": 0.30703821706199447, "grad_norm": 0.2502175271511078, "learning_rate": 1.9850886304382287e-05, "loss": 0.7125, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 914 }, { "epoch": 0.3073741450894146, "grad_norm": 0.24350905418395996, "learning_rate": 1.9850248618458602e-05, "loss": 0.7068, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 915 }, { "epoch": 0.30771007311683474, "grad_norm": 0.27106228470802307, "learning_rate": 1.9849609582194583e-05, "loss": 0.7149, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 916 }, { "epoch": 0.30804600114425484, "grad_norm": 0.23167943954467773, "learning_rate": 1.9848969195677836e-05, "loss": 0.6917, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 917 }, { "epoch": 0.30838192917167495, "grad_norm": 0.2288145124912262, "learning_rate": 1.9848327458996148e-05, "loss": 0.6796, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 918 }, { "epoch": 0.3087178571990951, "grad_norm": 0.2759672999382019, "learning_rate": 1.9847684372237495e-05, "loss": 0.69, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 919 }, { "epoch": 0.3090537852265152, "grad_norm": 0.20777562260627747, "learning_rate": 1.9847039935490033e-05, "loss": 0.694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 920 }, { "epoch": 0.30938971325393533, "grad_norm": 0.26104292273521423, "learning_rate": 1.984639414884211e-05, "loss": 0.7019, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 921 }, { "epoch": 0.3097256412813555, "grad_norm": 0.2325393706560135, "learning_rate": 1.9845747012382254e-05, "loss": 0.7129, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 922 }, { "epoch": 0.3100615693087756, "grad_norm": 0.23927433788776398, "learning_rate": 1.9845098526199176e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 923 }, { "epoch": 0.3103974973361957, "grad_norm": 0.2050437331199646, "learning_rate": 1.984444869038178e-05, "loss": 0.6885, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 924 }, { "epoch": 0.31073342536361587, "grad_norm": 0.24451914429664612, "learning_rate": 1.9843797505019142e-05, "loss": 0.7118, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 925 }, { "epoch": 0.311069353391036, "grad_norm": 0.2669634521007538, "learning_rate": 1.984314497020054e-05, "loss": 0.7218, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 926 }, { "epoch": 0.3114052814184561, "grad_norm": 0.2813180983066559, "learning_rate": 1.9842491086015417e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 927 }, { "epoch": 0.31174120944587624, "grad_norm": 0.24238574504852295, "learning_rate": 1.9841835852553423e-05, "loss": 0.7206, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 928 }, { "epoch": 0.31207713747329635, "grad_norm": 0.2022181749343872, "learning_rate": 1.9841179269904377e-05, "loss": 0.6956, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 929 }, { "epoch": 0.31241306550071646, "grad_norm": 0.22444438934326172, "learning_rate": 1.9840521338158292e-05, "loss": 0.7097, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 930 }, { "epoch": 0.3127489935281366, "grad_norm": 0.3180549740791321, "learning_rate": 1.9839862057405353e-05, "loss": 0.6973, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 931 }, { "epoch": 0.31308492155555673, "grad_norm": 0.34312689304351807, "learning_rate": 1.983920142773595e-05, "loss": 0.7096, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 932 }, { "epoch": 0.31342084958297683, "grad_norm": 0.33485761284828186, "learning_rate": 1.9838539449240634e-05, "loss": 0.7096, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 933 }, { "epoch": 0.313756777610397, "grad_norm": 0.22414952516555786, "learning_rate": 1.9837876122010166e-05, "loss": 0.6955, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 934 }, { "epoch": 0.3140927056378171, "grad_norm": 0.21611790359020233, "learning_rate": 1.983721144613547e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 935 }, { "epoch": 0.3144286336652372, "grad_norm": 0.2602505087852478, "learning_rate": 1.983654542170767e-05, "loss": 0.7108, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 936 }, { "epoch": 0.3147645616926574, "grad_norm": 0.3123088479042053, "learning_rate": 1.9835878048818067e-05, "loss": 0.7241, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 937 }, { "epoch": 0.3151004897200775, "grad_norm": 0.27489927411079407, "learning_rate": 1.9835209327558148e-05, "loss": 0.6991, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 938 }, { "epoch": 0.3154364177474976, "grad_norm": 0.2061733603477478, "learning_rate": 1.983453925801959e-05, "loss": 0.7338, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 939 }, { "epoch": 0.31577234577491775, "grad_norm": 0.23927247524261475, "learning_rate": 1.9833867840294244e-05, "loss": 0.7021, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 940 }, { "epoch": 0.31610827380233786, "grad_norm": 0.2700663208961487, "learning_rate": 1.9833195074474164e-05, "loss": 0.7016, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 941 }, { "epoch": 0.31644420182975797, "grad_norm": 0.2348504662513733, "learning_rate": 1.9832520960651563e-05, "loss": 0.6777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 942 }, { "epoch": 0.31678012985717807, "grad_norm": 0.2416856735944748, "learning_rate": 1.9831845498918864e-05, "loss": 0.7139, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 943 }, { "epoch": 0.31711605788459823, "grad_norm": 0.19785180687904358, "learning_rate": 1.9831168689368664e-05, "loss": 0.7325, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 944 }, { "epoch": 0.31745198591201834, "grad_norm": 0.2524378299713135, "learning_rate": 1.9830490532093736e-05, "loss": 0.7073, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.88, "memory/max_allocated (GiB)": 64.88, "step": 945 }, { "epoch": 0.31778791393943845, "grad_norm": 0.26881784200668335, "learning_rate": 1.9829811027187055e-05, "loss": 0.6958, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 946 }, { "epoch": 0.3181238419668586, "grad_norm": 0.2342870980501175, "learning_rate": 1.9829130174741764e-05, "loss": 0.6772, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 947 }, { "epoch": 0.3184597699942787, "grad_norm": 0.2531738579273224, "learning_rate": 1.982844797485121e-05, "loss": 0.7172, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 948 }, { "epoch": 0.3187956980216988, "grad_norm": 0.22550205886363983, "learning_rate": 1.98277644276089e-05, "loss": 0.7153, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 949 }, { "epoch": 0.319131626049119, "grad_norm": 0.2465781271457672, "learning_rate": 1.9827079533108555e-05, "loss": 0.6909, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 950 }, { "epoch": 0.3194675540765391, "grad_norm": 0.2455533891916275, "learning_rate": 1.9826393291444055e-05, "loss": 0.675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 951 }, { "epoch": 0.3198034821039592, "grad_norm": 0.21988341212272644, "learning_rate": 1.9825705702709476e-05, "loss": 0.7229, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 952 }, { "epoch": 0.32013941013137936, "grad_norm": 0.23466193675994873, "learning_rate": 1.9825016766999075e-05, "loss": 0.7088, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 953 }, { "epoch": 0.32047533815879947, "grad_norm": 0.2252553403377533, "learning_rate": 1.9824326484407304e-05, "loss": 0.7167, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 954 }, { "epoch": 0.3208112661862196, "grad_norm": 0.23027218878269196, "learning_rate": 1.9823634855028783e-05, "loss": 0.7027, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 955 }, { "epoch": 0.32114719421363974, "grad_norm": 0.23044389486312866, "learning_rate": 1.9822941878958332e-05, "loss": 0.6906, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 956 }, { "epoch": 0.32148312224105985, "grad_norm": 0.20422300696372986, "learning_rate": 1.9822247556290948e-05, "loss": 0.7063, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 957 }, { "epoch": 0.32181905026847996, "grad_norm": 0.2500421702861786, "learning_rate": 1.982155188712181e-05, "loss": 0.6834, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 958 }, { "epoch": 0.3221549782959001, "grad_norm": 0.21093815565109253, "learning_rate": 1.982085487154629e-05, "loss": 0.7195, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 959 }, { "epoch": 0.3224909063233202, "grad_norm": 0.21235249936580658, "learning_rate": 1.982015650965993e-05, "loss": 0.6988, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 960 }, { "epoch": 0.32282683435074033, "grad_norm": 0.24326187372207642, "learning_rate": 1.9819456801558478e-05, "loss": 0.7233, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 961 }, { "epoch": 0.3231627623781605, "grad_norm": 0.21311625838279724, "learning_rate": 1.9818755747337848e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 962 }, { "epoch": 0.3234986904055806, "grad_norm": 0.22935864329338074, "learning_rate": 1.981805334709415e-05, "loss": 0.6872, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 963 }, { "epoch": 0.3238346184330007, "grad_norm": 0.2193230390548706, "learning_rate": 1.981734960092367e-05, "loss": 0.6797, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 964 }, { "epoch": 0.32417054646042087, "grad_norm": 0.20151540637016296, "learning_rate": 1.9816644508922883e-05, "loss": 0.7006, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 965 }, { "epoch": 0.324506474487841, "grad_norm": 0.24653306603431702, "learning_rate": 1.981593807118845e-05, "loss": 0.7024, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 966 }, { "epoch": 0.3248424025152611, "grad_norm": 0.24920491874217987, "learning_rate": 1.981523028781721e-05, "loss": 0.7112, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 967 }, { "epoch": 0.32517833054268125, "grad_norm": 0.1994745135307312, "learning_rate": 1.9814521158906195e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 968 }, { "epoch": 0.32551425857010136, "grad_norm": 0.27705755829811096, "learning_rate": 1.9813810684552615e-05, "loss": 0.6894, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 969 }, { "epoch": 0.32585018659752146, "grad_norm": 0.2515392601490021, "learning_rate": 1.981309886485387e-05, "loss": 0.7065, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 970 }, { "epoch": 0.3261861146249416, "grad_norm": 0.20856019854545593, "learning_rate": 1.981238569990754e-05, "loss": 0.7076, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 971 }, { "epoch": 0.32652204265236173, "grad_norm": 0.33476194739341736, "learning_rate": 1.9811671189811386e-05, "loss": 0.6918, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 972 }, { "epoch": 0.32685797067978184, "grad_norm": 0.284696102142334, "learning_rate": 1.9810955334663362e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 973 }, { "epoch": 0.327193898707202, "grad_norm": 0.2066802829504013, "learning_rate": 1.9810238134561605e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 974 }, { "epoch": 0.3275298267346221, "grad_norm": 0.2826342284679413, "learning_rate": 1.9809519589604428e-05, "loss": 0.7124, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 975 }, { "epoch": 0.3278657547620422, "grad_norm": 0.2334241271018982, "learning_rate": 1.9808799699890334e-05, "loss": 0.7135, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 976 }, { "epoch": 0.3282016827894624, "grad_norm": 0.2624005079269409, "learning_rate": 1.9808078465518016e-05, "loss": 0.6972, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 977 }, { "epoch": 0.3285376108168825, "grad_norm": 0.2875312268733978, "learning_rate": 1.9807355886586346e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 978 }, { "epoch": 0.3288735388443026, "grad_norm": 0.24949242174625397, "learning_rate": 1.9806631963194377e-05, "loss": 0.6968, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 979 }, { "epoch": 0.32920946687172276, "grad_norm": 0.25570958852767944, "learning_rate": 1.9805906695441347e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 980 }, { "epoch": 0.32954539489914286, "grad_norm": 0.2543680667877197, "learning_rate": 1.9805180083426684e-05, "loss": 0.7082, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 981 }, { "epoch": 0.32988132292656297, "grad_norm": 0.29169875383377075, "learning_rate": 1.9804452127249995e-05, "loss": 0.7002, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 982 }, { "epoch": 0.33021725095398313, "grad_norm": 0.25746166706085205, "learning_rate": 1.9803722827011076e-05, "loss": 0.7177, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 983 }, { "epoch": 0.33055317898140324, "grad_norm": 0.2694299519062042, "learning_rate": 1.9802992182809905e-05, "loss": 0.6954, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 984 }, { "epoch": 0.33088910700882335, "grad_norm": 0.23677214980125427, "learning_rate": 1.980226019474664e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 985 }, { "epoch": 0.3312250350362435, "grad_norm": 0.24593789875507355, "learning_rate": 1.9801526862921626e-05, "loss": 0.6974, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 986 }, { "epoch": 0.3315609630636636, "grad_norm": 0.27058273553848267, "learning_rate": 1.98007921874354e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 987 }, { "epoch": 0.3318968910910837, "grad_norm": 0.2672067880630493, "learning_rate": 1.980005616838867e-05, "loss": 0.6767, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 988 }, { "epoch": 0.3322328191185039, "grad_norm": 0.23933427035808563, "learning_rate": 1.9799318805882343e-05, "loss": 0.7004, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 989 }, { "epoch": 0.332568747145924, "grad_norm": 0.23800741136074066, "learning_rate": 1.979858010001749e-05, "loss": 0.687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 990 }, { "epoch": 0.3329046751733441, "grad_norm": 0.2212238758802414, "learning_rate": 1.9797840050895386e-05, "loss": 0.7105, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 991 }, { "epoch": 0.33324060320076426, "grad_norm": 0.2500266432762146, "learning_rate": 1.979709865861748e-05, "loss": 0.6959, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 992 }, { "epoch": 0.33357653122818437, "grad_norm": 0.2129405438899994, "learning_rate": 1.9796355923285407e-05, "loss": 0.7134, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 993 }, { "epoch": 0.3339124592556045, "grad_norm": 0.23799124360084534, "learning_rate": 1.9795611845000984e-05, "loss": 0.7163, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 994 }, { "epoch": 0.33424838728302464, "grad_norm": 0.2047109305858612, "learning_rate": 1.979486642386622e-05, "loss": 0.7117, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 995 }, { "epoch": 0.33458431531044475, "grad_norm": 0.24331605434417725, "learning_rate": 1.9794119659983298e-05, "loss": 0.7118, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 996 }, { "epoch": 0.33492024333786485, "grad_norm": 0.21632151305675507, "learning_rate": 1.979337155345459e-05, "loss": 0.6638, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 997 }, { "epoch": 0.33525617136528496, "grad_norm": 0.2610107660293579, "learning_rate": 1.9792622104382656e-05, "loss": 0.685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 998 }, { "epoch": 0.3355920993927051, "grad_norm": 0.24587205052375793, "learning_rate": 1.9791871312870227e-05, "loss": 0.7149, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 999 }, { "epoch": 0.33592802742012523, "grad_norm": 0.18137818574905396, "learning_rate": 1.9791119179020233e-05, "loss": 0.7061, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1000 }, { "epoch": 0.33626395544754534, "grad_norm": 0.2171933799982071, "learning_rate": 1.9790365702935784e-05, "loss": 0.695, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1001 }, { "epoch": 0.3365998834749655, "grad_norm": 0.24088340997695923, "learning_rate": 1.9789610884720166e-05, "loss": 0.713, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1002 }, { "epoch": 0.3369358115023856, "grad_norm": 0.20021305978298187, "learning_rate": 1.978885472447686e-05, "loss": 0.7052, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1003 }, { "epoch": 0.3372717395298057, "grad_norm": 0.1974472999572754, "learning_rate": 1.9788097222309516e-05, "loss": 0.7111, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1004 }, { "epoch": 0.3376076675572259, "grad_norm": 0.2017873078584671, "learning_rate": 1.978733837832199e-05, "loss": 0.6957, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1005 }, { "epoch": 0.337943595584646, "grad_norm": 0.18206778168678284, "learning_rate": 1.9786578192618302e-05, "loss": 0.6831, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1006 }, { "epoch": 0.3382795236120661, "grad_norm": 0.19271983206272125, "learning_rate": 1.9785816665302665e-05, "loss": 0.6783, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1007 }, { "epoch": 0.33861545163948625, "grad_norm": 0.2162712961435318, "learning_rate": 1.9785053796479476e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1008 }, { "epoch": 0.33895137966690636, "grad_norm": 0.22306868433952332, "learning_rate": 1.9784289586253315e-05, "loss": 0.6739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1009 }, { "epoch": 0.33928730769432647, "grad_norm": 0.1895294487476349, "learning_rate": 1.978352403472894e-05, "loss": 0.7015, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1010 }, { "epoch": 0.33962323572174663, "grad_norm": 0.18622514605522156, "learning_rate": 1.9782757142011303e-05, "loss": 0.7158, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1011 }, { "epoch": 0.33995916374916674, "grad_norm": 0.21714970469474792, "learning_rate": 1.9781988908205533e-05, "loss": 0.7082, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1012 }, { "epoch": 0.34029509177658684, "grad_norm": 0.24277912080287933, "learning_rate": 1.9781219333416947e-05, "loss": 0.706, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1013 }, { "epoch": 0.340631019804007, "grad_norm": 0.19508770108222961, "learning_rate": 1.978044841775104e-05, "loss": 0.7067, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1014 }, { "epoch": 0.3409669478314271, "grad_norm": 0.2031334638595581, "learning_rate": 1.9779676161313495e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1015 }, { "epoch": 0.3413028758588472, "grad_norm": 0.22519442439079285, "learning_rate": 1.9778902564210184e-05, "loss": 0.7063, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1016 }, { "epoch": 0.3416388038862674, "grad_norm": 0.19350269436836243, "learning_rate": 1.977812762654715e-05, "loss": 0.6858, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1017 }, { "epoch": 0.3419747319136875, "grad_norm": 0.19575759768486023, "learning_rate": 1.9777351348430632e-05, "loss": 0.7156, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1018 }, { "epoch": 0.3423106599411076, "grad_norm": 0.2445569634437561, "learning_rate": 1.9776573729967044e-05, "loss": 0.7093, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1019 }, { "epoch": 0.34264658796852776, "grad_norm": 0.21470047533512115, "learning_rate": 1.977579477126299e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1020 }, { "epoch": 0.34298251599594787, "grad_norm": 0.174539253115654, "learning_rate": 1.977501447242525e-05, "loss": 0.7218, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1021 }, { "epoch": 0.343318444023368, "grad_norm": 0.2377941608428955, "learning_rate": 1.97742328335608e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1022 }, { "epoch": 0.34365437205078814, "grad_norm": 0.24724538624286652, "learning_rate": 1.9773449854776783e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1023 }, { "epoch": 0.34399030007820824, "grad_norm": 0.21222631633281708, "learning_rate": 1.9772665536180545e-05, "loss": 0.6959, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1024 }, { "epoch": 0.34432622810562835, "grad_norm": 0.20252744853496552, "learning_rate": 1.9771879877879602e-05, "loss": 0.6838, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1025 }, { "epoch": 0.3446621561330485, "grad_norm": 0.20828157663345337, "learning_rate": 1.977109287998166e-05, "loss": 0.6919, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1026 }, { "epoch": 0.3449980841604686, "grad_norm": 0.22710876166820526, "learning_rate": 1.9770304542594602e-05, "loss": 0.6931, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1027 }, { "epoch": 0.34533401218788873, "grad_norm": 0.19955196976661682, "learning_rate": 1.9769514865826497e-05, "loss": 0.7051, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1028 }, { "epoch": 0.3456699402153089, "grad_norm": 0.21717113256454468, "learning_rate": 1.9768723849785603e-05, "loss": 0.6701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1029 }, { "epoch": 0.346005868242729, "grad_norm": 0.22128178179264069, "learning_rate": 1.976793149458036e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1030 }, { "epoch": 0.3463417962701491, "grad_norm": 0.2145567089319229, "learning_rate": 1.9767137800319386e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1031 }, { "epoch": 0.34667772429756927, "grad_norm": 0.2353924661874771, "learning_rate": 1.9766342767111485e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1032 }, { "epoch": 0.3470136523249894, "grad_norm": 0.2450014054775238, "learning_rate": 1.9765546395065648e-05, "loss": 0.6956, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1033 }, { "epoch": 0.3473495803524095, "grad_norm": 0.2043733149766922, "learning_rate": 1.976474868429105e-05, "loss": 0.7098, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1034 }, { "epoch": 0.34768550837982964, "grad_norm": 0.20504653453826904, "learning_rate": 1.9763949634897042e-05, "loss": 0.6994, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1035 }, { "epoch": 0.34802143640724975, "grad_norm": 0.19594494998455048, "learning_rate": 1.9763149246993162e-05, "loss": 0.6867, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 1036 }, { "epoch": 0.34835736443466986, "grad_norm": 0.2059994637966156, "learning_rate": 1.976234752068914e-05, "loss": 0.6718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1037 }, { "epoch": 0.34869329246209, "grad_norm": 0.2508191466331482, "learning_rate": 1.976154445609488e-05, "loss": 0.6975, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1038 }, { "epoch": 0.34902922048951013, "grad_norm": 0.19884321093559265, "learning_rate": 1.9760740053320465e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1039 }, { "epoch": 0.34936514851693024, "grad_norm": 0.24857667088508606, "learning_rate": 1.9759934312476173e-05, "loss": 0.6924, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1040 }, { "epoch": 0.3497010765443504, "grad_norm": 0.21630264818668365, "learning_rate": 1.9759127233672462e-05, "loss": 0.7019, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1041 }, { "epoch": 0.3500370045717705, "grad_norm": 0.22203056514263153, "learning_rate": 1.9758318817019967e-05, "loss": 0.7021, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1042 }, { "epoch": 0.3503729325991906, "grad_norm": 0.270611047744751, "learning_rate": 1.9757509062629518e-05, "loss": 0.6818, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1043 }, { "epoch": 0.3507088606266108, "grad_norm": 0.21452705562114716, "learning_rate": 1.975669797061212e-05, "loss": 0.6821, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1044 }, { "epoch": 0.3510447886540309, "grad_norm": 0.2517752945423126, "learning_rate": 1.9755885541078955e-05, "loss": 0.7165, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1045 }, { "epoch": 0.351380716681451, "grad_norm": 0.2262014001607895, "learning_rate": 1.9755071774141406e-05, "loss": 0.7258, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1046 }, { "epoch": 0.35171664470887115, "grad_norm": 0.1925652176141739, "learning_rate": 1.9754256669911024e-05, "loss": 0.688, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1047 }, { "epoch": 0.35205257273629126, "grad_norm": 0.2051168531179428, "learning_rate": 1.9753440228499557e-05, "loss": 0.7041, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1048 }, { "epoch": 0.35238850076371137, "grad_norm": 0.1845807135105133, "learning_rate": 1.975262245001892e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1049 }, { "epoch": 0.3527244287911315, "grad_norm": 0.2358422577381134, "learning_rate": 1.9751803334581224e-05, "loss": 0.7131, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1050 }, { "epoch": 0.35306035681855163, "grad_norm": 0.2397981435060501, "learning_rate": 1.9750982882298756e-05, "loss": 0.7092, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1051 }, { "epoch": 0.35339628484597174, "grad_norm": 0.2073984444141388, "learning_rate": 1.9750161093283992e-05, "loss": 0.7109, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1052 }, { "epoch": 0.35373221287339185, "grad_norm": 0.21556656062602997, "learning_rate": 1.974933796764959e-05, "loss": 0.6715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1053 }, { "epoch": 0.354068140900812, "grad_norm": 0.2138376533985138, "learning_rate": 1.9748513505508383e-05, "loss": 0.6765, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1054 }, { "epoch": 0.3544040689282321, "grad_norm": 0.19851158559322357, "learning_rate": 1.9747687706973405e-05, "loss": 0.6997, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1055 }, { "epoch": 0.3547399969556522, "grad_norm": 0.20968717336654663, "learning_rate": 1.974686057215785e-05, "loss": 0.7101, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1056 }, { "epoch": 0.3550759249830724, "grad_norm": 0.2216339260339737, "learning_rate": 1.9746032101175115e-05, "loss": 0.7161, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1057 }, { "epoch": 0.3554118530104925, "grad_norm": 0.20920734107494354, "learning_rate": 1.974520229413877e-05, "loss": 0.6845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1058 }, { "epoch": 0.3557477810379126, "grad_norm": 0.19664138555526733, "learning_rate": 1.974437115116257e-05, "loss": 0.7005, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1059 }, { "epoch": 0.35608370906533277, "grad_norm": 0.22012348473072052, "learning_rate": 1.974353867236046e-05, "loss": 0.7326, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1060 }, { "epoch": 0.3564196370927529, "grad_norm": 0.17379169166088104, "learning_rate": 1.9742704857846554e-05, "loss": 0.7101, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1061 }, { "epoch": 0.356755565120173, "grad_norm": 0.209682896733284, "learning_rate": 1.974186970773516e-05, "loss": 0.7123, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1062 }, { "epoch": 0.35709149314759314, "grad_norm": 0.20476800203323364, "learning_rate": 1.974103322214077e-05, "loss": 0.7026, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1063 }, { "epoch": 0.35742742117501325, "grad_norm": 0.2220449596643448, "learning_rate": 1.9740195401178047e-05, "loss": 0.7191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1064 }, { "epoch": 0.35776334920243336, "grad_norm": 0.1869790405035019, "learning_rate": 1.9739356244961853e-05, "loss": 0.7082, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1065 }, { "epoch": 0.3580992772298535, "grad_norm": 0.20361874997615814, "learning_rate": 1.9738515753607217e-05, "loss": 0.71, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1066 }, { "epoch": 0.3584352052572736, "grad_norm": 0.23827201128005981, "learning_rate": 1.9737673927229368e-05, "loss": 0.7129, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1067 }, { "epoch": 0.35877113328469373, "grad_norm": 0.22870229184627533, "learning_rate": 1.9736830765943708e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1068 }, { "epoch": 0.3591070613121139, "grad_norm": 0.22680653631687164, "learning_rate": 1.973598626986582e-05, "loss": 0.7189, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1069 }, { "epoch": 0.359442989339534, "grad_norm": 0.2034851461648941, "learning_rate": 1.973514043911147e-05, "loss": 0.7013, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1070 }, { "epoch": 0.3597789173669541, "grad_norm": 0.24098221957683563, "learning_rate": 1.973429327379662e-05, "loss": 0.7069, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1071 }, { "epoch": 0.36011484539437427, "grad_norm": 0.2837368845939636, "learning_rate": 1.9733444774037403e-05, "loss": 0.6818, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1072 }, { "epoch": 0.3604507734217944, "grad_norm": 0.27142760157585144, "learning_rate": 1.9732594939950132e-05, "loss": 0.7017, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1073 }, { "epoch": 0.3607867014492145, "grad_norm": 0.18347255885601044, "learning_rate": 1.9731743771651308e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1074 }, { "epoch": 0.36112262947663465, "grad_norm": 0.24700132012367249, "learning_rate": 1.9730891269257618e-05, "loss": 0.6785, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1075 }, { "epoch": 0.36145855750405476, "grad_norm": 0.24800372123718262, "learning_rate": 1.9730037432885932e-05, "loss": 0.6965, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1076 }, { "epoch": 0.36179448553147486, "grad_norm": 0.19282451272010803, "learning_rate": 1.9729182262653297e-05, "loss": 0.6915, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1077 }, { "epoch": 0.362130413558895, "grad_norm": 0.20536118745803833, "learning_rate": 1.9728325758676942e-05, "loss": 0.6684, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1078 }, { "epoch": 0.36246634158631513, "grad_norm": 0.21619465947151184, "learning_rate": 1.972746792107429e-05, "loss": 0.6985, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1079 }, { "epoch": 0.36280226961373524, "grad_norm": 0.22421997785568237, "learning_rate": 1.972660874996293e-05, "loss": 0.7058, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1080 }, { "epoch": 0.3631381976411554, "grad_norm": 0.2543918788433075, "learning_rate": 1.9725748245460652e-05, "loss": 0.7086, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1081 }, { "epoch": 0.3634741256685755, "grad_norm": 0.17841540277004242, "learning_rate": 1.9724886407685414e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1082 }, { "epoch": 0.3638100536959956, "grad_norm": 0.2042849361896515, "learning_rate": 1.9724023236755364e-05, "loss": 0.6949, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1083 }, { "epoch": 0.3641459817234158, "grad_norm": 0.21780085563659668, "learning_rate": 1.9723158732788836e-05, "loss": 0.7211, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1084 }, { "epoch": 0.3644819097508359, "grad_norm": 0.20779995620250702, "learning_rate": 1.9722292895904337e-05, "loss": 0.7189, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1085 }, { "epoch": 0.364817837778256, "grad_norm": 0.17519085109233856, "learning_rate": 1.972142572622056e-05, "loss": 0.708, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1086 }, { "epoch": 0.36515376580567616, "grad_norm": 0.20945300161838531, "learning_rate": 1.972055722385639e-05, "loss": 0.71, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1087 }, { "epoch": 0.36548969383309626, "grad_norm": 0.20755285024642944, "learning_rate": 1.9719687388930883e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1088 }, { "epoch": 0.36582562186051637, "grad_norm": 0.21213299036026, "learning_rate": 1.971881622156328e-05, "loss": 0.69, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1089 }, { "epoch": 0.36616154988793653, "grad_norm": 0.20332206785678864, "learning_rate": 1.9717943721873008e-05, "loss": 0.6721, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1090 }, { "epoch": 0.36649747791535664, "grad_norm": 0.24471624195575714, "learning_rate": 1.971706988997968e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1091 }, { "epoch": 0.36683340594277675, "grad_norm": 0.22686660289764404, "learning_rate": 1.971619472600308e-05, "loss": 0.697, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1092 }, { "epoch": 0.3671693339701969, "grad_norm": 0.20892827212810516, "learning_rate": 1.9715318230063185e-05, "loss": 0.7131, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1093 }, { "epoch": 0.367505261997617, "grad_norm": 0.2358163297176361, "learning_rate": 1.9714440402280153e-05, "loss": 0.6957, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 1094 }, { "epoch": 0.3678411900250371, "grad_norm": 0.24343250691890717, "learning_rate": 1.971356124277432e-05, "loss": 0.6926, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1095 }, { "epoch": 0.3681771180524573, "grad_norm": 0.20389246940612793, "learning_rate": 1.971268075166621e-05, "loss": 0.6739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1096 }, { "epoch": 0.3685130460798774, "grad_norm": 0.2217426598072052, "learning_rate": 1.971179892907652e-05, "loss": 0.696, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1097 }, { "epoch": 0.3688489741072975, "grad_norm": 0.1834961473941803, "learning_rate": 1.9710915775126144e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1098 }, { "epoch": 0.36918490213471766, "grad_norm": 0.20995289087295532, "learning_rate": 1.9710031289936146e-05, "loss": 0.7189, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1099 }, { "epoch": 0.36952083016213777, "grad_norm": 0.23321516811847687, "learning_rate": 1.970914547362778e-05, "loss": 0.7086, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1100 }, { "epoch": 0.3698567581895579, "grad_norm": 0.20416924357414246, "learning_rate": 1.970825832632248e-05, "loss": 0.685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1101 }, { "epoch": 0.37019268621697804, "grad_norm": 0.22139036655426025, "learning_rate": 1.970736984814186e-05, "loss": 0.6839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1102 }, { "epoch": 0.37052861424439815, "grad_norm": 0.22204357385635376, "learning_rate": 1.9706480039207722e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1103 }, { "epoch": 0.37086454227181825, "grad_norm": 0.19906076788902283, "learning_rate": 1.9705588899642045e-05, "loss": 0.7154, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1104 }, { "epoch": 0.37120047029923836, "grad_norm": 0.19306680560112, "learning_rate": 1.9704696429566993e-05, "loss": 0.6886, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1105 }, { "epoch": 0.3715363983266585, "grad_norm": 0.19758370518684387, "learning_rate": 1.970380262910491e-05, "loss": 0.6915, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1106 }, { "epoch": 0.37187232635407863, "grad_norm": 0.1952657550573349, "learning_rate": 1.9702907498378326e-05, "loss": 0.6797, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1107 }, { "epoch": 0.37220825438149874, "grad_norm": 0.18436743319034576, "learning_rate": 1.9702011037509955e-05, "loss": 0.7189, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1108 }, { "epoch": 0.3725441824089189, "grad_norm": 0.18979348242282867, "learning_rate": 1.9701113246622685e-05, "loss": 0.6748, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1109 }, { "epoch": 0.372880110436339, "grad_norm": 0.20334599912166595, "learning_rate": 1.9700214125839595e-05, "loss": 0.6992, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1110 }, { "epoch": 0.3732160384637591, "grad_norm": 0.20938101410865784, "learning_rate": 1.969931367528394e-05, "loss": 0.7033, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1111 }, { "epoch": 0.3735519664911793, "grad_norm": 0.17872728407382965, "learning_rate": 1.9698411895079158e-05, "loss": 0.6951, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1112 }, { "epoch": 0.3738878945185994, "grad_norm": 0.19739395380020142, "learning_rate": 1.969750878534888e-05, "loss": 0.7075, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1113 }, { "epoch": 0.3742238225460195, "grad_norm": 0.22137324512004852, "learning_rate": 1.9696604346216902e-05, "loss": 0.7055, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1114 }, { "epoch": 0.37455975057343965, "grad_norm": 0.23762616515159607, "learning_rate": 1.9695698577807214e-05, "loss": 0.6951, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1115 }, { "epoch": 0.37489567860085976, "grad_norm": 0.2351841777563095, "learning_rate": 1.9694791480243983e-05, "loss": 0.702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1116 }, { "epoch": 0.37523160662827987, "grad_norm": 0.25578582286834717, "learning_rate": 1.9693883053651566e-05, "loss": 0.6893, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1117 }, { "epoch": 0.37556753465570003, "grad_norm": 0.237987220287323, "learning_rate": 1.9692973298154488e-05, "loss": 0.7022, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1118 }, { "epoch": 0.37590346268312014, "grad_norm": 0.2328849881887436, "learning_rate": 1.9692062213877473e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1119 }, { "epoch": 0.37623939071054024, "grad_norm": 0.1859012395143509, "learning_rate": 1.9691149800945416e-05, "loss": 0.7138, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1120 }, { "epoch": 0.3765753187379604, "grad_norm": 0.20735210180282593, "learning_rate": 1.9690236059483392e-05, "loss": 0.684, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1121 }, { "epoch": 0.3769112467653805, "grad_norm": 0.23790688812732697, "learning_rate": 1.9689320989616666e-05, "loss": 0.6905, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1122 }, { "epoch": 0.3772471747928006, "grad_norm": 0.23762153089046478, "learning_rate": 1.9688404591470683e-05, "loss": 0.7112, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1123 }, { "epoch": 0.3775831028202208, "grad_norm": 0.1873386651277542, "learning_rate": 1.968748686517107e-05, "loss": 0.7128, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1124 }, { "epoch": 0.3779190308476409, "grad_norm": 0.24788321554660797, "learning_rate": 1.9686567810843633e-05, "loss": 0.7038, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1125 }, { "epoch": 0.378254958875061, "grad_norm": 0.26858991384506226, "learning_rate": 1.9685647428614367e-05, "loss": 0.7129, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1126 }, { "epoch": 0.37859088690248116, "grad_norm": 0.2397158294916153, "learning_rate": 1.968472571860944e-05, "loss": 0.719, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1127 }, { "epoch": 0.37892681492990127, "grad_norm": 0.2509825527667999, "learning_rate": 1.9683802680955206e-05, "loss": 0.7223, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1128 }, { "epoch": 0.3792627429573214, "grad_norm": 0.2194094955921173, "learning_rate": 1.96828783157782e-05, "loss": 0.6965, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1129 }, { "epoch": 0.37959867098474154, "grad_norm": 0.23861511051654816, "learning_rate": 1.9681952623205146e-05, "loss": 0.6866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1130 }, { "epoch": 0.37993459901216164, "grad_norm": 0.21188931167125702, "learning_rate": 1.968102560336294e-05, "loss": 0.6907, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1131 }, { "epoch": 0.38027052703958175, "grad_norm": 0.20271416008472443, "learning_rate": 1.968009725637867e-05, "loss": 0.7049, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1132 }, { "epoch": 0.3806064550670019, "grad_norm": 0.2373126596212387, "learning_rate": 1.967916758237959e-05, "loss": 0.7053, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1133 }, { "epoch": 0.380942383094422, "grad_norm": 0.186924546957016, "learning_rate": 1.9678236581493156e-05, "loss": 0.6836, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1134 }, { "epoch": 0.38127831112184213, "grad_norm": 0.22696498036384583, "learning_rate": 1.9677304253846992e-05, "loss": 0.7006, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1135 }, { "epoch": 0.3816142391492623, "grad_norm": 0.22512127459049225, "learning_rate": 1.9676370599568905e-05, "loss": 0.6975, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1136 }, { "epoch": 0.3819501671766824, "grad_norm": 0.20869943499565125, "learning_rate": 1.9675435618786893e-05, "loss": 0.7027, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1137 }, { "epoch": 0.3822860952041025, "grad_norm": 0.22593830525875092, "learning_rate": 1.9674499311629124e-05, "loss": 0.6974, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1138 }, { "epoch": 0.38262202323152267, "grad_norm": 0.2566017508506775, "learning_rate": 1.967356167822396e-05, "loss": 0.7273, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1139 }, { "epoch": 0.3829579512589428, "grad_norm": 0.24297082424163818, "learning_rate": 1.9672622718699933e-05, "loss": 0.7034, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1140 }, { "epoch": 0.3832938792863629, "grad_norm": 0.2506338953971863, "learning_rate": 1.9671682433185762e-05, "loss": 0.6876, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1141 }, { "epoch": 0.38362980731378304, "grad_norm": 0.25198444724082947, "learning_rate": 1.967074082181035e-05, "loss": 0.7076, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1142 }, { "epoch": 0.38396573534120315, "grad_norm": 0.2753666043281555, "learning_rate": 1.966979788470278e-05, "loss": 0.7201, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1143 }, { "epoch": 0.38430166336862326, "grad_norm": 0.19870725274085999, "learning_rate": 1.966885362199231e-05, "loss": 0.672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1144 }, { "epoch": 0.3846375913960434, "grad_norm": 0.24382136762142181, "learning_rate": 1.96679080338084e-05, "loss": 0.7085, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1145 }, { "epoch": 0.38497351942346353, "grad_norm": 0.22534064948558807, "learning_rate": 1.9666961120280664e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1146 }, { "epoch": 0.38530944745088364, "grad_norm": 0.23273350298404694, "learning_rate": 1.9666012881538915e-05, "loss": 0.728, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1147 }, { "epoch": 0.3856453754783038, "grad_norm": 0.223614901304245, "learning_rate": 1.966506331771315e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1148 }, { "epoch": 0.3859813035057239, "grad_norm": 0.23338815569877625, "learning_rate": 1.9664112428933534e-05, "loss": 0.7248, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1149 }, { "epoch": 0.386317231533144, "grad_norm": 0.20041580498218536, "learning_rate": 1.9663160215330424e-05, "loss": 0.7099, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1150 }, { "epoch": 0.3866531595605642, "grad_norm": 0.21498240530490875, "learning_rate": 1.966220667703436e-05, "loss": 0.7155, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1151 }, { "epoch": 0.3869890875879843, "grad_norm": 0.20108960568904877, "learning_rate": 1.9661251814176055e-05, "loss": 0.7072, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1152 }, { "epoch": 0.3873250156154044, "grad_norm": 0.22014199197292328, "learning_rate": 1.966029562688641e-05, "loss": 0.6784, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1153 }, { "epoch": 0.38766094364282455, "grad_norm": 0.18663105368614197, "learning_rate": 1.9659338115296506e-05, "loss": 0.6836, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1154 }, { "epoch": 0.38799687167024466, "grad_norm": 0.20290139317512512, "learning_rate": 1.9658379279537604e-05, "loss": 0.705, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1155 }, { "epoch": 0.38833279969766477, "grad_norm": 0.20251742005348206, "learning_rate": 1.965741911974115e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1156 }, { "epoch": 0.38866872772508493, "grad_norm": 0.1958988606929779, "learning_rate": 1.9656457636038768e-05, "loss": 0.6885, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1157 }, { "epoch": 0.38900465575250504, "grad_norm": 0.20581062138080597, "learning_rate": 1.9655494828562264e-05, "loss": 0.6718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1158 }, { "epoch": 0.38934058377992514, "grad_norm": 0.17878402769565582, "learning_rate": 1.9654530697443627e-05, "loss": 0.6938, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1159 }, { "epoch": 0.38967651180734525, "grad_norm": 0.18379399180412292, "learning_rate": 1.9653565242815027e-05, "loss": 0.6851, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1160 }, { "epoch": 0.3900124398347654, "grad_norm": 0.20466750860214233, "learning_rate": 1.9652598464808815e-05, "loss": 0.6926, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1161 }, { "epoch": 0.3903483678621855, "grad_norm": 0.23170116543769836, "learning_rate": 1.9651630363557527e-05, "loss": 0.6967, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1162 }, { "epoch": 0.3906842958896056, "grad_norm": 0.23289258778095245, "learning_rate": 1.965066093919387e-05, "loss": 0.7017, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1163 }, { "epoch": 0.3910202239170258, "grad_norm": 0.2803998589515686, "learning_rate": 1.9649690191850745e-05, "loss": 0.6919, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1164 }, { "epoch": 0.3913561519444459, "grad_norm": 0.26042699813842773, "learning_rate": 1.964871812166123e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1165 }, { "epoch": 0.391692079971866, "grad_norm": 0.23340445756912231, "learning_rate": 1.9647744728758574e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1166 }, { "epoch": 0.39202800799928617, "grad_norm": 0.26349112391471863, "learning_rate": 1.9646770013276226e-05, "loss": 0.6907, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1167 }, { "epoch": 0.3923639360267063, "grad_norm": 0.20064353942871094, "learning_rate": 1.9645793975347805e-05, "loss": 0.7136, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1168 }, { "epoch": 0.3926998640541264, "grad_norm": 0.2537063956260681, "learning_rate": 1.964481661510711e-05, "loss": 0.7037, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1169 }, { "epoch": 0.39303579208154654, "grad_norm": 0.22620949149131775, "learning_rate": 1.9643837932688125e-05, "loss": 0.6931, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1170 }, { "epoch": 0.39337172010896665, "grad_norm": 0.18600347638130188, "learning_rate": 1.9642857928225016e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.33, "memory/max_allocated (GiB)": 64.33, "step": 1171 }, { "epoch": 0.39370764813638676, "grad_norm": 0.249084934592247, "learning_rate": 1.964187660185213e-05, "loss": 0.694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1172 }, { "epoch": 0.3940435761638069, "grad_norm": 0.198327898979187, "learning_rate": 1.964089395370399e-05, "loss": 0.7069, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1173 }, { "epoch": 0.394379504191227, "grad_norm": 0.24478067457675934, "learning_rate": 1.9639909983915304e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1174 }, { "epoch": 0.39471543221864713, "grad_norm": 0.24369961023330688, "learning_rate": 1.9638924692620967e-05, "loss": 0.704, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1175 }, { "epoch": 0.3950513602460673, "grad_norm": 0.21254433691501617, "learning_rate": 1.9637938079956045e-05, "loss": 0.708, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1176 }, { "epoch": 0.3953872882734874, "grad_norm": 0.23249691724777222, "learning_rate": 1.963695014605579e-05, "loss": 0.6914, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1177 }, { "epoch": 0.3957232163009075, "grad_norm": 0.20742124319076538, "learning_rate": 1.963596089105564e-05, "loss": 0.677, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1178 }, { "epoch": 0.3960591443283277, "grad_norm": 0.2570185959339142, "learning_rate": 1.9634970315091203e-05, "loss": 0.7301, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1179 }, { "epoch": 0.3963950723557478, "grad_norm": 0.20579330623149872, "learning_rate": 1.9633978418298275e-05, "loss": 0.7089, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1180 }, { "epoch": 0.3967310003831679, "grad_norm": 0.21698375046253204, "learning_rate": 1.9632985200812832e-05, "loss": 0.7012, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1181 }, { "epoch": 0.39706692841058805, "grad_norm": 0.27267947793006897, "learning_rate": 1.963199066277103e-05, "loss": 0.7019, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1182 }, { "epoch": 0.39740285643800816, "grad_norm": 0.23824501037597656, "learning_rate": 1.9630994804309214e-05, "loss": 0.69, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1183 }, { "epoch": 0.39773878446542826, "grad_norm": 0.24528904259204865, "learning_rate": 1.9629997625563896e-05, "loss": 0.712, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1184 }, { "epoch": 0.3980747124928484, "grad_norm": 0.24642422795295715, "learning_rate": 1.9628999126671777e-05, "loss": 0.6751, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1185 }, { "epoch": 0.39841064052026853, "grad_norm": 0.20438744127750397, "learning_rate": 1.9627999307769743e-05, "loss": 0.6921, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1186 }, { "epoch": 0.39874656854768864, "grad_norm": 0.2527939975261688, "learning_rate": 1.962699816899485e-05, "loss": 0.7235, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1187 }, { "epoch": 0.3990824965751088, "grad_norm": 0.26445651054382324, "learning_rate": 1.9625995710484346e-05, "loss": 0.6977, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1188 }, { "epoch": 0.3994184246025289, "grad_norm": 0.2692646384239197, "learning_rate": 1.962499193237565e-05, "loss": 0.7079, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1189 }, { "epoch": 0.399754352629949, "grad_norm": 0.1968051940202713, "learning_rate": 1.9623986834806374e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1190 }, { "epoch": 0.4000902806573692, "grad_norm": 0.22019387781620026, "learning_rate": 1.9622980417914296e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1191 }, { "epoch": 0.4004262086847893, "grad_norm": 0.26850125193595886, "learning_rate": 1.962197268183739e-05, "loss": 0.7172, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1192 }, { "epoch": 0.4007621367122094, "grad_norm": 0.19982381165027618, "learning_rate": 1.9620963626713792e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1193 }, { "epoch": 0.40109806473962956, "grad_norm": 0.2585846483707428, "learning_rate": 1.9619953252681844e-05, "loss": 0.6988, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1194 }, { "epoch": 0.40143399276704966, "grad_norm": 0.24434837698936462, "learning_rate": 1.9618941559880045e-05, "loss": 0.69, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1195 }, { "epoch": 0.40176992079446977, "grad_norm": 0.2150118052959442, "learning_rate": 1.9617928548447088e-05, "loss": 0.7165, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1196 }, { "epoch": 0.40210584882188993, "grad_norm": 0.26999324560165405, "learning_rate": 1.961691421852185e-05, "loss": 0.7049, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1197 }, { "epoch": 0.40244177684931004, "grad_norm": 0.28004488348960876, "learning_rate": 1.961589857024337e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1198 }, { "epoch": 0.40277770487673015, "grad_norm": 0.23913702368736267, "learning_rate": 1.961488160375089e-05, "loss": 0.6797, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1199 }, { "epoch": 0.4031136329041503, "grad_norm": 0.22319504618644714, "learning_rate": 1.9613863319183818e-05, "loss": 0.7033, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1200 }, { "epoch": 0.4034495609315704, "grad_norm": 0.2681514322757721, "learning_rate": 1.9612843716681753e-05, "loss": 0.6886, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1201 }, { "epoch": 0.4037854889589905, "grad_norm": 0.1858861744403839, "learning_rate": 1.9611822796384464e-05, "loss": 0.675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1202 }, { "epoch": 0.4041214169864107, "grad_norm": 0.23796413838863373, "learning_rate": 1.9610800558431904e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1203 }, { "epoch": 0.4044573450138308, "grad_norm": 0.22899137437343597, "learning_rate": 1.9609777002964212e-05, "loss": 0.7194, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1204 }, { "epoch": 0.4047932730412509, "grad_norm": 0.17599482834339142, "learning_rate": 1.9608752130121706e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 1205 }, { "epoch": 0.40512920106867106, "grad_norm": 0.2124035805463791, "learning_rate": 1.960772594004488e-05, "loss": 0.7043, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1206 }, { "epoch": 0.40546512909609117, "grad_norm": 0.2124132215976715, "learning_rate": 1.9606698432874407e-05, "loss": 0.7133, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1207 }, { "epoch": 0.4058010571235113, "grad_norm": 0.26117992401123047, "learning_rate": 1.960566960875115e-05, "loss": 0.7092, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1208 }, { "epoch": 0.40613698515093144, "grad_norm": 0.1942119151353836, "learning_rate": 1.9604639467816153e-05, "loss": 0.6894, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1209 }, { "epoch": 0.40647291317835155, "grad_norm": 0.18946926295757294, "learning_rate": 1.9603608010210625e-05, "loss": 0.697, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1210 }, { "epoch": 0.40680884120577165, "grad_norm": 0.2215575873851776, "learning_rate": 1.960257523607597e-05, "loss": 0.6751, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1211 }, { "epoch": 0.40714476923319176, "grad_norm": 0.2007640153169632, "learning_rate": 1.9601541145553763e-05, "loss": 0.7067, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1212 }, { "epoch": 0.4074806972606119, "grad_norm": 0.22950848937034607, "learning_rate": 1.9600505738785772e-05, "loss": 0.7038, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1213 }, { "epoch": 0.40781662528803203, "grad_norm": 0.22855666279792786, "learning_rate": 1.959946901591393e-05, "loss": 0.7008, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1214 }, { "epoch": 0.40815255331545214, "grad_norm": 0.20112906396389008, "learning_rate": 1.9598430977080363e-05, "loss": 0.7051, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1215 }, { "epoch": 0.4084884813428723, "grad_norm": 0.18133755028247833, "learning_rate": 1.9597391622427374e-05, "loss": 0.7141, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1216 }, { "epoch": 0.4088244093702924, "grad_norm": 0.2116776704788208, "learning_rate": 1.9596350952097438e-05, "loss": 0.6837, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1217 }, { "epoch": 0.4091603373977125, "grad_norm": 0.2306061089038849, "learning_rate": 1.9595308966233225e-05, "loss": 0.7066, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1218 }, { "epoch": 0.4094962654251327, "grad_norm": 0.22522464394569397, "learning_rate": 1.9594265664977575e-05, "loss": 0.6977, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1219 }, { "epoch": 0.4098321934525528, "grad_norm": 0.184628427028656, "learning_rate": 1.959322104847351e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1220 }, { "epoch": 0.4101681214799729, "grad_norm": 0.1929190754890442, "learning_rate": 1.9592175116864228e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1221 }, { "epoch": 0.41050404950739305, "grad_norm": 0.18393518030643463, "learning_rate": 1.9591127870293123e-05, "loss": 0.7143, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1222 }, { "epoch": 0.41083997753481316, "grad_norm": 0.18890589475631714, "learning_rate": 1.9590079308903754e-05, "loss": 0.6994, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1223 }, { "epoch": 0.41117590556223327, "grad_norm": 0.2214229702949524, "learning_rate": 1.9589029432839865e-05, "loss": 0.7001, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1224 }, { "epoch": 0.41151183358965343, "grad_norm": 0.2229766845703125, "learning_rate": 1.9587978242245378e-05, "loss": 0.7109, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1225 }, { "epoch": 0.41184776161707354, "grad_norm": 0.17016974091529846, "learning_rate": 1.9586925737264398e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1226 }, { "epoch": 0.41218368964449364, "grad_norm": 0.22504094243049622, "learning_rate": 1.9585871918041216e-05, "loss": 0.713, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1227 }, { "epoch": 0.4125196176719138, "grad_norm": 0.2176039069890976, "learning_rate": 1.9584816784720294e-05, "loss": 0.6862, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1228 }, { "epoch": 0.4128555456993339, "grad_norm": 0.20431897044181824, "learning_rate": 1.9583760337446272e-05, "loss": 0.6828, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1229 }, { "epoch": 0.413191473726754, "grad_norm": 0.21481989324092865, "learning_rate": 1.9582702576363976e-05, "loss": 0.709, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1230 }, { "epoch": 0.4135274017541742, "grad_norm": 0.17403313517570496, "learning_rate": 1.9581643501618414e-05, "loss": 0.7113, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1231 }, { "epoch": 0.4138633297815943, "grad_norm": 0.2443041205406189, "learning_rate": 1.9580583113354774e-05, "loss": 0.7286, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1232 }, { "epoch": 0.4141992578090144, "grad_norm": 0.22420264780521393, "learning_rate": 1.9579521411718415e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1233 }, { "epoch": 0.41453518583643456, "grad_norm": 0.20615363121032715, "learning_rate": 1.9578458396854893e-05, "loss": 0.6911, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1234 }, { "epoch": 0.41487111386385467, "grad_norm": 0.21927350759506226, "learning_rate": 1.957739406890992e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1235 }, { "epoch": 0.4152070418912748, "grad_norm": 0.19368137419223785, "learning_rate": 1.957632842802941e-05, "loss": 0.7038, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1236 }, { "epoch": 0.41554296991869494, "grad_norm": 0.22061222791671753, "learning_rate": 1.9575261474359442e-05, "loss": 0.7053, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1237 }, { "epoch": 0.41587889794611504, "grad_norm": 0.20413507521152496, "learning_rate": 1.9574193208046285e-05, "loss": 0.6934, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1238 }, { "epoch": 0.41621482597353515, "grad_norm": 0.21308071911334991, "learning_rate": 1.957312362923639e-05, "loss": 0.6725, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1239 }, { "epoch": 0.4165507540009553, "grad_norm": 0.20272590219974518, "learning_rate": 1.9572052738076372e-05, "loss": 0.699, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1240 }, { "epoch": 0.4168866820283754, "grad_norm": 0.18768027424812317, "learning_rate": 1.9570980534713043e-05, "loss": 0.7017, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1241 }, { "epoch": 0.41722261005579553, "grad_norm": 0.20826032757759094, "learning_rate": 1.9569907019293387e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1242 }, { "epoch": 0.4175585380832157, "grad_norm": 0.16821108758449554, "learning_rate": 1.956883219196457e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1243 }, { "epoch": 0.4178944661106358, "grad_norm": 0.20395101606845856, "learning_rate": 1.9567756052873933e-05, "loss": 0.6947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1244 }, { "epoch": 0.4182303941380559, "grad_norm": 0.23204350471496582, "learning_rate": 1.9566678602169e-05, "loss": 0.6685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1245 }, { "epoch": 0.41856632216547607, "grad_norm": 0.1806616634130478, "learning_rate": 1.956559983999748e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1246 }, { "epoch": 0.4189022501928962, "grad_norm": 0.1954452097415924, "learning_rate": 1.9564519766507257e-05, "loss": 0.6889, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1247 }, { "epoch": 0.4192381782203163, "grad_norm": 0.2089233547449112, "learning_rate": 1.956343838184639e-05, "loss": 0.6893, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1248 }, { "epoch": 0.41957410624773644, "grad_norm": 0.2287808358669281, "learning_rate": 1.956235568616313e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1249 }, { "epoch": 0.41991003427515655, "grad_norm": 0.21085773408412933, "learning_rate": 1.956127167960589e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1250 }, { "epoch": 0.42024596230257666, "grad_norm": 0.277823269367218, "learning_rate": 1.9560186362323284e-05, "loss": 0.6932, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1251 }, { "epoch": 0.4205818903299968, "grad_norm": 0.1956104040145874, "learning_rate": 1.9559099734464092e-05, "loss": 0.7087, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1252 }, { "epoch": 0.42091781835741693, "grad_norm": 0.26910173892974854, "learning_rate": 1.9558011796177274e-05, "loss": 0.7115, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1253 }, { "epoch": 0.42125374638483704, "grad_norm": 0.2642664313316345, "learning_rate": 1.9556922547611967e-05, "loss": 0.6947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1254 }, { "epoch": 0.4215896744122572, "grad_norm": 0.25931277871131897, "learning_rate": 1.9555831988917503e-05, "loss": 0.7094, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1255 }, { "epoch": 0.4219256024396773, "grad_norm": 0.21452711522579193, "learning_rate": 1.9554740120243378e-05, "loss": 0.717, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1256 }, { "epoch": 0.4222615304670974, "grad_norm": 0.20916353166103363, "learning_rate": 1.9553646941739272e-05, "loss": 0.7239, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1257 }, { "epoch": 0.4225974584945176, "grad_norm": 0.21548748016357422, "learning_rate": 1.9552552453555052e-05, "loss": 0.6766, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1258 }, { "epoch": 0.4229333865219377, "grad_norm": 0.23661397397518158, "learning_rate": 1.955145665584075e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1259 }, { "epoch": 0.4232693145493578, "grad_norm": 0.25346270203590393, "learning_rate": 1.9550359548746588e-05, "loss": 0.6992, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1260 }, { "epoch": 0.42360524257677795, "grad_norm": 0.209383025765419, "learning_rate": 1.954926113242297e-05, "loss": 0.7081, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1261 }, { "epoch": 0.42394117060419806, "grad_norm": 0.27952101826667786, "learning_rate": 1.9548161407020467e-05, "loss": 0.7043, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1262 }, { "epoch": 0.42427709863161817, "grad_norm": 0.25478053092956543, "learning_rate": 1.954706037268984e-05, "loss": 0.6959, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1263 }, { "epoch": 0.42461302665903833, "grad_norm": 0.2264872044324875, "learning_rate": 1.954595802958203e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1264 }, { "epoch": 0.42494895468645844, "grad_norm": 0.22827763855457306, "learning_rate": 1.9544854377848147e-05, "loss": 0.7085, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1265 }, { "epoch": 0.42528488271387854, "grad_norm": 0.22681662440299988, "learning_rate": 1.9543749417639493e-05, "loss": 0.6952, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1266 }, { "epoch": 0.42562081074129865, "grad_norm": 0.2203156054019928, "learning_rate": 1.954264314910754e-05, "loss": 0.6987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1267 }, { "epoch": 0.4259567387687188, "grad_norm": 0.2548235356807709, "learning_rate": 1.9541535572403946e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1268 }, { "epoch": 0.4262926667961389, "grad_norm": 0.21482475101947784, "learning_rate": 1.9540426687680542e-05, "loss": 0.691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1269 }, { "epoch": 0.426628594823559, "grad_norm": 0.20755089819431305, "learning_rate": 1.9539316495089343e-05, "loss": 0.6788, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1270 }, { "epoch": 0.4269645228509792, "grad_norm": 0.24536123871803284, "learning_rate": 1.9538204994782544e-05, "loss": 0.6952, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1271 }, { "epoch": 0.4273004508783993, "grad_norm": 0.23965293169021606, "learning_rate": 1.9537092186912515e-05, "loss": 0.6867, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1272 }, { "epoch": 0.4276363789058194, "grad_norm": 0.24929127097129822, "learning_rate": 1.9535978071631808e-05, "loss": 0.6799, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1273 }, { "epoch": 0.42797230693323957, "grad_norm": 0.20675593614578247, "learning_rate": 1.9534862649093153e-05, "loss": 0.7096, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1274 }, { "epoch": 0.4283082349606597, "grad_norm": 0.18509900569915771, "learning_rate": 1.9533745919449462e-05, "loss": 0.7038, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1275 }, { "epoch": 0.4286441629880798, "grad_norm": 0.21557915210723877, "learning_rate": 1.953262788285382e-05, "loss": 0.6938, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1276 }, { "epoch": 0.42898009101549994, "grad_norm": 0.2415819764137268, "learning_rate": 1.9531508539459495e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1277 }, { "epoch": 0.42931601904292005, "grad_norm": 0.2417486011981964, "learning_rate": 1.9530387889419943e-05, "loss": 0.723, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1278 }, { "epoch": 0.42965194707034016, "grad_norm": 0.2054450362920761, "learning_rate": 1.952926593288878e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1279 }, { "epoch": 0.4299878750977603, "grad_norm": 0.1922212392091751, "learning_rate": 1.9528142670019817e-05, "loss": 0.6739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1280 }, { "epoch": 0.4303238031251804, "grad_norm": 0.1927880346775055, "learning_rate": 1.952701810096704e-05, "loss": 0.7083, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1281 }, { "epoch": 0.43065973115260053, "grad_norm": 0.21027445793151855, "learning_rate": 1.9525892225884605e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1282 }, { "epoch": 0.4309956591800207, "grad_norm": 0.17448924481868744, "learning_rate": 1.9524765044926862e-05, "loss": 0.6928, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1283 }, { "epoch": 0.4313315872074408, "grad_norm": 0.17480161786079407, "learning_rate": 1.9523636558248332e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1284 }, { "epoch": 0.4316675152348609, "grad_norm": 0.2037777602672577, "learning_rate": 1.9522506766003717e-05, "loss": 0.7091, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1285 }, { "epoch": 0.4320034432622811, "grad_norm": 0.1657009720802307, "learning_rate": 1.9521375668347892e-05, "loss": 0.6839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1286 }, { "epoch": 0.4323393712897012, "grad_norm": 0.2143884152173996, "learning_rate": 1.9520243265435915e-05, "loss": 0.709, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1287 }, { "epoch": 0.4326752993171213, "grad_norm": 0.2329479604959488, "learning_rate": 1.951910955742303e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1288 }, { "epoch": 0.43301122734454145, "grad_norm": 0.22207500040531158, "learning_rate": 1.951797454446465e-05, "loss": 0.6612, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1289 }, { "epoch": 0.43334715537196156, "grad_norm": 0.20140953361988068, "learning_rate": 1.951683822671637e-05, "loss": 0.7096, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1290 }, { "epoch": 0.43368308339938166, "grad_norm": 0.23205067217350006, "learning_rate": 1.9515700604333966e-05, "loss": 0.6907, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1291 }, { "epoch": 0.4340190114268018, "grad_norm": 0.20840562880039215, "learning_rate": 1.9514561677473388e-05, "loss": 0.7015, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1292 }, { "epoch": 0.43435493945422193, "grad_norm": 0.20433883368968964, "learning_rate": 1.9513421446290773e-05, "loss": 0.7035, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1293 }, { "epoch": 0.43469086748164204, "grad_norm": 0.20315122604370117, "learning_rate": 1.9512279910942425e-05, "loss": 0.6964, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1294 }, { "epoch": 0.4350267955090622, "grad_norm": 0.20644329488277435, "learning_rate": 1.9511137071584843e-05, "loss": 0.7005, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1295 }, { "epoch": 0.4353627235364823, "grad_norm": 0.19397835433483124, "learning_rate": 1.9509992928374686e-05, "loss": 0.6785, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1296 }, { "epoch": 0.4356986515639024, "grad_norm": 0.1880999505519867, "learning_rate": 1.9508847481468808e-05, "loss": 0.7152, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1297 }, { "epoch": 0.4360345795913226, "grad_norm": 0.18840964138507843, "learning_rate": 1.950770073102423e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1298 }, { "epoch": 0.4363705076187427, "grad_norm": 0.18173588812351227, "learning_rate": 1.9506552677198155e-05, "loss": 0.6917, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1299 }, { "epoch": 0.4367064356461628, "grad_norm": 0.20877601206302643, "learning_rate": 1.9505403320147974e-05, "loss": 0.6944, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1300 }, { "epoch": 0.43704236367358296, "grad_norm": 0.19468212127685547, "learning_rate": 1.950425266003124e-05, "loss": 0.699, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 1301 }, { "epoch": 0.43737829170100306, "grad_norm": 0.20906776189804077, "learning_rate": 1.95031006970057e-05, "loss": 0.6839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1302 }, { "epoch": 0.43771421972842317, "grad_norm": 0.18358586728572845, "learning_rate": 1.950194743122927e-05, "loss": 0.7193, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1303 }, { "epoch": 0.43805014775584333, "grad_norm": 0.20065729320049286, "learning_rate": 1.950079286286005e-05, "loss": 0.7127, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1304 }, { "epoch": 0.43838607578326344, "grad_norm": 0.18285039067268372, "learning_rate": 1.949963699205631e-05, "loss": 0.7146, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1305 }, { "epoch": 0.43872200381068355, "grad_norm": 0.20312485098838806, "learning_rate": 1.9498479818976518e-05, "loss": 0.7153, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1306 }, { "epoch": 0.4390579318381037, "grad_norm": 0.18035942316055298, "learning_rate": 1.949732134377929e-05, "loss": 0.683, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1307 }, { "epoch": 0.4393938598655238, "grad_norm": 0.17266309261322021, "learning_rate": 1.9496161566623454e-05, "loss": 0.6922, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1308 }, { "epoch": 0.4397297878929439, "grad_norm": 0.1954328417778015, "learning_rate": 1.9495000487667987e-05, "loss": 0.6817, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1309 }, { "epoch": 0.4400657159203641, "grad_norm": 0.19256800413131714, "learning_rate": 1.9493838107072066e-05, "loss": 0.6852, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1310 }, { "epoch": 0.4404016439477842, "grad_norm": 0.20500880479812622, "learning_rate": 1.9492674424995033e-05, "loss": 0.7018, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1311 }, { "epoch": 0.4407375719752043, "grad_norm": 0.20491847395896912, "learning_rate": 1.949150944159642e-05, "loss": 0.6787, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1312 }, { "epoch": 0.44107350000262446, "grad_norm": 0.18385806679725647, "learning_rate": 1.9490343157035927e-05, "loss": 0.7233, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1313 }, { "epoch": 0.44140942803004457, "grad_norm": 0.19703102111816406, "learning_rate": 1.9489175571473436e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1314 }, { "epoch": 0.4417453560574647, "grad_norm": 0.20053766667842865, "learning_rate": 1.9488006685069006e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1315 }, { "epoch": 0.44208128408488484, "grad_norm": 0.1924598515033722, "learning_rate": 1.948683649798288e-05, "loss": 0.726, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1316 }, { "epoch": 0.44241721211230495, "grad_norm": 0.2261778563261032, "learning_rate": 1.9485665010375476e-05, "loss": 0.7012, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1317 }, { "epoch": 0.44275314013972505, "grad_norm": 0.20827920734882355, "learning_rate": 1.9484492222407387e-05, "loss": 0.693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1318 }, { "epoch": 0.44308906816714516, "grad_norm": 0.2148757427930832, "learning_rate": 1.9483318134239387e-05, "loss": 0.694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1319 }, { "epoch": 0.4434249961945653, "grad_norm": 0.2401062250137329, "learning_rate": 1.9482142746032426e-05, "loss": 0.7173, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1320 }, { "epoch": 0.44376092422198543, "grad_norm": 0.21949723362922668, "learning_rate": 1.948096605794764e-05, "loss": 0.6975, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1321 }, { "epoch": 0.44409685224940554, "grad_norm": 0.17916172742843628, "learning_rate": 1.9479788070146332e-05, "loss": 0.6827, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1322 }, { "epoch": 0.4444327802768257, "grad_norm": 0.21445995569229126, "learning_rate": 1.9478608782789993e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1323 }, { "epoch": 0.4447687083042458, "grad_norm": 0.20723626017570496, "learning_rate": 1.9477428196040288e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1324 }, { "epoch": 0.4451046363316659, "grad_norm": 0.19633178412914276, "learning_rate": 1.9476246310059054e-05, "loss": 0.7084, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1325 }, { "epoch": 0.4454405643590861, "grad_norm": 0.18858569860458374, "learning_rate": 1.9475063125008315e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1326 }, { "epoch": 0.4457764923865062, "grad_norm": 0.1865893304347992, "learning_rate": 1.9473878641050276e-05, "loss": 0.7033, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1327 }, { "epoch": 0.4461124204139263, "grad_norm": 0.18488432466983795, "learning_rate": 1.9472692858347307e-05, "loss": 0.6583, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1328 }, { "epoch": 0.44644834844134645, "grad_norm": 0.21241115033626556, "learning_rate": 1.9471505777061966e-05, "loss": 0.6924, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1329 }, { "epoch": 0.44678427646876656, "grad_norm": 0.19937264919281006, "learning_rate": 1.9470317397356985e-05, "loss": 0.6756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1330 }, { "epoch": 0.44712020449618667, "grad_norm": 0.19093842804431915, "learning_rate": 1.946912771939528e-05, "loss": 0.7078, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1331 }, { "epoch": 0.44745613252360683, "grad_norm": 0.23509865999221802, "learning_rate": 1.9467936743339935e-05, "loss": 0.6824, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1332 }, { "epoch": 0.44779206055102694, "grad_norm": 0.22724629938602448, "learning_rate": 1.946674446935422e-05, "loss": 0.7144, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1333 }, { "epoch": 0.44812798857844705, "grad_norm": 0.1858832836151123, "learning_rate": 1.946555089760158e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1334 }, { "epoch": 0.4484639166058672, "grad_norm": 0.22736816108226776, "learning_rate": 1.9464356028245634e-05, "loss": 0.6965, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1335 }, { "epoch": 0.4487998446332873, "grad_norm": 0.21219515800476074, "learning_rate": 1.9463159861450188e-05, "loss": 0.7083, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1336 }, { "epoch": 0.4491357726607074, "grad_norm": 0.19516068696975708, "learning_rate": 1.9461962397379223e-05, "loss": 0.6901, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1337 }, { "epoch": 0.4494717006881276, "grad_norm": 0.24700413644313812, "learning_rate": 1.9460763636196886e-05, "loss": 0.7245, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1338 }, { "epoch": 0.4498076287155477, "grad_norm": 0.17952999472618103, "learning_rate": 1.945956357806752e-05, "loss": 0.6932, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1339 }, { "epoch": 0.4501435567429678, "grad_norm": 0.24988879263401031, "learning_rate": 1.9458362223155634e-05, "loss": 0.6861, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1340 }, { "epoch": 0.45047948477038796, "grad_norm": 0.1931113600730896, "learning_rate": 1.945715957162592e-05, "loss": 0.7057, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1341 }, { "epoch": 0.45081541279780807, "grad_norm": 0.21072101593017578, "learning_rate": 1.9455955623643242e-05, "loss": 0.6591, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1342 }, { "epoch": 0.4511513408252282, "grad_norm": 0.20378580689430237, "learning_rate": 1.9454750379372652e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1343 }, { "epoch": 0.45148726885264834, "grad_norm": 0.1485109031200409, "learning_rate": 1.945354383897937e-05, "loss": 0.665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1344 }, { "epoch": 0.45182319688006845, "grad_norm": 0.18815067410469055, "learning_rate": 1.9452336002628792e-05, "loss": 0.7039, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1345 }, { "epoch": 0.45215912490748855, "grad_norm": 0.17161579430103302, "learning_rate": 1.9451126870486502e-05, "loss": 0.6826, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1346 }, { "epoch": 0.4524950529349087, "grad_norm": 0.18077875673770905, "learning_rate": 1.9449916442718252e-05, "loss": 0.6994, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1347 }, { "epoch": 0.4528309809623288, "grad_norm": 0.17857612669467926, "learning_rate": 1.9448704719489982e-05, "loss": 0.7003, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1348 }, { "epoch": 0.45316690898974893, "grad_norm": 0.1772158145904541, "learning_rate": 1.9447491700967803e-05, "loss": 0.6979, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1349 }, { "epoch": 0.4535028370171691, "grad_norm": 0.19822661578655243, "learning_rate": 1.9446277387317996e-05, "loss": 0.6883, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1350 }, { "epoch": 0.4538387650445892, "grad_norm": 0.1891266256570816, "learning_rate": 1.9445061778707036e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1351 }, { "epoch": 0.4541746930720093, "grad_norm": 0.23558178544044495, "learning_rate": 1.9443844875301563e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1352 }, { "epoch": 0.45451062109942947, "grad_norm": 0.2210281640291214, "learning_rate": 1.94426266772684e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1353 }, { "epoch": 0.4548465491268496, "grad_norm": 0.20804066956043243, "learning_rate": 1.9441407184774544e-05, "loss": 0.702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1354 }, { "epoch": 0.4551824771542697, "grad_norm": 0.20247051119804382, "learning_rate": 1.9440186397987174e-05, "loss": 0.7004, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1355 }, { "epoch": 0.45551840518168984, "grad_norm": 0.23589922487735748, "learning_rate": 1.9438964317073644e-05, "loss": 0.6752, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1356 }, { "epoch": 0.45585433320910995, "grad_norm": 0.1996549367904663, "learning_rate": 1.943774094220148e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1357 }, { "epoch": 0.45619026123653006, "grad_norm": 0.2543560862541199, "learning_rate": 1.9436516273538397e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1358 }, { "epoch": 0.4565261892639502, "grad_norm": 0.23023778200149536, "learning_rate": 1.943529031125228e-05, "loss": 0.6809, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1359 }, { "epoch": 0.45686211729137033, "grad_norm": 0.19558236002922058, "learning_rate": 1.943406305551119e-05, "loss": 0.6911, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1360 }, { "epoch": 0.45719804531879044, "grad_norm": 0.20411239564418793, "learning_rate": 1.9432834506483372e-05, "loss": 0.686, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1361 }, { "epoch": 0.4575339733462106, "grad_norm": 0.2297290861606598, "learning_rate": 1.9431604664337237e-05, "loss": 0.7035, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1362 }, { "epoch": 0.4578699013736307, "grad_norm": 0.19318898022174835, "learning_rate": 1.9430373529241384e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1363 }, { "epoch": 0.4582058294010508, "grad_norm": 0.20020577311515808, "learning_rate": 1.9429141101364587e-05, "loss": 0.6798, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1364 }, { "epoch": 0.458541757428471, "grad_norm": 0.18251527845859528, "learning_rate": 1.9427907380875798e-05, "loss": 0.7008, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1365 }, { "epoch": 0.4588776854558911, "grad_norm": 0.18526975810527802, "learning_rate": 1.9426672367944138e-05, "loss": 0.6784, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1366 }, { "epoch": 0.4592136134833112, "grad_norm": 0.1748555302619934, "learning_rate": 1.942543606273891e-05, "loss": 0.7001, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1367 }, { "epoch": 0.45954954151073135, "grad_norm": 0.196889266371727, "learning_rate": 1.9424198465429603e-05, "loss": 0.703, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1368 }, { "epoch": 0.45988546953815146, "grad_norm": 0.17519696056842804, "learning_rate": 1.942295957618587e-05, "loss": 0.6906, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1369 }, { "epoch": 0.46022139756557157, "grad_norm": 0.1742429882287979, "learning_rate": 1.942171939517755e-05, "loss": 0.7192, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1370 }, { "epoch": 0.46055732559299173, "grad_norm": 0.18117381632328033, "learning_rate": 1.942047792257465e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1371 }, { "epoch": 0.46089325362041184, "grad_norm": 0.17496126890182495, "learning_rate": 1.9419235158547364e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1372 }, { "epoch": 0.46122918164783194, "grad_norm": 0.16641588509082794, "learning_rate": 1.9417991103266057e-05, "loss": 0.6965, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1373 }, { "epoch": 0.46156510967525205, "grad_norm": 0.1752045452594757, "learning_rate": 1.941674575690128e-05, "loss": 0.6913, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1374 }, { "epoch": 0.4619010377026722, "grad_norm": 0.18761932849884033, "learning_rate": 1.941549911962374e-05, "loss": 0.693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1375 }, { "epoch": 0.4622369657300923, "grad_norm": 0.21826286613941193, "learning_rate": 1.9414251191604346e-05, "loss": 0.6756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1376 }, { "epoch": 0.4625728937575124, "grad_norm": 0.22248631715774536, "learning_rate": 1.9413001973014168e-05, "loss": 0.6925, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1377 }, { "epoch": 0.4629088217849326, "grad_norm": 0.2066732794046402, "learning_rate": 1.9411751464024454e-05, "loss": 0.7153, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1378 }, { "epoch": 0.4632447498123527, "grad_norm": 0.18018369376659393, "learning_rate": 1.941049966480664e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1379 }, { "epoch": 0.4635806778397728, "grad_norm": 0.1986708641052246, "learning_rate": 1.9409246575532328e-05, "loss": 0.6999, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1380 }, { "epoch": 0.46391660586719297, "grad_norm": 0.2085271030664444, "learning_rate": 1.94079921963733e-05, "loss": 0.67, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1381 }, { "epoch": 0.4642525338946131, "grad_norm": 0.1968480944633484, "learning_rate": 1.940673652750151e-05, "loss": 0.6845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1382 }, { "epoch": 0.4645884619220332, "grad_norm": 0.21383634209632874, "learning_rate": 1.9405479569089106e-05, "loss": 0.6932, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 1383 }, { "epoch": 0.46492438994945334, "grad_norm": 0.2520258128643036, "learning_rate": 1.9404221321308393e-05, "loss": 0.6992, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1384 }, { "epoch": 0.46526031797687345, "grad_norm": 0.18794883787631989, "learning_rate": 1.940296178433186e-05, "loss": 0.7138, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1385 }, { "epoch": 0.46559624600429356, "grad_norm": 0.2004384696483612, "learning_rate": 1.940170095833217e-05, "loss": 0.6857, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1386 }, { "epoch": 0.4659321740317137, "grad_norm": 0.23224051296710968, "learning_rate": 1.9400438843482174e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1387 }, { "epoch": 0.4662681020591338, "grad_norm": 0.2232130914926529, "learning_rate": 1.9399175439954883e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1388 }, { "epoch": 0.46660403008655393, "grad_norm": 0.20993375778198242, "learning_rate": 1.93979107479235e-05, "loss": 0.7015, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1389 }, { "epoch": 0.4669399581139741, "grad_norm": 0.2373615950345993, "learning_rate": 1.9396644767561392e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1390 }, { "epoch": 0.4672758861413942, "grad_norm": 0.21682406961917877, "learning_rate": 1.939537749904211e-05, "loss": 0.6876, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1391 }, { "epoch": 0.4676118141688143, "grad_norm": 0.2231883555650711, "learning_rate": 1.9394108942539385e-05, "loss": 0.6899, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1392 }, { "epoch": 0.4679477421962345, "grad_norm": 0.20736142992973328, "learning_rate": 1.9392839098227113e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1393 }, { "epoch": 0.4682836702236546, "grad_norm": 0.20000410079956055, "learning_rate": 1.9391567966279376e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1394 }, { "epoch": 0.4686195982510747, "grad_norm": 0.19033566117286682, "learning_rate": 1.9390295546870433e-05, "loss": 0.6596, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1395 }, { "epoch": 0.46895552627849485, "grad_norm": 0.18689462542533875, "learning_rate": 1.938902184017471e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1396 }, { "epoch": 0.46929145430591496, "grad_norm": 0.20024730265140533, "learning_rate": 1.9387746846366817e-05, "loss": 0.6804, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1397 }, { "epoch": 0.46962738233333506, "grad_norm": 0.23982472717761993, "learning_rate": 1.9386470565621537e-05, "loss": 0.701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1398 }, { "epoch": 0.4699633103607552, "grad_norm": 0.19783392548561096, "learning_rate": 1.938519299811384e-05, "loss": 0.6712, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1399 }, { "epoch": 0.47029923838817533, "grad_norm": 0.2056698501110077, "learning_rate": 1.9383914144018855e-05, "loss": 0.6761, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1400 }, { "epoch": 0.47063516641559544, "grad_norm": 0.22496846318244934, "learning_rate": 1.93826340035119e-05, "loss": 0.6988, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1401 }, { "epoch": 0.4709710944430156, "grad_norm": 0.22938084602355957, "learning_rate": 1.9381352576768463e-05, "loss": 0.7154, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1402 }, { "epoch": 0.4713070224704357, "grad_norm": 0.1569157987833023, "learning_rate": 1.9380069863964217e-05, "loss": 0.6911, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1403 }, { "epoch": 0.4716429504978558, "grad_norm": 0.21299511194229126, "learning_rate": 1.9378785865274995e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1404 }, { "epoch": 0.471978878525276, "grad_norm": 0.19390840828418732, "learning_rate": 1.9377500580876827e-05, "loss": 0.6926, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1405 }, { "epoch": 0.4723148065526961, "grad_norm": 0.21792326867580414, "learning_rate": 1.9376214010945903e-05, "loss": 0.7103, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1406 }, { "epoch": 0.4726507345801162, "grad_norm": 0.20423167943954468, "learning_rate": 1.9374926155658595e-05, "loss": 0.6938, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1407 }, { "epoch": 0.47298666260753636, "grad_norm": 0.2083568423986435, "learning_rate": 1.937363701519145e-05, "loss": 0.6722, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 1408 }, { "epoch": 0.47332259063495646, "grad_norm": 0.23408862948417664, "learning_rate": 1.9372346589721197e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1409 }, { "epoch": 0.47365851866237657, "grad_norm": 0.21032261848449707, "learning_rate": 1.9371054879424733e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1410 }, { "epoch": 0.47399444668979673, "grad_norm": 0.2304481416940689, "learning_rate": 1.9369761884479134e-05, "loss": 0.7078, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1411 }, { "epoch": 0.47433037471721684, "grad_norm": 0.2269359529018402, "learning_rate": 1.9368467605061657e-05, "loss": 0.7003, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1412 }, { "epoch": 0.47466630274463695, "grad_norm": 0.19239094853401184, "learning_rate": 1.9367172041349726e-05, "loss": 0.675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1413 }, { "epoch": 0.4750022307720571, "grad_norm": 0.2090403437614441, "learning_rate": 1.9365875193520945e-05, "loss": 0.7223, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1414 }, { "epoch": 0.4753381587994772, "grad_norm": 0.19874194264411926, "learning_rate": 1.93645770617531e-05, "loss": 0.6919, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1415 }, { "epoch": 0.4756740868268973, "grad_norm": 0.22798849642276764, "learning_rate": 1.9363277646224148e-05, "loss": 0.6779, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1416 }, { "epoch": 0.4760100148543175, "grad_norm": 0.2619932293891907, "learning_rate": 1.9361976947112217e-05, "loss": 0.7058, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1417 }, { "epoch": 0.4763459428817376, "grad_norm": 0.2306714802980423, "learning_rate": 1.936067496459562e-05, "loss": 0.7086, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1418 }, { "epoch": 0.4766818709091577, "grad_norm": 0.19739894568920135, "learning_rate": 1.9359371698852838e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1419 }, { "epoch": 0.47701779893657786, "grad_norm": 0.23563553392887115, "learning_rate": 1.9358067150062535e-05, "loss": 0.7288, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1420 }, { "epoch": 0.47735372696399797, "grad_norm": 0.23486225306987762, "learning_rate": 1.9356761318403545e-05, "loss": 0.6814, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1421 }, { "epoch": 0.4776896549914181, "grad_norm": 0.251692533493042, "learning_rate": 1.935545420405488e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1422 }, { "epoch": 0.47802558301883824, "grad_norm": 0.20277434587478638, "learning_rate": 1.9354145807195732e-05, "loss": 0.7072, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1423 }, { "epoch": 0.47836151104625835, "grad_norm": 0.2001824676990509, "learning_rate": 1.9352836128005466e-05, "loss": 0.6747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1424 }, { "epoch": 0.47869743907367845, "grad_norm": 0.17704342305660248, "learning_rate": 1.9351525166663613e-05, "loss": 0.7242, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1425 }, { "epoch": 0.4790333671010986, "grad_norm": 0.20951610803604126, "learning_rate": 1.93502129233499e-05, "loss": 0.7067, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1426 }, { "epoch": 0.4793692951285187, "grad_norm": 0.23121432960033417, "learning_rate": 1.9348899398244213e-05, "loss": 0.6812, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 1427 }, { "epoch": 0.47970522315593883, "grad_norm": 0.16765089333057404, "learning_rate": 1.9347584591526615e-05, "loss": 0.6747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1428 }, { "epoch": 0.48004115118335894, "grad_norm": 0.21730932593345642, "learning_rate": 1.9346268503377356e-05, "loss": 0.6942, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1429 }, { "epoch": 0.4803770792107791, "grad_norm": 0.23921093344688416, "learning_rate": 1.934495113397685e-05, "loss": 0.6865, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1430 }, { "epoch": 0.4807130072381992, "grad_norm": 0.23341304063796997, "learning_rate": 1.9343632483505695e-05, "loss": 0.7156, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1431 }, { "epoch": 0.4810489352656193, "grad_norm": 0.18016646802425385, "learning_rate": 1.9342312552144656e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1432 }, { "epoch": 0.4813848632930395, "grad_norm": 0.2070443332195282, "learning_rate": 1.934099134007468e-05, "loss": 0.7006, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1433 }, { "epoch": 0.4817207913204596, "grad_norm": 0.18956837058067322, "learning_rate": 1.933966884747689e-05, "loss": 0.6954, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1434 }, { "epoch": 0.4820567193478797, "grad_norm": 0.20793603360652924, "learning_rate": 1.9338345074532584e-05, "loss": 0.6957, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1435 }, { "epoch": 0.48239264737529985, "grad_norm": 0.20836354792118073, "learning_rate": 1.9337020021423228e-05, "loss": 0.7097, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1436 }, { "epoch": 0.48272857540271996, "grad_norm": 0.22913068532943726, "learning_rate": 1.9335693688330472e-05, "loss": 0.6802, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1437 }, { "epoch": 0.48306450343014007, "grad_norm": 0.1764751672744751, "learning_rate": 1.9334366075436137e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1438 }, { "epoch": 0.48340043145756023, "grad_norm": 0.23334400355815887, "learning_rate": 1.9333037182922227e-05, "loss": 0.7035, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1439 }, { "epoch": 0.48373635948498034, "grad_norm": 0.25306928157806396, "learning_rate": 1.9331707010970912e-05, "loss": 0.7091, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1440 }, { "epoch": 0.48407228751240045, "grad_norm": 0.20022523403167725, "learning_rate": 1.9330375559764544e-05, "loss": 0.6668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1441 }, { "epoch": 0.4844082155398206, "grad_norm": 0.23309944570064545, "learning_rate": 1.9329042829485646e-05, "loss": 0.6835, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1442 }, { "epoch": 0.4847441435672407, "grad_norm": 0.19640877842903137, "learning_rate": 1.932770882031691e-05, "loss": 0.6958, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1443 }, { "epoch": 0.4850800715946608, "grad_norm": 0.2331898808479309, "learning_rate": 1.9326373532441226e-05, "loss": 0.6805, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1444 }, { "epoch": 0.485415999622081, "grad_norm": 0.2129966765642166, "learning_rate": 1.9325036966041636e-05, "loss": 0.7092, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1445 }, { "epoch": 0.4857519276495011, "grad_norm": 0.20164890587329865, "learning_rate": 1.9323699121301362e-05, "loss": 0.6912, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1446 }, { "epoch": 0.4860878556769212, "grad_norm": 0.21117407083511353, "learning_rate": 1.932235999840381e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1447 }, { "epoch": 0.48642378370434136, "grad_norm": 0.22514909505844116, "learning_rate": 1.932101959753256e-05, "loss": 0.6913, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1448 }, { "epoch": 0.48675971173176147, "grad_norm": 0.25378841161727905, "learning_rate": 1.931967791887136e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1449 }, { "epoch": 0.4870956397591816, "grad_norm": 0.20331723988056183, "learning_rate": 1.9318334962604136e-05, "loss": 0.6866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1450 }, { "epoch": 0.48743156778660174, "grad_norm": 0.23872892558574677, "learning_rate": 1.931699072891499e-05, "loss": 0.712, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1451 }, { "epoch": 0.48776749581402185, "grad_norm": 0.23409844934940338, "learning_rate": 1.9315645217988193e-05, "loss": 0.6879, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1452 }, { "epoch": 0.48810342384144195, "grad_norm": 0.23055881261825562, "learning_rate": 1.9314298430008206e-05, "loss": 0.7112, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1453 }, { "epoch": 0.4884393518688621, "grad_norm": 0.22461643815040588, "learning_rate": 1.9312950365159654e-05, "loss": 0.7101, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1454 }, { "epoch": 0.4887752798962822, "grad_norm": 0.2277369201183319, "learning_rate": 1.9311601023627336e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1455 }, { "epoch": 0.48911120792370233, "grad_norm": 0.2088526338338852, "learning_rate": 1.9310250405596235e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1456 }, { "epoch": 0.4894471359511225, "grad_norm": 0.21497194468975067, "learning_rate": 1.930889851125149e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1457 }, { "epoch": 0.4897830639785426, "grad_norm": 0.2250993251800537, "learning_rate": 1.9307545340778445e-05, "loss": 0.687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1458 }, { "epoch": 0.4901189920059627, "grad_norm": 0.16859346628189087, "learning_rate": 1.9306190894362595e-05, "loss": 0.705, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1459 }, { "epoch": 0.49045492003338287, "grad_norm": 0.24914784729480743, "learning_rate": 1.9304835172189612e-05, "loss": 0.6999, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1460 }, { "epoch": 0.490790848060803, "grad_norm": 0.20849786698818207, "learning_rate": 1.930347817444535e-05, "loss": 0.7065, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1461 }, { "epoch": 0.4911267760882231, "grad_norm": 0.21382348239421844, "learning_rate": 1.9302119901315838e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1462 }, { "epoch": 0.49146270411564325, "grad_norm": 0.2065667361021042, "learning_rate": 1.930076035298728e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1463 }, { "epoch": 0.49179863214306335, "grad_norm": 0.1844252347946167, "learning_rate": 1.9299399529646047e-05, "loss": 0.695, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1464 }, { "epoch": 0.49213456017048346, "grad_norm": 0.1987643837928772, "learning_rate": 1.9298037431478694e-05, "loss": 0.7048, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1465 }, { "epoch": 0.4924704881979036, "grad_norm": 0.17791785299777985, "learning_rate": 1.929667405867194e-05, "loss": 0.6821, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1466 }, { "epoch": 0.49280641622532373, "grad_norm": 0.22581560909748077, "learning_rate": 1.9295309411412697e-05, "loss": 0.6948, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1467 }, { "epoch": 0.49314234425274384, "grad_norm": 0.21015821397304535, "learning_rate": 1.929394348988803e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1468 }, { "epoch": 0.493478272280164, "grad_norm": 0.22638465464115143, "learning_rate": 1.9292576294285195e-05, "loss": 0.7095, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1469 }, { "epoch": 0.4938142003075841, "grad_norm": 0.19823519885540009, "learning_rate": 1.9291207824791612e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1470 }, { "epoch": 0.4941501283350042, "grad_norm": 0.15541918575763702, "learning_rate": 1.9289838081594884e-05, "loss": 0.6954, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1471 }, { "epoch": 0.4944860563624244, "grad_norm": 0.21121729910373688, "learning_rate": 1.9288467064882785e-05, "loss": 0.6925, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1472 }, { "epoch": 0.4948219843898445, "grad_norm": 0.20608629286289215, "learning_rate": 1.928709477484326e-05, "loss": 0.7049, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1473 }, { "epoch": 0.4951579124172646, "grad_norm": 0.20945687592029572, "learning_rate": 1.9285721211664438e-05, "loss": 0.7074, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1474 }, { "epoch": 0.49549384044468475, "grad_norm": 0.1722106784582138, "learning_rate": 1.9284346375534605e-05, "loss": 0.7179, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1475 }, { "epoch": 0.49582976847210486, "grad_norm": 0.19266247749328613, "learning_rate": 1.9282970266642248e-05, "loss": 0.714, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1476 }, { "epoch": 0.49616569649952497, "grad_norm": 0.2048259973526001, "learning_rate": 1.9281592885176006e-05, "loss": 0.6904, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1477 }, { "epoch": 0.49650162452694513, "grad_norm": 0.19975155591964722, "learning_rate": 1.92802142313247e-05, "loss": 0.6952, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1478 }, { "epoch": 0.49683755255436524, "grad_norm": 0.19213365018367767, "learning_rate": 1.9278834305277324e-05, "loss": 0.6978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1479 }, { "epoch": 0.49717348058178534, "grad_norm": 0.17526374757289886, "learning_rate": 1.927745310722305e-05, "loss": 0.6764, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1480 }, { "epoch": 0.49750940860920545, "grad_norm": 0.19917893409729004, "learning_rate": 1.927607063735122e-05, "loss": 0.678, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1481 }, { "epoch": 0.4978453366366256, "grad_norm": 0.19802528619766235, "learning_rate": 1.9274686895851354e-05, "loss": 0.6892, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1482 }, { "epoch": 0.4981812646640457, "grad_norm": 0.17614029347896576, "learning_rate": 1.927330188291315e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1483 }, { "epoch": 0.4985171926914658, "grad_norm": 0.23959068953990936, "learning_rate": 1.9271915598726467e-05, "loss": 0.6885, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1484 }, { "epoch": 0.498853120718886, "grad_norm": 0.19318076968193054, "learning_rate": 1.927052804348135e-05, "loss": 0.6985, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1485 }, { "epoch": 0.4991890487463061, "grad_norm": 0.18171164393424988, "learning_rate": 1.9269139217368016e-05, "loss": 0.699, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.88, "memory/max_allocated (GiB)": 64.88, "step": 1486 }, { "epoch": 0.4995249767737262, "grad_norm": 0.1843922734260559, "learning_rate": 1.9267749120576854e-05, "loss": 0.7001, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1487 }, { "epoch": 0.49986090480114637, "grad_norm": 0.2036571204662323, "learning_rate": 1.926635775329843e-05, "loss": 0.6862, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1488 }, { "epoch": 0.5001968328285665, "grad_norm": 0.22589778900146484, "learning_rate": 1.9264965115723475e-05, "loss": 0.6809, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1489 }, { "epoch": 0.5005327608559866, "grad_norm": 0.2077331393957138, "learning_rate": 1.9263571208042907e-05, "loss": 0.6856, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1490 }, { "epoch": 0.5008686888834067, "grad_norm": 0.17682644724845886, "learning_rate": 1.9262176030447813e-05, "loss": 0.6883, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1491 }, { "epoch": 0.5012046169108269, "grad_norm": 0.18876290321350098, "learning_rate": 1.926077958312945e-05, "loss": 0.6928, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1492 }, { "epoch": 0.501540544938247, "grad_norm": 0.2179119735956192, "learning_rate": 1.9259381866279256e-05, "loss": 0.6974, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1493 }, { "epoch": 0.5018764729656671, "grad_norm": 0.20989850163459778, "learning_rate": 1.925798288008884e-05, "loss": 0.6985, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1494 }, { "epoch": 0.5022124009930873, "grad_norm": 0.23507481813430786, "learning_rate": 1.9256582624749983e-05, "loss": 0.6961, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1495 }, { "epoch": 0.5025483290205074, "grad_norm": 0.20868328213691711, "learning_rate": 1.925518110045464e-05, "loss": 0.696, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1496 }, { "epoch": 0.5028842570479275, "grad_norm": 0.18784640729427338, "learning_rate": 1.9253778307394946e-05, "loss": 0.6909, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1497 }, { "epoch": 0.5032201850753476, "grad_norm": 0.22107739746570587, "learning_rate": 1.9252374245763202e-05, "loss": 0.7008, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1498 }, { "epoch": 0.5035561131027677, "grad_norm": 0.19235330820083618, "learning_rate": 1.925096891575189e-05, "loss": 0.7123, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1499 }, { "epoch": 0.5038920411301878, "grad_norm": 0.1673002541065216, "learning_rate": 1.924956231755366e-05, "loss": 0.7043, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1500 }, { "epoch": 0.504227969157608, "grad_norm": 0.20226986706256866, "learning_rate": 1.9248154451361337e-05, "loss": 0.7191, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1501 }, { "epoch": 0.5045638971850281, "grad_norm": 0.18950946629047394, "learning_rate": 1.9246745317367924e-05, "loss": 0.7012, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1502 }, { "epoch": 0.5048998252124482, "grad_norm": 0.18109910190105438, "learning_rate": 1.9245334915766594e-05, "loss": 0.6657, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1503 }, { "epoch": 0.5052357532398684, "grad_norm": 0.20201358199119568, "learning_rate": 1.9243923246750693e-05, "loss": 0.6759, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1504 }, { "epoch": 0.5055716812672885, "grad_norm": 0.16958659887313843, "learning_rate": 1.9242510310513745e-05, "loss": 0.7149, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1505 }, { "epoch": 0.5059076092947086, "grad_norm": 0.16893534362316132, "learning_rate": 1.9241096107249442e-05, "loss": 0.6796, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1506 }, { "epoch": 0.5062435373221288, "grad_norm": 0.17122559249401093, "learning_rate": 1.923968063715165e-05, "loss": 0.7086, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1507 }, { "epoch": 0.5065794653495489, "grad_norm": 0.16610386967658997, "learning_rate": 1.923826390041442e-05, "loss": 0.6788, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1508 }, { "epoch": 0.506915393376969, "grad_norm": 0.16226287186145782, "learning_rate": 1.9236845897231967e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1509 }, { "epoch": 0.5072513214043891, "grad_norm": 0.18747307360172272, "learning_rate": 1.9235426627798676e-05, "loss": 0.7039, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1510 }, { "epoch": 0.5075872494318092, "grad_norm": 0.1635931134223938, "learning_rate": 1.9234006092309107e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1511 }, { "epoch": 0.5079231774592293, "grad_norm": 0.19617435336112976, "learning_rate": 1.9232584290958005e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1512 }, { "epoch": 0.5082591054866494, "grad_norm": 0.1791463941335678, "learning_rate": 1.9231161223940278e-05, "loss": 0.6892, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1513 }, { "epoch": 0.5085950335140696, "grad_norm": 0.1660844087600708, "learning_rate": 1.9229736891451008e-05, "loss": 0.7095, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1514 }, { "epoch": 0.5089309615414898, "grad_norm": 0.1755116581916809, "learning_rate": 1.9228311293685452e-05, "loss": 0.7005, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1515 }, { "epoch": 0.5092668895689099, "grad_norm": 0.17272621393203735, "learning_rate": 1.9226884430839043e-05, "loss": 0.6887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1516 }, { "epoch": 0.50960281759633, "grad_norm": 0.1923849731683731, "learning_rate": 1.9225456303107386e-05, "loss": 0.6858, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1517 }, { "epoch": 0.5099387456237501, "grad_norm": 0.2013208270072937, "learning_rate": 1.9224026910686256e-05, "loss": 0.6922, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1518 }, { "epoch": 0.5102746736511702, "grad_norm": 0.19366668164730072, "learning_rate": 1.9222596253771603e-05, "loss": 0.7078, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1519 }, { "epoch": 0.5106106016785904, "grad_norm": 0.1859414428472519, "learning_rate": 1.9221164332559558e-05, "loss": 0.6632, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1520 }, { "epoch": 0.5109465297060105, "grad_norm": 0.18135270476341248, "learning_rate": 1.9219731147246408e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1521 }, { "epoch": 0.5112824577334306, "grad_norm": 0.19683216512203217, "learning_rate": 1.9218296698028635e-05, "loss": 0.6811, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1522 }, { "epoch": 0.5116183857608507, "grad_norm": 0.18395625054836273, "learning_rate": 1.9216860985102875e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1523 }, { "epoch": 0.5119543137882708, "grad_norm": 0.17683137953281403, "learning_rate": 1.921542400866595e-05, "loss": 0.6921, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1524 }, { "epoch": 0.5122902418156909, "grad_norm": 0.19524303078651428, "learning_rate": 1.9213985768914847e-05, "loss": 0.7039, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1525 }, { "epoch": 0.5126261698431112, "grad_norm": 0.184491828083992, "learning_rate": 1.921254626604674e-05, "loss": 0.6745, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1526 }, { "epoch": 0.5129620978705313, "grad_norm": 0.19479092955589294, "learning_rate": 1.921110550025895e-05, "loss": 0.7002, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1527 }, { "epoch": 0.5132980258979514, "grad_norm": 0.18591095507144928, "learning_rate": 1.9209663471749002e-05, "loss": 0.6947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1528 }, { "epoch": 0.5136339539253715, "grad_norm": 0.195206880569458, "learning_rate": 1.920822018071457e-05, "loss": 0.7235, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1529 }, { "epoch": 0.5139698819527916, "grad_norm": 0.17234022915363312, "learning_rate": 1.9206775627353508e-05, "loss": 0.7121, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1530 }, { "epoch": 0.5143058099802117, "grad_norm": 0.22241456806659698, "learning_rate": 1.9205329811863858e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1531 }, { "epoch": 0.5146417380076319, "grad_norm": 0.19127725064754486, "learning_rate": 1.9203882734443812e-05, "loss": 0.6985, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1532 }, { "epoch": 0.514977666035052, "grad_norm": 0.20573344826698303, "learning_rate": 1.9202434395291747e-05, "loss": 0.7071, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1533 }, { "epoch": 0.5153135940624721, "grad_norm": 0.1937270164489746, "learning_rate": 1.9200984794606213e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1534 }, { "epoch": 0.5156495220898922, "grad_norm": 0.19315697252750397, "learning_rate": 1.9199533932585933e-05, "loss": 0.6996, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1535 }, { "epoch": 0.5159854501173123, "grad_norm": 0.18746024370193481, "learning_rate": 1.9198081809429797e-05, "loss": 0.6947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1536 }, { "epoch": 0.5163213781447324, "grad_norm": 0.1936243325471878, "learning_rate": 1.9196628425336875e-05, "loss": 0.6948, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 1537 }, { "epoch": 0.5166573061721527, "grad_norm": 0.22679823637008667, "learning_rate": 1.919517378050641e-05, "loss": 0.6799, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1538 }, { "epoch": 0.5169932341995728, "grad_norm": 0.1746831089258194, "learning_rate": 1.9193717875137804e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1539 }, { "epoch": 0.5173291622269929, "grad_norm": 0.18550334870815277, "learning_rate": 1.9192260709430656e-05, "loss": 0.6816, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1540 }, { "epoch": 0.517665090254413, "grad_norm": 0.23355203866958618, "learning_rate": 1.9190802283584715e-05, "loss": 0.6819, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 1541 }, { "epoch": 0.5180010182818331, "grad_norm": 0.22483447194099426, "learning_rate": 1.9189342597799917e-05, "loss": 0.7026, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1542 }, { "epoch": 0.5183369463092532, "grad_norm": 0.225198432803154, "learning_rate": 1.918788165227636e-05, "loss": 0.6875, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1543 }, { "epoch": 0.5186728743366734, "grad_norm": 0.19325199723243713, "learning_rate": 1.918641944721433e-05, "loss": 0.7067, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1544 }, { "epoch": 0.5190088023640935, "grad_norm": 0.20017001032829285, "learning_rate": 1.9184955982814264e-05, "loss": 0.6888, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1545 }, { "epoch": 0.5193447303915136, "grad_norm": 0.19337458908557892, "learning_rate": 1.9183491259276794e-05, "loss": 0.6871, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1546 }, { "epoch": 0.5196806584189337, "grad_norm": 0.18911990523338318, "learning_rate": 1.9182025276802712e-05, "loss": 0.6756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1547 }, { "epoch": 0.5200165864463538, "grad_norm": 0.2515266239643097, "learning_rate": 1.918055803559298e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1548 }, { "epoch": 0.520352514473774, "grad_norm": 0.18839380145072937, "learning_rate": 1.9179089535848747e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1549 }, { "epoch": 0.5206884425011942, "grad_norm": 0.21934109926223755, "learning_rate": 1.917761977777132e-05, "loss": 0.6839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1550 }, { "epoch": 0.5210243705286143, "grad_norm": 0.20385879278182983, "learning_rate": 1.917614876156218e-05, "loss": 0.7078, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1551 }, { "epoch": 0.5213602985560344, "grad_norm": 0.20210306346416473, "learning_rate": 1.9174676487422984e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1552 }, { "epoch": 0.5216962265834545, "grad_norm": 0.21935595571994781, "learning_rate": 1.917320295555557e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1553 }, { "epoch": 0.5220321546108746, "grad_norm": 0.18801093101501465, "learning_rate": 1.9171728166161936e-05, "loss": 0.7095, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1554 }, { "epoch": 0.5223680826382947, "grad_norm": 0.24968314170837402, "learning_rate": 1.9170252119444253e-05, "loss": 0.6937, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1555 }, { "epoch": 0.5227040106657149, "grad_norm": 0.18299461901187897, "learning_rate": 1.916877481560487e-05, "loss": 0.6753, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1556 }, { "epoch": 0.523039938693135, "grad_norm": 0.19367821514606476, "learning_rate": 1.9167296254846306e-05, "loss": 0.6785, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1557 }, { "epoch": 0.5233758667205551, "grad_norm": 0.19881606101989746, "learning_rate": 1.9165816437371255e-05, "loss": 0.6865, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1558 }, { "epoch": 0.5237117947479752, "grad_norm": 0.2322845309972763, "learning_rate": 1.9164335363382577e-05, "loss": 0.6968, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1559 }, { "epoch": 0.5240477227753954, "grad_norm": 0.18601854145526886, "learning_rate": 1.9162853033083305e-05, "loss": 0.71, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1560 }, { "epoch": 0.5243836508028155, "grad_norm": 0.1800646185874939, "learning_rate": 1.9161369446676653e-05, "loss": 0.6885, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1561 }, { "epoch": 0.5247195788302356, "grad_norm": 0.17015723884105682, "learning_rate": 1.9159884604366003e-05, "loss": 0.699, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1562 }, { "epoch": 0.5250555068576558, "grad_norm": 0.1914314180612564, "learning_rate": 1.91583985063549e-05, "loss": 0.6976, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1563 }, { "epoch": 0.5253914348850759, "grad_norm": 0.1710716038942337, "learning_rate": 1.9156911152847075e-05, "loss": 0.693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1564 }, { "epoch": 0.525727362912496, "grad_norm": 0.21323710680007935, "learning_rate": 1.915542254404642e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1565 }, { "epoch": 0.5260632909399161, "grad_norm": 0.17344063520431519, "learning_rate": 1.9153932680157005e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1566 }, { "epoch": 0.5263992189673362, "grad_norm": 0.17653602361679077, "learning_rate": 1.915244156138307e-05, "loss": 0.6845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1567 }, { "epoch": 0.5267351469947563, "grad_norm": 0.2026679366827011, "learning_rate": 1.915094918792904e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1568 }, { "epoch": 0.5270710750221765, "grad_norm": 0.17406532168388367, "learning_rate": 1.914945555999948e-05, "loss": 0.6858, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1569 }, { "epoch": 0.5274070030495966, "grad_norm": 0.19436043500900269, "learning_rate": 1.914796067779916e-05, "loss": 0.7124, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1570 }, { "epoch": 0.5277429310770168, "grad_norm": 0.20153744518756866, "learning_rate": 1.9146464541533004e-05, "loss": 0.7141, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1571 }, { "epoch": 0.5280788591044369, "grad_norm": 0.17944276332855225, "learning_rate": 1.9144967151406116e-05, "loss": 0.7141, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1572 }, { "epoch": 0.528414787131857, "grad_norm": 0.17449866235256195, "learning_rate": 1.9143468507623765e-05, "loss": 0.6686, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1573 }, { "epoch": 0.5287507151592771, "grad_norm": 0.19511596858501434, "learning_rate": 1.9141968610391396e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1574 }, { "epoch": 0.5290866431866973, "grad_norm": 0.19997483491897583, "learning_rate": 1.914046745991463e-05, "loss": 0.6877, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1575 }, { "epoch": 0.5294225712141174, "grad_norm": 0.1910419762134552, "learning_rate": 1.9138965056399247e-05, "loss": 0.6973, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1576 }, { "epoch": 0.5297584992415375, "grad_norm": 0.2459632009267807, "learning_rate": 1.9137461400051212e-05, "loss": 0.6957, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1577 }, { "epoch": 0.5300944272689576, "grad_norm": 0.1898786574602127, "learning_rate": 1.913595649107666e-05, "loss": 0.7016, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1578 }, { "epoch": 0.5304303552963777, "grad_norm": 0.21175220608711243, "learning_rate": 1.9134450329681887e-05, "loss": 0.6925, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1579 }, { "epoch": 0.5307662833237978, "grad_norm": 0.19915130734443665, "learning_rate": 1.9132942916073372e-05, "loss": 0.6918, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1580 }, { "epoch": 0.531102211351218, "grad_norm": 0.24658668041229248, "learning_rate": 1.913143425045776e-05, "loss": 0.7102, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1581 }, { "epoch": 0.5314381393786382, "grad_norm": 0.23979420959949493, "learning_rate": 1.9129924333041873e-05, "loss": 0.6987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1582 }, { "epoch": 0.5317740674060583, "grad_norm": 0.21082429587841034, "learning_rate": 1.9128413164032698e-05, "loss": 0.6724, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1583 }, { "epoch": 0.5321099954334784, "grad_norm": 0.2305683195590973, "learning_rate": 1.91269007436374e-05, "loss": 0.7047, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1584 }, { "epoch": 0.5324459234608985, "grad_norm": 0.21840015053749084, "learning_rate": 1.9125387072063303e-05, "loss": 0.6665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1585 }, { "epoch": 0.5327818514883186, "grad_norm": 0.2112293392419815, "learning_rate": 1.912387214951792e-05, "loss": 0.6672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1586 }, { "epoch": 0.5331177795157388, "grad_norm": 0.24760490655899048, "learning_rate": 1.9122355976208923e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1587 }, { "epoch": 0.5334537075431589, "grad_norm": 0.1987069845199585, "learning_rate": 1.9120838552344163e-05, "loss": 0.7056, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1588 }, { "epoch": 0.533789635570579, "grad_norm": 0.18206988275051117, "learning_rate": 1.9119319878131657e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1589 }, { "epoch": 0.5341255635979991, "grad_norm": 0.19052347540855408, "learning_rate": 1.9117799953779596e-05, "loss": 0.7119, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1590 }, { "epoch": 0.5344614916254192, "grad_norm": 0.18891851603984833, "learning_rate": 1.911627877949634e-05, "loss": 0.6769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1591 }, { "epoch": 0.5347974196528393, "grad_norm": 0.1741276979446411, "learning_rate": 1.9114756355490422e-05, "loss": 0.6761, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1592 }, { "epoch": 0.5351333476802596, "grad_norm": 0.21062324941158295, "learning_rate": 1.9113232681970552e-05, "loss": 0.6711, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1593 }, { "epoch": 0.5354692757076797, "grad_norm": 0.18733778595924377, "learning_rate": 1.91117077591456e-05, "loss": 0.7055, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1594 }, { "epoch": 0.5358052037350998, "grad_norm": 0.23252615332603455, "learning_rate": 1.9110181587224612e-05, "loss": 0.6702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1595 }, { "epoch": 0.5361411317625199, "grad_norm": 0.21133330464363098, "learning_rate": 1.910865416641681e-05, "loss": 0.6968, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1596 }, { "epoch": 0.53647705978994, "grad_norm": 0.17415083944797516, "learning_rate": 1.9107125496931584e-05, "loss": 0.6773, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1597 }, { "epoch": 0.5368129878173601, "grad_norm": 0.20122858881950378, "learning_rate": 1.910559557897849e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1598 }, { "epoch": 0.5371489158447803, "grad_norm": 0.2144148200750351, "learning_rate": 1.9104064412767267e-05, "loss": 0.7025, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1599 }, { "epoch": 0.5374848438722004, "grad_norm": 0.1738111823797226, "learning_rate": 1.910253199850781e-05, "loss": 0.699, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1600 }, { "epoch": 0.5378207718996205, "grad_norm": 0.17443053424358368, "learning_rate": 1.9100998336410197e-05, "loss": 0.6744, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1601 }, { "epoch": 0.5381566999270406, "grad_norm": 0.20320284366607666, "learning_rate": 1.9099463426684673e-05, "loss": 0.7272, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1602 }, { "epoch": 0.5384926279544607, "grad_norm": 0.17199864983558655, "learning_rate": 1.9097927269541655e-05, "loss": 0.6642, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 1603 }, { "epoch": 0.5388285559818808, "grad_norm": 0.20487238466739655, "learning_rate": 1.9096389865191723e-05, "loss": 0.7055, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1604 }, { "epoch": 0.5391644840093011, "grad_norm": 0.19202208518981934, "learning_rate": 1.909485121384565e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1605 }, { "epoch": 0.5395004120367212, "grad_norm": 0.17673121392726898, "learning_rate": 1.9093311315714346e-05, "loss": 0.6709, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1606 }, { "epoch": 0.5398363400641413, "grad_norm": 0.1906535029411316, "learning_rate": 1.9091770171008925e-05, "loss": 0.6885, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1607 }, { "epoch": 0.5401722680915614, "grad_norm": 0.17932428419589996, "learning_rate": 1.9090227779940656e-05, "loss": 0.6978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1608 }, { "epoch": 0.5405081961189815, "grad_norm": 0.16506674885749817, "learning_rate": 1.908868414272097e-05, "loss": 0.711, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1609 }, { "epoch": 0.5408441241464016, "grad_norm": 0.18035876750946045, "learning_rate": 1.9087139259561492e-05, "loss": 0.6793, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1610 }, { "epoch": 0.5411800521738218, "grad_norm": 0.17083697021007538, "learning_rate": 1.9085593130674005e-05, "loss": 0.6926, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1611 }, { "epoch": 0.5415159802012419, "grad_norm": 0.17115992307662964, "learning_rate": 1.9084045756270452e-05, "loss": 0.6964, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1612 }, { "epoch": 0.541851908228662, "grad_norm": 0.18127618730068207, "learning_rate": 1.908249713656297e-05, "loss": 0.6766, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1613 }, { "epoch": 0.5421878362560821, "grad_norm": 0.1832122802734375, "learning_rate": 1.9080947271763844e-05, "loss": 0.6862, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1614 }, { "epoch": 0.5425237642835022, "grad_norm": 0.17840322852134705, "learning_rate": 1.907939616208555e-05, "loss": 0.7233, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1615 }, { "epoch": 0.5428596923109223, "grad_norm": 0.15838129818439484, "learning_rate": 1.907784380774072e-05, "loss": 0.6819, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1616 }, { "epoch": 0.5431956203383425, "grad_norm": 0.18116821348667145, "learning_rate": 1.9076290208942155e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1617 }, { "epoch": 0.5435315483657627, "grad_norm": 0.18691386282444, "learning_rate": 1.9074735365902846e-05, "loss": 0.6751, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1618 }, { "epoch": 0.5438674763931828, "grad_norm": 0.17807120084762573, "learning_rate": 1.9073179278835933e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1619 }, { "epoch": 0.5442034044206029, "grad_norm": 0.16509756445884705, "learning_rate": 1.9071621947954737e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1620 }, { "epoch": 0.544539332448023, "grad_norm": 0.18063640594482422, "learning_rate": 1.9070063373472752e-05, "loss": 0.7086, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1621 }, { "epoch": 0.5448752604754431, "grad_norm": 0.15916401147842407, "learning_rate": 1.906850355560363e-05, "loss": 0.7089, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1622 }, { "epoch": 0.5452111885028632, "grad_norm": 0.1623428761959076, "learning_rate": 1.906694249456121e-05, "loss": 0.6734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1623 }, { "epoch": 0.5455471165302834, "grad_norm": 0.18913334608078003, "learning_rate": 1.9065380190559486e-05, "loss": 0.7095, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1624 }, { "epoch": 0.5458830445577035, "grad_norm": 0.17289045453071594, "learning_rate": 1.9063816643812636e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1625 }, { "epoch": 0.5462189725851236, "grad_norm": 0.17258763313293457, "learning_rate": 1.9062251854534994e-05, "loss": 0.6866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1626 }, { "epoch": 0.5465549006125437, "grad_norm": 0.19381941854953766, "learning_rate": 1.906068582294108e-05, "loss": 0.6984, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1627 }, { "epoch": 0.5468908286399639, "grad_norm": 0.21527345478534698, "learning_rate": 1.9059118549245573e-05, "loss": 0.6987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1628 }, { "epoch": 0.547226756667384, "grad_norm": 0.20660719275474548, "learning_rate": 1.9057550033663327e-05, "loss": 0.6807, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1629 }, { "epoch": 0.5475626846948042, "grad_norm": 0.21273833513259888, "learning_rate": 1.9055980276409362e-05, "loss": 0.6842, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1630 }, { "epoch": 0.5478986127222243, "grad_norm": 0.22147096693515778, "learning_rate": 1.905440927769887e-05, "loss": 0.6865, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1631 }, { "epoch": 0.5482345407496444, "grad_norm": 0.18694795668125153, "learning_rate": 1.905283703774722e-05, "loss": 0.7025, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1632 }, { "epoch": 0.5485704687770645, "grad_norm": 0.2204403430223465, "learning_rate": 1.9051263556769946e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1633 }, { "epoch": 0.5489063968044846, "grad_norm": 0.23297937214374542, "learning_rate": 1.9049688834982744e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1634 }, { "epoch": 0.5492423248319047, "grad_norm": 0.22678658366203308, "learning_rate": 1.9048112872601495e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1635 }, { "epoch": 0.5495782528593249, "grad_norm": 0.19325412809848785, "learning_rate": 1.904653566984224e-05, "loss": 0.6866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1636 }, { "epoch": 0.549914180886745, "grad_norm": 0.23597854375839233, "learning_rate": 1.9044957226921194e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1637 }, { "epoch": 0.5502501089141651, "grad_norm": 0.1870901882648468, "learning_rate": 1.904337754405474e-05, "loss": 0.6682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1638 }, { "epoch": 0.5505860369415853, "grad_norm": 0.23121440410614014, "learning_rate": 1.9041796621459427e-05, "loss": 0.6846, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1639 }, { "epoch": 0.5509219649690054, "grad_norm": 0.19382226467132568, "learning_rate": 1.9040214459351987e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1640 }, { "epoch": 0.5512578929964255, "grad_norm": 0.1948491930961609, "learning_rate": 1.9038631057949314e-05, "loss": 0.6892, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1641 }, { "epoch": 0.5515938210238457, "grad_norm": 0.21314026415348053, "learning_rate": 1.9037046417468462e-05, "loss": 0.6738, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1642 }, { "epoch": 0.5519297490512658, "grad_norm": 0.2500882148742676, "learning_rate": 1.9035460538126677e-05, "loss": 0.6824, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1643 }, { "epoch": 0.5522656770786859, "grad_norm": 0.26947304606437683, "learning_rate": 1.9033873420141353e-05, "loss": 0.6772, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1644 }, { "epoch": 0.552601605106106, "grad_norm": 0.20876623690128326, "learning_rate": 1.9032285063730064e-05, "loss": 0.6772, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1645 }, { "epoch": 0.5529375331335261, "grad_norm": 0.20035113394260406, "learning_rate": 1.9030695469110557e-05, "loss": 0.6933, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1646 }, { "epoch": 0.5532734611609462, "grad_norm": 0.21659909188747406, "learning_rate": 1.9029104636500745e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1647 }, { "epoch": 0.5536093891883664, "grad_norm": 0.24262401461601257, "learning_rate": 1.9027512566118702e-05, "loss": 0.6904, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1648 }, { "epoch": 0.5539453172157865, "grad_norm": 0.1927570104598999, "learning_rate": 1.902591925818269e-05, "loss": 0.6893, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1649 }, { "epoch": 0.5542812452432067, "grad_norm": 0.1934763342142105, "learning_rate": 1.9024324712911127e-05, "loss": 0.6685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1650 }, { "epoch": 0.5546171732706268, "grad_norm": 0.1880747526884079, "learning_rate": 1.9022728930522603e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1651 }, { "epoch": 0.5549531012980469, "grad_norm": 0.24623467028141022, "learning_rate": 1.9021131911235878e-05, "loss": 0.6634, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1652 }, { "epoch": 0.555289029325467, "grad_norm": 0.23481234908103943, "learning_rate": 1.9019533655269885e-05, "loss": 0.6982, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1653 }, { "epoch": 0.5556249573528872, "grad_norm": 0.17508332431316376, "learning_rate": 1.9017934162843727e-05, "loss": 0.6885, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1654 }, { "epoch": 0.5559608853803073, "grad_norm": 0.1810387223958969, "learning_rate": 1.9016333434176668e-05, "loss": 0.6781, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1655 }, { "epoch": 0.5562968134077274, "grad_norm": 0.20148205757141113, "learning_rate": 1.9014731469488148e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1656 }, { "epoch": 0.5566327414351475, "grad_norm": 0.22217608988285065, "learning_rate": 1.9013128268997775e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1657 }, { "epoch": 0.5569686694625676, "grad_norm": 0.1991013139486313, "learning_rate": 1.901152383292533e-05, "loss": 0.6922, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1658 }, { "epoch": 0.5573045974899877, "grad_norm": 0.17727527022361755, "learning_rate": 1.900991816149076e-05, "loss": 0.6857, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1659 }, { "epoch": 0.557640525517408, "grad_norm": 0.18470142781734467, "learning_rate": 1.9008311254914175e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1660 }, { "epoch": 0.557976453544828, "grad_norm": 0.20954380929470062, "learning_rate": 1.900670311341587e-05, "loss": 0.7064, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1661 }, { "epoch": 0.5583123815722482, "grad_norm": 0.21556305885314941, "learning_rate": 1.9005093737216294e-05, "loss": 0.6932, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1662 }, { "epoch": 0.5586483095996683, "grad_norm": 0.16899865865707397, "learning_rate": 1.9003483126536076e-05, "loss": 0.6831, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1663 }, { "epoch": 0.5589842376270884, "grad_norm": 0.24527119100093842, "learning_rate": 1.9001871281596004e-05, "loss": 0.6715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1664 }, { "epoch": 0.5593201656545085, "grad_norm": 0.283232718706131, "learning_rate": 1.9000258202617047e-05, "loss": 0.7122, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1665 }, { "epoch": 0.5596560936819287, "grad_norm": 0.25289666652679443, "learning_rate": 1.899864388982033e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1666 }, { "epoch": 0.5599920217093488, "grad_norm": 0.1779143363237381, "learning_rate": 1.8997028343427158e-05, "loss": 0.6789, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1667 }, { "epoch": 0.5603279497367689, "grad_norm": 0.2497989684343338, "learning_rate": 1.8995411563659004e-05, "loss": 0.6765, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1668 }, { "epoch": 0.560663877764189, "grad_norm": 0.27632588148117065, "learning_rate": 1.8993793550737507e-05, "loss": 0.6663, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1669 }, { "epoch": 0.5609998057916091, "grad_norm": 0.23591376841068268, "learning_rate": 1.899217430488447e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1670 }, { "epoch": 0.5613357338190292, "grad_norm": 0.19311995804309845, "learning_rate": 1.8990553826321876e-05, "loss": 0.6947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1671 }, { "epoch": 0.5616716618464493, "grad_norm": 0.2507106363773346, "learning_rate": 1.8988932115271868e-05, "loss": 0.7017, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1672 }, { "epoch": 0.5620075898738696, "grad_norm": 0.2758147716522217, "learning_rate": 1.898730917195676e-05, "loss": 0.6771, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1673 }, { "epoch": 0.5623435179012897, "grad_norm": 0.2092687487602234, "learning_rate": 1.8985684996599043e-05, "loss": 0.68, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1674 }, { "epoch": 0.5626794459287098, "grad_norm": 0.2037680745124817, "learning_rate": 1.8984059589421366e-05, "loss": 0.6836, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1675 }, { "epoch": 0.5630153739561299, "grad_norm": 0.2536575496196747, "learning_rate": 1.8982432950646548e-05, "loss": 0.6944, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1676 }, { "epoch": 0.56335130198355, "grad_norm": 0.24373716115951538, "learning_rate": 1.8980805080497583e-05, "loss": 0.7013, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1677 }, { "epoch": 0.5636872300109701, "grad_norm": 0.2397514134645462, "learning_rate": 1.8979175979197634e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1678 }, { "epoch": 0.5640231580383903, "grad_norm": 0.19762028753757477, "learning_rate": 1.8977545646970025e-05, "loss": 0.7065, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1679 }, { "epoch": 0.5643590860658104, "grad_norm": 0.19008877873420715, "learning_rate": 1.8975914084038253e-05, "loss": 0.6997, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1680 }, { "epoch": 0.5646950140932305, "grad_norm": 0.2774099111557007, "learning_rate": 1.8974281290625986e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1681 }, { "epoch": 0.5650309421206506, "grad_norm": 0.23637881875038147, "learning_rate": 1.8972647266957057e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1682 }, { "epoch": 0.5653668701480707, "grad_norm": 0.18794824182987213, "learning_rate": 1.897101201325547e-05, "loss": 0.6734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1683 }, { "epoch": 0.5657027981754909, "grad_norm": 0.2459852248430252, "learning_rate": 1.8969375529745403e-05, "loss": 0.6875, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1684 }, { "epoch": 0.5660387262029111, "grad_norm": 0.23894619941711426, "learning_rate": 1.8967737816651186e-05, "loss": 0.6769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1685 }, { "epoch": 0.5663746542303312, "grad_norm": 0.20968502759933472, "learning_rate": 1.896609887419733e-05, "loss": 0.6877, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1686 }, { "epoch": 0.5667105822577513, "grad_norm": 0.2048633098602295, "learning_rate": 1.896445870260852e-05, "loss": 0.6764, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1687 }, { "epoch": 0.5670465102851714, "grad_norm": 0.18945613503456116, "learning_rate": 1.89628173021096e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1688 }, { "epoch": 0.5673824383125915, "grad_norm": 0.19932080805301666, "learning_rate": 1.896117467292558e-05, "loss": 0.7132, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1689 }, { "epoch": 0.5677183663400116, "grad_norm": 0.2087021917104721, "learning_rate": 1.895953081528164e-05, "loss": 0.694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1690 }, { "epoch": 0.5680542943674318, "grad_norm": 0.20058640837669373, "learning_rate": 1.895788572940314e-05, "loss": 0.7043, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1691 }, { "epoch": 0.5683902223948519, "grad_norm": 0.1672135442495346, "learning_rate": 1.8956239415515597e-05, "loss": 0.68, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1692 }, { "epoch": 0.568726150422272, "grad_norm": 0.17762701213359833, "learning_rate": 1.8954591873844696e-05, "loss": 0.6917, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1693 }, { "epoch": 0.5690620784496921, "grad_norm": 0.18038049340248108, "learning_rate": 1.8952943104616298e-05, "loss": 0.6744, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1694 }, { "epoch": 0.5693980064771123, "grad_norm": 0.1751021295785904, "learning_rate": 1.8951293108056427e-05, "loss": 0.6876, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1695 }, { "epoch": 0.5697339345045324, "grad_norm": 0.18465204536914825, "learning_rate": 1.894964188439127e-05, "loss": 0.6874, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1696 }, { "epoch": 0.5700698625319526, "grad_norm": 0.1882217973470688, "learning_rate": 1.8947989433847195e-05, "loss": 0.6889, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1697 }, { "epoch": 0.5704057905593727, "grad_norm": 0.19508220255374908, "learning_rate": 1.8946335756650728e-05, "loss": 0.68, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1698 }, { "epoch": 0.5707417185867928, "grad_norm": 0.19176779687404633, "learning_rate": 1.8944680853028572e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1699 }, { "epoch": 0.5710776466142129, "grad_norm": 0.18638736009597778, "learning_rate": 1.8943024723207583e-05, "loss": 0.6698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1700 }, { "epoch": 0.571413574641633, "grad_norm": 0.1983148455619812, "learning_rate": 1.89413673674148e-05, "loss": 0.6922, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1701 }, { "epoch": 0.5717495026690531, "grad_norm": 0.20571444928646088, "learning_rate": 1.8939708785877425e-05, "loss": 0.6842, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1702 }, { "epoch": 0.5720854306964733, "grad_norm": 0.15925487875938416, "learning_rate": 1.8938048978822833e-05, "loss": 0.6854, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1703 }, { "epoch": 0.5724213587238934, "grad_norm": 0.1796940416097641, "learning_rate": 1.893638794647855e-05, "loss": 0.679, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1704 }, { "epoch": 0.5727572867513135, "grad_norm": 0.20155984163284302, "learning_rate": 1.8934725689072292e-05, "loss": 0.6628, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1705 }, { "epoch": 0.5730932147787337, "grad_norm": 0.1889120191335678, "learning_rate": 1.8933062206831926e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1706 }, { "epoch": 0.5734291428061538, "grad_norm": 0.17928199470043182, "learning_rate": 1.89313974999855e-05, "loss": 0.6805, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1707 }, { "epoch": 0.5737650708335739, "grad_norm": 0.1973838359117508, "learning_rate": 1.8929731568761215e-05, "loss": 0.7276, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1708 }, { "epoch": 0.5741009988609941, "grad_norm": 0.15852299332618713, "learning_rate": 1.8928064413387458e-05, "loss": 0.6908, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1709 }, { "epoch": 0.5744369268884142, "grad_norm": 0.15766502916812897, "learning_rate": 1.892639603409277e-05, "loss": 0.6814, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1710 }, { "epoch": 0.5747728549158343, "grad_norm": 0.15892161428928375, "learning_rate": 1.8924726431105865e-05, "loss": 0.695, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1711 }, { "epoch": 0.5751087829432544, "grad_norm": 0.17210668325424194, "learning_rate": 1.8923055604655617e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1712 }, { "epoch": 0.5754447109706745, "grad_norm": 0.16826483607292175, "learning_rate": 1.8921383554971086e-05, "loss": 0.6789, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1713 }, { "epoch": 0.5757806389980946, "grad_norm": 0.15550929307937622, "learning_rate": 1.8919710282281477e-05, "loss": 0.6967, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1714 }, { "epoch": 0.5761165670255148, "grad_norm": 0.19900649785995483, "learning_rate": 1.8918035786816182e-05, "loss": 0.6775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1715 }, { "epoch": 0.5764524950529349, "grad_norm": 0.16790708899497986, "learning_rate": 1.891636006880475e-05, "loss": 0.6662, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1716 }, { "epoch": 0.576788423080355, "grad_norm": 0.17633429169654846, "learning_rate": 1.8914683128476897e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1717 }, { "epoch": 0.5771243511077752, "grad_norm": 0.1709427535533905, "learning_rate": 1.8913004966062517e-05, "loss": 0.7237, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1718 }, { "epoch": 0.5774602791351953, "grad_norm": 0.17175304889678955, "learning_rate": 1.8911325581791652e-05, "loss": 0.6922, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1719 }, { "epoch": 0.5777962071626154, "grad_norm": 0.18664473295211792, "learning_rate": 1.8909644975894536e-05, "loss": 0.6831, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1720 }, { "epoch": 0.5781321351900356, "grad_norm": 0.1901276707649231, "learning_rate": 1.890796314860155e-05, "loss": 0.687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1721 }, { "epoch": 0.5784680632174557, "grad_norm": 0.17903174459934235, "learning_rate": 1.8906280100143256e-05, "loss": 0.702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1722 }, { "epoch": 0.5788039912448758, "grad_norm": 0.190928116440773, "learning_rate": 1.8904595830750377e-05, "loss": 0.6954, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1723 }, { "epoch": 0.5791399192722959, "grad_norm": 0.15625306963920593, "learning_rate": 1.89029103406538e-05, "loss": 0.6762, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1724 }, { "epoch": 0.579475847299716, "grad_norm": 0.1741672158241272, "learning_rate": 1.8901223630084585e-05, "loss": 0.6743, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1725 }, { "epoch": 0.5798117753271361, "grad_norm": 0.15567809343338013, "learning_rate": 1.8899535699273962e-05, "loss": 0.6935, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1726 }, { "epoch": 0.5801477033545562, "grad_norm": 0.19022375345230103, "learning_rate": 1.889784654845332e-05, "loss": 0.7111, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1727 }, { "epoch": 0.5804836313819765, "grad_norm": 0.1764315813779831, "learning_rate": 1.8896156177854222e-05, "loss": 0.6708, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1728 }, { "epoch": 0.5808195594093966, "grad_norm": 0.16486001014709473, "learning_rate": 1.8894464587708398e-05, "loss": 0.6806, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1729 }, { "epoch": 0.5811554874368167, "grad_norm": 0.17097002267837524, "learning_rate": 1.8892771778247733e-05, "loss": 0.6952, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1730 }, { "epoch": 0.5814914154642368, "grad_norm": 0.16449660062789917, "learning_rate": 1.8891077749704298e-05, "loss": 0.6812, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1731 }, { "epoch": 0.5818273434916569, "grad_norm": 0.1913132220506668, "learning_rate": 1.8889382502310324e-05, "loss": 0.6914, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1732 }, { "epoch": 0.582163271519077, "grad_norm": 0.18322235345840454, "learning_rate": 1.8887686036298198e-05, "loss": 0.6738, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1733 }, { "epoch": 0.5824991995464972, "grad_norm": 0.20651252567768097, "learning_rate": 1.8885988351900494e-05, "loss": 0.6928, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1734 }, { "epoch": 0.5828351275739173, "grad_norm": 0.1795605719089508, "learning_rate": 1.8884289449349933e-05, "loss": 0.6849, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1735 }, { "epoch": 0.5831710556013374, "grad_norm": 0.1930541694164276, "learning_rate": 1.8882589328879418e-05, "loss": 0.6625, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1736 }, { "epoch": 0.5835069836287575, "grad_norm": 0.17090748250484467, "learning_rate": 1.888088799072201e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1737 }, { "epoch": 0.5838429116561776, "grad_norm": 0.1544804871082306, "learning_rate": 1.887918543511094e-05, "loss": 0.6872, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1738 }, { "epoch": 0.5841788396835977, "grad_norm": 0.18659590184688568, "learning_rate": 1.8877481662279613e-05, "loss": 0.6833, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1739 }, { "epoch": 0.584514767711018, "grad_norm": 0.16867046058177948, "learning_rate": 1.8875776672461585e-05, "loss": 0.6745, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1740 }, { "epoch": 0.5848506957384381, "grad_norm": 0.18130789697170258, "learning_rate": 1.8874070465890595e-05, "loss": 0.6796, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1741 }, { "epoch": 0.5851866237658582, "grad_norm": 0.16781260073184967, "learning_rate": 1.8872363042800535e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1742 }, { "epoch": 0.5855225517932783, "grad_norm": 0.17756648361682892, "learning_rate": 1.8870654403425475e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1743 }, { "epoch": 0.5858584798206984, "grad_norm": 0.15220077335834503, "learning_rate": 1.8868944547999646e-05, "loss": 0.6735, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1744 }, { "epoch": 0.5861944078481185, "grad_norm": 0.17110206186771393, "learning_rate": 1.8867233476757446e-05, "loss": 0.7125, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1745 }, { "epoch": 0.5865303358755387, "grad_norm": 0.1626214236021042, "learning_rate": 1.8865521189933442e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1746 }, { "epoch": 0.5868662639029588, "grad_norm": 0.14362972974777222, "learning_rate": 1.8863807687762364e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1747 }, { "epoch": 0.5872021919303789, "grad_norm": 0.17425186932086945, "learning_rate": 1.8862092970479114e-05, "loss": 0.6852, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1748 }, { "epoch": 0.587538119957799, "grad_norm": 0.20697705447673798, "learning_rate": 1.886037703831875e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1749 }, { "epoch": 0.5878740479852191, "grad_norm": 0.1699458360671997, "learning_rate": 1.8858659891516516e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1750 }, { "epoch": 0.5882099760126392, "grad_norm": 0.18608300387859344, "learning_rate": 1.88569415303078e-05, "loss": 0.6768, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1751 }, { "epoch": 0.5885459040400595, "grad_norm": 0.20370203256607056, "learning_rate": 1.885522195492817e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1752 }, { "epoch": 0.5888818320674796, "grad_norm": 0.2009270340204239, "learning_rate": 1.885350116561336e-05, "loss": 0.6964, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1753 }, { "epoch": 0.5892177600948997, "grad_norm": 0.2059970498085022, "learning_rate": 1.8851779162599264e-05, "loss": 0.694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1754 }, { "epoch": 0.5895536881223198, "grad_norm": 0.19541947543621063, "learning_rate": 1.885005594612195e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1755 }, { "epoch": 0.5898896161497399, "grad_norm": 0.1837528496980667, "learning_rate": 1.8848331516417643e-05, "loss": 0.6996, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1756 }, { "epoch": 0.59022554417716, "grad_norm": 0.1948401778936386, "learning_rate": 1.8846605873722742e-05, "loss": 0.6921, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1757 }, { "epoch": 0.5905614722045802, "grad_norm": 0.20334544777870178, "learning_rate": 1.8844879018273813e-05, "loss": 0.7056, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1758 }, { "epoch": 0.5908974002320003, "grad_norm": 0.1821884959936142, "learning_rate": 1.8843150950307584e-05, "loss": 0.6831, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1759 }, { "epoch": 0.5912333282594204, "grad_norm": 0.2196470946073532, "learning_rate": 1.884142167006095e-05, "loss": 0.6715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1760 }, { "epoch": 0.5915692562868405, "grad_norm": 0.21598514914512634, "learning_rate": 1.8839691177770973e-05, "loss": 0.6973, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1761 }, { "epoch": 0.5919051843142606, "grad_norm": 0.1872314214706421, "learning_rate": 1.8837959473674877e-05, "loss": 0.7053, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1762 }, { "epoch": 0.5922411123416808, "grad_norm": 0.23151858150959015, "learning_rate": 1.883622655801006e-05, "loss": 0.71, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1763 }, { "epoch": 0.592577040369101, "grad_norm": 0.19166727364063263, "learning_rate": 1.8834492431014083e-05, "loss": 0.684, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1764 }, { "epoch": 0.5929129683965211, "grad_norm": 0.2217445820569992, "learning_rate": 1.883275709292467e-05, "loss": 0.6928, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1765 }, { "epoch": 0.5932488964239412, "grad_norm": 0.2025127410888672, "learning_rate": 1.883102054397971e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1766 }, { "epoch": 0.5935848244513613, "grad_norm": 0.2128908932209015, "learning_rate": 1.882928278441727e-05, "loss": 0.6752, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1767 }, { "epoch": 0.5939207524787814, "grad_norm": 0.18258699774742126, "learning_rate": 1.8827543814475565e-05, "loss": 0.7012, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1768 }, { "epoch": 0.5942566805062015, "grad_norm": 0.21052925288677216, "learning_rate": 1.8825803634392993e-05, "loss": 0.687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1769 }, { "epoch": 0.5945926085336217, "grad_norm": 0.2500614821910858, "learning_rate": 1.8824062244408105e-05, "loss": 0.691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1770 }, { "epoch": 0.5949285365610418, "grad_norm": 0.24490578472614288, "learning_rate": 1.8822319644759622e-05, "loss": 0.6795, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1771 }, { "epoch": 0.5952644645884619, "grad_norm": 0.16830392181873322, "learning_rate": 1.8820575835686435e-05, "loss": 0.6983, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1772 }, { "epoch": 0.595600392615882, "grad_norm": 0.16275399923324585, "learning_rate": 1.8818830817427596e-05, "loss": 0.6901, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1773 }, { "epoch": 0.5959363206433022, "grad_norm": 0.21874119341373444, "learning_rate": 1.881708459022232e-05, "loss": 0.7027, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1774 }, { "epoch": 0.5962722486707223, "grad_norm": 0.2294093519449234, "learning_rate": 1.8815337154310003e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1775 }, { "epoch": 0.5966081766981425, "grad_norm": 0.21608346700668335, "learning_rate": 1.8813588509930185e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1776 }, { "epoch": 0.5969441047255626, "grad_norm": 0.19509248435497284, "learning_rate": 1.8811838657322584e-05, "loss": 0.7033, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1777 }, { "epoch": 0.5972800327529827, "grad_norm": 0.20145680010318756, "learning_rate": 1.8810087596727087e-05, "loss": 0.691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1778 }, { "epoch": 0.5976159607804028, "grad_norm": 0.25379833579063416, "learning_rate": 1.8808335328383737e-05, "loss": 0.6913, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1779 }, { "epoch": 0.5979518888078229, "grad_norm": 0.21683110296726227, "learning_rate": 1.8806581852532748e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1780 }, { "epoch": 0.598287816835243, "grad_norm": 0.16603609919548035, "learning_rate": 1.8804827169414498e-05, "loss": 0.7018, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1781 }, { "epoch": 0.5986237448626631, "grad_norm": 0.1930864453315735, "learning_rate": 1.8803071279269534e-05, "loss": 0.6947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1782 }, { "epoch": 0.5989596728900833, "grad_norm": 0.20336216688156128, "learning_rate": 1.8801314182338564e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1783 }, { "epoch": 0.5992956009175034, "grad_norm": 0.2302026003599167, "learning_rate": 1.8799555878862462e-05, "loss": 0.6652, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1784 }, { "epoch": 0.5996315289449236, "grad_norm": 0.18300718069076538, "learning_rate": 1.8797796369082268e-05, "loss": 0.6866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1785 }, { "epoch": 0.5999674569723437, "grad_norm": 0.18634699285030365, "learning_rate": 1.879603565323919e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1786 }, { "epoch": 0.6003033849997638, "grad_norm": 0.19631671905517578, "learning_rate": 1.8794273731574598e-05, "loss": 0.6854, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1787 }, { "epoch": 0.6006393130271839, "grad_norm": 0.2100342959165573, "learning_rate": 1.879251060433003e-05, "loss": 0.657, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1788 }, { "epoch": 0.6009752410546041, "grad_norm": 0.16150042414665222, "learning_rate": 1.8790746271747186e-05, "loss": 0.7004, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1789 }, { "epoch": 0.6013111690820242, "grad_norm": 0.17783187329769135, "learning_rate": 1.8788980734067932e-05, "loss": 0.6741, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1790 }, { "epoch": 0.6016470971094443, "grad_norm": 0.15114320814609528, "learning_rate": 1.8787213991534302e-05, "loss": 0.7089, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1791 }, { "epoch": 0.6019830251368644, "grad_norm": 0.1658099740743637, "learning_rate": 1.878544604438849e-05, "loss": 0.6907, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1792 }, { "epoch": 0.6023189531642845, "grad_norm": 0.18677948415279388, "learning_rate": 1.8783676892872865e-05, "loss": 0.691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1793 }, { "epoch": 0.6026548811917046, "grad_norm": 0.20348158478736877, "learning_rate": 1.8781906537229946e-05, "loss": 0.6775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1794 }, { "epoch": 0.6029908092191248, "grad_norm": 0.17317348718643188, "learning_rate": 1.8780134977702433e-05, "loss": 0.7043, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1795 }, { "epoch": 0.603326737246545, "grad_norm": 0.23411837220191956, "learning_rate": 1.877836221453318e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1796 }, { "epoch": 0.6036626652739651, "grad_norm": 0.20034384727478027, "learning_rate": 1.8776588247965212e-05, "loss": 0.6665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1797 }, { "epoch": 0.6039985933013852, "grad_norm": 0.1900482326745987, "learning_rate": 1.8774813078241715e-05, "loss": 0.6838, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1798 }, { "epoch": 0.6043345213288053, "grad_norm": 0.18524214625358582, "learning_rate": 1.877303670560604e-05, "loss": 0.6828, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1799 }, { "epoch": 0.6046704493562254, "grad_norm": 0.1753096580505371, "learning_rate": 1.8771259130301706e-05, "loss": 0.7143, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1800 }, { "epoch": 0.6050063773836456, "grad_norm": 0.2064599245786667, "learning_rate": 1.87694803525724e-05, "loss": 0.686, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1801 }, { "epoch": 0.6053423054110657, "grad_norm": 0.16747163236141205, "learning_rate": 1.876770037266196e-05, "loss": 0.6838, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1802 }, { "epoch": 0.6056782334384858, "grad_norm": 0.23209406435489655, "learning_rate": 1.8765919190814403e-05, "loss": 0.6879, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1803 }, { "epoch": 0.6060141614659059, "grad_norm": 0.1828191727399826, "learning_rate": 1.8764136807273906e-05, "loss": 0.6965, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1804 }, { "epoch": 0.606350089493326, "grad_norm": 0.23373225331306458, "learning_rate": 1.8762353222284813e-05, "loss": 0.7214, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1805 }, { "epoch": 0.6066860175207461, "grad_norm": 0.17375865578651428, "learning_rate": 1.8760568436091623e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1806 }, { "epoch": 0.6070219455481664, "grad_norm": 0.18453386425971985, "learning_rate": 1.8758782448939014e-05, "loss": 0.6683, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1807 }, { "epoch": 0.6073578735755865, "grad_norm": 0.17521442472934723, "learning_rate": 1.8756995261071814e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1808 }, { "epoch": 0.6076938016030066, "grad_norm": 0.17214451730251312, "learning_rate": 1.8755206872735033e-05, "loss": 0.6701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1809 }, { "epoch": 0.6080297296304267, "grad_norm": 0.16550898551940918, "learning_rate": 1.8753417284173827e-05, "loss": 0.6921, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1810 }, { "epoch": 0.6083656576578468, "grad_norm": 0.17696690559387207, "learning_rate": 1.875162649563353e-05, "loss": 0.6807, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1811 }, { "epoch": 0.6087015856852669, "grad_norm": 0.16586963832378387, "learning_rate": 1.874983450735963e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1812 }, { "epoch": 0.6090375137126871, "grad_norm": 0.1676538586616516, "learning_rate": 1.8748041319597796e-05, "loss": 0.6989, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1813 }, { "epoch": 0.6093734417401072, "grad_norm": 0.1557154357433319, "learning_rate": 1.8746246932593842e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1814 }, { "epoch": 0.6097093697675273, "grad_norm": 0.17898768186569214, "learning_rate": 1.8744451346593755e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 1815 }, { "epoch": 0.6100452977949474, "grad_norm": 0.14287227392196655, "learning_rate": 1.874265456184369e-05, "loss": 0.6876, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1816 }, { "epoch": 0.6103812258223675, "grad_norm": 0.17136920988559723, "learning_rate": 1.874085657858996e-05, "loss": 0.6819, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1817 }, { "epoch": 0.6107171538497876, "grad_norm": 0.15871772170066833, "learning_rate": 1.8739057397079044e-05, "loss": 0.665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1818 }, { "epoch": 0.6110530818772079, "grad_norm": 0.17326702177524567, "learning_rate": 1.8737257017557587e-05, "loss": 0.6935, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1819 }, { "epoch": 0.611389009904628, "grad_norm": 0.16140910983085632, "learning_rate": 1.8735455440272402e-05, "loss": 0.6937, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1820 }, { "epoch": 0.6117249379320481, "grad_norm": 0.16829454898834229, "learning_rate": 1.873365266547046e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1821 }, { "epoch": 0.6120608659594682, "grad_norm": 0.1774343103170395, "learning_rate": 1.8731848693398894e-05, "loss": 0.6697, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1822 }, { "epoch": 0.6123967939868883, "grad_norm": 0.19891831278800964, "learning_rate": 1.8730043524305008e-05, "loss": 0.7012, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 1823 }, { "epoch": 0.6127327220143084, "grad_norm": 0.21063485741615295, "learning_rate": 1.8728237158436265e-05, "loss": 0.7106, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1824 }, { "epoch": 0.6130686500417286, "grad_norm": 0.15672187507152557, "learning_rate": 1.8726429596040297e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1825 }, { "epoch": 0.6134045780691487, "grad_norm": 0.20255352556705475, "learning_rate": 1.8724620837364895e-05, "loss": 0.6962, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1826 }, { "epoch": 0.6137405060965688, "grad_norm": 0.1940629482269287, "learning_rate": 1.8722810882658022e-05, "loss": 0.7109, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1827 }, { "epoch": 0.6140764341239889, "grad_norm": 0.20297513902187347, "learning_rate": 1.8720999732167785e-05, "loss": 0.6762, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1828 }, { "epoch": 0.614412362151409, "grad_norm": 0.15779969096183777, "learning_rate": 1.871918738614248e-05, "loss": 0.6879, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1829 }, { "epoch": 0.6147482901788291, "grad_norm": 0.19083496928215027, "learning_rate": 1.8717373844830557e-05, "loss": 0.6893, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1830 }, { "epoch": 0.6150842182062493, "grad_norm": 0.15591786801815033, "learning_rate": 1.8715559108480626e-05, "loss": 0.6924, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1831 }, { "epoch": 0.6154201462336695, "grad_norm": 0.15654416382312775, "learning_rate": 1.8713743177341466e-05, "loss": 0.6978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1832 }, { "epoch": 0.6157560742610896, "grad_norm": 0.16384129226207733, "learning_rate": 1.871192605166201e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1833 }, { "epoch": 0.6160920022885097, "grad_norm": 0.17706811428070068, "learning_rate": 1.8710107731691368e-05, "loss": 0.69, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1834 }, { "epoch": 0.6164279303159298, "grad_norm": 0.1596466600894928, "learning_rate": 1.8708288217678806e-05, "loss": 0.6967, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1835 }, { "epoch": 0.6167638583433499, "grad_norm": 0.18994903564453125, "learning_rate": 1.870646750987376e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1836 }, { "epoch": 0.61709978637077, "grad_norm": 0.19478839635849, "learning_rate": 1.870464560852582e-05, "loss": 0.6861, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1837 }, { "epoch": 0.6174357143981902, "grad_norm": 0.16658814251422882, "learning_rate": 1.8702822513884745e-05, "loss": 0.6852, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1838 }, { "epoch": 0.6177716424256103, "grad_norm": 0.20021116733551025, "learning_rate": 1.870099822620046e-05, "loss": 0.6987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1839 }, { "epoch": 0.6181075704530304, "grad_norm": 0.18175658583641052, "learning_rate": 1.8699172745723052e-05, "loss": 0.6748, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1840 }, { "epoch": 0.6184434984804505, "grad_norm": 0.17747937142848969, "learning_rate": 1.8697346072702766e-05, "loss": 0.6753, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 1841 }, { "epoch": 0.6187794265078707, "grad_norm": 0.21870142221450806, "learning_rate": 1.869551820739002e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1842 }, { "epoch": 0.6191153545352908, "grad_norm": 0.19858503341674805, "learning_rate": 1.8693689150035387e-05, "loss": 0.6769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1843 }, { "epoch": 0.619451282562711, "grad_norm": 0.20212040841579437, "learning_rate": 1.8691858900889607e-05, "loss": 0.6823, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1844 }, { "epoch": 0.6197872105901311, "grad_norm": 0.17945465445518494, "learning_rate": 1.8690027460203584e-05, "loss": 0.7016, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1845 }, { "epoch": 0.6201231386175512, "grad_norm": 0.1616770476102829, "learning_rate": 1.8688194828228388e-05, "loss": 0.6941, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1846 }, { "epoch": 0.6204590666449713, "grad_norm": 0.18776434659957886, "learning_rate": 1.868636100521524e-05, "loss": 0.6752, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1847 }, { "epoch": 0.6207949946723914, "grad_norm": 0.17003680765628815, "learning_rate": 1.8684525991415543e-05, "loss": 0.6915, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1848 }, { "epoch": 0.6211309226998115, "grad_norm": 0.1717095822095871, "learning_rate": 1.868268978708085e-05, "loss": 0.688, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1849 }, { "epoch": 0.6214668507272317, "grad_norm": 0.19505982100963593, "learning_rate": 1.868085239246288e-05, "loss": 0.7083, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1850 }, { "epoch": 0.6218027787546518, "grad_norm": 0.17121922969818115, "learning_rate": 1.867901380781351e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1851 }, { "epoch": 0.622138706782072, "grad_norm": 0.15487492084503174, "learning_rate": 1.8677174033384794e-05, "loss": 0.7062, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1852 }, { "epoch": 0.6224746348094921, "grad_norm": 0.1751570999622345, "learning_rate": 1.8675333069428944e-05, "loss": 0.6858, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1853 }, { "epoch": 0.6228105628369122, "grad_norm": 0.1708514243364334, "learning_rate": 1.867349091619832e-05, "loss": 0.6633, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1854 }, { "epoch": 0.6231464908643323, "grad_norm": 0.2106497585773468, "learning_rate": 1.8671647573945467e-05, "loss": 0.6834, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1855 }, { "epoch": 0.6234824188917525, "grad_norm": 0.16231974959373474, "learning_rate": 1.866980304292308e-05, "loss": 0.7056, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1856 }, { "epoch": 0.6238183469191726, "grad_norm": 0.19034039974212646, "learning_rate": 1.8667957323384017e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1857 }, { "epoch": 0.6241542749465927, "grad_norm": 0.22089827060699463, "learning_rate": 1.8666110415581308e-05, "loss": 0.682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1858 }, { "epoch": 0.6244902029740128, "grad_norm": 0.1695169359445572, "learning_rate": 1.8664262319768134e-05, "loss": 0.6903, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1859 }, { "epoch": 0.6248261310014329, "grad_norm": 0.19894210994243622, "learning_rate": 1.866241303619785e-05, "loss": 0.7026, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1860 }, { "epoch": 0.625162059028853, "grad_norm": 0.20747967064380646, "learning_rate": 1.8660562565123962e-05, "loss": 0.6939, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1861 }, { "epoch": 0.6254979870562732, "grad_norm": 0.20176073908805847, "learning_rate": 1.8658710906800153e-05, "loss": 0.7003, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1862 }, { "epoch": 0.6258339150836933, "grad_norm": 0.17110425233840942, "learning_rate": 1.865685806148026e-05, "loss": 0.6719, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1863 }, { "epoch": 0.6261698431111135, "grad_norm": 0.21454089879989624, "learning_rate": 1.865500402941828e-05, "loss": 0.6887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1864 }, { "epoch": 0.6265057711385336, "grad_norm": 0.18830138444900513, "learning_rate": 1.865314881086838e-05, "loss": 0.6914, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1865 }, { "epoch": 0.6268416991659537, "grad_norm": 0.1771438866853714, "learning_rate": 1.865129240608488e-05, "loss": 0.6812, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1866 }, { "epoch": 0.6271776271933738, "grad_norm": 0.157863587141037, "learning_rate": 1.8649434815322278e-05, "loss": 0.68, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1867 }, { "epoch": 0.627513555220794, "grad_norm": 0.18498072028160095, "learning_rate": 1.864757603883522e-05, "loss": 0.7105, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1868 }, { "epoch": 0.6278494832482141, "grad_norm": 0.1759609580039978, "learning_rate": 1.8645716076878517e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1869 }, { "epoch": 0.6281854112756342, "grad_norm": 0.1892760545015335, "learning_rate": 1.864385492970715e-05, "loss": 0.6888, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1870 }, { "epoch": 0.6285213393030543, "grad_norm": 0.17111992835998535, "learning_rate": 1.864199259757626e-05, "loss": 0.6785, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1871 }, { "epoch": 0.6288572673304744, "grad_norm": 0.18400414288043976, "learning_rate": 1.8640129080741146e-05, "loss": 0.7053, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1872 }, { "epoch": 0.6291931953578945, "grad_norm": 0.17503906786441803, "learning_rate": 1.8638264379457268e-05, "loss": 0.6943, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1873 }, { "epoch": 0.6295291233853147, "grad_norm": 0.20198848843574524, "learning_rate": 1.8636398493980258e-05, "loss": 0.6981, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1874 }, { "epoch": 0.6298650514127349, "grad_norm": 0.19911038875579834, "learning_rate": 1.86345314245659e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1875 }, { "epoch": 0.630200979440155, "grad_norm": 0.16210822761058807, "learning_rate": 1.863266317147015e-05, "loss": 0.6975, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1876 }, { "epoch": 0.6305369074675751, "grad_norm": 0.21089531481266022, "learning_rate": 1.8630793734949115e-05, "loss": 0.6776, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1877 }, { "epoch": 0.6308728354949952, "grad_norm": 0.21675601601600647, "learning_rate": 1.8628923115259073e-05, "loss": 0.6794, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1878 }, { "epoch": 0.6312087635224153, "grad_norm": 0.19584503769874573, "learning_rate": 1.8627051312656462e-05, "loss": 0.6669, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1879 }, { "epoch": 0.6315446915498355, "grad_norm": 0.1708870679140091, "learning_rate": 1.862517832739788e-05, "loss": 0.6729, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1880 }, { "epoch": 0.6318806195772556, "grad_norm": 0.20046943426132202, "learning_rate": 1.8623304159740093e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1881 }, { "epoch": 0.6322165476046757, "grad_norm": 0.2257988452911377, "learning_rate": 1.8621428809940024e-05, "loss": 0.6762, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1882 }, { "epoch": 0.6325524756320958, "grad_norm": 0.21841472387313843, "learning_rate": 1.861955227825475e-05, "loss": 0.6818, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1883 }, { "epoch": 0.6328884036595159, "grad_norm": 0.18061350286006927, "learning_rate": 1.861767456494153e-05, "loss": 0.6807, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1884 }, { "epoch": 0.633224331686936, "grad_norm": 0.20002758502960205, "learning_rate": 1.861579567025777e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1885 }, { "epoch": 0.6335602597143561, "grad_norm": 0.20995399355888367, "learning_rate": 1.8613915594461044e-05, "loss": 0.6992, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1886 }, { "epoch": 0.6338961877417764, "grad_norm": 0.2011539340019226, "learning_rate": 1.861203433780908e-05, "loss": 0.7103, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1887 }, { "epoch": 0.6342321157691965, "grad_norm": 0.2481609433889389, "learning_rate": 1.8610151900559778e-05, "loss": 0.6686, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1888 }, { "epoch": 0.6345680437966166, "grad_norm": 0.19610139727592468, "learning_rate": 1.86082682829712e-05, "loss": 0.6911, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1889 }, { "epoch": 0.6349039718240367, "grad_norm": 0.2118106633424759, "learning_rate": 1.8606383485301553e-05, "loss": 0.6982, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1890 }, { "epoch": 0.6352398998514568, "grad_norm": 0.20777934789657593, "learning_rate": 1.8604497507809228e-05, "loss": 0.6935, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1891 }, { "epoch": 0.6355758278788769, "grad_norm": 0.1977759152650833, "learning_rate": 1.8602610350752767e-05, "loss": 0.672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1892 }, { "epoch": 0.6359117559062971, "grad_norm": 0.19525784254074097, "learning_rate": 1.860072201439087e-05, "loss": 0.676, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1893 }, { "epoch": 0.6362476839337172, "grad_norm": 0.21267564594745636, "learning_rate": 1.8598832498982406e-05, "loss": 0.703, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1894 }, { "epoch": 0.6365836119611373, "grad_norm": 0.19754306972026825, "learning_rate": 1.8596941804786403e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1895 }, { "epoch": 0.6369195399885574, "grad_norm": 0.16438494622707367, "learning_rate": 1.8595049932062053e-05, "loss": 0.7, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1896 }, { "epoch": 0.6372554680159775, "grad_norm": 0.19665424525737762, "learning_rate": 1.8593156881068703e-05, "loss": 0.6912, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1897 }, { "epoch": 0.6375913960433977, "grad_norm": 0.1799023449420929, "learning_rate": 1.8591262652065867e-05, "loss": 0.7012, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1898 }, { "epoch": 0.6379273240708179, "grad_norm": 0.1956806778907776, "learning_rate": 1.858936724531322e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1899 }, { "epoch": 0.638263252098238, "grad_norm": 0.1998363733291626, "learning_rate": 1.8587470661070593e-05, "loss": 0.6943, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1900 }, { "epoch": 0.6385991801256581, "grad_norm": 0.20190270245075226, "learning_rate": 1.8585572899597986e-05, "loss": 0.6775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1901 }, { "epoch": 0.6389351081530782, "grad_norm": 0.1849663257598877, "learning_rate": 1.8583673961155562e-05, "loss": 0.6904, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1902 }, { "epoch": 0.6392710361804983, "grad_norm": 0.19452545046806335, "learning_rate": 1.8581773846003638e-05, "loss": 0.7019, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1903 }, { "epoch": 0.6396069642079184, "grad_norm": 0.19488267600536346, "learning_rate": 1.8579872554402686e-05, "loss": 0.7043, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1904 }, { "epoch": 0.6399428922353386, "grad_norm": 0.16584494709968567, "learning_rate": 1.857797008661336e-05, "loss": 0.7015, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1905 }, { "epoch": 0.6402788202627587, "grad_norm": 0.18828211724758148, "learning_rate": 1.8576066442896456e-05, "loss": 0.675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1906 }, { "epoch": 0.6406147482901788, "grad_norm": 0.17334984242916107, "learning_rate": 1.8574161623512946e-05, "loss": 0.6727, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1907 }, { "epoch": 0.6409506763175989, "grad_norm": 0.174107626080513, "learning_rate": 1.8572255628723946e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1908 }, { "epoch": 0.641286604345019, "grad_norm": 0.18062277138233185, "learning_rate": 1.8570348458790753e-05, "loss": 0.6996, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1909 }, { "epoch": 0.6416225323724392, "grad_norm": 0.1749410331249237, "learning_rate": 1.8568440113974808e-05, "loss": 0.7138, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1910 }, { "epoch": 0.6419584603998594, "grad_norm": 0.18544788658618927, "learning_rate": 1.8566530594537723e-05, "loss": 0.6935, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1911 }, { "epoch": 0.6422943884272795, "grad_norm": 0.17909856140613556, "learning_rate": 1.8564619900741267e-05, "loss": 0.6865, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1912 }, { "epoch": 0.6426303164546996, "grad_norm": 0.18915507197380066, "learning_rate": 1.8562708032847375e-05, "loss": 0.6935, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1913 }, { "epoch": 0.6429662444821197, "grad_norm": 0.19410815834999084, "learning_rate": 1.8560794991118134e-05, "loss": 0.6736, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1914 }, { "epoch": 0.6433021725095398, "grad_norm": 0.200113907456398, "learning_rate": 1.8558880775815797e-05, "loss": 0.6803, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1915 }, { "epoch": 0.6436381005369599, "grad_norm": 0.1930086314678192, "learning_rate": 1.855696538720278e-05, "loss": 0.6945, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1916 }, { "epoch": 0.6439740285643801, "grad_norm": 0.1669212132692337, "learning_rate": 1.855504882554166e-05, "loss": 0.6839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 1917 }, { "epoch": 0.6443099565918002, "grad_norm": 0.20450827479362488, "learning_rate": 1.8553131091095173e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1918 }, { "epoch": 0.6446458846192203, "grad_norm": 0.1714690774679184, "learning_rate": 1.855121218412621e-05, "loss": 0.6797, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1919 }, { "epoch": 0.6449818126466405, "grad_norm": 0.19271785020828247, "learning_rate": 1.8549292104897828e-05, "loss": 0.6992, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1920 }, { "epoch": 0.6453177406740606, "grad_norm": 0.15931448340415955, "learning_rate": 1.854737085367325e-05, "loss": 0.677, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1921 }, { "epoch": 0.6456536687014807, "grad_norm": 0.20557136833667755, "learning_rate": 1.8545448430715852e-05, "loss": 0.6886, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1922 }, { "epoch": 0.6459895967289009, "grad_norm": 0.19275924563407898, "learning_rate": 1.854352483628917e-05, "loss": 0.6601, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1923 }, { "epoch": 0.646325524756321, "grad_norm": 0.17794351279735565, "learning_rate": 1.854160007065691e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1924 }, { "epoch": 0.6466614527837411, "grad_norm": 0.15948468446731567, "learning_rate": 1.8539674134082927e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1925 }, { "epoch": 0.6469973808111612, "grad_norm": 0.16430293023586273, "learning_rate": 1.8537747026831245e-05, "loss": 0.7048, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1926 }, { "epoch": 0.6473333088385813, "grad_norm": 0.1836216002702713, "learning_rate": 1.8535818749166038e-05, "loss": 0.6907, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1927 }, { "epoch": 0.6476692368660014, "grad_norm": 0.1590539813041687, "learning_rate": 1.8533889301351658e-05, "loss": 0.6911, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1928 }, { "epoch": 0.6480051648934216, "grad_norm": 0.1610390543937683, "learning_rate": 1.85319586836526e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1929 }, { "epoch": 0.6483410929208417, "grad_norm": 0.17949345707893372, "learning_rate": 1.8530026896333533e-05, "loss": 0.6861, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1930 }, { "epoch": 0.6486770209482619, "grad_norm": 0.15841178596019745, "learning_rate": 1.8528093939659274e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1931 }, { "epoch": 0.649012948975682, "grad_norm": 0.18386036157608032, "learning_rate": 1.8526159813894806e-05, "loss": 0.6945, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1932 }, { "epoch": 0.6493488770031021, "grad_norm": 0.1749316304922104, "learning_rate": 1.8524224519305275e-05, "loss": 0.6968, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1933 }, { "epoch": 0.6496848050305222, "grad_norm": 0.18962568044662476, "learning_rate": 1.8522288056155985e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1934 }, { "epoch": 0.6500207330579424, "grad_norm": 0.19381685554981232, "learning_rate": 1.8520350424712396e-05, "loss": 0.6997, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1935 }, { "epoch": 0.6503566610853625, "grad_norm": 0.17297117412090302, "learning_rate": 1.8518411625240137e-05, "loss": 0.6798, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1936 }, { "epoch": 0.6506925891127826, "grad_norm": 0.2025928795337677, "learning_rate": 1.8516471658004987e-05, "loss": 0.6712, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 1937 }, { "epoch": 0.6510285171402027, "grad_norm": 0.20263047516345978, "learning_rate": 1.851453052327289e-05, "loss": 0.6888, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1938 }, { "epoch": 0.6513644451676228, "grad_norm": 0.2033727467060089, "learning_rate": 1.851258822130996e-05, "loss": 0.6938, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1939 }, { "epoch": 0.6517003731950429, "grad_norm": 0.1978379786014557, "learning_rate": 1.851064475238245e-05, "loss": 0.6724, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1940 }, { "epoch": 0.652036301222463, "grad_norm": 0.2782594859600067, "learning_rate": 1.8508700116756788e-05, "loss": 0.7077, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1941 }, { "epoch": 0.6523722292498833, "grad_norm": 0.20394806563854218, "learning_rate": 1.8506754314699558e-05, "loss": 0.6906, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1942 }, { "epoch": 0.6527081572773034, "grad_norm": 0.21546640992164612, "learning_rate": 1.8504807346477508e-05, "loss": 0.6776, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1943 }, { "epoch": 0.6530440853047235, "grad_norm": 0.21987834572792053, "learning_rate": 1.8502859212357536e-05, "loss": 0.6712, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1944 }, { "epoch": 0.6533800133321436, "grad_norm": 0.2210492342710495, "learning_rate": 1.8500909912606707e-05, "loss": 0.6978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1945 }, { "epoch": 0.6537159413595637, "grad_norm": 0.17487424612045288, "learning_rate": 1.8498959447492244e-05, "loss": 0.7002, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1946 }, { "epoch": 0.6540518693869838, "grad_norm": 0.20245526731014252, "learning_rate": 1.8497007817281537e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1947 }, { "epoch": 0.654387797414404, "grad_norm": 0.20469547808170319, "learning_rate": 1.8495055022242118e-05, "loss": 0.6785, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1948 }, { "epoch": 0.6547237254418241, "grad_norm": 0.22830277681350708, "learning_rate": 1.8493101062641702e-05, "loss": 0.7007, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 62.87, "memory/max_allocated (GiB)": 62.87, "step": 1949 }, { "epoch": 0.6550596534692442, "grad_norm": 0.21698667109012604, "learning_rate": 1.849114593874814e-05, "loss": 0.6859, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1950 }, { "epoch": 0.6553955814966643, "grad_norm": 0.26702556014060974, "learning_rate": 1.8489189650829456e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1951 }, { "epoch": 0.6557315095240844, "grad_norm": 0.19126519560813904, "learning_rate": 1.848723219915384e-05, "loss": 0.7038, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1952 }, { "epoch": 0.6560674375515045, "grad_norm": 0.21519999206066132, "learning_rate": 1.8485273583989624e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1953 }, { "epoch": 0.6564033655789248, "grad_norm": 0.2056800127029419, "learning_rate": 1.8483313805605313e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1954 }, { "epoch": 0.6567392936063449, "grad_norm": 0.1807548999786377, "learning_rate": 1.8481352864269567e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1955 }, { "epoch": 0.657075221633765, "grad_norm": 0.16633407771587372, "learning_rate": 1.8479390760251204e-05, "loss": 0.6702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1956 }, { "epoch": 0.6574111496611851, "grad_norm": 0.2042681872844696, "learning_rate": 1.84774274938192e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1957 }, { "epoch": 0.6577470776886052, "grad_norm": 0.19341742992401123, "learning_rate": 1.8475463065242698e-05, "loss": 0.6712, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1958 }, { "epoch": 0.6580830057160253, "grad_norm": 0.1701231747865677, "learning_rate": 1.8473497474790997e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1959 }, { "epoch": 0.6584189337434455, "grad_norm": 0.19508136808872223, "learning_rate": 1.8471530722733545e-05, "loss": 0.7022, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1960 }, { "epoch": 0.6587548617708656, "grad_norm": 0.1911623179912567, "learning_rate": 1.8469562809339964e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1961 }, { "epoch": 0.6590907897982857, "grad_norm": 0.18700528144836426, "learning_rate": 1.846759373488003e-05, "loss": 0.6948, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1962 }, { "epoch": 0.6594267178257058, "grad_norm": 0.1701691448688507, "learning_rate": 1.8465623499623676e-05, "loss": 0.6872, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1963 }, { "epoch": 0.6597626458531259, "grad_norm": 0.1701321005821228, "learning_rate": 1.8463652103840997e-05, "loss": 0.6776, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1964 }, { "epoch": 0.660098573880546, "grad_norm": 0.16564571857452393, "learning_rate": 1.8461679547802244e-05, "loss": 0.7032, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1965 }, { "epoch": 0.6604345019079663, "grad_norm": 0.16671985387802124, "learning_rate": 1.8459705831777825e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1966 }, { "epoch": 0.6607704299353864, "grad_norm": 0.1705619990825653, "learning_rate": 1.845773095603832e-05, "loss": 0.6874, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1967 }, { "epoch": 0.6611063579628065, "grad_norm": 0.1770312637090683, "learning_rate": 1.8455754920854448e-05, "loss": 0.6668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1968 }, { "epoch": 0.6614422859902266, "grad_norm": 0.19639846682548523, "learning_rate": 1.8453777726497103e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1969 }, { "epoch": 0.6617782140176467, "grad_norm": 0.16575050354003906, "learning_rate": 1.8451799373237332e-05, "loss": 0.6869, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1970 }, { "epoch": 0.6621141420450668, "grad_norm": 0.17771001160144806, "learning_rate": 1.8449819861346347e-05, "loss": 0.704, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1971 }, { "epoch": 0.662450070072487, "grad_norm": 0.1484578549861908, "learning_rate": 1.8447839191095505e-05, "loss": 0.6743, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1972 }, { "epoch": 0.6627859980999071, "grad_norm": 0.1456988900899887, "learning_rate": 1.844585736275633e-05, "loss": 0.6817, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1973 }, { "epoch": 0.6631219261273272, "grad_norm": 0.16559912264347076, "learning_rate": 1.8443874376600508e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1974 }, { "epoch": 0.6634578541547473, "grad_norm": 0.1518491953611374, "learning_rate": 1.844189023289988e-05, "loss": 0.6857, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1975 }, { "epoch": 0.6637937821821674, "grad_norm": 0.15849937498569489, "learning_rate": 1.843990493192645e-05, "loss": 0.6644, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 1976 }, { "epoch": 0.6641297102095876, "grad_norm": 0.1550321877002716, "learning_rate": 1.843791847395237e-05, "loss": 0.6856, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1977 }, { "epoch": 0.6644656382370078, "grad_norm": 0.17623160779476166, "learning_rate": 1.843593085924996e-05, "loss": 0.6858, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1978 }, { "epoch": 0.6648015662644279, "grad_norm": 0.18678979575634003, "learning_rate": 1.8433942088091696e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1979 }, { "epoch": 0.665137494291848, "grad_norm": 0.17743723094463348, "learning_rate": 1.8431952160750215e-05, "loss": 0.6545, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1980 }, { "epoch": 0.6654734223192681, "grad_norm": 0.18588578701019287, "learning_rate": 1.842996107749831e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1981 }, { "epoch": 0.6658093503466882, "grad_norm": 0.205877885222435, "learning_rate": 1.8427968838608926e-05, "loss": 0.7033, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 1982 }, { "epoch": 0.6661452783741083, "grad_norm": 0.17739476263523102, "learning_rate": 1.8425975444355176e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1983 }, { "epoch": 0.6664812064015285, "grad_norm": 0.1830516904592514, "learning_rate": 1.842398089501033e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1984 }, { "epoch": 0.6668171344289486, "grad_norm": 0.1803082674741745, "learning_rate": 1.842198519084782e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1985 }, { "epoch": 0.6671530624563687, "grad_norm": 0.19893908500671387, "learning_rate": 1.841998833214122e-05, "loss": 0.704, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1986 }, { "epoch": 0.6674889904837888, "grad_norm": 0.17574958503246307, "learning_rate": 1.841799031916428e-05, "loss": 0.6827, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1987 }, { "epoch": 0.667824918511209, "grad_norm": 0.18279321491718292, "learning_rate": 1.84159911521909e-05, "loss": 0.6763, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1988 }, { "epoch": 0.6681608465386291, "grad_norm": 0.15140925347805023, "learning_rate": 1.841399083149514e-05, "loss": 0.6648, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1989 }, { "epoch": 0.6684967745660493, "grad_norm": 0.2002575695514679, "learning_rate": 1.841198935735122e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1990 }, { "epoch": 0.6688327025934694, "grad_norm": 0.20425546169281006, "learning_rate": 1.840998673003351e-05, "loss": 0.683, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1991 }, { "epoch": 0.6691686306208895, "grad_norm": 0.16014505922794342, "learning_rate": 1.8407982949816548e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1992 }, { "epoch": 0.6695045586483096, "grad_norm": 0.18232354521751404, "learning_rate": 1.8405978016975024e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1993 }, { "epoch": 0.6698404866757297, "grad_norm": 0.16427826881408691, "learning_rate": 1.840397193178379e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1994 }, { "epoch": 0.6701764147031498, "grad_norm": 0.1840374767780304, "learning_rate": 1.8401964694517854e-05, "loss": 0.6806, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 1995 }, { "epoch": 0.6705123427305699, "grad_norm": 0.19824440777301788, "learning_rate": 1.8399956305452383e-05, "loss": 0.6815, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1996 }, { "epoch": 0.6708482707579901, "grad_norm": 0.18119248747825623, "learning_rate": 1.83979467648627e-05, "loss": 0.7048, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1997 }, { "epoch": 0.6711841987854102, "grad_norm": 0.16717658936977386, "learning_rate": 1.8395936073024282e-05, "loss": 0.6851, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1998 }, { "epoch": 0.6715201268128304, "grad_norm": 0.20388975739479065, "learning_rate": 1.8393924230212773e-05, "loss": 0.691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 1999 }, { "epoch": 0.6718560548402505, "grad_norm": 0.19056497514247894, "learning_rate": 1.8391911236703973e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2000 }, { "epoch": 0.6721919828676706, "grad_norm": 0.14745865762233734, "learning_rate": 1.8389897092773835e-05, "loss": 0.6941, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2001 }, { "epoch": 0.6725279108950907, "grad_norm": 0.1969132423400879, "learning_rate": 1.838788179869847e-05, "loss": 0.6709, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2002 }, { "epoch": 0.6728638389225109, "grad_norm": 0.17386949062347412, "learning_rate": 1.838586535475415e-05, "loss": 0.6778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2003 }, { "epoch": 0.673199766949931, "grad_norm": 0.1558786928653717, "learning_rate": 1.8383847761217302e-05, "loss": 0.6803, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2004 }, { "epoch": 0.6735356949773511, "grad_norm": 0.15472672879695892, "learning_rate": 1.838182901836451e-05, "loss": 0.6777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2005 }, { "epoch": 0.6738716230047712, "grad_norm": 0.17029443383216858, "learning_rate": 1.8379809126472523e-05, "loss": 0.6795, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2006 }, { "epoch": 0.6742075510321913, "grad_norm": 0.16377229988574982, "learning_rate": 1.8377788085818237e-05, "loss": 0.6988, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2007 }, { "epoch": 0.6745434790596114, "grad_norm": 0.19119687378406525, "learning_rate": 1.8375765896678713e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2008 }, { "epoch": 0.6748794070870316, "grad_norm": 0.16610102355480194, "learning_rate": 1.837374255933116e-05, "loss": 0.6667, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2009 }, { "epoch": 0.6752153351144518, "grad_norm": 0.1690191626548767, "learning_rate": 1.837171807405296e-05, "loss": 0.6672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2010 }, { "epoch": 0.6755512631418719, "grad_norm": 0.16797277331352234, "learning_rate": 1.8369692441121643e-05, "loss": 0.6809, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2011 }, { "epoch": 0.675887191169292, "grad_norm": 0.1661200374364853, "learning_rate": 1.8367665660814893e-05, "loss": 0.6867, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2012 }, { "epoch": 0.6762231191967121, "grad_norm": 0.16407592594623566, "learning_rate": 1.8365637733410552e-05, "loss": 0.7106, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2013 }, { "epoch": 0.6765590472241322, "grad_norm": 0.17653118073940277, "learning_rate": 1.836360865918663e-05, "loss": 0.6791, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2014 }, { "epoch": 0.6768949752515524, "grad_norm": 0.1566901057958603, "learning_rate": 1.836157843842128e-05, "loss": 0.6715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2015 }, { "epoch": 0.6772309032789725, "grad_norm": 0.21481764316558838, "learning_rate": 1.835954707139282e-05, "loss": 0.6946, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2016 }, { "epoch": 0.6775668313063926, "grad_norm": 0.1804042011499405, "learning_rate": 1.835751455837973e-05, "loss": 0.7003, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2017 }, { "epoch": 0.6779027593338127, "grad_norm": 0.1597243994474411, "learning_rate": 1.8355480899660635e-05, "loss": 0.686, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2018 }, { "epoch": 0.6782386873612328, "grad_norm": 0.20956066250801086, "learning_rate": 1.835344609551432e-05, "loss": 0.7, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2019 }, { "epoch": 0.6785746153886529, "grad_norm": 0.20315749943256378, "learning_rate": 1.8351410146219736e-05, "loss": 0.691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2020 }, { "epoch": 0.6789105434160732, "grad_norm": 0.19715960323810577, "learning_rate": 1.8349373052055986e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2021 }, { "epoch": 0.6792464714434933, "grad_norm": 0.16842970252037048, "learning_rate": 1.8347334813302326e-05, "loss": 0.6924, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2022 }, { "epoch": 0.6795823994709134, "grad_norm": 0.19112178683280945, "learning_rate": 1.834529543023817e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2023 }, { "epoch": 0.6799183274983335, "grad_norm": 0.1846497803926468, "learning_rate": 1.834325490314309e-05, "loss": 0.6957, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2024 }, { "epoch": 0.6802542555257536, "grad_norm": 0.16865724325180054, "learning_rate": 1.8341213232296826e-05, "loss": 0.6773, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2025 }, { "epoch": 0.6805901835531737, "grad_norm": 0.21063148975372314, "learning_rate": 1.8339170417979253e-05, "loss": 0.6651, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2026 }, { "epoch": 0.6809261115805939, "grad_norm": 0.16686420142650604, "learning_rate": 1.833712646047042e-05, "loss": 0.6887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2027 }, { "epoch": 0.681262039608014, "grad_norm": 0.19670535624027252, "learning_rate": 1.8335081360050522e-05, "loss": 0.6735, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2028 }, { "epoch": 0.6815979676354341, "grad_norm": 0.20493093132972717, "learning_rate": 1.833303511699992e-05, "loss": 0.6639, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2029 }, { "epoch": 0.6819338956628542, "grad_norm": 0.17878615856170654, "learning_rate": 1.8330987731599127e-05, "loss": 0.693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2030 }, { "epoch": 0.6822698236902743, "grad_norm": 0.22150678932666779, "learning_rate": 1.832893920412881e-05, "loss": 0.6769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2031 }, { "epoch": 0.6826057517176944, "grad_norm": 0.20228317379951477, "learning_rate": 1.8326889534869795e-05, "loss": 0.687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2032 }, { "epoch": 0.6829416797451147, "grad_norm": 0.1935778260231018, "learning_rate": 1.832483872410307e-05, "loss": 0.7094, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2033 }, { "epoch": 0.6832776077725348, "grad_norm": 0.192636638879776, "learning_rate": 1.8322786772109768e-05, "loss": 0.6578, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2034 }, { "epoch": 0.6836135357999549, "grad_norm": 0.18469230830669403, "learning_rate": 1.832073367917119e-05, "loss": 0.6631, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2035 }, { "epoch": 0.683949463827375, "grad_norm": 0.20018813014030457, "learning_rate": 1.8318679445568787e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2036 }, { "epoch": 0.6842853918547951, "grad_norm": 0.1738501340150833, "learning_rate": 1.8316624071584166e-05, "loss": 0.6751, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2037 }, { "epoch": 0.6846213198822152, "grad_norm": 0.1694960594177246, "learning_rate": 1.831456755749909e-05, "loss": 0.6818, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2038 }, { "epoch": 0.6849572479096354, "grad_norm": 0.18662837147712708, "learning_rate": 1.8312509903595486e-05, "loss": 0.6831, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2039 }, { "epoch": 0.6852931759370555, "grad_norm": 0.1609218269586563, "learning_rate": 1.8310451110155425e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2040 }, { "epoch": 0.6856291039644756, "grad_norm": 0.1905919909477234, "learning_rate": 1.8308391177461145e-05, "loss": 0.6983, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2041 }, { "epoch": 0.6859650319918957, "grad_norm": 0.16298726201057434, "learning_rate": 1.8306330105795035e-05, "loss": 0.6707, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2042 }, { "epoch": 0.6863009600193158, "grad_norm": 0.18927934765815735, "learning_rate": 1.8304267895439637e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2043 }, { "epoch": 0.686636888046736, "grad_norm": 0.16360414028167725, "learning_rate": 1.8302204546677663e-05, "loss": 0.7032, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2044 }, { "epoch": 0.6869728160741562, "grad_norm": 0.1720922589302063, "learning_rate": 1.830014005979196e-05, "loss": 0.6681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2045 }, { "epoch": 0.6873087441015763, "grad_norm": 0.18569208681583405, "learning_rate": 1.829807443506555e-05, "loss": 0.7183, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2046 }, { "epoch": 0.6876446721289964, "grad_norm": 0.16840863227844238, "learning_rate": 1.82960076727816e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2047 }, { "epoch": 0.6879806001564165, "grad_norm": 0.16385090351104736, "learning_rate": 1.8293939773223436e-05, "loss": 0.6742, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2048 }, { "epoch": 0.6883165281838366, "grad_norm": 0.17369790375232697, "learning_rate": 1.829187073667454e-05, "loss": 0.7024, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2049 }, { "epoch": 0.6886524562112567, "grad_norm": 0.1818377524614334, "learning_rate": 1.828980056341855e-05, "loss": 0.6839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2050 }, { "epoch": 0.6889883842386768, "grad_norm": 0.17919602990150452, "learning_rate": 1.828772925373926e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2051 }, { "epoch": 0.689324312266097, "grad_norm": 0.2294008731842041, "learning_rate": 1.8285656807920618e-05, "loss": 0.6764, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2052 }, { "epoch": 0.6896602402935171, "grad_norm": 0.16330400109291077, "learning_rate": 1.828358322624673e-05, "loss": 0.6794, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2053 }, { "epoch": 0.6899961683209372, "grad_norm": 0.19082807004451752, "learning_rate": 1.828150850900186e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2054 }, { "epoch": 0.6903320963483573, "grad_norm": 0.19658911228179932, "learning_rate": 1.8279432656470423e-05, "loss": 0.6911, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2055 }, { "epoch": 0.6906680243757775, "grad_norm": 0.1837824136018753, "learning_rate": 1.8277355668936988e-05, "loss": 0.6799, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2056 }, { "epoch": 0.6910039524031976, "grad_norm": 0.18333743512630463, "learning_rate": 1.8275277546686284e-05, "loss": 0.6886, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2057 }, { "epoch": 0.6913398804306178, "grad_norm": 0.1749243289232254, "learning_rate": 1.8273198290003198e-05, "loss": 0.6789, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2058 }, { "epoch": 0.6916758084580379, "grad_norm": 0.18433038890361786, "learning_rate": 1.8271117899172767e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2059 }, { "epoch": 0.692011736485458, "grad_norm": 0.18672677874565125, "learning_rate": 1.8269036374480184e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2060 }, { "epoch": 0.6923476645128781, "grad_norm": 0.157998725771904, "learning_rate": 1.8266953716210797e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2061 }, { "epoch": 0.6926835925402982, "grad_norm": 0.1728176325559616, "learning_rate": 1.826486992465012e-05, "loss": 0.6743, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2062 }, { "epoch": 0.6930195205677183, "grad_norm": 0.1984737664461136, "learning_rate": 1.8262785000083802e-05, "loss": 0.7001, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2063 }, { "epoch": 0.6933554485951385, "grad_norm": 0.16647957265377045, "learning_rate": 1.8260698942797666e-05, "loss": 0.6604, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2064 }, { "epoch": 0.6936913766225586, "grad_norm": 0.2395494282245636, "learning_rate": 1.8258611753077683e-05, "loss": 0.6811, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2065 }, { "epoch": 0.6940273046499787, "grad_norm": 0.21041718125343323, "learning_rate": 1.825652343120998e-05, "loss": 0.6907, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2066 }, { "epoch": 0.6943632326773989, "grad_norm": 0.18939247727394104, "learning_rate": 1.8254433977480832e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2067 }, { "epoch": 0.694699160704819, "grad_norm": 0.2082677185535431, "learning_rate": 1.825234339217668e-05, "loss": 0.7003, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2068 }, { "epoch": 0.6950350887322391, "grad_norm": 0.190574511885643, "learning_rate": 1.825025167558412e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2069 }, { "epoch": 0.6953710167596593, "grad_norm": 0.20761741697788239, "learning_rate": 1.8248158827989896e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2070 }, { "epoch": 0.6957069447870794, "grad_norm": 0.19223745167255402, "learning_rate": 1.8246064849680907e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2071 }, { "epoch": 0.6960428728144995, "grad_norm": 0.1762181520462036, "learning_rate": 1.8243969740944214e-05, "loss": 0.7055, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2072 }, { "epoch": 0.6963788008419196, "grad_norm": 0.18997590243816376, "learning_rate": 1.8241873502067028e-05, "loss": 0.678, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2073 }, { "epoch": 0.6967147288693397, "grad_norm": 0.1743624061346054, "learning_rate": 1.8239776133336713e-05, "loss": 0.673, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2074 }, { "epoch": 0.6970506568967598, "grad_norm": 0.19707445800304413, "learning_rate": 1.8237677635040794e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2075 }, { "epoch": 0.69738658492418, "grad_norm": 0.17389462888240814, "learning_rate": 1.8235578007466946e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2076 }, { "epoch": 0.6977225129516001, "grad_norm": 0.17878705263137817, "learning_rate": 1.8233477250903005e-05, "loss": 0.6816, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2077 }, { "epoch": 0.6980584409790203, "grad_norm": 0.1693015694618225, "learning_rate": 1.823137536563695e-05, "loss": 0.6743, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2078 }, { "epoch": 0.6983943690064404, "grad_norm": 0.17620840668678284, "learning_rate": 1.8229272351956926e-05, "loss": 0.6765, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2079 }, { "epoch": 0.6987302970338605, "grad_norm": 0.19526976346969604, "learning_rate": 1.8227168210151232e-05, "loss": 0.6991, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2080 }, { "epoch": 0.6990662250612806, "grad_norm": 0.16891402006149292, "learning_rate": 1.8225062940508314e-05, "loss": 0.6958, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2081 }, { "epoch": 0.6994021530887008, "grad_norm": 0.2252303808927536, "learning_rate": 1.8222956543316776e-05, "loss": 0.6968, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2082 }, { "epoch": 0.6997380811161209, "grad_norm": 0.16805095970630646, "learning_rate": 1.822084901886538e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2083 }, { "epoch": 0.700074009143541, "grad_norm": 0.17328625917434692, "learning_rate": 1.821874036744304e-05, "loss": 0.669, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2084 }, { "epoch": 0.7004099371709611, "grad_norm": 0.17553700506687164, "learning_rate": 1.8216630589338823e-05, "loss": 0.6837, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2085 }, { "epoch": 0.7007458651983812, "grad_norm": 0.17966561019420624, "learning_rate": 1.8214519684841952e-05, "loss": 0.6745, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2086 }, { "epoch": 0.7010817932258013, "grad_norm": 0.1577139049768448, "learning_rate": 1.8212407654241805e-05, "loss": 0.6563, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2087 }, { "epoch": 0.7014177212532215, "grad_norm": 0.17269065976142883, "learning_rate": 1.821029449782792e-05, "loss": 0.6833, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2088 }, { "epoch": 0.7017536492806417, "grad_norm": 0.18227995932102203, "learning_rate": 1.820818021588997e-05, "loss": 0.6712, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2089 }, { "epoch": 0.7020895773080618, "grad_norm": 0.15160268545150757, "learning_rate": 1.8206064808717805e-05, "loss": 0.6836, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2090 }, { "epoch": 0.7024255053354819, "grad_norm": 0.17194031178951263, "learning_rate": 1.8203948276601416e-05, "loss": 0.7055, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2091 }, { "epoch": 0.702761433362902, "grad_norm": 0.167857363820076, "learning_rate": 1.8201830619830956e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2092 }, { "epoch": 0.7030973613903221, "grad_norm": 0.15602438151836395, "learning_rate": 1.819971183869672e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2093 }, { "epoch": 0.7034332894177423, "grad_norm": 0.16480368375778198, "learning_rate": 1.819759193348918e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2094 }, { "epoch": 0.7037692174451624, "grad_norm": 0.16923309862613678, "learning_rate": 1.8195470904498932e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2095 }, { "epoch": 0.7041051454725825, "grad_norm": 0.17508955299854279, "learning_rate": 1.8193348752016747e-05, "loss": 0.6854, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2096 }, { "epoch": 0.7044410735000026, "grad_norm": 0.19099928438663483, "learning_rate": 1.8191225476333546e-05, "loss": 0.6919, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2097 }, { "epoch": 0.7047770015274227, "grad_norm": 0.16285018622875214, "learning_rate": 1.8189101077740397e-05, "loss": 0.6994, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2098 }, { "epoch": 0.7051129295548428, "grad_norm": 0.21119102835655212, "learning_rate": 1.8186975556528533e-05, "loss": 0.7076, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2099 }, { "epoch": 0.705448857582263, "grad_norm": 0.196133553981781, "learning_rate": 1.8184848912989333e-05, "loss": 0.6938, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2100 }, { "epoch": 0.7057847856096832, "grad_norm": 0.19909845292568207, "learning_rate": 1.8182721147414333e-05, "loss": 0.6681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2101 }, { "epoch": 0.7061207136371033, "grad_norm": 0.18549852073192596, "learning_rate": 1.8180592260095224e-05, "loss": 0.6777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2102 }, { "epoch": 0.7064566416645234, "grad_norm": 0.16108164191246033, "learning_rate": 1.8178462251323842e-05, "loss": 0.6766, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2103 }, { "epoch": 0.7067925696919435, "grad_norm": 0.20701463520526886, "learning_rate": 1.8176331121392187e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2104 }, { "epoch": 0.7071284977193636, "grad_norm": 0.18731267750263214, "learning_rate": 1.817419887059241e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2105 }, { "epoch": 0.7074644257467837, "grad_norm": 0.16708257794380188, "learning_rate": 1.8172065499216814e-05, "loss": 0.6797, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2106 }, { "epoch": 0.7078003537742039, "grad_norm": 0.19045716524124146, "learning_rate": 1.8169931007557857e-05, "loss": 0.6776, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2107 }, { "epoch": 0.708136281801624, "grad_norm": 0.21595804393291473, "learning_rate": 1.816779539590815e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2108 }, { "epoch": 0.7084722098290441, "grad_norm": 0.17887833714485168, "learning_rate": 1.8165658664560454e-05, "loss": 0.6967, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2109 }, { "epoch": 0.7088081378564642, "grad_norm": 0.1622818112373352, "learning_rate": 1.8163520813807688e-05, "loss": 0.7123, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2110 }, { "epoch": 0.7091440658838843, "grad_norm": 0.2040170431137085, "learning_rate": 1.816138184394293e-05, "loss": 0.7047, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2111 }, { "epoch": 0.7094799939113045, "grad_norm": 0.165832981467247, "learning_rate": 1.8159241755259397e-05, "loss": 0.6693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2112 }, { "epoch": 0.7098159219387247, "grad_norm": 0.16870060563087463, "learning_rate": 1.815710054805047e-05, "loss": 0.6856, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2113 }, { "epoch": 0.7101518499661448, "grad_norm": 0.15871456265449524, "learning_rate": 1.8154958222609677e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2114 }, { "epoch": 0.7104877779935649, "grad_norm": 0.18714942038059235, "learning_rate": 1.8152814779230713e-05, "loss": 0.686, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2115 }, { "epoch": 0.710823706020985, "grad_norm": 0.17482289671897888, "learning_rate": 1.8150670218207404e-05, "loss": 0.6833, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2116 }, { "epoch": 0.7111596340484051, "grad_norm": 0.15894484519958496, "learning_rate": 1.8148524539833748e-05, "loss": 0.6861, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2117 }, { "epoch": 0.7114955620758252, "grad_norm": 0.20110437273979187, "learning_rate": 1.8146377744403888e-05, "loss": 0.7036, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2118 }, { "epoch": 0.7118314901032454, "grad_norm": 0.17190666496753693, "learning_rate": 1.8144229832212125e-05, "loss": 0.6942, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2119 }, { "epoch": 0.7121674181306655, "grad_norm": 0.15248608589172363, "learning_rate": 1.8142080803552902e-05, "loss": 0.7098, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2120 }, { "epoch": 0.7125033461580856, "grad_norm": 0.16053253412246704, "learning_rate": 1.8139930658720827e-05, "loss": 0.7, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2121 }, { "epoch": 0.7128392741855057, "grad_norm": 0.17906269431114197, "learning_rate": 1.813777939801066e-05, "loss": 0.665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2122 }, { "epoch": 0.7131752022129259, "grad_norm": 0.1757165789604187, "learning_rate": 1.8135627021717305e-05, "loss": 0.6858, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2123 }, { "epoch": 0.713511130240346, "grad_norm": 0.17801719903945923, "learning_rate": 1.813347353013583e-05, "loss": 0.7036, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2124 }, { "epoch": 0.7138470582677662, "grad_norm": 0.1632409393787384, "learning_rate": 1.8131318923561445e-05, "loss": 0.6809, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2125 }, { "epoch": 0.7141829862951863, "grad_norm": 0.17996473610401154, "learning_rate": 1.8129163202289518e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2126 }, { "epoch": 0.7145189143226064, "grad_norm": 0.1798054277896881, "learning_rate": 1.812700636661558e-05, "loss": 0.6975, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2127 }, { "epoch": 0.7148548423500265, "grad_norm": 0.16827169060707092, "learning_rate": 1.8124848416835296e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2128 }, { "epoch": 0.7151907703774466, "grad_norm": 0.17053727805614471, "learning_rate": 1.8122689353244494e-05, "loss": 0.6983, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2129 }, { "epoch": 0.7155266984048667, "grad_norm": 0.1514924168586731, "learning_rate": 1.8120529176139156e-05, "loss": 0.6839, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2130 }, { "epoch": 0.7158626264322869, "grad_norm": 0.17138291895389557, "learning_rate": 1.811836788581541e-05, "loss": 0.6677, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2131 }, { "epoch": 0.716198554459707, "grad_norm": 0.16039468348026276, "learning_rate": 1.8116205482569545e-05, "loss": 0.683, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2132 }, { "epoch": 0.7165344824871271, "grad_norm": 0.15078607201576233, "learning_rate": 1.8114041966697996e-05, "loss": 0.6964, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2133 }, { "epoch": 0.7168704105145473, "grad_norm": 0.1593494415283203, "learning_rate": 1.8111877338497353e-05, "loss": 0.6643, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2134 }, { "epoch": 0.7172063385419674, "grad_norm": 0.1813177764415741, "learning_rate": 1.8109711598264355e-05, "loss": 0.6748, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2135 }, { "epoch": 0.7175422665693875, "grad_norm": 0.1635948270559311, "learning_rate": 1.8107544746295908e-05, "loss": 0.6803, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2136 }, { "epoch": 0.7178781945968077, "grad_norm": 0.16988180577754974, "learning_rate": 1.8105376782889045e-05, "loss": 0.6744, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2137 }, { "epoch": 0.7182141226242278, "grad_norm": 0.17109480500221252, "learning_rate": 1.810320770834097e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 2138 }, { "epoch": 0.7185500506516479, "grad_norm": 0.16497187316417694, "learning_rate": 1.810103752294904e-05, "loss": 0.7056, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2139 }, { "epoch": 0.718885978679068, "grad_norm": 0.16880196332931519, "learning_rate": 1.8098866227010757e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2140 }, { "epoch": 0.7192219067064881, "grad_norm": 0.16314849257469177, "learning_rate": 1.8096693820823772e-05, "loss": 0.6756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2141 }, { "epoch": 0.7195578347339082, "grad_norm": 0.16825518012046814, "learning_rate": 1.80945203046859e-05, "loss": 0.6768, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2142 }, { "epoch": 0.7198937627613284, "grad_norm": 0.15119491517543793, "learning_rate": 1.8092345678895094e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2143 }, { "epoch": 0.7202296907887485, "grad_norm": 0.16447509825229645, "learning_rate": 1.8090169943749477e-05, "loss": 0.7031, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2144 }, { "epoch": 0.7205656188161687, "grad_norm": 0.18155162036418915, "learning_rate": 1.8087993099547304e-05, "loss": 0.709, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2145 }, { "epoch": 0.7209015468435888, "grad_norm": 0.15753014385700226, "learning_rate": 1.8085815146587e-05, "loss": 0.675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2146 }, { "epoch": 0.7212374748710089, "grad_norm": 0.17372463643550873, "learning_rate": 1.8083636085167126e-05, "loss": 0.6694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2147 }, { "epoch": 0.721573402898429, "grad_norm": 0.17002524435520172, "learning_rate": 1.808145591558641e-05, "loss": 0.6724, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2148 }, { "epoch": 0.7219093309258492, "grad_norm": 0.16958989202976227, "learning_rate": 1.8079274638143723e-05, "loss": 0.6781, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2149 }, { "epoch": 0.7222452589532693, "grad_norm": 0.1649906039237976, "learning_rate": 1.8077092253138088e-05, "loss": 0.7015, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2150 }, { "epoch": 0.7225811869806894, "grad_norm": 0.18279482424259186, "learning_rate": 1.8074908760868677e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2151 }, { "epoch": 0.7229171150081095, "grad_norm": 0.18447770178318024, "learning_rate": 1.8072724161634828e-05, "loss": 0.7016, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2152 }, { "epoch": 0.7232530430355296, "grad_norm": 0.16889910399913788, "learning_rate": 1.8070538455736014e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2153 }, { "epoch": 0.7235889710629497, "grad_norm": 0.17347656190395355, "learning_rate": 1.8068351643471872e-05, "loss": 0.6834, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2154 }, { "epoch": 0.7239248990903698, "grad_norm": 0.18452788889408112, "learning_rate": 1.8066163725142177e-05, "loss": 0.6644, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2155 }, { "epoch": 0.72426082711779, "grad_norm": 0.16384117305278778, "learning_rate": 1.8063974701046874e-05, "loss": 0.6799, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2156 }, { "epoch": 0.7245967551452102, "grad_norm": 0.1817983239889145, "learning_rate": 1.806178457148604e-05, "loss": 0.6875, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2157 }, { "epoch": 0.7249326831726303, "grad_norm": 0.15920335054397583, "learning_rate": 1.8059593336759923e-05, "loss": 0.6956, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2158 }, { "epoch": 0.7252686112000504, "grad_norm": 0.18136830627918243, "learning_rate": 1.8057400997168905e-05, "loss": 0.6948, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2159 }, { "epoch": 0.7256045392274705, "grad_norm": 0.18496869504451752, "learning_rate": 1.805520755301353e-05, "loss": 0.7099, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2160 }, { "epoch": 0.7259404672548906, "grad_norm": 0.18209947645664215, "learning_rate": 1.8053013004594487e-05, "loss": 0.6934, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2161 }, { "epoch": 0.7262763952823108, "grad_norm": 0.14689095318317413, "learning_rate": 1.8050817352212627e-05, "loss": 0.6956, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2162 }, { "epoch": 0.7266123233097309, "grad_norm": 0.20414549112319946, "learning_rate": 1.804862059616894e-05, "loss": 0.672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2163 }, { "epoch": 0.726948251337151, "grad_norm": 0.16625095903873444, "learning_rate": 1.8046422736764576e-05, "loss": 0.6772, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2164 }, { "epoch": 0.7272841793645711, "grad_norm": 0.15981008112430573, "learning_rate": 1.8044223774300825e-05, "loss": 0.6879, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2165 }, { "epoch": 0.7276201073919912, "grad_norm": 0.1689300835132599, "learning_rate": 1.8042023709079148e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2166 }, { "epoch": 0.7279560354194113, "grad_norm": 0.16441726684570312, "learning_rate": 1.8039822541401135e-05, "loss": 0.6876, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2167 }, { "epoch": 0.7282919634468316, "grad_norm": 0.1784304976463318, "learning_rate": 1.803762027156854e-05, "loss": 0.6784, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2168 }, { "epoch": 0.7286278914742517, "grad_norm": 0.177323117852211, "learning_rate": 1.803541689988327e-05, "loss": 0.6831, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2169 }, { "epoch": 0.7289638195016718, "grad_norm": 0.19423002004623413, "learning_rate": 1.8033212426647377e-05, "loss": 0.6991, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2170 }, { "epoch": 0.7292997475290919, "grad_norm": 0.17455323040485382, "learning_rate": 1.803100685216306e-05, "loss": 0.6706, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2171 }, { "epoch": 0.729635675556512, "grad_norm": 0.19349448382854462, "learning_rate": 1.8028800176732678e-05, "loss": 0.6561, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2172 }, { "epoch": 0.7299716035839321, "grad_norm": 0.1910330355167389, "learning_rate": 1.8026592400658737e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2173 }, { "epoch": 0.7303075316113523, "grad_norm": 0.1859377771615982, "learning_rate": 1.8024383524243897e-05, "loss": 0.7015, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2174 }, { "epoch": 0.7306434596387724, "grad_norm": 0.1567779779434204, "learning_rate": 1.8022173547790963e-05, "loss": 0.6998, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2175 }, { "epoch": 0.7309793876661925, "grad_norm": 0.19209493696689606, "learning_rate": 1.801996247160289e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2176 }, { "epoch": 0.7313153156936126, "grad_norm": 0.16397486627101898, "learning_rate": 1.8017750295982797e-05, "loss": 0.6732, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2177 }, { "epoch": 0.7316512437210327, "grad_norm": 0.16712795197963715, "learning_rate": 1.8015537021233935e-05, "loss": 0.6608, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2178 }, { "epoch": 0.7319871717484528, "grad_norm": 0.1803709715604782, "learning_rate": 1.8013322647659724e-05, "loss": 0.6701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2179 }, { "epoch": 0.7323230997758731, "grad_norm": 0.16757218539714813, "learning_rate": 1.801110717556372e-05, "loss": 0.6764, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2180 }, { "epoch": 0.7326590278032932, "grad_norm": 0.1798989325761795, "learning_rate": 1.8008890605249634e-05, "loss": 0.6736, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2181 }, { "epoch": 0.7329949558307133, "grad_norm": 0.16284377872943878, "learning_rate": 1.8006672937021336e-05, "loss": 0.6913, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2182 }, { "epoch": 0.7333308838581334, "grad_norm": 0.15917831659317017, "learning_rate": 1.800445417118283e-05, "loss": 0.6861, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2183 }, { "epoch": 0.7336668118855535, "grad_norm": 0.18040789663791656, "learning_rate": 1.8002234308038288e-05, "loss": 0.6901, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2184 }, { "epoch": 0.7340027399129736, "grad_norm": 0.1854567527770996, "learning_rate": 1.800001334789202e-05, "loss": 0.6628, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2185 }, { "epoch": 0.7343386679403938, "grad_norm": 0.16443747282028198, "learning_rate": 1.799779129104849e-05, "loss": 0.6862, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2186 }, { "epoch": 0.7346745959678139, "grad_norm": 0.17949970066547394, "learning_rate": 1.7995568137812315e-05, "loss": 0.6776, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2187 }, { "epoch": 0.735010523995234, "grad_norm": 0.22359958291053772, "learning_rate": 1.7993343888488258e-05, "loss": 0.682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2188 }, { "epoch": 0.7353464520226541, "grad_norm": 0.18799610435962677, "learning_rate": 1.7991118543381238e-05, "loss": 0.6922, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2189 }, { "epoch": 0.7356823800500742, "grad_norm": 0.1843491643667221, "learning_rate": 1.7988892102796316e-05, "loss": 0.6783, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2190 }, { "epoch": 0.7360183080774944, "grad_norm": 0.1852397322654724, "learning_rate": 1.7986664567038712e-05, "loss": 0.6663, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2191 }, { "epoch": 0.7363542361049146, "grad_norm": 0.22222097218036652, "learning_rate": 1.798443593641379e-05, "loss": 0.6842, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2192 }, { "epoch": 0.7366901641323347, "grad_norm": 0.1981949359178543, "learning_rate": 1.7982206211227068e-05, "loss": 0.6865, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2193 }, { "epoch": 0.7370260921597548, "grad_norm": 0.18420977890491486, "learning_rate": 1.7979975391784213e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2194 }, { "epoch": 0.7373620201871749, "grad_norm": 0.25854235887527466, "learning_rate": 1.7977743478391036e-05, "loss": 0.7012, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2195 }, { "epoch": 0.737697948214595, "grad_norm": 0.18097294867038727, "learning_rate": 1.7975510471353508e-05, "loss": 0.7085, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2196 }, { "epoch": 0.7380338762420151, "grad_norm": 0.18609584867954254, "learning_rate": 1.797327637097774e-05, "loss": 0.6933, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2197 }, { "epoch": 0.7383698042694353, "grad_norm": 0.1900567263364792, "learning_rate": 1.7971041177570005e-05, "loss": 0.6825, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2198 }, { "epoch": 0.7387057322968554, "grad_norm": 0.1605897694826126, "learning_rate": 1.7968804891436712e-05, "loss": 0.6777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2199 }, { "epoch": 0.7390416603242755, "grad_norm": 0.1711585819721222, "learning_rate": 1.796656751288443e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2200 }, { "epoch": 0.7393775883516956, "grad_norm": 0.16835719347000122, "learning_rate": 1.796432904221988e-05, "loss": 0.6907, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2201 }, { "epoch": 0.7397135163791158, "grad_norm": 0.18112914264202118, "learning_rate": 1.7962089479749916e-05, "loss": 0.6578, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2202 }, { "epoch": 0.7400494444065359, "grad_norm": 0.17467544972896576, "learning_rate": 1.795984882578156e-05, "loss": 0.6901, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2203 }, { "epoch": 0.7403853724339561, "grad_norm": 0.19046129286289215, "learning_rate": 1.795760708062197e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2204 }, { "epoch": 0.7407213004613762, "grad_norm": 0.16235515475273132, "learning_rate": 1.7955364244578465e-05, "loss": 0.6887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2205 }, { "epoch": 0.7410572284887963, "grad_norm": 0.1868659108877182, "learning_rate": 1.795312031795851e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2206 }, { "epoch": 0.7413931565162164, "grad_norm": 0.16028574109077454, "learning_rate": 1.7950875301069715e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2207 }, { "epoch": 0.7417290845436365, "grad_norm": 0.17637988924980164, "learning_rate": 1.7948629194219843e-05, "loss": 0.6908, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2208 }, { "epoch": 0.7420650125710566, "grad_norm": 0.15789324045181274, "learning_rate": 1.7946381997716805e-05, "loss": 0.6569, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2209 }, { "epoch": 0.7424009405984767, "grad_norm": 0.17704571783542633, "learning_rate": 1.7944133711868668e-05, "loss": 0.6702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2210 }, { "epoch": 0.7427368686258969, "grad_norm": 0.1725846827030182, "learning_rate": 1.7941884336983633e-05, "loss": 0.681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2211 }, { "epoch": 0.743072796653317, "grad_norm": 0.17176103591918945, "learning_rate": 1.793963387337007e-05, "loss": 0.701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2212 }, { "epoch": 0.7434087246807372, "grad_norm": 0.15990853309631348, "learning_rate": 1.7937382321336476e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2213 }, { "epoch": 0.7437446527081573, "grad_norm": 0.1714084893465042, "learning_rate": 1.793512968119152e-05, "loss": 0.679, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2214 }, { "epoch": 0.7440805807355774, "grad_norm": 0.15463753044605255, "learning_rate": 1.7932875953244005e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2215 }, { "epoch": 0.7444165087629975, "grad_norm": 0.19459226727485657, "learning_rate": 1.793062113780289e-05, "loss": 0.655, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2216 }, { "epoch": 0.7447524367904177, "grad_norm": 0.15284456312656403, "learning_rate": 1.792836523517728e-05, "loss": 0.6968, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2217 }, { "epoch": 0.7450883648178378, "grad_norm": 0.1901380866765976, "learning_rate": 1.7926108245676427e-05, "loss": 0.6842, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2218 }, { "epoch": 0.7454242928452579, "grad_norm": 0.1665056347846985, "learning_rate": 1.7923850169609735e-05, "loss": 0.6761, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2219 }, { "epoch": 0.745760220872678, "grad_norm": 0.19753603637218475, "learning_rate": 1.792159100728676e-05, "loss": 0.682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2220 }, { "epoch": 0.7460961489000981, "grad_norm": 0.17437076568603516, "learning_rate": 1.7919330759017203e-05, "loss": 0.6827, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2221 }, { "epoch": 0.7464320769275182, "grad_norm": 0.17159032821655273, "learning_rate": 1.7917069425110915e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2222 }, { "epoch": 0.7467680049549384, "grad_norm": 0.18058086931705475, "learning_rate": 1.791480700587789e-05, "loss": 0.6824, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2223 }, { "epoch": 0.7471039329823586, "grad_norm": 0.17718617618083954, "learning_rate": 1.7912543501628283e-05, "loss": 0.6577, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2224 }, { "epoch": 0.7474398610097787, "grad_norm": 0.17278440296649933, "learning_rate": 1.7910278912672387e-05, "loss": 0.6857, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2225 }, { "epoch": 0.7477757890371988, "grad_norm": 0.1691308170557022, "learning_rate": 1.7908013239320646e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2226 }, { "epoch": 0.7481117170646189, "grad_norm": 0.1601637899875641, "learning_rate": 1.790574648188366e-05, "loss": 0.6622, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2227 }, { "epoch": 0.748447645092039, "grad_norm": 0.1570720672607422, "learning_rate": 1.7903478640672166e-05, "loss": 0.6948, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2228 }, { "epoch": 0.7487835731194592, "grad_norm": 0.17764347791671753, "learning_rate": 1.790120971599706e-05, "loss": 0.6634, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2229 }, { "epoch": 0.7491195011468793, "grad_norm": 0.1657078117132187, "learning_rate": 1.789893970816938e-05, "loss": 0.6979, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2230 }, { "epoch": 0.7494554291742994, "grad_norm": 0.16045540571212769, "learning_rate": 1.789666861750031e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2231 }, { "epoch": 0.7497913572017195, "grad_norm": 0.19051800668239594, "learning_rate": 1.7894396444301193e-05, "loss": 0.6849, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2232 }, { "epoch": 0.7501272852291396, "grad_norm": 0.19363099336624146, "learning_rate": 1.7892123188883513e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2233 }, { "epoch": 0.7504632132565597, "grad_norm": 0.16818444430828094, "learning_rate": 1.7889848851558902e-05, "loss": 0.6789, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2234 }, { "epoch": 0.75079914128398, "grad_norm": 0.1845105141401291, "learning_rate": 1.7887573432639148e-05, "loss": 0.6808, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2235 }, { "epoch": 0.7511350693114001, "grad_norm": 0.16466645896434784, "learning_rate": 1.7885296932436168e-05, "loss": 0.6606, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2236 }, { "epoch": 0.7514709973388202, "grad_norm": 0.16758593916893005, "learning_rate": 1.7883019351262053e-05, "loss": 0.6928, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2237 }, { "epoch": 0.7518069253662403, "grad_norm": 0.16980601847171783, "learning_rate": 1.7880740689429022e-05, "loss": 0.6933, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2238 }, { "epoch": 0.7521428533936604, "grad_norm": 0.17684699594974518, "learning_rate": 1.7878460947249456e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2239 }, { "epoch": 0.7524787814210805, "grad_norm": 0.1560872197151184, "learning_rate": 1.7876180125035875e-05, "loss": 0.6974, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2240 }, { "epoch": 0.7528147094485007, "grad_norm": 0.19217194616794586, "learning_rate": 1.7873898223100946e-05, "loss": 0.6877, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2241 }, { "epoch": 0.7531506374759208, "grad_norm": 0.18428172171115875, "learning_rate": 1.7871615241757492e-05, "loss": 0.7097, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2242 }, { "epoch": 0.7534865655033409, "grad_norm": 0.17330655455589294, "learning_rate": 1.7869331181318478e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2243 }, { "epoch": 0.753822493530761, "grad_norm": 0.21076127886772156, "learning_rate": 1.7867046042097022e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2244 }, { "epoch": 0.7541584215581811, "grad_norm": 0.1803259253501892, "learning_rate": 1.7864759824406386e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2245 }, { "epoch": 0.7544943495856012, "grad_norm": 0.19544856250286102, "learning_rate": 1.7862472528559978e-05, "loss": 0.7082, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2246 }, { "epoch": 0.7548302776130215, "grad_norm": 0.1816917061805725, "learning_rate": 1.7860184154871357e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2247 }, { "epoch": 0.7551662056404416, "grad_norm": 0.19725486636161804, "learning_rate": 1.785789470365423e-05, "loss": 0.679, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2248 }, { "epoch": 0.7555021336678617, "grad_norm": 0.18724575638771057, "learning_rate": 1.7855604175222453e-05, "loss": 0.6674, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2249 }, { "epoch": 0.7558380616952818, "grad_norm": 0.21519505977630615, "learning_rate": 1.7853312569890025e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2250 }, { "epoch": 0.7561739897227019, "grad_norm": 0.14752627909183502, "learning_rate": 1.7851019887971094e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2251 }, { "epoch": 0.756509917750122, "grad_norm": 0.23212985694408417, "learning_rate": 1.7848726129779957e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2252 }, { "epoch": 0.7568458457775422, "grad_norm": 0.20939801633358002, "learning_rate": 1.784643129563106e-05, "loss": 0.6693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2253 }, { "epoch": 0.7571817738049623, "grad_norm": 0.18505805730819702, "learning_rate": 1.7844135385838998e-05, "loss": 0.69, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2254 }, { "epoch": 0.7575177018323824, "grad_norm": 0.21421431005001068, "learning_rate": 1.78418384007185e-05, "loss": 0.6895, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2255 }, { "epoch": 0.7578536298598025, "grad_norm": 0.1605301797389984, "learning_rate": 1.7839540340584464e-05, "loss": 0.6875, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2256 }, { "epoch": 0.7581895578872226, "grad_norm": 0.1635611653327942, "learning_rate": 1.7837241205751917e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2257 }, { "epoch": 0.7585254859146427, "grad_norm": 0.1713227778673172, "learning_rate": 1.783494099653604e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2258 }, { "epoch": 0.758861413942063, "grad_norm": 0.16058655083179474, "learning_rate": 1.7832639713252166e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 2259 }, { "epoch": 0.7591973419694831, "grad_norm": 0.1638094037771225, "learning_rate": 1.783033735621577e-05, "loss": 0.6795, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2260 }, { "epoch": 0.7595332699969032, "grad_norm": 0.1486586630344391, "learning_rate": 1.7828033925742474e-05, "loss": 0.681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2261 }, { "epoch": 0.7598691980243233, "grad_norm": 0.1618136465549469, "learning_rate": 1.7825729422148045e-05, "loss": 0.6975, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2262 }, { "epoch": 0.7602051260517434, "grad_norm": 0.16773007810115814, "learning_rate": 1.7823423845748407e-05, "loss": 0.6999, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2263 }, { "epoch": 0.7605410540791635, "grad_norm": 0.1545884907245636, "learning_rate": 1.782111719685962e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2264 }, { "epoch": 0.7608769821065836, "grad_norm": 0.16375984251499176, "learning_rate": 1.7818809475797896e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 2265 }, { "epoch": 0.7612129101340038, "grad_norm": 0.1513642966747284, "learning_rate": 1.7816500682879593e-05, "loss": 0.6791, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.88, "memory/max_allocated (GiB)": 64.88, "step": 2266 }, { "epoch": 0.7615488381614239, "grad_norm": 0.16515706479549408, "learning_rate": 1.7814190818421213e-05, "loss": 0.6746, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2267 }, { "epoch": 0.761884766188844, "grad_norm": 0.15748944878578186, "learning_rate": 1.781187988273942e-05, "loss": 0.6721, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2268 }, { "epoch": 0.7622206942162641, "grad_norm": 0.1717555671930313, "learning_rate": 1.7809567876151e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2269 }, { "epoch": 0.7625566222436843, "grad_norm": 0.1595650166273117, "learning_rate": 1.7807254798972903e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2270 }, { "epoch": 0.7628925502711044, "grad_norm": 0.1622859686613083, "learning_rate": 1.7804940651522224e-05, "loss": 0.7076, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2271 }, { "epoch": 0.7632284782985246, "grad_norm": 0.15597715973854065, "learning_rate": 1.7802625434116203e-05, "loss": 0.7044, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2272 }, { "epoch": 0.7635644063259447, "grad_norm": 0.1536310911178589, "learning_rate": 1.7800309147072222e-05, "loss": 0.6968, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2273 }, { "epoch": 0.7639003343533648, "grad_norm": 0.15450182557106018, "learning_rate": 1.7797991790707818e-05, "loss": 0.6918, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2274 }, { "epoch": 0.7642362623807849, "grad_norm": 0.17134584486484528, "learning_rate": 1.7795673365340668e-05, "loss": 0.6959, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2275 }, { "epoch": 0.764572190408205, "grad_norm": 0.15465383231639862, "learning_rate": 1.7793353871288596e-05, "loss": 0.6634, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2276 }, { "epoch": 0.7649081184356251, "grad_norm": 0.18214333057403564, "learning_rate": 1.7791033308869575e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2277 }, { "epoch": 0.7652440464630453, "grad_norm": 0.14783616364002228, "learning_rate": 1.778871167840173e-05, "loss": 0.6865, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2278 }, { "epoch": 0.7655799744904654, "grad_norm": 0.17595599591732025, "learning_rate": 1.778638898020332e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2279 }, { "epoch": 0.7659159025178855, "grad_norm": 0.1540672481060028, "learning_rate": 1.7784065214592754e-05, "loss": 0.7084, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2280 }, { "epoch": 0.7662518305453057, "grad_norm": 0.1826910525560379, "learning_rate": 1.7781740381888595e-05, "loss": 0.6958, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2281 }, { "epoch": 0.7665877585727258, "grad_norm": 0.18547263741493225, "learning_rate": 1.7779414482409546e-05, "loss": 0.675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2282 }, { "epoch": 0.7669236866001459, "grad_norm": 0.1805611550807953, "learning_rate": 1.7777087516474458e-05, "loss": 0.6834, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2283 }, { "epoch": 0.7672596146275661, "grad_norm": 0.1734287589788437, "learning_rate": 1.7774759484402326e-05, "loss": 0.6608, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2284 }, { "epoch": 0.7675955426549862, "grad_norm": 0.16371791064739227, "learning_rate": 1.7772430386512292e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2285 }, { "epoch": 0.7679314706824063, "grad_norm": 0.16191737353801727, "learning_rate": 1.777010022312365e-05, "loss": 0.6663, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2286 }, { "epoch": 0.7682673987098264, "grad_norm": 0.16550180315971375, "learning_rate": 1.7767768994555833e-05, "loss": 0.6747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2287 }, { "epoch": 0.7686033267372465, "grad_norm": 0.16807955503463745, "learning_rate": 1.7765436701128415e-05, "loss": 0.6767, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2288 }, { "epoch": 0.7689392547646666, "grad_norm": 0.16950267553329468, "learning_rate": 1.7763103343161132e-05, "loss": 0.7068, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2289 }, { "epoch": 0.7692751827920868, "grad_norm": 0.16010825335979462, "learning_rate": 1.7760768920973852e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2290 }, { "epoch": 0.769611110819507, "grad_norm": 0.1655026525259018, "learning_rate": 1.7758433434886596e-05, "loss": 0.7086, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2291 }, { "epoch": 0.7699470388469271, "grad_norm": 0.1816457211971283, "learning_rate": 1.7756096885219524e-05, "loss": 0.6523, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2292 }, { "epoch": 0.7702829668743472, "grad_norm": 0.1868417114019394, "learning_rate": 1.7753759272292955e-05, "loss": 0.713, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2293 }, { "epoch": 0.7706188949017673, "grad_norm": 0.16111350059509277, "learning_rate": 1.7751420596427335e-05, "loss": 0.6769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2294 }, { "epoch": 0.7709548229291874, "grad_norm": 0.17183822393417358, "learning_rate": 1.7749080857943275e-05, "loss": 0.6838, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2295 }, { "epoch": 0.7712907509566076, "grad_norm": 0.18988075852394104, "learning_rate": 1.7746740057161517e-05, "loss": 0.6651, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2296 }, { "epoch": 0.7716266789840277, "grad_norm": 0.19389359652996063, "learning_rate": 1.7744398194402955e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2297 }, { "epoch": 0.7719626070114478, "grad_norm": 0.1859840601682663, "learning_rate": 1.7742055269988632e-05, "loss": 0.6952, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2298 }, { "epoch": 0.7722985350388679, "grad_norm": 0.171892911195755, "learning_rate": 1.7739711284239726e-05, "loss": 0.6737, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2299 }, { "epoch": 0.772634463066288, "grad_norm": 0.18350598216056824, "learning_rate": 1.773736623747757e-05, "loss": 0.672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2300 }, { "epoch": 0.7729703910937081, "grad_norm": 0.18300417065620422, "learning_rate": 1.773502013002364e-05, "loss": 0.7062, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2301 }, { "epoch": 0.7733063191211283, "grad_norm": 0.19460231065750122, "learning_rate": 1.7732672962199555e-05, "loss": 0.6778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2302 }, { "epoch": 0.7736422471485485, "grad_norm": 0.21163822710514069, "learning_rate": 1.7730324734327083e-05, "loss": 0.6774, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2303 }, { "epoch": 0.7739781751759686, "grad_norm": 0.193067729473114, "learning_rate": 1.7727975446728133e-05, "loss": 0.6919, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2304 }, { "epoch": 0.7743141032033887, "grad_norm": 0.17103691399097443, "learning_rate": 1.772562509972476e-05, "loss": 0.7021, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2305 }, { "epoch": 0.7746500312308088, "grad_norm": 0.1852322518825531, "learning_rate": 1.772327369363917e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2306 }, { "epoch": 0.7749859592582289, "grad_norm": 0.1649942398071289, "learning_rate": 1.7720921228793712e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2307 }, { "epoch": 0.7753218872856491, "grad_norm": 0.23740170896053314, "learning_rate": 1.7718567705510872e-05, "loss": 0.6668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2308 }, { "epoch": 0.7756578153130692, "grad_norm": 0.18616420030593872, "learning_rate": 1.7716213124113292e-05, "loss": 0.6957, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2309 }, { "epoch": 0.7759937433404893, "grad_norm": 0.18486975133419037, "learning_rate": 1.771385748492375e-05, "loss": 0.6987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2310 }, { "epoch": 0.7763296713679094, "grad_norm": 0.18597844243049622, "learning_rate": 1.7711500788265176e-05, "loss": 0.6859, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2311 }, { "epoch": 0.7766655993953295, "grad_norm": 0.21884965896606445, "learning_rate": 1.770914303446064e-05, "loss": 0.6817, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2312 }, { "epoch": 0.7770015274227496, "grad_norm": 0.18268120288848877, "learning_rate": 1.7706784223833363e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2313 }, { "epoch": 0.7773374554501699, "grad_norm": 0.19660544395446777, "learning_rate": 1.7704424356706708e-05, "loss": 0.6817, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2314 }, { "epoch": 0.77767338347759, "grad_norm": 0.18073685467243195, "learning_rate": 1.7702063433404174e-05, "loss": 0.6599, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 2315 }, { "epoch": 0.7780093115050101, "grad_norm": 0.24500328302383423, "learning_rate": 1.7699701454249423e-05, "loss": 0.6994, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2316 }, { "epoch": 0.7783452395324302, "grad_norm": 0.19175612926483154, "learning_rate": 1.7697338419566245e-05, "loss": 0.6915, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2317 }, { "epoch": 0.7786811675598503, "grad_norm": 0.19061541557312012, "learning_rate": 1.769497432967858e-05, "loss": 0.6803, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2318 }, { "epoch": 0.7790170955872704, "grad_norm": 0.19047662615776062, "learning_rate": 1.769260918491052e-05, "loss": 0.6777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2319 }, { "epoch": 0.7793530236146905, "grad_norm": 0.16880135238170624, "learning_rate": 1.769024298558629e-05, "loss": 0.681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2320 }, { "epoch": 0.7796889516421107, "grad_norm": 0.1755119413137436, "learning_rate": 1.768787573203027e-05, "loss": 0.6661, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2321 }, { "epoch": 0.7800248796695308, "grad_norm": 0.1634109914302826, "learning_rate": 1.768550742456697e-05, "loss": 0.669, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2322 }, { "epoch": 0.7803608076969509, "grad_norm": 0.17036142945289612, "learning_rate": 1.7683138063521065e-05, "loss": 0.6716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2323 }, { "epoch": 0.780696735724371, "grad_norm": 0.1661994606256485, "learning_rate": 1.768076764921736e-05, "loss": 0.6831, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2324 }, { "epoch": 0.7810326637517911, "grad_norm": 0.17423014342784882, "learning_rate": 1.7678396181980803e-05, "loss": 0.6773, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2325 }, { "epoch": 0.7813685917792113, "grad_norm": 0.16582773625850677, "learning_rate": 1.7676023662136495e-05, "loss": 0.7101, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2326 }, { "epoch": 0.7817045198066315, "grad_norm": 0.17409344017505646, "learning_rate": 1.7673650090009678e-05, "loss": 0.6928, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2327 }, { "epoch": 0.7820404478340516, "grad_norm": 0.19146880507469177, "learning_rate": 1.767127546592574e-05, "loss": 0.6897, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2328 }, { "epoch": 0.7823763758614717, "grad_norm": 0.1485276222229004, "learning_rate": 1.7668899790210203e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2329 }, { "epoch": 0.7827123038888918, "grad_norm": 0.21041271090507507, "learning_rate": 1.766652306318875e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2330 }, { "epoch": 0.7830482319163119, "grad_norm": 0.16188134253025055, "learning_rate": 1.7664145285187193e-05, "loss": 0.6867, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2331 }, { "epoch": 0.783384159943732, "grad_norm": 0.1727166622877121, "learning_rate": 1.7661766456531498e-05, "loss": 0.7045, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2332 }, { "epoch": 0.7837200879711522, "grad_norm": 0.16021575033664703, "learning_rate": 1.765938657754777e-05, "loss": 0.6918, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2333 }, { "epoch": 0.7840560159985723, "grad_norm": 0.17832477390766144, "learning_rate": 1.7657005648562256e-05, "loss": 0.6743, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2334 }, { "epoch": 0.7843919440259924, "grad_norm": 0.18725121021270752, "learning_rate": 1.7654623669901355e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2335 }, { "epoch": 0.7847278720534125, "grad_norm": 0.17483161389827728, "learning_rate": 1.7652240641891603e-05, "loss": 0.6869, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2336 }, { "epoch": 0.7850638000808327, "grad_norm": 0.17323976755142212, "learning_rate": 1.764985656485968e-05, "loss": 0.6865, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2337 }, { "epoch": 0.7853997281082528, "grad_norm": 0.1795969307422638, "learning_rate": 1.764747143913242e-05, "loss": 0.6858, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2338 }, { "epoch": 0.785735656135673, "grad_norm": 0.16431249678134918, "learning_rate": 1.7645085265036782e-05, "loss": 0.6933, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2339 }, { "epoch": 0.7860715841630931, "grad_norm": 0.1667041778564453, "learning_rate": 1.7642698042899885e-05, "loss": 0.6681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2340 }, { "epoch": 0.7864075121905132, "grad_norm": 0.1767832338809967, "learning_rate": 1.764030977304899e-05, "loss": 0.6767, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2341 }, { "epoch": 0.7867434402179333, "grad_norm": 0.18804217875003815, "learning_rate": 1.763792045581149e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2342 }, { "epoch": 0.7870793682453534, "grad_norm": 0.20729148387908936, "learning_rate": 1.763553009151493e-05, "loss": 0.6966, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2343 }, { "epoch": 0.7874152962727735, "grad_norm": 0.21641787886619568, "learning_rate": 1.7633138680486998e-05, "loss": 0.7036, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2344 }, { "epoch": 0.7877512243001937, "grad_norm": 0.17416198551654816, "learning_rate": 1.763074622305553e-05, "loss": 0.705, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2345 }, { "epoch": 0.7880871523276138, "grad_norm": 0.2110961526632309, "learning_rate": 1.7628352719548493e-05, "loss": 0.6614, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2346 }, { "epoch": 0.788423080355034, "grad_norm": 0.20582734048366547, "learning_rate": 1.7625958170294014e-05, "loss": 0.68, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2347 }, { "epoch": 0.788759008382454, "grad_norm": 0.15108443796634674, "learning_rate": 1.762356257562035e-05, "loss": 0.6743, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2348 }, { "epoch": 0.7890949364098742, "grad_norm": 0.20548704266548157, "learning_rate": 1.7621165935855903e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2349 }, { "epoch": 0.7894308644372943, "grad_norm": 0.1986665278673172, "learning_rate": 1.7618768251329225e-05, "loss": 0.6614, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2350 }, { "epoch": 0.7897667924647145, "grad_norm": 0.1734458953142166, "learning_rate": 1.7616369522369005e-05, "loss": 0.6855, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2351 }, { "epoch": 0.7901027204921346, "grad_norm": 0.16346769034862518, "learning_rate": 1.7613969749304078e-05, "loss": 0.6826, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2352 }, { "epoch": 0.7904386485195547, "grad_norm": 0.19178706407546997, "learning_rate": 1.761156893246342e-05, "loss": 0.6845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2353 }, { "epoch": 0.7907745765469748, "grad_norm": 0.18727295100688934, "learning_rate": 1.7609167072176156e-05, "loss": 0.6812, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2354 }, { "epoch": 0.7911105045743949, "grad_norm": 0.20175288617610931, "learning_rate": 1.7606764168771546e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2355 }, { "epoch": 0.791446432601815, "grad_norm": 0.16877217590808868, "learning_rate": 1.7604360222578996e-05, "loss": 0.684, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2356 }, { "epoch": 0.7917823606292352, "grad_norm": 0.1519220620393753, "learning_rate": 1.7601955233928056e-05, "loss": 0.6731, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2357 }, { "epoch": 0.7921182886566553, "grad_norm": 0.19235354661941528, "learning_rate": 1.7599549203148422e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2358 }, { "epoch": 0.7924542166840755, "grad_norm": 0.18987828493118286, "learning_rate": 1.759714213056993e-05, "loss": 0.6593, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2359 }, { "epoch": 0.7927901447114956, "grad_norm": 0.17309880256652832, "learning_rate": 1.759473401652255e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2360 }, { "epoch": 0.7931260727389157, "grad_norm": 0.1600409895181656, "learning_rate": 1.759232486133641e-05, "loss": 0.7032, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2361 }, { "epoch": 0.7934620007663358, "grad_norm": 0.19000153243541718, "learning_rate": 1.7589914665341772e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2362 }, { "epoch": 0.793797928793756, "grad_norm": 0.1886920928955078, "learning_rate": 1.758750342886904e-05, "loss": 0.6713, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2363 }, { "epoch": 0.7941338568211761, "grad_norm": 0.19688953459262848, "learning_rate": 1.758509115224877e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2364 }, { "epoch": 0.7944697848485962, "grad_norm": 0.17757391929626465, "learning_rate": 1.7582677835811642e-05, "loss": 0.678, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2365 }, { "epoch": 0.7948057128760163, "grad_norm": 0.15715079009532928, "learning_rate": 1.7580263479888502e-05, "loss": 0.7005, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2366 }, { "epoch": 0.7951416409034364, "grad_norm": 0.18205584585666656, "learning_rate": 1.757784808481032e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2367 }, { "epoch": 0.7954775689308565, "grad_norm": 0.16367244720458984, "learning_rate": 1.7575431650908217e-05, "loss": 0.6789, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2368 }, { "epoch": 0.7958134969582766, "grad_norm": 0.14386479556560516, "learning_rate": 1.757301417851345e-05, "loss": 0.6653, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2369 }, { "epoch": 0.7961494249856969, "grad_norm": 0.19865265488624573, "learning_rate": 1.7570595667957437e-05, "loss": 0.6795, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 2370 }, { "epoch": 0.796485353013117, "grad_norm": 0.16123834252357483, "learning_rate": 1.7568176119571708e-05, "loss": 0.6916, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2371 }, { "epoch": 0.7968212810405371, "grad_norm": 0.1852705329656601, "learning_rate": 1.7565755533687956e-05, "loss": 0.6736, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2372 }, { "epoch": 0.7971572090679572, "grad_norm": 0.2025170475244522, "learning_rate": 1.7563333910638015e-05, "loss": 0.681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2373 }, { "epoch": 0.7974931370953773, "grad_norm": 0.1587519645690918, "learning_rate": 1.7560911250753858e-05, "loss": 0.6775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2374 }, { "epoch": 0.7978290651227974, "grad_norm": 0.1988045871257782, "learning_rate": 1.7558487554367593e-05, "loss": 0.6959, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2375 }, { "epoch": 0.7981649931502176, "grad_norm": 0.16733504831790924, "learning_rate": 1.755606282181149e-05, "loss": 0.6748, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2376 }, { "epoch": 0.7985009211776377, "grad_norm": 0.1784433275461197, "learning_rate": 1.7553637053417935e-05, "loss": 0.6668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2377 }, { "epoch": 0.7988368492050578, "grad_norm": 0.1654539257287979, "learning_rate": 1.7551210249519473e-05, "loss": 0.6972, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2378 }, { "epoch": 0.7991727772324779, "grad_norm": 0.16347962617874146, "learning_rate": 1.7548782410448796e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2379 }, { "epoch": 0.799508705259898, "grad_norm": 0.15703856945037842, "learning_rate": 1.7546353536538712e-05, "loss": 0.6753, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2380 }, { "epoch": 0.7998446332873181, "grad_norm": 0.15860989689826965, "learning_rate": 1.7543923628122204e-05, "loss": 0.6517, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2381 }, { "epoch": 0.8001805613147384, "grad_norm": 0.1687031388282776, "learning_rate": 1.7541492685532376e-05, "loss": 0.681, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2382 }, { "epoch": 0.8005164893421585, "grad_norm": 0.16427642107009888, "learning_rate": 1.7539060709102475e-05, "loss": 0.6818, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2383 }, { "epoch": 0.8008524173695786, "grad_norm": 0.16507183015346527, "learning_rate": 1.7536627699165892e-05, "loss": 0.684, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2384 }, { "epoch": 0.8011883453969987, "grad_norm": 0.162554532289505, "learning_rate": 1.7534193656056162e-05, "loss": 0.6589, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2385 }, { "epoch": 0.8015242734244188, "grad_norm": 0.15333376824855804, "learning_rate": 1.7531758580106966e-05, "loss": 0.6913, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2386 }, { "epoch": 0.8018602014518389, "grad_norm": 0.16106007993221283, "learning_rate": 1.7529322471652118e-05, "loss": 0.6727, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2387 }, { "epoch": 0.8021961294792591, "grad_norm": 0.1654864251613617, "learning_rate": 1.7526885331025574e-05, "loss": 0.6632, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2388 }, { "epoch": 0.8025320575066792, "grad_norm": 0.19046755135059357, "learning_rate": 1.7524447158561433e-05, "loss": 0.6661, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2389 }, { "epoch": 0.8028679855340993, "grad_norm": 0.16168922185897827, "learning_rate": 1.7522007954593946e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2390 }, { "epoch": 0.8032039135615194, "grad_norm": 0.20417600870132446, "learning_rate": 1.7519567719457487e-05, "loss": 0.7063, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2391 }, { "epoch": 0.8035398415889395, "grad_norm": 0.17935656011104584, "learning_rate": 1.7517126453486583e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2392 }, { "epoch": 0.8038757696163596, "grad_norm": 0.15628941357135773, "learning_rate": 1.7514684157015896e-05, "loss": 0.6824, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2393 }, { "epoch": 0.8042116976437799, "grad_norm": 0.1769922375679016, "learning_rate": 1.7512240830380243e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2394 }, { "epoch": 0.8045476256712, "grad_norm": 0.17563670873641968, "learning_rate": 1.7509796473914563e-05, "loss": 0.6683, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2395 }, { "epoch": 0.8048835536986201, "grad_norm": 0.17661839723587036, "learning_rate": 1.7507351087953947e-05, "loss": 0.7137, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2396 }, { "epoch": 0.8052194817260402, "grad_norm": 0.19405999779701233, "learning_rate": 1.750490467283363e-05, "loss": 0.6934, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2397 }, { "epoch": 0.8055554097534603, "grad_norm": 0.1562695950269699, "learning_rate": 1.7502457228888973e-05, "loss": 0.6939, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2398 }, { "epoch": 0.8058913377808804, "grad_norm": 0.1690487116575241, "learning_rate": 1.75000087564555e-05, "loss": 0.6771, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2399 }, { "epoch": 0.8062272658083006, "grad_norm": 0.16574996709823608, "learning_rate": 1.7497559255868858e-05, "loss": 0.6949, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2400 }, { "epoch": 0.8065631938357207, "grad_norm": 0.16369135677814484, "learning_rate": 1.7495108727464848e-05, "loss": 0.6947, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2401 }, { "epoch": 0.8068991218631408, "grad_norm": 0.16633741557598114, "learning_rate": 1.7492657171579397e-05, "loss": 0.6679, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2402 }, { "epoch": 0.8072350498905609, "grad_norm": 0.2131980061531067, "learning_rate": 1.7490204588548583e-05, "loss": 0.6837, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2403 }, { "epoch": 0.807570977917981, "grad_norm": 0.1562916338443756, "learning_rate": 1.7487750978708632e-05, "loss": 0.6888, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2404 }, { "epoch": 0.8079069059454012, "grad_norm": 0.17632761597633362, "learning_rate": 1.7485296342395894e-05, "loss": 0.6735, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2405 }, { "epoch": 0.8082428339728214, "grad_norm": 0.17419672012329102, "learning_rate": 1.7482840679946863e-05, "loss": 0.6732, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2406 }, { "epoch": 0.8085787620002415, "grad_norm": 0.1765204221010208, "learning_rate": 1.748038399169819e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2407 }, { "epoch": 0.8089146900276616, "grad_norm": 0.17992979288101196, "learning_rate": 1.7477926277986644e-05, "loss": 0.6765, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2408 }, { "epoch": 0.8092506180550817, "grad_norm": 0.2088874727487564, "learning_rate": 1.7475467539149155e-05, "loss": 0.6727, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2409 }, { "epoch": 0.8095865460825018, "grad_norm": 0.15966756641864777, "learning_rate": 1.7473007775522776e-05, "loss": 0.7046, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2410 }, { "epoch": 0.8099224741099219, "grad_norm": 0.23669953644275665, "learning_rate": 1.7470546987444712e-05, "loss": 0.7014, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2411 }, { "epoch": 0.8102584021373421, "grad_norm": 0.15118619799613953, "learning_rate": 1.7468085175252305e-05, "loss": 0.6717, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2412 }, { "epoch": 0.8105943301647622, "grad_norm": 0.21073377132415771, "learning_rate": 1.746562233928304e-05, "loss": 0.6666, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2413 }, { "epoch": 0.8109302581921823, "grad_norm": 0.15244829654693604, "learning_rate": 1.746315847987453e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.91, "memory/max_allocated (GiB)": 63.91, "step": 2414 }, { "epoch": 0.8112661862196024, "grad_norm": 0.1649143248796463, "learning_rate": 1.746069359736455e-05, "loss": 0.6659, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2415 }, { "epoch": 0.8116021142470226, "grad_norm": 0.14775896072387695, "learning_rate": 1.7458227692090996e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2416 }, { "epoch": 0.8119380422744427, "grad_norm": 0.1501971334218979, "learning_rate": 1.745576076439191e-05, "loss": 0.6851, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2417 }, { "epoch": 0.8122739703018629, "grad_norm": 0.15194779634475708, "learning_rate": 1.7453292814605483e-05, "loss": 0.6972, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2418 }, { "epoch": 0.812609898329283, "grad_norm": 0.1535957306623459, "learning_rate": 1.745082384307003e-05, "loss": 0.6836, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2419 }, { "epoch": 0.8129458263567031, "grad_norm": 0.15802249312400818, "learning_rate": 1.7448353850124023e-05, "loss": 0.6869, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2420 }, { "epoch": 0.8132817543841232, "grad_norm": 0.15560102462768555, "learning_rate": 1.7445882836106055e-05, "loss": 0.6752, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2421 }, { "epoch": 0.8136176824115433, "grad_norm": 0.14627616107463837, "learning_rate": 1.7443410801354883e-05, "loss": 0.6852, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2422 }, { "epoch": 0.8139536104389634, "grad_norm": 0.15867553651332855, "learning_rate": 1.744093774620938e-05, "loss": 0.6687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2423 }, { "epoch": 0.8142895384663835, "grad_norm": 0.1542111337184906, "learning_rate": 1.7438463671008575e-05, "loss": 0.6747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2424 }, { "epoch": 0.8146254664938037, "grad_norm": 0.15214788913726807, "learning_rate": 1.7435988576091628e-05, "loss": 0.6596, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2425 }, { "epoch": 0.8149613945212238, "grad_norm": 0.17587636411190033, "learning_rate": 1.7433512461797848e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2426 }, { "epoch": 0.815297322548644, "grad_norm": 0.15496322512626648, "learning_rate": 1.7431035328466674e-05, "loss": 0.6763, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2427 }, { "epoch": 0.8156332505760641, "grad_norm": 0.15076065063476562, "learning_rate": 1.7428557176437685e-05, "loss": 0.6719, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2428 }, { "epoch": 0.8159691786034842, "grad_norm": 0.15515895187854767, "learning_rate": 1.7426078006050606e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2429 }, { "epoch": 0.8163051066309043, "grad_norm": 0.1596962809562683, "learning_rate": 1.74235978176453e-05, "loss": 0.6875, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2430 }, { "epoch": 0.8166410346583245, "grad_norm": 0.16570642590522766, "learning_rate": 1.742111661156177e-05, "loss": 0.6608, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2431 }, { "epoch": 0.8169769626857446, "grad_norm": 0.15050017833709717, "learning_rate": 1.7418634388140155e-05, "loss": 0.7034, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2432 }, { "epoch": 0.8173128907131647, "grad_norm": 0.15339799225330353, "learning_rate": 1.7416151147720737e-05, "loss": 0.6628, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2433 }, { "epoch": 0.8176488187405848, "grad_norm": 0.14424458146095276, "learning_rate": 1.7413666890643932e-05, "loss": 0.6701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2434 }, { "epoch": 0.8179847467680049, "grad_norm": 0.15271639823913574, "learning_rate": 1.74111816172503e-05, "loss": 0.674, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2435 }, { "epoch": 0.818320674795425, "grad_norm": 0.16858024895191193, "learning_rate": 1.7408695327880543e-05, "loss": 0.6794, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2436 }, { "epoch": 0.8186566028228452, "grad_norm": 0.1468784362077713, "learning_rate": 1.74062080228755e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2437 }, { "epoch": 0.8189925308502654, "grad_norm": 0.15777739882469177, "learning_rate": 1.7403719702576136e-05, "loss": 0.6866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2438 }, { "epoch": 0.8193284588776855, "grad_norm": 0.1436665952205658, "learning_rate": 1.740123036732358e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2439 }, { "epoch": 0.8196643869051056, "grad_norm": 0.15090444684028625, "learning_rate": 1.7398740017459083e-05, "loss": 0.6787, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2440 }, { "epoch": 0.8200003149325257, "grad_norm": 0.1622697412967682, "learning_rate": 1.7396248653324043e-05, "loss": 0.6656, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2441 }, { "epoch": 0.8203362429599458, "grad_norm": 0.16935284435749054, "learning_rate": 1.7393756275259984e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2442 }, { "epoch": 0.820672170987366, "grad_norm": 0.14884966611862183, "learning_rate": 1.7391262883608586e-05, "loss": 0.6583, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2443 }, { "epoch": 0.8210080990147861, "grad_norm": 0.1662713885307312, "learning_rate": 1.7388768478711662e-05, "loss": 0.7033, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2444 }, { "epoch": 0.8213440270422062, "grad_norm": 0.16708777844905853, "learning_rate": 1.7386273060911155e-05, "loss": 0.6766, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2445 }, { "epoch": 0.8216799550696263, "grad_norm": 0.16161343455314636, "learning_rate": 1.7383776630549163e-05, "loss": 0.6699, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2446 }, { "epoch": 0.8220158830970464, "grad_norm": 0.15926700830459595, "learning_rate": 1.7381279187967905e-05, "loss": 0.6788, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2447 }, { "epoch": 0.8223518111244665, "grad_norm": 0.1594790667295456, "learning_rate": 1.7378780733509755e-05, "loss": 0.6846, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2448 }, { "epoch": 0.8226877391518868, "grad_norm": 0.17838738858699799, "learning_rate": 1.7376281267517213e-05, "loss": 0.6621, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2449 }, { "epoch": 0.8230236671793069, "grad_norm": 0.17380473017692566, "learning_rate": 1.7373780790332926e-05, "loss": 0.668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2450 }, { "epoch": 0.823359595206727, "grad_norm": 0.16213832795619965, "learning_rate": 1.737127930229968e-05, "loss": 0.684, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2451 }, { "epoch": 0.8236955232341471, "grad_norm": 0.17480574548244476, "learning_rate": 1.736877680376039e-05, "loss": 0.6842, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2452 }, { "epoch": 0.8240314512615672, "grad_norm": 0.1701703667640686, "learning_rate": 1.7366273295058116e-05, "loss": 0.692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2453 }, { "epoch": 0.8243673792889873, "grad_norm": 0.1558620184659958, "learning_rate": 1.736376877653606e-05, "loss": 0.6586, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2454 }, { "epoch": 0.8247033073164075, "grad_norm": 0.16206151247024536, "learning_rate": 1.736126324853756e-05, "loss": 0.6602, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2455 }, { "epoch": 0.8250392353438276, "grad_norm": 0.15723837912082672, "learning_rate": 1.7358756711406086e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2456 }, { "epoch": 0.8253751633712477, "grad_norm": 0.1645335704088211, "learning_rate": 1.7356249165485256e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2457 }, { "epoch": 0.8257110913986678, "grad_norm": 0.1714131236076355, "learning_rate": 1.735374061111882e-05, "loss": 0.6656, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2458 }, { "epoch": 0.8260470194260879, "grad_norm": 0.15203602612018585, "learning_rate": 1.7351231048650668e-05, "loss": 0.6835, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2459 }, { "epoch": 0.826382947453508, "grad_norm": 0.18941837549209595, "learning_rate": 1.734872047842483e-05, "loss": 0.6826, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2460 }, { "epoch": 0.8267188754809283, "grad_norm": 0.15029144287109375, "learning_rate": 1.7346208900785464e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2461 }, { "epoch": 0.8270548035083484, "grad_norm": 0.18364566564559937, "learning_rate": 1.7343696316076887e-05, "loss": 0.6559, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2462 }, { "epoch": 0.8273907315357685, "grad_norm": 0.1790556162595749, "learning_rate": 1.734118272464353e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2463 }, { "epoch": 0.8277266595631886, "grad_norm": 0.16254691779613495, "learning_rate": 1.7338668126829983e-05, "loss": 0.6768, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2464 }, { "epoch": 0.8280625875906087, "grad_norm": 0.17198826372623444, "learning_rate": 1.733615252298096e-05, "loss": 0.6532, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2465 }, { "epoch": 0.8283985156180288, "grad_norm": 0.16716623306274414, "learning_rate": 1.7333635913441318e-05, "loss": 0.6638, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2466 }, { "epoch": 0.828734443645449, "grad_norm": 0.1705513447523117, "learning_rate": 1.733111829855605e-05, "loss": 0.6732, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2467 }, { "epoch": 0.8290703716728691, "grad_norm": 0.17779666185379028, "learning_rate": 1.732859967867029e-05, "loss": 0.6891, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2468 }, { "epoch": 0.8294062997002892, "grad_norm": 0.15639829635620117, "learning_rate": 1.732608005412931e-05, "loss": 0.7055, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2469 }, { "epoch": 0.8297422277277093, "grad_norm": 0.17803196609020233, "learning_rate": 1.732355942527851e-05, "loss": 0.6886, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2470 }, { "epoch": 0.8300781557551294, "grad_norm": 0.16513438522815704, "learning_rate": 1.7321037792463443e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2471 }, { "epoch": 0.8304140837825496, "grad_norm": 0.16129668056964874, "learning_rate": 1.731851515602979e-05, "loss": 0.6742, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2472 }, { "epoch": 0.8307500118099698, "grad_norm": 0.18959437310695648, "learning_rate": 1.7315991516323367e-05, "loss": 0.6733, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2473 }, { "epoch": 0.8310859398373899, "grad_norm": 0.16119951009750366, "learning_rate": 1.7313466873690138e-05, "loss": 0.6794, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2474 }, { "epoch": 0.83142186786481, "grad_norm": 0.18129962682724, "learning_rate": 1.7310941228476193e-05, "loss": 0.6797, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2475 }, { "epoch": 0.8317577958922301, "grad_norm": 0.17987863719463348, "learning_rate": 1.730841458102777e-05, "loss": 0.6772, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2476 }, { "epoch": 0.8320937239196502, "grad_norm": 0.16040240228176117, "learning_rate": 1.7305886931691236e-05, "loss": 0.6811, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2477 }, { "epoch": 0.8324296519470703, "grad_norm": 0.19826239347457886, "learning_rate": 1.7303358280813102e-05, "loss": 0.7023, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2478 }, { "epoch": 0.8327655799744904, "grad_norm": 0.1694411188364029, "learning_rate": 1.730082862874001e-05, "loss": 0.7167, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2479 }, { "epoch": 0.8331015080019106, "grad_norm": 0.15437017381191254, "learning_rate": 1.7298297975818742e-05, "loss": 0.6588, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2480 }, { "epoch": 0.8334374360293307, "grad_norm": 0.1580757200717926, "learning_rate": 1.729576632239622e-05, "loss": 0.6877, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2481 }, { "epoch": 0.8337733640567508, "grad_norm": 0.1450474113225937, "learning_rate": 1.7293233668819498e-05, "loss": 0.7011, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2482 }, { "epoch": 0.834109292084171, "grad_norm": 0.1870322972536087, "learning_rate": 1.729070001543577e-05, "loss": 0.685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2483 }, { "epoch": 0.8344452201115911, "grad_norm": 0.16672980785369873, "learning_rate": 1.728816536259237e-05, "loss": 0.6993, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2484 }, { "epoch": 0.8347811481390112, "grad_norm": 0.15414488315582275, "learning_rate": 1.7285629710636765e-05, "loss": 0.6695, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2485 }, { "epoch": 0.8351170761664314, "grad_norm": 0.17818352580070496, "learning_rate": 1.7283093059916554e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2486 }, { "epoch": 0.8354530041938515, "grad_norm": 0.15828678011894226, "learning_rate": 1.7280555410779482e-05, "loss": 0.6801, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2487 }, { "epoch": 0.8357889322212716, "grad_norm": 0.16684278845787048, "learning_rate": 1.7278016763573432e-05, "loss": 0.6866, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2488 }, { "epoch": 0.8361248602486917, "grad_norm": 0.1573028862476349, "learning_rate": 1.7275477118646413e-05, "loss": 0.7103, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2489 }, { "epoch": 0.8364607882761118, "grad_norm": 0.15713296830654144, "learning_rate": 1.727293647634658e-05, "loss": 0.6983, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2490 }, { "epoch": 0.8367967163035319, "grad_norm": 0.1479916274547577, "learning_rate": 1.7270394837022224e-05, "loss": 0.6802, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2491 }, { "epoch": 0.8371326443309521, "grad_norm": 0.14914245903491974, "learning_rate": 1.7267852201021765e-05, "loss": 0.6775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2492 }, { "epoch": 0.8374685723583722, "grad_norm": 0.16983330249786377, "learning_rate": 1.726530856869377e-05, "loss": 0.698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2493 }, { "epoch": 0.8378045003857924, "grad_norm": 0.14444522559642792, "learning_rate": 1.7262763940386938e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2494 }, { "epoch": 0.8381404284132125, "grad_norm": 0.17919710278511047, "learning_rate": 1.72602183164501e-05, "loss": 0.7022, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2495 }, { "epoch": 0.8384763564406326, "grad_norm": 0.17923341691493988, "learning_rate": 1.725767169723223e-05, "loss": 0.6809, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2496 }, { "epoch": 0.8388122844680527, "grad_norm": 0.1662752777338028, "learning_rate": 1.7255124083082435e-05, "loss": 0.6776, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2497 }, { "epoch": 0.8391482124954729, "grad_norm": 0.16527189314365387, "learning_rate": 1.7252575474349963e-05, "loss": 0.6926, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2498 }, { "epoch": 0.839484140522893, "grad_norm": 0.15301430225372314, "learning_rate": 1.7250025871384193e-05, "loss": 0.6721, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2499 }, { "epoch": 0.8398200685503131, "grad_norm": 0.15101280808448792, "learning_rate": 1.724747527453464e-05, "loss": 0.6989, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2500 }, { "epoch": 0.8401559965777332, "grad_norm": 0.16570772230625153, "learning_rate": 1.7244923684150962e-05, "loss": 0.688, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2501 }, { "epoch": 0.8404919246051533, "grad_norm": 0.18086573481559753, "learning_rate": 1.7242371100582947e-05, "loss": 0.6615, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2502 }, { "epoch": 0.8408278526325734, "grad_norm": 0.15886549651622772, "learning_rate": 1.723981752418052e-05, "loss": 0.6826, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2503 }, { "epoch": 0.8411637806599936, "grad_norm": 0.16718269884586334, "learning_rate": 1.7237262955293737e-05, "loss": 0.6637, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2504 }, { "epoch": 0.8414997086874137, "grad_norm": 0.15440413355827332, "learning_rate": 1.723470739427281e-05, "loss": 0.6705, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2505 }, { "epoch": 0.8418356367148339, "grad_norm": 0.16005559265613556, "learning_rate": 1.7232150841468057e-05, "loss": 0.6793, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2506 }, { "epoch": 0.842171564742254, "grad_norm": 0.1415112018585205, "learning_rate": 1.7229593297229963e-05, "loss": 0.663, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2507 }, { "epoch": 0.8425074927696741, "grad_norm": 0.16514471173286438, "learning_rate": 1.7227034761909124e-05, "loss": 0.6978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2508 }, { "epoch": 0.8428434207970942, "grad_norm": 0.1677076369524002, "learning_rate": 1.7224475235856286e-05, "loss": 0.6766, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2509 }, { "epoch": 0.8431793488245144, "grad_norm": 0.16046936810016632, "learning_rate": 1.7221914719422325e-05, "loss": 0.6936, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2510 }, { "epoch": 0.8435152768519345, "grad_norm": 0.15549536049365997, "learning_rate": 1.7219353212958253e-05, "loss": 0.6821, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2511 }, { "epoch": 0.8438512048793546, "grad_norm": 0.14083769917488098, "learning_rate": 1.721679071681522e-05, "loss": 0.6801, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2512 }, { "epoch": 0.8441871329067747, "grad_norm": 0.16507047414779663, "learning_rate": 1.7214227231344514e-05, "loss": 0.6852, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.88, "memory/max_allocated (GiB)": 64.88, "step": 2513 }, { "epoch": 0.8445230609341948, "grad_norm": 0.15452487766742706, "learning_rate": 1.7211662756897555e-05, "loss": 0.67, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2514 }, { "epoch": 0.8448589889616149, "grad_norm": 0.15042343735694885, "learning_rate": 1.7209097293825893e-05, "loss": 0.6979, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2515 }, { "epoch": 0.8451949169890351, "grad_norm": 0.16362598538398743, "learning_rate": 1.720653084248122e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2516 }, { "epoch": 0.8455308450164553, "grad_norm": 0.13653448224067688, "learning_rate": 1.720396340321537e-05, "loss": 0.6608, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2517 }, { "epoch": 0.8458667730438754, "grad_norm": 0.16789478063583374, "learning_rate": 1.72013949763803e-05, "loss": 0.6816, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2518 }, { "epoch": 0.8462027010712955, "grad_norm": 0.15833880007266998, "learning_rate": 1.719882556232811e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2519 }, { "epoch": 0.8465386290987156, "grad_norm": 0.1505403071641922, "learning_rate": 1.7196255161411032e-05, "loss": 0.6726, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2520 }, { "epoch": 0.8468745571261357, "grad_norm": 0.1794440597295761, "learning_rate": 1.7193683773981436e-05, "loss": 0.6928, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2521 }, { "epoch": 0.8472104851535559, "grad_norm": 0.15403883159160614, "learning_rate": 1.7191111400391826e-05, "loss": 0.6671, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2522 }, { "epoch": 0.847546413180976, "grad_norm": 0.17897894978523254, "learning_rate": 1.7188538040994833e-05, "loss": 0.6638, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2523 }, { "epoch": 0.8478823412083961, "grad_norm": 0.16455374658107758, "learning_rate": 1.718596369614324e-05, "loss": 0.6672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2524 }, { "epoch": 0.8482182692358162, "grad_norm": 0.16444776952266693, "learning_rate": 1.7183388366189954e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2525 }, { "epoch": 0.8485541972632363, "grad_norm": 0.16740667819976807, "learning_rate": 1.7180812051488015e-05, "loss": 0.6706, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2526 }, { "epoch": 0.8488901252906564, "grad_norm": 0.15587039291858673, "learning_rate": 1.717823475239061e-05, "loss": 0.6896, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2527 }, { "epoch": 0.8492260533180767, "grad_norm": 0.16334708034992218, "learning_rate": 1.7175656469251044e-05, "loss": 0.6893, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2528 }, { "epoch": 0.8495619813454968, "grad_norm": 0.17269985377788544, "learning_rate": 1.717307720242277e-05, "loss": 0.7107, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2529 }, { "epoch": 0.8498979093729169, "grad_norm": 0.18447677791118622, "learning_rate": 1.7170496952259373e-05, "loss": 0.6927, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2530 }, { "epoch": 0.850233837400337, "grad_norm": 0.15891650319099426, "learning_rate": 1.716791571911457e-05, "loss": 0.693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2531 }, { "epoch": 0.8505697654277571, "grad_norm": 0.16263794898986816, "learning_rate": 1.7165333503342214e-05, "loss": 0.6781, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2532 }, { "epoch": 0.8509056934551772, "grad_norm": 0.16339480876922607, "learning_rate": 1.7162750305296296e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2533 }, { "epoch": 0.8512416214825973, "grad_norm": 0.17963950335979462, "learning_rate": 1.7160166125330934e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2534 }, { "epoch": 0.8515775495100175, "grad_norm": 0.15708619356155396, "learning_rate": 1.7157580963800388e-05, "loss": 0.6751, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2535 }, { "epoch": 0.8519134775374376, "grad_norm": 0.17360621690750122, "learning_rate": 1.715499482105905e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2536 }, { "epoch": 0.8522494055648577, "grad_norm": 0.16546760499477386, "learning_rate": 1.7152407697461448e-05, "loss": 0.664, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2537 }, { "epoch": 0.8525853335922778, "grad_norm": 0.19119395315647125, "learning_rate": 1.7149819593362238e-05, "loss": 0.6704, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2538 }, { "epoch": 0.852921261619698, "grad_norm": 0.15741541981697083, "learning_rate": 1.714723050911622e-05, "loss": 0.6673, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2539 }, { "epoch": 0.853257189647118, "grad_norm": 0.15252657234668732, "learning_rate": 1.714464044507832e-05, "loss": 0.6809, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2540 }, { "epoch": 0.8535931176745383, "grad_norm": 0.1631644070148468, "learning_rate": 1.7142049401603603e-05, "loss": 0.6859, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2541 }, { "epoch": 0.8539290457019584, "grad_norm": 0.16142095625400543, "learning_rate": 1.713945737904727e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2542 }, { "epoch": 0.8542649737293785, "grad_norm": 0.16353552043437958, "learning_rate": 1.713686437776465e-05, "loss": 0.7113, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2543 }, { "epoch": 0.8546009017567986, "grad_norm": 0.142432302236557, "learning_rate": 1.713427039811121e-05, "loss": 0.6654, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2544 }, { "epoch": 0.8549368297842187, "grad_norm": 0.15673799812793732, "learning_rate": 1.713167544044255e-05, "loss": 0.6867, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2545 }, { "epoch": 0.8552727578116388, "grad_norm": 0.16428114473819733, "learning_rate": 1.712907950511441e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2546 }, { "epoch": 0.855608685839059, "grad_norm": 0.15354901552200317, "learning_rate": 1.7126482592482654e-05, "loss": 0.6871, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2547 }, { "epoch": 0.8559446138664791, "grad_norm": 0.1767520308494568, "learning_rate": 1.7123884702903285e-05, "loss": 0.6701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2548 }, { "epoch": 0.8562805418938992, "grad_norm": 0.16422566771507263, "learning_rate": 1.712128583673244e-05, "loss": 0.6925, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2549 }, { "epoch": 0.8566164699213193, "grad_norm": 0.15032604336738586, "learning_rate": 1.7118685994326388e-05, "loss": 0.6852, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2550 }, { "epoch": 0.8569523979487395, "grad_norm": 0.17810635268688202, "learning_rate": 1.711608517604154e-05, "loss": 0.6768, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2551 }, { "epoch": 0.8572883259761596, "grad_norm": 0.17012904584407806, "learning_rate": 1.7113483382234427e-05, "loss": 0.6798, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2552 }, { "epoch": 0.8576242540035798, "grad_norm": 0.15039609372615814, "learning_rate": 1.7110880613261725e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2553 }, { "epoch": 0.8579601820309999, "grad_norm": 0.17981339991092682, "learning_rate": 1.7108276869480236e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2554 }, { "epoch": 0.85829611005842, "grad_norm": 0.15467439591884613, "learning_rate": 1.7105672151246902e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2555 }, { "epoch": 0.8586320380858401, "grad_norm": 0.14786791801452637, "learning_rate": 1.7103066458918798e-05, "loss": 0.6656, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2556 }, { "epoch": 0.8589679661132602, "grad_norm": 0.1537974774837494, "learning_rate": 1.7100459792853126e-05, "loss": 0.6971, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2557 }, { "epoch": 0.8593038941406803, "grad_norm": 0.16270732879638672, "learning_rate": 1.7097852153407228e-05, "loss": 0.6773, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2558 }, { "epoch": 0.8596398221681005, "grad_norm": 0.17013554275035858, "learning_rate": 1.7095243540938574e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2559 }, { "epoch": 0.8599757501955206, "grad_norm": 0.15913386642932892, "learning_rate": 1.7092633955804775e-05, "loss": 0.6965, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2560 }, { "epoch": 0.8603116782229407, "grad_norm": 0.15653778612613678, "learning_rate": 1.709002339836357e-05, "loss": 0.6973, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2561 }, { "epoch": 0.8606476062503609, "grad_norm": 0.1603243052959442, "learning_rate": 1.708741186897283e-05, "loss": 0.67, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2562 }, { "epoch": 0.860983534277781, "grad_norm": 0.1679769605398178, "learning_rate": 1.7084799367990567e-05, "loss": 0.682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2563 }, { "epoch": 0.8613194623052011, "grad_norm": 0.1350453644990921, "learning_rate": 1.7082185895774916e-05, "loss": 0.6934, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2564 }, { "epoch": 0.8616553903326213, "grad_norm": 0.1511058360338211, "learning_rate": 1.7079571452684147e-05, "loss": 0.6663, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2565 }, { "epoch": 0.8619913183600414, "grad_norm": 0.1508803516626358, "learning_rate": 1.7076956039076675e-05, "loss": 0.668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2566 }, { "epoch": 0.8623272463874615, "grad_norm": 0.14077262580394745, "learning_rate": 1.707433965531103e-05, "loss": 0.6725, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2567 }, { "epoch": 0.8626631744148816, "grad_norm": 0.16274341940879822, "learning_rate": 1.7071722301745887e-05, "loss": 0.7132, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2568 }, { "epoch": 0.8629991024423017, "grad_norm": 0.15650038421154022, "learning_rate": 1.7069103978740056e-05, "loss": 0.6736, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2569 }, { "epoch": 0.8633350304697218, "grad_norm": 0.1550072431564331, "learning_rate": 1.706648468665247e-05, "loss": 0.6875, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2570 }, { "epoch": 0.863670958497142, "grad_norm": 0.15168079733848572, "learning_rate": 1.70638644258422e-05, "loss": 0.6807, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2571 }, { "epoch": 0.8640068865245621, "grad_norm": 0.16695082187652588, "learning_rate": 1.7061243196668446e-05, "loss": 0.6996, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2572 }, { "epoch": 0.8643428145519823, "grad_norm": 0.15143343806266785, "learning_rate": 1.705862099949055e-05, "loss": 0.6652, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2573 }, { "epoch": 0.8646787425794024, "grad_norm": 0.16620059311389923, "learning_rate": 1.7055997834667983e-05, "loss": 0.668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2574 }, { "epoch": 0.8650146706068225, "grad_norm": 0.16029229760169983, "learning_rate": 1.705337370256034e-05, "loss": 0.6765, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2575 }, { "epoch": 0.8653505986342426, "grad_norm": 0.1665741503238678, "learning_rate": 1.7050748603527357e-05, "loss": 0.6894, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2576 }, { "epoch": 0.8656865266616628, "grad_norm": 0.18587860465049744, "learning_rate": 1.7048122537928904e-05, "loss": 0.678, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2577 }, { "epoch": 0.8660224546890829, "grad_norm": 0.17274078726768494, "learning_rate": 1.7045495506124976e-05, "loss": 0.6904, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2578 }, { "epoch": 0.866358382716503, "grad_norm": 0.15508146584033966, "learning_rate": 1.704286750847571e-05, "loss": 0.6613, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2579 }, { "epoch": 0.8666943107439231, "grad_norm": 0.18886514008045197, "learning_rate": 1.7040238545341364e-05, "loss": 0.6622, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2580 }, { "epoch": 0.8670302387713432, "grad_norm": 0.16713543236255646, "learning_rate": 1.7037608617082336e-05, "loss": 0.6799, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2581 }, { "epoch": 0.8673661667987633, "grad_norm": 0.18369442224502563, "learning_rate": 1.7034977724059158e-05, "loss": 0.6656, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2582 }, { "epoch": 0.8677020948261835, "grad_norm": 0.15396620333194733, "learning_rate": 1.703234586663249e-05, "loss": 0.6739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2583 }, { "epoch": 0.8680380228536037, "grad_norm": 0.1719007045030594, "learning_rate": 1.7029713045163122e-05, "loss": 0.6804, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2584 }, { "epoch": 0.8683739508810238, "grad_norm": 0.15633593499660492, "learning_rate": 1.7027079260011984e-05, "loss": 0.6931, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2585 }, { "epoch": 0.8687098789084439, "grad_norm": 0.1556859165430069, "learning_rate": 1.7024444511540134e-05, "loss": 0.6679, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2586 }, { "epoch": 0.869045806935864, "grad_norm": 0.1499675214290619, "learning_rate": 1.7021808800108754e-05, "loss": 0.7185, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2587 }, { "epoch": 0.8693817349632841, "grad_norm": 0.1641291379928589, "learning_rate": 1.701917212607917e-05, "loss": 0.6807, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2588 }, { "epoch": 0.8697176629907042, "grad_norm": 0.14163051545619965, "learning_rate": 1.701653448981284e-05, "loss": 0.6871, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2589 }, { "epoch": 0.8700535910181244, "grad_norm": 0.15602654218673706, "learning_rate": 1.7013895891671343e-05, "loss": 0.685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2590 }, { "epoch": 0.8703895190455445, "grad_norm": 0.15301181375980377, "learning_rate": 1.70112563320164e-05, "loss": 0.7096, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2591 }, { "epoch": 0.8707254470729646, "grad_norm": 0.15060502290725708, "learning_rate": 1.700861581120986e-05, "loss": 0.6796, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2592 }, { "epoch": 0.8710613751003847, "grad_norm": 0.15713447332382202, "learning_rate": 1.7005974329613698e-05, "loss": 0.6665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2593 }, { "epoch": 0.8713973031278048, "grad_norm": 0.16213203966617584, "learning_rate": 1.7003331887590032e-05, "loss": 0.6871, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2594 }, { "epoch": 0.8717332311552249, "grad_norm": 0.1891789734363556, "learning_rate": 1.700068848550111e-05, "loss": 0.6821, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2595 }, { "epoch": 0.8720691591826452, "grad_norm": 0.16049416363239288, "learning_rate": 1.6998044123709303e-05, "loss": 0.6872, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2596 }, { "epoch": 0.8724050872100653, "grad_norm": 0.14438121020793915, "learning_rate": 1.699539880257712e-05, "loss": 0.7008, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2597 }, { "epoch": 0.8727410152374854, "grad_norm": 0.17644664645195007, "learning_rate": 1.6992752522467194e-05, "loss": 0.6738, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2598 }, { "epoch": 0.8730769432649055, "grad_norm": 0.17599496245384216, "learning_rate": 1.6990105283742307e-05, "loss": 0.6869, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2599 }, { "epoch": 0.8734128712923256, "grad_norm": 0.14474521577358246, "learning_rate": 1.6987457086765348e-05, "loss": 0.6827, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2600 }, { "epoch": 0.8737487993197457, "grad_norm": 0.18364128470420837, "learning_rate": 1.698480793189936e-05, "loss": 0.701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2601 }, { "epoch": 0.8740847273471659, "grad_norm": 0.18028028309345245, "learning_rate": 1.69821578195075e-05, "loss": 0.6707, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2602 }, { "epoch": 0.874420655374586, "grad_norm": 0.15144146978855133, "learning_rate": 1.6979506749953073e-05, "loss": 0.6881, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2603 }, { "epoch": 0.8747565834020061, "grad_norm": 0.17805716395378113, "learning_rate": 1.6976854723599495e-05, "loss": 0.6825, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2604 }, { "epoch": 0.8750925114294262, "grad_norm": 0.15791764855384827, "learning_rate": 1.6974201740810334e-05, "loss": 0.6588, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2605 }, { "epoch": 0.8754284394568463, "grad_norm": 0.15165983140468597, "learning_rate": 1.6971547801949273e-05, "loss": 0.6882, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2606 }, { "epoch": 0.8757643674842664, "grad_norm": 0.16083984076976776, "learning_rate": 1.6968892907380136e-05, "loss": 0.6861, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2607 }, { "epoch": 0.8761002955116867, "grad_norm": 0.1850558966398239, "learning_rate": 1.6966237057466874e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2608 }, { "epoch": 0.8764362235391068, "grad_norm": 0.14679314196109772, "learning_rate": 1.6963580252573563e-05, "loss": 0.6761, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2609 }, { "epoch": 0.8767721515665269, "grad_norm": 0.18305039405822754, "learning_rate": 1.6960922493064423e-05, "loss": 0.7023, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2610 }, { "epoch": 0.877108079593947, "grad_norm": 0.1708424985408783, "learning_rate": 1.69582637793038e-05, "loss": 0.7018, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2611 }, { "epoch": 0.8774440076213671, "grad_norm": 0.17962023615837097, "learning_rate": 1.6955604111656163e-05, "loss": 0.6769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2612 }, { "epoch": 0.8777799356487872, "grad_norm": 0.16735267639160156, "learning_rate": 1.695294349048612e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2613 }, { "epoch": 0.8781158636762074, "grad_norm": 0.17670570313930511, "learning_rate": 1.6950281916158406e-05, "loss": 0.6737, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2614 }, { "epoch": 0.8784517917036275, "grad_norm": 0.1766437590122223, "learning_rate": 1.694761938903789e-05, "loss": 0.7013, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2615 }, { "epoch": 0.8787877197310476, "grad_norm": 0.18341784179210663, "learning_rate": 1.694495590948957e-05, "loss": 0.6899, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2616 }, { "epoch": 0.8791236477584677, "grad_norm": 0.160801500082016, "learning_rate": 1.694229147787857e-05, "loss": 0.6713, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2617 }, { "epoch": 0.8794595757858878, "grad_norm": 0.16934692859649658, "learning_rate": 1.693962609457016e-05, "loss": 0.6926, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2618 }, { "epoch": 0.879795503813308, "grad_norm": 0.17015908658504486, "learning_rate": 1.6936959759929715e-05, "loss": 0.6834, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2619 }, { "epoch": 0.8801314318407282, "grad_norm": 0.16269046068191528, "learning_rate": 1.6934292474322763e-05, "loss": 0.6667, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2620 }, { "epoch": 0.8804673598681483, "grad_norm": 0.1571124643087387, "learning_rate": 1.693162423811495e-05, "loss": 0.7103, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2621 }, { "epoch": 0.8808032878955684, "grad_norm": 0.16332194209098816, "learning_rate": 1.6928955051672056e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2622 }, { "epoch": 0.8811392159229885, "grad_norm": 0.16072824597358704, "learning_rate": 1.6926284915359995e-05, "loss": 0.6674, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2623 }, { "epoch": 0.8814751439504086, "grad_norm": 0.1771463304758072, "learning_rate": 1.692361382954481e-05, "loss": 0.6823, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2624 }, { "epoch": 0.8818110719778287, "grad_norm": 0.14388959109783173, "learning_rate": 1.6920941794592668e-05, "loss": 0.6854, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2625 }, { "epoch": 0.8821470000052489, "grad_norm": 0.1789434552192688, "learning_rate": 1.6918268810869868e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2626 }, { "epoch": 0.882482928032669, "grad_norm": 0.1593075543642044, "learning_rate": 1.6915594878742843e-05, "loss": 0.6692, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2627 }, { "epoch": 0.8828188560600891, "grad_norm": 0.16388072073459625, "learning_rate": 1.691291999857816e-05, "loss": 0.6685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2628 }, { "epoch": 0.8831547840875092, "grad_norm": 0.160007044672966, "learning_rate": 1.69102441707425e-05, "loss": 0.6672, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2629 }, { "epoch": 0.8834907121149294, "grad_norm": 0.18241548538208008, "learning_rate": 1.690756739560269e-05, "loss": 0.6771, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2630 }, { "epoch": 0.8838266401423495, "grad_norm": 0.16787125170230865, "learning_rate": 1.690488967352568e-05, "loss": 0.6774, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2631 }, { "epoch": 0.8841625681697697, "grad_norm": 0.14366672933101654, "learning_rate": 1.690221100487855e-05, "loss": 0.6912, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2632 }, { "epoch": 0.8844984961971898, "grad_norm": 0.1713666021823883, "learning_rate": 1.689953139002851e-05, "loss": 0.6883, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2633 }, { "epoch": 0.8848344242246099, "grad_norm": 0.15350285172462463, "learning_rate": 1.6896850829342902e-05, "loss": 0.6787, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2634 }, { "epoch": 0.88517035225203, "grad_norm": 0.1527474820613861, "learning_rate": 1.6894169323189196e-05, "loss": 0.6668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2635 }, { "epoch": 0.8855062802794501, "grad_norm": 0.14574049413204193, "learning_rate": 1.6891486871934992e-05, "loss": 0.6983, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2636 }, { "epoch": 0.8858422083068702, "grad_norm": 0.16140592098236084, "learning_rate": 1.688880347594801e-05, "loss": 0.6822, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2637 }, { "epoch": 0.8861781363342903, "grad_norm": 0.17138025164604187, "learning_rate": 1.6886119135596124e-05, "loss": 0.6789, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2638 }, { "epoch": 0.8865140643617105, "grad_norm": 0.14791138470172882, "learning_rate": 1.688343385124731e-05, "loss": 0.6783, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2639 }, { "epoch": 0.8868499923891306, "grad_norm": 0.1501293033361435, "learning_rate": 1.688074762326969e-05, "loss": 0.6572, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2640 }, { "epoch": 0.8871859204165508, "grad_norm": 0.15813863277435303, "learning_rate": 1.6878060452031513e-05, "loss": 0.6901, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2641 }, { "epoch": 0.8875218484439709, "grad_norm": 0.1586528718471527, "learning_rate": 1.687537233790115e-05, "loss": 0.6653, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2642 }, { "epoch": 0.887857776471391, "grad_norm": 0.16095049679279327, "learning_rate": 1.6872683281247108e-05, "loss": 0.6916, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2643 }, { "epoch": 0.8881937044988111, "grad_norm": 0.1554139107465744, "learning_rate": 1.6869993282438017e-05, "loss": 0.6725, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2644 }, { "epoch": 0.8885296325262313, "grad_norm": 0.1834341585636139, "learning_rate": 1.6867302341842652e-05, "loss": 0.6788, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2645 }, { "epoch": 0.8888655605536514, "grad_norm": 0.1469312310218811, "learning_rate": 1.6864610459829896e-05, "loss": 0.6877, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2646 }, { "epoch": 0.8892014885810715, "grad_norm": 0.15963239967823029, "learning_rate": 1.686191763676878e-05, "loss": 0.6609, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2647 }, { "epoch": 0.8895374166084916, "grad_norm": 0.17538712918758392, "learning_rate": 1.685922387302844e-05, "loss": 0.721, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2648 }, { "epoch": 0.8898733446359117, "grad_norm": 0.14463353157043457, "learning_rate": 1.685652916897817e-05, "loss": 0.6628, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2649 }, { "epoch": 0.8902092726633318, "grad_norm": 0.17936477065086365, "learning_rate": 1.6853833524987373e-05, "loss": 0.6895, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2650 }, { "epoch": 0.890545200690752, "grad_norm": 0.16537579894065857, "learning_rate": 1.685113694142559e-05, "loss": 0.6713, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2651 }, { "epoch": 0.8908811287181722, "grad_norm": 0.18496526777744293, "learning_rate": 1.684843941866248e-05, "loss": 0.6942, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2652 }, { "epoch": 0.8912170567455923, "grad_norm": 0.16410601139068604, "learning_rate": 1.6845740957067843e-05, "loss": 0.6867, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2653 }, { "epoch": 0.8915529847730124, "grad_norm": 0.17120228707790375, "learning_rate": 1.6843041557011607e-05, "loss": 0.6846, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2654 }, { "epoch": 0.8918889128004325, "grad_norm": 0.16279949247837067, "learning_rate": 1.6840341218863817e-05, "loss": 0.6775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2655 }, { "epoch": 0.8922248408278526, "grad_norm": 0.1598556488752365, "learning_rate": 1.6837639942994654e-05, "loss": 0.6701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2656 }, { "epoch": 0.8925607688552728, "grad_norm": 0.17240117490291595, "learning_rate": 1.6834937729774434e-05, "loss": 0.6685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2657 }, { "epoch": 0.8928966968826929, "grad_norm": 0.15496256947517395, "learning_rate": 1.683223457957359e-05, "loss": 0.6654, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2658 }, { "epoch": 0.893232624910113, "grad_norm": 0.17827165126800537, "learning_rate": 1.682953049276269e-05, "loss": 0.7151, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2659 }, { "epoch": 0.8935685529375331, "grad_norm": 0.19293858110904694, "learning_rate": 1.6826825469712432e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2660 }, { "epoch": 0.8939044809649532, "grad_norm": 0.174866184592247, "learning_rate": 1.682411951079363e-05, "loss": 0.6626, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2661 }, { "epoch": 0.8942404089923733, "grad_norm": 0.17338675260543823, "learning_rate": 1.682141261637725e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2662 }, { "epoch": 0.8945763370197936, "grad_norm": 0.1709415763616562, "learning_rate": 1.681870478683436e-05, "loss": 0.6808, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2663 }, { "epoch": 0.8949122650472137, "grad_norm": 0.19059765338897705, "learning_rate": 1.681599602253617e-05, "loss": 0.6677, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2664 }, { "epoch": 0.8952481930746338, "grad_norm": 0.16391000151634216, "learning_rate": 1.681328632385402e-05, "loss": 0.6959, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2665 }, { "epoch": 0.8955841211020539, "grad_norm": 0.1540207862854004, "learning_rate": 1.6810575691159376e-05, "loss": 0.6735, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2666 }, { "epoch": 0.895920049129474, "grad_norm": 0.15477418899536133, "learning_rate": 1.680786412482382e-05, "loss": 0.6745, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2667 }, { "epoch": 0.8962559771568941, "grad_norm": 0.17009304463863373, "learning_rate": 1.6805151625219087e-05, "loss": 0.6808, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2668 }, { "epoch": 0.8965919051843143, "grad_norm": 0.1578136384487152, "learning_rate": 1.6802438192717008e-05, "loss": 0.6828, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2669 }, { "epoch": 0.8969278332117344, "grad_norm": 0.16718196868896484, "learning_rate": 1.6799723827689575e-05, "loss": 0.6744, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2670 }, { "epoch": 0.8972637612391545, "grad_norm": 0.15888792276382446, "learning_rate": 1.6797008530508882e-05, "loss": 0.6667, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2671 }, { "epoch": 0.8975996892665746, "grad_norm": 0.1556367427110672, "learning_rate": 1.6794292301547166e-05, "loss": 0.7147, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2672 }, { "epoch": 0.8979356172939947, "grad_norm": 0.170558899641037, "learning_rate": 1.6791575141176782e-05, "loss": 0.695, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2673 }, { "epoch": 0.8982715453214148, "grad_norm": 0.1898297816514969, "learning_rate": 1.6788857049770223e-05, "loss": 0.6912, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2674 }, { "epoch": 0.8986074733488351, "grad_norm": 0.17052839696407318, "learning_rate": 1.6786138027700097e-05, "loss": 0.6791, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 2675 }, { "epoch": 0.8989434013762552, "grad_norm": 0.1896497756242752, "learning_rate": 1.6783418075339154e-05, "loss": 0.6693, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2676 }, { "epoch": 0.8992793294036753, "grad_norm": 0.1519479751586914, "learning_rate": 1.6780697193060257e-05, "loss": 0.6838, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2677 }, { "epoch": 0.8996152574310954, "grad_norm": 0.1731639802455902, "learning_rate": 1.677797538123641e-05, "loss": 0.6585, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2678 }, { "epoch": 0.8999511854585155, "grad_norm": 0.1845872402191162, "learning_rate": 1.677525264024073e-05, "loss": 0.6766, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2679 }, { "epoch": 0.9002871134859356, "grad_norm": 0.1554720401763916, "learning_rate": 1.6772528970446473e-05, "loss": 0.6773, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2680 }, { "epoch": 0.9006230415133558, "grad_norm": 0.1763555258512497, "learning_rate": 1.676980437222702e-05, "loss": 0.6781, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2681 }, { "epoch": 0.9009589695407759, "grad_norm": 0.1504562795162201, "learning_rate": 1.6767078845955878e-05, "loss": 0.6552, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2682 }, { "epoch": 0.901294897568196, "grad_norm": 0.19477611780166626, "learning_rate": 1.676435239200668e-05, "loss": 0.7023, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2683 }, { "epoch": 0.9016308255956161, "grad_norm": 0.15987509489059448, "learning_rate": 1.6761625010753182e-05, "loss": 0.673, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2684 }, { "epoch": 0.9019667536230362, "grad_norm": 0.16899892687797546, "learning_rate": 1.675889670256928e-05, "loss": 0.6691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2685 }, { "epoch": 0.9023026816504564, "grad_norm": 0.1516263335943222, "learning_rate": 1.675616746782899e-05, "loss": 0.6811, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2686 }, { "epoch": 0.9026386096778766, "grad_norm": 0.17016251385211945, "learning_rate": 1.6753437306906443e-05, "loss": 0.6671, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2687 }, { "epoch": 0.9029745377052967, "grad_norm": 0.1375616192817688, "learning_rate": 1.6750706220175923e-05, "loss": 0.6711, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2688 }, { "epoch": 0.9033104657327168, "grad_norm": 0.16088984906673431, "learning_rate": 1.674797420801182e-05, "loss": 0.66, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2689 }, { "epoch": 0.9036463937601369, "grad_norm": 0.1766490638256073, "learning_rate": 1.6745241270788653e-05, "loss": 0.666, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2690 }, { "epoch": 0.903982321787557, "grad_norm": 0.15763142704963684, "learning_rate": 1.6742507408881076e-05, "loss": 0.6719, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2691 }, { "epoch": 0.9043182498149771, "grad_norm": 0.15448199212551117, "learning_rate": 1.673977262266387e-05, "loss": 0.6978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 2692 }, { "epoch": 0.9046541778423972, "grad_norm": 0.16035893559455872, "learning_rate": 1.6737036912511926e-05, "loss": 0.7127, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2693 }, { "epoch": 0.9049901058698174, "grad_norm": 0.14723555743694305, "learning_rate": 1.6734300278800288e-05, "loss": 0.6583, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2694 }, { "epoch": 0.9053260338972375, "grad_norm": 0.15902888774871826, "learning_rate": 1.6731562721904104e-05, "loss": 0.6978, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2695 }, { "epoch": 0.9056619619246576, "grad_norm": 0.1523541808128357, "learning_rate": 1.672882424219866e-05, "loss": 0.6669, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2696 }, { "epoch": 0.9059978899520778, "grad_norm": 0.1601976752281189, "learning_rate": 1.6726084840059368e-05, "loss": 0.6913, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2697 }, { "epoch": 0.9063338179794979, "grad_norm": 0.16912305355072021, "learning_rate": 1.6723344515861764e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2698 }, { "epoch": 0.906669746006918, "grad_norm": 0.17659853398799896, "learning_rate": 1.67206032699815e-05, "loss": 0.6691, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2699 }, { "epoch": 0.9070056740343382, "grad_norm": 0.18283498287200928, "learning_rate": 1.671786110279438e-05, "loss": 0.6999, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2700 }, { "epoch": 0.9073416020617583, "grad_norm": 0.17213284969329834, "learning_rate": 1.6715118014676314e-05, "loss": 0.6732, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2701 }, { "epoch": 0.9076775300891784, "grad_norm": 0.18431000411510468, "learning_rate": 1.671237400600334e-05, "loss": 0.682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2702 }, { "epoch": 0.9080134581165985, "grad_norm": 0.17817820608615875, "learning_rate": 1.670962907715163e-05, "loss": 0.6889, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2703 }, { "epoch": 0.9083493861440186, "grad_norm": 0.17087610065937042, "learning_rate": 1.6706883228497472e-05, "loss": 0.6711, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2704 }, { "epoch": 0.9086853141714387, "grad_norm": 0.16457776725292206, "learning_rate": 1.670413646041729e-05, "loss": 0.6644, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2705 }, { "epoch": 0.9090212421988589, "grad_norm": 0.17649903893470764, "learning_rate": 1.6701388773287636e-05, "loss": 0.6496, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2706 }, { "epoch": 0.909357170226279, "grad_norm": 0.16509291529655457, "learning_rate": 1.669864016748517e-05, "loss": 0.6794, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2707 }, { "epoch": 0.9096930982536992, "grad_norm": 0.16604620218276978, "learning_rate": 1.6695890643386697e-05, "loss": 0.7036, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2708 }, { "epoch": 0.9100290262811193, "grad_norm": 0.17447997629642487, "learning_rate": 1.669314020136914e-05, "loss": 0.697, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2709 }, { "epoch": 0.9103649543085394, "grad_norm": 0.16194209456443787, "learning_rate": 1.6690388841809552e-05, "loss": 0.6809, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2710 }, { "epoch": 0.9107008823359595, "grad_norm": 0.1530616730451584, "learning_rate": 1.6687636565085103e-05, "loss": 0.6935, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2711 }, { "epoch": 0.9110368103633797, "grad_norm": 0.16692908108234406, "learning_rate": 1.6684883371573093e-05, "loss": 0.677, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2712 }, { "epoch": 0.9113727383907998, "grad_norm": 0.15375438332557678, "learning_rate": 1.6682129261650954e-05, "loss": 0.6694, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2713 }, { "epoch": 0.9117086664182199, "grad_norm": 0.17198780179023743, "learning_rate": 1.6679374235696235e-05, "loss": 0.6628, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2714 }, { "epoch": 0.91204459444564, "grad_norm": 0.17038719356060028, "learning_rate": 1.6676618294086615e-05, "loss": 0.6941, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2715 }, { "epoch": 0.9123805224730601, "grad_norm": 0.17709742486476898, "learning_rate": 1.6673861437199898e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2716 }, { "epoch": 0.9127164505004802, "grad_norm": 0.18108899891376495, "learning_rate": 1.6671103665414015e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2717 }, { "epoch": 0.9130523785279004, "grad_norm": 0.16549566388130188, "learning_rate": 1.666834497910702e-05, "loss": 0.6871, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2718 }, { "epoch": 0.9133883065553206, "grad_norm": 0.17230720818042755, "learning_rate": 1.6665585378657084e-05, "loss": 0.6775, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2719 }, { "epoch": 0.9137242345827407, "grad_norm": 0.15964113175868988, "learning_rate": 1.6662824864442525e-05, "loss": 0.6721, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2720 }, { "epoch": 0.9140601626101608, "grad_norm": 0.15903528034687042, "learning_rate": 1.6660063436841768e-05, "loss": 0.6924, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2721 }, { "epoch": 0.9143960906375809, "grad_norm": 0.1981235146522522, "learning_rate": 1.6657301096233365e-05, "loss": 0.6825, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2722 }, { "epoch": 0.914732018665001, "grad_norm": 0.19022466242313385, "learning_rate": 1.6654537842996004e-05, "loss": 0.7104, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2723 }, { "epoch": 0.9150679466924212, "grad_norm": 0.14589586853981018, "learning_rate": 1.6651773677508483e-05, "loss": 0.6811, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2724 }, { "epoch": 0.9154038747198413, "grad_norm": 0.17720259726047516, "learning_rate": 1.664900860014974e-05, "loss": 0.6782, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2725 }, { "epoch": 0.9157398027472614, "grad_norm": 0.18439707159996033, "learning_rate": 1.6646242611298828e-05, "loss": 0.6845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2726 }, { "epoch": 0.9160757307746815, "grad_norm": 0.1595352441072464, "learning_rate": 1.6643475711334924e-05, "loss": 0.6708, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2727 }, { "epoch": 0.9164116588021016, "grad_norm": 0.19239361584186554, "learning_rate": 1.664070790063734e-05, "loss": 0.6668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2728 }, { "epoch": 0.9167475868295217, "grad_norm": 0.13824346661567688, "learning_rate": 1.663793917958551e-05, "loss": 0.6716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2729 }, { "epoch": 0.917083514856942, "grad_norm": 0.17941710352897644, "learning_rate": 1.6635169548558975e-05, "loss": 0.6726, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2730 }, { "epoch": 0.9174194428843621, "grad_norm": 0.18641529977321625, "learning_rate": 1.6632399007937428e-05, "loss": 0.6764, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2731 }, { "epoch": 0.9177553709117822, "grad_norm": 0.1696467399597168, "learning_rate": 1.662962755810067e-05, "loss": 0.6876, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2732 }, { "epoch": 0.9180912989392023, "grad_norm": 0.1668597012758255, "learning_rate": 1.6626855199428627e-05, "loss": 0.66, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2733 }, { "epoch": 0.9184272269666224, "grad_norm": 0.20853568613529205, "learning_rate": 1.6624081932301362e-05, "loss": 0.6864, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2734 }, { "epoch": 0.9187631549940425, "grad_norm": 0.20341797173023224, "learning_rate": 1.6621307757099047e-05, "loss": 0.6719, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2735 }, { "epoch": 0.9190990830214627, "grad_norm": 0.1644001454114914, "learning_rate": 1.6618532674201984e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2736 }, { "epoch": 0.9194350110488828, "grad_norm": 0.17561772465705872, "learning_rate": 1.661575668399061e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2737 }, { "epoch": 0.9197709390763029, "grad_norm": 0.211312934756279, "learning_rate": 1.6612979786845464e-05, "loss": 0.6862, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2738 }, { "epoch": 0.920106867103723, "grad_norm": 0.18580298125743866, "learning_rate": 1.6610201983147227e-05, "loss": 0.6898, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2739 }, { "epoch": 0.9204427951311431, "grad_norm": 0.14360739290714264, "learning_rate": 1.6607423273276704e-05, "loss": 0.6923, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2740 }, { "epoch": 0.9207787231585632, "grad_norm": 0.20576274394989014, "learning_rate": 1.6604643657614816e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2741 }, { "epoch": 0.9211146511859835, "grad_norm": 0.18533964455127716, "learning_rate": 1.6601863136542612e-05, "loss": 0.6849, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2742 }, { "epoch": 0.9214505792134036, "grad_norm": 0.15276221930980682, "learning_rate": 1.6599081710441268e-05, "loss": 0.6715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2743 }, { "epoch": 0.9217865072408237, "grad_norm": 0.15566468238830566, "learning_rate": 1.659629937969208e-05, "loss": 0.673, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2744 }, { "epoch": 0.9221224352682438, "grad_norm": 0.15427102148532867, "learning_rate": 1.6593516144676463e-05, "loss": 0.696, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2745 }, { "epoch": 0.9224583632956639, "grad_norm": 0.13570086658000946, "learning_rate": 1.6590732005775973e-05, "loss": 0.677, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2746 }, { "epoch": 0.922794291323084, "grad_norm": 0.14732946455478668, "learning_rate": 1.6587946963372265e-05, "loss": 0.6933, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2747 }, { "epoch": 0.9231302193505041, "grad_norm": 0.15773151814937592, "learning_rate": 1.658516101784715e-05, "loss": 0.6596, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2748 }, { "epoch": 0.9234661473779243, "grad_norm": 0.16999563574790955, "learning_rate": 1.658237416958253e-05, "loss": 0.6818, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2749 }, { "epoch": 0.9238020754053444, "grad_norm": 0.1520407646894455, "learning_rate": 1.657958641896045e-05, "loss": 0.6889, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2750 }, { "epoch": 0.9241380034327645, "grad_norm": 0.16584531962871552, "learning_rate": 1.6576797766363076e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2751 }, { "epoch": 0.9244739314601846, "grad_norm": 0.17814040184020996, "learning_rate": 1.6574008212172693e-05, "loss": 0.6872, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2752 }, { "epoch": 0.9248098594876047, "grad_norm": 0.15287013351917267, "learning_rate": 1.6571217756771715e-05, "loss": 0.6666, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2753 }, { "epoch": 0.9251457875150249, "grad_norm": 0.1479470282793045, "learning_rate": 1.6568426400542677e-05, "loss": 0.6919, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2754 }, { "epoch": 0.9254817155424451, "grad_norm": 0.15577034652233124, "learning_rate": 1.6565634143868233e-05, "loss": 0.6879, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2755 }, { "epoch": 0.9258176435698652, "grad_norm": 0.15794548392295837, "learning_rate": 1.6562840987131173e-05, "loss": 0.6879, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2756 }, { "epoch": 0.9261535715972853, "grad_norm": 0.5026611089706421, "learning_rate": 1.656004693071439e-05, "loss": 0.6802, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2757 }, { "epoch": 0.9264894996247054, "grad_norm": 0.18188124895095825, "learning_rate": 1.6557251975000927e-05, "loss": 0.6795, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2758 }, { "epoch": 0.9268254276521255, "grad_norm": 0.137889102101326, "learning_rate": 1.6554456120373927e-05, "loss": 0.6846, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2759 }, { "epoch": 0.9271613556795456, "grad_norm": 0.17380598187446594, "learning_rate": 1.6551659367216668e-05, "loss": 0.6808, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2760 }, { "epoch": 0.9274972837069658, "grad_norm": 0.14677859842777252, "learning_rate": 1.6548861715912545e-05, "loss": 0.6652, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2761 }, { "epoch": 0.9278332117343859, "grad_norm": 0.16943100094795227, "learning_rate": 1.6546063166845083e-05, "loss": 0.6884, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2762 }, { "epoch": 0.928169139761806, "grad_norm": 0.16153059899806976, "learning_rate": 1.6543263720397924e-05, "loss": 0.6678, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2763 }, { "epoch": 0.9285050677892261, "grad_norm": 0.1658906787633896, "learning_rate": 1.654046337695484e-05, "loss": 0.6909, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2764 }, { "epoch": 0.9288409958166463, "grad_norm": 0.1842864602804184, "learning_rate": 1.6537662136899713e-05, "loss": 0.7048, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2765 }, { "epoch": 0.9291769238440664, "grad_norm": 0.17683345079421997, "learning_rate": 1.6534860000616566e-05, "loss": 0.7001, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2766 }, { "epoch": 0.9295128518714866, "grad_norm": 0.16385233402252197, "learning_rate": 1.653205696848953e-05, "loss": 0.6734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2767 }, { "epoch": 0.9298487798989067, "grad_norm": 0.20403815805912018, "learning_rate": 1.6529253040902868e-05, "loss": 0.6845, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2768 }, { "epoch": 0.9301847079263268, "grad_norm": 0.15197688341140747, "learning_rate": 1.6526448218240952e-05, "loss": 0.6856, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2769 }, { "epoch": 0.9305206359537469, "grad_norm": 0.20093265175819397, "learning_rate": 1.65236425008883e-05, "loss": 0.6825, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2770 }, { "epoch": 0.930856563981167, "grad_norm": 0.19122830033302307, "learning_rate": 1.652083588922953e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2771 }, { "epoch": 0.9311924920085871, "grad_norm": 0.17479050159454346, "learning_rate": 1.651802838364939e-05, "loss": 0.6743, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2772 }, { "epoch": 0.9315284200360073, "grad_norm": 0.22152359783649445, "learning_rate": 1.6515219984532764e-05, "loss": 0.6639, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2773 }, { "epoch": 0.9318643480634274, "grad_norm": 0.1752488762140274, "learning_rate": 1.6512410692264634e-05, "loss": 0.6655, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2774 }, { "epoch": 0.9322002760908475, "grad_norm": 0.18608272075653076, "learning_rate": 1.6509600507230124e-05, "loss": 0.6747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2775 }, { "epoch": 0.9325362041182677, "grad_norm": 0.1636887490749359, "learning_rate": 1.6506789429814474e-05, "loss": 0.6604, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2776 }, { "epoch": 0.9328721321456878, "grad_norm": 0.1680314540863037, "learning_rate": 1.650397746040304e-05, "loss": 0.6776, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2777 }, { "epoch": 0.9332080601731079, "grad_norm": 0.15484634041786194, "learning_rate": 1.6501164599381315e-05, "loss": 0.6785, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2778 }, { "epoch": 0.9335439882005281, "grad_norm": 0.16044968366622925, "learning_rate": 1.64983508471349e-05, "loss": 0.6596, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2779 }, { "epoch": 0.9338799162279482, "grad_norm": 0.16327182948589325, "learning_rate": 1.6495536204049526e-05, "loss": 0.6628, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2780 }, { "epoch": 0.9342158442553683, "grad_norm": 0.173807293176651, "learning_rate": 1.649272067051104e-05, "loss": 0.6644, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2781 }, { "epoch": 0.9345517722827884, "grad_norm": 0.15695132315158844, "learning_rate": 1.648990424690542e-05, "loss": 0.6784, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2782 }, { "epoch": 0.9348877003102085, "grad_norm": 0.17704139649868011, "learning_rate": 1.648708693361875e-05, "loss": 0.6662, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2783 }, { "epoch": 0.9352236283376286, "grad_norm": 0.1708238571882248, "learning_rate": 1.6484268731037264e-05, "loss": 0.6796, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2784 }, { "epoch": 0.9355595563650488, "grad_norm": 0.158633291721344, "learning_rate": 1.648144963954729e-05, "loss": 0.7004, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2785 }, { "epoch": 0.935895484392469, "grad_norm": 0.16193822026252747, "learning_rate": 1.6478629659535287e-05, "loss": 0.675, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2786 }, { "epoch": 0.936231412419889, "grad_norm": 0.13768506050109863, "learning_rate": 1.6475808791387847e-05, "loss": 0.6697, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2787 }, { "epoch": 0.9365673404473092, "grad_norm": 0.14996258914470673, "learning_rate": 1.6472987035491664e-05, "loss": 0.662, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2788 }, { "epoch": 0.9369032684747293, "grad_norm": 0.13347609341144562, "learning_rate": 1.6470164392233567e-05, "loss": 0.6774, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2789 }, { "epoch": 0.9372391965021494, "grad_norm": 0.15005432069301605, "learning_rate": 1.6467340862000504e-05, "loss": 0.6986, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2790 }, { "epoch": 0.9375751245295696, "grad_norm": 0.15605835616588593, "learning_rate": 1.6464516445179546e-05, "loss": 0.6796, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2791 }, { "epoch": 0.9379110525569897, "grad_norm": 0.15852224826812744, "learning_rate": 1.646169114215788e-05, "loss": 0.6787, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2792 }, { "epoch": 0.9382469805844098, "grad_norm": 0.1698504090309143, "learning_rate": 1.645886495332282e-05, "loss": 0.6734, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2793 }, { "epoch": 0.9385829086118299, "grad_norm": 0.1877134144306183, "learning_rate": 1.64560378790618e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2794 }, { "epoch": 0.93891883663925, "grad_norm": 0.15126767754554749, "learning_rate": 1.6453209919762376e-05, "loss": 0.6909, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2795 }, { "epoch": 0.9392547646666701, "grad_norm": 0.14641760289669037, "learning_rate": 1.6450381075812223e-05, "loss": 0.6859, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2796 }, { "epoch": 0.9395906926940903, "grad_norm": 0.16419899463653564, "learning_rate": 1.6447551347599134e-05, "loss": 0.6969, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2797 }, { "epoch": 0.9399266207215105, "grad_norm": 0.16691061854362488, "learning_rate": 1.6444720735511038e-05, "loss": 0.6942, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2798 }, { "epoch": 0.9402625487489306, "grad_norm": 0.1556505560874939, "learning_rate": 1.6441889239935963e-05, "loss": 0.6783, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2799 }, { "epoch": 0.9405984767763507, "grad_norm": 0.15214872360229492, "learning_rate": 1.643905686126208e-05, "loss": 0.6944, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2800 }, { "epoch": 0.9409344048037708, "grad_norm": 0.185982808470726, "learning_rate": 1.6436223599877664e-05, "loss": 0.6929, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2801 }, { "epoch": 0.9412703328311909, "grad_norm": 0.17545364797115326, "learning_rate": 1.6433389456171126e-05, "loss": 0.6616, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2802 }, { "epoch": 0.941606260858611, "grad_norm": 0.16039732098579407, "learning_rate": 1.6430554430530983e-05, "loss": 0.6522, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2803 }, { "epoch": 0.9419421888860312, "grad_norm": 0.14088039100170135, "learning_rate": 1.642771852334588e-05, "loss": 0.6915, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2804 }, { "epoch": 0.9422781169134513, "grad_norm": 0.15912586450576782, "learning_rate": 1.6424881735004587e-05, "loss": 0.6778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2805 }, { "epoch": 0.9426140449408714, "grad_norm": 0.16312262415885925, "learning_rate": 1.642204406589599e-05, "loss": 0.6802, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2806 }, { "epoch": 0.9429499729682915, "grad_norm": 0.16229736804962158, "learning_rate": 1.6419205516409094e-05, "loss": 0.6795, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2807 }, { "epoch": 0.9432859009957116, "grad_norm": 0.16460958123207092, "learning_rate": 1.6416366086933026e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2808 }, { "epoch": 0.9436218290231317, "grad_norm": 0.16157154738903046, "learning_rate": 1.6413525777857037e-05, "loss": 0.6826, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2809 }, { "epoch": 0.943957757050552, "grad_norm": 0.1565696895122528, "learning_rate": 1.6410684589570498e-05, "loss": 0.6913, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2810 }, { "epoch": 0.9442936850779721, "grad_norm": 0.1644894927740097, "learning_rate": 1.64078425224629e-05, "loss": 0.6932, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2811 }, { "epoch": 0.9446296131053922, "grad_norm": 0.15979817509651184, "learning_rate": 1.6404999576923844e-05, "loss": 0.6723, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2812 }, { "epoch": 0.9449655411328123, "grad_norm": 0.17008109390735626, "learning_rate": 1.6402155753343064e-05, "loss": 0.6848, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2813 }, { "epoch": 0.9453014691602324, "grad_norm": 0.19002798199653625, "learning_rate": 1.639931105211042e-05, "loss": 0.6678, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2814 }, { "epoch": 0.9456373971876525, "grad_norm": 0.13670577108860016, "learning_rate": 1.6396465473615876e-05, "loss": 0.674, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2815 }, { "epoch": 0.9459733252150727, "grad_norm": 0.13549281656742096, "learning_rate": 1.6393619018249522e-05, "loss": 0.6733, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2816 }, { "epoch": 0.9463092532424928, "grad_norm": 0.1528649628162384, "learning_rate": 1.6390771686401575e-05, "loss": 0.6652, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2817 }, { "epoch": 0.9466451812699129, "grad_norm": 0.1540534645318985, "learning_rate": 1.638792347846236e-05, "loss": 0.6754, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2818 }, { "epoch": 0.946981109297333, "grad_norm": 0.14385341107845306, "learning_rate": 1.638507439482234e-05, "loss": 0.7042, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2819 }, { "epoch": 0.9473170373247531, "grad_norm": 0.16507968306541443, "learning_rate": 1.6382224435872075e-05, "loss": 0.6703, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2820 }, { "epoch": 0.9476529653521732, "grad_norm": 0.18964500725269318, "learning_rate": 1.6379373602002268e-05, "loss": 0.6758, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2821 }, { "epoch": 0.9479888933795935, "grad_norm": 0.17994408309459686, "learning_rate": 1.6376521893603722e-05, "loss": 0.6739, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2822 }, { "epoch": 0.9483248214070136, "grad_norm": 0.13961055874824524, "learning_rate": 1.6373669311067372e-05, "loss": 0.6941, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2823 }, { "epoch": 0.9486607494344337, "grad_norm": 0.1868407279253006, "learning_rate": 1.6370815854784275e-05, "loss": 0.6791, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2824 }, { "epoch": 0.9489966774618538, "grad_norm": 0.18826429545879364, "learning_rate": 1.6367961525145594e-05, "loss": 0.6571, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2825 }, { "epoch": 0.9493326054892739, "grad_norm": 0.18961596488952637, "learning_rate": 1.6365106322542625e-05, "loss": 0.6887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2826 }, { "epoch": 0.949668533516694, "grad_norm": 0.15489639341831207, "learning_rate": 1.636225024736678e-05, "loss": 0.6737, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2827 }, { "epoch": 0.9500044615441142, "grad_norm": 0.16903269290924072, "learning_rate": 1.635939330000959e-05, "loss": 0.6902, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2828 }, { "epoch": 0.9503403895715343, "grad_norm": 0.16143798828125, "learning_rate": 1.63565354808627e-05, "loss": 0.7116, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2829 }, { "epoch": 0.9506763175989544, "grad_norm": 0.1531323343515396, "learning_rate": 1.6353676790317878e-05, "loss": 0.6654, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2830 }, { "epoch": 0.9510122456263745, "grad_norm": 0.1373724341392517, "learning_rate": 1.6350817228767024e-05, "loss": 0.6538, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2831 }, { "epoch": 0.9513481736537946, "grad_norm": 0.16465404629707336, "learning_rate": 1.634795679660214e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2832 }, { "epoch": 0.9516841016812148, "grad_norm": 0.15219633281230927, "learning_rate": 1.634509549421535e-05, "loss": 0.6807, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2833 }, { "epoch": 0.952020029708635, "grad_norm": 0.14487630128860474, "learning_rate": 1.6342233321998912e-05, "loss": 0.6769, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2834 }, { "epoch": 0.9523559577360551, "grad_norm": 0.18681640923023224, "learning_rate": 1.633937028034518e-05, "loss": 0.6781, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2835 }, { "epoch": 0.9526918857634752, "grad_norm": 0.16558964550495148, "learning_rate": 1.633650636964665e-05, "loss": 0.6645, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2836 }, { "epoch": 0.9530278137908953, "grad_norm": 0.16898314654827118, "learning_rate": 1.6333641590295917e-05, "loss": 0.6814, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2837 }, { "epoch": 0.9533637418183154, "grad_norm": 0.14037619531154633, "learning_rate": 1.6330775942685713e-05, "loss": 0.6597, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2838 }, { "epoch": 0.9536996698457355, "grad_norm": 0.15658144652843475, "learning_rate": 1.6327909427208878e-05, "loss": 0.6698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2839 }, { "epoch": 0.9540355978731557, "grad_norm": 0.1587803214788437, "learning_rate": 1.632504204425837e-05, "loss": 0.6871, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2840 }, { "epoch": 0.9543715259005758, "grad_norm": 0.17054854333400726, "learning_rate": 1.6322173794227277e-05, "loss": 0.6697, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2841 }, { "epoch": 0.9547074539279959, "grad_norm": 0.17159830033779144, "learning_rate": 1.6319304677508792e-05, "loss": 0.6707, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2842 }, { "epoch": 0.955043381955416, "grad_norm": 0.15128764510154724, "learning_rate": 1.6316434694496237e-05, "loss": 0.6953, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2843 }, { "epoch": 0.9553793099828362, "grad_norm": 0.16326795518398285, "learning_rate": 1.6313563845583052e-05, "loss": 0.6559, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2844 }, { "epoch": 0.9557152380102563, "grad_norm": 0.15316429734230042, "learning_rate": 1.6310692131162785e-05, "loss": 0.6777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2845 }, { "epoch": 0.9560511660376765, "grad_norm": 0.1459018886089325, "learning_rate": 1.6307819551629114e-05, "loss": 0.6895, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2846 }, { "epoch": 0.9563870940650966, "grad_norm": 0.16267679631710052, "learning_rate": 1.6304946107375835e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2847 }, { "epoch": 0.9567230220925167, "grad_norm": 0.15323404967784882, "learning_rate": 1.630207179879686e-05, "loss": 0.6576, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2848 }, { "epoch": 0.9570589501199368, "grad_norm": 0.16808795928955078, "learning_rate": 1.629919662628621e-05, "loss": 0.7105, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2849 }, { "epoch": 0.9573948781473569, "grad_norm": 0.16371938586235046, "learning_rate": 1.6296320590238046e-05, "loss": 0.6805, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2850 }, { "epoch": 0.957730806174777, "grad_norm": 0.14372631907463074, "learning_rate": 1.6293443691046626e-05, "loss": 0.6914, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2851 }, { "epoch": 0.9580667342021972, "grad_norm": 0.16194842755794525, "learning_rate": 1.6290565929106337e-05, "loss": 0.6651, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2852 }, { "epoch": 0.9584026622296173, "grad_norm": 0.152812659740448, "learning_rate": 1.6287687304811692e-05, "loss": 0.69, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2853 }, { "epoch": 0.9587385902570374, "grad_norm": 0.15830448269844055, "learning_rate": 1.6284807818557298e-05, "loss": 0.6715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2854 }, { "epoch": 0.9590745182844576, "grad_norm": 0.16885600984096527, "learning_rate": 1.62819274707379e-05, "loss": 0.687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2855 }, { "epoch": 0.9594104463118777, "grad_norm": 0.18989531695842743, "learning_rate": 1.627904626174836e-05, "loss": 0.7145, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2856 }, { "epoch": 0.9597463743392978, "grad_norm": 0.15001614391803741, "learning_rate": 1.6276164191983653e-05, "loss": 0.6887, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2857 }, { "epoch": 0.9600823023667179, "grad_norm": 0.1430666595697403, "learning_rate": 1.6273281261838874e-05, "loss": 0.6658, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2858 }, { "epoch": 0.9604182303941381, "grad_norm": 0.1446053832769394, "learning_rate": 1.627039747170923e-05, "loss": 0.6756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2859 }, { "epoch": 0.9607541584215582, "grad_norm": 0.13670247793197632, "learning_rate": 1.626751282199005e-05, "loss": 0.6711, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2860 }, { "epoch": 0.9610900864489783, "grad_norm": 0.1337873637676239, "learning_rate": 1.626462731307679e-05, "loss": 0.7028, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2861 }, { "epoch": 0.9614260144763984, "grad_norm": 0.14410564303398132, "learning_rate": 1.6261740945365012e-05, "loss": 0.6797, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2862 }, { "epoch": 0.9617619425038185, "grad_norm": 0.132269486784935, "learning_rate": 1.6258853719250396e-05, "loss": 0.6812, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2863 }, { "epoch": 0.9620978705312386, "grad_norm": 0.13551385700702667, "learning_rate": 1.6255965635128745e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2864 }, { "epoch": 0.9624337985586588, "grad_norm": 0.1410723626613617, "learning_rate": 1.6253076693395983e-05, "loss": 0.67, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2865 }, { "epoch": 0.962769726586079, "grad_norm": 0.14751696586608887, "learning_rate": 1.6250186894448135e-05, "loss": 0.6981, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2866 }, { "epoch": 0.9631056546134991, "grad_norm": 0.16252553462982178, "learning_rate": 1.6247296238681364e-05, "loss": 0.6702, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2867 }, { "epoch": 0.9634415826409192, "grad_norm": 0.15964703261852264, "learning_rate": 1.6244404726491937e-05, "loss": 0.6786, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2868 }, { "epoch": 0.9637775106683393, "grad_norm": 0.16625089943408966, "learning_rate": 1.6241512358276244e-05, "loss": 0.6774, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2869 }, { "epoch": 0.9641134386957594, "grad_norm": 0.15678860247135162, "learning_rate": 1.623861913443079e-05, "loss": 0.644, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2870 }, { "epoch": 0.9644493667231796, "grad_norm": 0.1583281308412552, "learning_rate": 1.62357250553522e-05, "loss": 0.7084, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2871 }, { "epoch": 0.9647852947505997, "grad_norm": 0.19575071334838867, "learning_rate": 1.623283012143721e-05, "loss": 0.6988, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2872 }, { "epoch": 0.9651212227780198, "grad_norm": 0.15569289028644562, "learning_rate": 1.622993433308268e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2873 }, { "epoch": 0.9654571508054399, "grad_norm": 0.14890728890895844, "learning_rate": 1.6227037690685587e-05, "loss": 0.6847, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2874 }, { "epoch": 0.96579307883286, "grad_norm": 0.16705311834812164, "learning_rate": 1.6224140194643024e-05, "loss": 0.6832, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2875 }, { "epoch": 0.9661290068602801, "grad_norm": 0.16163070499897003, "learning_rate": 1.6221241845352192e-05, "loss": 0.6868, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2876 }, { "epoch": 0.9664649348877004, "grad_norm": 0.1636509746313095, "learning_rate": 1.621834264321042e-05, "loss": 0.6785, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.83, "memory/max_allocated (GiB)": 64.83, "step": 2877 }, { "epoch": 0.9668008629151205, "grad_norm": 0.1693284809589386, "learning_rate": 1.621544258861516e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2878 }, { "epoch": 0.9671367909425406, "grad_norm": 0.15469908714294434, "learning_rate": 1.621254168196396e-05, "loss": 0.6836, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2879 }, { "epoch": 0.9674727189699607, "grad_norm": 0.1619737595319748, "learning_rate": 1.6209639923654502e-05, "loss": 0.6895, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2880 }, { "epoch": 0.9678086469973808, "grad_norm": 0.1477537453174591, "learning_rate": 1.6206737314084574e-05, "loss": 0.6844, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2881 }, { "epoch": 0.9681445750248009, "grad_norm": 0.1619211584329605, "learning_rate": 1.6203833853652092e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2882 }, { "epoch": 0.9684805030522211, "grad_norm": 0.16599607467651367, "learning_rate": 1.6200929542755076e-05, "loss": 0.6812, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2883 }, { "epoch": 0.9688164310796412, "grad_norm": 0.14893466234207153, "learning_rate": 1.6198024381791677e-05, "loss": 0.6781, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2884 }, { "epoch": 0.9691523591070613, "grad_norm": 0.18290504813194275, "learning_rate": 1.619511837116015e-05, "loss": 0.6742, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2885 }, { "epoch": 0.9694882871344814, "grad_norm": 0.18262949585914612, "learning_rate": 1.619221151125887e-05, "loss": 0.682, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2886 }, { "epoch": 0.9698242151619015, "grad_norm": 0.17302608489990234, "learning_rate": 1.6189303802486332e-05, "loss": 0.677, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2887 }, { "epoch": 0.9701601431893216, "grad_norm": 0.20643551647663116, "learning_rate": 1.6186395245241145e-05, "loss": 0.6738, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2888 }, { "epoch": 0.9704960712167419, "grad_norm": 0.16963914036750793, "learning_rate": 1.6183485839922032e-05, "loss": 0.6718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2889 }, { "epoch": 0.970831999244162, "grad_norm": 0.19627614319324493, "learning_rate": 1.6180575586927836e-05, "loss": 0.6685, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2890 }, { "epoch": 0.9711679272715821, "grad_norm": 0.2076500654220581, "learning_rate": 1.617766448665751e-05, "loss": 0.6763, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2891 }, { "epoch": 0.9715038552990022, "grad_norm": 0.17073532938957214, "learning_rate": 1.617475253951014e-05, "loss": 0.6689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2892 }, { "epoch": 0.9718397833264223, "grad_norm": 0.18967114388942719, "learning_rate": 1.6171839745884897e-05, "loss": 0.697, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2893 }, { "epoch": 0.9721757113538424, "grad_norm": 0.1827068328857422, "learning_rate": 1.6168926106181107e-05, "loss": 0.6995, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2894 }, { "epoch": 0.9725116393812626, "grad_norm": 0.17867188155651093, "learning_rate": 1.616601162079818e-05, "loss": 0.6917, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2895 }, { "epoch": 0.9728475674086827, "grad_norm": 0.17082832753658295, "learning_rate": 1.6163096290135656e-05, "loss": 0.6902, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2896 }, { "epoch": 0.9731834954361028, "grad_norm": 0.1834239810705185, "learning_rate": 1.616018011459319e-05, "loss": 0.6728, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2897 }, { "epoch": 0.9735194234635229, "grad_norm": 0.15439344942569733, "learning_rate": 1.615726309457055e-05, "loss": 0.689, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2898 }, { "epoch": 0.973855351490943, "grad_norm": 0.15687115490436554, "learning_rate": 1.6154345230467624e-05, "loss": 0.6778, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2899 }, { "epoch": 0.9741912795183632, "grad_norm": 0.17096814513206482, "learning_rate": 1.6151426522684408e-05, "loss": 0.6961, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2900 }, { "epoch": 0.9745272075457834, "grad_norm": 0.1550922393798828, "learning_rate": 1.6148506971621024e-05, "loss": 0.6687, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2901 }, { "epoch": 0.9748631355732035, "grad_norm": 0.15498653054237366, "learning_rate": 1.6145586577677702e-05, "loss": 0.6698, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2902 }, { "epoch": 0.9751990636006236, "grad_norm": 0.14789482951164246, "learning_rate": 1.614266534125479e-05, "loss": 0.6664, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2903 }, { "epoch": 0.9755349916280437, "grad_norm": 0.16519397497177124, "learning_rate": 1.6139743262752753e-05, "loss": 0.6841, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2904 }, { "epoch": 0.9758709196554638, "grad_norm": 0.15663783252239227, "learning_rate": 1.6136820342572165e-05, "loss": 0.6941, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2905 }, { "epoch": 0.9762068476828839, "grad_norm": 0.15408457815647125, "learning_rate": 1.6133896581113723e-05, "loss": 0.6716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2906 }, { "epoch": 0.9765427757103041, "grad_norm": 0.1665946990251541, "learning_rate": 1.613097197877824e-05, "loss": 0.6962, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2907 }, { "epoch": 0.9768787037377242, "grad_norm": 0.17166510224342346, "learning_rate": 1.6128046535966634e-05, "loss": 0.6869, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2908 }, { "epoch": 0.9772146317651443, "grad_norm": 0.15486276149749756, "learning_rate": 1.612512025307995e-05, "loss": 0.6954, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2909 }, { "epoch": 0.9775505597925644, "grad_norm": 0.1554611623287201, "learning_rate": 1.612219313051934e-05, "loss": 0.6788, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2910 }, { "epoch": 0.9778864878199846, "grad_norm": 0.17611290514469147, "learning_rate": 1.611926516868608e-05, "loss": 0.671, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2911 }, { "epoch": 0.9782224158474047, "grad_norm": 0.1668335497379303, "learning_rate": 1.6116336367981547e-05, "loss": 0.6755, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2912 }, { "epoch": 0.9785583438748248, "grad_norm": 0.16048111021518707, "learning_rate": 1.6113406728807247e-05, "loss": 0.6701, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2913 }, { "epoch": 0.978894271902245, "grad_norm": 0.1782110184431076, "learning_rate": 1.6110476251564793e-05, "loss": 0.6819, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2914 }, { "epoch": 0.9792301999296651, "grad_norm": 0.14191658794879913, "learning_rate": 1.6107544936655916e-05, "loss": 0.6715, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2915 }, { "epoch": 0.9795661279570852, "grad_norm": 0.1830700784921646, "learning_rate": 1.6104612784482462e-05, "loss": 0.6987, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2916 }, { "epoch": 0.9799020559845053, "grad_norm": 0.2031489461660385, "learning_rate": 1.610167979544639e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2917 }, { "epoch": 0.9802379840119254, "grad_norm": 0.13997502624988556, "learning_rate": 1.609874596994977e-05, "loss": 0.6656, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.34, "memory/max_allocated (GiB)": 64.34, "step": 2918 }, { "epoch": 0.9805739120393455, "grad_norm": 0.22135496139526367, "learning_rate": 1.60958113083948e-05, "loss": 0.6676, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2919 }, { "epoch": 0.9809098400667657, "grad_norm": 0.14349541068077087, "learning_rate": 1.6092875811183773e-05, "loss": 0.7027, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2920 }, { "epoch": 0.9812457680941858, "grad_norm": 0.18086475133895874, "learning_rate": 1.6089939478719115e-05, "loss": 0.6718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2921 }, { "epoch": 0.981581696121606, "grad_norm": 0.17939381301403046, "learning_rate": 1.6087002311403358e-05, "loss": 0.6747, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2922 }, { "epoch": 0.9819176241490261, "grad_norm": 0.14848659932613373, "learning_rate": 1.608406430963915e-05, "loss": 0.6813, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2923 }, { "epoch": 0.9822535521764462, "grad_norm": 0.17931801080703735, "learning_rate": 1.6081125473829247e-05, "loss": 0.713, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2924 }, { "epoch": 0.9825894802038663, "grad_norm": 0.166982963681221, "learning_rate": 1.607818580437653e-05, "loss": 0.6904, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2925 }, { "epoch": 0.9829254082312865, "grad_norm": 0.14602026343345642, "learning_rate": 1.6075245301683992e-05, "loss": 0.6859, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2926 }, { "epoch": 0.9832613362587066, "grad_norm": 0.17140492796897888, "learning_rate": 1.607230396615473e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2927 }, { "epoch": 0.9835972642861267, "grad_norm": 0.17097865045070648, "learning_rate": 1.6069361798191966e-05, "loss": 0.6878, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2928 }, { "epoch": 0.9839331923135468, "grad_norm": 0.1491735875606537, "learning_rate": 1.6066418798199033e-05, "loss": 0.6555, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2929 }, { "epoch": 0.9842691203409669, "grad_norm": 0.1727997064590454, "learning_rate": 1.6063474966579378e-05, "loss": 0.6571, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2930 }, { "epoch": 0.984605048368387, "grad_norm": 0.17974188923835754, "learning_rate": 1.606053030373656e-05, "loss": 0.668, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2931 }, { "epoch": 0.9849409763958072, "grad_norm": 0.18227393925189972, "learning_rate": 1.6057584810074257e-05, "loss": 0.6709, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2932 }, { "epoch": 0.9852769044232274, "grad_norm": 0.18395885825157166, "learning_rate": 1.6054638485996257e-05, "loss": 0.6914, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2933 }, { "epoch": 0.9856128324506475, "grad_norm": 0.1827104240655899, "learning_rate": 1.605169133190646e-05, "loss": 0.6751, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2934 }, { "epoch": 0.9859487604780676, "grad_norm": 0.210716113448143, "learning_rate": 1.6048743348208884e-05, "loss": 0.6915, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2935 }, { "epoch": 0.9862846885054877, "grad_norm": 0.16972891986370087, "learning_rate": 1.604579453530766e-05, "loss": 0.676, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 63.85, "memory/max_allocated (GiB)": 63.85, "step": 2936 }, { "epoch": 0.9866206165329078, "grad_norm": 0.16418002545833588, "learning_rate": 1.604284489360703e-05, "loss": 0.6853, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2937 }, { "epoch": 0.986956544560328, "grad_norm": 0.1633177399635315, "learning_rate": 1.603989442351135e-05, "loss": 0.6718, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2938 }, { "epoch": 0.9872924725877481, "grad_norm": 0.146047443151474, "learning_rate": 1.6036943125425095e-05, "loss": 0.6955, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2939 }, { "epoch": 0.9876284006151682, "grad_norm": 0.15751013159751892, "learning_rate": 1.6033990999752847e-05, "loss": 0.6854, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2940 }, { "epoch": 0.9879643286425883, "grad_norm": 0.15987730026245117, "learning_rate": 1.6031038046899303e-05, "loss": 0.6917, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2941 }, { "epoch": 0.9883002566700084, "grad_norm": 0.1604411005973816, "learning_rate": 1.6028084267269276e-05, "loss": 0.6573, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2942 }, { "epoch": 0.9886361846974285, "grad_norm": 0.15673711895942688, "learning_rate": 1.6025129661267688e-05, "loss": 0.6629, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2943 }, { "epoch": 0.9889721127248488, "grad_norm": 0.1611650586128235, "learning_rate": 1.6022174229299584e-05, "loss": 0.6761, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2944 }, { "epoch": 0.9893080407522689, "grad_norm": 0.1652398258447647, "learning_rate": 1.6019217971770106e-05, "loss": 0.6735, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2945 }, { "epoch": 0.989643968779689, "grad_norm": 0.15635129809379578, "learning_rate": 1.6016260889084523e-05, "loss": 0.665, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2946 }, { "epoch": 0.9899798968071091, "grad_norm": 0.17412208020687103, "learning_rate": 1.6013302981648207e-05, "loss": 0.6767, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2947 }, { "epoch": 0.9903158248345292, "grad_norm": 0.1552392989397049, "learning_rate": 1.6010344249866658e-05, "loss": 0.6863, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2948 }, { "epoch": 0.9906517528619493, "grad_norm": 0.16742928326129913, "learning_rate": 1.6007384694145477e-05, "loss": 0.6777, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2949 }, { "epoch": 0.9909876808893695, "grad_norm": 0.14997267723083496, "learning_rate": 1.600442431489037e-05, "loss": 0.6872, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2950 }, { "epoch": 0.9913236089167896, "grad_norm": 0.17823077738285065, "learning_rate": 1.600146311250718e-05, "loss": 0.7024, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2951 }, { "epoch": 0.9916595369442097, "grad_norm": 0.1631101667881012, "learning_rate": 1.5998501087401842e-05, "loss": 0.6756, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2952 }, { "epoch": 0.9919954649716298, "grad_norm": 0.15142762660980225, "learning_rate": 1.599553823998041e-05, "loss": 0.6598, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2953 }, { "epoch": 0.9923313929990499, "grad_norm": 0.16788306832313538, "learning_rate": 1.5992574570649053e-05, "loss": 0.6886, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2954 }, { "epoch": 0.99266732102647, "grad_norm": 0.15272101759910583, "learning_rate": 1.5989610079814055e-05, "loss": 0.6559, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2955 }, { "epoch": 0.9930032490538903, "grad_norm": 0.16314291954040527, "learning_rate": 1.5986644767881805e-05, "loss": 0.6716, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2956 }, { "epoch": 0.9933391770813104, "grad_norm": 0.15909305214881897, "learning_rate": 1.5983678635258814e-05, "loss": 0.6587, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2957 }, { "epoch": 0.9936751051087305, "grad_norm": 0.15368904173374176, "learning_rate": 1.5980711682351685e-05, "loss": 0.6937, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2958 }, { "epoch": 0.9940110331361506, "grad_norm": 0.17513196170330048, "learning_rate": 1.5977743909567166e-05, "loss": 0.7119, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2959 }, { "epoch": 0.9943469611635707, "grad_norm": 0.1625467836856842, "learning_rate": 1.5974775317312094e-05, "loss": 0.6772, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.4, "memory/max_allocated (GiB)": 64.4, "step": 2960 }, { "epoch": 0.9946828891909908, "grad_norm": 0.1565469205379486, "learning_rate": 1.5971805905993422e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2961 }, { "epoch": 0.9950188172184109, "grad_norm": 0.1748880296945572, "learning_rate": 1.5968835676018216e-05, "loss": 0.6842, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2962 }, { "epoch": 0.9953547452458311, "grad_norm": 0.16412098705768585, "learning_rate": 1.5965864627793662e-05, "loss": 0.6843, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2963 }, { "epoch": 0.9956906732732512, "grad_norm": 0.16886356472969055, "learning_rate": 1.5962892761727044e-05, "loss": 0.6707, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2964 }, { "epoch": 0.9960266013006713, "grad_norm": 0.15419377386569977, "learning_rate": 1.5959920078225774e-05, "loss": 0.6731, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2965 }, { "epoch": 0.9963625293280914, "grad_norm": 0.16146303713321686, "learning_rate": 1.595694657769736e-05, "loss": 0.6627, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2966 }, { "epoch": 0.9966984573555115, "grad_norm": 0.1507372409105301, "learning_rate": 1.595397226054944e-05, "loss": 0.6679, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2967 }, { "epoch": 0.9970343853829317, "grad_norm": 0.1817164272069931, "learning_rate": 1.5950997127189744e-05, "loss": 0.6823, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2968 }, { "epoch": 0.9973703134103519, "grad_norm": 0.18103072047233582, "learning_rate": 1.594802117802613e-05, "loss": 0.6943, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2969 }, { "epoch": 0.997706241437772, "grad_norm": 0.1648203432559967, "learning_rate": 1.594504441346656e-05, "loss": 0.6792, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2970 }, { "epoch": 0.9980421694651921, "grad_norm": 0.1676451414823532, "learning_rate": 1.5942066833919107e-05, "loss": 0.6757, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2971 }, { "epoch": 0.9983780974926122, "grad_norm": 0.14616689085960388, "learning_rate": 1.5939088439791963e-05, "loss": 0.6829, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2972 }, { "epoch": 0.9987140255200323, "grad_norm": 0.17457380890846252, "learning_rate": 1.5936109231493423e-05, "loss": 0.6855, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2973 }, { "epoch": 0.9990499535474524, "grad_norm": 0.16862210631370544, "learning_rate": 1.59331292094319e-05, "loss": 0.679, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2974 }, { "epoch": 0.9993858815748726, "grad_norm": 0.15578851103782654, "learning_rate": 1.5930148374015915e-05, "loss": 0.6656, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2975 }, { "epoch": 0.9997218096022927, "grad_norm": 0.16697797179222107, "learning_rate": 1.59271667256541e-05, "loss": 0.707, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2976 }, { "epoch": 1.0, "grad_norm": 0.1576320081949234, "learning_rate": 1.5924184264755203e-05, "loss": 0.6893, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2977 }, { "epoch": 1.0003359280274202, "grad_norm": 0.16428636014461517, "learning_rate": 1.5921200991728078e-05, "loss": 0.6484, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2978 }, { "epoch": 1.0006718560548402, "grad_norm": 0.15751852095127106, "learning_rate": 1.5918216906981697e-05, "loss": 0.6774, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.82, "memory/max_allocated (GiB)": 64.82, "step": 2979 }, { "epoch": 1.0010077840822604, "grad_norm": 0.1473652571439743, "learning_rate": 1.5915232010925133e-05, "loss": 0.6849, "memory/device_reserved (GiB)": 76.08, "memory/max_active (GiB)": 64.89, "memory/max_allocated (GiB)": 64.89, "step": 2980 } ], "logging_steps": 1, "max_steps": 8931, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 298, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.839365383990018e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }